Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 269
- Log:
Overdue upgrade to AWStats v6.9, the most recent version
available at the time of writing.
- Author:
- rool
- Date:
- Wed May 27 23:57:15 +0100 2009
- Size:
- 30673 Bytes
- Properties:
- Property svn:executable is set
1 | #!/usr/bin/perl |
2 | #----------------------------------------------------------------------------- |
3 | # Allows you to get one unique output log file, sorted on date, |
4 | # built from particular sources. |
5 | # This tool is part of AWStats log analyzer but can be used |
6 | # alone with any other log analyzer. |
7 | # See COPYING.TXT file about AWStats GNU General Public License. |
8 | #----------------------------------------------------------------------------- |
9 | # $Revision: 1.41 $ - $Author: eldy $ - $Date: 2008/11/15 14:58:01 $ |
10 | |
11 | use strict; no strict "refs"; |
12 | #use diagnostics; |
13 | use POSIX qw( strftime ); |
14 | |
15 | |
16 | #----------------------------------------------------------------------------- |
17 | # Defines |
18 | #----------------------------------------------------------------------------- |
19 | |
# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
my $UseThread=0;
&Check_Thread_Use();
my $NbOfDNSLookupAsked = 0;
my %threadarray = ();
my %MyDNSTable = ();
my %TmpDNSLookup = ();

# ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
#my $UseThread=1;
#&Check_Thread_Use();
#my $NbOfDNSLookupAsked : shared = 0;
#my %threadarray : shared = ();
#my %MyDNSTable : shared = ();
#my %TmpDNSLookup : shared = ();


# ---------- Package globals: declarations ----------
# Scalars (version info, benchmark step, program identity, options, state).
use vars qw/
    $REVISION $VERSION
    $NBOFLINESFORBENCHMARK
    $DIR $PROG $Extension
    $Debug $ShowSteps $AddFileNum $AddFileName
    $MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
    $NbOfLinesShowsteps $AFINET $QueueCursor $StopOnFirstEof
/;
# Arrays (left empty here; @ParamFile is filled while parsing the command line).
use vars qw/
    @SkipDNSLookupFor
    @ParamFile
    @timerecordorder
/;
# Hashes, all keyed by log file number or by queue position.
use vars qw/
    %LogFileToDo %linerecord %timerecord %corrupted
    %QueueHostsToResolve %QueueRecords
/;

# ---------- Package globals: initial values ----------
# Extract the bare revision number out of the CVS keyword string.
($REVISION) = ('$Revision: 1.41 $' =~ /\s(.*)\s/);
$VERSION = "1.2 (build $REVISION)";

$NBOFLINESFORBENCHMARK = 8192;

# String-valued settings start empty.
$DIR = $PROG = $Extension = $DNSCache = $DirCgi = $DirData = $AFINET = '';
# Flags and counters start at zero.
$Debug = $ShowSteps = $AddFileNum = $AddFileName = 0;
$MaxNbOfThread = $DNSLookup = $DNSLookupAlreadyDone = 0;
$NbOfLinesShowsteps = $StopOnFirstEof = 0;

%LogFileToDo = ();
%linerecord = ();
%timerecord = ();
%corrupted = ();
%QueueHostsToResolve = ();
%QueueRecords = ();

# DRA2: @timerecordorder holds log file numbers ordered by the timestamp of
# DRA2: their pending record; those numbers are the keys of the hashes above.
@timerecordorder = ();

# ---------- External Program variables ----------
# Command used to decompress gzip files and the pattern that selects them.
my ($zcat, $zcat_file) = ('gzip -cd', '\.gz$');
# Same for bz2 files.
my ($bzcat, $bzcat_file) = ('bzcat', '\.bz2$');
96 | |
97 | |
98 | #----------------------------------------------------------------------------- |
99 | # Functions |
100 | #----------------------------------------------------------------------------- |
101 | |
#------------------------------------------------------------------------------
# Function:     Print "Error: <message>." on the selected output handle and
#               terminate the program with exit status 1
# Parameters:   $message
# Input:        None
# Output:       None
# Return:       Never returns
#------------------------------------------------------------------------------
sub error {
    my ($message) = @_;
    print "Error: $message.\n";
    exit 1;
}
113 | |
#------------------------------------------------------------------------------
# Function:     Print a timestamped debug line when the current debug level
#               is at least the level of the message
# Parameters:   $message, $level (optional, a false value means 1)
# Input:        $Debug
# Output:       None
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my ($text, $wanted) = @_;
    $wanted ||= 1;
    return if $Debug < $wanted;     # Message is more verbose than requested
    my $stamp = localtime(time());  # Scalar context: human readable date
    print "DEBUG $wanted - $stamp : $text\n";
}
128 | |
#------------------------------------------------------------------------------
# Function:     Print a warning message; when debugging is on the message is
#               also sent through debug() at level 1 first
# Parameters:   $message
# Input:        $Debug
# Output:       None
# Return:       None
#------------------------------------------------------------------------------
sub warning {
    my ($text) = @_;
    debug("$text", 1) if $Debug;
    print "$text\n";
}
141 | |
#-----------------------------------------------------------------------------
# Function:     Tell whether a string is made only of word characters,
#               whitespace and the punctuation + - / \ . % , ; : = " ' & ? !
# Input:        String
# Return:       1 if every character is in that set, 0 otherwise
#               (an empty string gives 0)
#-----------------------------------------------------------------------------
sub IsAscii {
    my ($candidate) = @_;
    debug("IsAscii($candidate)", 5) if $Debug;
    my $accepted = ($candidate =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) ? 1 : 0;
    if ($Debug) {
        debug(($accepted ? " Yes" : " No"), 5);
    }
    return $accepted;
}
157 | |
#-----------------------------------------------------------------------------
# DRA Function: Tell whether the reverse DNS lookup must be skipped for a host
# Input:        String (host), @SkipDNSLookupFor (list of regex patterns,
#               applied case-insensitively)
# Return:       1 if any pattern matches the host, 0 otherwise
#-----------------------------------------------------------------------------
sub SkipDNSLookup {
    my $host = $_[0];
    for my $pattern (@SkipDNSLookupFor) {
        return 1 if $host =~ /$pattern/i;
    }
    return 0;   # Not in @SkipDNSLookupFor
}
167 | |
#-----------------------------------------------------------------------------
# Function:     Resolve one IPv4 address into a host name (may be run inside
#               a thread) and store the answer in %TmpDNSLookup
# Input:        String (dotted IPv4 address)
# Output:       $TmpDNSLookup{$ipaddress} is set to the host name, or to '*'
#               when the lookup gave nothing, gave an IP address back, or
#               gave a name rejected by IsAscii.
#               $threadarray{$ipaddress} is removed, $NbOfDNSLookupAsked is
#               incremented.
# Return:       None
#-----------------------------------------------------------------------------
sub MakeDNSLookup {
    my ($ipaddress) = @_;
    $NbOfDNSLookupAsked++;
    use Socket; $AFINET=AF_INET;

    # Thread id is only asked for when threads are in use
    my $tid = 0;
    if ($MaxNbOfThread) { $tid = eval("threads->self->tid()"); }
    else                { $tid = 0; }
    debug(" ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4) if $Debug;

    # This is very slow, may take 20 seconds
    my $packedaddress = pack("C4", split(/\./, $ipaddress));
    my $resolved = gethostbyaddr($packedaddress, $AFINET);

    # A usable answer is non-empty, is not itself an IP address and passes IsAscii
    my $usable = $resolved
        && $resolved !~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
        && IsAscii($resolved);
    $TmpDNSLookup{$ipaddress} = $usable ? $resolved : '*';

    debug(" ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4) if $Debug;
    delete $threadarray{$ipaddress};    # Lookup for this address is no longer running
    return;
}
191 | |
#-----------------------------------------------------------------------------
# Function:     Print every record at the head of the output queue whose host
#               no longer needs resolving, replacing the IP address by the
#               resolved name when one is known. Stops at the first record
#               whose host is still pending.
# Parameters:   $logfilechosen  Log file number printed in front of each
#               record when $AddFileNum / $AddFileName are set.
#               NOTE(review): this is the number given by the caller, which
#               is not necessarily the file a queued record came from.
# Input:        $QueueCursor %QueueRecords %QueueHostsToResolve
#               %MyDNSTable %TmpDNSLookup $AddFileNum $AddFileName %LogFileToDo
# Output:       Printed records are removed from both queues and $QueueCursor
#               is moved past them
# Return:       0
#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
    my $logfilechosen=shift;
    if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
    while (my $host = $QueueHostsToResolve{$QueueCursor}) {
        # $host is '*' (nothing to resolve) or the IP address found in the record
        if ($host eq '*') {
            if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
        }
        elsif (my $cachedname = $MyDNSTable{$host}) {
            if ($cachedname ne '*') {
                # \Q..\E: the address must be matched literally, its dots are not wildcards
                $QueueRecords{$QueueCursor} =~ s/\Q$host\E/$cachedname/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in MyDNSTable $cachedname). We pull it.",4); }
            }
        }
        elsif (my $lookedupname = $TmpDNSLookup{$host}) {
            if ($lookedupname ne '*') {
                $QueueRecords{$QueueCursor} =~ s/\Q$host\E/$lookedupname/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in TmpDNSLookup $lookedupname). We pull it.",4); }
            }
        }
        else {
            # Host of the head record is not resolved yet: nothing more can be written
            last;
        }
        # Record is ready, we output it.
        if ($AddFileNum) { print "$logfilechosen "; }
        if ($AddFileName) { print "$LogFileToDo{$logfilechosen} "; }
        print "$QueueRecords{$QueueCursor}\n";
        delete $QueueRecords{$QueueCursor};
        delete $QueueHostsToResolve{$QueueCursor};
        $QueueCursor++;
    }
    return 0;
}
229 | |
#-----------------------------------------------------------------------------
# Function:     Look on the command line for a "-dnslookup=N" (or ":N")
#               option and, when one is present on Perl 5.8 or higher, either
#               load the threads modules ($UseThread set) or stop with an
#               error explaining how to enable the threaded version
# Input:        @ARGV $UseThread
# Return:       -
#-----------------------------------------------------------------------------
sub Check_Thread_Use {
    return if $] < 5.008;       # Older Perl: nothing is checked here
    foreach my $argument (@ARGV) {
        next unless $argument =~ /^-dnslookup[:=](\d{1,2})/i;
        if (! $UseThread) {
            &error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading");
        }
        else {
            if (!eval ('require "threads.pm";')) { &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
            if (!eval ('require "threads/shared.pm";')) { &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
        }
    }
    return;
}
245 | |
246 | |
247 | #----------------------------------------------------------------------------- |
248 | # MAIN |
249 | #----------------------------------------------------------------------------- |
# Split the script path into directory, program name and extension
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;

# Get parameters (Note: the -dnslookup=N form was already checked by
# Check_Thread_Use). Arguments starting with "-" are options, anything else
# is an input file name or pattern.
my $cpt=1;
foreach my $argument (@ARGV) {
    if ($argument =~ /^-/) {
        if ($argument =~ /debug=(\d)/i) { $Debug=$1; }
        # The file name is captured with the same case-insensitive match that
        # recognises the option, so "-DNSCache=file" gives "file" as well.
        elsif ($argument =~ /dnscache=(.*)$/i) { $DNSLookup||=2; $DNSCache=$1; }
        elsif ($argument =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
        elsif ($argument =~ /dnslookup/i) { $DNSLookup||=1; }
        elsif ($argument =~ /showsteps/i) { $ShowSteps=1; }
        elsif ($argument =~ /addfilenum/i) { $AddFileNum=1; }
        elsif ($argument =~ /addfilename/i) { $AddFileName=1; }
        elsif ($argument =~ /stoponfirsteof/i) { $StopOnFirstEof=1; }
        else { print "Unknown argument $argument ignored\n"; }
    }
    else {
        push @ParamFile, $argument;
        $cpt++;
    }
}
if ($Debug) { $|=1; }   # Unbuffered output so debug lines appear in order

if ($Debug) {
    debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
    debug("DNSLookup=$DNSLookup");
    debug("DNSCache=$DNSCache");
    debug("MaxNbOfThread=$MaxNbOfThread");
}

# Disallow MaxNbOfThread and Perl < 5.8
if ($] < 5.008 && $MaxNbOfThread) {
    error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
}

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
    # TODO Add a warning

}
290 | |
# No input file given: print the help screen and leave with status 0
if (! @ParamFile) {
    print <<"END_OF_USAGE";
----- $PROG $VERSION (c) Laurent Destailleur -----
$PROG allows you to get one unique output log file, sorted on date,
built from particular sources:
 - It can read several input log files,
 - It can read .gz/.bz2 log files,
 - It can also makes a fast reverse DNS lookup to replace
 all IP addresses into host names in resulting log file.
$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software
distributed with a GNU General Public License (See COPYING.txt file).
$PROG is part of AWStats but can be used alone as a log merger
or resolver before using any other log analyzer.

Usage:
 $PROG.$Extension [options] file
 $PROG.$Extension [options] file1 ... filen
 $PROG.$Extension [options] *.*
 perl $PROG.$Extension [options] *.* > newfile
Options:
 -dnslookup make a reverse DNS lookup on IP adresses
 -dnslookup=n same with a n parallel threads instead of serial requests
 -dnscache=file make DNS lookup from cache file first before network lookup
 -showsteps print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines
 -addfilenum if used with several files, file number can be added in first
 -addfilename if used with several files, file name can be added in first
 field of output file. This can be used to add a cluster id
 when log files come from several load balanced computers.
 -stoponfirsteof Stop processing when any logfile reaches end-of-file.

This runs $PROG in command line to open one or several
server log files to merge them (sorted on date) and/or to make a reverse
DNS lookup (if asked). The result log file is sent on standard output.
Note: $PROG is not a 'sort' tool to sort one file. It's a
software able to output sorted log records (with a reverse DNS lookup
included or not) even if log records are dispatched in several files.
Each of thoose files must be already independently sorted itself
(but that is the case in all web server log files). So you can use it
for load balanced log files or to group several old log files.

Don't forget that the main goal of logresolvemerge is to send log records to
a log analyzer in a sorted order without merging files on disk (NO NEED
OF DISK SPACE AT ALL) and without loading files into memory (NO NEED
OF MORE MEMORY). Choose of output records is done on the fly.

So logresolvemerge is particularly usefull when you want to output several
and/or large log files in a fast process, with no use of disk or
more memory, and in a chronological order through a pipe (to be used by a log
analyzer).

Note: If input records are not 'exactly' sorted but 'nearly' sorted (this
occurs with heavy servers), this is not a problem, the output will also
be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal
with such logs.

WARNING: If log files are old MAC text files (lines ended with CR char), you
can't run this tool on Win or Unix platforms.

WARNING: Because of memory holes in ActiveState Perl version, use another
Perl interpreter if you need to process large log files.

Now supports/detects:
 Automatic detection of log format
 Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.
 Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.
New versions and FAQ at http://awstats.sourceforge.net
END_OF_USAGE
    exit 0;
}
359 | |
# Get current time, every field zero-padded to two digits.
# localtime() gives the year as "years since 1900" and the month as 0..11,
# so 1900 is always added to the year (no special case for values below 100)
# and 1 to the month.
my $nowtime=time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
$nowyear+=1900;
my $nowsmallyear=substr($nowyear,2);    # Year without its first two digits
$nowmonth=sprintf("%02d",$nowmonth+1);
$nowday=sprintf("%02d",$nowday);
$nowhour=sprintf("%02d",$nowhour);
$nowmin=sprintf("%02d",$nowmin);
$nowsec=sprintf("%02d",$nowsec);
# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear+=1900;
my $tomorrowsmallyear=substr($tomorrowyear,2);
$tomorrowmonth=sprintf("%02d",$tomorrowmonth+1);
$tomorrowday=sprintf("%02d",$tomorrowday);
$tomorrowhour=sprintf("%02d",$tomorrowhour);
$tomorrowmin=sprintf("%02d",$tomorrowmin);
$tomorrowsec=sprintf("%02d",$tomorrowsec);
# Same YYYYMMDDHHMMSS layout as the per-record timestamps
my $timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;
380 | |
# Init other parameters
# The benchmark step becomes a bit mask (8192 -> 8191) for the -showsteps test
$NBOFLINESFORBENCHMARK--;
$DirCgi = '' if $ENV{"GATEWAY_INTERFACE"};
# A non-empty DirCgi always ends with a path separator
if ($DirCgi && $DirCgi !~ /\/$/ && $DirCgi !~ /\\$/) { $DirCgi .= '/'; }
# If not defined or set to "." then DirData is the directory of the script
$DirData = $DIR if (! $DirData || $DirData eq '.');
# If that directory is empty too, fall back to "."
$DirData = '.' if ! $DirData;
$DirData =~ s/\/$//;

#my %monthlib = ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
# Keys are the capitalized and the lower case abbreviations, values are "01".."12"
my %monthnum = ();
{
    my $position = 0;
    foreach my $abbreviation (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
        my $twodigits = sprintf("%02d", ++$position);
        $monthnum{$abbreviation} = $twodigits;
        $monthnum{lc $abbreviation} = $twodigits;
    }
}
392 | |
# Load the static DNS cache file given with -dnscache. Each line is split on
# whitespace into three fields (time, ip, name); lines with no ip or no name
# are ignored. A name of '*' is stored as the ip address itself.
if ($DNSCache) {
    if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
    # Three-argument open: the file name comes from the command line and must
    # never be interpreted as an open mode or a command.
    open(my $cachehandle, '<', $DNSCache) or error("Can't open cache file $DNSCache: $!");
    while (my $cacheline = <$cachehandle>) {
        my ($time, $ip, $name) = split(' ', $cacheline);
        if ($ip && $name) {
            $name="$ip" if $name eq '*';
            $MyDNSTable{$ip}=$name;
        }
    }
    close $cachehandle;
}
405 | |
406 | #----------------------------------------------------------------------------- |
407 | # PROCESSING CURRENT LOG(s) |
408 | #----------------------------------------------------------------------------- |
my $NbOfLinesRead=0;
my $NbOfLinesParsed=0;
my $logfilechosen=0;
my $starttime=time();

# Define the LogFileToDo list: one entry per input file, numbered from 1.
# A parameter without wildcard is taken as is; a parameter containing * or ?
# is matched against the content of its directory. Compressed files (.gz,
# .bz2) are stored as a "command file |" string so that they are later read
# through a pipe.
$cpt=1;
foreach my $key (0..(@ParamFile-1)) {
    if ($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) {

        if ($Debug) { debug("DBG1 Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
        # Check for supported compression
        if ($ParamFile[$key] =~ /$zcat_file/) {
            if ($Debug) { debug("GZIP compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the zcat command
            $ParamFile[$key] = $zcat . ' ' . $ParamFile[$key] . ' |';
        }
        elsif ($ParamFile[$key] =~ /$bzcat_file/) {
            if ($Debug) { debug("BZ2 compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the bzcat command
            $ParamFile[$key] = $bzcat . ' ' . $ParamFile[$key] . ' |';
        }

        $LogFileToDo{$cpt}=$ParamFile[$key];
        $cpt++;

    }
    else {
        # Split directory and file pattern, then turn the shell pattern into a regex
        my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
        $ParamFile[$key] = $1;
        if ($DirFile eq '') { $DirFile = '.'; }
        $ParamFile[$key] =~ s/\./\\\./g;
        $ParamFile[$key] =~ s/\*/\.\*/g;
        $ParamFile[$key] =~ s/\?/\./g;
        if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }
        my @filearray = ();
        if (opendir(DIR,"$DirFile")) {
            @filearray = sort readdir DIR;
            closedir DIR;   # A directory handle must be released with closedir
        }
        else {
            # An unreadable directory simply contributes no file
            if ($Debug) { debug("Failed to open directory \"$DirFile\": $!"); }
        }
        foreach my $i (0..$#filearray) {
            if ("$filearray[$i]" =~ /^$ParamFile[$key]$/ && "$filearray[$i]" ne "." && "$filearray[$i]" ne "..") {

                if ($Debug) { debug("DBG2 Log file $filearray[$i] is added to LogFileToDo with number $cpt."); }
                # Check for supported compression
                if ($filearray[$i] =~ /$zcat_file/) {
                    if ($Debug) { debug("GZIP compression detected for Log file $filearray[$i]."); }
                    # Modify the name to include the zcat command
                    $LogFileToDo{$cpt}=$zcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
                }
                elsif ($filearray[$i] =~ /$bzcat_file/) {
                    if ($Debug) { debug("BZ2 compression detected for Log file $filearray[$i]."); }
                    # Modify the name to include the bzcat command
                    $LogFileToDo{$cpt}=$bzcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
                }
                else {
                    $LogFileToDo{$cpt}="$DirFile/$filearray[$i]";
                }
                $cpt++;

            }
        }
    }
}
471 | |
# Stop when there is no input file at all
error("No input log file found") unless scalar keys %LogFileToDo;

# Open every input on a file handle named LOG<number>
debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max") if $Debug;
for my $filenumber (keys %LogFileToDo) {
    debug("Open log file number $filenumber: \"$LogFileToDo{$filenumber}\"") if $Debug;
    # NOTE(review): two-argument open is what makes the "command file |"
    # entries run as pipes; it also means any other input name is subject to
    # the same mode/pipe interpretation.
    open("LOG$filenumber","$LogFileToDo{$filenumber}") || error("Couldn't open log file \"$LogFileToDo{$filenumber}\" : $!");
    binmode "LOG$filenumber";   # To avoid pb of corrupted text log files with binary chars.
}
484 | |
# Main merge loop. On the first pass one record is read from every log file;
# afterwards one new record is read only from the file whose record was just
# chosen. @timerecordorder keeps the file numbers ordered by the timestamp of
# their pending record (oldest last), the oldest one is popped, optionally
# sent to DNS resolution, queued, and the head of the queue is printed as far
# as it is ready. The loop ends when no record is left, or at the first end
# of file when -stoponfirsteof was given.
$QueueCursor=1;
STOPONFIRSTEOF: while (1 == 1)
{
    # BEGIN Read new record
    # For each log file if logfilechosen is 0
    # If not, we go directly to log file instead of iterating over all keys for a match
    #----------------------------------------------------------------------------------
    my @readlist;
    if($logfilechosen == 0) {
        @readlist = keys %LogFileToDo;
    } else {
        @readlist = ($logfilechosen);
    }
    foreach my $logfilenb (@readlist)
    {
        if ($Debug) { debug("Search next record in file number $logfilenb",3); }
        # Read chosen log file until we found a record with good date or reaching end of file
        while (1 == 1) {
            my $LOG="LOG$logfilenb";
            $_=<$LOG>;  # Read new line
            if (! defined $_)
            {   # No more records in log file number $logfilenb
                # (tested with defined so that a last line "0" is not taken for end of file)
                if ($Debug) { debug(" No more records in file number $logfilenb",2); }
                delete $LogFileToDo{$logfilenb};
                if ($StopOnFirstEof)
                {
                    if ($Debug) { debug("Exiting loop due to EOF of logfile $logfilenb",1); }
                    last STOPONFIRSTEOF;
                }
                last;
            }

            $NbOfLinesRead++;
            chomp $_; s/\r$//;

            if (/^#/) { next; }     # Ignore comment lines (ISS writes such comments)
            if (/^!!/) { next; }    # Ignore comment lines (Webstar writes such comments)
            if (/^$/) { next; }     # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)

            $linerecord{$logfilenb}=$_;

            # Check filters
            #----------------------------------------------------------------------

            # Split YYYY-MM-DD HH:MM:SS
            # or DD/Month/YYYY:HH:MM:SS
            # or MM/DD/YY\tHH:MM:SS
            # or 9999.999
            # or Month DD HH:MM:SS
            my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
            if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
            elsif ($_ =~ /\[(\d?\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
            elsif ($_ =~ /\w+ (\w+) {1,2}(\d?\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
            elsif ($_ =~ /^(\d\d\d\d+\.\d\d\d) /)
            {
                # Epoch seconds with milliseconds: converted through gmtime
                my $timetime = strftime('%Y-%m-%d-%T', gmtime($1));
                $timetime =~ /(\d\d\d\d)-(\d\d)-(\d\d)-(\d\d):(\d\d):(\d\d)/;
                $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6;
            }
            elsif ($_ =~ /(\w+)\s\s?(\d?\d) (\d\d):(\d\d):(\d\d) /) {   # Month DD HH:MM:SS
                $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5;
                # This format has no year. A date later in the year than today
                # is taken from last year, anything else from this year.
                # %monthnum is keyed by month name, $nowmonth is already a
                # number, so the two are compared directly.
                my $recordmonth=$monthnum{$month}||0;
                if (($recordmonth>$nowmonth) || ($recordmonth==$nowmonth && $day>$nowday)) {
                    $year=$nowyear-1;
                }
                else { $year=$nowyear; }
            }
            if (length $day == 1) { $day = "0".$day; }

            if ($monthnum{$month}) { $month=$monthnum{$month}; }    # Change lib month in num month if necessary

            # Create $timerecord like YYYYMMDDHHMMSS
            $timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
            if ($timerecord{$logfilenb}<10000000000000) {
                if ($Debug) { debug(" This record is corrupted (no date found)",3); }
                $corrupted{$logfilenb}++;
                next;
            }
            if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }

            # Sort and insert into timerecordorder, oldest at end/back of array
            # At the beginning, timerecordorder is empty. Then because the first pass is
            # a loop on each file to read each first line, the timerecordorder size is
            # number of input files.
            # After, each new loop, read only one new line, so timerecordorder size increase
            # by one but decrease just after by the pop command later.
            my $inserted=0;
            for(my $c=$#timerecordorder; $c>=0 ; $c--) {
                if($timerecord{$logfilenb} <= $timerecord{$timerecordorder[$c]})
                {
                    # Is older or equal than index at $c, add after
                    $timerecordorder[$c + 1]=$logfilenb;
                    $inserted = 1;
                    last;
                } else {
                    # Newer than index at $c: shift that entry one place towards the end
                    $timerecordorder[$c + 1]=$timerecordorder[$c];
                }
            }
            if(! $inserted) {
                $timerecordorder[0] = $logfilenb;
            }

            last;
        }
    }
    # END Read new lines for each log file. After this, following var are filled
    # $timerecord{$logfilenb}
    # @timerecordorder array

    # We choose which record of which log file to process
    if ($Debug) { debug("Choose which record of which log file to process",3); }
    $logfilechosen=pop(@timerecordorder);
    if(!defined($logfilechosen)) { last; }  # No more record to process

    # Record is chosen
    if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); }
    if ($Debug) { debug(" Record is $linerecord{$logfilechosen}",3); }

    # Record is approved. We found a new line to parse in file number $logfilechosen
    #-------------------------------------------------------------------------------
    $NbOfLinesParsed++;
    if ($ShowSteps) {
        # $NBOFLINESFORBENCHMARK is used as a bit mask here
        if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
            my $delay=(time()-$starttime)||1;
            print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
        }
    }

    # Do DNS lookup
    #--------------------
    my $Host='';
    my $ip=0;
    if ($DNSLookup) {   # DNS lookup is 1 or 2
        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }    # IPv4
        elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                        # IPv6
        if ($ip) {
            # Check in static DNS cache file
            if ($MyDNSTable{$Host}) {
                if ($Debug) { debug(" DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
            }
            elsif ($DNSLookup==1) {
                # Check in session cache (dynamic DNS cache file + session DNS cache)
                if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
                    if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
                        $TmpDNSLookup{$Host}='*';
                        if ($Debug) { debug(" No need of reverse DNS lookup for $Host, skipped at user request.",4); }
                    }
                    else {
                        if ($ip == 4) {
                            # Create or not a new thread
                            if ($MaxNbOfThread) {
                                if (! $threadarray{$Host}) {    # No thread already launched for $Host
                                    while ((scalar keys %threadarray) >= $MaxNbOfThread) {
                                        if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
                                        sleep 1;
                                    }
                                    $threadarray{$Host}=1;      # Semaphore to tell thread for $Host is active
                                    # my $t = new Thread \&MakeDNSLookup, $Host;
                                    my $t = threads->create(sub { MakeDNSLookup($Host) });
                                    if (! $t) { error("Failed to create new thread"); }
                                    if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
                                    $t->detach();   # We don't need to keep return code
                                }
                                else {
                                    if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
                                }
                                # Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
                            } else {
                                &MakeDNSLookup($Host);
                                if ($Debug) { debug(" Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
                            }
                        }
                        elsif ($ip == 6) {
                            $TmpDNSLookup{$Host}='*';
                            if ($Debug) { debug(" Reverse DNS lookup for $Host not available for IPv6",4); }
                        }
                    }
                } else {
                    if ($Debug) { debug(" Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
                }
            }
            else {
                if ($Debug) { debug(" DNS lookup by static DNS cache file asked for $Host but not found.",4); }
            }
        }
        else {
            if ($Debug) { debug(" DNS lookup asked for $Host but this is not an IP address.",4); }
            # Remembered for the warning printed at the end of the run
            $DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
        }
    }
    else {
        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }    # IPv4
        elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                        # IPv6
        if ($Debug) { debug(" No DNS lookup asked.",4); }
    }

    # Put record in record queue
    if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
    $QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};

    # Put record in host queue
    # If there is a host to resolve, we add line to queue with value of host to resolve
    # $Host is '' (no ip found) or is ip
    if ($DNSLookup==0) {
        $QueueHostsToResolve{$NbOfLinesParsed}='*';
    }
    if ($DNSLookup==1) {
        $QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
    }
    if ($DNSLookup==2) {
        $QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
    }

    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);

}   # End of processing new record. Loop on next one.
701 | |
if ($Debug) { debug("End of processing log file(s)"); }

# Close all log files still listed in %LogFileToDo
foreach my $logfilenb (keys %LogFileToDo) {
    if ($Debug) { debug("Close log file number $logfilenb"); }
    close("LOG$logfilenb") || error("Command for pipe '$LogFileToDo{$logfilenb}' failed");
}

# Drain the output queue. Looping as long as a record is queued (and not only
# while the head is unresolved) guarantees that records are still printed
# when the head gets resolved between two checks.
while ($QueueHostsToResolve{$QueueCursor}) {
    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);
    if (! $QueueHostsToResolve{$QueueCursor}) { last; }     # Queue is empty now
    sleep 1;    # Head is still waiting for its DNS answer
}

# Waiting queue is empty
if ($MaxNbOfThread) {
    foreach my $t (threads->list()) {
        if ($Debug) { debug("Join thread $t"); }
        $t->join();
    }
}

# DNSLookup warning
if ($DNSLookup==1 && $DNSLookupAlreadyDone) {
    warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.");
}

if ($Debug) {
    debug("Total nb of read lines: $NbOfLinesRead");
    debug("Total nb of parsed lines: $NbOfLinesParsed");
    debug("Total nb of DNS lookup asked: $NbOfDNSLookupAsked");
}

#if ($DNSCache) {
#   open(CACHE, ">$DNSCache") or die;
#   foreach (keys %TmpDNSLookup) {
#       $TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
#       print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
#   }
#   close CACHE;
#}

0;  # Do not remove this line