Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 297
- Log:
Updated to AWStats 7.0.
- Author:
- rool
- Date:
- Fri Mar 18 13:33:29 +0000 2011
- Size:
- 32006 Bytes
- Properties:
- Property svn:executable is set
1 | #!/usr/bin/perl |
2 | #----------------------------------------------------------------------------- |
3 | # Allows you to get one unique output log file, sorted on date, |
4 | # built from particular sources. |
5 | # This tool is part of AWStats log analyzer but can be used |
6 | # alone for any other log analyzer. |
7 | # See COPYING.TXT file about AWStats GNU General Public License. |
8 | #----------------------------------------------------------------------------- |
9 | # $Revision: 1.43 $ - $Author: manolamancha $ - $Date: 2010/04/30 12:26:56 $ |
10 | |
11 | use strict; no strict "refs"; |
12 | #use diagnostics; |
13 | use POSIX qw( strftime ); |
14 | |
15 | |
16 | #----------------------------------------------------------------------------- |
17 | # Defines |
18 | #----------------------------------------------------------------------------- |
19 | |
# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
# Non-threaded configuration. Check_Thread_Use() stops the script when
# -dnslookup=n is requested on Perl 5.8+ while $UseThread is 0.
my $UseThread = 0;
Check_Thread_Use();
my $NbOfDNSLookupAsked = 0;    # Incremented by each call to MakeDNSLookup
my (
    %threadarray,              # Hosts whose lookup is currently running
    %MyDNSTable,               # ip => name, loaded from the -dnscache file
    %TmpDNSLookup,             # ip => name (or '*'), filled by MakeDNSLookup
);
27 | |
28 | # ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION |
29 | #my $UseThread=1; |
30 | #&Check_Thread_Use(); |
31 | #my $NbOfDNSLookupAsked : shared = 0; |
32 | #my %threadarray : shared = (); |
33 | #my %MyDNSTable : shared = (); |
34 | #my %TmpDNSLookup : shared = (); |
35 | |
36 | |
# ---------- Init variables --------
use vars qw/ $REVISION $VERSION /;
# Keep only the text found between the first and last blank of the keyword
($REVISION) = ('$Revision: 1.43 $' =~ /\s(.*)\s/);
$VERSION = "1.2 (build $REVISION)";

use vars qw/ $NBOFLINESFORBENCHMARK /;
$NBOFLINESFORBENCHMARK = 8192;

use vars qw/
$DIR $PROG $Extension
$Debug $ShowSteps $AddFileNum $AddFileName $LastLogNum $PrintFields
$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
$NbOfLinesShowsteps $AFINET $QueueCursor $StopOnFirstEof $IgnoreMissing
/;
# Text settings start empty
$DIR = $PROG = $Extension = $DNSCache = $DirCgi = $DirData = $AFINET = '';
# Flags and counters start at zero ($QueueCursor is set just before the main loop)
$Debug = $ShowSteps = $AddFileNum = $AddFileName = $LastLogNum = $PrintFields = 0;
$MaxNbOfThread = $DNSLookup = $DNSLookupAlreadyDone = $NbOfLinesShowsteps = 0;
$StopOnFirstEof = $IgnoreMissing = 0;

# ---------- Init arrays --------
use vars qw/ @SkipDNSLookupFor @ParamFile @Fields /;

# ---------- Init hash arrays --------
use vars qw/
%LogFileToDo %linerecord %timerecord %corrupted
%QueueHostsToResolve %QueueRecords
/;
%LogFileToDo = ();
%linerecord = ();
%timerecord = ();
%corrupted = ();
%QueueHostsToResolve = ();
%QueueRecords = ();

# DRA2: the order of timerecords is kept here; each element is a log file
# DRA2: number, which is the key used by the hashes above
use vars qw/ @timerecordorder /;
@timerecordorder = ();

# ---------- External Program variables ----------
# Command used to read a compressed file, and pattern recognising such a file
my ($zcat,  $zcat_file)  = ('gzip -cd', '\.gz$');     # gzip compression
my ($bzcat, $bzcat_file) = ('bzcat',    '\.bz2$');    # bz2 compression
99 | |
100 | |
101 | |
102 | #----------------------------------------------------------------------------- |
103 | # Functions |
104 | #----------------------------------------------------------------------------- |
105 | |
#------------------------------------------------------------------------------
# Function:     Write an error message and exit
# Parameters:   $message   Text of the error; "Error: " is put in front of it
#                          and "." plus a newline are appended
# Input:        None
# Output:       The message, on STDERR
# Return:       Never returns: the process exits with status 1
#------------------------------------------------------------------------------
sub error {
    my $message = shift;
    # STDOUT carries the merged log records, so the diagnostic is sent to
    # STDERR to keep it out of the data read by the log analyzer.
    print STDERR "Error: $message.\n";
    exit 1;
}
117 | |
#------------------------------------------------------------------------------
# Function:     Write a debug message
# Parameters:   $debugstring   Text to print
#               $level         Minimum value of $Debug needed to print the
#                              message (a missing or false value means 1)
# Input:        $Debug
# Output:       "DEBUG <level> - <local time> : <text>" on STDOUT
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my ($debugstring, $level) = @_;
    $level ||= 1;
    return if $Debug < $level;    # Message is more detailed than what was asked
    print "DEBUG $level - ".localtime(time())." : $debugstring\n";
    return;
}
132 | |
#------------------------------------------------------------------------------
# Function:     Write a warning message
# Parameters:   $messagestring   Text of the warning
# Input:        $Debug
# Output:       The message on STDOUT (preceded by its debug copy when
#               debugging is enabled)
# Return:       Result of the final print
#------------------------------------------------------------------------------
sub warning {
    my ($messagestring) = @_;
    debug("$messagestring", 1) if $Debug;
    print "$messagestring\n";
}
145 | |
#-----------------------------------------------------------------------------
# Function:     Return 1 if string is made only of accepted characters:
#               word characters (alphanum and _), whitespace and
#               + - / \ . % , ; : = " ' & ? !
#               Any other character, or an empty string, gives 0.
# Input:        String
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub IsAscii {
    my ($string) = @_;
    debug("IsAscii($string)", 5) if $Debug;
    my $accepted = ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) ? 1 : 0;
    if ($Debug) {
        if ($accepted) { debug(" Yes", 5); }
        else           { debug(" No", 5); }
    }
    return $accepted;
}
161 | |
#-----------------------------------------------------------------------------
# DRA Function: Return 1 if DNS lookup should be skipped, that is if the
#               string matches (case insensitive) one of the patterns
#               stored in @SkipDNSLookupFor
# Input:        String
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub SkipDNSLookup {
    my $candidate = $_[0];
    foreach my $pattern (@SkipDNSLookupFor) {
        return 1 if $candidate =~ /$pattern/i;
    }
    return 0;    # Not in @SkipDNSLookupFor
}
171 | |
#-----------------------------------------------------------------------------
# Function:     Make the reverse DNS lookup of one IPv4 address and wait for
#               its result (can be run inside a thread)
# Input:        String (dotted IPv4 address)
# Output:       $TmpDNSLookup{ip} receives the host name, or '*' when there is
#               no usable name; $NbOfDNSLookupAsked is incremented; the entry
#               of the address is removed from %threadarray
# Return:       None
#-----------------------------------------------------------------------------
sub MakeDNSLookup {
    my ($ipaddress) = @_;
    use Socket;
    $NbOfDNSLookupAsked++;
    $AFINET = AF_INET;

    # Thread id is only asked when threads are in use
    my $tid = 0;
    if ($MaxNbOfThread) { $tid = eval("threads->self->tid()"); }
    if ($Debug) { debug("  ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4); }

    # This is very slow, may take 20 seconds
    my $packedaddress = pack("C4", split(/\./, $ipaddress));
    my $lookupresult  = gethostbyaddr($packedaddress, $AFINET);

    # An empty answer, an answer that is itself an IP address, or a name with
    # unexpected characters are all stored as "not resolved"
    my $unusable = ! $lookupresult
        || $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
        || ! IsAscii($lookupresult);
    $TmpDNSLookup{$ipaddress} = $unusable ? '*' : $lookupresult;

    if ($Debug) { debug("  ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4); }
    delete $threadarray{$ipaddress};    # Lookup for this address is no longer running
    return;
}
195 | |
#-----------------------------------------------------------------------------
# Function:     Print, in queue order, every record at the head of the queue
#               whose host needs no resolution ('*') or is already resolved,
#               and remove it from the queue. Stops at the first record whose
#               host is still waiting for an answer.
# Parameters:   $logfilechosen   Number of the log file, used for the
#                                -addfilenum / -addfilename prefix and for
#                                the -printfields header
# Input:        $QueueCursor %QueueRecords %QueueHostsToResolve %MyDNSTable
#               %TmpDNSLookup $AddFileNum $AddFileName $PrintFields
#               $LastLogNum @Fields %LogFileToDo $Debug
# Output:       Records on STDOUT; $QueueCursor moved after the last record
#               written; written entries deleted from both queue hashes;
#               $LastLogNum updated when a fields header is printed
# Return:       0
#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
    my $logfilechosen = shift;
    if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
    while (my $host = $QueueHostsToResolve{$QueueCursor}) {
        if ($host eq '*') {
            if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
        }
        else {
            # Static DNS cache is looked at first, then the lookups of this run
            my ($resolved, $foundin);
            if ($MyDNSTable{$host}) {
                $resolved = $MyDNSTable{$host};
                $foundin  = 'MyDNSTable';
            }
            elsif ($TmpDNSLookup{$host}) {
                $resolved = $TmpDNSLookup{$host};
                $foundin  = 'TmpDNSLookup';
            }
            else {
                last;    # Answer not known yet: this record and the next ones stay queued
            }
            if ($resolved ne '*') {
                # \Q..\E makes the host match literally; without it each "."
                # of the address would match any character and the wrong
                # part of the record could be replaced.
                $QueueRecords{$QueueCursor} =~ s/\Q$host\E/$resolved/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in $foundin $resolved). We pull it.",4); }
            }
        }
        # Record is ready, we output it.
        if ($AddFileNum)  { print "$logfilechosen "; }
        if ($AddFileName) { print "$LogFileToDo{$logfilechosen} "; }
        # See if we need to dump fields (only when output switches to another log file)
        if ($PrintFields && $LastLogNum != $logfilechosen) {
            print($Fields[$logfilechosen]."\n");
            $LastLogNum = $logfilechosen;
        }
        print "$QueueRecords{$QueueCursor}\n";
        delete $QueueRecords{$QueueCursor};
        delete $QueueHostsToResolve{$QueueCursor};
        $QueueCursor++;
    }
    return 0;
}
238 | |
#-----------------------------------------------------------------------------
# Function:     Check if threads are enabled or not. When Perl is 5.8 or
#               higher and a -dnslookup=n option is found in @ARGV, either
#               load the thread modules ($UseThread set) or stop with an
#               error explaining how to enable threads.
# Input:        @ARGV $UseThread
# Return:       -
#-----------------------------------------------------------------------------
sub Check_Thread_Use {
    return if $] < 5.008;
    foreach my $argument (@ARGV) {
        next unless $argument =~ /^-dnslookup[:=](\d{1,2})/i;
        if (! $UseThread) {
            &error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading");
        }
        if (!eval ('require "threads.pm";')) {
            &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":""));
        }
        if (!eval ('require "threads/shared.pm";')) {
            &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":""));
        }
    }
}
254 | |
255 | |
256 | #----------------------------------------------------------------------------- |
257 | # MAIN |
258 | #----------------------------------------------------------------------------- |
# Split the script path into directory ($DIR), name ($PROG) and extension
($DIR = $0) =~ s/([^\/\\]*)$//;
($PROG = $1) =~ s/\.([^\.]*)$//;
$Extension = $1;

# Get parameters: arguments starting with "-" are options, all others are
# input log files (or wildcards) stored in @ParamFile
my $cpt = 1;
foreach my $argument (@ARGV) {
    if ($argument !~ /^-/) {
        push @ParamFile, $argument;
        $cpt++;
        next;
    }
    if ($argument =~ /debug=(\d)/i) { $Debug = $1; }
    elsif ($argument =~ /dnscache=/i) {
        $DNSLookup ||= 2;
        # Option name is accepted in any case, so it is also removed in any
        # case (-DNSCache=file used to keep "-DNSCache=" in the file name)
        ($DNSCache = $argument) =~ s/-dnscache=//i;
    }
    elsif ($argument =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup ||= 1; $MaxNbOfThread = $1; }
    elsif ($argument =~ /dnslookup/i)              { $DNSLookup ||= 1; }
    elsif ($argument =~ /showsteps/i)              { $ShowSteps = 1; }
    elsif ($argument =~ /addfilenum/i)             { $AddFileNum = 1; }
    elsif ($argument =~ /addfilename/i)            { $AddFileName = 1; }
    elsif ($argument =~ /stoponfirsteof/i)         { $StopOnFirstEof = 1; }
    elsif ($argument =~ /printfields/i)            { $PrintFields = 1; }
    elsif ($argument =~ /ignoremissing/i)          { $IgnoreMissing = 1; }
    else { print "Unknown argument $argument ignored\n"; }
}
if ($Debug) { $| = 1; }    # Unbuffered output when debugging
283 | |
# Show version and main settings when debugging
if ($Debug) {
    debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
    foreach my $setting ("DNSLookup=$DNSLookup", "DNSCache=$DNSCache", "MaxNbOfThread=$MaxNbOfThread") {
        debug($setting);
    }
}

# Disallow MaxNbOfThread and Perl < 5.8
error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead")
    if $MaxNbOfThread && $] < 5.008;

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /activestate|activeperl/i) {
    # TODO Add a warning
}
301 | |
# No input file given: print the help text and stop
if (! @ParamFile) {
    print <<"END_OF_USAGE";
----- $PROG $VERSION (c) Laurent Destailleur -----
$PROG allows you to get one unique output log file, sorted on date,
built from particular sources:
 - It can read several input log files,
 - It can read .gz/.bz2 log files,
 - It can also makes a fast reverse DNS lookup to replace
 all IP addresses into host names in resulting log file.
$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software
distributed with a GNU General Public License (See COPYING.txt file).
$PROG is part of AWStats but can be used alone as a log merger
or resolver before using any other log analyzer.

Usage:
 $PROG.$Extension [options] file
 $PROG.$Extension [options] file1 ... filen
 $PROG.$Extension [options] *.*
 perl $PROG.$Extension [options] *.* > newfile
Options:
 -dnslookup make a reverse DNS lookup on IP adresses
 -dnslookup=n same with a n parallel threads instead of serial requests
 -dnscache=file make DNS lookup from cache file first before network lookup
 -showsteps print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines
 -addfilenum if used with several files, file number can be added in first
 -addfilename if used with several files, file name can be added in first
 field of output file. This can be used to add a cluster id
 when log files come from several load balanced computers.
 -stoponfirsteof Stop processing when any logfile reaches end-of-file.
 -printfields For IIS or W3C logs, prints the latest field header for
 the currentlog file when switching between log file entries
 so that the parsercan automatically determine which fields
 are avaiable.
 -ignoremissing will not fail if a log file is missing

This runs $PROG in command line to open one or several
server log files to merge them (sorted on date) and/or to make a reverse
DNS lookup (if asked). The result log file is sent on standard output.
Note: $PROG is not a 'sort' tool to sort one file. It's a
software able to output sorted log records (with a reverse DNS lookup
included or not) even if log records are dispatched in several files.
Each of thoose files must be already independently sorted itself
(but that is the case in all web server log files). So you can use it
for load balanced log files or to group several old log files.

Don't forget that the main goal of logresolvemerge is to send log records to
a log analyzer in a sorted order without merging files on disk (NO NEED
OF DISK SPACE AT ALL) and without loading files into memory (NO NEED
OF MORE MEMORY). Choose of output records is done on the fly.

So logresolvemerge is particularly usefull when you want to output several
and/or large log files in a fast process, with no use of disk or
more memory, and in a chronological order through a pipe (to be used by a log
analyzer).

Note: If input records are not 'exactly' sorted but 'nearly' sorted (this
occurs with heavy servers), this is not a problem, the output will also
be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal
with such logs.

WARNING: If log files are old MAC text files (lines ended with CR char), you
can't run this tool on Win or Unix platforms.

WARNING: Because of memory holes in ActiveState Perl version, use another
Perl interpreter if you need to process large log files.

Now supports/detects:
 Automatic detection of log format
 Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.
 Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.
New versions and FAQ at http://awstats.sourceforge.net
END_OF_USAGE
    exit 0;
}
375 | |
# Get current time: 4 digits year, all other fields on 2 digits
my $nowtime = time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
$nowyear += ($nowyear < 100) ? 2000 : 1900;
my $nowsmallyear;
($nowsmallyear = $nowyear) =~ s/^..//;
$nowmonth++;    # localtime counts months from 0
foreach my $field ($nowmonth, $nowday, $nowhour, $nowmin, $nowsec) {
    $field = sprintf("%02d", $field);
}

# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear += ($tomorrowyear < 100) ? 2000 : 1900;
my $tomorrowsmallyear;
($tomorrowsmallyear = $tomorrowyear) =~ s/^..//;
$tomorrowmonth++;
foreach my $field ($tomorrowmonth, $tomorrowday, $tomorrowhour, $tomorrowmin, $tomorrowsec) {
    $field = sprintf("%02d", $field);
}
# Same YYYYMMDDHHMMSS layout as the time of a log record
my $timetomorrow = join('', $tomorrowyear, $tomorrowmonth, $tomorrowday, $tomorrowhour, $tomorrowmin, $tomorrowsec);
396 | |
# Init other parameters
$NBOFLINESFORBENCHMARK--;    # One less, so the -showsteps test can use it as a bit mask
$DirCgi = '' if $ENV{"GATEWAY_INTERFACE"};
# A non empty DirCgi must end with a directory separator
$DirCgi .= '/' if $DirCgi && $DirCgi !~ /[\/\\]$/;
# If not defined or set to "." then DirData is the directory of the script
$DirData = $DIR unless $DirData && $DirData ne '.';
# If that directory is not defined either, we use "."
$DirData = '.' unless $DirData;
$DirData =~ s/\/$//;

# monthnum must be in english because it's used to translate log date in
# apache log files which are always in english. Each month is known with
# and without its leading capital letter.
my %monthnum;
{
    my $number = 0;
    foreach my $abbreviation (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
        my $twodigits = sprintf("%02d", ++$number);
        $monthnum{$abbreviation}    = $twodigits;
        $monthnum{lc $abbreviation} = $twodigits;
    }
}
408 | |
# Load the static DNS cache file given with -dnscache into %MyDNSTable.
# Each line holds 3 blank separated fields: time, ip and name. A name of
# '*' is stored as the ip itself.
if ($DNSCache) {
    if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
    # 3 arguments open: the file name is never read as an open mode, and the
    # system reason of a failure is reported
    open(my $cachehandle, '<', $DNSCache) or error("Can't open cache file $DNSCache: $!");
    while (my $cacheline = <$cachehandle>) {
        my ($time, $ip, $name) = split(' ', $cacheline);
        next unless $ip && $name;    # Incomplete line
        $MyDNSTable{$ip} = ($name eq '*') ? "$ip" : $name;
    }
    close $cachehandle;
}
421 | |
422 | #----------------------------------------------------------------------------- |
423 | # PROCESSING CURRENT LOG(s) |
424 | #----------------------------------------------------------------------------- |
my $NbOfLinesRead=0;
my $NbOfLinesParsed=0;
my $logfilechosen=0;
my $starttime=time();

# Define the LogFileToDo list
# Each entry gives, for a file number starting at 1, the string used later to
# open the file: its path, or "<command> <path> |" when the name shows a
# gzip or bz2 compression.
$cpt=1;
foreach my $key (0..$#ParamFile) {
    if ($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) {
        # Parameter is a plain file name

        if ($Debug) { debug("DBG1 Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
        # Check for supported compression
        if ($ParamFile[$key] =~ /$zcat_file/) {
            if ($Debug) { debug("GZIP compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the zcat command
            $ParamFile[$key] = $zcat . ' ' . $ParamFile[$key] . ' |';
        }
        elsif ($ParamFile[$key] =~ /$bzcat_file/) {
            if ($Debug) { debug("BZ2 compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the bzcat command
            $ParamFile[$key] = $bzcat . ' ' . $ParamFile[$key] . ' |';
        }

        # Scalar element (was the one element slice @ParamFile[$key])
        $LogFileToDo{$cpt} = $ParamFile[$key];
        $cpt++;

    }
    else {
        # Parameter holds wildcards: split it into a directory and a file
        # mask, and turn the mask into a regex (. literal, * -> .*, ? -> .)
        my $DirFile = $ParamFile[$key];
        $DirFile =~ s/([^\/\\]*)$//;
        $ParamFile[$key] = $1;
        if ($DirFile eq '') { $DirFile = '.'; }
        $ParamFile[$key] =~ s/\./\\\./g;
        $ParamFile[$key] =~ s/\*/\.\*/g;
        $ParamFile[$key] =~ s/\?/\./g;
        if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }

        # A directory that can't be read gives no file (as before), but it is
        # now reported in debug mode and its handle is released with closedir
        my @filearray = ();
        if (opendir(my $dirhandle, $DirFile)) {
            @filearray = sort readdir $dirhandle;
            closedir $dirhandle;
        }
        elsif ($Debug) {
            debug("Can't read directory \"$DirFile\" : $!");
        }

        foreach my $filename (@filearray) {
            next if $filename eq '.' || $filename eq '..';
            next if $filename !~ /^$ParamFile[$key]$/;

            if ($Debug) { debug("DBG2 Log file $filename is added to LogFileToDo with number $cpt."); }
            # Check for supported compression
            if ($filename =~ /$zcat_file/) {
                if ($Debug) { debug("GZIP compression detected for Log file $filename."); }
                # Modify the name to include the zcat command
                $LogFileToDo{$cpt} = $zcat . ' ' . "$DirFile/$filename" . ' |';
            }
            elsif ($filename =~ /$bzcat_file/) {
                if ($Debug) { debug("BZ2 compression detected for Log file $filename."); }
                # Modify the name to include the bzcat command
                $LogFileToDo{$cpt} = $bzcat . ' ' . "$DirFile/$filename" . ' |';
            }
            else {
                $LogFileToDo{$cpt} = "$DirFile/$filename";
            }
            $cpt++;
        }
    }
}

# If no files to process
if (scalar keys %LogFileToDo == 0) {
    error("No input log file found");
}
492 | |
# Open all log files
# Each file is opened on the global handle named "LOG<number>" (symbolic
# name, allowed by no strict "refs").
# NOTE: the 2 arguments open is kept on purpose: entries built for compressed
# files are commands ending with " |". File names are so given to the shell
# as they are.
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
foreach my $logfilenb (keys %LogFileToDo) {
    if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
    if (! open("LOG$logfilenb","$LogFileToDo{$logfilenb}")) {
        if (! $IgnoreMissing) {
            error("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
        }
        # -ignoremissing: forget this file and go on with the others
        debug("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
        delete $LogFileToDo{$logfilenb};
        next;    # Nothing was opened, so there is no handle to set in binary mode
    }
    binmode "LOG$logfilenb";    # To avoid pb of corrupted text log files with binary chars.
}

$QueueCursor=1;
509 | STOPONFIRSTEOF: while (1 == 1) |
510 | { |
511 | # BEGIN Read new record |
512 | # For each log file if logfilechosen is 0 |
513 | # If not, we go directly to log file instead of iterating over all keys for a match |
514 | #---------------------------------------------------------------------------------- |
515 | my @readlist; |
516 | if($logfilechosen == 0) { |
517 | @readlist = keys %LogFileToDo; |
518 | } else { |
519 | @readlist = ($logfilechosen); |
520 | } |
521 | foreach my $logfilenb (@readlist) |
522 | { |
523 | if ($Debug) { debug("Search next record in file number $logfilenb",3); } |
524 | # Read chosen log file until we found a record with good date or reaching end of file |
525 | while (1 == 1) { |
526 | my $LOG="LOG$logfilenb"; |
527 | $_=<$LOG>; # Read new line |
528 | if (! $_) |
529 | { # No more records in log file number $logfilenb |
530 | if ($Debug) { debug(" No more records in file number $logfilenb",2); } |
531 | delete $LogFileToDo{$logfilenb}; |
532 | if ($StopOnFirstEof) |
533 | { |
534 | if ($Debug) { debug("Exiting loop due to EOF of logfile $logfilenb",1); } |
535 | last STOPONFIRSTEOF; |
536 | } |
537 | last; |
538 | } |
539 | |
540 | # Get the latest Fields header for printing IIS and W3C logs |
541 | if ($PrintFields && $_ =~ m/#Fields:/){ |
542 | my $field = $_; |
543 | # strip whitespace |
544 | $field =~ s/^\s+|\s+$//g; |
545 | if (!$Fields[$logfilenb] || $field != $Fields[$logfilenb]){ |
546 | $Fields[$logfilenb] = $field; |
547 | debug("Found new fields in $logfilenb: $Fields[$logfilenb]"); |
548 | } |
549 | } |
550 | |
551 | $NbOfLinesRead++; |
552 | chomp $_; s/\r$//; |
553 | |
554 | if (/^#/) { next; } # Ignore comment lines (ISS writes such comments) |
555 | if (/^!!/) { next; } # Ignore comment lines (Webstar writes such comments) |
556 | if (/^$/) { next; } # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file) |
557 | |
558 | $linerecord{$logfilenb}=$_; |
559 | |
560 | # Check filters |
561 | #---------------------------------------------------------------------- |
562 | |
563 | # Split YYYY-MM-DD HH:MM:SS |
564 | # or DD/Month/YYYY:HH:MM:SS |
565 | # or MM/DD/YY\tHH:MM:SS |
566 | # or 9999.999 |
567 | # or Month DD HH:MM:SS |
568 | my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0; |
569 | if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; } |
570 | elsif ($_ =~ /\[(\d?\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; } |
571 | elsif ($_ =~ /\w+ (\w+) {1,2}(\d?\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; } |
572 | elsif ($_ =~ /^(\d\d\d\d+\.\d\d\d) /) |
573 | { |
574 | my $timetime = strftime('%Y-%m-%d-%T', gmtime($1)); |
575 | $timetime =~ /(\d\d\d\d)-(\d\d)-(\d\d)-(\d\d):(\d\d):(\d\d)/; |
576 | $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; |
577 | } |
578 | elsif ($_ =~ /(\w+)\s\s?(\d?\d) (\d\d):(\d\d):(\d\d) /) { # Month DD HH:MM:SS |
579 | $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; |
580 | if (($monthnum{$month}>$monthnum{$nowmonth}) || ($monthnum{$month}==$monthnum{$nowmonth} && $day>$nowday)) { |
581 | $year=$nowyear-1; |
582 | } |
583 | else { $year=$nowyear; } |
584 | } |
585 | if (length $day == 1) { $day = "0".$day; } |
586 | |
587 | if ($monthnum{$month}) { $month=$monthnum{$month}; } # Change lib month in num month if necessary |
588 | |
589 | # Create $timerecord like YYYYMMDDHHMMSS |
590 | $timerecord{$logfilenb}=int("$year$month$day$hour$minute$second"); |
591 | if ($timerecord{$logfilenb}<10000000000000) { |
592 | if ($Debug) { debug(" This record is corrupted (no date found)",3); } |
593 | $corrupted{$logfilenb}++; |
594 | next; |
595 | } |
596 | if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); } |
597 | |
598 | # Sort and insert into timerecordorder, oldest at end/back of array |
599 | # At the beginning, timerecordorder is empty. Then beceause the first pass is |
600 | # a loop on each file to read each first line, the timerecordorder size is |
601 | # number of input files. |
602 | # After, each new loop, read only one new line, so timerecordorder size increase |
603 | # by one but decrease just after by the pop command later. |
604 | my $inserted=0; |
605 | for(my $c=$#timerecordorder; $c>=0 ; $c--) { |
606 | if($timerecord{$logfilenb} <= $timerecord{$timerecordorder[$c]}) |
607 | { |
608 | # Is older or equal than index at $c, add after |
609 | $timerecordorder[$c + 1]=$logfilenb; |
610 | $inserted = 1; |
611 | last; |
612 | } else { |
613 | $timerecordorder[$c + 1]=$timerecordorder[$c]; |
614 | } |
615 | } |
616 | if(! $inserted) { |
617 | $timerecordorder[0] = $logfilenb; |
618 | } |
619 | |
620 | last; |
621 | } |
622 | } |
623 | # END Read new lines for each log file. After this, following var are filled |
624 | # $timerecord{$logfilenb} |
625 | # @timerecordorder array |
626 | |
627 | # We choose which record of which log file to process |
628 | if ($Debug) { debug("Choose which record of which log file to process",3); } |
629 | $logfilechosen=pop(@timerecordorder); |
630 | if(!defined($logfilechosen)) { last; } # No more record to process |
631 | |
632 | # Record is chosen |
633 | if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); } |
634 | if ($Debug) { debug(" Record is $linerecord{$logfilechosen}",3); } |
635 | |
636 | # Record is approved. We found a new line to parse in file number $logfilechosen |
637 | #------------------------------------------------------------------------------- |
638 | $NbOfLinesParsed++; |
639 | if ($ShowSteps) { |
640 | if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) { |
641 | my $delay=(time()-$starttime)||1; |
642 | print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n"; |
643 | } |
644 | } |
645 | |
# Do DNS lookup
#--------------------
# $Host receives the first IP-looking token found in the record ('' when
# nothing matched) and $ip the matching family (4, 6, or 0 when nothing
# matched). Both variables are read again further down when the record is
# put in the queues, so they are declared at this level.
my $Host = '';
my $ip   = 0;
if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $Host=$1; $ip=4; }   # IPv4
elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $Host=$1; $ip=6; }                       # IPv6

if (! $DNSLookup) {
    # DNS lookup is 0: nothing to resolve
    if ($Debug) { debug(" No DNS lookup asked.",4); }
}
elsif (! $ip) {
    # DNS lookup is 1 or 2 but the record holds no IP address. Remember the
    # log file so that a warning can be issued once processing is over.
    if ($Debug) { debug(" DNS lookup asked for $Host but this is not an IP address.",4); }
    $DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
}
elsif ($MyDNSTable{$Host}) {
    # Found in static DNS cache file
    if ($Debug) { debug(" DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
}
elsif ($DNSLookup != 1) {
    # Only the static DNS cache file may be used, and the host is not in it
    if ($Debug) { debug(" DNS lookup by static DNS cache file asked for $Host but not found.",4); }
}
elsif ($threadarray{$Host} || $TmpDNSLookup{$Host}) {
    # Session cache (dynamic DNS cache file + session DNS cache) already
    # knows this host, or a lookup for it is in progress
    if ($Debug) { debug(" Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
}
elsif (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
    $TmpDNSLookup{$Host}='*';
    if ($Debug) { debug(" No need of reverse DNS lookup for $Host, skipped at user request.",4); }
}
elsif ($ip == 6) {
    $TmpDNSLookup{$Host}='*';
    if ($Debug) { debug(" Reverse DNS lookup for $Host not available for IPv6",4); }
}
elsif (! $MaxNbOfThread) {
    # IPv4 address, no thread allowed: resolve synchronously
    &MakeDNSLookup($Host);
    if ($Debug) { debug(" Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
}
elsif ($threadarray{$Host}) {
    # IPv4 address, a thread is already launched for $Host
    if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
}
else {
    # IPv4 address, threaded lookup. Wait until fewer than $MaxNbOfThread
    # entries remain in %threadarray before starting a new thread.
    while ((scalar keys %threadarray) >= $MaxNbOfThread) {
        if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
        sleep 1;
    }
    $threadarray{$Host}=1;      # Semaphore to tell thread for $Host is active
    my $worker = threads->create(sub { MakeDNSLookup($Host) });
    if (! $worker) { error("Failed to create new thread"); }
    if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$worker->tid,4); }
    $worker->detach();          # We don't need to keep return code
    # Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
}
713 | |
# Put record in record queue, indexed by its parsed-line number
if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
$QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};

# Put record in host queue.
# The value stored is the host that must be resolved before the record can be
# written, or '*' when there is nothing to wait for.
# $Host is '' (no ip found) or is ip.
if ($DNSLookup==0) {
    # No lookup asked: never wait
    $QueueHostsToResolve{$NbOfLinesParsed} = '*';
}
elsif ($DNSLookup==1) {
    # Wait for the host when one was found in the record
    $QueueHostsToResolve{$NbOfLinesParsed} = $Host || '*';
}
elsif ($DNSLookup==2) {
    # Only hosts present in %MyDNSTable are kept as hosts to resolve
    $QueueHostsToResolve{$NbOfLinesParsed} = $MyDNSTable{$Host} ? $Host : '*';
}

# Print all records in head of queue that are ready
&WriteRecordsReadyInQueue($logfilechosen);
733 | |
734 | } # End of processing new record. Loop on next one. |
735 | |
debug("End of processing log file(s)") if $Debug;

# Close all log files.
# Handles are addressed by name ("LOG<number>"); a failing close is reported
# through error() with the corresponding %LogFileToDo entry.
for my $filenb (keys %LogFileToDo) {
    debug("Close log file number $filenb") if $Debug;
    next if close("LOG$filenb");
    error("Command for pipe '$LogFileToDo{$filenb}' failed");
}
743 | |
# Keep flushing the queue while the record at $QueueCursor still waits for a
# host that is in neither %MyDNSTable nor %TmpDNSLookup. The entry is read
# again on every pass because the loop condition depends on $QueueCursor.
while (1) {
    my $pendinghost = $QueueHostsToResolve{$QueueCursor};
    last if (! $pendinghost || $pendinghost eq '*');                        # Nothing to wait for
    last if ($MyDNSTable{$pendinghost} || $TmpDNSLookup{$pendinghost});     # Host is resolved
    sleep 1;
    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);
}
749 | |
# Waiting queue is empty.
# When threads are enabled ($MaxNbOfThread is not 0), join every thread
# returned by threads->list() before going on.
if ($MaxNbOfThread) {
    foreach my $t (threads->list()) {
        # Log the numeric thread id: interpolating the thread object itself
        # would only print its reference address (threads=SCALAR(0x...)).
        if ($Debug) { debug("Join thread ".$t->tid); }
        $t->join();
    }
}
757 | |
# DNSLookup warning.
# $DNSLookupAlreadyDone holds the %LogFileToDo entry of a log file in which a
# record without IP address was met while DNS lookup was asked.
warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.")
    if ($DNSLookup==1 && $DNSLookupAlreadyDone);

# Final counters, only shown in debug mode
if ($Debug) {
    foreach my $summaryline (
        "Total nb of read lines: $NbOfLinesRead",
        "Total nb of parsed lines: $NbOfLinesParsed",
        "Total nb of DNS lookup asked: $NbOfDNSLookupAsked",
    ) {
        debug($summaryline);
    }
}
768 | |
769 | #if ($DNSCache) { |
770 | # open(CACHE, ">$DNSCache") or die; |
771 | # foreach (keys %TmpDNSLookup) { |
772 | # $TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip"; |
773 | # print CACHE "0\t$_\t$TmpDNSLookup{$_}\n"; |
774 | # } |
775 | # close CACHE; |
776 | #} |
777 | |
778 | 0; # Do not remove this line |