Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 297
- Log:
Updated to AWStats 7.0.
- Author:
- rool
- Date:
- Fri Mar 18 13:33:29 +0000 2011
- Size:
- 12725 Bytes
- Properties:
- Property svn:executable is set
1 | #!/usr/bin/perl |
2 | #----------------------------------------------------------------------------- |
3 | # Export lib data values to a text files to allow to use AWStats robots, |
4 | # os, browsers, search_engines database with other log analyzers |
5 | #----------------------------------------------------------------------------- |
6 | # $Revision: 1.6 $ - $Author: eldy $ - $Date: 2010/01/20 17:39:20 $ |
7 | |
8 | #use warnings; # Must be used in test mode only. This reduce a little process speed |
9 | #use diagnostics; # Must be used in test mode only. This reduce a lot of process speed |
10 | use strict;no strict "refs"; |
11 | |
12 | |
13 | |
14 | #----------------------------------------------------------------------------- |
15 | # Defines |
16 | #----------------------------------------------------------------------------- |
# Script revision (text between the blanks of the CVS keyword) and the
# version banner built from it.
use vars qw/ $REVISION $VERSION /;
my $REVISION = '$Revision: 1.6 $';
$REVISION =~ /\s(.*)\s/;
$REVISION = $1;
my $VERSION = "5.1 (build $REVISION)";

# ---------- Init variables -------
# Constants
use vars qw/ $DEBUGFORCED /;
$DEBUGFORCED = 0;    # Messages with a level up to this value also go to debug.log (keep this value to 0)

# Running variables
use vars qw/ $DIR $PROG $Extension $Debug $DebugResetDone /;
$DIR            = '';
$PROG           = '';
$Extension      = '';
$Debug          = 0;
$DebugResetDone = 0;

# Detection levels
use vars qw/
    $LevelForRobotsDetection $LevelForBrowsersDetection $LevelForOSDetection $LevelForRefererAnalyze
    $LevelForSearchEnginesDetection $LevelForKeywordsDetection
/;
$LevelForRobotsDetection        = 2;
$LevelForBrowsersDetection      = 1;
$LevelForOSDetection            = 1;
$LevelForRefererAnalyze         = 1;
$LevelForSearchEnginesDetection = 1;
$LevelForKeywordsDetection      = 1;

# Configuration-style scalars, every one of them starts as an empty string
use vars qw/
    $DirLock $DirCgi $DirData $DirIcons $DirLang $AWScript $ArchiveFileName
    $AllowAccessFromWebToFollowingIPAddresses $HTMLHeadSection $HTMLEndSection $LinksToWhoIs $LinksToIPWhoIs
    $LogFile $LogFormat $LogSeparator $Logo $LogoLink $StyleSheet $WrapperScript $SiteDomain
/;
(
    $DirLock, $DirCgi, $DirData, $DirIcons, $DirLang, $AWScript, $ArchiveFileName,
    $AllowAccessFromWebToFollowingIPAddresses, $HTMLHeadSection, $HTMLEndSection, $LinksToWhoIs, $LinksToIPWhoIs,
    $LogFile, $LogFormat, $LogSeparator, $Logo, $LogoLink, $StyleSheet, $WrapperScript, $SiteDomain
) = ('') x 20;

# Command line state
use vars qw/ $QueryString $LibToExport $ExportFormat /;
$QueryString  = '';
$LibToExport  = '';
$ExportFormat = '';

# ---------- Init arrays --------
use vars qw/
    @RobotsSearchIDOrder_list1 @RobotsSearchIDOrder_list2 @RobotsSearchIDOrder_listgen
    @SearchEnginesSearchIDOrder_list1 @SearchEnginesSearchIDOrder_list2 @SearchEnginesSearchIDOrder_listgen
    @BrowsersSearchIDOrder @OSSearchIDOrder @WordsToExtractSearchUrl @WordsToCleanSearchUrl
    @WormsSearchIDOrder
    @RobotsSearchIDOrder @SearchEnginesSearchIDOrder
/;
@RobotsSearchIDOrder        = ();
@SearchEnginesSearchIDOrder = ();

# ---------- Init hash arrays --------
use vars qw/
    %BrowsersHashIDLib %BrowsersHashIcon %BrowsersHereAreGrabbers
    %DomainsHashIDLib
    %MimeHashLib %MimeHashIcon %MimeHashFamily
    %OSHashID %OSHashLib
    %RobotsHashIDLib
    %SearchEnginesHashID %SearchEnginesHashLib %SearchEnginesKnownUrl %NotSearchEnginesKeys
    %WormsHashID %WormsHashLib
/;
76 | |
77 | |
78 | |
79 | #----------------------------------------------------------------------------- |
80 | # Functions |
81 | #----------------------------------------------------------------------------- |
82 | |
#------------------------------------------------------------------------------
# Function:     Print an error message on standard output and stop the script
# Parameters:   $message $secondmessage $thirdmessage $donotshowsetupinfo
#               (only $message is printed; the second and third messages are
#               added to the debug trace, the last parameter is not used)
# Input:        $Debug
# Output:       None
# Return:       Never returns, the script exits with status 1
#------------------------------------------------------------------------------
sub error {
    my ($message, $secondmessage, $thirdmessage, $donotshowsetupinfo) = @_;
    $message            ||= "";
    $secondmessage      ||= "";
    $thirdmessage       ||= "";
    $donotshowsetupinfo ||= 0;

    # Trace the complete error text when debugging is enabled
    debug("$message $secondmessage $thirdmessage", 1) if $Debug;

    print "$message\n";
    exit 1;
}
100 | |
#------------------------------------------------------------------------------
# Function:     Write a debug message (the script keeps running afterwards)
# Parameters:   $string $level (level defaults to 1 when missing or 0)
# Input:        $Debug = required level   $DEBUGFORCED = required level forced
# Output:       $DebugResetDone is set to 1 the first time the forced log
#               file is used
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my $debugstring = $_[0];
    my $level = $_[1] || 1;

    # Forced trace: append the message to debug.log in the current directory
    if ($level <= $DEBUGFORCED) {
        if (! $DebugResetDone) {
            # One-time preparation: try to make an existing log writable by
            # everybody. A failure here is not fatal.
            chmod 0666, "debug.log";
            $DebugResetDone = 1;
        }
        # Debug output is best effort: when the log can not be opened the
        # message is skipped instead of being printed to an unopened handle.
        if (open(my $logfh, '>>', "debug.log")) {
            print $logfh localtime(time)." - $$ - DEBUG $level - $debugstring\n";
            close $logfh;
        }
    }

    # Normal trace: print the message on standard output
    if ($level <= $Debug) {
        print localtime(time)." - DEBUG $level - $debugstring\n";
    }
    return;
}
122 | |
123 | |
#------------------------------------------------------------------------------
# Function:     Load the reference databases
# Parameters:   $lib = path of the lib file to load (directory part optional)
# Input:        $DIR $Debug
# Output:       Arrays and Hash tables are defined (by the loaded lib file)
# Return:       None (calls error() when the file is found nowhere)
#------------------------------------------------------------------------------
sub Read_Ref_Data {
    # Check lib files in common possible directories :
    # Directory given with the lib file name
    # Windows :                       "${DIR}lib" (lib in same dir than awstats.pl)
    # Debian package :                "/usr/share/awstats/lib"
    # Other possible directories :    "./lib"
    my $lib=shift;
    my $dir=$lib;
    $lib =~ s/^.*[\\\/]//;                          # Keep the file name only
    $dir =~ s/[^\\\/]+$//; $dir =~ s/[\\\/]+$//;    # Keep the directory, without trailing separator
    debug("Lib: $lib, Dir: $dir");
    my @PossibleLibDir=("$dir","${DIR}lib","/usr/share/awstats/lib","./lib");

    my %FilePath=();
    my @FileListToLoad=();
    push @FileListToLoad, "$lib";
    foreach my $file (@FileListToLoad) {
        foreach my $candidate (@PossibleLibDir) {
            my $searchdir=$candidate;
            if ($searchdir && $searchdir !~ /[\/\\]$/) { $searchdir .= "/"; }
            if (! -s "${searchdir}${file}") { next; }
            $FilePath{$file}="${searchdir}${file}";
            if ($Debug) { debug("Call to Read_Ref_Data [FilePath{$file}=\"$FilePath{$file}\"]"); }
            # require searches @INC for a plain relative path, and the
            # current directory is no more in @INC with recent Perl. The
            # file was found relative to the current directory, so say it
            # explicitly.
            my $requirepath=$FilePath{$file};
            if ($requirepath !~ /^(?:[\\\/]|\.\.?[\\\/]|[A-Za-z]:)/) { $requirepath="./$requirepath"; }
            require $requirepath;
            last;    # First directory holding the file wins
        }
        if (! $FilePath{$file}) {
            &error("Error: Can't read file \"$file\".\nCheck if file is in ".($PossibleLibDir[0])." directory and is readable.");
        }
    }
}
165 | |
#------------------------------------------------------------------------------
# Function:     Turn a regex fragment into plain text by dropping its backslashes
# Parameters:   String
# Input:        -
# Output:       -
# Return:       The string with every backslash removed
#------------------------------------------------------------------------------
sub unregex {
    my ($pattern) = @_;
    $pattern =~ s{\\}{}g;
    return $pattern;
}
178 | |
#------------------------------------------------------------------------------
# Function:     Turn a keyword extractor regex ("a|b|c" alternatives, possibly
#               with parentheses) into a plain list of codes
# Parameters:   String, optional flag to get the first code only
# Input:        -
# Output:       -
# Return:       First code when the flag is true, otherwise all codes joined
#               with commas
#------------------------------------------------------------------------------
sub unregexkeywordcode {
    my ($ss, $firstoneonly) = @_;

    # One entry per alternative, parentheses removed from each of them
    my @codes = split(/\|/, $ss);
    foreach my $code (@codes) {
        $code =~ s/[\(\)]//g;
    }

    return $codes[0] if $firstoneonly;
    return join(',', @codes);
}
194 | |
195 | |
196 | |
#------------------------------------------------------------------------------
# MAIN
#------------------------------------------------------------------------------
# Split $0 into directory (trailing separator kept), program name and extension
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;

my @AllowedArgs=('-lib','-exportformat','-debug');

# Read the command line. Every argument loses its leading dashes; a copy with
# blanks encoded as %20 is joined with "&" into $QueryString (debug trace).
# For each option the first argument that matches wins.
$QueryString="";
$ExportFormat="text";
my @queryparts=();
my %optionseen=();
foreach my $arg (@ARGV) {
    my $param=$arg; $param =~ s/^-+//;
    my $encodedparam=$param; $encodedparam =~ s/\s/%20/g;
    push @queryparts, $encodedparam;
    # The lib path is taken from the raw argument: a path with blanks must
    # reach the file tests unchanged, not with its blanks encoded as %20.
    if (! $optionseen{'lib'} && $param =~ /lib=([^&]+)/i) { $LibToExport="$1"; $optionseen{'lib'}=1; }
    if (! $optionseen{'exportformat'} && $encodedparam =~ /exportformat=([^\s&]+)/i) { $ExportFormat="$1"; $optionseen{'exportformat'}=1; }
    if (! $optionseen{'debug'} && $encodedparam =~ /debug=(\d+)/i) { $Debug=$1; $optionseen{'debug'}=1; }
}
$QueryString=join('&',@queryparts);

if ($Debug) {
    debug("$PROG - $VERSION - Perl $^X $]",1);
    debug("QUERY_STRING=$QueryString",2);
}
219 | |
# Without a lib to export (or without an export format) show the help screen
# and stop with exit status 2.
unless ($LibToExport && $ExportFormat) {
    my @helplines=(
        "----- $PROG $VERSION (c) Laurent Destailleur -----",
        "$PROG is a tool to export AWStats lib (Robots, Os, Browsers, search",
        "engines database) to text files. This allow you to use AWStats lib with some",
        "other log analyzers (to enhance their capabilities or to make comparison).",
        "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software distributed",
        "with a GNU General Public License (See LICENSE file for details).",
        "",
        "Syntax: $PROG.$Extension -lib=/awstatslibpath/libfile.pm [-exportformat=format]",
        "",
        "Where format can be:",
        " text (default)",
        " webalizer",
        " analog",
        "",
    );
    print map { "$_\n" } @helplines;
    exit 2;
}

# Load the requested lib file
Read_Ref_Data($LibToExport);


# Set to 1 by every export section below that recognizes the lib name
my $libisexportable=0;
242 | |
# Export data
#------------

# Browsers lib: entries are written in the order of @BrowsersSearchIDOrder
if ($LibToExport =~ /browsers/) {
    if ($ExportFormat eq 'text') {
        print "$_\t$BrowsersHashIDLib{$_}\n" foreach @BrowsersSearchIDOrder;
    }
    elsif ($ExportFormat eq 'webalizer') {
        print "GroupAgent\t$_\n" foreach @BrowsersSearchIDOrder;
    }
    elsif ($ExportFormat eq 'analog') {
        # Nothing to export for analog: one notice, only when the lib holds entries
        if (@BrowsersSearchIDOrder) {
            print "Analog does not support self-defined browsers.\nUse 'text' export format if you want an export list of AWStats Browsers.\n";
        }
    }
    $libisexportable=1;
}
261 | |
# Mime lib: file extensions are the keys of %MimeHashFamily, written sorted
if ($LibToExport =~ /mime/) {
    my @extensions=sort keys %MimeHashFamily;
    if ($ExportFormat eq 'analog') {
        # First the extensions that count as pages, then one alias per extension
        foreach my $ext (@extensions) {
            next unless $MimeHashFamily{$ext} =~ /(text|page|script|document)/;
            print "PAGEINCLUDE *.$ext\n";
        }
        foreach my $ext (@extensions) {
            print "TYPEALIAS .$ext \"$ext [$MimeHashLib{$MimeHashFamily{$ext}}]\"\n";
        }
    }
    elsif ($ExportFormat eq 'text') {
        foreach my $ext (@extensions) {
            print "$ext\t$MimeHashLib{$MimeHashFamily{$ext}}\n";
        }
    }
    elsif ($ExportFormat eq 'webalizer') {
        # Nothing to export for webalizer: one notice, only when the lib holds entries
        if (@extensions) {
            print "Webalizer does not support self-defined mime types.\nUse 'text' export format if you want an export list of AWStats Mime types.\n";
        }
    }
    $libisexportable=1;
}
282 | |
# Operating systems lib: no format exports data yet, each known format only
# gets a one-time notice (and only when %OSHashLib holds entries)
if ($LibToExport =~ /operating_systems/) {
    my %noticeforformat=(
        'text'      => "Feature not ready yet\n",
        'webalizer' => "Webalizer does not support self-defined added OS.\nUse 'text' export format if you want an export list of AWStats OS.\n",
        'analog'    => "Analog does not support self-defined added OS.\nUse 'text' export format if you want an export list of AWStats OS.\n",
    );
    if (%OSHashLib && exists $noticeforformat{$ExportFormat}) {
        print $noticeforformat{$ExportFormat};
    }
    $libisexportable=1;
}
300 | |
# Robots lib
if ($LibToExport =~ /robots/) {
    # Init RobotsSearchIDOrder required for update process: the sub lists
    # list1, list2 and listgen are appended in this order (arrays are reached
    # by name, this needs symbolic references)
    foreach my $listname ('list1','list2','listgen') {
        push @RobotsSearchIDOrder,@{"RobotsSearchIDOrder_$listname"};
    }

    foreach my $robotid (@RobotsSearchIDOrder) {
        if ($ExportFormat eq 'text') {
            print "$robotid\t$RobotsHashIDLib{$robotid}\n";
        }
        elsif ($ExportFormat eq 'webalizer') {
            print "GroupAgent\t$robotid\n";
        }
        elsif ($ExportFormat eq 'analog') {
            print "ROBOTINCLUDE REGEXPI:$robotid\n";
        }
    }
    $libisexportable=1;
}
326 | |
# Search engines lib
if ($LibToExport =~ /search_engines/) {

    # Init SearchEnginesIDOrder required for update process: the sub lists
    # list1, list2 and listgen (always added) are appended in this order
    foreach my $listname ('list1','list2','listgen') {
        push @SearchEnginesSearchIDOrder,@{"SearchEnginesSearchIDOrder_$listname"};
    }

    foreach my $key (@SearchEnginesSearchIDOrder) {
        my $engineid=$SearchEnginesHashID{$key};
        if ($ExportFormat eq 'text') {
            print "$key\t$SearchEnginesKnownUrl{$engineid}\t$SearchEnginesHashLib{$engineid}\n";
        }
        elsif ($ExportFormat eq 'webalizer') {
            # Only the first keyword code is exported for webalizer
            my $keywordcode=unregexkeywordcode($SearchEnginesKnownUrl{$engineid},1);
            next if ! $keywordcode;                           # This has no keywordextractcode
            my $plainkey=unregex($key);
            next if $plainkey =~ /[\[\]\(\)\|\?\*\+]/;        # This was a regex value that can't be cleaned
            print "SearchEngine\t$plainkey\t$keywordcode\n";
            print "GroupReferrer\t$plainkey\t$SearchEnginesHashLib{$engineid}\n";
        }
        elsif ($ExportFormat eq 'analog') {
            # All keyword codes are exported, after removing a "=" that ends the extractor
            my $extractor=$SearchEnginesKnownUrl{$engineid};
            $extractor =~ s/=$//;
            my $keywordcodes=unregexkeywordcode($extractor);
            next if ! $keywordcodes;                          # This has no keywordextractcode
            my $plainkey=unregex($key);
            next if $plainkey =~ /[\[\]\(\)\|\?\*\+]/;        # This was a regex value that can't be cleaned
            print "SEARCHENGINE http://*$plainkey*/* $keywordcodes\n";
        }
    }
    $libisexportable=1;
}
363 | |
# No export section recognized the lib name
print "Export for AWStats lib '$LibToExport' is not supported in this tool version.\n" unless $libisexportable;


0;    # Do not remove this line
370 |