#!/usr/local/bin/perl
# Copyright (c) 1997, 1998 Gabor Egressy gabor@vmunix.com
# All rights reserved.  All wrongs reversed.  This program is free
# software; you can redistribute it and/or modify it under the same
# terms as Perl itself.
#
# Can analyze files compressed with gzip, compress, bzip and bzip2 as
# long as those programs exist on the system.
#
# Overview: reads a web server access log (named by -f or by the
# environment variable APACHE_LOG), counts the successful (2xx) requests
# per URL and per category (html/pdf, graphics, java, other) and prints
# a report: optionally the last -v matching page hits, the date range,
# the hit totals, the top -n pages matching the -r regex and optionally
# the last -m miscellaneous hits.

require 5.003;
use strict;
use Getopt::Std;

# These are package variables (not lexicals) because the format
# declarations below refer to them by name.
use vars qw($machine $date $url $code $cnt);
use vars qw($i $j $prevdate $hits $days $opt);
use vars qw(@urls @murls %urls %hits %opts %dates);

# Terminal echo is switched off while the progress indicator is drawn.
# Only touch the terminal when STDIN really is one, and remember that we
# did so, so that the END block can undo it on *every* exit path (usage
# message, die, "Bad File Format", SIGINT, normal completion).
my $echo_off = 0;
if (-t STDIN) {
    `stty -echo`;
    $echo_off = 1;
}

END {
    if ($echo_off) {
        # Backticks overwrite $?, which inside END is the exit status of
        # the script; save and restore it so die() still exits non-zero.
        my $status = $?;
        `stty echo`;
        $? = $status;
    }
}

# exit() from the handler runs the END block, which restores echo.
$SIG{INT} = sub { print "\n"; exit; };

$| = 1;

$opt = getopts "Hhqf:m:n:r:v:",\%opts;

if (! $opt || defined $opts{'h'} || defined $opts{'H'}) {
    # show only the base name of the script in the usage text
    $0 = substr $0, rindex($0,'/') + 1;
    print <<"PROG_USAGE";
Usage : $0 [-q] [-n num] [-v num] [-m num] [-f log-file] [-r regex]
\t-n num\t\tspecifies the number of pages to tally the hits for
\t\t\tdefault is 15
\t-v num\t\tprints the last num hits
\t-m num\t\tprints num miscellaneous hits
\t-f file\t\tspecify the name of log file,
\t\t\tthe default is the environment variable APACHE_LOG
\t-r regex\tspecify the regular expression to search for
\t\t\tin the html file names, . matches everything
\t\t\tby default the script looks for user's hits
\t-q\t\tquote regex metacharacters, this is needed if you use
\t\t\t-r and don't know what the perl regex metacharacters are
eg.
$0 -v 10 -n 25 -m 20 -r \\~$ENV{USER}
The above will print the last 10 hits to your web pages as well as
the top 25 hits and the last 10 miscellaneous hits and the total
number of miscellaneous hits using the environement variable
APACHE_LOG as the name of the log-file, the -r argument is redundant
since it's the default
$0 -m 10 -v 25 -f log-file -r '~$ENV{USER}'
The above will print the top 15 html pages, the last 25 hits to your
web pages and the last 10 non-html hits to your web pages, using
'log-file' as the name of the log-file, the -r argument is redundant
since it's the default
$0 -r .
will print the top 15 hits to the website, the dot(.) is the regex to
match anything

Can anylyze files that were compressed with gzip, compress, bzip2, or bzip.
PROG_USAGE
    exit;
}

unless (defined $ENV{APACHE_LOG} || defined $opts{'f'}) {
    print "You need to set the environemnt variable APACHE_LOG\n".
          "or specify the log file with -f\n";
    exit;
}

# Fill in defaults; numeric options that are not plain digits are
# ignored (-m, -v) or replaced by the default (-n).
$opts{'f'} = $ENV{APACHE_LOG} unless defined $opts{'f'};
$opts{'m'} = undef unless defined $opts{'m'} && $opts{'m'} =~ /^\d+$/;
$opts{'n'} = 15 unless defined $opts{'n'} && $opts{'n'} =~ /^\d+$/;
$opts{'r'} = '~' . (getpwuid $<)[0] unless defined $opts{'r'};
$opts{'r'} = quotemeta $opts{'r'} if defined $opts{'q'};
$opts{'v'} = undef unless defined $opts{'v'} && $opts{'v'} =~ /^\d+$/;

# commafy LIST
# Inserts thousands separators into each argument IN PLACE (the
# elements of @_ alias the caller's variables) and returns the list.
# The second substitution removes commas that the first one put into
# the digits after a decimal point.
sub commafy {
    for (@_) {
        1 while s/(\d)(\d\d\d)(?!\d)/$1,$2/;
        1 while s/(\.\d+),(?!\s)/$1/;
    }
    @_;
}

# print_hit_list ARRAYREF
# Prints the Url_Header followed by one Url line per [machine, date,
# url] entry.  The date is shortened from "dd/Mon/yyyy:hh:mm:ss" to
# "dd/Mon hh:mm" for display.  Changes $~ (left set to "Url").
sub print_hit_list {
    my($list) = @_;

    $~ = "Url_Header";
    write;
    $~ = "Url";
    foreach (@$list) {
        ($machine,$date,$url) = @$_;
        $date =~ s/\/\d+:/ /;   # drop the year
        $date =~ s/:\d+$//;     # drop the seconds
        write;
    }
}

# NOTE(review): the header texts and picture lines below are separated
# by single spaces, exactly as found; confirm the intended column
# alignment before relying on the layout.
format Url_Header =
Domain hit came from Date Time Url hit
-------------------------------------------------------------------------------
.

format Url =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$machine,$date,$url
.

format Date_Header =
Start Date Start Time End Date End Time Days
-------------------------------------------------------------------------------
.

format Date =
@<<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<<<<<<< @<<<<<<<<<<< @>>>
$dates{sdate},$dates{stime},$dates{edate},$dates{etime},$days
.

format Hits_Header =
Total Hits Graphics Hits HTML/PDF Hits Java Hits Other Hits Average/Day
-------------------------------------------------------------------------------
.

format Hits =
@>>>>>>>>> @>>>>>>>>>>>> @>>>>>>>>>>>> @>>>>>>> @>>>>>>>>> @>>>>>>>>>>
$hits{all},$hits{graphics},$hits{html},$hits{java},$hits{other},$hits{average}
.

format Page_Header =
Page Hits Average/Day
-------------------------------------------------------------------------------
.

format Page =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>> @>>>>>>>>>>
$urls[$i],$urls{$urls[$i]},$hits{average}
.

# ---------------------------------------------------------------------
# Open the log, through a decompressor when the file name asks for it.
# NOTE(review): the file name is interpolated into a shell command and
# into a two-argument open; it comes from the invoking user's own
# command line / environment, so this is left as is for 5.003
# compatibility, but names containing shell metacharacters are unsafe.
# ---------------------------------------------------------------------
$? = 0;
# file is gzipped or compressed
if ($opts{'f'} =~ /(\.gz|\.Z)$/) {
    -s "$opts{'f'}" or die "$!";
    open FILE,"zcat $opts{'f'} |" or die "Error : $opts{'f'} : $!";
}
# file is a bzip2 file
elsif ($opts{'f'} =~ /\.bz2$/) {
    -s "$opts{'f'}" or die "$!";
    open FILE,"bzip2 -dc $opts{'f'} |" or die "Error : $opts{'f'} : $!";
}
# file is a bzip file
elsif ($opts{'f'} =~ /\.bz$/) {
    -s "$opts{'f'}" or die "$!";
    open FILE,"bzip -dc $opts{'f'} |" or die "Error : $opts{'f'} : $!";
}
# 'regular' file
else {
    open FILE,"$opts{'f'}" or die "Error : $opts{'f'} : $!";
}
die "Error : $opts{'f'} : $!" if $?;

# ---------------------------------------------------------------------
# Parse the log.
# ---------------------------------------------------------------------
$i = 1;             # used as a boolean for the loop below for first date read
$prevdate = '';
my $perc;
my $was = 0;        # last percentage shown
my $lastdate;       # timestamp of the most recent valid hit
my $size = (-s FILE) || 0;  # 0 when the size is unknown (e.g. a pipe)
my $on_tty = -t STDOUT;     # progress is only drawn on a terminal

print " 0%" if $size > 0 && $on_tty;

# parse the file and grab relevant info
while (<FILE>) {
    # percentage progress when the file size is known
    if ($size > 0 && $on_tty) {
        $perc = int ( ( tell( FILE ) / $size ) * 100 );
        if ($was != $perc) {
            printf "\b\b\b%2d%%",$perc;
            $was = $perc;
        }
    }

    # Captures, in order: the first space-delimited word, the first word
    # inside [...], the word starting at the first '/' after the first
    # double quote, and a three digit number after the closing quote
    # (presumably host, timestamp, URL and status of a common-log-format
    # line).  A failed match leaves all four undefined.
    ($machine,$date,$url,$code) =
        m#([^ ]+)[^\[]+\[([^ ]+)[^"]+[^/]+([^ ]+)[^"]+[^\d]+(\d\d\d)#;
    next unless defined $machine && defined $date &&
                defined $url && defined $code;
    next unless $code =~ /^2/;  # code must be a 200 level code to be a valid hit

    $url =~ s/%([0-9a-fA-F][0-9a-fA-F])/pack "H2",$1/eg;    # decode URL
    ++$days unless $prevdate eq substr $date,0,6;   # count number of days
    ($prevdate) = $date =~ m#^(\d\d/\w\w\w)#;       # save date
    $lastdate = $date;                              # end date candidate

    # get a count of all '.htm[l]' and '.pdf' files hits
    if ($url =~ m#(?:/|\..?html?|\.pdf)$#i) {
        # -v on command line specified: keep only the last -v matches;
        # $opts{'v'} is counted down and, once negative, the oldest
        # entry is dropped for every new one.
        if (defined $opts{'v'} && $url =~ m#$opts{'r'}#o) {
            push @urls,[$machine,$date,$url];
            shift @urls if --$opts{'v'} < 0;
        }
        ++$urls{$url};      # count hits to each URL
        ++$hits{html};      # count total html/pdf hits
    }
    # get a count of graphics files hits
    elsif ($url =~ m#(?:\.gif|\.jpg|\.jpeg)$#i) {
        ++$hits{graphics};
    }
    # get count of java hits
    elsif ($url =~ m#\.class$#i) {
        ++$hits{java};
    }
    # miscellaneous hits, kept the same way as the -v list
    elsif (defined $opts{'m'}) {
        if ($url =~ m#$opts{'r'}#o) {
            ++$hits{misc};
            push @murls, [$machine,$date,$url];
            shift @murls if --$opts{'m'} < 0;
        }
    }

    # save the start date
    if ($i) {
        ($dates{sdate},$dates{stime}) = $date =~ /([^:]+):(.+)/;
        $i = 0;
    }
    ++$hits{all};   # get a count of all the valid hits
    ++$cnt;

    # size unknown: tick marks instead of a percentage
    if ($size == 0 && $on_tty) {
        print '.' if $cnt % 5000 == 0 && $cnt % 25000 != 0;
        print '|' if $cnt % 25000 == 0;
    }
}
close FILE;
print STDERR "\n";

unless (defined $hits{all}) {
    print STDERR "Bad File Format\n";
    exit;
}

# save the end date: the timestamp of the last *valid* hit, to match the
# start date, which is taken from the first valid hit
($dates{edate},$dates{etime}) = $lastdate =~ m#([^:]+):(.+)#;

# ---------------------------------------------------------------------
# Report.
# ---------------------------------------------------------------------
print "\n";
if (defined $opts{'v'}) {
    print_hit_list(\@urls);
    print "\n";
}

$~ = "Date_Header";
write;
$~ = "Date";
write;
print '-' x 79,"\n";

# $days is at least 1 here because at least one valid hit was counted
$hits{average} = sprintf "%.2f",$hits{html} / $days;
$~ = "Hits_Header";
write;
$~ = "Hits";
$hits{other} = $hits{all} - ($hits{graphics} + $hits{html} + $hits{java});
commafy $hits{all}, $hits{graphics}, $hits{html}, $hits{java}, $hits{other},
        $hits{average};
write;
print '-' x 79,"\n";

@urls = ();     # get rid of current contents
# sort the urls based on number of hits, decreasing order
# if hit numbers are same use file name for sort order
@urls = sort { $urls{$b} <=> $urls{$a} || $a cmp $b } keys %urls;

$~ = "Page_Header";
write;
$~ = "Page";
# The Page format reads $urls[$i], so $i must stay the package variable.
for ($i = 0,$j = 0,$hits = 0;$j < $opts{'n'} && $i <= $#urls;++$i) {
    if ($urls[$i] =~ m#$opts{'r'}#) {
        $hits += $urls{$urls[$i]};
        $hits{average} = sprintf "%.2f",$urls{$urls[$i]} / $days;
        commafy $urls{$urls[$i]};   # modifies the stored count in place
        ++$j;
        write;
    }
}

# Summary line for the pages just listed: a temporary extra entry is
# appended to @urls so the Page format can print it, then removed.
$hits{average} = sprintf "%.2f",$hits / $days;
commafy $hits;
$urls[$#urls+1] = "Top $j Hits";
$urls{$urls[$#urls]} = $hits;
$i = $#urls;
print '-' x 79,"\n";
write;
pop @urls;

# Total over every page matching the regex; strip the commas that the
# listing loop above put into some of the counts before adding them up.
$hits = 0;
for (@urls) {
    if (m#$opts{'r'}#) {
        $urls{$_} =~ s/,//g;
        $hits += $urls{$_};
    }
}
$urls[$i] = "Total Hits";
$hits{average} = sprintf "%.2f",$hits / $days;
commafy $hits;
commafy $hits{average};
$urls{$urls[$i]} = $hits;
write;

if (defined $opts{'m'}) {
    print "\n";
    $urls[$i] = "Miscellaneous Hits";
    $hits{average} = sprintf "%.2f",$hits{misc} / $days;
    commafy $hits{misc};
    $urls{$urls[$i]} = $hits{misc};
    write;
    print_hit_list(\@murls);
}

# terminal echo is restored by the END block
exit;