#!/usr/bin/perl
############################################################
#
# File: addates.pl
#
# Arguments: filespec -s
#    filespec   file specification to process (if none, then assumes all)
#    -s         if specified, recurse subdirectories
#               (accepted before or after the filespec)
#
# Description:
#    Adds a date field to W3C log files that don't have them
#    Assumes that all log entries are for the date specified
#    in the header of the file.
#
# NOTE: This only works for log files rotated at midnight each day!
#       No longer true (see mod 2000Mar01) but assumes that there are
#       no 24hour gaps in the logfile
#
# WARNING: This changes the original files! Backup first!
#
# Created: 12.February.1999 by Jeremy Wadsack (jwadsack@wadsack-allen.com)
# Copyright (C) 1999 Wadsack-Allen. All rights reserved.
# Modified: 1.March.2000 by Guido Van Hoecke (Guido.VanHoecke@advalvas.be)
#           No copyrights: released in the public domain
#
# This script is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
##########################################################################
# Date      Modification                                          Author
# ------------------------------------------------------------------------
# 1999Apr09 Included Expand function instead of module.               JW
# 2000Mar01 Added NextDate function and logic.                        GVH
# 2000May31 Allows for multiple #Field lines (but note that it ignores
#           multiple #Date: lines because these are likely wrong).    JW
# 2000May31 Optimized by removing extra loop on contents.             JW
##########################################################################
use strict;
use warnings;
use Time::Local;

# -- Command line: the first non-flag argument is the filespec; the flag
#    must be exactly "-s" so that a filespec which merely contains "-s"
#    (e.g. access-stats.log) is not mistaken for it.
my $recurse = 0;
my @specs;
foreach my $arg (@ARGV) {
    if ( $arg =~ /\A-s\z/i ) {
        $recurse = 1;
    }
    else {
        push @specs, $arg;
    }
}

# If no filespec, then assume all files
my $filespec = @specs ? $specs[0] : '*.*';

my @filelist = Expand_Files( $filespec, $recurse );

FILE:
foreach my $filename (@filelist) {

    # Open file and get contents (!WARNING this uses lot of memory)
    print "Checking file $filename. ";

    unless ( -f $filename ) {
        print "Not a plain file. Skipping.\n";
        next FILE;
    }

    my $in;
    unless ( open( $in, '<', $filename ) ) {
        print "Can't read file: $!. Skipping.\n";
        next FILE;
    }
    my @contents = <$in>;
    close($in);

    unless (@contents) {
        print "File is empty. Skipping.\n";
        next FILE;
    }

    # -- Find the date in the first pass, so we can
    #    abort if there is none
    my $date;
    foreach my $line (@contents) {
        if ( $line =~ /^\#Date: (.{10})/i ) {
            $date = $1;
            print "Date $date. ";
            last;
        }

        # -- Jump out of loop if not a starting spec line (just a little OPT)
        last if $line =~ /^[^\#]/;
    }

    unless ( defined $date ) {
        print "File has no date. Skipping.\n";
        next FILE;
    }

    # -- Find and update Field specification and replace all log file lines.
    #    $line aliases the array element, so assignments edit @contents.
    my $skip_file = 0;
    print "Processing... ";
    my $previous_time = '00:00:00';    # smallest possible time

    LINE:
    foreach my $line (@contents) {
        if ( $line =~ /^\#Fields:(.+)$/i ) {
            my $spec = $1;
            if ( $spec =~ /date/i ) {

                # -- File already has a date field so skip the file
                $skip_file = 1;
                print "File has date field. {$spec} Skipped\n";
                last LINE;
            }
            $line = "#Fields: date$spec\n";
        }
        elsif ( $line =~ /^[^\#]/ ) {

            # First word of a log entry is the time. Lines holding nothing
            # but whitespace carry no time stamp and are left untouched.
            next LINE unless $line =~ /(\S+)\s+/;
            my $this_time = $1;

            # A time earlier than the previous entry means midnight passed
            if ( $this_time lt $previous_time ) {
                $date = NextDate($date);
                print "Date $date. ";
            }
            $previous_time = $this_time;
            $line          = "$date $line";
        }
    }
    next FILE if $skip_file;

    # Write new file to same name; only report success once the data
    # has really been flushed to disk.
    my $out;
    unless ( open( $out, '>', $filename ) ) {
        print "Can't write file: $!. Skipped.\n";
        next FILE;
    }
    print {$out} @contents;
    unless ( close($out) ) {
        print "Error writing file: $!.\n";
        next FILE;
    }
    print "done.\n";
}

#------------------------------------
# Sub: Expand_Files
#
# Arguments: $filespec, $cascade
#
# Returns: @filelist
#
# Description:
#    returns a list of filenames based on the file
#    specification and the choice to cascade/recurse
#    subdirectories.
#-------------------------------------
sub Expand_Files {

    # $spec    - a file name, or a wildcard spec ('*' = any run of
    #            characters, '?' = any one character), optionally
    #            prefixed by a directory using '/' or '\'
    # $cascade - true to also search every subdirectory, at any depth
    # Returns the matching paths: subdirectory hits first, then the
    # directory's own files, each group in sorted order.
    # Dies if a directory cannot be read.
    my ( $spec, $cascade ) = @_;

    # Just a single file, return it
    return ($spec) unless $cascade || $spec =~ /[*?]/;

    # Split "dir/filespec"; with no separator, search the current directory
    my ( $dir, $filespec ) = $spec =~ m{^(.*)[\\/](.+)$};
    if ( !defined $filespec ) {
        ( $dir, $filespec ) = ( '.', $spec );
    }
    elsif ( $dir eq '' ) {
        $dir = '/';    # spec was anchored at the root, e.g. "/*.log"
    }
    my $prefix = $dir =~ m{[\\/]\z} ? $dir : "$dir/";

    opendir( my $dh, $dir ) or die "Error: Can't read directory $dir: $!";
    my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir($dh);
    closedir($dh);

    my @filenames;

    # cascade if enabled; '.' and '..' are already excluded, and symlinked
    # directories are not followed so a link cycle cannot loop forever
    if ($cascade) {
        foreach my $subdir ( grep { -d "$prefix$_" && !-l "$prefix$_" } @entries ) {
            push @filenames, Expand_Files( "$prefix$subdir/$filespec", $cascade );
        }
    }

    my $matcher = _Glob_To_Regex($filespec);
    push @filenames,
      map { "$prefix$_" } grep { $_ =~ $matcher && -f "$prefix$_" } @entries;

    return @filenames;
}    # end Expand_Files

#------------------------------
# Sub: _Glob_To_Regex
#
# Arguments: $glob (wildcard file specification, no directory part)
#
# Returns: compiled regex matching whole file names
#
# Description: translates '*' and '?' wildcards to their regex
#    equivalents and quotes every other character literally.
#    The DOS-style '*.*' is treated as "all files", including
#    names that contain no dot.
#
#------------------------------
sub _Glob_To_Regex {
    my ($glob) = @_;
    $glob = '*' if $glob eq '*.*';

    my %wildcard = ( '*' => '.*', '?' => '.' );
    my $pattern  = join '',
      map { exists $wildcard{$_} ? $wildcard{$_} : quotemeta($_) }
      split /([*?])/, $glob;

    return qr/\A$pattern\z/s;
}    # end _Glob_To_Regex

#------------------------------
# Sub: NextDate
#
# Arguments: $startDate (as yyyy-mm-dd)
#
# Returns: $nextDate (as yyyy-mm-dd)
#
# Description: increments the $startDate variable to obtain $nextDate.
#    Dies if $startDate is not a recognisable yyyy-mm-dd date.
#
#------------------------------
sub NextDate {
    my ($startDate) = @_;

    my ( $year, $mon, $mday );
    if ( defined $startDate ) {
        ( $year, $mon, $mday ) =
          $startDate =~ /(\d{4})\D+(\d{1,2})\D+(\d{1,2})/;
    }
    die "NextDate: can't parse date '"
      . ( defined $startDate ? $startDate : '' ) . "'\n"
      unless defined $mday;

    # Work in UTC with the full four-digit year: no daylight-saving jumps,
    # and no two-digit "rolling century" guess by Time::Local.
    my $noon = timegm( 0, 0, 12, $mday, $mon - 1, $year );    # Noon
    my ( $next_mday, $next_mon, $next_year ) =
      ( gmtime( $noon + 86400 ) )[ 3 .. 5 ];                   # Tomorrow

    return sprintf "%04d-%02d-%02d", 1900 + $next_year, $next_mon + 1,
      $next_mday;
}    # end NextDate