#!/usr/local/bin/perl # DESCRIPTION: # The popular log analysis tool Analog includes a link to the HTML # validation tool at http://www.webtechs.com/html-val-svc/ in its output # files. The domain webtechs.com has apparently been taken over (legally # or not) by a porn site. Until that is resolved, there are a lot of pages # on the web that are unintentionally linking to a porn site. If you make # your stat pages available to the public, this obviously could cause you # much embarrassment. # # This script simply opens the HTML files you specify and removes the link # to WebTechs. It can process all files in a given directory and optionally # any sub-directories (see the OPTIONS section below). # # USAGE: # nowebtechs.pl item1 [item2] [item3] [etc] # # List the filename(s) and/or directory name(s) on the command line that # you want the script to process. Separate multiple files or directories # with a space. The wild-card character for matching multiple files is NOT # supported. You must list each file separately or list a directory name # to process every file in that directory. # # This script must be run from the shell...not via the web as a CGI script! # If you are not able to run it, make sure the path to perl on the first # line is correct (type "which perl" at the command line). Also, make sure # it is executable (type "chmod 700 nowebtechs.pl" at the command line). # # NOTE: This script can be customized for your needs. See the OPTIONS # section below. # # DISCLAIMER: # There are no guarantees or warranties with this script. It works # great for me, but your mileage may vary. USE IT AT YOUR OWN RISK. # # AUTHOR: # Sam Choukri # sam@bagism.com # # Version: 1.0 # The latest version of this script can be found at: # http://www.bagism.com/freeware/nowebtechs.txt # # Copyright (c) 1999 Sam Choukri # OPTIONS: $keep_backups = 1; # For safety, this script makes backups of your HTML files before # processing them. If you want to keep the backups, enter "1" for # this value. If you want the backup files to be automatically deleted # after each file has been successfully processed, enter "0" (zero). $process_sub_dirs = 1; # If you want the script to process the files in any sub-directories # within the starting directory you specify, enter "1" for this value. # If you want it to process only the starting directory, enter "0" (zero). $display_messages = 1; # If you want the script to display messages on the screen telling # you what files it is working on, whether or not the WebTechs link was # found, and any errors which occur, enter "1" for this value. If you # don't want to display any messages (helpful if you are running this # script as a cron job), enter "0" (zero). @permitted_file_extensions = qw(html htm shtml); # This is the list of file extensions that the script is allowed to # process. The case of the extension does NOT matter. # Do NOT add the "." to the extension...that will be added implicitly. ############################################################################ use File::Copy; $ext = join ('|', @permitted_file_extensions); foreach (@ARGV) { unless (-e $_) { &Message("**Skipped $_: non-existant file or directory\n"); } &Shunt($_); } sub Shunt { local($item) = @_; next if (-l $item); # skip symbolic links if (-d $item) { #VERY IMPORTANT: prevents re-reading current and prior directories next if $item =~ /^\./; &OpenDir($item); } elsif (m/\.($ext)$/oi) { &OpenFile($item); } } sub OpenDir { local($dir) = @_; local(@contents) = (); &Message("Opening directory $dir...\n"); opendir (DIR, "$dir") or &Message("**Can't open the directory $dir: $!\n"); @contents = grep(!/^\./, readdir (DIR)); closedir DIR; $dir =~ s|/*$||; foreach (@contents) { s|^(.*)$|$dir/$1|; if (-d $_ && !$process_sub_dirs) { &Message("--Skipping sub-directory $dir\n"); next; } &Shunt($_); } } sub OpenFile { local($file) = @_; local($text) = ''; &Message("--Processing FILE $file...\n"); if ( copy($file,"$file.bak") ) { $found_link = 0; open (INFILE, "$file.bak") or &Message("**Can't read file $file.bak: $!\n"); open (OUTFILE, ">$file") or &Message("**Can't write to file $file: $!\n"); while () { # If we find the beginning of the webtechs link, let's # skip the rest of the lines and just print the closing # body and html tags instead. if (m|^

|i) { print OUTFILE "\n"; print OUTFILE "\n\n"; &Message("----Removed Webtechs link in $file\n"); $found_link = 1; last; } else { print OUTFILE; } } $found_link or &Message("----WebTechs link not found in $file\n"); close (INFILE) or &Message("**Can't close file $file.bak: $!\n"); close (OUTFILE) or &Message("Can't close file $file: $!\n"); unless ($keep_backups) { unlink "$file.bak" or &Message("**Can't delete backup file $file.bak: $!\n"); } } else { &Message("**Can't create a backup file for $file (skipped): $!\n"); } } sub Message { local($message) = @_; print $message if $display_messages; }