#!/usr/bin/perl # General Purpose regular expression matcher for HTML content # Copyright (C) Neptune Web, Inc. 2006 # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # $Id: dwreplace.pl,v 1.28 2007/10/03 15:34:07 cdalsass Exp $ use Getopt::Long; my $replacementfile; my $searchonly; my ($showexample); my $verbose; my $casesensitive; my $null; Getopt::Long::GetOptions("rfile=s" => \$replacementfile, "s+" => \$searchonly, "e+" => \$showexample, "v+" => \$verbose, "ni+" => \$casesensitive, "null+" => \$null); if (!$replacementfile && !$showexample) { print <<'END_OF_MARKER'; usage: dwreplace (--s) --rfile=rfile file1 \n\n Flags: --rfile= => where file is the rfile to use (this is a mandatory argument) --s => search only, do not replace --e => show an example rfile --v => verbose mode, shows detailed matching info and highlighted text for changes --ni => case sensitive mode (not insensitive) --null => assume file list will be null separated and piped to stdin, similar to xargs e.g. find . -print0 | dwreplace --rfile=blah.rfile --null Note: you can also pass regular non-null terminated filenames to stdin. ignore the --null in this case e.g. find . -print | dwreplace --file=blah.rfile END_OF_MARKER exit(1); } elsif ($showexample) { ######################################### SAMPLE RFILE ########################## print <<'END_OF_MARKER'; # Example rfile: $OLD[0] = <<'END_OF_TEXT'; Sample Love Letter: END_OF_TEXT $NEW[0] = <<'END_OF_TEXT'; Sample Love Letter: END_OF_TEXT # Reversing the file to match a ) { $f .= $_; } close (F); eval($f); if ($@) { if ($@ =~ /did not return a true value/i) { print STDERR "Your rfile ($replacementfile) must return a true value... \nYou may need to put a '1;' at the bottom of the rfile. run with --e for an example rfile\n"; print STDERR "Error: $@\n"; } else { print STDERR "Your rfile ($replacementfile) did not compile properly\n"; print STDERR "Error: $@\n"; } exit() }; } my @infiles = @ARGV; if ($#ARGV == -1) { # make check for files coming from stdin, possibly null terminated. if ($null) { my $temp_val = $/; $/ = "\0"; } while (<>) { chop; push(@infiles,$_); } if ($#infiles == -1) { print "no files specified\n"; exit(); } if ($null) { $/ = $temp_val; } } if (!$null && $#infiles == 0 && $infiles[0] =~ /\0/ ) { # check for null terminated arguments without the --null, in case the user forgot. print "did you mean to pass --null?\n"; exit(); } if ($#NEW == -1 || $#OLD == -1) { print "nothing to do (OLD OR NEW EMPTY)\n"; exit(); } if (!($#NEW == $#OLD)) { print "NEW, OLD, AND OPTIONS arrays are of differing size\n"; exit(); } if ($#OPTIONS > $#NEW) { print "options array larger than newtext array.\n"; exit(); } if ($#OPTIONS > $#OLD) { print "options array larger than newtext array.\n"; exit(); } my $j; foreach ($j = 0; $j < $#NEW; $j++) { if (!defined($OLD[$j]) || !defined($NEW[$j])) { print "NEW[$j], OLD[$j], AND OPTIONS[$j] are empty.\n"; exit(); } } my $filename; # loop through each file provided on the command prompt foreach $filename (@infiles) { if (!open G, "<$filename") { print STDERR "Cannot open file $filename for reading\n"; next; } undef $/; my $file = ; $/ = "\n"; my $index; my $matchcount = 0; my %failedMatches; my %mperstr; # match per str my $mperstrcnt = 0; # " " " count my $hlitfile = $file; # file with parts to be changed highlighted for ($index = 0; $index <= $#NEW; $index++) { my $regexp; my $endingModifiers; if ($OPTIONS[$index] && grep(/^isregexp$/,@{$OPTIONS[$index]})) { $regexp = $OLD[$index]; } else { $regexp = removeWhiteSpaceAndEscapeRegexp($OLD[$index]); } # the main 'workhorse' regular expression my $finalRegexp; my $reversedRightHandSide; # only used if reversing, so as not to override the actual global variables. if ($OPTIONS[$index] && grep(/^isregexp$/,@{$OPTIONS[$index]}) && grep(/^interpolator$/,@{$OPTIONS[$index]})) { my $rightSideExp = $NEW[$index]; # it seems that they never want / in the right hand side if they are doing interpolation on right. $rightSideExp =~ s/([\/])/\\$1/g; if (!$searchonly) { $finalRegexp = '$file =~ ' . "s/\$regexp/" . $rightSideExp. "/"; } else { $finalRegexp = '$file =~ ' . "/\$regexp/"; } } else { if ($OPTIONS[$index] && grep(/^reversefile$/,@{$OPTIONS[$index]}) ) { # reverse the right hand side for convenience .... $reversedRightHandSide = scalar reverse $NEW[$index]; if (!$searchonly) { $finalRegexp = '$file =~ ' . "s/\$regexp/\$reversedRightHandSide/"; } else { $finalRegexp = '$file =~ ' . "/\$regexp/"; } } else { if (!$searchonly) { $finalRegexp = '$file =~ ' . "s/\$regexp/\$NEW[\$index]/"; } else { $finalRegexp = '$file =~ ' . "/\$regexp/"; } } } if (!$searchonly && $OPTIONS[$index] && grep(/^nonglobalreplace$/,@{$OPTIONS[$index]}) ) { $endingModifiers = "is"; if ($casesensitive ||grep(/^casesensitive$/,@{$OPTIONS[$index]})) { $endingModifiers = "s"; } } elsif (!$searchonly) { $endingModifiers = "isg"; if ($casesensitive || grep(/^casesensitive$/,@{$OPTIONS[$index]})) { $endingModifiers = "sg"; } } else { # search only $endingModifiers = "isg"; if ($casesensitive || grep(/^casesensitive$/,@{$OPTIONS[$index]})) { $endingModifiers = "sg"; } } # sometimes you'll need to reverse the contents of the file. if ($OPTIONS[$index] && grep(/^reversefile$/,@{$OPTIONS[$index]})) { $file = scalar reverse $file; } #print "\n\n------\n\n$regexp\n\n$finalRegexp\n\n$file\n\n-----\n\n"; # count how many times a particular match occurs my $rr = "\$file =~ " . "/$regexp/" . $endingModifiers; while (eval($rr)){ die "Error ---: $@\n Code:\n$rr\n" if ($@); $mperstrcnt++; } $mperstr{$index} = $mperstrcnt; $mperstrcnt = 0; # if you want to see what you are replacing highlighted within the file, only in verbose mode if ($verbose) { $hlitfile = highlightFile($regexp, $endingModifiers, $hlitfile); } $finalRegexp .= $endingModifiers; # use the e flag when we want to evaluate the right side of the reg exp. during replace if (!$searchonly && grep(/^eflag$/,@{$OPTIONS[$index]})) { $finalRegexp .= "e"; } if (eval($finalRegexp)) { die "Error ---: $@\n Code:\n$finalRegexp\n" if ($@); $matchcount++; } else { # count your failed matches... $failedMatches{$index} = 1; }; # sometimes you'll need to reverse the contents of the file, but don' forget to put it back!! if ($OPTIONS[$index] && grep(/^reversefile$/,@{$OPTIONS[$index]})) { $file = scalar reverse $file; } } if (!$searchonly && $matchcount > 0) { open G, ">$filename" or print STDERR "Cannot open file $filename for writing"; print G "$file"; close G; } print "$filename ($matchcount matches found)\n"; if($verbose) { # sometimes you may want more information on matching, triggers verbose mode my $ii = 1; my $value; # print out how many matches for each match type print $hlitfile . "\n"; print "Times each match is found data below: \n"; foreach $value (keys %mperstr) { # ii --> match index, hash value --> how many of that type of match was made print "\t$ii -- $mperstr{$value} times\n"; $ii++; } } } sub removeWhiteSpaceAndEscapeRegexp() { my ($ins) = @_; # remove all regexp meaningful characters. $ins =~ s/([\@\$\|\*\?\]\[\^\/\+\.\"\(\)])/\\$1/isg; # add whitespace independence, but only after weird (regular expression) chars have been removed. $ins =~ s/\s+/\\s*/isg; return $ins; } sub highlightFile() { my ($r, $endMods, $f) = @_; my $hfile = $f; require POSIX; use Term::Cap; # use the terminal feature to create highlighted text on the screen my $term = $ENV{TERM} || 'vt100'; my $terminal; my $termios = POSIX::Termios->new(); $termios->getattr; my $ospeed = $termios->getospeed; $terminal = Term::Cap->Tgetent( { TERM=>undef, OSPEED=>$ospeed } ); print "Error from eval: $@" if ($@); my ($SO, $SE) = ($terminal->Tputs('so'), $terminal->Tputs('se')); my $exp = '$hfile =~ ' . "s/($r)/\${SO}\$1\${SE}/" . $endMods; eval($exp); die "Error ---: $@\n Code:\n$exp\n" if ($@); return $hfile; }