#!/usr/bin/perl
# licensed under GNU GPL, 2001 by The Yes Men
# Version: $Name: beta20011115c $

require "iwill.conf";   # configuration file

use Socket; # required in Perl 5 
use File::Basename; 

require 'flush.pl';

($_) = @ARGV;

# TODO: it's ignoring everything after the "?"
# which is why 
# http://www.theyesmen.org/yesiwill/example/affiliates/www.corporate-ir.net/ireye/ir_site.zhtml?ticker=DD&script=11945&item_id='financial.htm'
# $file (below) shows up as 
# /yesiwill/example/affiliates/www.corporate-ir.net/ireye/ir_site.zhtml
# How to fix this?

$savefilename = 0;

if (!$basepath) { $basepath = ".."; }
else {
	$basepath =~ s|/$||;
}

if ($dbgfile) {
	$dbgfile = "$basepath/$cachedir/$dbgfile";
	open(DBG,">>$dbgfile") || warn "can't open $dbgfile $!\n";
	seek(DBG,0,2); # don't bother locking
}
if (0) {
	($arg1, $arg2) = @ARGV;
	if ($arg2) {
		dbg("got doohickey ($arg2):\n\t\t$_"); # never happens
	}
	if ($ticker) {
		dbg("got ticker $ticker"); # never happens
	}
	$numargs = $#ARGV;
	dbg("number of arguments: $numargs"); # always 0
}
if (!$givemedomain) {
	dbg("ERROR: you must have a givemedomain!!!");
}

$weblocation = $putithere;
if ($weblocation) {
	($mydomain, $basedir) = SplitURL($weblocation);
}
else {
	dbg("ERROR: weblocation is no longer optional!! on the other hand, it will sometimes work anyhow...");
}
$file = "$_";

# if ($file =~ /corporate\-ir/) {
#	$dbgcorp = 1;
#	dbg("got $file");
# }

makemimehash(); # set up hash of mime-type values

if ($basedir) {
	# make sure $basedir starts with a slash and doesn't end with one
	$origbasedir = $basedir;
	if (!($basedir =~ /^\//)) {
		$basedir = "/$basedir";
	}
	if ($origbasedir =~ /\/$/) {
		$basedir =~ s|/$||;
	}
	# basedir doesn't apply to remote file of course
	$file =~ s/^$basedir//; 
}
else {
	$basedir = ""; 	
}
$mybaredomain = $mydomain;
$mybaredomain =~ s/^www.//i;
$givemebaredomain = $givemedomain;
$givemebaredomain =~ s/^www.//i;

$subdomstr = "/affiliates";
$getdomain = $givemedomain;
$subdomain = 0;

if ($file =~ /^$subdomstr/) {
	$file =~ s|^$subdomstr\/||;
	$suburl = $file;		
	($subdomain, $file) = SplitURL($suburl);
	$getdomain = $subdomain;
	$getdomain =~ s/$mybaredomain/$givemebaredomain/; # convert back from masking
	if ($getdomain eq $givemedomain) { # might make sense to do this later too... or... hm....
		$subdomain = 0; 
	}
}

$sfn = "$basepath/$cachedir/$getdomain$file";
$ishtml = 0;
$isjavascriptfile = 0;
$isjavascript = 0;
$isslash = 0;
$storablesfn = $sfn;
if ($sfn =~ /\/$/) {
	$isslash = 1;
	$storablesfn = "$storablesfn$slashfile";
}

$mimetype = &mimefind($sfn);
if( $mimetype eq 'text/html') { $ishtml = 1 };


if (!$ishtml) {
	if ($sfn =~ /\.htm/ || $sfn =~ /\.asp/ || $sfn =~ /\.txt/ || $sfn =~ /\.php/ || $sfn =~ /\.dll/ || $isslash || $sfn =~ /\.twhat/) {
		dbg("mimetype was not text/html, but the ending of file is possibly html?\n\t$sfn\n----");
		$ishtml = 1;
	}
}
if ($sfn =~ /\.js/) {
	# We want to parse this, but we want to be aware that it's Javascript.
	$isjavascriptfile = 1;
	if (!$ishtml) {
		dbgv("i guess that .js files aren't sent as text/html");
	}
	$ishtml = 1;
}
if ($dbgfile && $dbgfilesizelimit > 0) {
	$dbgfilesize = -s $dbgfile;
	if ($dbgfilesize > $dbgfilesizelimit) {
		dbgv( " - backing up $dbgfile, it is bigger than $dbgfilesizelimit" );
		close(DBG);
		system("mv $dbgfile $dbgfile.bak");
		open(DBG,">>$dbgfile") || print "can't open $dbgfile $!\n";
	}
}

$storedsfn = 0;

# okay basically, we want to use the cached file if:
# 1. the remote file didnt change.
# 2. the local file is really young.
	
unless( -e $storeablesfn) {
	if ($storehtmlalways && $ishtml && (-f $storablesfn)) {
	 	# this is misleading. we may actually not rewrite, 
		# but that's decided later. - nick
	}
	# we're just making the directories here if they don't exist already.
	@sfndirparts = split(/\//, $storablesfn); 
	$pathsofar = 0;
	while (defined($sfndirbit = shift(@sfndirparts))) {
	   if ($pathsofar) {
		   if ($pathsofar ne "") {
			   unless (-d $pathsofar) {
				   mkdir $pathsofar,0777; 
				   chmod 0777, $pathsofar; 

			   }
		   }
	   }
	   else {
		   $pathsofar = "";
	   }
	   $pathsofar = "$pathsofar$sfndirbit/";
	}  # end of making directories
}
 	
$savefilename = $storablesfn;

# $trylocation = 0;


# cue inserts eval code...
unless ($killtime) {
	$killtime = 40;
}	

$SIG{ALRM} = sub { die "timeout" } ; 	
# in practical terms, this line tells the script to go to the "eval error foo"
# if the alarm time expires. just leave it above the eval.

eval {  # functional part of the code is in an eval for the alarm call.
	alarm($killtime) ; # this line actually sets the alarm for the eval. die if not done in this many seconds.
	
	$startoutline = 0;
	$_ = $getdomain;
	/^([^:]*):*([^ ]*)/;
	$getdomain = $1;
	$port = $2;
	$port = 80 unless $port;

	#if cached file very young, don't even make request
	if((-s $savefilename) && ((-M $savefilename)*24*60 < $cachetime) ) { 
		dbgv("using cached version...");
		open(IN, $savefilename) || die "can't read $savefilename: $!";
		@lines = <IN>;
		close IN;
		$cached = 1;   # so we don't try parsing the header, etc.
	} 
	else {
		dbgv("requesting $file from $getdomain......");
		@lines = SocketRequest($getdomain, $port, $file);
	}


	$linecount = 0;
	$line = "asdfasdf";
	$startedhtml = 0;
	$contenttypeline = 0;


	if (!$cached) {     
	dbgv("parsing response header...");    
	while($line = shift @lines) {
		  last if ($line =~ /^\n|^\r/);
		  if($line =~ /^HTTP.*304/) {
			dbgv("304 - using cached version...");
			$filesize = -s $savefilename;
			dbgv("$savefilename is $filesize bytes");
			open(IN, $savefilename) || die "can't read $savefilename: $!";
			@lines = <IN>;
			close IN;
			$cached = 1;   # so we don't try parsing the header, etc.
			last;
		  }	
		  if($line =~ /^HTTP.*404/) {
			  dbg("*****404 on $getdomain/$file *****");
			  print "Content-type: $mimetype\n\n";
			  print "404 not found";  # not sure what else to say.
			  cleanup();
			  exit;
		  }
		  if ($line =~ /^Location: /i) {
			  $line =~ s/^Location: (.*)/$1/;
			  ($redirdomain, $redirfile) = SplitURL($line);
				  $redirdomain =~ s/$givemebaredomain/$mybaredomain/; # just to mask it a bit, visually
				  $newloc = "$basedir$subdomstr/$redirdomain$redirfile"; 
				  dbgv("will redirect to $newloc");
				  print "Location: $newloc\n\n";
				  if ($savefilename) {			  
	  				  open(OUT,">$savefilename") || dbg( "ERROR: Cannot create $savefilename. $!" ); 
					  chmod 0666, $savefilename;
					  flock(OUT,2);
					  print OUT "<html>\n<head>\n";
					  print OUT "<meta http-equiv='refresh' content='0; URL=$newloc'>\n";
					  print OUT "</head>\n<body>\n</body></html>";
					  cleanup();
					  exit(0);
				  }
				  else {
					  dbg("ERROR!!! getting thing from S ($sfn) but there's no savefilename");
				  }
			  }
			  if ($line =~ /^Content-Type/i) {
				  $contenttypeline = "$line\n";
				  $startoutline = $contenttypeline;
			  }
		    }
	  }

	if (!$startoutline) {
		if($cached) {
			$mimetype = mimefind($savefilename);
		} else {	
			$mimetype = mimefind($file);
		}
		$startoutline = "Content-type: $mimetype\n\n";
	}

	print $startoutline;
	@tagsublines = ( );
	@sublines = ( );
	@sublinearrays = ( );

	open(OUT,">$savefilename") || dbg( "ERROR: Cannot create $savefilename. $!" ); 
	chmod 0666, $savefilename;
	flock(OUT,2);

	# if not cached then cache it, unless it's html, which needs to get parsed.
	if(!$cached && !$ishtml) {
		print OUT @lines;
	}

	# if it is cached, or it's not html, display as-is.
	if ($cached || !$ishtml) {
		dbgv("printing non-parsed file: $savefilename.");
		print @lines;
		flush(STDOUT);
		dbgv("---\n");
		cleanup();
		exit;
	} else {  # or do parsing for subsitutions...
		  open(SUBLIST, "$basepath/$sublist") || dbg("ERR: couldn't open $sublist");
		  @sublinesraw = <SUBLIST>;
		  foreach $tline(@sublinesraw) {
			($subf, $subt, $subp) = ParseSubLine($tline);
			if ($subf) {
				$sublineproc = "$subf\t$subt\t$subp";
				if ($subp =~ /a/) {
					push(@tagsublines, $sublineproc);
				}
				# could have an array for NOT for tags...
				# push(@sublinearrays, [$subf, $subt, $subp]); # no good
				push(@sublines, $sublineproc); # do this regardless
			}

		  }
		@checkdoms = split(",", "$givemealsogreedy");
		@checkdomsspecific =       split(",", "$givemealsospecific"); 
		foreach $checkdomspecific(@checkdomsspecific) {
			$specifictopush = "s:$checkdomspecific";
			push(@checkdoms, $specifictopush);
		}

		  $withintag = 0;
		  while ($line = shift @lines) {
			# convert unwieldy pc/mac newlines to unix...
			$line =~ s/\015\012?/\n/g;

			# $line = "$line\n"; # restore the \n

			$origline = $line;
			if ($line =~ /^\s*$/) { # if all whitespace, don't do anything 
			}
			else { # process the line
				if ($withintag && $line=~ />/) {
					$withintag = 0;
				}

				if ($line =~ /<script\s*language\s*=\s*[\"\']?JavaScript/i) {
					$isjavascript = 1;
				}
				if ($isjavascript && $line =~ /<\/script>/i) {
					$isjavascript = 0;
				}
				if ($line =~ /^(\s*[<][^<]*>\s*)+$/) { 
					# make a note of only-tab-start-tab-end lines
					# could also check for withintag and then not worry about begin or end...
					$onlytags;
				}
				$linecount++;
				$limit = 0;
				$num = 0;
				# this is to bunch lines together for better parsing; doesn't really work in this simple way
				while (($num < $limit) && ($line2 = <S>)) {
					$num = $num + 1;
					$line2 =~ s/^\s*//;
					$line2 =~ s/\s*$//;
					$line = "$line $line2";
				}

				# Begin user-specified substitutions
				@arraytodo = @sublines;
				# if ($withintag || $onlytags) {
				#	@arraytodo = @tagsublines; # much smaller
				#		# TODO: should make the reverse instead: or just not worry about this at all.
				# }
				foreach $sublineprocessed(@arraytodo) { 
					($subfrom, $subto, $subparams) = split(/\t/, $sublineprocessed); 
					if ($subparams =~ /t/) { 
						if ($line =~ /$subfrom/) {
							$line = $subto;
						}
					}
					else {
						$percnum = 0;	
						if ($subparams =~ /.*p(\d+).*/) {
							$percnum = $subparams;
							$percnum =~ s/.*p(\d+).*/$1/;
						}
						$goahead = 1;
						# if subfrom starts with a tab, group it with the previous one
						if ($startswithtab) {
							$goahead = $wentahead; 
						}
						if (($percnum > 0) && ($goahead > 0)) {
							$randnum = rand(100);
							if ($randnum > $percnum) {
								$goahead = 0;
							}
						}
						if ($goahead) {
							$ignorecase = 1;
							if ($subparams =~ /c/) {
								$ignorecase = 0;
							}
							$dontworryabouttags = 0;
							$nosmartsubstitutions = 0;
							$smartsubsforsure = 0;
							if ($subparams =~ /h/) {
								$dontworryabouttags = 1;
							}
							if ($subparams =~ /d/) {
								$nosmartsubstitutions = 1;
							}
							if ($subparams =~ /s/) {
								$smartsubsforsure = 1;
							}
							$dosmartsubs = 0;
							if ($smartsubstitutions && !$nosmartsubstitutions && !$dontworryabouttags) {
								# WISHLIST: the above $dontworryabouttags should go when that's done right...
								$dosmartsubs = 1;
							}
							if ($smartsubforsure) {
								$dosmartsubs = 1;
							}
							if ($dosmartsubs) {
								# do a preliminary check for the wordroot -- if is in there, then 
								# go ahead and check for the rest
								if ($dontworryabouttags) { dbg("ERROR--got into dosmartsubs with dontworry");}
								($subfromroot, $subfromsingular, $subfromplural, $subfrompast) = 
									GetForms($subfrom);
								if ($line =~ /$subfromroot/i) {
									($subtoroot, $subtosingular, $subtoplural, $subtopast) = 
										GetForms($subto);
									$line = substitute($line, $subfromplural, $subtoplural, 
										$ignorecase, $dontworryabouttags);
									$line = substitute($line, $subfrompast, $subtopast, 
										$ignorecase, $dontworryabouttags);
									$line = substitute($line, $subfromsingular, $subtosingular, 
										$ignorecase, $dontworryabouttags);
								}
							}
							else {
								$line = substitute($line, $subfrom, $subto, 
										$ignorecase, $dontworryabouttags);
							}
						}
						$wentahead = $goahead;
					}
				}
				# End of user-specified substitutions

				# Begin automatic always-substitutions
				if (1) { # $basedir) { # should do this always 
					$adddir = $basedir; # $basedir can be ""--that shows up as false in perl
					if ($subdomain) {
						$subdomainalt = $subdomain;
						$subdomainalt =~ s/$givemebaredomain/$mybaredomain/; # just to mask it a bit, visually
						$adddir = "$basedir$subdomstr/$subdomainalt";
					}
					if (1) { #!$isjavascriptfile && !$isjavascript) {  # nothing wrong with the tests,
						 # but it's better not to risk it for now
						@checklist = ( );
						push(@checklist, "SRC");
						push(@checklist, "HREF");
						push(@checklist, "BACKGROUND");
						push(@checklist, "ACTION");

						foreach $checkitem(@checklist) {
							$line =~ s|($checkitem\s*=\s*)(\\?[\"\']?)/|\1\2$adddir/|ig;
						}
						# TODO: should put wildcard in below and check for other possibilities
						$line =~ s|openWindow\('/|openWindow('$adddir/|ig;
						# a very brave version of above would be to match on '/ and "/ -- it would
						# be too brave, really, because it would match on javascript stuff like 
						# x[i]+'/'+x[j]
					}
					$dojavascripthacksalways = 1; # unfortunately, javascript occurs within href tags too....
						# could if desired limit to $withintag rather than 1.
					if ($dojavascripthacksalways || $isjavascriptfile || $isjavascript) {
						# Note: when debugging .js-file parsing, be sure to empty your browser's
						# cache as well as delete the stolen/ cache when debugging. It isn't always
						# necessary, but sometimes it is.
						# Because of the total variability of javascript, there's just about no way
						# to insure that things will always work right. Tip: set up your site as a
						# "root" domain, that will avoid at least some hairy errors.
						# You can also hack javascript using substitutions.txt. For example, if you 
						# wanted to replace "homePage == 'yes'" with "false", you could do it either
						# here or with a line in the substitutions.txt file.
						# JAVASCRIPT HACKS SHOULD GO HERE

						# TODO: on shell.com, need to deal with this format--3 can be any number i think, or 
						#     at least 2
						# ReloadPage('3/=/home/html/iwgen...

						# an especially toxic example--
						# this one tries to assure it's never seen as homepage in parsing tests--
						# in case is in subdirectory
						$line =~ s/homePage\s*==\s*[\"\']yes[\"\']/false/i;
	
						# a common variable
						$line =~ s|IMG=\"\/|IMG=\"$adddir/|ig;
						
						# a braver version of above
						# $line =~ s|(=\s*[\"\'])\/|\1$adddir/|ig;

						# an electrifying example
						$line =~ s|href=\"\'\+sublayer|href=\"$adddir\'\+sublayer|ig;
						$line =~ s|href=\"\'\+lydata|href=\"$adddir\'\+lydata|ig;

						# below is electrifyingly brave, but should work for the most part
						$line =~ s|\'\/images/|\'$adddir\/images/|ig;
						# here are less brave efforts that didn't pan out
						# $line =~ s|\,\\\'\/images\/|\'\,\\\'$adddir/images\/|ig;
						# $line =~ s|([\,\(])\\\'\/images\/|\1\\\'$adddir/images\/|ig;

						# below causes more problems than it fixes
						# if ($line =~ /preload/i || $line =~ /href/i || $line =~ /mouse/i || $line =~ /src/i) {
						# 	$line =~ s|([\"\'])/|\1$adddir/|ig;
						# 	$line =~ s|$adddir$adddir|$adddir|ig; # major, major hack
						# }
						# END JAVASCRIPT HACKS
					}
					# check other domains to get similarly to above
					if ($givemealsospecific || $givemealsogreedy) {
						foreach $checkdom(@checkdoms) {
							$domainisspecific = 0;
							if ($checkdom =~ /^s:/) {
								$domainisspecific = 1;
								$checkdom =~ s/^s://;
							}
							$checkdom =~ s/^\s*(.*)\s*$/\1/;
							$replacedom = $checkdom;
							$replacedom =~ s/$givemebaredomain/$mybaredomain/; # just to mask it a bit, visually
							# $replacestr = "$basedir$subdomstr/$replacedom";
							# foreach $checkitem(@checklist) { # cuemonkey commented out and fixed http replace
								# if this breaks it somehow, uncomment above (and loop finish) 
								# for each two left-jutting lines, replace with uncommented
							if (!$domainisspecific) {
								if ($line =~ /$checkdom/) { # just for debug...
									if (0 && $checkdom eq "cnn.net") {
										dbg("with $checkdom in it, \n$line");
									}
									$line =~ 
		# s|($checkitem\s*=\s*)(\\?[\"\']?)http:\/\/(.*\.)$checkdom|\1\2$basedir$subdomstr/\3$replacedom|ig;
		# s|http[s]?:\/\/([^\.]*\.)$checkdom|$basedir$subdomstr/\1$replacedom|ig;
		# TODO: need to make it deal with https requests too, if use above
		s|http:\/\/([^\.]*\.)$checkdom|$basedir$subdomstr/\1$replacedom|ig;
									if (0 && $checkdom eq "cnn.net") {
										dbg("after, \n$line");
									}
								}
							}
							else {
								$line =~ 
		# s|($checkitem\s*=\s*)(\\?[\"\']?)http:\/\/$checkdom|\1\2$basedir$subdomstr/$replacedom|ig;
		# s|http[s]?:\/\/$checkdom|$basedir$subdomstr/$replacedom|ig;
		# TODO: need to make it deal with https requests too, if use above
		s|http:\/\/$checkdom|$basedir$subdomstr/$replacedom|ig;
							}
							# }
						}
					}
				}
				if ($stealemails) {
					if (!$mydomain) {
						dbg("ERROR: request to stealemails but mydomain not defined");
					}
					else {
						$emaildomain = $givemedomain; # maybe leave that...
						$emaildomain =~ s/^www\.//;
						$myemaildomain = $mydomain;
						$myemaildomain =~ s/^www\.//;
						$line =~ s/\@$emaildomain/\@$myemaildomain/g;
					}
				}
				if ($rewritefullurls) {
					if (!$mydomain) {
						dbg("ERROR: request to rewritefullurls but mydomain not defined");
					}
					else {
						$line =~ s/$givemedomain/$mydomain$basedir/g;
					}
				}
				# End of automatic always-substitutions

				if (!$withintag && $origline =~ /.*<[^>]*$/) {
					$withintag = 1;
				}
			} # end of not-all-whitespace line check
			if ($savefilename) {
				print OUT $line;
			}
			print $line;
			flush(STDOUT);
		  }

	  } 
	cleanup();
	
	alarm(0);	# this line just clears the alarm. it should stay right above the end of the eval.
			# so if you decide to move the end of the eval (i.e. the timed portion) then be sure
			# to move the alarm(0); line with it.
					
}; # end eval

#### eval error foo - i.e. what happens if the alarm kills it...
#### leave it right after the end of the eval.

if ($@) {
  if ($@ =~ /timeout/)  {
	# here's where it goes if the alarm expires.
    
	dbg("Timed out after trying for $killtime seconds - cleaning up...");
	
	print "@lines\n";	# causes remaining available text to be printed unparsed, rather than leave 
				# a partially blank page.
				# if this creates too much munged html, get rid of this line. 
	cleanup();
	
	if (-f $savefilename) {	 # we don't want caching of this failed page.
		unlink $savefilename;
	}
	
	alarm 0;			# clear the alarm
 
  }
else {
    # propagate unexpected exception - in other words, it crashed.
    
    dbg ("Uh oh, trouble:  $@"); 
    alarm 0 ;        # clear the stillpending alarm
    die;

  }
} # end if $@
#### end eval error foo


####################### subroutines #############################

sub cleanup {
  if (S) {
	# dbgv("closing S");
	close(S);
  }
  if ($savefilename) {
	dbgv("closing and chmoding $savefilename");
    close(OUT);
    chmod 0666, $savefilename;   
	# sometimes a blank file gets written. destroy them.
	dbgv("unlinking $savefilename.");
	if(-s $savefilename == 0) { unlink $savefilename };
  }
  if ($storedsfn) {
	dbgv("closing $storedsfn");
    close STORED;
  }
  if ($dbgfile) {
    dbgv("closing $dbgfile");
    close DBG;
  }
}

sub dbgv {
	local($msg) = @_;
	print DBG scalar(localtime), "[$$]: $msg\n" if ($dbgfile && $dbgverbose);
	flush(DBG);
}

sub dbg {
	local($msg) = @_;
	print DBG scalar(localtime), "[$$]: $msg\n" if $dbgfile;
	flush(DBG);
}

sub getaddress {
  local($host) = @_;
  local(@ary);
  @ary = gethostbyname($host);
  return(unpack("C4",$ary[4]));
}

sub mysub {
	($line, $subfrommysub, $subtomysub, $subparamsmysub) = @_;
	if ($subparamsmysub =~ /i/) {
		$line =~ s|$subfrommysub|$subtomysub|ig;
	}
	else {
		$line =~ s|$subfrommysub|$subtomysub|g;
	}
	return $line;
}

sub SplitURL {
	local($fullurl) = @_;
	$fullurl =~ s/\n+//;
	$fullurl =~ s/\r+//;
	$fullurl=~ s|[^:]+://||;		#lose the beginning http:// type crap
	local($thehost) = $fullurl;
	local($thefile) = $fullurl;
	$thehost =~ s|(^[^\/]*).*|$1|;		#lose everything after the domain slash
	$thefile =~  s|^[^\/]*(.*)|$1|;		#lose everything before the domain slash
	if (0) { # old clumsy way of doing it
		@dparts = split(/\//, $fullurl); 
		# $thehost is what is before the first /
		$thehost = shift(@dparts);
		$thehost =~ s|^http://||i;
	 
		$thefile = "/";
		# $basedir is what is after the first /
		while (defined($bit = shift(@dparts))) {
			$thefile = "$thefile$bit";
		}
	}
	return ($thehost, $thefile);
}
$didntparse = [0, 0, 0]; 
sub ParseSubLine {
	# dbgv("entered ParseSubLine");
	$break = "\t";
	local($subline) = @_;
	local($subfrom);
	local($subto);
	local($subparams);
	chomp($subline);
	if ($subline =~ /^#/) { return $didntparse; }
	if ($subline =~ /^$/) { return $didntparse; }
	if (!($subline=~ /$break/)) { return $didntparse; }
	$subline =~ s/\t\t/\t/g;
	$subline =~ s/\t\t/\t/g;
	$subline =~ s/\t\t/\t/g;
	$subline =~ s/\t\t/\t/g;
	$subline =~ s/\t\t/\t/g;
	$subline =~ s/\n//g;
	$subline =~ s/\r//g;
	$startswithtab = 0;
	if ($subline =~ /^\t/) {
		$startswithtab = 1;
		$subline =~ s/\t//; # should just get rid of first one
	}
	@sublineparts = split(/\t/, $subline); 
	if (!defined($subfrom = shift(@sublineparts))) {
		dbg( "ERROR: no subfrom in line: $subline" ); return $didntparse;
	}
	if ($subfrom =~ /^$/) {
		dbg( "ERROR: blank subfrom in $subline" ); return $didntparse;
	}
	if (!defined($subto = shift(@sublineparts))) {
		dbg( "ERROR: no subto in line: $subline" ); return $didntparse;
	}
	if ($subto =~ /^$/) {
		dbg( "ERROR: blank subto in $subline" ); return $didntparse;
	}
	if (!defined($subparams = shift(@sublineparts))) {
		$subparams = "";
	}
	if (defined($moreshit = shift(@sublineparts))) {
		dbg( "ERROR: extra shit $moreshit at end of $subline" ); return $didntparse;
	}
	
	return ($subfrom, $subto, $subparams);
}

sub substitute {
	local($line, $subfrom, $subto, $ignorecase, $dontworryabouttags) = @_;
	if ((!($line =~ />/) && !($line =~ /</)) || $dontworryabouttags) {
		# use \b before and after--word boundary
		$b = "[\b\(\)]";
		if ($dontworryabouttags) { 
			# should probably be for specifically inside tags.... and should probably
			# use smartcaps too...
			if ($ignorecase) {
				$line =~ s/($b)$subfrom($b)/\1$subto\2/ig; # don't use \b -- will miss (xx)
			}
			else {
				$line =~ s/($b)$subfrom($b)/\1$subto\2/g;
			}
		}
		else {
			if ($ignorecase) {
				$line =~ s/\b($subfrom)\b/&smartcaps($1,$subto,"","")/eig;
			}
			else {
				$line =~ s/\b$subfrom\b/$subto/g;
			}
		}
	}
	else {
		if ($ignorecase) {
			# $line =~ s/([><]*.*>[^<]*$b)($subfrom)($b)/&smartcaps($2,$subto,$1,$3)/eig;
			# $line =~ s/($b)($subfrom)($b[^>]*<.*)/&smartcaps($2,$subto,$1,$3)/eig;
			$line =~ s/([><]*.*>[^<]*)\b($subfrom)\b/&smartcaps($2,$subto,$1,"")/eig;
			$line =~ s/\b($subfrom)\b([^>]*<.*)/&smartcaps($1,$subto,"",$2)/eig;
		}
		else {
			$line =~ s/([><]*.*>[^<]*)\b$subfrom\b/\1$subto/g;
			$line =~ s/\b$subfrom\b([^>]*<.*)/$subto\1/g;
		}
	}
	return $line;
}


sub smartcaps {
        ($old,$new,$preceding,$trailing)  = @_;
	local($ret);
	
        if($old =~ /^[A-Z]+$/) { 
		$ret = uc($new); 
	}
        elsif($old =~ /^[A-Z][a-z]+.*$/) { 
		$ret = ucfirst($new); 
	}
	else { 
		$ret = lc($new); 
	}
	$ret = "$preceding$ret$trailing";
        return $ret;
}
sub GetForms {
	local($raw) = @_;
	# assume singular
	local($singular) = $raw;
	local($root);
	local($plural);
	local($past);
	local($y_end) = 0;
	local($s_end) = 0;
	$root = $raw;
	if ($raw =~ /y$/) {
		if (!($raw =~ /[aeou]y$/)) {
			$y_end = 1;
			$root =~ s/y$//;
		}
	}
	elsif ($raw=~ /s$/) {
		$s_end = 1;
	}
	if ($y_end) {
		$plural = $root . "ies";
	}
	elsif ($s_end) {
		$plural = $root . "es";
	}
	else {
		$plural = $root . "s";
	}
	if ($y_end) {
		$past = $root . "ied";
	}
	else {
		$past = $root . "ed";
	}
	return ($root, $singular, $plural, $past);
}

sub SocketConnect {
	($getdomain, $port) = @_;
	# dbgv( "will use socket to GET $file from $getdomain" );
	($sockaddr,$there,$response,$tries) = ("Snc4x8");
	$there = pack($sockaddr,2,$port, &getaddress($getdomain));
	($a, $b, $c, $d) = unpack('C4', $hostaddr); # OOPS--hostaddr is never anything!

	$proto = (getprotobyname ('tcp'))[2];

	if (!socket(S,AF_INET,SOCK_STREAM,$proto)) { dbg( "oops dead socket 1" ); 
		die "$0:  Fatal Error.  $!\n"; }
	if (!connect(S,$there)) { dbg( "oops dead socket 2" );
		die "$0:  Fatal Error.  $!\n"; }
	select(S);$|=1;
	select(STDOUT);
	return 1;
}

sub statfiletime { 
	# sub to get the time of an existing file and return 
	# it in GMT in proper HTTP header order
	local ($whatfile) = 	@_;
	local $mtime = (stat $whatfile) [9];
	local $gmval = gmtime($mtime);
	local @datevals = split ('\s', $gmval);  #gmtime is different in scalar and array context
	local $httptime = join (" ", "$datevals[0]\,", "$datevals[2]", "$datevals[1]", "$datevals[4]", "$datevals[3]", "GMT");
	return $httptime;
}		 

# this sends the actual HTTP request.
sub SocketRequest {
	my($domain,$port,$file) = @_;
	
	SocketConnect($domain,$port) || die "no connection!";

	print S "GET $file HTTP/1.0\r\n"; # the HTTP/1.0  necessary for virtual hosts
	# $sfn is the local file
	
 	if (-f $sfn)	{ # if a local cached file exists, then check its time, and send the If-Modified-Since header
 		local $compare = statfiletime($sfn);
		print S "If-Modified-Since: $compare\r\n";  # seems to be a touchy header... keep it up here, don't move it.
	}

	print S "User-Agent: Mozilla 4.2\r\n"; 
	print S "Referer: http://www.whitehouse.gov/\r\n"; 
	print S "Host: $getdomain\r\n";
	# print S "Connection: Keep-Alive\r\n";  # don't use this!

	print S "\r\n";
	my @response = <S>;
	close S;
	return @response;
}

sub makemimehash () {
	# sub to create a global hash "%mime" which contains the extension/mime-type pairs from the mime.types file.
	local $mimefile = "$basepath/mime.types\n";
	$mimefilefound = 0;
	if (!open(MF, "$mimefile")) {
		local $miss = "$mimefile file is missing!";
		warn "$miss\n";
		dbg($miss);
	}
	else {
		$mimefilefound = 1;

		while ($line = <MF>) {
			chomp $line;
			if (($line =~ /^#/) || ($line =~ /^\s+$/)) { 	# ignore comments and blank lines
				next;
			}	 
			local @fields = split ("\t+", $line);
			local @subfields = split (" ", $fields[1]); # this line handles multiple extensions for a mime-type
			local $member;
			foreach $member (@subfields) {
				$mime{$member} = $fields[0]; # keys are exts.. values are mime-types	
				# note: there can be multiple keys (exts) for each value (mime-type).
			}	
		}
		close MF;
	}
}		


sub mimefind () {
	# sub to look up the mime-types in the %mime hash
	my ($filename) = @_;
	if ($mimefilefound > 0) {
		my $ext = (fileparse($filename,'\.[^.]*'))[2];
		$ext =~ s/^\.//;
		my $mimetype = $mime{$ext};
		if (!$mimetype) {
			$mimetype = "text/html"; # this actually works for everything...
			# dbg("no mimetype for $filename so defaulting to $mimetype");
		}
		return $mimetype;
	}
	return 0;
}	
	
	
