#!/usr/bin/perl 
# -------------------------------------------------------------------
# Program: swish-cgi.pl
# Author : John Millard (millarj@muohio.edu)
#
# -------- User defined configuration variables -----------
# Every setting below is a package global; the subroutines later in
# this script read them directly.

# Optional parameters to pass to the SWISH searcher.
$params = ' ';

# The full name of your organization -- printed with search results.
# (Left disabled here; html_trailer prints an empty value while unset.)
#   $organization = "Fornits Workshop";

# The full name of your department -- printed with search results.
# (Left disabled here; html_trailer prints an empty value while unset.)
#   $department = "Swish-e Hack Shop";

# Path to your http root, stripped from file paths to build result URLs.
$httpRoot = '/home/straights/thestraights';

# Absolute path of the SWISH searcher executable.
$swish = '/home/curiosity/swishfiles/swish-e';

# URL of this CGI, taken from the web server environment.
$swishcgi = $ENV{SCRIPT_NAME};

# Path to the SWISH index file.
$index_path = '/home/straights/swishfiles/straight-swish.idx';

# Contact e-mail address.
$recipient = 'support@fornits.com';
# ------ End of Configuration Variables ------------
use CGI qw(:all);
# ------------------------------------------------------------------
# Main request handling.
#
# All form input is read through CGI.pm, which decodes both GET and
# POST submissions.  Reading STDIN by hand would consume the POST body
# before CGI.pm could see it, and would never see GET parameters at
# all -- which is what the generated form submits -- so the
# "search_tags" check boxes would be silently ignored.
#
# The values are stored in package globals because print_form and
# search_parse read them directly.
# ------------------------------------------------------------------
$query       = param('query');
$results     = param('results');
$month_limit = param('months');

# The result limit ends up on the searcher's command line, so accept
# nothing but digits; anything else means "no limit" (0).
$results     = 0 unless defined $results && $results =~ /^\d+$/;
$month_limit = 0 unless $month_limit;

# Without a query there is nothing to search for: show the form only.
unless ($query) {
    html_header($head1, "No Query Was specified");
    print_form();
    html_trailer();
    exit;
}

# "search_tags" is a multi-valued field (one value per ticked box).
# Newer CGI.pm releases provide multi_param() for this and warn when
# param() is called in list context; older releases only have param().
my @requested_tags;
if (defined(&CGI::multi_param)) {
    @requested_tags = CGI::multi_param('search_tags');
}
else {
    @requested_tags = param('search_tags');
}

# Tag selectors are passed to the searcher after "-t"; keep letters
# only so that nothing else can be smuggled onto the command line.
@search_tags = grep { defined $_ && /^[A-Za-z]+$/ } @requested_tags;
$search_tags = @search_tags ? '-t ' . join('', @search_tags) : '';

# Normal case: page header, the form (pre-filled), then the results.
html_header($head1, "Your Search Results");
print_form();
search_parse();
html_trailer();

 

# Print the search form.
#
# Reads the package globals $query (used to pre-fill the text box) and
# $swishcgi (mentioned in an HTML comment).  Takes no arguments.
#
# The form is printed between html_header and html_trailer, so it must
# only close the elements it opens itself; closing the page here would
# leave the search results and the trailer outside of <body>.
sub print_form {
    # The query is user input: escape it and quote the attribute so a
    # multi-word query is shown in full and cannot inject markup.
    my $query_value = CGI::escapeHTML(defined $query ? $query : '');

    # To change the form that gets generated on the fly, edit the HTML below.
    print <<EOF;
<!-- CENTER><H2>Search Form</H2></CENTER -->
<table border = 0 cellspacing=3 cellpadding=10>
<tr><td>
<!-- FORM ACTION="$swishcgi" method=get-->
<FORM method=get>
Enter word(s). You can connect terms with <b>and</b> or <b>or</b><br>
   <INPUT NAME="query"  SIZE=50 value="$query_value"><p>

<B>Maximum # of Items</b>
<SELECT name=results>
<OPTION value=0> No Limit
<OPTION value=10> 10
<OPTION value=20> 20
<OPTION value=30> 30
<OPTION value=40> 40
<OPTION value=50> 50
<OPTION value=100> 100
  </SELECT><br>
<P>
</td><td valign = top></td>
</tr>
<tr><td colspan=2>
Search In the following Tags: Leave Blank to search everything<br>
<table border =0 cellspacing=0 cellpadding=0>  
<tr>
<td><input type=checkbox name=search_tags value="t">Title Tags</td>
<td><input type=checkbox name=search_tags value="h">Heading Tags</td>
<td><input type=checkbox name=search_tags value="c">Comment Tags</td>
<td><input type=checkbox name=search_tags value="e">Emphasized Text</td></tr>
</table>
<P>
<INPUT TYPE="submit" VALUE="Start Search">
<HR>
</FORM> 
</td>
</tr>   
</table>
EOF
}

sub search_parse
# Run SWISH and print the parsed results as an HTML list.
#
# Reads the package globals $swish, $index_path, $httpRoot, $query,
# $results and $search_tags.  Takes no arguments.  On a searcher error
# the message is handed to search_error, which ends the request.
#
# Each hit line is expected to look like
#     rank path "title" size
# (inferred from the way the line is split below -- confirm against
# the searcher version in use).
{
    # Fatal messages from the searcher and the text shown to the user.
    my %error_text_for = (
        'err: no results' =>
            'There were no items that matched your search request',
        'err: could not open index file' =>
            "Could not open SWISH Index File $index_path",
        'err: no search words specified' =>
            'Please Enter at least one Search Word',
        'err: a word is too common' =>
            'One of your search terms is too common, please try again',
    );

    # Split the query into words ourselves instead of letting a shell
    # do it.  Words starting with "-" are dropped so user input cannot
    # be taken for a command line option of the searcher.
    my @words = grep { !/^-/ } split ' ', (defined $query ? $query : '');
    search_error($error_text_for{'err: no search words specified'})
        unless @words;

    # Digits only for the result limit; 0 is what the form sends for
    # "No Limit".
    my $max_hits = (defined $results && $results =~ /^(\d+)$/) ? $1 : 0;

    # $search_tags is either empty or "-t <tags>".
    my @tag_args = split ' ', (defined $search_tags ? $search_tags : '');

    # List-form pipe open: the arguments go straight to the program,
    # no shell is involved, so metacharacters in the query are inert.
    my @swish_args = ('-w', @words, '-m', $max_hits, @tag_args,
                      '-f', $index_path);
    open(my $swish_out, '-|', $swish, @swish_args)
        || die "Cant open swish<br>$!";

    my @hits;
    while (my $line = <$swish_out>) {
        chomp $line;

        # First, check to see if the search produced an error.
        search_error($error_text_for{$line})
            if exists $error_text_for{$line};

        # Ignore lines that begin with a non-digit (headers, comments).
        next unless $line =~ /^\d/;
        push @hits, $line;
    }
    close($swish_out);

    my $count       = scalar @hits;
    my $shown_query = CGI::escapeHTML($query);    # user input: escape it

    print "<p>Your Search for <strong>$shown_query</strong>, returned $count Items</p>\n";
    print "<center><table Border ='0' width=90%><tr><td><ol>\n";

    for my $hit (@hits) {
        my ($rank_and_path, $title, $filesize) = split /"/, $hit;
        my ($rank, $path) = split / /, $rank_and_path;

        my $excerpt = _search_excerpt($path);

        # Turn the file system path into a URL by removing the document
        # root -- as a literal prefix, not as a regular expression.
        my $url = defined $path ? $path : '';
        $url =~ s/^\Q$httpRoot\E//i;

        print "<li><a href=\"$url\" target=\"_new\">$title</a><br>\n";

        # Skip excerpts that still contain (partial) markup.
        print "<i>$excerpt</i>\n"
            if length $excerpt && $excerpt !~ /[<>]/;

        print "<dd><font size=-1><b>Relevancy Score: &nbsp;&nbsp;&nbsp;  $rank  Size of Document: $filesize Bytes</b></font><p>\n";
    }
    print "</ol></td></tr></table></center><P>\n";
}

# Return a short plain-text excerpt of the document at the given path:
# the text from the first line containing "source:" or "date:" (any
# case) onwards, with tags removed and whitespace collapsed.  Reading
# stops after 100 lines or once more than 250 characters are collected.
# Returns an empty string when the file cannot be opened.
sub _search_excerpt {
    my ($path) = @_;
    my $excerpt = '';

    my $doc;
    unless (defined $path && open($doc, '<', $path)) {
        recover();
        return $excerpt;
    }

    my $started     = 0;
    my $lines_taken = 0;
    while (my $line = <$doc>) {
        $started = 1 if $line =~ /(?:source|Date):/i;
        next unless $started;

        last if length($excerpt) > 250 or $lines_taken == 100;
        ++$lines_taken;

        # Clean the accumulated text, not just the new line, so tags
        # that span several lines are removed as well.
        $excerpt .= $line;
        $excerpt =~ s/<.*?>//sg;
        $excerpt =~ s/\s+/ /g;
    }
    close($doc);

    return $excerpt;
}

# Fallback used when a result document cannot be opened: clears the
# package global $OpenFlag and returns that value (0).
sub recover {
    return $OpenFlag = 0;
}

sub search_error
# Print an error message in place of the search results and finish the
# page.  Does not return: html_trailer ends the request.
#
# Argument: the message text to show.
#
# This is called from search_parse, i.e. after the HTTP header and the
# start of the page have already been printed, so the header must not
# be emitted a second time here.
{
    my ($error_message) = @_;
    print "$error_message\n";
    html_trailer();
}

sub html_header
# Print the HTTP header and the start of the HTML document.
#
# Arguments:
#   1. text printed directly after the opening of the document body
#   2. the document title
#
# Leaves a centered table cell open in which the page content goes.
{
    my ($leading_text, $page_title) = @_;

    my %meta_tags = (
        'keywords'  => 'keywords',
        'copyright' => 'copyright 1998 Fornits\' Workshop',
    );

    my @page_top = (
        header(-type => 'text/html'),
        start_html(
            -title  => $page_title,
            -author => 'WebMistress@Fornits.com',
            -meta   => \%meta_tags,
            # Optional page colours, currently disabled:
            #   -bgcolor=>$Conf{'bgcolor'},
            #   -LINK=>$Conf{'link'},
            #   -ALINK=>$Conf{'alink'},
            #   -VLINK=>$Conf{'vlink'},
            #   -TEXT=>$Conf{'text'},
            #   -background=>"$imgpath/$Conf{'background'}"
        ),
        $leading_text,
    );

    print @page_top;
    print '<CENTER><table border=0 width=80%><tr><td>';
}


sub html_trailer
# Print the HTML trailer and end the request.
#
# Prints the package globals $organization and $department, closes the
# table cell, table and <CENTER> opened by html_header, closes the
# document, and then exits -- this subroutine does not return.
{
    print "<P>\n";
    print "$organization<br>\n";

    # No </body> here: the body is closed once, below, after the
    # elements that are still open have been closed.
    print "$department<P>\n";
    print '</td></tr></table></CENTER>';
    print "</body>\n</html>\n";

    exit;
}

