# libwhisker v1.7
# libwhisker is a collection of routines used by whisker

#

# libwhisker copyright 2000,2001,2002 rfp.labs

#

# This program is free software; you can redistribute it and/or

# modify it under the terms of the GNU General Public License

# as published by the Free Software Foundation; either version 2

# of the License, or (at your option) any later version.

#

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

# GNU General Public License for more details.

# 

#

# More information can be found at http://www.wiretrip.net/rfp/

# Libwhisker mailing list and resources are also available at

# http://sourceforge.net/projects/whisker/

#



package LW;

use 5.004;

$LW::VERSION="1.7";


####### external module tests ###################################



BEGIN {



## LW module manager stuff ##



	%LW::available		= ();

	$LW::LW_HAS_SOCKET	= 0;

	$LW::LW_HAS_SSL		= 0;

	$LW::LW_SSL_LIB		= 0;

	$LW::LW_NONBLOCK_CONNECT= 0;



## binary helper - may contain functions substituted further down ##

        eval "use LW::bin"; # do we have libwhisker binary helpers?

        if($@){ $LW::available{'LW::bin'}=$LW::bin::VERSION; }



## encode subpackage ##

	eval "require MIME::Base64";

	if($@){

        	*encode_base64 = \&encode_base64_perl; 

	        *decode_base64 = \&decode_base64_perl; 

	} else{ 

		# MIME::Base64 typically has faster C versions

		$LW::available{'mime::base64'}=$MIME::Base64::VERSION;

        	*encode_base64 = \&MIME::Base64::encode_base64;

        	*decode_base64 = \&MIME::Base64::decode_base64;}



## md5 subpackage ##

	eval "require MD5";

	if(!$@){ $LW::available{'md5'}=$MD5::VERSION;}



## http subpackage ##

        eval "use Socket"; # do we have socket support?

        if($@){ $LW::LW_HAS_SOCKET=0; }

        else { $LW::LW_HAS_SOCKET=1;

                $LW::available{'socket'}=$Socket::VERSION;}



    if($LW_HAS_SOCKET){

	eval "use Net::SSLeay"; # do we have SSL support?

        if($@){ $LW::LW_HAS_SSL=0; }

        else { $LW::LW_HAS_SSL=1;

                $LW::LW_SSL_LIB=1;

                $LW::available{'net::ssleay'}=$Net::SSLeay::VERSION;

                Net::SSLeay::load_error_strings();

                Net::SSLeay::SSLeay_add_ssl_algorithms();

                Net::SSLeay::randomize();}

        if(!$LW::LW_HAS_SSL){

                eval "use Net::SSL"; # different SSL lib

                if($@){ $LW::LW_HAS_SSL=0; }

                else { $LW::LW_HAS_SSL=1;

                        $LW::LW_SSL_LIB=2;

                        $LW::available{'net::ssl'}=$Net::SSL::VERSION;}

        }



## non-blocking IO ##



	if($^O!~/Win32/){

		eval "use POSIX qw(:errno_h :fcntl_h)"; # better

		if(!$@){

			$LW::LW_NONBLOCK_CONNECT=1;

		}

	}



    } # if($LW_HAS_SOCKET)



} # BEGIN



####### package variables #######################################



## crawl subpackage ##

	%LW::crawl_config=(	'save_cookies'	=> 0,

				'reuse_cookies'	=> 1,

				'save_offsites'	=> 0,

				'follow_moves'	=> 1,

				'url_limit'	=> 1000,

				'use_params'	=> 0,

				'params_double_record' => 0,

				'skip_ext'	=> '.gif .jpg .gz .mp3 .swf .zip ',

				'save_skipped'	=> 0,

				'save_referrers'=> 0,

				'do_head'	=> 0,

				'callback'	=> 0,

				'slashdot_bug'	=> 1,

				'normalize_uri'	=> 1,

				'source_callback' => 0

			);





	@LW::crawl_urls=();;

	%LW::crawl_server_tags=();

	%LW::crawl_referrers=();

	%LW::crawl_offsites=();

	%LW::crawl_cookies=();

	%LW::crawl_forms=();

	%LW::crawl_temp=();



	# this idea/structure was taken from HTML::LinkExtor.pm,

	# copyright 2000 Gisle Aas and Michael A. Chase

	%LW::crawl_linktags = (

		 'a'       => 'href',

		 'applet'  => [qw(codebase archive code)],

		 'area'    => 'href',

		 'base'    => 'href',

		 'bgsound' => 'src',

		 'blockquote' => 'cite',

		 'body'    => 'background',

		 'del'     => 'cite',

		 'embed'   => [qw(src pluginspage)],

		 'form'    => 'action',

		 'frame'   => [qw(src longdesc)],

		 'iframe'  => [qw(src longdesc)],

		 'ilayer'  => 'background',

		 'img'     => [qw(src lowsrc longdesc usemap)],

		 'input'   => [qw(src usemap)],

		 'ins'     => 'cite',

		 'isindex' => 'action',

		 'head'    => 'profile',

		 'layer'   => [qw(background src)],

		 'link'    => 'href',

		 'object'  => [qw(codebase data archive usemap)],

		 'q'       => 'cite',

		 'script'  => 'src',

		 'table'   => 'background',

		 'td'      => 'background',

		 'th'      => 'background',

		 'xmp'     => 'href',

	);





## forms subpackage ##

	@LW::forms_found=();

	%LW::forms_current=();





## http subpackage ##

	my $SOCKSTATE=0;

	my $TIMEOUT=10; # default

	my ($STATS_REQS,$STATS_SYNS)=(0,0);

	my ($LAST_HOST,$LAST_INET_ATON,$LAST_SSL)=('','',0);

	my ($OUTGOING_QUEUE,$INCOMING_QUEUE)=('','');

	my ($SSL_CTX, $SSL_THINGY);



	my %http_host_cache=();

	# order is following:

	# [0] - SOCKET

	# [1] - $SOCKSTATE

	# [2] - INET_ATON

	# [3] - $SSL_CTX

	# [4] - $SSL_THINGY

	# [5] - $OUTGOING_QUEUE

	# [6] - $INCOMING_QUEUE

	# [7] - $STATS_SYNS

	# [8] - $STATS_REQS



	my $Z; # array ref to current host specs



=pod





=head1 ++ Sub package: anti-ids



The anti-ids sub package implements management routines for various

rewriting/encoding in order to evade intrusion detection systems.



=cut



########################################################################



=pod



=head1 - Function: LW::anti_ids



Params: \%hin, $modes

Return: nothing



LW::anti_ids computes the proper anti-ids encoding/tricks specified by

$modes, and sets up %hin in order to use those tricks.  Valid modes

are (the mode numbers are the same as those found in whisker 1.4):



1 -	Encode some of the characters via normal URL encoding

2 -	Insert directory self-references (/./)

3 -	Premature URL ending (make it appear the request line is done)

4 -	Prepend a long random string in the form of "/string/../URL"

5 -	Add a fake URL parameter

6 -	Use a tab instead of a space as a request spacer

7 -	Change the case of the URL around (works against Windows and Novell)

8 -	Change normal seperators ('/') to Windows version ('\')

9 -	Session splicing (sending data in multiple packets)



You can set multiple modes by setting the string to contain all the modes

desired; i.e. $modes="146" will use modes 1, 4, and 6.



=cut





sub anti_ids {

	my ($rhin,$modes)=(shift,shift);

	my (@T,$x,$c,$s,$y);

	my $ENCODED=0;

	my $W = $$rhin{'whisker'};



	return if(!(defined $rhin && ref($rhin)));



	# in case they didn't do it already

	$$rhin{'whisker'}->{'uri_orig'}=$$rhin{'whisker'}->{'uri'};



	# note: order is important!



	# mode 9 - session splicing

	if($modes=~/9/){

		$$rhin{'whisker'}->{'ids_session_splice'}=1;

	}



	# mode 4 - prepend long random string

	if($modes=~/4/){$s='';

		if($$W{'uri'}=~m#^/#){

			$y=&utils_randstr;

			$s.=$y while(length($s)<512);

			$$W{'uri'}="/$s/..".$$W{'uri'};

		}

	}



	# mode 7  - (windows) random case sensitivity

	if($modes=~/7/){ 

		@T=split(//,$$W{'uri'});

		for($x=0;$x<(scalar @T);$x++){

			if((rand()*2)%2 == 1){

				$T[$x]=uc($T[$x]);}}

		$$W{'uri'}=join('',@T);

	}



	# mode 2 - directory self-reference (/./)

	if($modes=~/2/){

		$$W{'uri'}=~s#/#/./#g;

	}





	# mode 8 - windows directory separator (\)

	if($modes=~/8/){

		$$W{'uri'}=~s#/#\\#g;

		$$W{'uri'}=~s#^\\#/#;

		$$W{'uri'}=~s#^(http|file|ftp|nntp|news|telnet):\\#$1://#;

		$$W{'uri'}=~s#\\$#/#;

	}



	# mode 1 - random URI (non-UTF8) encoding

	if($modes=~/1/){

		if($ENCODED==0){

			$$W{'uri'}=encode_str2ruri($$W{'uri'});

		$ENCODED=1;}

	}	





	# mode 5 - fake parameter

	if($modes=~/5/){ 

		($s,$y)=(&utils_randstr,&utils_randstr); 

		$$W{'uri'}="/$s.html%3f$y=/../$$W{'uri'}";

	}



	# mode 3 - premature URL ending

	if($modes=~/3/){ 

		$s=&utils_randstr;

		$$W{'uri'}="/%20HTTP/1.1%0D%0A%0D%0AAccept%3A%20$s/../..$$W{'uri'}";

	}

	

	# mode 6 - TAB as request spacer

	if($modes=~/6/){

		$$W{'req_spacer'}="\t";

	}	



} # end anti_ids









=pod    





=head1 ++ Sub package: auth

        

The auth sub package implements HTTP authentication routines.



=cut



########################################################################



=pod    



=head1 - Function: LW::auth_brute_force

        

Params: $auth_method, \%hin, $user, \@passwords [, $domain]

Return: $first_valid_password, undef if error/none found



Perform a HTTP authentication brute force against a server (host and URI 

defined in %hin).  It will try every password in the password array for 

the given user.  The first password (in conjunction with the given user) 

that doesn't return HTTP 401 is returned (and the brute force is stopped 

at that point).  $domain is optional, and is only used for NTLM auth.



=cut



sub auth_brute_force {

 my ($auth_method, $hrin, $user, $pwordref, $dom)=@_;

 my ($P,%hout);



 return undef if(!defined $auth_method || length($auth_method)==0);

 return undef if(!defined $user        || length($user)       ==0);

 return undef if(!(defined $hrin     && ref($hrin)    ));

 return undef if(!(defined $pwordref && ref($pwordref)));



 map {

    ($P=$_)=~tr/\r\n//d;

    auth_set_header($auth_method,$hrin,$user,$P,$dom);

    return undef if(http_do_request($hrin,\%hout));

    return $P if($hout{'whisker'}->{'http_resp'} ne 401);

 } @$pwordref;



 return undef;}





########################################################################



=pod



=head1 - Function: LW::auth_set_header



Params: $auth_method, \%hin, $user, $password [, $domain]

Return: nothing (modifies %hin)



Set the appropriate authentication header in %hin.



NOTE: right now only BASIC and NTLM are supported.



=cut



sub auth_set_header {

 my ($method, $href, $user, $pass, $domain)=(lc(shift),@_);



 return if(!(defined $href && ref($href)));

 return if(!defined $user || !defined $pass);



 if($method eq 'basic'){

	$$href{'Authorization'}='Basic '.encode_base64($user.':'.$pass,'');

 }



 if($method eq 'proxy-basic'){

	$$href{'Proxy-Authorization'}='Basic '.encode_base64($user.':'.$pass,'');

 }



 if($method eq 'ntlm'){

	my $o=ntlm_new($user,$pass,$domain);

	$$href{'whisker'}->{'ntlm_obj'}=$o;

	$$href{'whisker'}->{'ntlm_step'}=0;

	$$href{'Authorization'}='NTLM '.ntlm_client($o);

 }



}





########################################################################



=pod



=head1 - Function: LW::do_auth



Params: $auth_method, \%hin, $user, $password [, $domain]

Return: nothing (modifies %hin)



This is an alias for auth_set_header().



=cut



sub do_auth {

	goto &auth_set_header;

}



=pod    



=head1 ++ Sub package: bruteurl



The bruteurl sub package is used to perform a brute-force of HTTP 

requests on an array of string components.



=cut





=pod    



=head1 - Function: LW::bruteurl



Params: \%hin, $pre, $post, \@values_in, \@values_out

Return: Nothing (adds to @out)

        

Bruteurl will perform a brute force against the host/server specified in

%hin.  However, it will make one request per entry in @in, taking the

value and setting $hin{'whisker'}->{'uri'}= $pre.value.$post.  Any URI

responding with an HTTP 200 or 403 response is pushed into @out.  An

example of this would be to brute force usernames, putting a list of

common usernames in @in, setting $pre='/~' and $post='/'.



=cut

sub bruteurl {

 my ($hin, $upre, $upost, $arin, $arout)=@_;

 my ($U,%hout);



 return if(!(defined $hin   && ref($hin)  ));

 return if(!(defined $arin  && ref($arin) ));

 return if(!(defined $arout && ref($arout)));

 return if(!defined $upre  || length($upre) ==0);

 return if(!defined $upost || length($upost)==0);



 http_fixup_request($hin);



 map {

  ($U=$_)=~tr/\r\n//d; next if($U eq '');

  if(!http_do_request($hin,\%hout,{'uri'=>$upre.$U.$upost})){

    if(	$hout{'whisker'}->{'http_resp'}==200 ||

	$hout{'whisker'}->{'http_resp'}==403){

	push(@{$arout},$U);

    }

  }

 } @$arin;

}





=pod    



=head1 ++ Sub package: cookie

        

Cookie handling functions.



Cookies are stored in a "jar" (hash), indexed by cookie name.  The 

contents are an anonymous array:



$jar{'name'}=@( 'value', 'domain', 'path', 'expire', 'secure' )



=cut



########################################################################



=pod    



=head1 - Function: LW::cookie_read

     

Params: \%jar, \%hout

Return: $num_of_cookies_read



Read in cookies from an %hout hash (HTTP response), and put them in %jar.



=cut



sub cookie_read {

 my ($count,$jarref,$href)=(0,@_);



 return 0 if(!(defined $jarref && ref($jarref)));

 return 0 if(!(defined $href   && ref($href)  ));



 my $target = utils_find_lowercase_key($href,'set-cookie');



 if(!defined $target){

	return 0;}



 if(ref($target)){ # multiple headers

	foreach (@{$target}){

		cookie_parse($jarref,$_);

		$count++; }

 } else { # single header

	cookie_parse($jarref,$target);

	$count=1; }



 return $count;

}





########################################################################



=pod    



=head1 - Function: LW::cookie_parse

     

Params: \%jar, $cookie

Return: nothing



Parses the cookie into the various parts and then sets the appropriate 

values in the %jar under the name; if the cookie is blank, it will delete 

it from the jar.



=cut



sub cookie_parse {

 my ($jarref, $header)=@_;

 my ($del,$part,@parts,@construct,$cookie_name)=(0);



 return if(!(defined $jarref && ref($jarref)));

 return if(!(defined $header && length($header)>0));



 @parts=split(/;/,$header);



 foreach $part (@parts){

	if($part=~/^[ \t]*(.+?)=(.*)$/){

		my ($name,$val)=($1,$2);

		if($name=~/^domain$/i){		

			$val=~s#^http://##;

			$val=~s#/.*$##;

			$construct[1]=$val;

		} elsif($name=~/^path$/i){

			$val=~s#/$## if($val ne '/');

			$construct[2]=$val;

		} elsif($name=~/^expires$/i){

			$construct[3]=$val;

		} else {

			$cookie_name=$name;

			if($val eq ''){		$del=1;

			} else {		$construct[0]=$val;}

		}	

	} else {

		if($part=~/secure/){

			$construct[4]=1;}

 }	}



 if($del){

  	delete $$jarref{$cookie_name} if defined $$jarref{$cookie_name};

 } else {

	$$jarref{$cookie_name}=\@construct;

 }

}





########################################################################



=pod    



=head1 - Function: LW::cookie_write

     

Params: \%jar, \%hin, $override

Return: nothing



Goes through the given jar and sets the Cookie header in %hin pending the 

correct domain and path.  If $override is true, then the domain and path

restrictions of the cookies are ignored.



Todo: factor in expire and secure.



=cut



sub cookie_write {

 my ($jarref, $hin, $override)=@_;

 my ($name,$out)=('','');



 return if(!(defined $jarref && ref($jarref)));

 return if(!(defined $hin    && ref($hin)   ));



 $override=$override||0;

 $$hin{'whisker'}->{'ssl'}=$$hin{'whisker'}->{'ssl'}||0;



 foreach $name (keys %$jarref){

	next if($name eq '');

	next if($$hin{'whisker'}->{'ssl'}==0 && $$jarref{$name}->[4]>0);

	if($override || 

          ($$hin{'whisker'}->{'host'}=~/$$jarref{$name}->[1]$/i &&

	   $$hin{'whisker'}->{'uri'}=~/$$jarref{$name}->[2]/i)){

		$out.="$name=$$jarref{$name}->[0];";

 }	}



 if($out ne ''){ $$hin{'Cookie'}=$out; }



}





########################################################################



=pod    



=head1 - Function: LW::cookie_get

     

Params: \%jar, $name

Return: @elements



Fetch the named cookie from the jar, and return the components.



=cut



sub cookie_get {

 my ($jarref,$name)=@_;



 return undef if(!(defined $jarref && ref($jarref)));



 if(defined $$jarref{$name}){

	return @{$$jarref{$name}};}



 return undef;

}





########################################################################



=pod    



=head1 - Function: LW::cookie_set

     

Params: \%jar, $name, $value, $domain, $path, $expire, $secure

Return: nothing



Set the named cookie with the provided values into the %jar.



=cut



sub cookie_set {

 my ($jarref,$name,$value,$domain,$path,$expire,$secure)=@_;

 my @construct;



 return if(!(defined $jarref && ref($jarref)));



 return if($name eq '');

 if($value eq ''){

	delete $$jarref{$name};

	return;}

 $path=$path||'/';

 $secure=$secure||0;



 @construct=($value,$domain,$path,$expire,$secure);

 $$jarref{$name}=\@construct; 

}





########################################################################





=pod



=head1 ++ Sub package: crawl



Used for crawling a website by requesting a (start) page, reading the

HTML, extracting the links, and then requesting those links--up to a

specified depth.  The module also allows various configuration tweaks to

do such things as monitor requests for offsite URLs (pages on other

hosts), track various cookies, etc.



=cut



#####################################################



=pod



=head1 - Function: LW::crawl

  

Params: $START, $MAX_DEPTH, \%tracking, \%hin

Return: Nothing



The heart of the crawl package.  Will perform an HTTP crawl on the

specified HOST, starting at START URI, proceeding up to MAX_DEPTH.  A

tracking hash reference (required) stores the results of each page (and

ongoing progress).  The http_in_options hash reference specifies a

standard HTTP hash for use in the outgoing HTTP requests.  Certain options

are configurable via LW::crawl_set_config().  The tracking hash will

contain all the pages visited; you can get the crawl engine to skip pages

by placing them in the tracking hash ahead of time.



START (first) parameter should be of the form "http://www.host.com/url".



=cut



sub crawl {

 my ($START, $MAX_DEPTH, $hrtrack, $hrin)=@_;

 my (%hout, %jar);

 my ($T, @ST, @links, @tlinks, @vals, @ERRORS)=('');



 return if(!(defined $hrtrack && ref($hrtrack)));

 return if(!(defined $hrin    && ref($hrin)   )); 

 return if(!defined $START || length($START)==0);



 $MAX_DEPTH||=2;



 # $ST[0]=HOST  $ST[1]=URL  $ST[2]=CWD  $ST[3]=HTTPS  $ST[4]=SERVER

 # $ST[5]=PORT  $ST[6]=DEPTH



 @vals=utils_split_uri($START);

 $ST[1]=$vals[0]; 	# uri

 $ST[0]=$vals[2]; 	# host

 $ST[5]=$vals[3]; 	# port

 $ST[4]=undef;		# server tag



 return if($ST[0] eq '');



 # some various informationz...

 $LW::crawl_config{'host'}=$ST[0];

 $LW::crawl_config{'port'}=$ST[5];

 $LW::crawl_config{'start'}=$ST[1];



 $$hrin{'whisker'}->{'host'}=$ST[0];

 $$hrin{'whisker'}->{'port'}=$ST[5];

 $$hrin{'whisker'}->{'lowercase_incoming_headers'}=1; # makes life easier



 http_fixup_request($hrin);



 # this is so callbacks can access internals via references

 $LW::crawl_config{'ref_links'}=\@links;

 $LW::crawl_config{'ref_jar'}=\%jar;

 $LW::crawl_config{'ref_hin'}=$hrin;

 $LW::crawl_config{'ref_hout'}=\%hout;



 %LW::crawl_referrers=(); # empty out existing referrers

 %LW::crawl_server_tags=();

 %LW::crawl_offsites=();

 %LW::crawl_cookies=();

 %LW::crawl_forms=();



 push @links, \@{[$ST[1],1,($vals[1] eq 'https')?1:0]};



 while(@links){

  my $C=shift @links;

  $ST[1]=$C->[0]; # url

  $ST[6]=$C->[1]; # depth

  $ST[3]=$C->[2]; # https



  next if(defined $$hrtrack{$ST[1]} && $$hrtrack{$ST[1]} ne '?');



  if($ST[6] > $MAX_DEPTH){

	$$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0);

	next;

  }



  $ST[2]=utils_get_dir($ST[1]);



  $$hrin{'whisker'}->{'uri'}=$ST[1];

  $$hrin{'whisker'}->{'ssl'}=$ST[3];

  my $result = crawl_do_request($hrin,\%hout);

  if($result==1 || $result==2){

	push @ERRORS, "Error on making request for '$ST[1]': $hout{'whisker'}->{'error'}";

	next;

  }



  if($result==0 || $result==4){

	$$hrtrack{$ST[1]}=$hout{'whisker'}->{'http_resp'}; }

  

  if($result==3 || $result==5){

	$$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); }



  if(defined $hout{'server'}){ 

   if(!defined $ST[4]){ # server tag

	$ST[4]=$hout{'server'}; }

   $LW::crawl_server_tags{$hout{'server'}}++;

  }



  if(defined $hout{'set-cookie'}){

		if($LW::crawl_config{'save_cookies'}>0){

			if(ref($hout{'set-cookie'})){

				foreach (@{$hout{'set-cookie'}}){

					$LW::crawl_cookies{$_}++; }

			} else {

				$LW::crawl_cookies{$hout{'set-cookie'}}++; 

		}	}



		if($LW::crawl_config{'reuse_cookies'}>0){

			cookie_read(\%jar,\%hout); }

  }





  next if($result==4 || $result==5);  

  next if(scalar @links > $LW::crawl_config{'url_limit'});



  if($result==0){ # page should be parsed

	if($LW::crawl_config{'source_callback'} != 0  &&

		ref($LW::crawl_config{'source_callback'})){

		&{$LW::crawl_config{'source_callback'}}($hrin,\%hout); }



	LW::html_find_tags(\$hout{'whisker'}->{'data'},

		\&crawl_extract_links_test);

	$LW::crawl_config{'stats_html'}++; # count how many pages we've parsed

  }



  if($result==3){ # follow the move via location header

	push @LW::crawl_urls, $hout{'location'}; }



  foreach $T (@LW::crawl_urls){

	 $T=~tr/\0\r\n//d; # the NULL character is a bug that's somewhere

	 next if (length($T)==0);

	 next if ($T=~/^javascript:/i); # stupid javascript

	 next if ($T=~/^mailto:/i);

	 next if ($T=~m#^([a-zA-Z]*)://# && lc($1) ne 'http' && lc($1) ne 'https');

	 next if ($T=~/^#/i); # fragment



	 if($LW::crawl_config{'callback'} != 0){

		next if &{$LW::crawl_config{'callback'}}($T,@ST); }



	 push(@{$LW::crawl_referrers{$T}}, $ST[1]) 

		if( $LW::crawl_config{'save_referrers'}>0 );



	 $T=utils_absolute_uri($T,$ST[1],1) if($LW::crawl_config{'normalize_uri'}>0);

	 @vals=utils_split_uri($T);



	 # slashdot bug: workaround for the following fsck'd html code:

	 # <FORM ACTION="//slashdot.org/users.pl" METHOD="GET">

	 if($LW::crawl_config{'slashdot_bug'} > 0 && 

			substr($vals[0],0,2) eq '//'){

		if($ST[3]==1){	$T='https:'.$T;

		} else {	$T='http:' .$T; }

		@vals=utils_split_uri($T);

	 }



	 # make sure URL is on same host, port, and protocol

	 if( (defined $vals[2] && $vals[2] ne $ST[0]) || 

			(defined $vals[3] && $vals[3] != $ST[5]) ||

			(defined $vals[1] && ($vals[1] ne 'http' 

				&& $vals[1] ne 'https'))){

		if($LW::crawl_config{'save_offsites'}>0){

			$LW::crawl_offsites{utils_join_uri(@vals)}++; }

		next; }



	 if(substr($vals[0],0,1) ne '/'){

		$vals[0]=$ST[2].$vals[0]; }



	 my $where=rindex($vals[0],'.');

	 my $EXT='';

	 if($where >= 0){

	   $EXT = substr($vals[0], $where+1, length($vals[0])-$where); }



	 $EXT=~tr/0-9a-zA-Z//cd; # yucky chars will puke regex below



	 if($EXT ne '' && $LW::crawl_config{'skip_ext'}=~/\.$EXT /i){

		if($LW::crawl_config{'save_skipped'}>0){

			$$hrtrack{$vals[0]}='?'; }

	 	next; }



	 if(defined $vals[4] && $LW::crawl_config{'use_params'}>0){

		if($LW::crawl_config{'params_double_record'}>0 &&

				!defined $$hrtrack{$vals[0]}){

			$$hrtrack{$vals[0]}='?'; }

		$vals[0]=$vals[0].'?'.$vals[4];	

	 }



	 next if(defined $$hrtrack{$vals[0]});



	 push @links, \@{[$vals[0],$ST[6]+1, ($vals[1] eq 'https')?1:0]};



  } # foreach



  @LW::crawl_urls=(); # reset for next round

 } # while



 my $key;

 foreach $key (keys %LW::crawl_config){

 	delete $LW::crawl_config{$key} if (substr($key,0,4) eq 'ref_');}



 $LW::crawl_config{'stats_reqs'}=$hout{'whisker'}->{'stats_reqs'};

 $LW::crawl_config{'stats_syns'}=$hout{'whisker'}->{'stats_syns'};



} # end sub crawl



#####################################################



=pod



=head1 - Function: LW::crawl_get_config

  

Params: $config_directive

Return: $config_directive_value



Returns the set value of the submitted config_directive.  See

LW::crawl_set_config() for a list of configuration values.



=cut



sub crawl_get_config {

	my $key=shift;

	return $LW::crawl_config{$key};

}



#####################################################



=pod



=head1 - Function: LW::crawl_set_config

  

Params: $config_directive, $value

Return: Nothing



This function adjusts the configuration of the crawl package. Use values

of 0 and 1 for off and on, respectively.  The defaults are set in 

libs/globals.wpl.



save_cookies

- crawl will save all cookies encountered, for later review



save_offsite_urls

- crawl will save all offsite URLs (URLs not on this host); crawl

  will not actually crawl those hosts (use separate calls to crawl)



follow_moves

- crawl will follow the URL received from an HTTP move response



use_params

- crawl will factor in URI parameters when considering if a URI is unique 

  or not



params_double_record

- if both use_params and params_double_record are set, crawl will make two

  entries for each URI which has paramaters: one with and one without the

  parameters



reuse_cookies

- crawl will resubmit any received/prior cookies



skip_ext

- crawl will ignore requests for URLs ending in extensions given; the 

  value requires a specific string format: (dot)extension(space).  For

  example, to ignore GIFs and JPGs, you would run:

 	LW::crawl_set_config('skip_ext',".gif .jpg ");



save_skipped

- any URLs that are skipped via skip_ext, or are above the specified DEPTH 

  will be recorded in the tracking hash with a value of '?' (instead of an

  HTTP response code).



callback

- crawl will call this function (if this is a reference to a function), 

  passing it the current URI and the @ST array (which has host, port, SSL, 

  etc info).  If the function returns a TRUE value, then crawl will skip

  that URI.  Set to value 0 (zero) if you do not want to use a callback.



slashdot_bug

- slashdot.org uses a screwy piece of invalid (yet it works) HTML in

  the form of <FORM ACTION="//slashdot.org/somefile">.  So basically,

  when a URL starts with '//' and slashdot_bug is set to 1 (which it

  is by default), then the proper 'http:' or 'https:' will be prepended

  to the URL.



source_callback

- crawl will call this function (if this is a reference to a function), 

  passing references to %hin and %hout, right before it parses the page

  for HTML links.  This allows the callback function to review or

  modify the HTML before it's parsed for links.  Return value is ignored.

  

url_limit

- number or URLs that crawl will queue up at one time; defaults to 1000



do_head

- use head requests to determine if a file has a content-type worth

  downloading.  Potentially saves some time, assuming the server properly

  supports HEAD requests.  Set to value 1 to use (0/off by default).





=cut



sub crawl_set_config {

	return if(!defined $_[0]);

	my %opts=@_;

	while( my($k,$v)=each %opts){

		$LW::crawl_config{lc($k)}=$v; }

}



#####################################################



=pod



=head1 - Function: LW::crawl_extract_links_test (INTERNAL)

  

Params: $TAG, \%elements, \$html_data, $offset, $len

Return: nothing



This is the callback function used by the crawl function, and passed to 

html_find_tags.  It will find URL/URI links and place them in 

@LW::crawl_urls.



=cut



sub crawl_extract_links_test {

	my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_);

	my $t;



	# this should be most of the time...

	return undef if(!defined ($t=$LW::crawl_linktags{$TAG}));

	return undef if(!scalar %$hr); # fastpath quickie



	while( my ($key,$val)= each %$hr){ # normalize element values

		$$hr{lc($key)} = $val;

	}



	if(ref($t)){

		foreach (@$t){

			push(@LW::crawl_urls,$$hr{$_}) if(defined $$hr{$_});

		}

	} else {

		push(@LW::crawl_urls,$$hr{$t}) if(defined $$hr{$t});

	}



	if($TAG eq 'form' && defined $$hr{action}){

		my $u=$LW::crawl_config{'ref_hout'}->{'whisker'}->{'uri'};

		$LW::crawl_forms{utils_absolute_uri($$hr{action},$u,1)}++;

	}



	return undef;

}



################################################################



=pod



=head1 - Function: LW::crawl_make_request (INTERNAL)

  

Params: \%hin, \%hout

Return: $status_code



This is an internal function used by LW::crawl(), and is responsible for

making HTTP requests, including any HEAD pre-requests and following move

responses.  Status codes are:

	0	Success

	1	Error during request

	2	Error on connection setup

	3	Move request; follow Location header

	4	File not of text/htm(l) type

	5	File not available



=cut



sub crawl_do_request {

 my ($hrin,$hrout) = @_;

 my $ret;



 if($LW::crawl_config{'do_head'}){  

	my $save=$$hrin{'whisker'}->{'method'};

	$$hrin{'whisker'}->{'method'}='HEAD';

	$ret=http_do_request($hrin,$hrout);

	$$hrin{'whisker'}->{'method'}=$save;



	return 2 if($ret==2); # if there was connection error, do not continue

	if($ret==0){ # successful request

	    	if($$hrout{'whisker'}->{'http_resp'}==501){ # HEAD not allowed

    			$LW::crawl_config{'do_head'}=0; # no more HEAD requests

	    	}



		if($$hrout{'whisker'}->{'http_resp'} <308 &&

				$$hrout{'whisker'}->{'http_resp'} >300){

			if($LW::crawl_config{'follow_moves'} >0){

				return 3 if(defined $$hrout{'location'}); }

			return 5; # not avail

		}



		if($$hrout{'whisker'}->{'http_resp'}==200){

			# no content-type is treated as text/htm

			if(defined $$hrout{'content-type'} &&

					$$hrout{'content-type'}!~/^text\/htm/i){

				return 4;

			}		

			# fall through to GET request below			

		}

    	}

	# request errors are essentially redone via GET, below

  }



 return http_do_request($hrin,$hrout);

}



#####################################################



=pod



=head1 ++ Sub package: dump



The dump subpackage contains various utility functions which emulate

the basic functionality provided by Data::Dumper.



=cut



########################################################################



=pod



=head1 - Function: LW::dumper

  

Params: $name, \@array [, $name, \%hash, $name, \$scalar ]

Return: $code, undef on error



The dumper function will take the given $name and data reference, and

will create an ASCII perl code representation suitable for eval'ing

later to recreate the same structure.  $name is the name of the variable

that it will be saved as.  Example:



	$output = LW::dumper('hin',\%hin);



NOTE: dumper() creates anonymous structures under the name given.  For

example, if you dump the hash %hin under the name 'hin', then when you

eval the dumped code you will need to use %$hin, since $hin is now a

*reference* to a hash.



=cut



sub dumper {

	my %what=@_;

	my ($final,$k,$v)=('');

	while( ($k,$v)=each %what){

		return undef if(ref($k) || !ref($v));

		$final.="\$$k = "._dump(1,$v,1);

		$final=~s#,\n$##;

		$final.=";\n"; }

	return $final;

}



########################################################################



=pod



=head1 - Function: LW::dumper_writefile

  

Params: $file, $name, \@array [, $name, \%hash, $name, \@scalar ]

Return: 0 if success; 1 if error



This calls dumper() and saves the output to the specified $file.  



Note: LW does not checking on the validity of the file name, it's

creation, or anything of the sort.  Files are opened in overwrite

mode.



=cut



sub dumper_writefile {

	my $file=shift;

	my $output=dumper(@_);

	return 1 if(!open(OUT,">$file") || $output eq 'ERROR');

	print OUT $output;

	close(OUT);

}



########################################################################



=pod



=head1 - Function: LW::_dump (INTERNAL)

   

Params: $tabs, $ref

Return: $output



This is an internal function to dumper() which will dereference all

elements and produce the resulting code.



This function is not intended for external use.



=cut



sub _dump { # dereference and dump an element

	my ($t, $ref, $depth)=@_;

	my ($out,$k,$v)=('');

	$depth||=1;



	# to protect against circular loops

	return 'undef' if($depth > 128);



	if(!defined $ref){

		return 'undef';

	} elsif(ref($ref) eq 'HASH'){

		$out.="{\n";

		while( ($k,$v)=each %$ref){

			next if($k eq '');

			$out.= "\t"x$t;

			$out.=_dumpd($k).' => ';

			if(ref($v)){ $out.=_dump($t+1,$v,$depth+1); }

			else { $out.=_dumpd($v); }

			$out.=",\n" unless( substr($out,-2,2) eq ",\n");

		}

		$out=~s#,\n$#\n#;

		$out.="\t"x($t-1);

		$out.="},\n";

	} elsif(ref($ref) eq 'ARRAY'){

		$out.="[";

		if(~~@$ref){

			$out.="\n";

			foreach $v (@$ref) {

				$out.= "\t"x$t;

				if(ref($v)){ $out.=_dump($t+1,$v,$depth+1); }

				else {       $out.=_dumpd($v); }

				$out.=",\n" unless( substr($out,-2,2) eq ",\n");

			}

			$out=~s#,\n$#\n#;

			$out.="\t"x($t-1);

		}

		$out.="],\n";

	} elsif(ref($ref) eq 'SCALAR'){

		$out.=_dumpd($$ref);

	} elsif(ref($ref) eq 'REF'){

		$out.=_dump($t,$$ref,$depth+1);

	} elsif(ref($ref)){ # unknown/unsupported ref

		$out.="undef";

	} else { # normal scalar

		$out.=_dumpd($ref);

	}

	return $out;

}





########################################################################



=pod



=head1 - Function: LW::_dumpd (INTERNAL)

   

Params: $string

Return: $escaped_string



This is an internal function to dumper() which will escape the given

string to make it suitable for printing.



This function is not intended for external use.



=cut



sub _dumpd { # escape a scalar string

	my $v=shift;

	return 'undef' if(!defined $v);

	return "''" if($v eq '');

	return "$v" if($v!~tr/0-9//c);

	return "'$v'" if($v!~tr/ !-~//c);

	$v=~s#\\#\\\\#g;	$v=~s#"#\\"#g;

	$v=~s#\r#\\r#g;		$v=~s#\n#\\n#g;

	$v=~s#\0#\\0#g;		$v=~s#\t#\\t#g;

	$v=~s#([^!-~ ])#sprintf('\\x%02x',ord($1))#eg;

	return "\"$v\"";

}



########################################################################

=pod



=head1 ++ Sub package: easy



The 'easy' subpackage contains many high-level/simple functions to

do basic web tasks.  This should make it easier to use libwhisker

to do basic tasks.



=cut



########################################################################



=pod



=head1 - Function: LW::get_page

  

Params: $url [, \%hin_request]

Return: $code, $data ($code will be set to undef on error, $data will

			contain error message)



This function will fetch the page at the given URL, and return the HTTP response code

and page contents.  Use this in the form of:

($code,$html)=LW::get_page("http://host.com/page.html")



The optional %hin_request will be used if supplied.  This allows you to set

headers and other parameters.



=cut



sub get_page {

	my ($URL,$hr)=(shift,shift);

	return (undef,"No URL supplied") if(length($URL)==0);



	my (%req,%resp);

	my $rptr;



	if(defined $hr && ref($hr)){

		$rptr=$hr;

	} else {

		$rptr=\%req;

		LW::http_init_request(\%req);

	}



	LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax

	LW::http_fixup_request($rptr);



	if(http_do_request($rptr,\%resp)){

		return (undef,$resp{'whisker'}->{'error'});

	}



	return ($resp{'whisker'}->{'code'}, $resp{'whisker'}->{'data'});

}



########################################################################



=pod



=head1 - Function: LW::get_page_hash

  

Params: $url [, \%hin_request]

Return: $hash_ref (undef on no URL)



This function will fetch the page at the given URL, and return the whisker

HTTP response hash.  The return code of the function is set to

$hash_ref->{whisker}->{get_page_hash}, and uses the LW::http_do_request()

response values.



Note: undef is returned if no URL is supplied



=cut



sub get_page_hash {

	my ($URL,$hr)=(shift,shift);

	return undef if(length($URL)==0);



	my (%req,%resp);

	my $rptr;



	if(defined $hr && ref($hr)){

		$rptr=$hr;

	} else {

		$rptr=\%req;

		LW::http_init_request(\%req);

	}



	LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax

	LW::http_fixup_request($rptr);



	my $r=http_do_request($rptr,\%resp);

	$resp{whisker}->{get_page_hash}=$r;



	return \%resp;

}



########################################################################



=pod



=head1 - Function: LW::get_page_to_file

  

Params: $url, $filepath [, \%hin_request]

Return: $code ($code will be set to undef on error)



This function will fetch the page at the given URL, place the resulting HTML

in the file specified, and return the HTTP response code.  The optional

%hin_request hash sets the default parameters to be used in the request.



NOTE: libwhisker does not do any file checking; libwhisker will open the

supplied filepath for writing, overwriting any previously-existing files.

Libwhisker does not differentiate between a bad request, and a bad file

open.  If you're having troubles making this function work, make sure

that your $filepath is legal and valid, and that you have appropriate

write permissions to create/overwrite that file.



=cut



sub get_page_to_file {

	my ($URL, $filepath, $hr)=@_;



	return undef if(length($URL)==0);

	return undef if(length($filepath)==0);



	my (%req,%resp);

	my $rptr;



	if(defined $hr && ref($hr)){

		$rptr=$hr;

	} else {

		$rptr=\%req;

		LW::http_init_request(\%req);

	}



	LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax

	LW::http_fixup_request($rptr);



	if(http_do_request($rptr,\%resp)){

		return undef;

	}

	open(OUT,">$filepath") || return undef;

	binmode(OUT); # stupid Windows

	print OUT $resp{'whisker'}->{'data'};

	close(OUT);



	return $resp{'whisker'}->{'code'};

}



########################################################################



=pod



=head1 - Function: LW::upload_file

  

Params: $url, $filepath, $paramname [, \%hin_request]

Return: $code ($code will be set to undef on error)



This function will upload the specified $file to the given $url as

the parameter named $paramname via a multipart POST request.  The 

optional $hin_request hash lets you set any other particular request

parameters.



NOTE: this is a highly simplied function for basic uploads.  If you

need to do more advanced things like set other multipart form

parameters, send multiple files, etc, then you will need to use the

normal API to do it yourself.



=cut



sub upload_file {

	my ($URL, $filepath, $paramname, $hr)=@_;



	return undef if(length($URL)      ==0);

	return undef if(length($filepath) ==0);

	return undef if(length($paramname)==0);

	return undef if(!(-e $filepath && -f $filepath));



	my (%req,%resp,%multi);

	my $rptr;



	if(defined $hr && ref($hr)){

		$rptr=$hr;

	} else {

		$rptr=\%req;

		LW::http_init_request(\%req);

	}



	LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax

	$rptr{'whisker'}->{'method'}='POST';

	LW::http_fixup_request($rptr);



	LW::multipart_setfile(\%multi,$filepath,$paramname);

	LW::multipart_write(\%multi,$rptr);



	if(http_do_request($rptr,\%resp)){

		return undef;

	}



	return $resp{'whisker'}->{'code'};

}



########################################################################



=pod



=head1 - Function: LW::download_file

  

Params: $url, $filepath [, \%hin_request]

Return: $code ($code will be set to undef on error)



LW::download_file is just an alias for LW::get_page_to_file().



=cut



sub download_file {

	goto &LW::get_page_to_file;

}



########################################################################





=pod    



=head1 ++ Sub package: encode



Various type encodings.  Installing MIME::Base64 will result in a 

compiled C version of base64 functions, which means they will be tons 

faster.  This is useful if you're going to run a Basic authentication 

brute force, which requires a high processing speed.  However, it's not 

required, since I include a Perl version, which is slower.



=cut



########################################################################



=pod    



=head1 - Function: LW::encode_base64

  

Params: $data, $eol

Return: $base64_encoded_data

        

LW::encode_base64 is a stub function which will choose the fastest

function available for doing base64 encoding.  This is done by checking to

see if the MIME::Base64 perl module is available (which uses fast C

routines).  If it's not, then it defaults to a perl version (which is

slower).  You can call the perl version direct, but I suggest using the

stub to gain speed advantages where possible.



=cut



#sub encode_base64;





########################################################################



=pod    



=head1 - Function: LW::decode_base64

  

Params: $data

Return: $base64_decoded_data

        

LW::decode_base64 is a stub function which will choose the fastest

function available for doing base64 decoding.  This is done by checking to

see if the MIME::Base64 perl module is available (which uses fast C

routines).  If it's not, then it defaults to a perl version (which is

slower).  You can call the perl version direct, but I suggest using the

stub to gain speed advantages where possible.



=cut



#sub decode_base64;





########################################################################



=pod    



=head1 - Function: LW::encode_base64_perl

        

Params: $data, $eol

Return: $b64_encoded_data



A perl implementation of base64 encoding.  I recommend you use

LW::encode_base64 instead, since it may use the MIME::Base64 module (if

available), which lead to speed advantages.  The perl code for this

function was actually taken from an older MIME::Base64 perl module, and

bears the following copyright:



Copyright 1995-1999 Gisle Aas <gisle@aas.no>



NOTE: the $eol parameter will be inserted every 76 characters.  This is

used to format the data for output on a 80 character wide terminal.



=cut



sub encode_base64_perl { # ripped from MIME::Base64

    my $res = "";

    my $eol = $_[1];

    $eol = "\n" unless defined $eol;

    pos($_[0]) = 0;

    while ($_[0] =~ /(.{1,45})/gs) {

        $res .= substr(pack('u', $1), 1);

        chop($res);}

    $res =~ tr|` -_|AA-Za-z0-9+/|;

    my $padding = (3 - length($_[0]) % 3) % 3;

    $res =~ s/.{$padding}$/'=' x $padding/e if $padding;

    if (length $eol) {

        $res =~ s/(.{1,76})/$1$eol/g;

    } $res; }





########################################################################



=pod    



=head1 - Function: LW::decode_base64_perl

  

Params: $data

Return: $b64_decoded_data



A perl implementation of base64 decoding.  The perl code for this function

was actually taken from an older MIME::Base64 perl module, and bears the 

following copyright:



Copyright 1995-1999 Gisle Aas <gisle@aas.no>



=cut



sub decode_base64_perl { # ripped from MIME::Base64

    my $str = shift;

    my $res = "";

    $str =~ tr|A-Za-z0-9+=/||cd;

    $str =~ s/=+$//;                        # remove padding

    $str =~ tr|A-Za-z0-9+/| -_|;            # convert to uuencoded format

    while ($str =~ /(.{1,60})/gs) {

        my $len = chr(32 + length($1)*3/4); # compute length byte

        $res .= unpack("u", $len . $1 );    # uudecode

    }$res;}





########################################################################



=pod    



=head1 - Function: LW::encode_str2uri

  

Params: $data

Return: $result



This function encodes every character (except the / character) with normal 

URL hex encoding.



=cut



sub encode_str2uri { # normal hex encoding

	my $str=shift;

	$str=~s/([^\/])/sprintf("%%%02x",ord($1))/ge;

	return $str;}





#########################################################################



=pod    



=head1 - Function: LW::encode_str2ruri

  

Params: $data

Return: $result



This function randomly encodes characters (except the / character) with 

normal URL hex encoding.



=cut



sub encode_str2ruri { # random normal hex encoding

    my @T=split(//,shift);

    my $s;

    foreach (@T) {

     if(m#;=:&@\?#){

        $s.=$_;

        next;

      }

      if((rand()*2)%2 == 1){	$s.=sprintf("%%%02x",ord($_)) ;

      }else{			$s.=$_; }

    }

    return $s;

}



#########################################################################



=pod    



=head1 - Function: LW::encode_unicode

  

Params: $data

Return: $result



This function converts a normal string into Windows unicode format.



=cut



sub encode_unicode

{

	my $r='';

 	foreach $c (split(//,shift)){

		$r.=pack("v",ord($c));

	}

	return $r;

}



#########################################################################

=pod



=head1 ++ Sub package: forms



This subpackage contains various routines to parse and handle HTML forms.  

The goal is to parse the variable, human-readable HTML into concrete

structures useable by your program.  The forms package does do a good job

at making these structures, but I will admit: they are not exactly simple,

and thus not a cinch to work with.  But then again, representing something

as complex as a HTML form is not a simple thing either.  I think the

results are acceptable for what's trying to be done.  Anyways...



Forms are stored in perl hashes, with elements in the following format:



	$form{'element_name'}=@([ 'type', 'value', @params ])



Thus every element in the hash is an array of anonymous arrays.  The first

array value contains the element type (which is 'select', 'textarea',

'button', or an 'input' value of the form 'input-text', 'input-hidden',

'input-radio', etc).



The second value is the value, if applicable (it could be undef if no

value was specified).  Note that select elements will always have an undef

value--the actual values are in the subsequent options elements.



The third value, if defined, is an anonymous array of additional tag

parameters found in the element (like 'onchange="blah"', 'size="20"',

'maxlength="40"', 'selected', etc).



The array does contain one special element, which is stored in the hash

under a NULL character ("\0") key.  This element is of the format:



	$form{"\0"}=['name', 'method', 'action', @parameters];



The element is an anonymous array that contains strings of the form's

name, method, and action (values can be undef), and a @parameters array

similar to that found in normal elements (above).



Accessing individual values stored in the form hash becomes a test of your

perl referencing skills.  Hint: to access the 'value' of the third element

named 'choices', you would need to do:



	$form{'choices'}->[2]->[1];



The '[2]' is the third element (normal array starts with 0), and the

actual value is '[1]' (the type is '[0]', and the parameter array is

'[2]').



=cut



################################################################



=pod



=head1 - Function: LW::forms_read

  

Params: \$html_data

Return: @found_forms



This function parses the given $html_data into libwhisker form hashes.  

It returns an array of hash references to the found forms.



=cut



sub forms_read {

	my $dr=shift;

	return undef if(!ref($dr) || length($$dr)==0);



	@LW::forms_found=();

	LW::html_find_tags($dr,\&forms_parse_callback);



	if(scalar %LW::forms_current){

		my %DUP=%LW::forms_current;

		push(@LW::forms_found,\%DUP);

	}

	return @LW::forms_found;

}



################################################################



=pod



=head1 - Function: LW::forms_write

  

Params: \%form_hash

Return: $html_of_form   [undef on error]



This function will take the given %form hash and compose a generic HTML

representation of it, formatted with tabs and newlines in order to make it

neat and tidy for printing.



Note: this function does *not* escape any special characters that were

embedded in the element values.



=cut



sub forms_write {

	my $hr=shift;

	return undef if(!ref($hr) || !(scalar %$hr));

	return undef if(!defined $$hr{"\0"});

	

	my $t='<form name="'.$$hr{"\0"}->[0].'" method="';

	$t.=$$hr{"\0"}->[1].'" action="'.$$hr{"\0"}->[2].'"';

	if(defined $$hr{"\0"}->[3]){

		$t.=' '.join(' ',@{$$hr{"\0"}->[3]}); }

	$t.=">\n";



	while( my($name,$ar)=each(%$hr) ){

	  next if($name eq "\0");

	  foreach $a (@$ar){

		my $P='';

		$P=' '.join(' ', @{$$a[2]}) if(defined $$a[2]);

		$t.="\t";



		if($$a[0] eq 'textarea'){

			$t.="<textarea name=\"$name\"$P>$$a[1]";

			$t.="</textarea>\n";



		} elsif($$a[0]=~m/^input-(.+)$/){

			$t.="<input type=\"$1\" name=\"$name\" ";

			$t.="value=\"$$a[1]\"$P>\n";



		} elsif($$a[0] eq 'option'){

			$t.="\t<option value=\"$$a[1]\"$P>$$a[1]\n";



		} elsif($$a[0] eq 'select'){

			$t.="<select name=\"$name\"$P>\n";



		} elsif($$a[0] eq '/select'){

			$t.="</select$P>\n";



		} else { # button

			$t.="<button name=\"$name\" value=\"$$a[1]\">\n";

		}

	  }

	}



	$t.="</form>\n";

	return $t;

}



################################################################





=pod



=head1 - Function: LW::forms_parse_html (INTERNAL)

  

Params: $TAG, \%elements, \$html_data, $offset, $len

Return: nothing



This is an &html_find_tags callback used to parse HTML into form hashes.  

You should not call this directly, but instead use &LW::forms_read.



=cut



{ # these are private static variables for &forms_parse_html

%FORMS_ELEMENTS=(	'form'=>1,	'input'=>1,

			'textarea'=>1,	'button'=>1,

			'select'=>1,	'option'=>1,

			'/select'=>1	);

$CURRENT_SELECT=undef;

$UNKNOWNS=0;



sub forms_parse_callback {

	my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_);

	my ($saveparam, $parr, $key)=(0,undef,'');



	# fastpath shortcut

	return undef if(!defined $FORMS_ELEMENTS{$TAG});

	LW::utils_lowercase_hashkeys($hr) if(scalar %$hr);



	if($TAG eq 'form'){



		if(scalar %LW::forms_current){ # save last form

			my %DUP=%LW::forms_current;

			push (@LW::forms_found, \%DUP);

			%LW::forms_current=();

		}



		$LW::forms_current{"\0"}=[$$hr{name},$$hr{method},

			$$hr{action},undef];

		delete $$hr{'name'}; delete $$hr{'method'}; delete $$hr{'action'};

		$key="\0"; $parr=\@{$LW::forms_current{"\0"}};

		$UNKNOWNS=0;



	} elsif($TAG eq 'input'){

		$$hr{type}='text' if(!defined $$hr{type});

		$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});

		$key=$$hr{name};

	

		push( @{$LW::forms_current{$key}}, 

			(['input-'.$$hr{type},$$hr{value},undef]) );

		delete $$hr{'name'}; delete $$hr{'type'}; delete $$hr{'value'};

		$parr=\@{$LW::forms_current{$key}->[-1]};



	} elsif($TAG eq 'select'){

		$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});

		$key=$$hr{name};

		push( @{$LW::forms_current{$key}}, (['select',undef,undef]) );

		$parr=\@{$LW::forms_current{$key}->[-1]};

		$CURRENT_SELECT=$key;

		delete $$hr{name};



	} elsif($TAG eq '/select'){

		push( @{$LW::forms_current{$CURRENT_SELECT}}, 

			(['/select',undef,undef]) );

		$CURRENT_SELECT=undef;

		return undef;



	} elsif($TAG eq 'option'){

		return undef if(!defined $CURRENT_SELECT);

		if(!defined $$hr{value}){

			my $stop=index($$dr,'<',$start+$len);

			return undef if($stop==-1); # MAJOR PUKE

			$$hr{value}=substr($$dr,$start+$len,

				($stop-$start-$len));

			$$hr{value}=~tr/\r\n//d;

		}

		push( @{$LW::forms_current{$CURRENT_SELECT}}, 

			(['option',$$hr{value},undef]) );

		delete $$hr{value};

		$parr=\@{$LW::forms_current{$CURRENT_SELECT}->[-1]};



	} elsif($TAG eq 'textarea'){

		my $stop=$start+$len;

		# find closing </textarea> tag

		do {	$stop=index($$dr,'</',$stop+2); 

			return undef if($stop==-1); # MAJOR PUKE

		} while( lc(substr($$dr,$stop+2,8)) ne 'textarea');

		$$hr{value}=substr($$dr,$start+$len,($stop-$start-$len));



		$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});

		$key=$$hr{name};

		push( @{$LW::forms_current{$key}}, 

			(['textarea',$$hr{value},undef]) );

		$parr=\@{$LW::forms_current{$key}->[-1]};

		delete $$hr{'name'}; delete $$hr{'value'};



	} else { # button

		$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});

		$key=$$hr{name};

		push( @{$LW::forms_current{$key}}, 

			(['button',$$hr{value},undef]) );

	}



	if(scalar %$hr){

		my @params=();

		foreach $k (keys %$hr){

			if(defined $$hr{$k}){

					push @params, "$k=\"$$hr{$k}\"";

			} else {	push @params, $k; }

		}

		$$parr[2]=\@params;

	}



	return undef;

}}



=pod



=head1 ++ Sub package: html

        

The html sub package implements a simple HTML parser.



=cut



################################################################



=pod



=head1 - Function: LW::html_find_tags

  

Params: \$data, \&callback_function [, $xml_flag]

Return: nothing



LW::html_find_tags parses a piece of HTML and 'extracts' all found tags,

passing the info to the given callback function.  The callback function 

must accept two parameters: the current tag (as a scalar), and a hash ref 

of all the tag's elements. For example, the tag <a href="/file"> will

pass 'a' as the current tag, and a hash reference which contains

{'href'}="/file".



The xml_flag, when set, causes the parser to do some extra processing

and checks to accomodate XML style tags such as <tag foo="bar"/>.



Notice: this function is slow! And using it to rewrite (via passback) is 

slower!  Make sure you have LW::bin installed to get the faster binary 

version.



=cut



sub html_find_tags {

 # use faster binary helper

 goto &LW::bin::html_find_tags 

 	if(defined $LW::available{'lw::bin'});

	

 my ($dataref, $callbackfunc, $xml)=@_;



 return if(!(defined $dataref      && ref($dataref)     ));

 return if(!(defined $callbackfunc && ref($callbackfunc)));

 $xml||=0;



 my ($CURTAG, $ELEMENT, $VALUE, $c, $cc);

 my ($INCOMMENT,$INTAG,$INSCRIPT,$INCDATA)=(0,0,0,0);

 my (%TAG, $ret, $start, $tagstart, $commstart, $scriptstart, $x);



 # YES, this looks like C.  In fact, it's my C version ported to

 # perl.  But it's faster and more dependable than any regex mess

 # someone could come up with.

 my $LEN = length($$dataref);

 for ($c=0; $c<$LEN; $c++){



	$cc=substr($$dataref,$c,1);



	next if(!$INCOMMENT && !$INTAG && !$INSCRIPT && $cc ne '>' && $cc ne '<');



        if($cc eq '<'){

		if($INSCRIPT){

			if(lc(substr($$dataref,$c+1,7)) eq '/script'){

				$INSCRIPT=0;

				$TAG{'='}=substr($$dataref, $scriptstart,

					$c - $scriptstart - 1);

			} else { next; }

		}



		next if($INCDATA); # skip tags in xml CDATA section



                if(substr($$dataref,$c+1,3) eq '!--'){

                        $INCOMMENT=1; $commstart=$c; $c+=3;



		$INCDATA++ if($xml&&substr($$dataref,$c+1,8) eq '![CDATA[');



		} else {

    	                $INTAG=1; $c++;

			$c++ while(substr($$dataref,$c,1)=~tr/< \t\r\n//);

			$tagstart=$c-1; 



			$CURTAG='';

			while(($x=substr($$dataref,$c,1))!~tr/ \t\r\n>=// &&

					$c < $LEN){

				$CURTAG.=$x; $c++;}



			chop $CURTAG if($xml && substr($CURTAG,-1,1) eq '/');

			$c++ if($x ne '>');



			$INSCRIPT=1 if($CURTAG eq 'script' && !$xml);

		}	

		$cc=substr($$dataref,$c,1); # refresh current char (cc)

	}



        if($cc eq '>'){

		if($INSCRIPT){

			if($CURTAG eq 'script'){

				$scriptstart = $c + 1; 

			} else { next; }

		}



		if($INCDATA && substr($$dataref,$c-2,2) eq ']]'){

			$INCDATA=0;

			next;

		}



		if(!$INCOMMENT && $INTAG){ 

			$INTAG=0; 

			$TAG{'/'}++ if($xml&&substr($$dataref,$c-1,1) eq '/');

			$ret=&$callbackfunc($CURTAG,\%TAG, $dataref,

				$tagstart, $c-$tagstart+1);

			$c+=$ret if(defined $ret && $ret != 0);

			$CURTAG='';

			%TAG=();

		}

                if($INCOMMENT && substr($$dataref,$c-2,2) eq '--'){

                        $INCOMMENT=0; 

			$TAG{'='}=substr($$dataref,$commstart+4,

				$c-$commstart-3);

			$ret=&$callbackfunc('!--',\%TAG, $dataref,

				$commstart, $c-$commstart+1);

			$c+=$ret if(defined $ret && $ret != 0);

			delete $TAG{'='};

			next;

		}

	}



        next if($INCOMMENT);



        if($INTAG){



                $ELEMENT=''; $VALUE='';



		# eat whitespace

		$c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);



		$start=$c;

		$c++ while(substr($$dataref,$c,1)!~tr/ \t\r\n=\>// && $c<$LEN);



		if($c > $start){

			$ELEMENT=substr($$dataref,$start,$c-$start);

			chop $ELEMENT if($xml&&substr($ELEMENT,-1,1) eq '/');

		}



		if(substr($$dataref,$c,1) ne '>'){

		 # eat whitespace

		 $c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);



                 if(substr($$dataref,$c,1) eq '='){ 

                	$c++;

			$start=$c;

			my $p = substr($$dataref,$c,1);

                        if($p eq '"' || $p eq '\''){ 

                        	$c++; $start++;

	                        $c++ while(substr($$dataref,$c,1) ne $p &&

	                        	$c < $LEN);

				$VALUE=substr($$dataref,$start,$c-$start);

                                $c++; 

			} else {

                                $c++ while(substr($$dataref,$c,1)!~tr/ \t\r\n\>// &&

                                	$c < $LEN);

				$VALUE=substr($$dataref,$start,$c-$start);

				chop $VALUE if($xml&&substr($$dataref,$c-1,2) eq '/>');

			}



			# eat whitespace

                	$c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);

                 } 

		} # if $c ne '>'

		$c--;

		$TAG{$ELEMENT}=$VALUE if($ELEMENT ne '' && ($xml && $ELEMENT ne '/'));

	}

}}



################################################################



=pod



=head1 ++ Sub package: http



The http package is the core package of libwhisker.  It is responsible

for making the HTTP requests, and parsing the responses.  It can handle

HTTP 0.9, 1.0, and 1.1 requests, and allows pretty much every aspect of

the request to be configured and controlled.  The HTTP functions use a

HTTP in/out hash, which is a normal perl hash.  For outgoing HTTP requests

('hin' hashes), the keys/values represent outgoing HTTP headers.  For HTTP

responses ('hout' hashes), the keys/values represent incoming HTTP

headers.  For both, however, there is a special key, 'whisker', whose

value is a hash reference.  The whisker control hash contains more

configuration variables, which include host, port, and uri of the desired

request.  To access the whisker control hash, use the following

notation: $hash{'whisker'}->{'key'}='value';



You should view LW::http_init_request() for a list of core whisker control

hash values.



The internals of the http subpackage will be rewritten shortly--the 

current implementation is gross and not very good style.  Note that the

API will be unaffected; it will only be an internal reordering.  All

references/uses of $$Z will be cleaned up to be more practical/eliminated.



=cut



##################################################################



=pod



=head1 - Function: LW::http_init_request

   

Params: \%request_hash_to_initialize

Return: Nothing (modifies input hash)



Sets default values to the input hash for use.  Sets the host to

'localhost', port 80, request URI '/', using HTTP 1.1 with GET

method.  The timeout is set to 10 seconds, no proxies are defined, and all

URI formatting is set to standard HTTP syntax.  It also sets the

Connection (Keep-Alive) and User-Agent headers.



NOTICE!!  It's important to use http_init_request before calling 

http_do_request, or http_do_request might puke.  Thus, a special magic 

value is placed in the hash to let http_do_request know that the hash has 

been properly initialized.  If you really must 'roll your own' and not use 

http_init_request before you call http_do_request, you will at least need 

to set the INITIAL_MAGIC value (amongst other things).



=cut



sub http_init_request { # doesn't return anything

 my ($hin)=shift;



 return if(!(defined $hin && ref($hin)));

 %$hin=(); # clear control hash



# control values

 $$hin{'whisker'}={

	req_spacer		=>	' ',

	req_spacer2		=>	' ',

	http_ver		=>	'1.1',

	method			=>	'GET',

	method_postfix		=>	'',

	port			=>	80,

	uri			=>	'/',

	uri_prefix		=>	'',

	uri_postfix		=>	'',

	uri_param_sep		=>	'?',

	host			=>	'localhost',

	http_req_trailer    	=>	'',

	timeout			=>	10,

	include_host_in_uri 	=>	0,

	ignore_duplicate_headers=> 	1,

	normalize_incoming_headers =>	1,

	lowercase_incoming_headers =>	0,

	ssl			=>	0,

	http_eol		=>	"\x0d\x0a",

	force_close		=>	0,

	force_open		=>	0,

	retry			=>	1,

	trailing_slurp		=>	0,

	force_bodysnatch	=>	0,

	INITIAL_MAGIC		=>	31337

};



 

# default header values

 $$hin{'Connection'}='Keep-Alive'; # notice it is now default!

 $$hin{'User-Agent'}="libwhisker/$LW::VERSION"; # heh

}





##################################################################



=pod



=head1 - Function: LW::http_do_request

   

Params: \%request, \%response [, \%configs]

Return: >=1 if error; 0 if no error (also modifies response hash)



*THE* core function of libwhisker.  LW::http_do_request actually performs

the HTTP request, using the values submitted in %request, and placing result

values in %response.  This allows you to resubmit %request in subsequent 

requests (%response is automatically cleared upon execution).  You can 

submit 'runtime' config directives as %configs, which will be spliced into

$hin{'whisker'}->{} before anything else.  That means you can do:



LW::http_do_request(\%req,\%resp,{'uri'=>'/cgi-bin/'});



This will set $req{'whisker'}->{'uri'}='/cgi-bin/' before execution, and

provides a simple shortcut (note: it does modify %req).



This function will also retry any requests that bomb out during the 

transaction (but not during the connecting phase).  This is controlled

by the {whisker}->{retry} value.  Also note that the returned error

message in resp is the *last* error received.  All retry errors are

put into {whisker}->{retry_errors}, which is an anonymous array.



Also note that all NTLM auth logic is implemented in http_do_request().

NTLM requires multiple requests in order to work correctly, and so this

function attempts to wrap that and make it all transparent, so that the

final end result is what's passed to the application.



This function will return 0 on success, 1 on HTTP protocol error, and 2

on non-recoverable network connection error (you can retry error 1, but

error 2 means that the server is totally unreachable and there's no

point in retrying).



=cut



sub http_do_request {

 my @params = @_;

 my $retry_count = ${$params[0]}{'whisker'}->{'retry'} || 0;

 my ($ret, @retry_errors, $auth);



 return 1 if(!(defined $params[0] && ref($params[0])));

 return 1 if(!(defined $params[1] && ref($params[1])));



 if(defined $params[2]){

	foreach (keys %{$params[2]}){

		${$params[0]}{'whisker'}->{$_}=${$params[2]}{$_};}}



 $auth=$params[0]->{'Authorization'} if(defined $params[0]->{'Authorization'});

 do {

    if(defined $auth && $auth=~/^NTLM/){

	$ret=0;

	if($params[0]->{'whisker'}->{'ntlm_step'}==0){

		$ret=LW::http_do_request_ex($params[0],$params[1]);

		return 2 if($ret==2);

		if($ret==0){

			return 0 if($params[1]->{'whisker'}->{'code'} == 200);

			return 1 if($params[1]->{'whisker'}->{'code'} != 401);

			$params[0]->{'whisker'}->{'ntlm_step'}=1;

			my $thead=utils_find_lowercase_key($params[1],'www-authenticate');

			return 1 if(!defined $thead);

			return 1 if($thead!~m/^NTLM (.+)$/);  

			$params[0]->{'Authorization'}='NTLM '.ntlm_client(

				$params[0]->{'whisker'}->{'ntlm_obj'},$1);

		}

	}

	if($ret==0){

		delete $params[0]->{'Authorization'}

			if($params[0]->{'whisker'}->{'ntlm_step'}>1);

		$ret=LW::http_do_request_ex($params[0],$params[1]);

		$params[0]->{'Authorization'}=$auth; 

		if($ret>0){ 	$params[0]->{'whisker'}->{'ntlm_step'}=0;

		} else {	$params[0]->{'whisker'}->{'ntlm_step'}=2; }

		return $ret if($ret==2||$ret==0);

	}

    } else {

    	$ret=LW::http_do_request_ex($params[0],$params[1]);

	push @{${$params[1]}{'whisker'}->{'retry_errors'}},

		@retry_errors if scalar(@retry_errors);

	return $ret if($ret==0 || $ret==2);

    }

    push @retry_errors, ${$params[1]}{'whisker'}->{'error'};

    $retry_count--;

  } while( $retry_count >= 0);



 # if we get here, we still had errors, but no more retries

 return 1;

}



##################################################################



=pod



=head1 - Function: LW::http_do_request_ex

   

Params: \%req, \%resp, \%configs

Return: >=1 if error; 0 if no error



NOTE: you should go through http_do_request(), which calls this function.



This function actually does all the request work.  It is called by

http_do_request(), which has a 'retry wrapper' built into it to catch

errors.



=cut



sub http_do_request_ex {

 my ($hin, $hout, $hashref)=@_;

 my ($temp,$vin,$resp,$S,$a,$b,$vout,@c,$c,$res)=(1,'');

 my $W; # shorthand alias for the {'whisker'} hash



 return 1 if(!(defined $hin  && ref($hin) ));

 return 1 if(!(defined $hout && ref($hout)));



 %$hout=(); # clear output hash

 $$hout{whisker}->{uri}=$$hin{whisker}->{uri}; # for tracking purposes

 $$hout{whisker}->{'INITIAL_MAGIC'}=31338; # we can tell requests from responses



 if($LW::LW_HAS_SOCKET==0){

	$$hout{'whisker'}->{'error'}='Socket support not available';

	return 2;}



 if(!defined $$hin{'whisker'} || 

    !defined $$hin{'whisker'}->{'INITIAL_MAGIC'} ||

    $$hin{'whisker'}->{'INITIAL_MAGIC'}!=31337 ){

	$$hout{'whisker'}->{'error'}='Input hash not initialized';

	return 2;

 }



 if(defined $hashref){

	foreach (keys %$hashref){

		$$hin{'whisker'}->{$_}=$$hashref{$_};}}



 # if we want anti-IDS, make a copy and setup new values

 if(defined $$hin{'whisker'}->{'anti_ids'}){

	my %copy=%{$hin};

	anti_ids(\%copy,$$hin{'whisker'}->{'anti_ids'});

	$W = $copy{'whisker'};

 } else {

	$W = $$hin{'whisker'};

 }



 if($$W{'ssl'}>0 && $LW::LW_HAS_SSL!=1){

	$$hout{'whisker'}->{'error'}='SSL not available';

	return 2;}



 $TIMEOUT=$$W{'timeout'}||10;



 my $cache_key = defined $$W{'proxy_host'} ?

	join(':',$$W{'proxy_host'},$$W{'proxy_port'}) :

	join(':',$$W{'host'},$$W{'port'});



 if(!defined $http_host_cache{$cache_key}){

	# make new entry

	push(@{$http_host_cache{$cache_key}},

		undef, 	# SOCKET		$$Z[0]

		0,	# $SOCKSTATE		$$Z[1]

		undef,	# INET_ATON		$$Z[2]

		undef,	# $SSL_CTX		$$Z[3]

		undef,	# $SSL_THINGY		$$Z[4]

		'',	# $OUTGOING_QUEUE	$$Z[5]

		'',	# $INCOMING_QUEUE	$$Z[6]

		0,	# $STATS_SYNS		$$Z[7]

		0, 	# $STATS_REQS		$$Z[8]

		undef ) # SSL session ID	$$Z[9]

 }



 # NOTE: the 'Z' reference will be going away in future versions...

 $Z = $http_host_cache{$cache_key};



 # use $chost/$cport for actual server we are connecting to

 my ($chost,$cport,$cwhat,$PROXY)=('',80,'',0);



 if(defined $$W{'proxy_host'}){

    $chost=$$W{'proxy_host'};

    $cport=$$W{'proxy_port'}||80;

    $cwhat='proxy';

    $PROXY=1;



    if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){

	$ENV{HTTPS_PROXY} ="$$W{'proxy_host'}:";

	$ENV{HTTPS_PROXY}.=$$W{'proxy_port'}||80; }



 } else {

    $chost=$$W{'host'};

    $cport=$$W{'port'};

    $cwhat='host';

 }



 if($$Z[1]>0){ # check to see if socket is still alive

	if(! sock_valid($Z,$hin,$hout) ){

		$$Z[1]=0;

		sock_close($$Z[0],$$Z[4]);

 }	}

 # technically we have a race condition: socket can go

 # bad before we send request, below.  But that's ok,

 # we handle the errors down there.



 if($$Z[1]==0){



	my $SOCK = _newsym(); 	

	if(defined $$W{'UDP'} && $$W{'UDP'}>0){

		if(!socket($SOCK,PF_INET,SOCK_DGRAM,getprotobyname('udp')||0)){

			$$hout{'whisker'}->{'error'}='Socket() problems (UDP)'; 

			return 2;}

	} else {

		if(!socket($SOCK,PF_INET,SOCK_STREAM,getprotobyname('tcp')||0)){

			$$hout{'whisker'}->{'error'}='Socket() problems'; 

			return 2;}

	}



	$$Z[0]=$SOCK;



	if(defined $$W{'bind_socket'}){

		my $port=$$W{'bind_port'}||14011;

		my $addr;

		if(defined $$W{'bind_addr'}){

			$addr=inet_aton($$W{'bind_addr'});

		} else {

			$addr=INADDR_ANY;

		}

		if(!bind($SOCK, sockaddr_in($port,$addr))){

			$$hout{'whisker'}->{'error'}='Bind() on socket failed';

			return 2;

		}

	}



	$$Z[5]=$$Z[6]=''; # flush in/out queues



	if($$W{'ssl'}>0){ # ssl setup stuff



	    if($LW::LW_SSL_LIB==1){

		if(!defined($$Z[3])){

		    if(! ($$Z[3] = Net::SSLeay::CTX_new()) ){

			$$hout{'whisker'}->{'error'}="SSL_CTX error: $!";

			return 2;}

		    if(defined $$W{'ssl_rsacertfile'}){

			if(! (Net::SSLeay::CTX_use_RSAPrivateKey_file($$Z[3], 

					$$W{'ssl_rsacertfile'},

					&Net::SSLeay::FILETYPE_PEM))){

				$$hout{'whisker'}->{'error'}="SSL_CTX_use_rsacert error: $!";

				return 2;}

		    }

		    if(defined $$W{'ssl_certfile'}){

			if(! (Net::SSLeay::CTX_use_certificate_file($$Z[3], 

					$$W{'ssl_certfile'},

					&Net::SSLeay::FILETYPE_PEM))){

				$$hout{'whisker'}->{'error'}="SSL_CTX_use_cert error: $!";

				return 2;}

		    }

		}

		if(! ($$Z[4] = Net::SSLeay::new($$Z[3])) ){

			$$hout{'whisker'}->{'error'}="SSL_new error: $!";

			return 2;}

		if(defined $$W{'ssl_ciphers'}){

			if(!(Net::SSLeay::set_cipher_list($$Z[4], 

					$$W{'ssl_ciphers'}))){

				$$hout{'whisker'}->{'error'}="SSL_set_ciphers error: $!";

				return 2;}

		}

	    }

	}



	$$Z[2]=inet_aton($chost) if(!defined $$Z[2]);

	if(!defined $$Z[2]){ # can't find hostname

		$$hout{'whisker'}->{'error'}="Can't resolve hostname";

		return 2;

	}



	if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){

		# proxy set in ENV; we always connect to host

		$$Z[4]= Net::SSL->new(

			PeerAddr => $$hin{'whisker'}->{'host'},

			PeerPort => $$hin{'whisker'}->{'port'},

			Timeout => $TIMEOUT );

		if($@){ $$hout{'whisker'}->{'error'}="Can't connect via SSL: $@[0]";

			return 2;}

		$$Z[4]->autoflush(1);

	} else {

		if($LW::LW_NONBLOCK_CONNECT){

			my $flags=fcntl($$Z[0],F_GETFL,0);

			$flags |= O_NONBLOCK; # set nonblock flag

			if(!(fcntl($$Z[0],F_SETFL,$flags))){ # error setting flag

				$LW::LW_NONBLOCK_CONNECT=0; # revert to normal

			} else {

				my $R=connect($$Z[0],sockaddr_in($cport,$$Z[2]));

				if(!$R){ # we didn't connect...

					if($! != EINPROGRESS){

						close($$Z[0]);

						$$Z[0]=undef; # this is a bad socket

						$$hout{'whisker'}->{'error'}="Can't connect to $cwhat";

						return 2;}

					vec($vin,fileno($$Z[0]),1)=1;

					if(!select(undef,$vin,undef,$TIMEOUT) || !getpeername($$Z[0])){

						close($$Z[0]);

						$$Z[0]=undef; # this is a bad socket

						$$hout{'whisker'}->{'error'}="Can't connect to $cwhat";

						return 2;

				}	}

				$flags &= ~O_NONBLOCK; # clear nonblock flag

				if(!(fcntl($$Z[0],F_SETFL,$flags))){ # not good!

					close($$Z[0]);

					$LW::LW_NONBLOCK_CONNECT=0;

					$$Z[0]=undef;

					$$hout{'whisker'}->{'error'}="Error setting socket to block";

					return 2;

			}	}	

		}	



		if(!defined $$Z[0]){ # this is a safety catch

			$$hout{'whisker'}->{'error'}="Error creating valid socket connection";

			return 2; }



		if($LW::LW_NONBLOCK_CONNECT==0){ # attempt to do a timeout alarm...

			eval {

				local $SIG{ALRM} = sub { die "timeout\n" };

				eval {alarm($TIMEOUT)};

				if(!connect($$Z[0],sockaddr_in($cport,$$Z[2]))){

					alarm(0);

					die("no_connect\n"); }

				eval {alarm(0)};

			};

			if($@ || !(defined $$Z[0])){

				$$hout{'whisker'}->{'error'}="Can't connect to $cwhat";

				return 2;

		}	}



		binmode($$Z[0]); # stupid Windows

		# same as IO::Handle->autoflush(1), without importing 1000+ lines

		my $S=select($$Z[0]); 

		$|++; select($S);

	}



	$$Z[1]=1; $$Z[7]++;



	if($$W{'ssl'}>0){



	    if($LW::LW_SSL_LIB==1){



	        if($PROXY){ # handle the proxy CONNECT stuff...

		    my $SSL_CONNECT = "CONNECT $$W{'host'}".

			":$$W{'port'}/ HTTP/1.0\n\n";

		    syswrite($$Z[0],$SSL_CONNECT, length($SSL_CONNECT)); }



		Net::SSLeay::set_fd($$Z[4], fileno($$Z[0]));

		Net::SSLeay::set_session($$Z[4],$$Z[9]) if(defined $$Z[9]);

		if(! (Net::SSLeay::connect($$Z[4])) ){

			$$hout{'whisker'}->{'error'}="SSL_connect error: $!";

			sock_close($$Z[0],$$Z[4]); return 2;}



		if(defined $$W{'save_ssl_info'} && 

				$$W{'save_ssl_info'}>0){

			ssl_save_info($hout,$$Z[4]); }

		my $x=Net::SSLeay::ctrl($$Z[4],6,0,'');

		$$Z[9]=Net::SSLeay::get_session($$Z[4]) unless(defined $$W{'ssl_resume'} &&

			$$W{'ssl_resume'}==0);

	    }



	} else {

		$$Z[4]=undef;

	}

 }



 if(defined $$W{'ids_session_splice'} &&

            $$W{'ids_session_splice'}>0 &&

		$$W{'ssl'}==0){ # no session_spice over ssl

	setsockopt($$Z[0],SOL_SOCKET,SO_SNDLOWAT,1);

	@c=split(//, &http_req2line($hin));

	# notice we bypass queueing here, in order to trickle the packets

	my $ss;

	foreach $c (@c){ 

		$ss=syswrite($$Z[0],$c,1); # char size assumed to be 1

		if(!defined $ss || $ss==0){

			$$hout{'whisker'}->{'error'}="Error sending session splice request to server";

			sock_close($$Z[0],$$Z[4]); return 1;

		}

		select(undef,undef,undef,.1);

	}

 } else {

	 http_queue(http_req2line($hin)); }



 $$Z[8]++;



 if($$W{'http_ver'} ne '0.9'){

    my %SENT;

    if(defined $$W{'header_order'} && ref($$W{'header_order'})){

	foreach (@{$$W{'header_order'}}){

		next if($_ eq '' || $_ eq 'whisker');

		if(ref($$hin{$_})){

			$SENT{$_}||=0;

			my $v=$$hin{$_}->[$SENT{$_}];

			http_queue("$_: $v$$W{'http_eol'}");

		} else {

			http_queue("$_: $$hin{$_}$$W{'http_eol'}");

		}

		$SENT{$_}++;

	}

    }



    foreach (keys %$hin){

	next if($_ eq '' || $_ eq 'whisker');

	next if(defined $SENT{$_});

	if(ref($$hin{$_})){ # header with multiple values

		my $key=$_;

		foreach (@{$$hin{$key}}){

		  http_queue("$key: $_$$W{'http_eol'}");}

	} else { # normal header

		http_queue("$_: $$hin{$_}$$W{'http_eol'}");

	}

    }



    if(defined $$W{'raw_header_data'}){

	http_queue($$W{'raw_header_data'});}



    http_queue($$W{'http_eol'});



    if(defined $$W{'data'}){ 

	http_queue($$W{'data'});}



 } # http 0.9 support



 # take a MD5 of queue, if wanted

 if(defined $$W{'queue_md5'}){

	$$hout{'whisker'}->{'queue_md5'}= LW::md5($$Z[5]);

 }





 # all data is wrangled...actually send it now

 if($res=http_queue_send($$Z[0],$$Z[4])){

	$$hout{'whisker'}->{'error'}="Error sending request to server: $res";

	sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}



 undef $vin;

 if(defined $$Z[4]){

	if($LW::LW_SSL_LIB==1){ # Net::SSLeay

 		shutdown $$Z[0], 1; 

 		vec($vin,fileno($$Z[0]),1)=1;

	} else { # Net::SSL

		shutdown $$Z[4], 1;

		vec($vin,fileno($$Z[4]),1)=1;

	}

 } else {

	vec($vin,fileno($$Z[0]),1)=1; 	 

 }



 if(!select($vin,undef,undef,$TIMEOUT)){

	$$hout{'whisker'}->{'error'}="Server read timed out";

	sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}



my ($LC,$CL,$TE,$CO)=('',-1,'',''); # extra header stuff



$$hout{'whisker'}->{'lowercase_incoming_headers'} = 

	$$W{'lowercase_incoming_headers'};



if($$W{'http_ver'} ne '0.9'){



 do { # catch '100 Continue' responses

  $resp=sock_getline($$Z[0],$$Z[4]);

  #$resp=~tr/\r\n//d if(defined $resp);



  if(!defined $resp){

	$$hout{'whisker'}->{'error'}='Error reading HTTP response';

	if($!){ # this should be left over from sysread via sock_getline

		$$hout{'whisker'}->{'error'}.=": $!"; }

	$$hout{'whisker'}->{'data'}=$$Z[6];

	sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers

	return 1;}



  if(defined $$W{'save_raw_headers'}){

	$$hout{'whisker'}->{'raw_header_data'}.=$resp;}



  if($resp!~/^HTTP\/([0-9.]{3})[ \t]+(\d+)[ \t]{0,1}(.*?)[\r\n]+/){

	$$hout{'whisker'}->{'error'}="Invalid HTTP response: $resp";

	# let's save the incoming data...we might want it

	$$hout{'whisker'}->{'data'}=$resp;

	while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ 

		$$hout{'whisker'}->{'data'}.=$_;}

	# normally we'd check the results to see if socket is closed, but

	# we close it anyway, so it doesn't matter

	sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers

	return 1;}



  $$hout{'whisker'}->{'http_ver'}	= $1;

  $$hout{'whisker'}->{'http_resp'}	= $2;

  $$hout{'whisker'}->{'http_resp_message'}= $3;

  $$hout{'whisker'}->{'code'}		= $2;



  $$hout{'whisker'}->{'100_continue'}++ if($2 == 100);



  while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ # check pertinent headers



	if(defined $$W{'save_raw_headers'}){

		$$hout{'whisker'}->{'raw_header_data'}.=$_;}



	$_=~s/[\r]{0,1}\n$//; # anchored regex, so it's fast

	last if ($_ eq ''); # acceptable assumption case?



	my $l2=index($_,':'); # this is faster than regex

	$a=substr($_,0,$l2); 

	$b=substr($_,$l2+1);

	$b=~s/^([ \t]*)//; # anchored regex, so it's fast



	$hout{'whisker'}->{'abnormal_header_spacing'}++ if($1 ne ' ');



	$LC = lc($a);

	next         if($LC eq 'whisker');

	$TE = lc($b) if($LC eq 'transfer-encoding');

	$CL = $b     if($LC eq 'content-length');

	$CO = lc($b) if($LC eq 'connection');



	if($$W{'lowercase_incoming_headers'}>0){

		$a=$LC;

	} elsif($$W{'normalize_incoming_headers'}>0){ 

                $a=~s/(-[a-z])/uc($1)/eg;

 	}



	# save the received header order, in case we're curious

	push(@{$$hout{'whisker'}->{'recv_header_order'}},$a);



	if(defined $$hout{$a} && $$W{'ignore_duplicate_headers'}!=1){

	  if(!ref($$hout{$a})){

	    my $temp=$$hout{$a};

	    delete $$hout{$a};

	    push(@{$$hout{$a}},$temp);

	  }

	  push(@{$$hout{$a}},$b);

	} else {

	  $$hout{$a}=$b;

  }	}



  # did we have a socket error?

  if($!){

	$hout{'whisker'}->{'error'}='Error in reading response/headers';

	sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1; }



  if( $CO eq '' ){ # do whatever the client wanted

	$CO = (defined $$hin{'Connection'}) ? lc($$hin{'Connection'}) : 

		'close'; }



 } while($$hout{'whisker'}->{'http_resp'}==100);



} else { # http ver 0.9, we need to fake it

 # Keep in mind lame broken servers, like IIS, still send headers for 

 # 0.9 requests; the headers are treated as data.  Also keep in mind

 # that if the server doesn't support HTTP 0.9 requests, it will spit

 # back an HTTP 1.0 response header.  User is responsible for figuring

 # this out himself.

 $$hout{'whisker'}->{'http_ver'}='0.9';

 $$hout{'whisker'}->{'http_resp'}='200';

 $$hout{'whisker'}->{'http_resp_message'}='';

}



 if($$W{'force_bodysnatch'} || ( $$W{'method'} ne 'HEAD' && 

	$$hout{'whisker'}->{'http_resp'}!=206 &&

	$$hout{'whisker'}->{'http_resp'}!=102)){

  if ($TE eq 'chunked') { 

	if(!defined ($a=sock_getline($$Z[0],$$Z[4]))){

		$$hout{'whisker'}->{'error'}='Error reading chunked data length';

		sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}

	$a=~tr/a-fA-F0-9//cd; $CL=hex($a); 

	$$hout{'whisker'}->{'data'}='';

	while($CL!=0) { # chunked sucks

		if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){

			$$hout{'whisker'}->{'error'}="Error reading chunked data: $!";

			sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}

		$$hout{'whisker'}->{'data'}=$$hout{'whisker'}->{'data'} . $temp;

		$temp=sock_getline($$Z[0], $$Z[4]);

		($temp=sock_getline($$Z[0], $$Z[4])) if(defined $temp &&

			$temp=~/^[\r\n]*$/);

		if(!defined $temp){ # this will catch errors in either sock_getline

			$$hout{'whisker'}->{'error'}="Error reading chunked data: $!";

			sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}

		$temp=~tr/a-fA-F0-9//cd; $CL=hex($temp);}

	# read in trailer headers

	while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ tr/\r\n//d; last if($_ eq ''); }

	# Hmmmm...error, but we should have full body.  Don't return error

	if($!){ $$Z[1]=0; sock_close($$Z[0],$$Z[4]); }

  } else {

 	if ($CL != -1) {

		if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){

			$$hout{'whisker'}->{'error'}="Error reading data: $!";

			sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}

	} else {  # Yuck...read until server stops sending....

		$temp=sock_getall($$Z[0],$$Z[4]);

		# we go until we puke, so close socket and don't return error

		sock_close($$Z[0],$$Z[4]); $$Z[1]=0;

	}

	$$hout{'whisker'}->{'data'}=$temp; 

  }

 } # /method ne HEAD && http_resp ne 206 or 102/



 if(($CO ne 'keep-alive' || ( defined $$hin{'Connection'} &&

		lc($$hin{'Connection'}) eq 'close')) && $$W{'force_open'}!=1){

	$$Z[1]=0; sock_close($$Z[0],$$Z[4]); 

 }	 



 # this way we know what the state *would* have been...

 $$hout{'whisker'}->{'sockstate'}=$$Z[1];

 if($$W{'force_close'}>0) {

	$$Z[1]=0; sock_close($$Z[0],$$Z[4]); } 



 if($$W{'ssl'}>0){ # we don't reuse SSL sockets

	$$Z[1]=0; sock_close($$Z[0],$$Z[4]); }



 $$hout{'whisker'}->{'stats_reqs'}=$$Z[8];

 $$hout{'whisker'}->{'stats_syns'}=$$Z[7];

 $$hout{'whisker'}->{'error'}=''; # no errors

 return 0;

}





##################################################################



=pod



=head1 - Function: LW::http_req2line (INTERNAL)

  

Params: \%hin, $switch

Return: $request



req2line is used internally by LW::http_do_request, as well as provides a

convienient way to turn a %hin configuration into an actual HTTP request

line.  If $switch is set to 1, then the returned $request will be the URI

only ('/requested/page.html'), versus the entire HTTP request ('GET

/requested/page.html HTTP/1.0\n\n').  Also, if the 'full_request_override'

whisker config variable is set in %hin, then it will be returned instead

of the constructed URI.



=cut



sub http_req2line {

 my ($S,$hin,$UO)=('',@_);

 $UO||=0; # shut up -w warning



 # notice: full_request_override can play havoc with proxy settings

 if(defined $$hin{'whisker'}->{'full_request_override'}){

	return $$hin{'whisker'}->{'full_request_override'};



 } else { # notice the components of a request--this is for flexibility



	if($UO!=1){$S.= 	$$hin{'whisker'}->{'method'}.

				$$hin{'whisker'}->{'method_postfix'}.

				$$hin{'whisker'}->{'req_spacer'};

	

		if($$hin{'whisker'}->{'include_host_in_uri'}>0){

			$S.=	'http://';



			if(defined $$hin{'whisker'}->{'uri_user'}){

			$S.=	$$hin{'whisker'}->{'uri_user'};

			if(defined $$hin{'whisker'}->{'uri_password'}){

				$S.=	':'.$$hin{'whisker'}->{'uri_user'};

			}

			$S.=	'@';

			}



			$S.=	$$hin{'whisker'}->{'host'}.

				':'.$$hin{'whisker'}->{'port'};}}



	$S.=	$$hin{'whisker'}->{'uri_prefix'}.

		$$hin{'whisker'}->{'uri'}.

		$$hin{'whisker'}->{'uri_postfix'};



	if(defined $$hin{'whisker'}->{'uri_param'}){

		$S.= 	$$hin{'whisker'}->{'uri_param_sep'}.

			$$hin{'whisker'}->{'uri_param'};}



	if($UO!=1){  if($$hin{'whisker'}->{'http_ver'} ne '0.9'){

			$S.= 	$$hin{'whisker'}->{'req_spacer2'}.'HTTP/'.

				$$hin{'whisker'}->{'http_ver'}.

				$$hin{'whisker'}->{'http_req_trailer'};}

			$S.=	$$hin{'whisker'}->{'http_eol'};}}

 return $S;}







##################################################################



=pod



=head1 - Function LW::sock_close (INTERNAL)

   

Params: $socket_file_descriptor, $SSL_THINGY

Return: nothing



This function will close the indicated socket and SSL connection (if 

necessary).  They are wrapped in eval()s to make sure if the functions 

puke, it doesn't kill the entire program.



=cut



sub sock_close {

	my ($fd,$ssl)=@_;



	if(defined $ssl){

	    if($LW::LW_SSL_LIB==1){ # Net::SSLeay

		eval "&Net::SSLeay::free($ssl)";

#		eval "&Net::SSLeay::CTX_free($$Z[3])";

	    } else { # Net::SSL

		eval { close($ssl) }; # is that right for Net::SSL?

	    }

	}

	eval { close($fd); };



	$$Z[4]=undef;

}



##################################################################



=pod



=head1 - Function LW::sock_valid (INTERNAL)

   

Params: $Z reference, \%hin, \%hout

Return: 1 if socket valid, 0 if socket disconnected



This is an internal function used to determine if a socket is

still good (i.e. the other END hasn't closed the connection).

This really only applies to persistent (Keep-Alive) connections.



This function is not intended for external use.



=cut



sub sock_valid {

	my ($z,$Hin,$Hout)=@_;



	my $slurp=$$Hin{'whisker'}->{'trailing_slurp'};

	my ($o,$vin)=(undef,'');



	return 0 if(defined $$z[3]); # we don't do SSL yet



	# closed socket sets read flag (and so does waiting data)

 	vec($vin,fileno($$z[0]),1)=1;

 	if(select(($o=$vin),undef,undef,.01)){ # we have data to read

		my ($hold, $res);



		do {

			$res = sysread($$z[0], $hold, 4096);

			$$z[6].=$hold if($slurp==0); # save to queue

			$$Hout{'whisker'}->{'slurped'}.="$hold\0"

				if($slurp==1); # save to hout hash

			# fall through value of 2 doesn't do anything

		} while ($res && select(($o=$vin),undef,undef,.01));



		if(!defined $res || $res==0){ # error or EOF

			return 0;

		}

	}

    

	return 1;

}



##################################################################



=pod



=head1 - Function: LW::sock_getline (INTERNAL)

   

Params: $socket_file_descriptor, $SSL_THINGY

Return: $string, undef on error (timeout)



This function is used internally to read a line of input (up to a '\n')

from the given socket file descriptor (regular or SSL).



This function is not intended for external use.



=cut



sub sock_getline { # read from socket w/ timeouts

        my ($fd,$ssl) = @_;

        my ($str,$t)=('','');



        $t = index($$Z[6],"\n",0);



        while($t < 0){

                return undef if &http_queue_read($fd,$ssl);

                $t=index($$Z[6],"\n",0);

        }



	# MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines

	# my $r;

	# ($r,$$Z[6])=unpack('A'.($t+1).'A*',$$Z[6]);

	# return $r;



	# SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines

	# return substr($$Z[6],0,$t+1,'');



	# LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines

	my $r = substr($$Z[6],0,$t+1);

	substr($$Z[6],0,$t+1)='';

	return $r;

}



##################################################################



=pod



=head1 - Function: LW::sock_get (INTERNAL)

   

Params: $socket_file_descriptor, $SSL_THINGY, required $amount

Return: $string, undef on error



This function is used internally to read input from the given socket 

file descriptor (regular or SSL).  Will abort/error if $amount is not

available.



This function is not intended for external use.



=cut



sub sock_get { # read from socket w/ timeouts

        my ($fd,$ssl,$amount) = @_;

        my ($str,$t)=('','');



	while($amount > length($$Z[6])){

                return undef if &http_queue_read($fd,$ssl);

	}



	# MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines

	# my $r;

	# ($r,$$Z[6])=unpack('A'.$amount.'A*',$$Z[6]);

	# return $r;



	# SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines

	# return substr($$Z[6],0,$amount,'');



	# LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines

	my $r = substr($$Z[6],0,$amount);

	substr($$Z[6],0,$amount)='';

	return $r;

}



##################################################################



=pod



=head1 - Function: LW::sock_getall (INTERNAL)

   

Params: $socket_file_descriptor, $SSL_THINGY

Return: $string



This function is used internally to read input from the given socket 

file descriptor (regular or SSL).  It will return everything received

until an error (no data or real error) occurs.



This function is not intended for external use.



=cut



sub sock_getall {

        my ($fd,$ssl) = @_;

        1 while( !(&http_queue_read($fd,$ssl)) );

        return $$Z[6];

}



##################################################################



=pod



=head1 - Function: LW::http_queue_read (INTERNAL)

   

Params: $fd, $ssl

Return: $character, undef on error (timeout)



http_queue_read() will put incoming data from the server into 

the incoming queue for reading.  If there's no more data (or

on error), it will return 1.  Otherwise it returns 0.



This function is really for internal use only.



=cut



sub http_queue_read {

	my ($fd,$ssl)=@_;

	my ($vin, $t)=('','');



	if(defined $ssl){

	    if($LW::LW_SSL_LIB==1){ # Net::SSLeay

		local $SIG{ALRM} = sub { die "timeout\n" };

		local $SIG{PIPE} = sub { die "pipe_error\n" };

		eval {

			eval { alarm($TIMEOUT); };

			$t=Net::SSLeay::read($ssl);

			eval { alarm(0); };

		};

        	if($@ || !defined $t || $t eq ''){

			return 1;}

		$$Z[6].=$t;

	    } else { # Net::SSL

		if(!$ssl->read($t,1024)){ return 1;

		} else { $$Z[6].=$t;}

	    }

	} else {

		vec($vin,fileno($fd),1)=1; # wait only so long to read...

		if(!select($vin,undef,undef,$TIMEOUT)){

			return 1;}

               	if(!sysread($fd,$t,4096)){	return 1; # EOF or error

		} else {			$$Z[6].=$t;}

	}



	return 0;

}



##################################################################



=pod



=head1 - Function: LW::http_queue_send (INTERNAL)

   

Params: $sock, $ssl

Return: $status_result (undef=ok, else error message)



This functions sends the current queue (made with LW::http_queue) to the 

server via the specified SSL or socket connection.



=cut



sub http_queue_send { # write to socket

	my ($fd,$ssl)=@_;

	my ($v,$wrote,$err)=('');



	my $len = length($$Z[5]);

	if(defined $ssl){

	    if($LW::LW_SSL_LIB==1){ # Net::SSLeay

		($wrote,$err)=Net::SSLeay::ssl_write_all($ssl,$$Z[5]);

		return 'Could not send entire data queue' if ($wrote!=$len);

		return "SSL_write error: $err" unless $wrote;

	    } else { # Net::SSL

		$ssl->print($$Z[5]);

	    }

	} else {

        	vec($v,fileno($fd),1)=1;

 		if(!select(undef,$v,undef,.01)){ 

			return 'Socket write test failed'; }

		$wrote=syswrite($fd,$$Z[5],length($$Z[5]));

		return "Error sending data queue: $!" if(!defined $wrote);

		return 'Could not send entire data queue' if ($wrote != $len);

	}

	$$Z[5]=''; return undef;

}





##################################################################



=pod



=head1 - Function: LW::http_queue (INTERNAL)

   

Params: $data

Return: nothing



This function will buffer the output to be sent to the server.  Output is 

buffered for various reasons (particularlly because of SSL, but also 

allowing the chance to 'go back' and modify the final output before it's 

actually sent (after header constructions, etc).



=cut



sub http_queue {

	$$Z[5].= shift;

}





##################################################################



=pod



=head1 - Function: LW::http_fixup_request

   

Params: $hash_ref

Return: Nothing



This function takes a %hin hash reference and makes sure the proper 

headers exist (for example, it will add the Host: header, calculate the 

Content-Length: header for POST requests, etc).  For standard requests 

(i.e. you want the request to be HTTP RFC-compliant), you should call this 

function right before you call LW::http_do_request.



=cut



sub http_fixup_request {

 my $hin=shift;



 return if(!(defined $hin && ref($hin)));



 if($$hin{'whisker'}->{'http_ver'} eq '1.1'){

 	$$hin{'Host'}=$$hin{'whisker'}->{'host'} if(!defined $$hin{'Host'});

	$$hin{'Connection'}='Keep-Alive' if(!defined $$hin{'Connection'});

 }



 if(defined $$hin{'whisker'}->{'data'}){ 

 	if(!defined $$hin{'Content-Length'}){

		$$hin{'Content-Length'}=length($$hin{'whisker'}->{'data'});}

#	if(!defined $$hin{'Content-Encoding'}){

#		$$hin{'Content-Encoding'}='application/x-www-form-urlencoded';}

 }



 if(defined $$hin{'whisker'}->{'proxy_host'}){

	$$hin{'whisker'}->{'include_host_in_uri'}=1;}



}



##################################################################



=pod



=head1 - Function: LW::http_reset

     

Params: Nothing

Return: Nothing



The LW::http_reset function will walk through the %http_host_cache, 

closing all open sockets and freeing SSL resources.  It also clears

out the host cache in case you need to rerun everything fresh.



=cut



sub http_reset {

 my $key;



 foreach $key (keys %http_host_cache){

 	# *Z=$http_host_cache{$key};

	sock_close($http_host_cache{$key}->[0],

			$http_host_cache{$key}->[4]);

	my $x=$http_host_cache{$key}->[3];

	if(defined $x && $LW::LW_SSL_LIB==1){

		eval "Net::SSLeay::CTX_free($x)"; }

	delete $http_host_cache{$key};

 }

}



##################################################################



=pod



=head1 - Function: LW::ssl_save_info (INTERNAL)

     

Params: \%hout, $ssl_connection

Return: Nothing



This is an internal function used to save various Net::SSLeay

information into the given hash.  Triggered by setting

{'whisker'}->{'save_ssl_info'}=1.



=cut



sub ssl_save_info {

	my ($hr,$SSL)=@_;

	my $cert;



	return if($LW::LW_SSL_LIB!=1); # only Net::SSLeay used

	$$hr{'whisker'}->{'ssl_cipher'}=Net::SSLeay::get_cipher($SSL);		



	if( $cert = Net::SSLeay::get_peer_certificate($SSL)){

		$$hr{'whisker'}->{'ssl_cert_subject'} = 

			Net::SSLeay::X509_NAME_oneline(

                    	Net::SSLeay::X509_get_subject_name($cert) );



		$$hr{'whisker'}->{'ssl_cert_issuer'} = 

			Net::SSLeay::X509_NAME_oneline(

                    	Net::SSLeay::X509_get_issuer_name($cert) );

	}

}



##################################################################



{ $SYMCOUNT = 0;

sub _newsym { # same as Symbol::gensym; taken from libwhisker2

	my $pkg="LW::";

	my $name = "_STREAM_" . $SYMCOUNT++;

	delete $$pkg{$name};

	return \*{$pkg.$name};

}}



##################################################################

=pod



=head1 ++ Sub package: mdx



The mdx subpackage contains support for making MD4 and MD5 hashes of the 

given data.  It will attempt to use a faster perl module if installed, 

and will fall back on the internal perl version (which is *slow* in 

comparison) if nothing else was found.



This was written in a few hours using the explanation of Applied 

Cryptography as the main reference, and Digest::Perl::MD5 as a secondary

reference.  MD4 was later added, using Authen::NTLM::MD4 as a reference.



This code should be cross-platform (particularly 64-bit) compatible; if 

you get errors, contact rfp@wiretrip.net.



=cut



########################################################################



{ # start md5 packaged varbs

my (@S,@T,@M);

my $code='';



=pod



=head1 - Function: LW::md5



Params: $data

Return: $hex_md5_string



This function takes a data scalar, and composes a MD5 hash of it, and 

returns it in a hex ascii string.  It will use the fastest MD5 function

available.



=cut



sub md5 {

	return undef if(!defined $_[0]); # oops, forgot the data

	return MD5->hexhash($_[0]) if(defined $LW::available{'md5'});

	return md5_perl($_[0]);

}



########################################################################



=pod



=head1 - Function: LW::md5_perl



Params: $data

Return: $hex_md5_string



This is the perl implementation of the MD5 function.  You should use

the md5() function, which will call this function as a last resort.  

You can call this function directly if you want to test the code.



=cut



sub md5_perl {

        my $DATA=shift;

        $DATA=md5_pad($DATA);

        &md5_init() if(!defined $M[0]);

        return md5_perl_generated(\$DATA);

}



########################################################################



=pod



=head1 - Function: LW::md5_init (INTERNAL)



Params: nothing

Return: nothing



This function generates particular values used in the md5_perl function.

Normally you do not have to call it, as md5_perl will call it if needed.

The values here are special MD5 constants.



=cut



sub md5_init {

        return if(defined $S[0]);

        for(my $i=1; $i<=64; $i++){ $T[$i-1]=int((2**32)*abs(sin($i))); }

        my @t=(7,12,17,22,5,9,14,20,4,11,16,23,6,10,15,21);

        for($i=0; $i<64; $i++){  $S[$i]=$t[(int($i/16)*4)+($i%4)]; }

        @M=(    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,

                1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12,

                5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2,

                0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9 );

        &md5_generate();



	# check to see if it works correctly

	my $TEST=md5_pad('foobar');

	if( md5_perl_generated(\$TEST) ne 

		'3858f62230ac3c915f300c664312c63f'){

		die('Error: MD5 self-test not successful.');

	}

}



########################################################################



=pod



=head1 - Function: LW::md5_pad (INTERNAL)



Params: $data

Return: $padded_data



This function pads the data to be compatible with MD5.



This function is from Digest::Perl::MD5, and bears the following

copyrights:



 Copyright 2000 Christian Lackas, Imperia Software Solutions

 Copyright 1998-1999 Gisle Aas.

 Copyright 1995-1996 Neil Winton.

 Copyright 1991-1992 RSA Data Security, Inc.



=cut



sub md5_pad {

	my $l = length(my $msg=shift() . chr(128));

	$ msg .= "\0" x (($l%64<=56?56:120)-$l%64);

	$l=($l-1)*8;

	$msg .= pack 'VV',$l & 0xffffffff, ($l >> 16 >> 16);

	return $msg;

}



########################################################################



=pod



=head1 - Function: LW::md5_generate (INTERNAL)



Params: none

Return: none



This functions generates and compiles the actual MD5 function.  It's

faster to have all the operations inline and in order than to call

functions.  Generating the code via below function cuts the final

code savings to about 1/50th, with the penalty of having to compile

it the first time it's used (which takes all of a second or two).



=cut



sub md5_generate {

 my $N='abcddabccdabbcda';

 my $M='';

 $M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems



 $code=<<EOT;

        sub md5_perl_generated {

	BEGIN { \$^H |= 1; }; # use integer

        my (\$A,\$B,\$C,\$D)=(0x67452301,0xefcdab89,0x98badcfe,0x10325476);

        my (\$a,\$b,\$c,\$d,\$t,\$i);

        my \$dr=shift;

        my \$l=length(\$\$dr);

        for my \$L (0 .. ((\$l/64)-1) ) {

                my \@D = unpack('V16', substr(\$\$dr, \$L*64,64));

                (\$a,\$b,\$c,\$d)=(\$A,\$B,\$C,\$D);

EOT



 for($i=0; $i<16; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

        $code.="\$t=((\$$d^(\$$b\&(\$$c^\$$d)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";

 }

 for(; $i<32; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

        $code.="\$t=((\$$c^(\$$d\&(\$$b^\$$c)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";

 }

 for(; $i<48; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

        $code.="\$t=((\$$b^\$$c^\$$d)+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";

 }

 for(; $i<64; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

        $code.="\$t=((\$$c^(\$$b|(~\$$d)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";

 }



 $code.=<<EOT;

                \$A=\$A+\$a\&0xffffffff; \$B=\$B+\$b\&0xffffffff;

                \$C=\$C+\$c\&0xffffffff; \$D=\$D+\$d\&0xffffffff;

        } # for

	return unpack('H*', pack('V4',\$A,\$B,\$C,\$D)); }

EOT

 eval "$code";

}



} # md5 package container



########################################################################



{ # start md4 packaged varbs

my (@S,@T,@M);

my $code='';



=pod



=head1 - Function: LW::md4



Params: $data

Return: $hex_md4_string



This function takes a data scalar, and composes a MD4 hash of it, and 

returns it in a hex ascii string.  It will use the fastest MD4 function

available.



=cut



sub md4 {

	return undef if(!defined $_[0]); # oops, forgot the data

	md4_perl(@_);

}



########################################################################



=pod



=head1 - Function: LW::md4_perl



Params: $data

Return: $hex_md4_string



This is the perl implementation of the MD4 function.  You should use

the md4() function, which will call this function as a last resort.  

You can call this function directly if you want to test the code.



=cut



sub md4_perl {

        my $DATA=shift;

        $DATA=md5_pad($DATA);

        &md4_init() if(!defined $M[0]);

        return md4_perl_generated(\$DATA);

}



########################################################################



=pod



=head1 - Function: LW::md4_init (INTERNAL)



Params: none

Return: none



This functions generates and compiles the actual MD4 function.  It's

faster to have all the operations inline and in order than to call

functions.  Generating the code via below function cuts the final

code savings to about 1/50th, with the penalty of having to compile

it the first time it's used (which takes all of a second or two).



=cut



sub md4_init {

 return if(defined $S[0]);

 my @t=(3,7,11,19,3,5,9,13,3,9,11,15);

 for($i=0; $i<48; $i++){  $S[$i]=$t[(int($i/16)*4)+($i%4)]; }

 @M=(	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,

	0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,

	0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15 );



 my $N='abcddabccdabbcda';

 my $M='';

 $M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems



 $code=<<EOT;

        sub md4_perl_generated {

	BEGIN { \$^H |= 1; }; # use integer

        my (\$A,\$B,\$C,\$D)=(0x67452301,0xefcdab89,0x98badcfe,0x10325476);

        my (\$a,\$b,\$c,\$d,\$t,\$i);

        my \$dr=shift;

        my \$l=length(\$\$dr);

        for my \$L (0 .. ((\$l/64)-1) ) {

                my \@D = unpack('V16', substr(\$\$dr, \$L*64,64));

                (\$a,\$b,\$c,\$d)=(\$A,\$B,\$C,\$D);

EOT

 

 for($i=0; $i<16; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

	$code.="\$t=((\$$d^(\$$b\&(\$$c^\$$d)))+\$$a+\$D[$M[$i]])$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";

 }

 for(; $i<32; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

 	$code.="\$t=(( (\$$b&\$$c)|(\$$b&\$$d)|(\$$c&\$$d) )+\$$a+\$D[$M[$i]]+0x5a827999)$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";

 }

 for(; $i<48; $i++){

        my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));

 	$code.="\$t=(( \$$b^\$$c^\$$d )+\$$a+\$D[$M[$i]]+0x6ed9eba1)$M;\n";

        $code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";

 }

 

 $code.=<<EOT;

                \$A=\$A+\$a\&0xffffffff; \$B=\$B+\$b\&0xffffffff;

                \$C=\$C+\$c\&0xffffffff; \$D=\$D+\$d\&0xffffffff;

        } # for

	return unpack('H*', pack('V4',\$A,\$B,\$C,\$D)); }

EOT

 eval "$code";



 my $TEST=md5_pad('foobar');

 if( md4_perl_generated(\$TEST) ne 

	'547aefd231dcbaac398625718336f143'){

	die('Error: MD4 self-test not successful.');

 }

}



} # md4 package container





=pod



=head1 ++ Sub package: multipart



The multipart subpackage contains various utility functions which

support making multipart requests (useful for uploading files).



=cut



########################################################################



=pod



=head1 - Function: LW::multipart_set

  

Params: \%multi_hash, $param_name, $param_value

Return: nothing



This function sets the named parameter to the given value within the

supplied multipart hash.



=cut



sub multipart_set {

	my ($hr,$n,$v)=@_;

	return if(!ref($hr)); # error check

	return undef if(!defined $n || $n eq '');

	$$hr{$n}=$v;	

}



########################################################################



=pod



=head1 - Function: LW::multipart_get

  

Params: \%multi_hash, $param_name

Return: $param_value, undef on error



This function retrieves the named parameter to the given value within the

supplied multipart hash.  There is a special case where the named

parameter is actually a file--in which case the resulting value will be

"\0FILE".  In general, all special values will be prefixed with a NULL

character.  In order to get a file's info, use multipart_getfile().



=cut



sub multipart_get {

	my ($hr,$n)=@_;

	return undef if(!ref($hr)); # error check

	return undef if(!defined $n || $n eq '');

	return $$hr{$n};

}



########################################################################



=pod



=head1 - Function: LW::multipart_setfile

  

Params: \%multi_hash, $param_name, $file_path [, $filename]

Return: undef on error, 1 on success



NOTE: this function does not actually add the contents of $file_path into

the %multi_hash; instead, multipart_write() inserts the content when

generating the final request.



=cut



sub multipart_setfile {

	my ($hr,$n,$path)=(shift,shift,shift);

	my ($fname)=shift;



	return undef if(!ref($hr)); # error check

	return undef if(!defined $n || $n eq '');

	return undef if(!defined $path);

	return undef if(! (-e $path && -f $path) );



	if(!defined $fname){

		$path=~m/[\\\/]([^\\\/]+)$/;

		$fname=$1||"whisker-file";

	}



	$$hr{$n}="\0FILE";

	$$hr{"\0$n"}=[$path,$fname];

	return 1;

}



########################################################################



=pod



=head1 - Function: LW::multipart_getfile

  

Params: \%multi_hash, $file_param_name

Return: $path, $name ($path=undef on error)



LW::multipart_getfile is used to retrieve information for a file

parameter contained in %multi_hash.  To use this you would most

likely do:

($path,$fname)=LW::multipart_getfile(\%multi, "param_name");



=cut



sub multipart_getfile {

	my ($hr,$n)=@_;



	return undef if(!ref($hr)); # error check

	return undef if(!defined $n || $n eq '');

	return undef if(!defined $$hr{$n} || $$hr{$n} ne "\0FILE");



	return @{$$hr{"\0$n"}};

}



########################################################################



=pod



=head1 - Function: LW::multipart_boundary

  

Params: \%multi_hash [, $new_boundary_name]

Return: $current_boundary_name



LW::multipart_boundary is used to retrieve, and optionally set, the

multipart boundary used for the request.



NOTE: the function does no checking on the supplied boundary, so if 

you want things to work make sure it's a legit boundary.  Libwhisker

does *not* prefix it with any '---' characters.



=cut



sub multipart_boundary {

	my ($hr,$new)=@_;

	my $ret;



	return undef if(!ref($hr)); # error check



	if(!defined $$hr{"\0BOUNDARY"}){

		# create boundary on the fly

		my $b = uc(LW::utils_randstr(20));

		my $b2 = '-' x 32;

		$$hr{"\0BOUNDARY"}="$b2$b";

	}



	$ret=$$hr{"\0BOUNDARY"};

	if(defined $new){

		$$hr{"\0BOUNDARY"}=$new;

	}



	return $ret;

}



########################################################################



=pod



=head1 - Function: LW::multipart_write

  

Params: \%multi_hash, \%hin_request

Return: 1 if successful, undef on error



LW::multipart_write is used to parse and construct the multipart data

contained in %multi_hash, and place it ready to go in the given whisker

hash (%hin) structure, to be sent to the server.



NOTE: file contents are read into the final %hin, so it's possible for

the hash to get *very* large if you have (a) large file(s).



=cut



sub multipart_write {

	my ($mp,$hr)=@_;



	return undef if(!ref($mp)); # error check

	return undef if(!ref($hr)); # error check



	if(!defined $$mp{"\0BOUNDARY"}){

		# create boundary on the fly

		my $b = uc(LW::utils_randstr(20));

		my $b2 = '-' x 32;

		$$mp{"\0BOUNDARY"}="$b2$b";

	}



	my $B = $$mp{"\0BOUNDARY"};

	my $EOL = $$hr{whisker}->{http_eol}||"\x0d\x0a";



	my $keycount=0;

	foreach (keys %$mp){

		next if(substr($_,0,1) eq "\0");

		$keycount++;

		if($$mp{$_} eq "\0FILE"){

			my ($path,$name)=LW::multipart_getfile($mp,$_);

			next if(!defined $path);

			$$hr{whisker}->{data}.="$B$EOL";

			$$hr{whisker}->{data}.="Content-Disposition: ".

				"form-data; name=\"$_\"; ";

			$$hr{whisker}->{data}.="filename=\"$name\"$EOL";

			$$hr{whisker}->{data}.="Content-Type: ".

				"application/octet-stream$EOL";

			$$hr{whisker}->{data}.=$EOL;

			next if(!open(IN,"<$path"));

			binmode(IN); # stupid Windows

			while(<IN>){

				$$hr{whisker}->{data}.=$_; }

			close(IN);

			$$hr{whisker}->{data}.=$EOL;  # WARNING: is this right? 

		} else {

			$$hr{whisker}->{data}.="$B$EOL";

			$$hr{whisker}->{data}.="Content-Disposition: ".

				"form-data; name=\"$_\"$EOL";

			$$hr{whisker}->{data}.="$EOL$$mp{$_}$EOL";

		}

	}



	if($keycount){

		$$hr{whisker}->{data}.="$B--$EOL"; # closing boundary

		$$hr{"Content-Length"}=length($$hr{whisker}->{data});

		$$hr{"Content-Type"}="multipart/form-data; boundary=$B";

		return 1;

	} else {

		# multipart hash didn't contain params to upload

		return undef;

	}

}



########################################################################





=pod



=head1 - Function: LW::multipart_read

  

Params: \%multi_hash, \%hout_response [, $filepath ]

Return: 1 if successful, undef on error



LW::multipart_read will parse the data contents of the supplied

%hout_response hash, by passing the appropriate info to

multipart_read_data().  Please see multipart_read_data() for more

info on parameters and behaviour.



NOTE: this function will return an error if the given %hout_response

Content-Type is not set to "multipart/form-data".



=cut



sub multipart_read {

	my ($mp, $hr, $fp)=@_;



	return undef if(!(defined $mp && ref($mp)));

	return undef if(!(defined $hr && ref($hr)));



	my $ctype = LW::utils_find_lowercase_key($hr,'content-type');

	return undef if(!defined $ctype);

	return undef if($ctype!~m#^multipart/form-data#i);



	return LW::multipart_read_data($mp,

		\${$hr{'whisker'}->{'data'}},undef,$fp);



}



########################################################################



=pod



=head1 - Function: LW::multipart_read_data

  

Params: \%multi_hash, \$data, $boundary [, $filepath ]

Return: 1 if successful, undef on error



LW::multipart_read_data parses the contents of the supplied data using 

the given boundary and puts the values in the supplied %multi_hash.  

Embedded files will *not* be saved unless a $filepath is given, which

should be a directory suitable for writing out temporary files.



NOTE: currently only application/octet-stream is the only supported

file encoding.  All other file encodings will not be parsed/saved.



=cut



sub multipart_read_data {

	my ($mp, $dr, $bound, $fp)=@_;



	return undef if(!(defined $mp && ref($mp)));

	return undef if(!(defined $dr && ref($dr)));



	# if $bound is undef, then we'll snag what looks to be

	# the first boundry from the data.

	if(!defined $bound){

		if($$dr=~/([-]{5,}[A-Z0-9]+)[\r\n]/i){

			$bound=$1;

		} else {

			# we didn't spot a typical boundary; error

			return undef;

		}

	}



	if(defined $fp && !(-d $fp && -w $fp)){

		$fp=undef; }



	my $line = LW::utils_getline_crlf($dr,0);

	return undef if(!defined $line);

	return undef if( index($line,$bound) != 0);



	my $done=0;

	while(!$done){

		$done=multipart_read_data_part($mp, $dr, $bound, $fp);

	}



	return 1;

}



########################################################################



=pod



=head1 - Function: LW::multipart_read_data_part (INTERNAL)

  

Params: \%multi_hash, \$data, $boundary, $filepath

Return: 0 if more to read, 1 if done



This is an internal function used by multipart_read_data, and should

not be called on it's own.  This is the workhorse, and is quite nasty.



=cut



sub multipart_read_data_part {

	my ($mp, $dr, $bound, $fp)=@_;



	my $dispinfo = LW::utils_getline_crlf($dr);

	return 1 if(!defined $dispinfo);

	return 1 if(length($dispinfo)==0);

	my $lcdisp = lc($dispinfo);



	if(index($lcdisp,'content-disposition: form-data;') != 0){

		return 1; } # bad disposition



	my ($s,$e,$l);



	$s=index($lcdisp,'name="',30);

	$e=index($lcdisp, '"', $s+6);

	return 1 if($s == -1 || $e == -1);	

	my $NAME=substr($dispinfo,$s+6,$e-$s-6);



	$s=index($lcdisp,'filename="',$e);

	my $FILENAME=undef;

	if($s != -1){

		$e=index($lcdisp, '"', $s+10);

		return 1 if($e == -1); # puke; malformed filename

		$FILENAME=substr($dispinfo,$s+10,$e-$s-10);

		$s=rindex($FILENAME,'\\');

		$e=rindex($FILENAME,'/');

		$s=$e if($e>$s);

		$FILENAME=substr($FILENAME,$s+1,length($FILENAME)-$s);

	}



	my $CTYPE = LW::utils_getline_crlf($dr);



	return 1 if(!defined $CTYPE);

	$CTYPE = lc($CTYPE);



	if(length($CTYPE)>0){

		$s=index($CTYPE,'content-type:');

		return 1 if($s!=0); # bad ctype line

		$CTYPE=substr($CTYPE,13,length($CTYPE)-13);

		$CTYPE=~tr/ \t//d;

		my $xx=LW::utils_getline_crlf($dr);

		return 1 if(!defined $xx);

		return 1 if(length($xx)>0);

	} else {

		$CTYPE='application/octet-stream';

	}





	my $VALUE='';

	while( defined ($l=LW::utils_getline_crlf($dr)) ){

		last if(index($l,$bound)==0);	

		$VALUE.=$l;

		$VALUE.="\r\n";

	}



	substr($VALUE,-2,2)='';



	if(!defined $FILENAME){ # read in param

		$$mp{$NAME}=$VALUE;

		return 0;



	} else {  # read in file

		$$mp{$NAME}="\0FILE";

		return 0 if(!defined $fp);



		# TODO: funky content types, like application/x-macbinary

		if($CTYPE ne 'application/octet-stream'){

			return 0; }



		my $rfn = lc(LW::utils_randstr(12));

		my $fullpath = "$fp$rfn";



		$$mp{"\0$NAME"}=[undef,$FILENAME];

		return 0 if(!open(OUT,">$fullpath")); # error opening file

		binmode(OUT); # stupid Windows

		$$mp{"\0$NAME"}=[$fullpath,$FILENAME];

		print OUT $VALUE;

		close(OUT);



		return 0;



	} # if !defined $FILENAME



	return 0; # um, this should never be reached...

}





########################################################################



=pod



=head1 - Function: LW::multipart_files_list

  

Params: \%multi_hash

Return: @files



LW::multipart_files_list returns an array of parameter names for all

the files that are contained in %multi_hash.



=cut



sub multipart_files_list {

	my ($mp)=shift;

	my @ret;



	return () if(!(defined $mp && ref($mp)));

	while( my ($K, $V)=each(%$mp)){

		push(@ret,$K) if($V eq "\0FILE"); }

	return @ret;

}





########################################################################



=pod



=head1 - Function: LW::multipart_params_list

  

Params: \%multi_hash

Return: @params



LW::multipart_files_list returns an array of parameter names for all

the regular parameters (non-file) that are contained in %multi_hash.



=cut



sub multipart_params_list {

	my ($mp)=shift;

	my @ret;



	return () if(!(defined $mp && ref($mp)));

	while( my ($K, $V)=each(%$mp)){

		push(@ret,$K) if($V ne "\0FILE" &&

			substr($K,0,1) ne "\0" ); 

	}

	return @ret;

}



########################################################################





=pod    





=head1 ++ Sub package: ntlm

        

The ntlm sub package implements ntlm authentication routines.



=cut



########################################################################



=pod    



=head1 - Function: LW::ntlm_new

        

Params: $username, $password [, $domain, $ntlm_only]

Return: $ntlm_object



Returns a reference to an array (otherwise known as the 'ntlm object')

which contains the various informations specific to a user/pass combo.

If $ntlm_only is set to 1, then only the NTLM hash (and not the LanMan

hash) will be generated.  This results in a speed boost, and is typically

fine for using against IIS servers.



The array contains the following items, in order:

username, password, domain, lmhash(password), ntlmhash(password)



=cut



sub ntlm_new {

	my ($user,$pass,$domain,$flag)=@_; 

	$flag||=0;

	return undef if(!defined $user);

	$pass||=''; $domain||='';

	my @a=("$user","$pass","$domain",undef,undef);

	my $t;



	if($flag==0){

		$t=substr($pass,0,14);

		$t=~tr/a-z/A-Z/;

		$t.= "\0"x(14-length($t));

		$a[3]=des_E_P16($t); # LanMan password hash

		$a[3].= "\0"x(21-length($a[3]));

	}



	$t=md4(encode_unicode($pass));

	$t=~s/([a-z0-9]{2})/sprintf("%c",hex($1))/ieg;

	$t.="\0"x(21-length($t));

	$a[4]=$t; # NTLM password hash



	&des_cache_reset(); # reset the keys hash

	return \@a;

}



########################################################################



=pod    



=head1 - Function: LW::ntlm_generate_responses (INTERNAL)

        

Params: $ntlm_object, $challenge_token

Return: $lanman_hash, $ntlm_hash



Returns the challenge responses to the given tokens, using the password

set in the $ntlm_object.



=cut



sub ntlm_generate_responses {

	my ($obj,$chal)=@_;

	return (undef,undef) if(!defined $obj || !defined $chal);

	return (undef,undef) if(!ref($obj));

	my $x='';

	$x=des_E_P24($obj->[3], $chal) if(defined $obj->[3]);

	return ($x, des_E_P24($obj->[4], $chal));

}



########################################################################



=pod    



=head1 - Function: LW::ntlm_decode_challenge (INTERNAL)

        

Params: $challenge

Return: @challenge_parts



Splits the supplied challenge into the various parts.  The returned array

contains elements in the following order:



unicode_domain, ident, packet_type, domain_len, domain_maxlen,

domain_offset, flags, challenge_token, reserved, empty, raw_data



=cut



sub ntlm_decode_challenge {

  return undef if(!defined $_[0]);

  my $chal=shift;

  my @res;



  @res=unpack('Z8VvvVVa8a8a8',substr($chal,0,48));

  push(@res,substr($chal,48));

  unshift(@res,substr($chal,$res[4],$res[2]));

  return @res;

}



########################################################################



=pod    



=head1 - Function: LW::ntlm_header (INTERNAL)

        

Params: $string, $header_length, $offset

Return: $header



Constructs an appropriate header for the supplied $string.



=cut



sub ntlm_header {

	my ($s,$h,$o)=@_;

	my $l=length($s);

	return pack('vvV',0,0,$o-$h) if($l==0);

	return pack('vvV',$l,$l,$o);

}



########################################################################



=pod    



=head1 - Function: LW::ntlm_client

        

Params: $ntlm_obj [, $server_challenge]

Return: $response



ntlm_client() is responsible for generating the base64-encoded text you

include in the HTTP Authorization header.  If you call ntlm_client()

without a $server_challenge, the function will return the initial NTLM

request packet (message packet #1).  You send this to the server, and

take the server's response (message packet #2) and pass that as

$server_challenge, causing ntlm_client() to generate the final response

packet (message packet #3).



Note: $server_challenge is expected to be base64 encoded.



=cut



sub ntlm_client {

	my ($obj,$p)=@_;

	my $resp="NTLMSSP\0";



	return undef if(!defined $obj || !ref($obj));



	if(defined $p && $p ne ''){ # answer challenge

		$p=~tr/ \t\r\n//d;

		$p=LW::decode_base64($p);

		my @c=ntlm_decode_challenge($p);

		$uu=encode_unicode($obj->[0]); # username

		$resp.=pack('V',3);

		my($hl,$hn)=ntlm_generate_responses($obj,$c[7]); # token

		return undef if(!defined $hl || !defined $hn);

		my $o=64;

		$resp.=ntlm_header($hl,64,$o);			# LM hash

		$resp.=ntlm_header($hn,64,($o+=length($hl)));	# NTLM hash

		$resp.=ntlm_header($c[0],64,($o+=length($hn)));	# domain

		$resp.=ntlm_header($uu,64,($o+=length($c[0])));	# username

		$resp.=ntlm_header($uu,64,($o+=length($uu))); 	# workstation

		$resp.=ntlm_header('',64,($o+=length($uu)));	# session

		$resp.=pack('V',$c[6]);

		$resp.=$hl.$hn.$c[0].$uu.$uu;



	} else { # initiate challenge

		$resp.=pack('VV',1,0x0000b207);

		$resp.=ntlm_header($obj->[0],32,32);

		$resp.=ntlm_header($obj->[2],32,32+length($obj->[0]));

		$resp .= $obj->[0].$obj->[2];

	}



	return encode_base64($resp,'');

}



########################################################################



=pod    





=head1 ++ Sub package: ntlm_des

        

The ntlm_des sub package implements unchained forward DES in perl, which

is needed by the ntlm auth package to do it's thing.  Note that

unchained forward DES is not a symmetrical cipher--it's much more like

using DES as a digest/hash algorithm.  Thus there is very little

practical reuse of this code outside of NTLM authentication.



The code below has also been 'tweaked' for the reuse of the set of keys,

which is typical when requiring multiple authentication runs.  This leads

to a speed increase when multiple authentications are needed.



The code below is a highly-modified version of Authen::NTLM::DES.pm,

written by Mark.Bush@bushnet.demon.co.uk.  Portions of the code below

bear the following copyrights:



Copyright (C) 2001 Mark Bush. <Mark.Bush@bushnet.demon.co.uk>



The code is based on fetchmail code which is Copyright (C) 1997 Eric

S. Raymond.



Fetchmail uses SMB/Netbios code from samba which is Copyright (C)

Andrew Tridgell 1992-1998 with modifications from Jeremy Allison.



All the des_* functions should be considered internal and not called

directly.



=cut



{ # start of DES local container #######################################

my $generated=0;

my $perm1 = [57, 49, 41, 33, 25, 17, 9,	1, 58, 50, 42, 34, 26, 18,

	     10, 2, 59, 51, 43, 35, 27,	19, 11, 3, 60, 52, 44, 36,

	     63, 55, 47, 39, 31, 23, 15, 7, 62, 54, 46, 38, 30, 22,

	     14, 6, 61, 53, 45, 37, 29, 21, 13, 5, 28, 20, 12, 4];

my $perm2 = [14, 17, 11, 24, 1, 5, 3, 28, 15, 6, 21, 10,

	     23, 19, 12, 4, 26, 8, 16, 7, 27, 20, 13, 2,

	     41, 52, 31, 37, 47, 55, 30, 40, 51, 45, 33, 48,

	     44, 49, 39, 56, 34, 53, 46, 42, 50, 36, 29, 32];

my $perm3 = [58, 50, 42, 34, 26, 18, 10, 2, 60, 52, 44, 36, 28, 20, 12, 4,

	     62, 54, 46, 38, 30, 22, 14, 6, 64, 56, 48, 40, 32, 24, 16, 8,

	     57, 49, 41, 33, 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3,

	     61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7];

my $perm4 = [32, 1, 2, 3, 4, 5, 4, 5, 6, 7, 8, 9,

	     8, 9, 10, 11, 12, 13, 12, 13, 14, 15, 16, 17,

	     16, 17, 18, 19, 20, 21, 20, 21, 22, 23, 24, 25,

	     24, 25, 26, 27, 28, 29, 28, 29, 30, 31, 32, 1];

my $perm5 = [16, 7, 20, 21, 29, 12, 28, 17, 1, 15, 23, 26, 5, 18, 31, 10,

	     2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25];

my $perm6 = [40, 8, 48, 16, 56, 24, 64, 32, 39, 7, 47, 15, 55, 23, 63, 31,

	     38, 6, 46, 14, 54, 22, 62, 30, 37, 5, 45, 13, 53, 21, 61, 29,

	     36, 4, 44, 12, 52, 20, 60, 28, 35, 3, 43, 11, 51, 19, 59, 27,

	     34, 2, 42, 10, 50, 18, 58, 26, 33, 1, 41,  9, 49, 17, 57, 25];

my $sc = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1];



sub des_E_P16 {

  my ($p14) = @_;

  my $sp8 = [0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25];

  my $p7 = substr($p14, 0, 7);

  my $p16 = des_smbhash($sp8, $p7);

  $p7 = substr($p14, 7, 7);

  $p16 .= des_smbhash($sp8, $p7);

  return $p16;

}



sub des_E_P24 {

  my ($p21, $c8_str) = @_;

  my @c8 = map {ord($_)} split(//, $c8_str);

  my $p24 = des_smbhash(\@c8, substr($p21, 0, 7));

  $p24 .= des_smbhash(\@c8, substr($p21, 7, 7));

  $p24 .= des_smbhash(\@c8, substr($p21, 14, 7));

}



sub des_permute {

  my ($i,$out, $in, $p, $n) = (0,@_);

  foreach $i (0..($n-1)){

    $out->[$i] = $in->[$p->[$i]-1]; }

}



sub des_lshift {

	my ($c, $d, $count)=@_;

	my (@outc, @outd, $i, $x);

	while($count--){

		push @$c, shift @$c;

		push @$d, shift @$d;

	}

}



my %dohash_cache; # cache for key data; saves some cycles

my %key_cache;	  # another cache for key data

sub des_cache_reset {

	%dohash_cache=();

	%key_cache=();

}



sub des_dohash

{

  my ($out, $in, $key) = @_;

  my ($i, $j, $k, @pk1, @c, @d, @cd,

      @ki, @pd1, @l, @r, @rl);



# if(!defined $dohash_cache{$skey}){

  &des_permute(\@pk1, $key, $perm1, 56);



  for($i=0;$i<28;$i++) {

    $c[$i] = $pk1[$i];

    $d[$i] = $pk1[$i+28];

  }

  for($i=0;$i<16;$i++){

    my @array;

    &des_lshift(\@c,\@d,$sc->[$i]);

    @cd = (@c, @d);

    &des_permute(\@array, \@cd, $perm2, 48);

    $ki[$i] = \@array;

#    $dohash_cache{$skey}->[$i]=\@array; 

  }

# } else {

#	for($i=0;$i<16;$i++){

#		$ki[$i]=$dohash_cache{$skey}->[$i];}

# }



  des_dohash2($in,\@l,\@r,\@ki);



  @rl = (@r, @l);

  &des_permute($out, \@rl, $perm6, 64);

}



sub des_str_to_key{

  my ($str) = @_;

  my ($i,@key,$out);

  unshift(@str,ord($_))while($_=chop($str));

  $key[0] = $str[0]>>1;

  $key[1] = (($str[0]&0x01)<<6) | ($str[1]>>2);

  $key[2] = (($str[1]&0x03)<<5) | ($str[2]>>3);

  $key[3] = (($str[2]&0x07)<<4) | ($str[3]>>4);

  $key[4] = (($str[3]&0x0f)<<3) | ($str[4]>>5);

  $key[5] = (($str[4]&0x1f)<<2) | ($str[5]>>6);

  $key[6] = (($str[5]&0x3f)<<1) | ($str[6]>>7);

  $key[7] = $str[6]&0x7f;

  foreach $i (0..7) {

    $key[$i] = 0xff&($key[$i]<<1); }

  @{$key_cache{$str}}=@key;

  return \@key;

}



sub des_smbhash

{

  # use faster binary helper

  goto &LW::bin::des_smbhash if(defined $LW::available{'lw::bin'});



  my ($in, $key) = @_;

  my $key2;



  &des_generate if(!$generated);

  if(defined $key_cache{$key}){	$key2=$key_cache{$key};

  } else {			$key2=&des_str_to_key($key); }



 my ($i, $div, $mod, @in, @outb, @inb, @keyb, @out);

  foreach $i (0..63){

    $div = int($i/8); $mod = $i%8;

    $inb[$i] = ($in->[$div] & (1<<(7-($mod))))? 1: 0;

    $keyb[$i] = ($key2->[$div] & (1<<(7-($mod))))? 1: 0;

    $outb[$i] = 0;

  }

  &des_dohash(\@outb, \@inb, \@keyb);

  foreach $i (0..7){ $out[$i] = 0; }

  foreach $i (0..63){

    $out[int($i/8)] |= (1<<(7-($i%8))) if ($outb[$i]); }

  my $out = pack("C8", @out);



  return $out;

}





sub des_generate { # really scary dragons here....this code is optimized

		   # for speed, and not readability

 my ($i,$j);

 my $code=<<EOT;

{ my \$sbox = [[

[14,4,13,1,2,15,11,8,3,10,6,12,5,9,0,7],[0,15,7,4,14,2,13,1,10,6,12,11,9,5,3,8],

[4,1,14,8,13,6,2,11,15,12,9,7,3,10,5,0],[15,12,8,2,4,9,1,7,5,11,3,14,10,0,6,13]

],[

[15,1,8,14,6,11,3,4,9,7,2,13,12,0,5,10],[3,13,4,7,15,2,8,14,12,0,1,10,6,9,11,5],

[0,14,7,11,10,4,13,1,5,8,12,6,9,3,2,15],[13,8,10,1,3,15,4,2,11,6,7,12,0,5,14,9]

],[

[10,0,9,14,6,3,15,5,1,13,12,7,11,4,2,8],[13,7,0,9,3,4,6,10,2,8,5,14,12,11,15,1],

[13,6,4,9,8,15,3,0,11,1,2,12,5,10,14,7],[1,10,13,0,6,9,8,7,4,15,14,3,11,5,2,12]

],[

[7,13,14,3,0,6,9,10,1,2,8,5,11,12,4,15],[13,8,11,5,6,15,0,3,4,7,2,12,1,10,14,9],

[10,6,9,0,12,11,7,13,15,1,3,14,5,2,8,4],[3,15,0,6,10,1,13,8,9,4,5,11,12,7,2,14]

],[

[2,12,4,1,7,10,11,6,8,5,3,15,13,0,14,9],[14,11,2,12,4,7,13,1,5,0,15,10,3,9,8,6],

[4,2,1,11,10,13,7,8,15,9,12,5,6,3,0,14],[11,8,12,7,1,14,2,13,6,15,0,9,10,4,5,3]

],[

[12,1,10,15,9,2,6,8,0,13,3,4,14,7,5,11],[10,15,4,2,7,12,9,5,6,1,13,14,0,11,3,8],

[9,14,15,5,2,8,12,3,7,0,4,10,1,13,11,6],[4,3,2,12,9,5,15,10,11,14,1,7,6,0,8,13]

],[

[4,11,2,14,15,0,8,13,3,12,9,7,5,10,6,1],[13,0,11,7,4,9,1,10,14,3,5,12,2,15,8,6],

[1,4,11,13,12,3,7,14,10,15,6,8,0,5,9,2],[6,11,13,8,1,4,10,7,9,5,0,15,14,2,3,12]

],[

[13,2,8,4,6,15,11,1,10,9,3,14,5,0,12,7],[1,15,13,8,10,3,7,4,12,5,6,11,0,14,9,2],

[7,11,4,1,9,12,14,2,0,6,10,13,15,3,5,8],[2,1,14,7,4,10,8,13,15,12,9,0,3,5,6,11]

]];

EOT



 $code.='sub des_dohash2 { my ($in,$l,$r,$ki)=@_; my (@p,$i,$j,$k,$m,$n);';

 for($i=0;$i<64;$i++){

	$code.="\$p[$i] = \$in->[".($perm3->[$i]-1)."];\n"; }

 for($i=0;$i<32;$i++){

	$code.="\$l->[$i]=\$p[$i]; \$r->[$i]=\$p[".($i+32)."];\n"; }

 $code.='for($i=0;$i<16;$i++){ local (@er,@erk,@b,@cb,@pcb,@r2);';

 for($i=0;$i<48;$i++){

	$code.="\$erk[$i]=\$r->[".($perm4->[$i]-1)."]^(\$ki->[\$i]->[$i]);\n"; }

 for($i=0;$i<8;$i++){

	for($j=0;$j<6;$j++){

		$code.="\$b[$i][$j]=\$erk[".($i*6+$j)."];\n"; }}

 for($i=0;$i<8;$i++){

	$code.="\$m=(\$b[$i][0]<<1)|\$b[$i][5];";

	$code.="\$n=(\$b[$i][1]<<3)|(\$b[$i][2]<<2)|(\$b[$i][3]<<1)|\$b[$i][4];";

	for($j=0;$j<4;$j++){

		$code.="\$b[$i][$j]=(\$sbox->[$i][\$m][\$n]&".(1<<(3-$j)).")?1:0;"; }}

 for($i=0;$i<8;$i++){

	for($j=0;$j<4;$j++){

		$code.="\$cb[".($i*4+$j)."]=\$b[$i][$j];\n"; }}

 for($i=0;$i<32;$i++){

	$code.="\$pcb[$i]=\$cb[".($perm5->[$i]-1)."];\n"; }

 for($i=0;$i<32;$i++){

	$code.="\$r2[$i]=(\$l->[$i])^\$pcb[$i];\n"; }

 for($i=0;$i<32;$i++){

	$code.="\$l->[$i]=\$r->[$i]; \$r->[$i]=\$r2[$i];\n"; }

 $code.='}}}';

 

 eval "$code";

 $generated++;

}



} ##### end of DES container ################################################





=pod



=head1 ++ Sub package: utils



The utils subpackage contains various utility functions which serve

different purposes.



=cut



########################################################################



=pod



=head1 - Function: LW::utils_recperm

  

Params: $uri, $depth, \@dir_parts, \@valid, \&func, \%track, \%arrays, \&cfunc

Return: nothing



This is a special function which is used to recursively-permutate through

a given directory listing.  This is really only used by whisker, in order

to traverse down directories, testing them as it goes.  See whisker 2.0 for

exact usage examples.



=cut



# '/', 0, \@dir.split, \@valid, \&func, \%track, \%arrays, \&cfunc

sub utils_recperm {

 my ($p, $pp, $pn, $r, $fr, $dr, $ar, $cr)=(shift,shift,@_);

 $p=~s#/+#/#g; if($pp >= @$pn) { push @$r, $p if &$cr($$dr{$p});

 } else { my $c=$$pn[$pp];

  if($c!~/^\@/){ utils_recperm($p.$c.'/',$pp+1,@_) if(&$fr($p.$c.'/'));

  } else {	$c=~tr/\@//d; if(defined $$ar{$c}){

		foreach $d (@{$$ar{$c}}){

			if(&$fr($p.$d.'/')){

                  utils_recperm($p.$d.'/',$pp+1,@_);}}}}}}





#################################################################



=pod



=head1 - Function: LW::utils_array_shuffle

  

Params: @array

Return: nothing



This function will randomize the order of the elements in the given array.



=cut



sub utils_array_shuffle { # fisher yates shuffle....w00p!

        my $array=shift; my $i;

        for ($i = @$array; --$i;){

                my $j = int rand ($i+1);

                next if $i==$j;

                @$array[$i,$j]=@$array[$j,$i];

}} # end array_shuffle, from Perl Cookbook (rock!)





#################################################################



=pod



=head1 - Function: LW::utils_randstr

  

Params: [ $size, $chars ]

Return: $random_string



This function generates a random string between 10 and 20 characters

long, or of $size if specified.  If $chars is specified, then the

random function picks characters from the supplied string.  For example,

to have a random string of 10 characters, composed of only the characters

'abcdef', then you would run:



LW::utils_randstr(10,'abcdef');



The default character string is alphanumeric.



=cut



sub utils_randstr {

        my $str;

        my $drift=shift||((rand() * 10) % 10)+10; 



	# 'a'..'z' doesn't seem to work on string assignment :(

	my $CHARS = shift || 'abcdefghijklmnopqrstuvwxyz' .

			'ABCDEFGHIJKLMNOPQRSTUVWXYZ' .

			'0123456789';



	my $L = length($CHARS);

        for(1..$drift){

	        $str .= substr($CHARS,((rand() * $L) % $L),1);

	}

        return $str;}



#################################################################



=pod



=head1 - Function: LW::utils_get_dir

  

Params: $uri

Return: $uri_directory



Will take a URI and return the directory base of it, i.e. /rfp/page.php 

will return /rfp/.



=cut



sub utils_get_dir {

        my ($w,$URL)=(0,shift);



	return undef if(!defined $URL);



	$URL=substr($URL,0,$w) if( ($w=index($URL,'#')) >= 0);

	$URL=substr($URL,0,$w) if( ($w=index($URL,'?')) >= 0);



	if( ($w=rindex($URL,'/')) >= 0){

		$URL = substr($URL,0,$w+1);

	}

        return $URL; 

}





#################################################################



=pod



=head1 - Function: LW::utils_port_open

  

Params: $host, $port

Return: $result



Quick function to attempt to make a connection to the given host and

port.  If a connection was successfully made, function will return true

(1).  Otherwise it returns false (0).



Note: this uses standard TCP connections, thus is not recommended for use

in port-scanning type applications.  Extremely slow.



=cut



sub utils_port_open {  # this should be platform-safe

        my ($target,$port)=@_;



	return 0 if(!defined $target || !defined $port);



        if(!(socket(S,PF_INET,SOCK_STREAM,0))){ return 0;}

        if(connect(S,sockaddr_in($port,inet_aton($target)))){

                close(S); return 1;

        } else { return 0;}}





#################################################################



=pod



=head1 - Function: LW::utils_split_uri

  

Params: $uri_string [, \%hin_request]

Return: @uri_parts



Return an array of the following values, in order:  uri, protocol, host,

port, params, frag, user, password.  Values not defined are given an undef

value.  If a %hin_request hash is passed in, then utils_split_uri() will

also set the appropriate values in the hash.  While it attempts to do

RFC-compliant URI parsing, it still caters to HTTP[S] only.



Note:  utils_split_uri() will only set the %hin_request if the protocol

is HTTP or HTTPS!



=cut



sub utils_split_uri {

	my ($uri,$w)=(shift,'',0);

	my ($hr)=shift;

	my @res=(undef,'http',undef,0,undef,undef,undef,undef);



	return undef if(!defined $uri);



	# remove fragments

	($uri,$res[5])=split('#',$uri,2) if(index($uri,'#',0) >=0);



	# get scheme and net_loc

	my $net_loc = undef;

	if($uri=~s/^([-+.a-z0-9A-Z]+)://){

		$res[1]=lc($1);

		if(substr($uri,0,2) eq '//'){

			$w=index($uri,'/',2);

			if($w >= 0){

				$net_loc=substr($uri,2,$w-2);

				$uri=substr($uri,$w,length($uri)-$w);

			} else {

				($net_loc=$uri)=~tr#/##d;

				$uri='/';

			}

		}

	}





	# parse net_loc info

	if(defined $net_loc){

		if(index($net_loc,'@',0) >=0){

			($res[6],$net_loc)=split('@',$net_loc,2);

			if(index($res[6],':',0) >=0){

				($res[6],$res[7])=split(':',$res[6],2);

			}

		}

		$res[3]=$1 if($net_loc=~s/:([0-9]+)$//);

		$res[2]=$net_loc;

	}



	# remove query info

	($uri,$res[4])=split('\?',$uri,2) if(index($uri,'?',0) >=0);



	# whatever is left over is the uri

	$res[0]=$uri;



	if($res[3]==0 && defined $res[1]){

		$res[3]=80 if($res[1] eq 'http');

		$res[3]=443 if($res[1] eq 'https');

	}



	return @res if($res[3]==0);



	# setup whisker hash

	if(defined $hr && ref($hr)){

		# these must always exist

		$$hr{whisker}->{uri}=$res[0] 		if(defined $res[0]);

		$$hr{whisker}->{ssl}=1			if($res[1] eq 'https');

		$$hr{whisker}->{host}=$res[2]		if(defined $res[2]);

		$$hr{whisker}->{port}=$res[3]		;



		# set/delete parameter attributes

		if(defined $res[4]){

			$$hr{whisker}->{uri_param}=$res[4];

		} else { delete $$hr{whisker}->{uri_param}; }

		if(defined $res[6]){

			$$hr{whisker}->{uri_user}=$res[6];

		} else { delete $$hr{whisker}->{uri_user}; }

		if(defined $res[7]){

			$$hr{whisker}->{uri_password}=$res[7];

		} else { delete $$hr{whisker}->{uri_password}; }

	}

		

	return @res;

}



#################################################################

=pod



=head1 - Function: LW::utils_lowercase_headers

  

Params: \%hash

Return: nothing



Will lowercase all the header names (but not values) of the given hash.



=cut



sub utils_lowercase_headers {

	goto &utils_lowercase_hashkeys;

}



#################################################################

=pod



=head1 - Function: LW::utils_lowercase_hashkeys

  

Params: \%hash

Return: nothing



Will lowercase all the header names (but not values) of the given hash.



=cut



sub utils_lowercase_hashkeys {

	my $href=shift;



	return if(!(defined $href && ref($href)));



	while( my ($key,$val)=each %$href ){

		delete $$href{$key};

		$$href{lc($key)}=$val;

	}

}



#################################################################

=pod



=head1 - Function: LW::utils_find_lowercase_key

  

Params: \%hash, $key

Return: $value, undef on error or not exist



Searches the given hash for the $key (regardless of case), and

returns the value.



=cut



sub utils_find_lowercase_key {

	my ($href,$key)=(shift,lc(shift));



	return undef if(!(defined $href && ref($href)));

	return undef if(!defined $key);	



	while( my ($k,$v)=each %$href ){

		return $v if(lc($k) eq $key);

	}

	return undef;

}



#################################################################



=pod



=head1 - Function: LW::utils_join_uri

  

Params: @vals

Return: $url



Takes the @vals array output from utils_split_uri, and returns a single 

scalar/string with them joined again, in the form of:

protocol://user:password@host:port/uri?params#frag



=cut



sub utils_join_uri {

	my @V=@_;

	my $URL;



	$URL.=$V[1].':' if defined $V[1];

	if(defined $V[2]){

		$URL.='//';

		if(defined $V[6]){

			$URL.=$V[6];

			$URL.=':'.$V[7] if defined $V[7];

			$URL.='@';

		}

		$URL.=$V[2];

	}

	if($V[3]>0){

		my $no = 0;

		$no++ if($V[3]==80 && defined $V[1] && $V[1] eq 'http');

		$no++ if($V[3]==443 && defined $V[1] && $V[1] eq 'https');

		$URL .= ':'.$V[3] if(!$no);

	}

	$URL.=$V[0];

	$URL .= '?'.$V[4] if defined $V[4];

	$URL .= '#'.$V[5] if defined $V[5];

	return $URL;

}



#################################################################



=pod



=head1 - Function: LW::utils_getline

  

Params: \$data [, $resetpos ]

Return: $line (undef if no more data)



Fetches the next \n terminated line from the given data.  Use

the optional $resetpos to reset the internal position pointer.

Does *NOT* return trialing \n.



=cut



{ $POS=0;

sub utils_getline {

	my ($dr, $rp)=@_;



	return undef if(!(defined $dr && ref($dr)));

	$POS=$rp if(defined $rp);



	my $where=index($$dr,"\n",$POS);

	return undef if($where==-1);



	my $str=substr($$dr,$POS,$where-$POS);

	$POS=$where+1;



	return $str;

}}



#################################################################



=pod



=head1 - Function: LW::utils_getline_crlf

  

Params: \$data [, $resetpos ]

Return: $line (undef if no more data)



Fetches the next \r\n terminated line from the given data.  Use

the optional $resetpos to reset the internal position pointer.

Does *NOT* return trialing \r\n.



=cut



{ $POS=0;

sub utils_getline_crlf {

	my ($dr, $rp)=@_;



	return undef if(!(defined $dr && ref($dr)));

	$POS=$rp if(defined $rp);



	my $tpos=$POS;

	while(1){

		my $where=index($$dr,"\n",$tpos);

		return undef if($where==-1);



		if(substr($$dr,$where-1,1) eq "\r"){

			my $str=substr($$dr,$POS,$where-$POS-1);

			$POS=$where+1;

			return $str;

		} else {

			$tpos=$where+1;

		}

	}

}}



#################################################################



=pod



=head1 - Function: LW::utils_absolute_uri

  

Params: $uri, $base_uri [, $normalize_flag ]

Return: $absolute_$url



Double checks that the given $uri is in absolute form (that is,

"http://host/file"), and if not (it's in the form "/file"), then

it will append the given $base_uri to make it absolute.  This

provides a compatibility similar to that found in the URI

subpackage.



If $normalize_flag is set to 1, then the output will be passed

through utils_normalize_uri before being returned.



=cut



sub utils_absolute_uri {

        my ($uri, $buri, $norm)=@_;

        return undef if(!defined $uri || !defined $buri);

	return $uri if($uri=~m#^[a-zA-Z]+://#);



	if(substr($uri,0,1) eq '/'){

		if($buri=~m#^[a-zA-Z]+://#){

			my @p=utils_split_uri($buri);

			$buri="$p[1]://$p[2]";

			$buri.=":$p[3]" if($p[3]!=80);

			$buri.='/';

		} else { # ah suck, base URI isn't absolute...

			return $uri;

		}

	} else {

		$buri=~s/[?#].*$//; # remove params and frags

		$buri.='/' if($buri=~m#^[a-z]+://[^/]+$#i);

		$buri=~s#/[^/]*$#/#;

	}

	return utils_normalize_uri("$buri$uri") 

		if(defined $norm && $norm > 0);

        return $buri.$uri;

}



#################################################################



=pod



=head1 - Function: LW::utils_normalize_uri

  

Params: $uri [, $fix_windows_slashes ]

Return: $normalized_uri



Takes the given $uri and does any /./ and /../ dereferencing in

order to come up with the correct absolute URL.  If the $fix_

windows_slashes parameter is set to 1, all \ (back slashes) will

be converted to / (forward slashes).



=cut



sub utils_normalize_uri {

	my ($host,$uri, $win)=('',@_);



	$uri=~tr#\\#/# if(defined $win && $win>0);



	if($uri=~s#^([-+.a-z0-9A-Z]+:)##){

		return undef if($1 ne 'http:' && $1 ne 'https:');

		$host=$1;

		return undef unless($uri=~s#^(//[^/]+)##);

		$host.=$1; }

	return "$host/" if($uri eq '' || $uri eq '/');



	# fast path check

	return "$host$uri" if(index($uri,'/.')<0);



	# parse order/steps as defined in RFC 1808

	1 while($uri=~s#/\./#/# || $uri=~s#//#/#);

	$uri=~s#/\.$#/#;

	1 while($uri=~s#[^/]+/\.\./##);

	1 while($uri=~s#^/\.\./#/#);

	$uri=~s#[^/]*/\.\.$##;

	$uri||='/';

	return $host.$uri;

}



#################################################################



=pod



=head1 - Function: LW::utils_save_page

  

Params: $file, \%response

Return: 0 on success, 1 on error



Saves the data portion of the given whisker %response hash to the

indicated file.  Can technically save the data portion of a

%request hash too.  A file is not written if there is no data.



Note: LW does not do any special file checking; files are opened

in overwrite mode.



=cut



sub utils_save_page {

	my ($file, $hr)=@_;

	return 1 if(!ref($hr) || ref($file));

	return 0 if(!defined $$hr{'whisker'} || 

		!defined $$hr{'whisker'}->{'data'});

	open(OUT,">$file") || return 1;

	print OUT $$hr{'whisker'}->{'data'};

	close(OUT);

	return 0;

}



#################################################################



=pod



=head1 - Function: LW::utils_getopts

  

Params: $opt_str, \%opt_results

Return: 0 on success, 1 on error



This function is a general implementation of GetOpts::Std.  It will

parse @ARGV, looking for the options specified in $opt_str, and will

put the results in %opt_results.  Behavior/parameter values are

similar to GetOpts::Std's getopts().



Note: this function does *not* support long options (--option),

option grouping (-opq), or options with immediate values (-ovalue).

If an option is indicated as having a value, it will take the next

argument regardless.



=cut



sub utils_getopts {

        my ($str,$ref)=@_;

        my (%O,$l);

        my @left;



        return 1 if($str=~tr/-:a-zA-Z0-9//c);



        while($str=~m/([a-z0-9]:{0,1})/ig){

                $l=$1;

                if($l=~tr/://d){        $O{$l}=1;

                } else {                $O{$l}=0; }

        }



        while($l=shift(@ARGV)){

                push(@left,$l)&&next if(substr($l,0,1) ne '-');

                push(@left,$l)&&next if($l eq '-');

                substr($l,0,1)='';

                if(length($l)!=1){

                        %$ref=();

                        return 1; }

                if($O{$l}==1){

                        my $x=shift(@ARGV);

                        $$ref{$l}=$x;

                } else { $$ref{$l}=1; }

        }



        @ARGV=@left;

        return 0;

}



#################################################################



=pod



=head1 - Function: LW::utils_unidecode_uri

  

Params: $unicode_string

Return: $decoded_string



This function attempts to decode a unicode (UTF-8) string by

converting it into a single-byte-character string.  Overlong 

characters are converted to their standard characters in place; 

non-overlong (aka multi-byte) characters are substituted with the 

0xff; invalid encoding characters are left as-is.



Note: this function is useful for dealing with the various unicode

exploits/vulnerabilities found in web servers; it is *not* good for

doing actual UTF-8 parsing, since characters over a single byte are

basically dropped/replaced with a placeholder.



=cut



sub utils_unidecode_uri {

        my $str = $_[0];

        return $str if($str!~tr/!-~//c); # fastpath

        my ($lead,$count,$idx);

        my $out='';

        my $len = length($str);

        my ($ptr,$no,$nu)=(0,0,0);



        while($ptr < $len){

                my $c=substr($str,$ptr,1);

                if( ord($c) >= 0xc0 && ord($c) <= 0xfd){

                        $count=0;

                        $c=ord($c)<<1;

                        while( ($c & 0x80) == 0x80){

                                $c<<=1;

                                last if($count++ ==4);

                        }

                        $c = ($c & 0xff);

                        for( $idx=1; $idx<$count; $idx++){

                                my $o=ord(substr($str,$ptr+$idx,1));

                                $no=1 if($o != 0x80);

                                $nu=1 if($o <0x80 || $o >0xbf);

                        }

                        my $o=ord(substr($str,$ptr+$idx,1));

                        $nu=1 if( $o < 0x80 || $o > 0xbf);

                        if($nu){

                                $out.=substr($str,$ptr++,1);

                        } else {

                                if($no){

                                        $out.="\xff"; # generic replacement char

                                } else {

                                        my $prior=ord(substr($str,$ptr+$count-1,1))<<6;

                                        $out.= pack("C", (ord(substr($str,$ptr+$count,1) )&0x7f)+$prior);

                                }

                                $ptr += $count+1;

                        }

                        $no=$nu=0;

                } else {

                        $out.=$c;

                        $ptr++;

                }

        }

        return $out;

}



#################################################################



=pod



=head1 - Function: LW::utils_text_wrapper

  

Params: $long_text_string [, $crlf, $width ]

Return: $formatted_test_string



This is a simple function used to format a long line of text for

display on a typical limited-character screen, such as a unix

shell console.



$crlf defaults to "\n", and $width defaults to 76.



=cut



sub utils_text_wrapper {

        my ($out,$w,$str,$crlf,$width)=('',0,@_);

	$crlf||="\n";	$width||=76;

        $str.=$crlf if($str!~/$crlf$/);

        return $str if(length($str)<=$width);

        while(length($str)>$width){

                my $w1=rindex($str,' ',$width);

                my $w2=rindex($str,"\t",$width);

                if($w1>$w2){ $w=$w1; } else { $w=$w2; }

                if($w==-1){	$w=$width;

	        } else {	substr($str,$w,1)=''; }

                $out.=substr($str,0,$w,'');

                $out.=$crlf;

        }

        return $out.$str;

}



#################################################################



1;
