/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	@(#)tcp.c	1.0.16	05/25/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@no.unit.nvg>
 *
 * Fixes:	
 *		Alan Cox	:	Numerous verify_area() calls
 *		Alan Cox	:	Set the ACK bit on a reset
 *		Alan Cox	:	Stopped it crashing if it closed while sk->inuse=1
 *					and was trying to connect (tcp_err()).
 *		Alan Cox	:	All icmp error handling was broken
 *					pointers passed where wrong and the
 *					socket was looked up backwards. Nobody
 *					tested any icmp error code obviously.
 *		Alan Cox	:	tcp_err() now handled properly. It wakes people
 *					on errors. select behaves and the icmp error race
 *					has gone by moving it into sock.c
 *		Alan Cox	:	tcp_reset() fixed to work for everything not just
 *					packets for unknown sockets.
 *		Alan Cox	:	tcp option processing.
 *		Alan Cox	:	Reset tweaked (still not 100%) [Had syn rule wrong]
 *		Herp Rosmanith  :	More reset fixes
 *		Alan Cox	:	No longer acks invalid rst frames. Acking
 *					any kind of RST is right out.
 *		Alan Cox	:	Sets an ignore me flag on an rst receive
 *					otherwise odd bits of prattle escape still
 *		Alan Cox	:	Fixed another acking RST frame bug. Should stop
 *					LAN workplace lockups.
 *		Alan Cox	: 	Some tidyups using the new skb list facilities
 *		Alan Cox	:	sk->keepopen now seems to work
 *		Alan Cox	:	Pulls options out correctly on accepts
 *		Alan Cox	:	Fixed assorted sk->rqueue->next errors
 *		Alan Cox	:	PSH doesn't end a TCP read. Switched a bit to skb ops.
 *		Alan Cox	:	Tidied tcp_data to avoid a potential nasty.
 *		Alan Cox	:	Added some better commenting, as the tcp is hard to follow
 *		Alan Cox	:	Removed incorrect check for 20 * psh
 *	Michael O'Reilly	:	ack < copied bug fix.
 *	Johannes Stille		:	Misc tcp fixes (not all in yet).
 *		Alan Cox	:	FIN with no memory -> CRASH
 *		Alan Cox	:	Added socket option proto entries. Also added awareness of them to accept.
 *		Alan Cox	:	Added TCP options (SOL_TCP)
 *		Alan Cox	:	Switched wakeup calls to callbacks, so the kernel can layer network sockets.
 *		Alan Cox	:	Use ip_tos/ip_ttl settings.
 *		Alan Cox	:	Handle FIN (more) properly (we hope).
 *		Alan Cox	:	RST frames sent on unsynchronised state ack error/
 *		Alan Cox	:	Put in missing check for SYN bit.
 *		Alan Cox	:	Added tcp_select_window() aka NET2E 
 *					window non shrink trick.
 *		Alan Cox	:	Added a couple of small NET2E timer fixes
 *		Charles Hedrick :	TCP fixes
 *		Toomas Tamm	:	TCP window fixes
 *		Alan Cox	:	Small URG fix to rlogin ^C ack fight
 *		Charles Hedrick	:	Rewrote most of it to actually work
 *		Linus		:	Rewrote tcp_read() and URG handling
 *					completely
 *		Gerhard Koerting:	Fixed some missing timer handling
 *		Matthew Dillon  :	Reworked TCP machine states as per RFC
 *		Gerhard Koerting:	PC/TCP workarounds
 *		Adam Caldwell	:	Assorted timer/timing errors
 *		Matthew Dillon	:	Fixed another RST bug
 *		Alan Cox	:	Move to kernel side addressing changes.
 *		Alan Cox	:	Beginning work on TCP fastpathing (not yet usable)
 *		Arnt Gulbrandsen:	Turbocharged tcp_check() routine.
 *		Alan Cox	:	TCP fast path debugging
 *		Alan Cox	:	Window clamping
 *		Michael Riepe	:	Bug in tcp_check()
 *		Gerhard Koerting:	TCP close without route stuck socket bug.
 *		Alan Cox	:	New sk_buffs and support.
 *		Alan Cox	:	BSD select on error fix.
 *		Alan Cox	:	tcp broadcast connect fix.
 *		Alan Cox	:	Binding support.
 *		Alan Cox	:	Redid connect() and state changes to fix
 *					connect to be BSD, and SNMP to count.
 *		Matt Dillon     :	More TCP improvements and RST bug fixes.
 *		Matt Dillon	:	Yet more nasties removed from the TCP code.
 *		Alan Cox	:	BSD accept semantics.
 *	Peter De Schrijver	:	ENOTCONN check missing in tcp_sendmsg().
 *		Alan Cox	:	Reset on closedown bug.
 *		Michael Pall	:	Handle select() after URG properly in all cases.
 *		Michael Pall	:	Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
 *		Michael Pall	:	Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
 *		Michael Pall	:	recv(...,MSG_OOB) never blocks in the BSD api.
 *		Alan Cox	:	Changed the semantics of sk->socket to 
 *					fix a race and a signal problem with
 *					accept() and async I/O.
 *		Alan Cox	:	Relaxed the rules on tcp_sendto().
 *
 *
 * To Fix:
 *
 *			Fast path the code. Two things here - fix the window calculation
 *		so it doesn't iterate over the queue, also spot packets with no funny
 *		options arriving in order and process directly.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or(at your option) any later version.
 *
 * Description of States:
 *
 *	TCP_SYN_SENT		sent a connection request, waiting for ack
 *
 *	TCP_SYN_RECV		received a connection request, sent ack,
 *				waiting for final ack in three-way handshake.
 *
 *	TCP_ESTABLISHED		connection established
 *
 *	TCP_FIN_WAIT1		our side has shutdown, waiting to complete
 *				transmission of remaining buffered data
 *
 *	TCP_FIN_WAIT2		all buffered data sent, waiting for remote
 *				to shutdown
 *
 *	TCP_CLOSING		both sides have shutdown but we still have
 *				data we have to finish sending
 *
 *	TCP_TIME_WAIT		timeout to catch resent junk before entering
 *				closed, can only be entered from FIN_WAIT2
 *				or CLOSING.  Required because the other end
 *				may not have gotten our last ACK causing it
 *				to retransmit the data packet (which we ignore)
 *
 *	TCP_CLOSE_WAIT		remote side has shutdown and is waiting for
 *				us to finish writing our data and to shutdown
 *				(we have to close() to move on to LAST_ACK)
 *
 *	TCP_LAST_ACK		our side has shutdown after remote has
 *				shutdown.  There may still be data in our
 *				buffer that we have to finish sending
 *		
 *	TCP_CLOSE		socket is finished
 */
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/termios.h>
#include <linux/in.h>
#include <linux/fcntl.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include "snmp.h"
#include "ip.h"
#include "protocol.h"
#include "icmp.h"
#include "tcp.h"
#include <linux/skbuff.h>
#include "sock.h"
#include "route.h"
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/segment.h>
#include <linux/mm.h>
#include <linux/netprotocol.h>

/*#define TCP_FASTPATH*/

/* Protocol descriptor for TCP; handed to protocol_size()/protocol_adjust() when frames are built. */
struct protocol proto_tcp;
/* NOTE(review): presumably the ICMP protocol handle - not referenced in the visible part of this file, confirm. */
static struct protocol *pr_icmp;

/* NOTE(review): SEQ_TICK and seq_offset are not used in the visible part of this file - confirm their role. */
#define SEQ_TICK 3
unsigned long seq_offset;
/* TCP counters (TcpCurrEstab, TcpAttemptFails, TcpOutSegs, ...) updated by the routines below. */
struct tcp_mib	tcp_statistics;

#ifdef TCP_FASTPATH
/* Counters that only exist when TCP_FASTPATH is defined (it is commented out above). */
unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
#endif


/*
 *	Smaller of two unsigned values. The comparison is done unsigned;
 *	the result is handed back as an int.
 */
static __inline__ int min(unsigned int a, unsigned int b)
{
	return (a < b) ? a : b;
}

#undef STATE_TRACE

/*
 *	Move a socket into a new TCP state and keep the TcpCurrEstab
 *	counter in step: it drops by one when the socket leaves
 *	TCP_ESTABLISHED and rises by one when it enters it.
 *
 *	sk	socket whose state changes
 *	state	new TCP_* state value
 */
static __inline__ void tcp_set_state(struct sock *sk, int state)
{
	if(sk->state==TCP_ESTABLISHED)
		tcp_statistics.TcpCurrEstab--;
#ifdef STATE_TRACE
	/* sk is a pointer, so it must be printed with %p (it was %s) */
	if(sk->debug)
		printk("TCP sk=%p, State %d -> %d\n",sk, sk->state,state);
#endif	
	sk->state=state;
	if(state==TCP_ESTABLISHED)
		tcp_statistics.TcpCurrEstab++;
}

/*
 *	Put a frame on the socket's retransmission list. The frame is first
 *	marked 'not to be freed' and then appended to the tail of the list
 *	chained through link3. Each frame on the list has skb->h.seq holding
 *	the rightmost edge sequence number of the frame.
 */

static void tcp_queue_retransmit(struct sock *sk, sk_buff *skb)
{
	unsigned long saved;

	/* The frame has to outlive its transmission */
	skb->free=0;

	/* One more block is now outstanding on this socket */
	sk->packets_out++;

	/* The list is edited with interrupts off */
	save_flags(saved);
	cli();

	/* A frame being queued should not already be chained to another */
	if (skb->link3 != NULL)
	{
		printk("tcp_queue_retransmit: link3 != NULL\n");
		skb->link3 = NULL;
	}

	/* Hook onto the old tail if there is one, otherwise start the list */
	if (sk->send_head != NULL)
		sk->send_tail->link3 = skb;
	else
		sk->send_head = skb;
	sk->send_tail = skb;

	restore_flags(saved);

	/*
	 *	Run the write timer for one retransmit timeout (sk->rto); if no
	 *	acknowledgement has arrived by then we may wish to act.
	 */
	reset_timer(sk, TIME_WRITE, sk->rto);
}


/* This routine picks a TCP window for a socket based on
   the following constraints:

   1. The window can never be shrunk once it is offered (RFC 793)
   2. We limit memory per socket

   The candidate window is the receive space reported by sock_rspace(),
   limited by sk->window_clamp when a clamp is set. (The original note
   describes this as NET2E3's heuristic of offering half the memory we
   have handy.)

   Better heuristics welcome
*/

int tcp_select_window(struct sock *sk)
{
	int offer = sock_rspace(sk);

	if (sk->window_clamp)
		offer = min(sk->window_clamp, offer);

	/*
	 * The candidate is only used when it passes two tests:
	 *  - it is at least min(sk->mss, MAX_WINDOW/2), which is the
	 *    receiver side of SWS avoidance as specified in RFC1122;
	 *  - it is no smaller than the window already offered, so the
	 *    window is never retracted (technically allowed, but RFC1122
	 *    advises against it and in practice it causes trouble).
	 * Otherwise the previous offer stands.
	 */
	if (offer >= min(sk->mss, MAX_WINDOW/2) && offer >= sk->window)
		return(offer);
	return(sk->window);
}

/*
 *	Find someone to 'accept'. Must be called with
 *	sk->inuse=1 or cli(). A suitable connection must
 *	be at least ESTABLISHED and in the queue.
 */ 

static sk_buff *tcp_find_established(struct sock *s)
{
	sk_buff *p=skb_peek(&s->receive_queue);
	if(p==NULL)
		return NULL;
	do
	{
		if(p->sk->state==TCP_ESTABLISHED || p->sk->state>=TCP_FIN_WAIT1)
			return p;
		p=p->next;
	}
	while(p!=skb_peek(&s->receive_queue));
	return NULL;
}

/*
 *	Take a connection that is ready to be accepted off the listening
 *	socket's receive queue. tcp_find_established() does the searching;
 *	this routine only unlinks the result, with interrupts disabled for
 *	the duration. Returns the unlinked buffer, or NULL if none is ready.
 */
 
static sk_buff *tcp_dequeue_established(struct sock *s)
{
	unsigned long saved;
	sk_buff *found;

	save_flags(saved);
	cli();

	found = tcp_find_established(s);
	if (found != NULL)
		skb_unlink(found);	/* off the queue it comes */

	restore_flags(saved);
	return found;
}

/*
 *	Put a socket into TIME_WAIT: both directions are marked shut down,
 *	the state change callback is run unless the socket is flagged dead,
 *	and the TIME_CLOSE timer is armed for TCP_TIMEWAIT_LEN.
 */

static void tcp_time_wait(struct sock *sk)
{
	tcp_set_state(sk, TCP_TIME_WAIT);
	sk->shutdown = SHUTDOWN_MASK;

	/* No callback for a socket marked dead */
	if (sk->dead == 0)
		sk->state_change(sk);

	reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
}

/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP. We may want
 *	to resend one frame or lots. The 'all' flag controls this.
 */

static void tcp_do_retransmit(struct sock *sk, int all)
{
  	sk_buff * skb;
  	struct proto *prot;
  	struct device *dev;

	prot = sk->prot;
	skb = sk->send_head;
	
	while (skb != NULL) 
	{
		struct tcphdr *th;
		int size;

		/* FIXME: With the new drivers we can change route over
		   retransmissions */
		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;

		/* 
		 * In general it's OK just to use the old packet.  However we
		 * need to use the current ack and window fields.  Urg and 
		 * urg_ptr could possibly stand to be updated as well, but we 
		 * don't keep the necessary data.  That shouldn't be a problem,
		 * if the other end is doing the right thing.  Since we're 
		 * changing the packet, we have to issue a new IP identifier.
		 *
		 * When the left edge send code is fixed this needs to adjust the
		 * length of any frame that is currently being partially sent due
		 * to lack of window space.
		 */

		size=((unsigned char *)skb->thptr)-((unsigned char *)skb_data(skb));
		
		/*
		 *	Rip any old headers off and go back to the tcp header.
		 */
		 
		skb_pull(skb, size, NULL);
		th=skb->thptr;
		if(th!=(struct tcphdr *)skb_data(skb))
			printk("rexmit botch\n");
		th->ack_seq = ntohl(sk->acked_seq);
		th->window = ntohs(tcp_select_window(sk));
		tcp_send_check(th, sk->saddr, sk->daddr, skb->len, sk);

		/*
		 *	If the packet is still being sent by the device/protocol
		 *	below then don't retransmit. This is both needed, and good -
		 *	especially with connected mode AX.25 where it stops resends
		 *	occuring of an as yet unsent anyway frame!
		 *	We still add up the counts as the round trip time wants
		 *	adjusting.
		 */
		 
		if (sk && !skb_device_locked(skb))
		{
			/* 
			 *	Remove it from any existing driver queue first!
			 */
			skb_unlink(skb);
			/* 
			 *	Stick an IP header on it and post it 
			 */
			ip_statistics.IpOutRequests++;
			proto_ip.output(&proto_ip, skb, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip);
		}
		
		/*
		 *	Count retransmissions
		 */
		 
		sk->retransmits++;
		sk->prot->retransmits ++;
		
		/*
		 *	Only one retransmit requested.
		 */

		if (!all) 
			break;

		/*
		 *	This should cut it off before we send too many packets. 
		 */

		if (sk->retransmits >= sk->cong_window) 
			break;
		skb = skb->link3;
	}
}


/*
 * 	Retransmit with backoff; this is the normal path for timeouts.
 * 	The resending itself lives in tcp_do_retransmit() because tcp_ack
 * 	needs to send from the retransmit queue without starting a backoff.
 */

static void tcp_retransmit_skb(struct sock *sk, int all)
{
	tcp_do_retransmit(sk, all);

	/*
	 * The timeout grows with every retransmission while the rtt
	 * estimate is left alone: rto starts out from rtt and is doubled
	 * here. Jacobson (SIGCOMM 88) suggests doubling is the least we
	 * can get away with. KA9Q doubles for the first few times and then
	 * goes quadratic; netBSD doubles, but only up to *64, and clamps
	 * at 1 to 64 sec afterwards. The protocol defines 120 sec as the
	 * maximum possible RTT, which is the ceiling applied below.
	 */

	sk->retransmits += 1;
	sk->backoff += 1;
	sk->rto = min(sk->rto << 1, 120*HZ);

	/* Restart the write timer with the lengthened timeout */
	reset_timer(sk, TIME_WRITE, sk->rto);
}

/*
 *	A timer event has triggered a tcp retransmit timeout. The
 *	socket xmit queue is ready and set up to send. Because
 *	the ack receive code keeps the queue straight we do
 *	nothing clever here.
 *
 *	For a single-frame retransmit (all == 0) the congestion state is
 *	reset first; with 'all' set the frames are resent with that state
 *	left untouched.
 */


static void tcp_retransmit(struct sock *sk, int all)
{
	if (!all)
	{
		/* Remember half the window at which we lost; this can in theory be zero */
		sk->ssthresh = sk->cong_window >> 1;
		sk->cong_count = 0;

		/* Drop the congestion window back to a single frame */
		sk->cong_window = 1;
	}

	/* Do the actual retransmit. */
	tcp_retransmit_skb(sk, all);
}


/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  
 *
 * skb->h.raw is taken as the ICMP header. The IP and TCP headers
 * quoted in the error are then stepped over with skb_pull() and used
 * to look up the socket the error refers to.
 *
 * Handling by ICMP type:
 *	ICMP_TIME_EXCEEDED	nothing is done; the frame counts as lost.
 *	ICMP_SOURCE_QUENCH	cong_window is reduced by one while above 4.
 *	anything else		sk->err is set when icmp_err_convert[] marks
 *				the code fatal or the socket is in SYN_SENT;
 *				a SYN_SENT socket is also moved to TCP_CLOSE
 *				and its error_report callback is run.
 *
 * NOTE(review): icmph->code indexes icmp_err_convert[] directly and no
 * range check is visible in this routine - confirm the caller bounds it.
 */

void tcp_err(sk_buff *skb)
{
	struct icmphdr *icmph=(struct icmphdr *)skb->h.raw;
	/* NOTE(review): skb_pull() is assumed to return a pointer to the bytes it steps over - confirm */
	struct iphdr *iph=(struct iphdr *)skb_pull(skb,sizeof(*iph),NULL);
	struct tcphdr *th;
	struct sock *sk;

	/* 
	 *	Find the reflected tcp header and look up the ports 
	 */  
	 
	/* ihl counts 32 bit words; step over whatever lies beyond the fixed IP header */
	skb_pull(skb, (4*iph->ihl)-sizeof(*iph),NULL);	/* Options */
	
	/* 
	 *	Passed header space 
	 */
	 
	th=(struct tcphdr *)skb_pull(skb,sizeof(*th), NULL);
	
	
	/*
	 *	Find the socket in question (if one exists) 
	 */
	 
	sk = get_sock(&tcp_prot, th->source, iph->daddr, th->dest, iph->saddr);
	if (sk == NULL) 
		return;
		
	if (icmph->type == ICMP_TIME_EXCEEDED)
		return;	/* Count it as lost */

	/*
	 *	Quench backoff 
	 */  
	 
	if (icmph->type == ICMP_SOURCE_QUENCH) 
	{
		/*
		 * FIXME:
		 * For now we will just trigger a linear backoff.
		 * The slow start code should cause a real backoff here.
		 */
		if (sk->cong_window > 4)
			sk->cong_window--;
		return;
	}

#ifdef FAVOUR_RFC1122_NOT_BROKEN_BSD
	sk->err = icmp_err_convert[icmph->code].errno;  /* TCP should not hide non fatals internally (as BSD does) */
#endif	

	/*
	 * If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 */

	if (icmp_err_convert[icmph->code].fatal ||  sk->state==TCP_SYN_SENT) 
	{
		if (sk->state == TCP_SYN_SENT) 
		{
			/* The connection attempt has failed: count it, close the socket and wake the waiter */
			tcp_statistics.TcpAttemptFails++;
			tcp_set_state(sk, TCP_CLOSE);
			sk->error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
		}
		/* Note that sk->err is stored after error_report() has been called */
		sk->err = icmp_err_convert[icmph->code].errno;		
	}
	return;
}


/*
 *	Walk down the receive queue counting readable data until we hit the end or we find a gap
 *	in the received data queue (ie a frame missing that needs sending to us)
 *
 *	Returns the number of bytes counted; 0 when sk is NULL or the queue is
 *	empty. The queue is scanned with interrupts disabled.
 */

static int tcp_readable(struct sock *sk)
{
	unsigned long counted;	/* sequence number the count has reached */
	unsigned long amount;	/* bytes counted so far */
	sk_buff *skb;
	int sum;		/* bytes in the current frame not yet counted */
	unsigned long flags;

	if(sk && sk->debug)
	  	printk("tcp_readable: %p - ",sk);

	save_flags(flags);
	cli();
	if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
	{
		restore_flags(flags);
	  	if(sk && sk->debug) 
	  		printk("empty\n");
	  	return(0);
	}
  
	counted = sk->copied_seq+1;	/* Where we are at the moment */
	amount = 0;
  
	/* Do until a push or until we are out of data. */
	do 
	{
		if (before(counted, skb->h.th->seq)) 	/* Found a hole so stops here */
			break;
		sum = skb->len -(counted - skb->h.th->seq);	/* Length - header but start from where we are up to (avoid overlaps) */
		/* A SYN takes up a sequence number: allow for it here, take it back off the byte count below */
		if (skb->h.th->syn)
			sum++;
		if (sum > 0) 
		{					/* Add it up, move on */
			amount += sum;
			if (skb->h.th->syn) 
				amount--;
			counted += sum;
		}
		/*
		 * Don't count urg data ... but do it in the right place!
		 * Consider: "old_data (ptr is here) URG PUSH data"
		 * The old code would stop at the first push because
 		 * it counted the urg (amount==1) and then does amount--
		 * *after* the loop.  This means tcp_readable() always
		 * returned zero if any URG PUSH was in the queue, even
		 * though there was normal data available. If we subtract
		 * the urg data right here, we even get it to work for more
		 * than one URG PUSH skb without normal data.
		 * This means that select() finally works now with urg data
		 * in the queue.  Note that rlogin was never affected
		 * because it doesn't use select(); it uses two processes
		 * and a blocking read().  And the queue scan in tcp_read()
		 * was correct.  Mike <pall@rz.uni-karlsruhe.de>
		 */
		if (skb->h.th->urg)
			amount--;	/* don't count urg data */
		/* A pushed frame ends the count, but only once something has been counted */
		if (amount && skb->h.th->psh) 
			break;
		skb = skb->next;
	}
	/* The walk ends when it comes back round to the queue head */
	while(skb != (sk_buff *)&sk->receive_queue);

	restore_flags(flags);
	if(sk->debug)
	  	printk("got %lu bytes.\n",amount);
	return(amount);
}


/*
 *	Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 *	listening socket has a receive queue of sockets to accept.
 *
 *	Returns 1 when the socket is ready for the requested kind of
 *	select and 0 otherwise. The socket is marked in use on entry and
 *	released again on every path out.
 */

static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
{
	int ready = 0;

	sk->inuse = 1;

	switch(sel_type) 
	{
		case SEL_IN:
			select_wait(sk->sleep, wait);
			/*
			 *	Readable if something usable is queued (a connection
			 *	to accept on a listener, or countable data), if an
			 *	error is pending, or if receiving has been shut down.
			 */
			if (skb_peek(&sk->receive_queue) != NULL &&
			    ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)))
				ready = 1;
			else if (sk->err != 0)	/* Receiver error */
				ready = 1;
			else if (sk->shutdown & RCV_SHUTDOWN)
				ready = 1;
			break;

		case SEL_OUT:
			select_wait(sk->sleep, wait);
			if (sk->shutdown & SEND_SHUTDOWN)
			{
				/* FIXME: should this return an error? */
				break;
			}

			/*
			 *	Is there room to write ?
			 */

			if (sock_wspace(sk) >= sk->mtu + 128 + sk->prot->max_header)
			{
				release_sock(sk);
				/*
				 *	This should cause connect to work ok: a socket
				 *	still in the handshake is not writable yet. The
				 *	state is looked at after the release.
				 */
				return(sk->state != TCP_SYN_RECV && sk->state != TCP_SYN_SENT);
			}
			if (sk->err != 0)
				ready = 1;
			break;

		case SEL_EX:
			select_wait(sk->sleep,wait);
			/* Exceptional: a pending error or urgent data */
			if (sk->err || sk->urg_data)
				ready = 1;
			break;
 	}

	release_sock(sk);
	return(ready);
}


/*
 *	TCP specific ioctl calls. In each case 'arg' is taken to be a user
 *	space address which receives one long sized result.
 *
 *	TIOCINQ		bytes countable by tcp_readable()
 *	SIOCATMARK	non-zero when urgent data is present and the read
 *			position sits at the urgent sequence number
 *	TIOCOUTQ	the value reported by sock_wspace()
 *
 *	Returns 0 on success, -EINVAL for an unknown command or for a queue
 *	query on a listening socket, or the error from verify_area().
 */
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int err;
	switch(cmd) 
	{

		case TIOCINQ:
#ifdef FIXME	/* FIXME: */
		case FIONREAD:
#endif
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) 
				return(-EINVAL);

			/* Hold the socket while the receive queue is counted */
			sk->inuse = 1;
			amount = tcp_readable(sk);
			release_sock(sk);
			err=verify_area(VERIFY_WRITE,(void *)arg,
						   sizeof(unsigned long));
			if(err)
				return err;
			put_fs_long(amount,(unsigned long *)arg);
			return(0);
		}
		case SIOCATMARK:
		{
			int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;

			err = verify_area(VERIFY_WRITE,(void *) arg,
						  sizeof(unsigned long));
			if (err)
				return err;
			/*
			 *	The area verified is an unsigned long and that is what
			 *	gets stored, so the pointer is cast to match (it was
			 *	(int *)), in line with the other two cases.
			 */
			put_fs_long(answ,(unsigned long *) arg);
			return(0);
		}
		case TIOCOUTQ:
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) return(-EINVAL);
			amount = sock_wspace(sk);
			err=verify_area(VERIFY_WRITE,(void *)arg,
						   sizeof(unsigned long));
			if(err)
				return err;
			put_fs_long(amount,(unsigned long *)arg);
			return(0);
		}
		default:
			return(-EINVAL);
	}
}


/*
 *	This routine computes a TCP checksum. 
 *
 *	th	start of the data to sum (the TCP header)
 *	len	number of bytes to sum starting at th
 *	saddr	source address; when 0, ip_my_addr() is used instead
 *	daddr	destination address
 *
 *	Returns the complement of the folded sum, in the low 16 bits.
 */
 
unsigned short tcp_check(struct tcphdr *th, int len,
	  unsigned long saddr, unsigned long daddr)
{     
	unsigned long sum;
   
	if (saddr == 0) 
		saddr = ip_my_addr();

/*
 * stupid, gcc complains when I use just one __asm__ block,
 * something about too many reloads, but this is just two
 * instructions longer than what I want
 */
	/*
	 * First block: add together daddr, saddr and a third word built
	 * from the length and IPPROTO_TCP, feeding the carry out of the
	 * top back in with the final adcl.
	 */
	__asm__("
	    addl %%ecx, %%ebx
	    adcl %%edx, %%ebx
	    adcl $0, %%ebx
	    "
	: "=b"(sum)
	: "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
	: "bx", "cx", "dx" );
	/*
	 * Second block: add the len bytes at th onto the running sum.
	 *   1:  32 bytes per pass (eight lodsl/adcl pairs), len/32 passes,
	 *       skipped when len is below 32
	 *   3:  the remaining whole 32 bit words (len & 28)
	 *   4:  a trailing 16 bit word if bit 1 of len is set
	 *   5:  a trailing byte if bit 0 of len is set
	 *   6:  the top half of the sum is added into the bottom half
	 * The carry is cleared before each adcl run and the adcl $0 that
	 * follows a run feeds the last carry back in.
	 */
	__asm__("
	    movl %%ecx, %%edx
	    cld
	    cmpl $32, %%ecx
	    jb 2f
	    shrl $5, %%ecx
	    clc
1:	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    loop 1b
	    adcl $0, %%ebx
	    movl %%edx, %%ecx
2:	    andl $28, %%ecx
	    je 4f
	    shrl $2, %%ecx
	    clc
3:	    lodsl
	    adcl %%eax, %%ebx
	    loop 3b
	    adcl $0, %%ebx
4:	    movl $0, %%eax
	    testw $2, %%dx
	    je 5f
	    lodsw
	    addl %%eax, %%ebx
	    adcl $0, %%ebx
	    movw $0, %%ax
5:	    test $1, %%edx
	    je 6f
	    lodsb
	    addl %%eax, %%ebx
	    adcl $0, %%ebx
6:	    movl %%ebx, %%eax
	    shrl $16, %%eax
	    addw %%ax, %%bx
	    adcw $0, %%bx
	    "
	: "=b"(sum)
	: "0"(sum), "c"(len), "S"(th)
	: "ax", "bx", "cx", "dx", "si" );

  	/* We only want the bottom 16 bits, but we never cleared the top 16. */
  
  	return((~sum) & 0xffff);
}



/*
 *	Fill in the checksum field of an outgoing TCP header.
 *
 *	th	header to checksum; len bytes starting here are summed
 *	saddr	source address passed on to tcp_check()
 *	daddr	destination address passed on to tcp_check()
 *	len	number of bytes covered
 *	sk	not used by this routine
 */
void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
		unsigned long daddr, int len, struct sock *sk)
{
	/* The field is zeroed first so that it is summed as zero */
	th->check = 0;
	th->check = tcp_check(th, len, saddr, daddr);
}


/*
 *	Send an sk_buff. This has to make the decision whether to put the packet onto
 *	the write queue or the retransmit queue (if it can be sent immediately)
 *
 *	sk	socket the frame belongs to
 *	skb	frame to send; skb->h.th must point at its TCP header
 *
 *	Malformed frames (shorter than a TCP header, or a bare header with
 *	neither SYN nor FIN) are reported with printk and freed.
 */

static void tcp_send_skb(struct sock *sk, sk_buff *skb)
{
	int size;
	struct tcphdr * th = skb->h.th;

	/*
	 *	Length of packet (not counting length of pre-tcp headers) 
	 */
	 
	size = skb->len - ((unsigned char *) th - skb_data(skb));

	/*
	 *	Sanity check it.. some old stacks explode if they receive an ackless, controlless dataless frame &
	 *	we get stuck if we try to send one. We also don't want to send under or over sized 'accidents'
	 */
	 
	if (size < sizeof(struct tcphdr) || size > skb->len) 
	{
		printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
			skb, skb_data(skb), th, skb->len);
		kfree_skb(skb, FREE_WRITE);
		return;
	}

	/*
	 *	If we have queued a header size packet.. 
	 */
	 
	if (size == sizeof(struct tcphdr)) 
	{
		/* If its got a syn or fin its notionally included in the size..*/
		if(!th->syn && !th->fin) 
		{
			printk("tcp_send_skb: attempt to queue a bogon.\n");
			kfree_skb(skb,FREE_WRITE);
			return;
		}
	}

	tcp_statistics.TcpOutSegs++;  

	/*
	 *	Compute the right hand edge of this frame. FIXME: Should this check/count FIN ?
	 */
	 
	skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
	
	/*
	 *	We cannot send now if the right hand edge of the frame is out of the receiver window
	 *	or we are retransmitting old frames or we have too many frames 'in flight'.
	 *
	 */
	
	if (after(skb->h.seq, sk->window_seq) ||
	    (sk->retransmits && sk->timeout == TIME_WRITE) ||
	     sk->packets_out >= sk->cong_window) 
	{
		/*
		 *	checksum will be supplied by tcp_write_xmit.  So we shouldn't need to 
		 *	set it at all.  I'm being paranoid 
		 */
		th->check = 0;
		if (skb->next != NULL) 
		{
			/* The message used to blame tcp_send_partial; name the routine that detects it */
			printk("tcp_send_skb: next != NULL\n");
			skb_unlink(skb);
		}
		skb_queue_tail(&sk->write_queue, skb);
		
		/*
		 *	If there is no space to send the frame TCP moves into probing mode. We force a regular
		 *	ack return to use so that we find out when the window opens. See the TCP specification
		 *	for the explanation of why this is necessary.
		 */
		 
		if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
		    sk->send_head == NULL &&
		    sk->ack_backlog == 0)
			reset_timer(sk, TIME_PROBE0, sk->rto);
	} 
	else 
	{
		/*
		 *	If the frame fits send it with a current ack sequence report, the current window offering
		 *	and a checksum. Update sent_seq so we know what constitutes a valid ack frame.
		 */
		 
		th->ack_seq = ntohl(sk->acked_seq);
		th->window = ntohs(tcp_select_window(sk));

		tcp_send_check(th, sk->saddr, sk->daddr, size, sk);

		sk->sent_seq = sk->write_seq;
		tcp_queue_retransmit(sk,skb);	/* It will need retransmission on error */
		proto_ip.output(&proto_ip, skb, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip);
	}
}

/*
 *	Multiple TCP writers can cause several partial frames to exist. After a timeout we need to empty the partial
 *	queue into the output queue (or send them). This is simply a case of feeding the entire queue through 
 *	tcp_send_skb().
 *
 *	This routine detaches the socket's pending partial frame, if there
 *	is one, and cancels the partial frame timer that goes with it. It
 *	returns the detached frame, or NULL when nothing was pending.
 */
 
sk_buff * tcp_dequeue_partial(struct sock * sk)
{
	unsigned long saved;
	sk_buff *pending;

	save_flags(saved);
	cli();

	pending = sk->partial;
	if (pending != NULL)
	{
		sk->partial = NULL;
		del_timer(&sk->partial_timer);
	}

	restore_flags(saved);
	return pending;
}

/*
 *	Flush partial frames: keep detaching the pending partial frame and
 *	passing it to tcp_send_skb() until none is left. Also installed as
 *	the partial frame timer handler. A NULL socket is ignored.
 */
static void tcp_send_partial(struct sock *sk)
{
	sk_buff *frame;

	if (sk != NULL)
	{
		for (frame = tcp_dequeue_partial(sk); frame != NULL; frame = tcp_dequeue_partial(sk))
			tcp_send_skb(sk, frame);
	}
}

/*
 *	Make skb the socket's pending partial frame and restart the partial
 *	frame timer. If another partial frame was already pending it is
 *	displaced and sent on its way through tcp_send_skb().
 */
 
void tcp_enqueue_partial(sk_buff * skb, struct sock * sk)
{
	unsigned long saved;
	sk_buff *displaced;

	save_flags(saved);
	cli();

	/* Whatever was pending before gives way, along with its timer */
	displaced = sk->partial;
	if (displaced != NULL)
		del_timer(&sk->partial_timer);

	sk->partial = skb;

	/* Set the timer up afresh so that tcp_send_partial(sk) runs when it fires */
	init_timer(&sk->partial_timer);
	sk->partial_timer.data = (unsigned long) sk;
	sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
	sk->partial_timer.expires = HZ;
	add_timer(&sk->partial_timer);

	restore_flags(saved);

	/* The displaced frame is sent once interrupts are back on */
	if (displaced != NULL)
		tcp_send_skb(sk, displaced);
}


/*
 *	This routine sends an ack and also updates the window. 
 *
 *	sequence	value for the seq field of the ack
 *	ack		value for the ack_seq field of the ack
 *	sk		socket the ack is sent for
 *	th		TCP header used as the template for the ack; its
 *			source and dest ports are swapped over
 *	daddr		address the ack is sent to
 *
 *	Nothing is sent when the socket is zapped. If no buffer can be had,
 *	the ack is noted in sk->ack_backlog and a short write timer may be
 *	set instead.
 */
 
static void tcp_send_ack(unsigned long sequence, unsigned long ack,
	     struct sock *sk,
	     struct tcphdr *th, unsigned long daddr)
{
	sk_buff *buff;
	struct tcphdr *t1;

	if(sk->zapped)
		return;		/* We have been reset, we may not send again */
	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(sk, protocol_size(&proto_tcp), 1, GFP_ATOMIC);
	if (buff == NULL) 
	{
		/*
		 *	Force it to send an ack shortly soonish. If we are already about to
		 *	output data then let the existing timer event carry the ack
		 */
		sk->ack_backlog++;
		if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
		{
			reset_timer(sk, TIME_WRITE, 10);
		}
		return;
	}
	
	/*
	 *	Assemble an ACK frame.
	 */
	 
	protocol_adjust(buff,&proto_tcp);
	buff->sk = sk;
	buff->localroute = sk->localroute;
	t1 =(struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));
	memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */

	/*
	 *	Swap the send and the receive. Fill in the other tcp header
	 *	fields.
	 */
	 
	t1->dest = th->source;
	t1->source = th->dest;
	t1->seq = ntohl(sequence);
	t1->ack = 1;
	/* The window chosen here is remembered in sk->window as the current offer */
	sk->window = tcp_select_window(sk);
	t1->window = ntohs(sk->window);
	/* Clear everything else carried over from the template header */
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->syn = 0;
	t1->psh = 0;
	t1->fin = 0;
	
	/*
	 *	If we are acknowledging everything and there is nothing left to do then
	 *	move into a keepalive or idle mode depending upon the keepalive flag for
	 *	the socket.
	 */
	 
	if (ack == sk->acked_seq) 
	{
		sk->ack_backlog = 0;
		sk->bytes_rcv = 0;
		sk->ack_timed = 0;
		/* Nothing awaiting retransmission, nothing queued to write, and the write timer is the one running */
		if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
				  && sk->timeout == TIME_WRITE) 
		{
			if(sk->keepopen) {
				reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
			} else {
				delete_timer(sk);
			}
		}
  	}
  	t1->ack_seq = ntohl(ack);
  	/* The ack is a bare header: data offset is the header length in 32 bit words */
  	t1->doff = sizeof(*t1)/4;
  	tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
  	if (sk->debug)
  		 printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
  	tcp_statistics.TcpOutSegs++;
  	buff->dev=NULL;
  	proto_ip.output(&proto_ip, buff, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &daddr, &sk->opt.ip);
}


/* 
 *	This routine builds a generic TCP header for outgoing data.
 *
 *	th	where the header is written
 *	sk	socket supplying the template header, the sequence numbers
 *		and the window
 *	push	zero sets PSH in the header, non-zero leaves it clear
 *
 *	The header always carries an ACK, so the socket's pending ack
 *	bookkeeping is reset here too. Returns the size of the header.
 */
 
static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
{
	/* Start from the socket's template header. FIXME: want to get rid of this. */
	memcpy(th,(void *) &(sk->opt.ip.dummy_th), sizeof(*th));

	/* Sequence numbers, in network byte order */
	th->seq = htonl(sk->write_seq);
	th->ack_seq = htonl(sk->acked_seq);

	/* Flags and header length (in 32 bit words) */
	th->doff = sizeof(*th)/4;
	th->psh = (push == 0);
	th->ack = 1;
	th->fin = 0;

	/* This frame carries the ack, so nothing is owed any more */
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	sk->ack_timed = 0;

	/* Choose the window to offer and remember it as the current offer */
	sk->window = tcp_select_window(sk);
	th->window = htons(sk->window);

	return(sizeof(*th));
}

/*
 *	This routine copies from a user buffer into a socket,
 *	and starts the transmit system.
 *
 *	sk	 - socket to send on
 *	msg	 - message header: msg_iov/msg_iovlen describe the user data;
 *		   msg_name, when set, must match the connected peer
 *	tot_len	 - not referenced by this routine
 *	nonblock - when non-zero, give up with -EAGAIN instead of sleeping
 *	flags	 - only MSG_OOB and MSG_DONTROUTE are accepted
 *
 *	Returns the number of bytes copied. When nothing was copied a negative
 *	errno is returned instead (-EINVAL, -ENOTCONN, -EISCONN, -EPIPE,
 *	-EAGAIN, -ERESTARTSYS or the negated pending sk->err).
 */

static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,  int tot_len, int nonblock, unsigned flags)
{
	int copied = 0;
	int copy;
	int tmp;
	sk_buff *skb;
	sk_buff *send_tmp;
	struct proto *prot;
	struct device *dev = NULL;
	int len;
	int iovct=0;
	unsigned char *from;
	struct iovec *iov=msg->msg_iov;
	struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;

	/*
	 *	Only OOB (sends TCP URG data) and local routing apply.
	 */
	 
	if (flags & ~(MSG_OOB|MSG_DONTROUTE))
		return -EINVAL;
		
	/*
	 *	If an address is supplied it must be the one we would use anyway
	 *	as TCP is connection oriented.
	 */
	 
	if(sk->state==TCP_CLOSE)
		return -ENOTCONN;
	
	if(addr)
	{
		if (addr->sin_port != sk->opt.ip.dummy_th.dest) 
			return(-EISCONN);
		if (addr->sin_addr.s_addr != sk->daddr) 
			return(-EISCONN);
	}	
	
	/*
	 *	Block any tampering from timers/bottom halves.
	 */

	sk->inuse=1;
	/* NOTE(review): prot is assigned here but never read in this routine. */
	prot = sk->prot;
	
	/*
	 *	We do this bizarre double loop as it makes the 99.999% of the time case (a single iov entry)
	 *	only a few instructions longer than the old system through the loop.
	 */
	 
	while(iovct++<msg->msg_iovlen)
	{
		from=iov->iov_base;
		len=iov->iov_len;
		iov++;	
		
		while(len > 0) 
		{
		/*
		 *	On an error read no further. If nothing was read we report the error
		 */
			if (sk->err) 
			{	
				release_sock(sk);
				if (copied) 
					return(copied);
				tmp = -sk->err;
				sk->err = 0;
				return(tmp);
			}
	
		/*
		 *	First thing we do is make sure that we are established. 
		 */
		
			if (sk->shutdown & SEND_SHUTDOWN) 
			{
				release_sock(sk);
				/*
				 *	EPIPE is left pending in sk->err when a partial count is
				 *	returned; otherwise it is cleared and reported right now.
				 */
				sk->err = EPIPE;
				if (copied) 
					return(copied);
				sk->err = 0;
				return(-EPIPE);
			}
	
	
		/* 
		 *	Wait for a connection to finish (not normally executed)
		 */
		
			while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
			{
				if (sk->err) 
				{
					release_sock(sk);
					if (copied) 
						return(copied);
					tmp = -sk->err;
					sk->err = 0;
					return(tmp);
				}
	
				/*
				 *	Only a connection still being set up is worth waiting for.
				 */
				if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
				{
					release_sock(sk);
					if (copied) 
						return(copied);
	
					if (sk->err) 
					{
						tmp = -sk->err;
						sk->err = 0;
						return(tmp);
					}
	
					if (sk->keepopen) 
					{
						send_sig(SIGPIPE, current, 0);
					}
					return(-EPIPE);
				}

				if (nonblock || copied) 
				{
					release_sock(sk);
					if (copied) 
						return(copied);
					return(-EAGAIN);
				}

				/*
				 *	Drop the socket, then re-test the state with interrupts
				 *	off before going to sleep on it.
				 */
				release_sock(sk);
				cli();
			
				if (sk->state != TCP_ESTABLISHED &&
			    		sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
			    	{
					interruptible_sleep_on(sk->sleep);
					if (current->signal & ~current->blocked) 
					{
						sti();
						if (copied) 
							return(copied);
						return(-ERESTARTSYS);
					}
				}
				sk->inuse = 1;
				sti();
			}

		/*
		 * The following code can result in copy <= 0 if sk->mss is ever
		 * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
		 * sk->mtu is constant once SYN processing is finished.  I.e. we
		 * had better not get here until we've seen his SYN and at least one
		 * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
		 * But ESTABLISHED should guarantee that.  sk->max_window is by definition
		 * non-decreasing.  Note that any ioctl to set user_mss must be done
		 * before the exchange of SYN's.  If the initial ack from the other
		 * end has a window of 0, max_window and thus mss will both be 0.
		 */

		/* 
		 *	Now we need to check if we have a half built packet. 
		 */

			if ((skb = tcp_dequeue_partial(sk)) != NULL) 
			{
			        int hdrlen;

			         /* IP header + TCP header */
				hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb_data(skb))
				         + sizeof(struct tcphdr);
	
				/* Add more stuff to the end of skb->len */
				if (!(flags & MSG_OOB)) 
				{
					copy = min(sk->mss - (skb->len - hdrlen), len);
					/* FIXME: this is really a bug. */
					if (copy <= 0) 
					{
				  		printk("TCP: **bug**: \"copy\" <= 0!!\n");
				  		copy = 0;
					}
	  
					memcpy_fromfs(skb_put(skb,copy), from, copy);
					from += copy;
					copied += copy;
					len -= copy;
					sk->write_seq += copy;
				}
				
				/*
				 *	If this is the full frame (note it should never actually exceed MSS ever)
				 *	then we can send it immediately. If not we throw it back onto the
				 *	partial queue for further work.
				 */
				 
				if ((skb->len - hdrlen) >= sk->mss ||
					(flags & MSG_OOB) || !sk->packets_out)
					tcp_send_skb(sk, skb);
				else
					tcp_enqueue_partial(skb, sk);
				continue;
			}

		/*
		 * We also need to worry about the window.
	 	 * If window < 1/2 the maximum window we've seen from this
	 	 *   host, don't use it.  This is sender side
	 	 *   silly window prevention, as specified in RFC1122.
	 	 *   (Note that this is different than earlier versions of
	 	 *   SWS prevention, e.g. RFC813.).  What we actually do is 
		 *   use the whole MSS.  Since this results in the right
		 *   edge of the packet being outside the window, it will
		 *   be queued for later rather than sent.
		 *
		 *  This causes a problem with hosts that don't choose to update
		 *  their window. I'm still studying RFC1122 to sort this out
		 *  and get CISCO's and PC/TCP to talk nicely with Linux.
		 */

			copy = sk->window_seq - sk->write_seq;
			if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
				copy = sk->mss;
			if (copy > len)
				copy = len;

		/*
		 *	We should really check the window here also. 
		 */
	 
			/*
			 *	send_tmp stays NULL for a full sized (or OOB) frame; for a
			 *	short frame it remembers the buffer so it can be parked on
			 *	the partial queue further down.
			 */
			send_tmp = NULL;
			if (copy < sk->mss && !(flags & MSG_OOB)) 
			{
				/*
				 *	We will release the socket in case we sleep here. 
				 */
				release_sock(sk);
				/*
				 *	NB: following must be mtu, because mss can be increased.
				 *	mss is always <= mtu 
				 */
				skb = sock_wmalloc(sk, sk->mtu + 128 + protocol_size(&proto_tcp), 0, GFP_KERNEL);
				sk->inuse = 1;
				send_tmp = skb;
			} 
			else 
			{
				/*
				 *	We will release the socket in case we sleep here. 
				 */
				release_sock(sk);
				skb = sock_wmalloc(sk, copy + protocol_size(&proto_tcp) , 0, GFP_KERNEL);
	  			sk->inuse = 1;
			}

			/*
			 *	If we didn't get any memory, we need to sleep. 
			 */

			if (skb == NULL) 
			{
				if (nonblock) 
				{
					release_sock(sk);
					if (copied) 
						return(copied);
					return(-EAGAIN);
				}

				/*
				 *	Watch this bit of code carefully. The race condition avoidance with 'tmp' is very
				 *	important.
				 */
			 
				tmp = sk->wmem_alloc;
				release_sock(sk);
				cli();

				/*
				 *	Sleep only if sk->wmem_alloc has not dropped below the value
				 *	sampled above and the connection is still usable.
				 */
				if (tmp <= sk->wmem_alloc &&
					  (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
					&& sk->err == 0) 
				{
					interruptible_sleep_on(sk->sleep);
					if (current->signal & ~current->blocked) 
					{
						sti();
						if (copied) 
							return(copied);
						return(-ERESTARTSYS);
					}
				}
				sk->inuse = 1;
				sti();
				continue;
			}
		
			/*
			 *	We finally got a new buffer.
			 */
			 
			protocol_adjust(skb,&proto_tcp);
			
			/*
			 *	Fill in the basics and make the headers.
			 */

			skb->sk = sk;
			skb->free = 0;
			skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);

			skb->h.th =(struct tcphdr *)skb_push(skb,sizeof(struct tcphdr));
			skb->thptr=skb->h.th;
			skb->dev = dev;
			/*
			 *	The last argument is the amount of this iovec entry still
			 *	unsent after this frame; tcp_build_header sets PSH when it is 0.
			 */
			tmp = tcp_build_header(skb->h.th, sk, len-copy);
			/*
			 *	tcp_build_header as written always returns the header size,
			 *	so this failure branch is purely defensive.
			 */
			if (tmp < 0) 
			{
				sock_wfree(sk, skb, skb->mem_len);
				release_sock(sk);
				if (copied) 
					return(copied);
				return(tmp);
			}

			if (flags & MSG_OOB) 
			{
				skb->h.th->urg = 1;
				skb->h.th->urg_ptr = ntohs(copy);
			}
			
			/*
			 *	Copy the data. _Remember_ this can cause a schedule!
			 */
			 
			memcpy_fromfs(skb_put(skb,copy), from, copy);

			from += copy;
			copied += copy;
			len -= copy;
			skb->free = 0;
			sk->write_seq += copy;
			
			/*
			 *	Throw it back on the queue
			 */
	
			if (send_tmp != NULL && sk->packets_out) 
			{
				tcp_enqueue_partial(send_tmp, sk);
				continue;
			}
			/*
			 *	Or maybe even send it to either the write queue or IP.
			 */
			tcp_send_skb(sk, skb);
		}
		/*
		 *	This iovec entry has been consumed completely; reset the
		 *	socket error before moving on to the next one.
		 */
		sk->err = 0;
	}
	
/*
 *	Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
 *	interactive fast network servers. It's meant to be on and
 *	it really improves the throughput though not the echo time
 *	on my slow slip link - Alan
 */

/*
 *	Avoid possible race on send_tmp - fixed c/o Johannes Stille.
 *
 *	What this does is say IF we have a partly completed frame to
 *	send and there are no packets on the network we should send
 *	the partial frame immediately and not wait around. 
 */
 
	if(sk->partial && ((!sk->packets_out) 
     /* If not nagling we can send on the before case too.. */
	      || (sk->nonagle && before(sk->write_seq , sk->window_seq))
      	))
  		tcp_send_partial(sk);

	release_sock(sk);
	return(copied);
}


/*
 *	We have read data. This has reduced sk->rmem and thus the ack we send will
 *	offer a larger window (assuming the SWS avoidance code allows it).
 *
 *	Builds a bare ACK carrying the freshly selected window and hands it
 *	straight to the IP output routine. Does nothing when sk->ack_backlog is
 *	zero. If no buffer can be had the write timer is re-armed so that the
 *	ACK is attempted again shortly.
 */

static void tcp_read_wakeup(struct sock *sk)
{
	struct tcphdr *t1;
	sk_buff *buff;

	/*
	 *	Nothing is waiting to be acknowledged.
	 */

	if (!sk->ack_backlog) 
		return;

	/*
	 * FIXME: we need to put code here to prevent this routine from
	 * being called.  Being called once in a while is ok, so only check
	 * if this is the second time in a row.
 	 */

	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(sk, protocol_size(&proto_tcp), 1, GFP_ATOMIC);
	if (buff == NULL) 
	{
		/* Try again real soon. */
		reset_timer(sk, TIME_WRITE, 10);
		return;
	}
	protocol_adjust(buff, &proto_tcp);

	buff->sk = sk;
	buff->localroute = sk->localroute;

	/*
	 *	Stick a tcp header on the buffer, starting from the socket's
	 *	header template.
	 */
	 
	t1 = (struct tcphdr *)skb_push(buff, sizeof(struct tcphdr));
	memcpy(t1, (void *) &sk->opt.ip.dummy_th, sizeof(*t1));
	t1->seq = htonl(sk->sent_seq);
	t1->ack = 1;
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->syn = 0;
	t1->psh = 0;

	/*
	 *	The pending-ack accounting is cleared before the window is chosen.
	 */

	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	sk->window = tcp_select_window(sk);

	/*
	 *	These are host values going onto the wire, so convert with the
	 *	host-to-network macros (as is already done for t1->seq above).
	 */

	t1->window = htons(sk->window);
	t1->ack_seq = htonl(sk->acked_seq);
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
	
	/*
	 *	It goes out immediately. All such frames do.
	 *	The segment is only counted when the output routine returns 0.
	 */
	 
	if (proto_ip.output(&proto_ip, buff, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip) == 0)
		tcp_statistics.TcpOutSegs++;
}


/*
 * 	Free receive buffers that have been completely read, and if that
 * 	changed the receive space, arrange for an ACK so the other end
 * 	learns about the bigger window.
 *
 * 	FIXME (from the original): this routine should consider sending an
 * 	ACK to let the other end know we now have a bigger window.
 */

static void cleanup_rbuf(struct sock *sk)
{
	unsigned long irq_state;
	unsigned long space_before;
	unsigned long space_after;
	sk_buff *done_skb;
	int was_active;

	if (sk->debug)
		printk("cleaning rbuf for sk=%p\n", sk);

	/*
	 *	With interrupts off, note the space we start with and strip
	 *	every leading buffer on the receive queue that is marked used.
	 *	The first unused buffer (or an empty queue) ends the walk.
	 */

	save_flags(irq_state);
	cli();

	space_before = sock_rspace(sk);

	for (;;)
	{
		done_skb = skb_peek(&sk->receive_queue);
		if (done_skb == NULL || !done_skb->used)
			break;
		skb_unlink(done_skb);
		done_skb->sk = sk;
		kfree_skb(done_skb, FREE_READ);
	}

	restore_flags(irq_state);

	if (sk->debug)
		printk("sk->rspace = %lu, was %lu\n", sock_rspace(sk),
			space_before);

	/*
	 *	Nothing was freed: no reason to think about an ACK.
	 */

	space_after = sock_rspace(sk);
	if (space_after == space_before)
		return;

	/*
	 * This area has caused the most trouble.  The current strategy
	 * is to simply do nothing if the other end has room to send at
	 * least 3 full packets, because the ack from those will auto-
	 * matically update the window.  If the other end doesn't think
	 * we have much space left, but we have room for at least 1 more
	 * complete packet than it thinks we do, we will send an ack
	 * immediately.  Otherwise we will wait up to .5 seconds in case
	 * the user reads some more.
	 */

	sk->ack_backlog++;

	/*
	 * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
	 * if the other end is offering a window smaller than the agreed on MSS
	 * (called sk->mtu here).  In theory there's no connection between send
	 * and receive, and so no reason to think that they're going to send
	 * small packets.  For the moment the mss is reduced only on the send
	 * side, so mtu is used here.
	 */

	if (space_after > (sk->window - sk->bytes_rcv + sk->mtu))
	{
		/* Send an ack right now. */
		tcp_read_wakeup(sk);
		return;
	}

	/*
	 *	Force an ack soon. A timer that was already running and does
	 *	not expire later than TCP_ACK_TIME is simply put back;
	 *	otherwise the ack timer is (re)started.
	 */

	was_active = del_timer(&sk->timer);
	if (was_active && !(TCP_ACK_TIME < sk->timer.expires))
		add_timer(&sk->timer);
	else
		reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
}


/*
 *	Read the single byte of urgent (out of band) data. The receiving code
 *	has already done the hard work. Note that our urgent handling is BSD
 *	rather than the standard. If we implemented the standard nothing would
 *	work 8(
 *
 *	Returns 1 when the urgent byte was copied into msg->msg_iov, 0 on end
 *	of stream, or a negative errno. This call never sleeps; nonblock and
 *	len are not referenced.
 */
 
static int tcp_recv_urg(struct sock * sk, int nonblock,
	     struct msghdr *msg, int len, unsigned flags)
{
	int pending_err;
	char urg_byte;

	/*
	 *	Urgent data delivered inline, no urgent data at all, or urgent
	 *	data that was already read: nothing to fetch here.
	 */

	if (sk->urginline)
		return -EINVAL;
	if (!sk->urg_data || sk->urg_data == URG_READ)
		return -EINVAL;

	sk->inuse = 1;

	if (sk->urg_data & URG_VALID)
	{
		/* The byte itself lives in the low bits of urg_data. */
		urg_byte = sk->urg_data;
		if (!(flags & MSG_PEEK))
			sk->urg_data = URG_READ;
		memcpy_toiovec(msg->msg_iov, &urg_byte, 1);
		release_sock(sk);
		return 1;
	}

	release_sock(sk);

	/*
	 *	No valid byte yet: report a pending error first.
	 */

	if (sk->err)
	{
		pending_err = -sk->err;
		sk->err = 0;
		return pending_err;
	}

	/*
	 *	End of stream was already reported once.
	 */

	if (sk->done)
		return -ENOTCONN;

	/*
	 *	Closed, or shut down for receiving: report end of stream and
	 *	remember that we did.
	 */

	if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN))
	{
		sk->done = 1;
		return 0;
	}

	/*
	 * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
	 * Mike <pall@rz.uni-karlsruhe.de>
	 */

	return -EAGAIN;
}


/*
 *	This routine copies from a sock struct into the user buffer. 
 *
 *	sk	 - socket to read from
 *	msg	 - message header: msg_iov receives the data; msg_name, when
 *		   set, is filled in with the peer's address and port
 *	len	 - maximum number of bytes to copy
 *	nonblock - when non-zero, return -EAGAIN instead of sleeping
 *	flags	 - MSG_OOB diverts to tcp_recv_urg(); MSG_PEEK reads without
 *		   advancing sk->copied_seq or marking buffers used
 *	addr_len - when both it and msg_name are set, receives sizeof(*sin)
 *
 *	Returns the number of bytes copied, 0 at end of stream, or a negative
 *	errno (-ENOTCONN, -EAGAIN, -ERESTARTSYS or the negated sk->err).
 */
 
static int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
	int len, int nonblock, unsigned flags, int *addr_len)
{
	struct wait_queue wait = { current, NULL };
	int copied = 0;
	unsigned long peek_seq;
	unsigned long *seq;
	unsigned long used;
	struct iovec *iov=msg->msg_iov;
	struct sockaddr_in *sin=msg->msg_name;

	if(sin!=NULL)
	{
		sin->sin_family=AF_INET;
		sin->sin_addr.s_addr=sk->daddr;
		sin->sin_port=sk->opt.ip.dummy_th.dest;
		if(addr_len)
			*addr_len=sizeof(*sin);
	}
	
	/*
	 *	This error should be checked. 
	 */
	 
	if (sk->state == TCP_LISTEN)
		return -ENOTCONN;

	/*
	 *	Urgent data needs to be handled specially. 
	 */
	 
	if (flags & MSG_OOB)
		return tcp_recv_urg(sk, nonblock, msg, len, flags);

	/*
	 *	Remember where we are in sequence space for MSG_PEEK. Also a
	 *	little Linus sneak trick to speed things up in a moment:
	 *	'seq' points at the real sk->copied_seq for a normal read and at
	 *	the private copy peek_seq for MSG_PEEK, so the loop below can
	 *	advance *seq without caring which kind of read this is.
	 */
	 
	peek_seq = sk->copied_seq;
	seq = &sk->copied_seq;
	if (flags & MSG_PEEK)
		seq = &peek_seq;

	add_wait_queue(sk->sleep, &wait);
	
	/*
	 *	Don't permit casual twiddlers.
	 */

	sk->inuse = 1;
	while (len > 0) 
	{
		sk_buff * skb;
		unsigned long offset;	
		/*
		 *	Are we at urgent data? Stop if we have read anything.
		 */
		if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
			break;

		/*
		 *	Mark ourselves as sleeping before looking at the queue; the
		 *	schedule() call further down is what actually gives up the CPU.
		 */
		current->state = TASK_INTERRUPTIBLE;

		/*
		 *	Get a buffer.
		 */
		 
		skb = skb_peek(&sk->receive_queue);
		do 
		{
			/*
			 *	No buffer - no copying.
			 */
			if (!skb)
				break;
			/*
			 *	If the left edge of this buffer is after the last sequence space
			 *	we read then we cannot read it (yet).
			 */
			 
			if (before(1+*seq, skb->h.th->seq))
				break;
				
			/*
			 *	Find out how far into the data to start reading. Queued buffers may
			 *	overlap.
			 */
			 
			offset = 1 + *seq - skb->h.th->seq;
			if (skb->h.th->syn)
				offset--;
			if (offset < skb->len)
				goto found_ok_skb;
				
			/*
			 *	Walk past any totally read buffers and mark them as used so that
			 *	the cleanup_rbuf call will delete them.
			 */
			 
			if (!(flags & MSG_PEEK))
				skb->used = 1;
			skb = skb->next;
		}
		/* The queue is circular: arriving back at the list head ends the walk. */
		while (skb != (sk_buff *)&sk->receive_queue);

		/*
		 *	If we have data we are done on this loop.
		 */
		 
		if (copied)
			break;
			
		/*
		 *	Errors stop the read. Note copied must be 0 here.
		 */

		if (sk->err) 
		{
			copied = -sk->err;
			sk->err = 0;
			break;
		}
		
		/*
		 *	If the socket has closed then I guess we won't get any 
		 *	more data. The first read after that returns 0 (copied is
		 *	still 0 here) and sets sk->done; later reads get -ENOTCONN.
		 */

		if (sk->state == TCP_CLOSE) 
		{
			if (!sk->done) 
			{
				sk->done = 1;
				break;
			}
			copied = -ENOTCONN;
			break;
		}

		/*
		 *	If it is shutdown for receiving then we needn't bother either.
		 */
		 
		if (sk->shutdown & RCV_SHUTDOWN) 
		{
			sk->done = 1;
			break;
		}
			
		/*
		 *	If we won't be waiting we can give up now.
		 */
		 
		if (nonblock) 
		{
			copied = -EAGAIN;
			break;
		}

		/*
		 *	Free any used buffers
		 */
		 
		cleanup_rbuf(sk);
		release_sock(sk);
		
		/*
		 *	Sleep waiting for more data to arrive.
		 */
		 
		schedule();
		sk->inuse = 1;

		if (current->signal & ~current->blocked) 
		{
			copied = -ERESTARTSYS;
			break;
		}
		continue;

	found_ok_skb:
		/*
		 *	Ok so how much can we use ? Whatever is left in this buffer,
		 *	limited to the room left in the current iovec entry.
		 */
		 
		used = skb->len - offset;
		if (iov->iov_len < used)
			used = iov->iov_len;
		/*
		 *	Do we have urgent data here? 
		 */
		 
		if (sk->urg_data) 
		{
			unsigned long urg_offset = sk->urg_seq - (1 + *seq);
			if (urg_offset < used) 
			{
				if (!urg_offset) 
				{
					/*
					 *	The urgent byte is the very next byte: step over it
					 *	unless urgent data is being delivered inline.
					 */
					if (!sk->urginline) 
					{
						++*seq;
						offset++;
						used--;
					}
				}
				else
					/* Read only up to (not including) the urgent byte. */
					used = urg_offset;
			}
		}
		
		/*
		 *	Copy the data into user space. There is a bug here. If we take a page fault copying
		 *	this buffer to user space then two people reading the socket at once can get the same
		 *	data. Worse still one could free the data! This needs a semaphore on it.
		 */
		 
		memcpy_tofs(iov->iov_base,skb_data(skb)+offset, used);
		copied += used;
		len -= used;
		*seq += used;
		iov->iov_base+=used;
		iov->iov_len-=used;
		if(len)
		{
			/*
			 *	Find next free iov.
			 *	NOTE(review): this scan is not bounded by msg->msg_iovlen; it
			 *	relies on len never exceeding the total iovec space - confirm
			 *	against the callers.
			 */
			while(iov->iov_len==0)
				iov++;
		}
		/*
		 *	Have we passed the urgent pointer. If so reset the flag.
		 *	The test uses sk->copied_seq rather than *seq, so a MSG_PEEK
		 *	read (which only advances peek_seq) does not move it forward.
		 */
		 
		if (after(sk->copied_seq+1,sk->urg_seq))
			sk->urg_data = 0;
		if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
			skb->used = 1;
	}
	
	/*
	 *	Return to normality.
	 */
	 
	remove_wait_queue(sk->sleep, &wait);
	current->state = TASK_RUNNING;

	/*
	 *	Clean up data we have read: This will do ACK frames 
	 */
	 
	cleanup_rbuf(sk);
	release_sock(sk);
	return copied;
}

 
/*
 *	Shutdown the sending side of a connection.
 */

void tcp_shutdown(struct sock *sk, int how)
{
	sk_buff *buff;
	struct tcphdr *t1, *th;
	struct proto *prot;

	/*
	 *	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 */

	if (!(how & SEND_SHUTDOWN)) 
		return;
	 
	/*
	 *	If we've already sent a FIN, return. 
	 */
	 
	if (sk->state == TCP_FIN_WAIT1 ||
	    sk->state == TCP_FIN_WAIT2 ||
	    sk->state == TCP_CLOSING ||
	    sk->state == TCP_LAST_ACK ||
	    sk->state == TCP_TIME_WAIT
	) 
	{
		return;
	}
	sk->inuse = 1;

	/*
	 *	Flag that the sender has shutdown
	 */

	sk->shutdown |= SEND_SHUTDOWN;

	/*
	 *	Clear out any half completed packets. 
	 */

	if (sk->partial)
		tcp_send_partial(sk);

	prot =(struct proto *)sk->prot;
	th =(struct tcphdr *)&sk->opt.ip.dummy_th;
	
	/*
	 *	Get a buffer for FIN
	 */
	 
	release_sock(sk); 
	buff = sock_wmalloc(sk, protocol_size(&proto_tcp),1 , GFP_KERNEL);
	if (buff == NULL)
		return;		/* This is GFP_KERNEL so won't happen 8) */
	protocol_adjust(buff,&proto_tcp);
	sk->inuse = 1;

	buff->sk = sk;
	buff->localroute = sk->localroute;

	t1 =(struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));
	
	/*
	 *	Fill in the junk.
	 */

	buff->dev = NULL;
	memcpy(t1, th, sizeof(*t1));
	t1->seq = ntohl(sk->write_seq);
	sk->write_seq++;
	buff->h.seq = sk->write_seq;
	t1->ack = 1;
	t1->ack_seq = ntohl(sk->acked_seq);
	t1->window = ntohs(sk->window=tcp_select_window(sk));
	t1->fin = 1;
	t1->rst = 0;
	t1->doff = sizeof(*t1)/4;
	buff->thptr=t1;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);

	/*
	 * 	If there is data in the write queue, the fin must be appended to
	 * 	the write queue. (The FIN occupies sequence space and goes at the end)
 	 */
 	
 	if (skb_peek(&sk->write_queue) != NULL) 
 	{
  		buff->free=0;
		skb_queue_tail(&sk->write_queue, buff);
  	} 
  	else 
  	{
        	sk->sent_seq = sk->write_seq;
        	/*
        	 *	Stick an IP header on it and send it.
        	 */
        	tcp_queue_retransmit(sk,buff);
        	proto_ip.output(&proto_ip,buff, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip);
	}

	/*
	 *	Perform the relevant state change. These want swapping to tables throughout.
	 */
	 
	if (sk->state == TCP_ESTABLISHED) 
		tcp_set_state(sk,TCP_FIN_WAIT1);
	else if (sk->state == TCP_CLOSE_WAIT)
		tcp_set_state(sk,TCP_LAST_ACK);
	else
		tcp_set_state(sk,TCP_FIN_WAIT2);

	release_sock(sk);
}


/*
 *	This routine will send an RST to the other tcp. This is used in those situations
 *	when we get data for a connection that no longer exists or a connection request
 *	for a socket that isn't. It's basically a shut up and go away response.
 */
 
static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
{
	sk_buff *buff;
	struct tcphdr *t1;
  
	/*
	 *	We need to grab some memory, and put together an RST,
	 *	and then put it into the queue to be sent.
	 */

	buff = sock_wmalloc(NULL, protocol_size(&proto_tcp), 1, GFP_ATOMIC);
	
	/*
	 *	RST is unreliable. If this is one of the odd occasions that we have no memory
	 *	the other end will either send us another frame causing another RST send attempt
	 *	or shut up anyway.
	 */
	 
	if (buff == NULL) 
	  	return;
	  	
	protocol_adjust(buff,&proto_tcp);

	buff->sk = NULL;
	buff->dev = dev;
	buff->localroute = 0;

	t1 =(struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));
	memcpy(t1, th, sizeof(*t1));

	/*
	 *	Swap the send and the receive. 
	 */

	t1->dest = th->source;
	t1->source = th->dest;
	t1->rst = 1;  
	t1->window = 0;
  
  
  	/*
  	 *	The nature of the ACK depends upon the ACK bit in the received frame
  	 */
  	 
	if(th->ack)
	{
		t1->ack = 0;
	  	t1->seq = th->ack_seq;
	  	t1->ack_seq = 0;
	}
	else
	{
	  	t1->ack = 1;
	  	if(!th->syn)
  			t1->ack_seq=htonl(th->seq);
  		else
  			t1->ack_seq=htonl(th->seq+1);
  		t1->seq=0;
	}

	t1->syn = 0;
	t1->urg = 0;
	t1->fin = 0;
	t1->psh = 0;
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
	
	/*
	 *	Send immediately and once only.
	 */
	 
	if(proto_ip.output(&proto_ip,buff,ETH_P_IP, IPPROTO_TCP, &saddr, &daddr, NULL)==0)
		tcp_statistics.TcpOutSegs++;
}


/*
 *	Look for tcp options. Parses everything but only knows about MSS.
 *      This routine is always called with the packet containing the SYN.
 *      However it may also be called with the ack to the SYN.  So you
 *      can't assume this is always the SYN.  It's always called after
 *      we have set up sk->mtu to our own MTU.
 *
 *	sk	- socket whose mtu and mss are updated
 *	th	- received TCP header; the options follow it directly
 *
 *	On a SYN, sk->mtu is lowered to the advertised MSS (or to 536 when
 *	none was sent) and sk->mss is recomputed. Parsing stops, and the
 *	routine returns without touching sk->mss, at an end-of-list option
 *	or at the first malformed option.
 */
 
static void tcp_options(struct sock *sk, struct tcphdr *th)
{
	unsigned char *ptr;
	int length=(th->doff*4)-sizeof(struct tcphdr);
	int mss_seen = 0;
    
	ptr = (unsigned char *)(th + 1);
  
	while(length>0)
	{
		int opcode=(int)*ptr++;
		int opsize;

		/*
		 *	End-of-list and no-op are single byte options: they have
		 *	no length byte, so exactly one byte is consumed for them.
		 */

		if (opcode == TCPOPT_EOL)
			return;
		if (opcode == TCPOPT_NOP)
		{
			length--;
			continue;
		}

		/*
		 *	Every other option carries a length byte that counts the
		 *	kind and length bytes too. Refuse an option whose length
		 *	byte is missing, is below the legal minimum of 2, or
		 *	claims more bytes than remain in the header.
		 */

		if (length < 2)
			return;
		opsize=(int)*ptr++;
		if (opsize < 2 || opsize > length)
			return;

		switch(opcode)
		{
			case TCPOPT_MSS:
				if(opsize==4 && th->syn)
				{
					/*
					 *	The value is big-endian on the wire; it is
					 *	assembled bytewise because ptr need not be
					 *	suitably aligned for a 16 bit load.
					 */
					unsigned short peer_mss = (ptr[0] << 8) | ptr[1];

					sk->mtu=min(sk->mtu, peer_mss);
					mss_seen = 1;
				}
				break;
			/* Add other options here as people feel the urge to implement stuff like large windows */
			default:
				break;
		}
		ptr+=opsize-2;
		length-=opsize;
	}
	if (th->syn) 
	{
		if (! mss_seen)
		      sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
	}
#ifdef CONFIG_INET_PCTCP
	sk->mss = min(sk->max_window >> 1, sk->mtu);
#else    
	sk->mss = min(sk->max_window, sk->mtu);
#endif  
}

/*
 *	Compute the classful (class A, B or C) netmask of an address.
 *	It is used when subnets are configured to be treated as local, as a
 *	hint that all subnets are actually part of the local ethernet. Now
 *	that we have per route MSS values this is really redundant.
 *
 *	dst is in network byte order, and so is the returned mask. Anything
 *	that is neither class A nor class B gets the class C mask.
 */

static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_addr = ntohl(dst);
	unsigned long mask;

	if (IN_CLASSA(host_addr))
		mask = IN_CLASSA_NET;
	else if (IN_CLASSB(host_addr))
		mask = IN_CLASSB_NET;
	else
		mask = IN_CLASSC_NET;

	return htonl(mask);
}

/*
 *	This routine handles a connection request.
 *	It should make sure we haven't already responded.
 *	Because of the way BSD works, we have to send a syn/ack now.
 *	This also means it will be harder to close a socket which is
 *	listening.
 *
 *	sk	- the listening socket
 *	skb	- received frame; its data starts with the TCP header of the SYN
 *	daddr	- address the SYN was sent to (becomes the new socket's saddr)
 *	saddr	- address the SYN came from (becomes the new socket's daddr)
 *	opt	- only passed on to tcp_reset()
 *	dev	- receiving device; supplies the MTU and, unless
 *		  CONFIG_INET_SNARL is set, the netmask for the local test
 *
 *	On success a new socket in TCP_SYN_RECV has been created, a SYN|ACK
 *	has been queued for retransmission and sent, and skb has been
 *	charged to the new socket and put on sk's receive queue. On every
 *	failure path skb is freed and TcpAttemptFails is bumped.
 */
 
static void tcp_conn_request(struct sock *sk, sk_buff *skb,
		 unsigned long daddr, unsigned long saddr,
		 struct options *opt, struct device *dev)
{
	sk_buff *buff;
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sock *newsk;
	struct tcphdr *th;
	struct rtable *rt;
  
	th = (struct tcphdr *)skb_data(skb);

	/*
	 *	If the socket is dead, don't accept the connection. 
	 */
	 
	if (!sk->dead) 
	{
		sk->data_ready(sk,0);
	}
	else 
	{
		tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->opt.ip.tos,sk->opt.ip.ttl);
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * Make sure we can accept more.  This will prevent a
	 * flurry of syns from eating up all our memory.
	 */

	if (sk->ack_backlog >= sk->max_ack_backlog) 
	{
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * We need to build a new sock struct.
	 * It is sort of bad to have a socket without an inode attached
	 * to it, but the wake_up's will just wake up the listening socket,
	 * and if the listening socket is destroyed before this is taken
	 * off of the queue, this will take care of it.
	 */

	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
	if (newsk == NULL) 
	{
		/* just ignore the syn.  It will get retransmitted. */
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}
	
	/*
	 *	Form filling exercise to create the socket. COBOL programmers would be right at home
	 *	here 8)
	 *
	 *	The new socket starts as a copy of the listener; everything that
	 *	must not be shared or inherited is then reset field by field.
	 */

	memcpy(newsk, sk, sizeof(*newsk));
	skb_queue_head_init(&newsk->write_queue);
	skb_queue_head_init(&newsk->receive_queue);
	newsk->send_head = NULL;
	newsk->send_tail = NULL;
	skb_queue_head_init(&newsk->back_log);
	newsk->rtt = 0;		/*TCP_CONNECT_TIME<<3*/
	newsk->rto = TCP_TIMEOUT_INIT;
	newsk->mdev = 0;
	newsk->max_window = 0;
	newsk->cong_window = 1;
	newsk->cong_count = 0;
	newsk->ssthresh = 0;
	newsk->backoff = 0;
	newsk->blog = 0;
	newsk->intr = 0;
	newsk->proc = 0;
	newsk->done = 0;
	newsk->partial = NULL;
	newsk->pair = NULL;
	newsk->wmem_alloc = 0;
	newsk->rmem_alloc = 0;
	newsk->localroute = sk->localroute;
	newsk->socket = NULL;

	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;

	newsk->err = 0;
	newsk->shutdown = 0;
	newsk->ack_backlog = 0;
	newsk->acked_seq = th->seq+1;
	newsk->fin_seq = th->seq;
	newsk->copied_seq = th->seq;
	newsk->state = TCP_SYN_RECV;
	newsk->timeout = 0;
	newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
	newsk->window_seq = newsk->write_seq;
	newsk->rcv_ack_seq = newsk->write_seq;
	newsk->urg_data = 0;
	newsk->retransmits = 0;
	newsk->destroy = 0;
	init_timer(&newsk->timer);
	newsk->timer.data = (unsigned long)newsk;
	newsk->timer.function = &net_timer;
	newsk->opt.ip.family=AF_INET;
	newsk->opt.ip.dummy_th.source = th->dest;
	newsk->opt.ip.dummy_th.dest = th->source;
	
	/*
	 *	Swap these two, they are from our point of view. 
	 */
	 
	newsk->daddr = saddr;
	newsk->saddr = daddr;

	put_sock(newsk->num,newsk);
	newsk->opt.ip.dummy_th.res1 = 0;
	newsk->opt.ip.dummy_th.doff = 6;
	newsk->opt.ip.dummy_th.fin = 0;
	newsk->opt.ip.dummy_th.syn = 0;
	newsk->opt.ip.dummy_th.rst = 0;	
	newsk->opt.ip.dummy_th.psh = 0;
	newsk->opt.ip.dummy_th.ack = 0;
	newsk->opt.ip.dummy_th.urg = 0;
	newsk->opt.ip.dummy_th.res2 = 0;
	newsk->acked_seq = th->seq + 1;
	newsk->copied_seq = th->seq;

	/*
	 *	Grab the ttl and tos values and use them 
	 */

	newsk->opt.ip.ttl=sk->opt.ip.ttl;
	newsk->opt.ip.tos=skb->ip_hdr->tos;

	/*
	 *	Use 512 or whatever user asked for 
	 */

	/*
	 * 	Note use of sk->user_mss, since user has no direct access to newsk 
	 */

	rt=ip_rt_route(saddr, NULL,NULL);
	
	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		newsk->window_clamp = rt->rt_window;
	else
		newsk->window_clamp = 0;
		
	/*
	 *	Pick the starting mtu: the user's choice first, then a per
	 *	route MSS, then 576 less headers for a destination that is not
	 *	on our network and MAX_WINDOW for one that is.
	 */

	if (sk->user_mss)
		newsk->mtu = sk->user_mss;
	else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
		newsk->mtu = rt->rt_mss - HEADER_SIZE;
	else 
	{
#ifdef CONFIG_INET_SNARL	/* Sub Nets Are Local */
		if ((saddr ^ daddr) & default_mask(saddr))
#else
		if ((saddr ^ daddr) & dev->pa_mask)
#endif
			newsk->mtu = 576 - HEADER_SIZE;
		else
			newsk->mtu = MAX_WINDOW;
	}

	/*
	 *	But not bigger than device MTU 
	 */

	newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);

	/*
	 *	This will min with what arrived in the packet 
	 */

	tcp_options(newsk,th);

	/*
	 *	Send a SYN|ACK frame with their sequence number acked and our sequence number quoted. Once they
	 *	ack this we have the transport endpoints synchronised in sequence space and can do the real stuff
	 */
	 
	buff = sock_wmalloc(newsk, protocol_size(&proto_tcp)+4, 1, GFP_ATOMIC);
	if (buff == NULL) 
	{
		/*
		 *	Oops no memory throw it away and let the remote end try again.
		 *
		 *	sk->err holds a positive errno: the read and write paths
		 *	hand back -sk->err, so a negative value stored here would
		 *	reach the caller as a positive "byte count".
		 */
		 
		sk->err = ENOMEM;
		newsk->dead = 1;
		release_sock(newsk);
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}
	
	/*
	 *	Build the frame.
	 */
	 
	protocol_adjust(buff,&proto_tcp);
  
	buff->sk = newsk;
	buff->localroute = newsk->localroute;

	t1 =(struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));  
	buff->thptr=t1;
	memcpy(t1, th, sizeof(*t1));
	
	/*
	 *	This SYN|ACK occupies the first byte of sequence space.
	 */
	 
	buff->h.seq = newsk->write_seq;
	
	/*
	 *	Swap the send and the receive. Host values are converted with
	 *	the host-to-network macros, and the sequence number is bumped
	 *	in a statement of its own rather than inside a macro argument.
	 */
	
	t1->dest = th->source;
	t1->source = newsk->opt.ip.dummy_th.source;
	t1->seq = htonl(newsk->write_seq);
	newsk->write_seq++;
	t1->ack = 1;
	newsk->window = tcp_select_window(newsk);
	newsk->sent_seq = newsk->write_seq;
	t1->window = htons(newsk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->ack_seq = htonl(th->seq+1);

	/*
	 *	One extra 32 bit word of header for the MSS option:
	 *	kind 2, length 4, then our mtu as a big-endian 16 bit value.
	 */

	t1->doff = sizeof(*t1)/4+1;
	ptr = skb_put(buff,4);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] =(newsk->mtu) & 0xff;

	tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
	
	/*
	 *	Send the thing out. This will retry until it times out in the same way as any other sequenced
	 *	data.
	 */

	tcp_queue_retransmit(newsk,buff);	
	proto_ip.output(&proto_ip, buff, ETH_P_IP, IPPROTO_TCP, &newsk->saddr, &newsk->daddr, &sk->opt.ip);
	
	/*
	 *	Start the timer for the first.
	 */
	 
	reset_timer(newsk, TIME_WRITE, TCP_TIMEOUT_INIT);
	skb->sk = newsk;

	/*
	 *	Charge the sock_buff to newsk. 
	 */
	 
	sk->rmem_alloc -= skb->mem_len;
	newsk->rmem_alloc += skb->mem_len;
	
	/*
	 *	The SYN frame, now owned by newsk, waits on the listener's
	 *	receive queue and counts against its backlog.
	 */

	skb_queue_tail(&sk->receive_queue,skb);	
	sk->ack_backlog++;
	release_sock(newsk);
	tcp_statistics.TcpOutSegs++;
}


/*
 *	Close a tcp socket either by timeout or from
 *	a user request. We don't do the freeing of user
 *	process resources like file handles as that is done
 *	by the upper socket abstraction.
 *
 *	This is called when the last process holding the socket
 *	open closes it.
 */
 
static void tcp_close(struct sock *sk, int timeout)
{
 	sk_buff *buff;
	struct tcphdr *t1, *th;
	struct proto *prot;

	/*
	 *	We need to grab some memory, and put together a FIN,	
	 *	and then put it into the queue to be sent.
	 */
	 
	sk->inuse = 1;
	sk->keepopen = 1;
	sk->shutdown = SHUTDOWN_MASK;

	if (!sk->dead) 
	  	sk->state_change(sk);

	/*
	 *	We need to flush the recv. buffs if we have a process calling
	 *	close. On a timeout we leave them for the task to pick up before
	 *	it gets the error.
	 */

	if (timeout == 0)
	{
		while((buff=skb_dequeue(&sk->receive_queue))!=NULL)
			kfree_skb(buff, FREE_READ);
	}
	
	/*
	 *	Get rid off any half-completed packets. 
	 */
		 
	if (sk->partial) 
	{
		tcp_send_partial(sk);
	}

	switch(sk->state) 
	{
		case TCP_FIN_WAIT1:
		case TCP_FIN_WAIT2:
		case TCP_CLOSING:
			/*
			 * These states occur when we have already closed out
			 * our end.  If there is no timeout, we do not do
			 * anything.  We may still be in the middle of sending
			 * the remainder of our buffer, for example...
			 * resetting the timer would be inappropriate.
			 *
			 * XXX if retransmit count reaches limit, is tcp_close()
			 * called with timeout == 1 ? if not, we need to fix that.
			 */
			if (!timeout) {
				int timer_active;
                                timer_active = del_timer(&sk->timer);
                                if (timer_active)
					add_timer(&sk->timer);
				else
					reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
			}
                                                                            			 
                      	if (timeout) 
				tcp_time_wait(sk);
			release_sock(sk);
			return;	/* break causes a double release - messy */
		case TCP_TIME_WAIT:
		case TCP_LAST_ACK:
			/*
			 * A timeout from these states terminates the TCB.
			 */
			if (timeout) 
			{
		  		tcp_set_state(sk,TCP_CLOSE);
			}
			release_sock(sk);
			return;
		case TCP_LISTEN:
			tcp_set_state(sk,TCP_CLOSE);
			release_sock(sk);
			return;
		case TCP_CLOSE:
			release_sock(sk);
			return;
		
		/*
		 *	These states require we send a FIN.
		 */
		 
		case TCP_CLOSE_WAIT:
		case TCP_ESTABLISHED:
		case TCP_SYN_SENT:
		case TCP_SYN_RECV:
			prot =(struct proto *)sk->prot;
			th =(struct tcphdr *)&sk->opt.ip.dummy_th;
			buff = sock_wmalloc(sk, protocol_size(&proto_tcp), 1, GFP_ATOMIC);
			if (buff == NULL) 
			{
				/*
				 *	This will force it to try again later. 
				 */
				release_sock(sk);
				if (sk->state != TCP_CLOSE_WAIT)
					tcp_set_state(sk,TCP_ESTABLISHED);
				reset_timer(sk, TIME_CLOSE, 100);
				return;
			}
			protocol_adjust(buff,&proto_tcp);
			buff->sk = sk;
			buff->free = 1;
			buff->localroute = sk->localroute;

			t1 =(struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));
			memcpy(t1, th, sizeof(*t1));
			t1->seq = ntohl(sk->write_seq);
			sk->write_seq++;
			buff->h.seq = sk->write_seq;
			t1->ack = 1;
	
			/* 
			 *	Ack everything immediately from now on. 
			 */

			sk->delay_acks = 0;
			t1->ack_seq = ntohl(sk->acked_seq);
			t1->window = ntohs(sk->window=tcp_select_window(sk));
			t1->fin = 1;
			t1->rst = 0;
			t1->doff = sizeof(*t1)/4;
			buff->thptr=t1;
			
			tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);

			tcp_statistics.TcpOutSegs++;
	
			
			/*
			 *	Add the frame to the right queue and maybe send it. This simple check
			 *	of write_queue being empty is naive and wrong. This all should be done by
			 *	a single magic routine anyway.
			 */
			 
			if (skb_peek(&sk->write_queue) == NULL) 
			{
				sk->sent_seq = sk->write_seq;
				tcp_queue_retransmit(sk,buff);
				proto_ip.output(&proto_ip, buff, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip);
			} 
			else 
			{
				reset_timer(sk, TIME_WRITE, sk->rto);
				if (buff->next != NULL) 
				{
					printk("tcp_close: next != NULL\n");
					skb_unlink(buff);
				}
				skb_queue_tail(&sk->write_queue, buff);
			}

			/*
			 * 	If established (normal close), enter FIN_WAIT1.
			 * 	If in CLOSE_WAIT, enter LAST_ACK
			 * 	If in CLOSING, remain in CLOSING
			 * 	otherwise enter FIN_WAIT2
			 */

			if (sk->state == TCP_ESTABLISHED)
				tcp_set_state(sk,TCP_FIN_WAIT1);
			else if (sk->state == TCP_CLOSE_WAIT)
				tcp_set_state(sk,TCP_LAST_ACK);
			else if (sk->state != TCP_CLOSING)
				tcp_set_state(sk,TCP_FIN_WAIT2);
	}
	release_sock(sk);
}


/*
 *	This routine takes stuff off of the write queue,
 *	and puts it in the xmit queue.
 */
 
static void tcp_write_xmit(struct sock *sk)
{
	sk_buff *skb;

	/*
	 *	The bytes will have to remain here. In time closedown will
	 *	empty the write queue and all will be happy 
	 */

	if(sk->zapped)
		return;
		
	/*
	 *	While there are frames and the right edge of the frame fits into the
	 *	allowable send window (Wrong c/f CISCO bug) and we are not in a
	 *	retransmission mode then we can send it. We also don't send more frames
	 *	if they won't fit the congestion window.
	 *
	 */

	while((skb = skb_peek(&sk->write_queue)) != NULL &&
		before(skb->h.seq, sk->window_seq + 1) &&
		(sk->retransmits == 0 ||
		 sk->timeout != TIME_WRITE ||
		 before(skb->h.seq, sk->rcv_ack_seq + 1))
		&& sk->packets_out < sk->cong_window) 
	{
		IS_SKB(skb);
		
		/*
		 *	Take it from the queue
		 */
		 
		skb_unlink(skb);
		
		/*
		 *	See if we really need to send the packet. 
		 */
		 
		if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
		{
			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead) 
				sk->write_space(sk);
		} 
		else
		{
		
		/*
		 *	Move the frame both onto the network and into the retransmit queue.
		 */
		 
			struct tcphdr *th;
			int size;
		/*
		 * put in the ack seq and window at this point rather than earlier,
		 * in order to keep them monotonic.  We really want to avoid taking
		 * back window allocations.  That's legal, but RFC1122 says it's frowned upon.
		 * Ack and window will in general have changed since this packet was put
		 * on the write queue.
		 */
			th = skb->thptr;
			size = skb->len;
			
			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));

			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);

			sk->sent_seq = skb->h.seq;
			tcp_queue_retransmit(sk, skb);
			proto_ip.output(&proto_ip, skb, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip);
		}
	}
}


/*
 *	This routine deals with incoming acks, but not outgoing ones.
 *
 *	sk	- socket the segment arrived for
 *	th	- TCP header of the received segment; ack_seq and window
 *		  are read from it and byte-swapped here
 *	saddr	- not used by this routine
 *	len	- compared with th->doff*4 to decide whether the segment
 *		  carries anything beyond its header
 *
 *	Returns 0 when the ack is rejected: it acknowledges something
 *	beyond sk->sent_seq, or it is older than sk->rcv_ack_seq and the
 *	socket is in neither ESTABLISHED nor CLOSE_WAIT. Returns 1 in
 *	every other case, including a zapped socket.
 */

static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
{
	unsigned long ack;
	int flag = 0;

	/* 
	 * Meaning of the bits collected in flag:
	 * 1 - there was data in packet as well as ack or new data is sent or 
	 *     in shutdown state
	 * 2 - data from retransmit queue was acked and removed
	 * 4 - window shrunk or data from retransmit queue was acked and removed
	 */

	if(sk->zapped)
		return(1);	/* Dead, can't ack any more so why bother */

	ack = ntohl(th->ack_seq);
	
	/*
	 *	If we have seen a larger window remember the fact. It will be 
	 *	needed by the sending code.
	 */
	 
	if (ntohs(th->window) > sk->max_window) 
	{
  		sk->max_window = ntohs(th->window);
#ifdef CONFIG_INET_PCTCP
		sk->mss = min(sk->max_window>>1, sk->mtu);
#else
		sk->mss = min(sk->max_window, sk->mtu);
#endif	
	}
	
	/*
	 *	Retransmitting just for keepalives doesn't count.
	 */

	if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
	  	sk->retransmits = 0;

	/*
	 *	Is the ACK valid ? It must lie between the last ack we
	 *	accepted and the last sequence number we sent.
	 */
	 
	if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
	{
		if(sk->debug)
			printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
			
		/*
		 *	Keepalive processing. An old ack is only tolerated in
		 *	ESTABLISHED and CLOSE_WAIT; an ack for data we have not
		 *	sent is never accepted.
		 */
		 
		if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
		{
			return(0);
		}
		if (sk->keepopen) 
		{
			if(sk->timeout==TIME_KEEPOPEN)
				reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
		}
		return(1);
	}
	
	/*
	 *	Is there data in the buffer ?
	 */

	if (len != th->doff*4) 
		flag |= 1;

	/*
	 *	See if our window has been shrunk. 
	 */

	if (after(sk->window_seq, ack+ntohs(th->window))) 
	{
		/*
		 *	We may need to move packets from the send queue
		 *	to the write queue, if the window has been shrunk on us.
		 *	The RFC says you are not allowed to shrink your window
		 *	like this, but if the other end does, you must be able
		 *	to deal with it. [To start with ACK frames are not sequenced
		 *	and can thus arrive out of order causing this effect].
		 */
		sk_buff *skb;
		sk_buff *skb2;
		sk_buff *wskb = NULL;
  	
		/*
		 *	Detach the whole retransmit list (chained through
		 *	link3) and rebuild it below from the frames that still
		 *	fit the new window.
		 */

		skb2 = sk->send_head;
		sk->send_head = NULL;
		sk->send_tail = NULL;
	
		flag |= 4;
	
		sk->window_seq = ack + ntohs(th->window);
		cli();
		while (skb2 != NULL) 
		{
			skb = skb2;
			skb2 = skb->link3;
			skb->link3 = NULL;
			if (after(skb->h.seq, sk->window_seq)) 
			{
				/*
				 *	Beyond the new right edge: back onto the
				 *	write queue. wskb remembers the last frame
				 *	moved so that their order is kept.
				 */
				if (sk->packets_out > 0) 
					sk->packets_out--;
				/* We may need to remove this from the dev send list. */
				if (skb->next != NULL) 
				{
					skb_unlink(skb);				
				}
				/* Now add it to the write_queue. */
				if (wskb == NULL)
					skb_queue_head(&sk->write_queue,skb);
				else
					skb_append(wskb,skb);
				wskb = skb;
			} 
			else 
			{
				/*
				 *	Still inside the window: append it to the
				 *	rebuilt retransmit list.
				 */
				if (sk->send_head == NULL) 
				{
					sk->send_head = skb;
					sk->send_tail = skb;
				}
				else
				{
					sk->send_tail->link3 = skb;
					sk->send_tail = skb;
				}
				skb->link3 = NULL;
			}
		}
		sti();
	}


	/*
	 *	If we left nothing in the send queue clean up the pointers.
	 */
	 
	if (sk->send_tail == NULL || sk->send_head == NULL) 
	{
		sk->send_head = NULL;
		sk->send_tail = NULL;
		sk->packets_out= 0;
	}

	/*
	 *	This is the new right hand window edge.
	 */
	 
	sk->window_seq = ack + ntohs(th->window);

	/*
	 *	We don't want too many packets out there. 
	 */
	 
	if (sk->timeout == TIME_WRITE && 
		sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
	{
	/* 
	 * This is Jacobson's slow start and congestion avoidance. 
	 * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
	 * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
	 * counter and increment it once every cwnd times.  It's possible
	 * that this should be done only if sk->retransmits == 0.  I'm
	 * interpreting "new data is acked" as including data that has
	 * been retransmitted but is just now being acked.
	 */
		if (sk->cong_window < sk->ssthresh)  
		  /* 
		   *	In "safe" area, increase
		   */
			sk->cong_window++;
		else 
		{
		  /*
		   *	In dangerous area, increase slowly.  In theory this is
		   *  	sk->cong_window += 1 / sk->cong_window
		   */
			if (sk->cong_count >= sk->cong_window) 
			{
				sk->cong_window++;
				sk->cong_count = 0;
			}
			else 
				sk->cong_count++;
		}
	}

	sk->rcv_ack_seq = ack;

	/*
	 *	If this ack opens up a zero window, clear backoff.  It was
	 *	being used to time the probes, and is probably far higher than
	 *	it needs to be for normal retransmission.
	 */

	if (sk->timeout == TIME_PROBE0) 
	{
  		if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
		    ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
		{
			sk->retransmits = 0;
			sk->backoff = 0;
		  /*
		   *	Recompute rto from rtt.  this eliminates any backoff.
		   */

			sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
			if (sk->rto > 120*HZ)
				sk->rto = 120*HZ;
			if (sk->rto < 20)	/* Was 1*HZ, then 1 - turns out we must allow about
						   .2 of a second because of BSD delayed acks - on a 100Mb/sec link
						   .2 of a second is going to need huge windows (SIGH) */
				sk->rto = 20;
		}
	}

	  /* 
	   *	See if we can take anything off of the retransmit queue.
	   */
   
	while(sk->send_head != NULL) 
	{
		/*
		 *	Check for a (long dead) bug. 
		 */
		 
		if (sk->send_head->link3 &&
		    after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
			printk("INET: tcp.c: *** bug send_list out of order.\n");
		
		/*
		 *	If the frame has a right hand edge before or equal to the last ack then 
		 *	we may free it up.
		 */

		if (before(sk->send_head->h.seq, ack+1)) 
		{
			sk_buff *oskb;	
			if (sk->retransmits) 
			{	
				/*
				 *	We were retransmitting.  don't count this in RTT est 
				 */
				flag |= 2;

				/*
				 *	Even though we've gotten an ack, we're still
				 *	retransmitting as long as we're sending from
				 *	the retransmit queue.  Keeping retransmits non-zero
				 *	prevents us from getting new data interspersed with
				 *	retransmissions.
				 */

				if (sk->send_head->link3)
					sk->retransmits = 1;
				else
					sk->retransmits = 0;
			}
  			/*
			 *	Note that we only reset backoff and rto in the
			 *	rtt recomputation code.  And that doesn't happen
			 *	if there were retransmissions in effect.  So the
			 *	first new packet after the retransmissions is
			 *	sent with the backoff still in effect.  Not until
			 *	we get an ack from a non-retransmitted packet do
			 *	we reset the backoff and rto.  This allows us to deal
			 *	with a situation where the network delay has increased
			 *	suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
			 */

			/*
			 *	We have one less packet out there. 
			 */
			 
			if (sk->packets_out > 0) 
				sk->packets_out --;
			/* 
			 *	Wake up the process, it can probably write more. 
			 */
			if (!sk->dead) 
				sk->write_space(sk);
			oskb = sk->send_head;

			/*
			 *	flag&2 is set above while retransmitting, and
			 *	below once any frame has been freed, so the round
			 *	trip estimate is only updated for the first frame
			 *	freed by an ack outside retransmission.
			 */

			if (!(flag&2)) 
			{
				long m;
	
				/*
				 *	The following amusing code comes from Jacobson's
				 *	article in SIGCOMM '88.  Note that rtt and mdev
				 *	are scaled versions of rtt and mean deviation.
				 *	This is designed to be as fast as possible 
				 *	m stands for "measurement".
				 */
	
				m = jiffies - oskb->when;  /* RTT */
				if(m<=0)
					m=1;		/* IS THIS RIGHT FOR <0 ??? */
				m -= (sk->rtt >> 3);    /* m is now error in rtt est */
				sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
				if (m < 0)
					m = -m;		/* m is now abs(error) */
				m -= (sk->mdev >> 2);   /* similar update on mdev */
				sk->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
	
				/*
				 *	Now update timeout.  Note that this removes any backoff.
				 */
			 
				sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
				if (sk->rto > 120*HZ)
					sk->rto = 120*HZ;
				if (sk->rto < 20)	/* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
					sk->rto = 20;
				sk->backoff = 0;
			}
			flag |= (2|4);

			/*
			 *	Take the frame off the head of the retransmit
			 *	list with interrupts disabled.
			 */

			cli();
			oskb = sk->send_head;
			IS_SKB(oskb);
			sk->send_head = oskb->link3;
			if (sk->send_head == NULL) 
			{
				sk->send_tail = NULL;
			}

			/*
			 *	We may need to remove this from the dev send list (if a copy is queued to go off just
			 *	at this moment. Note that if the driver has the buffer in its paws the kfree_skb will
			 *	not free the buffer but the device unlock will.
			 */

			if (oskb->next)
				skb_unlink(oskb);
			sti();
			kfree_skb(oskb, FREE_WRITE); /* write. */
			if (!sk->dead) 
				sk->write_space(sk);
		}
		else
		{
			break;
		}
	}

	/*
	 * XXX someone ought to look at this too.. at the moment, if skb_peek()
	 * returns non-NULL, we completely ignore the timer stuff in the else
	 * clause.  We ought to organize the code so that else clause can
	 * (should) be executed regardless, possibly moving the PROBE timer
	 * reset over.  The skb_peek() thing should only move stuff to the
	 * write queue, NOT also manage the timer functions.
	 */

	/*
	 *	Maybe we can take some stuff off of the write queue,
	 *	and put it onto the xmit queue.
	 */
	 
	if (skb_peek(&sk->write_queue) != NULL) 
	{
		/*
		 *	If the right edge of the frame is within the window and we are
		 *	not retransmitting and there are not too many frames in the 
		 *	network. (Again this is yet another duplicate... )
		 */
		 
		if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
		        (sk->retransmits == 0 || 
			 sk->timeout != TIME_WRITE ||
			 before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
			&& sk->packets_out < sk->cong_window) 
		{
			flag |= 1;
			tcp_write_xmit(sk);
		}
		
		/*
		 *	If it doesn't fit, start the zero window probe timer.
		 *	Again this is wrong and we should concede defeat and
		 *	send a partial frame.
		 */
		 
		else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 			sk->send_head == NULL &&
 			sk->ack_backlog == 0 &&
 			sk->state != TCP_TIME_WAIT) 
 		{
 	        	reset_timer(sk, TIME_PROBE0, sk->rto);
 		}		
	}
	else
	{
		/*
		 * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
		 * from TCP_CLOSE we don't do anything
		 *
		 * from anything else, if there is write data (or fin) pending,
		 * we use a TIME_WRITE timeout, else if keepalive we reset to
		 * a KEEPALIVE timeout, else we delete the timer.
		 *
		 * We do not set flag for nominal write data, otherwise we may
		 * force a state where we start to write itsy bitsy tidbits
		 * of data.
		 */

		switch(sk->state) 
		{
		case TCP_TIME_WAIT:
			/*
			 * keep us in TIME_WAIT until we stop getting packets,
			 * reset the timeout.
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			break;
		case TCP_CLOSE:
			/*
			 * don't touch the timer.
			 */
			break;
		default:
			/*
			 *	Must check send_head, write_queue, and ack_backlog
			 * 	to determine which timeout to use.
			 */
			if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
				reset_timer(sk, TIME_WRITE, sk->rto);
			} else if (sk->keepopen) {
				reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
			} else {
				delete_timer(sk);
			}
			break;
		}
	}

	/*
	 *	If there are no packets being sent then kick out any partial frames
	 *	that could go out. This is probably the right place to put the
	 *	partial send overlapping right edge of window bug fix.
	 */
	 
	if (sk->packets_out == 0 && sk->partial != NULL &&
		skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
	{
		flag |= 1;
		tcp_send_partial(sk);
	}

	/*
	 *	In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
	 *	we are now waiting for an acknowledge to our FIN.  The other end is
	 *	already in TIME_WAIT.
	 *
	 *	Move to TCP_CLOSE on success.
	 */

	if (sk->state == TCP_LAST_ACK) 
	{
		if (!sk->dead)
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
		{
			flag |= 1;
			tcp_set_state(sk,TCP_CLOSE);
			sk->shutdown = SHUTDOWN_MASK;
		}
	}

	/*
	 * Incoming ACK to a FIN we sent in the case of our initiating the close.
	 *
	 * Move to FIN_WAIT2 to await a FIN from the other end. Set
	 * SEND_SHUTDOWN but not RCV_SHUTDOWN as data may still be coming in.
	 */

	if (sk->state == TCP_FIN_WAIT1) 
	{

		if (!sk->dead) 
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq) 
		{
			flag |= 1;
			sk->shutdown |= SEND_SHUTDOWN;
			tcp_set_state(sk,TCP_FIN_WAIT2);
		}
	}

	/*
	 *	Incoming ACK to a FIN we sent in the case of a simultaneous close.
	 *
	 *	Move to TIME_WAIT
	 */

	if (sk->state == TCP_CLOSING) 
	{

		if (!sk->dead) 
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq) 
		{
			flag |= 1;
			tcp_time_wait(sk);
		}
	}

	/*
	 * I make no guarantees about the first clause in the following
	 * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
	 * what conditions "!flag" would be true.  However I think the rest
	 * of the conditions would prevent that from causing any
	 * unnecessary retransmission. 
	 *   Clearly if the first packet has expired it should be 
	 * retransmitted.  The other alternative, "flag&2 && retransmits", is
	 * harder to explain:  You have to look carefully at how and when the
	 * timer is set and with what timeout.  The most recent transmission always
	 * sets the timer.  So in general if the most recent thing has timed
	 * out, everything before it has as well.  So we want to go ahead and
	 * retransmit some more.  If we didn't explicitly test for this
	 * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
	 * would not be true.  If you look at the pattern of timing, you can
	 * show that rto is increased fast enough that the next packet would
	 * almost never be retransmitted immediately.  Then you'd end up
	 * waiting for a timeout to send each packet on the retransmission
	 * queue.  With my implementation of the Karn sampling algorithm,
	 * the timeout would double each time.  The net result is that it would
	 * take a hideous amount of time to recover from a single dropped packet.
	 * It's possible that there should also be a test for TIME_WRITE, but
	 * I think as long as "send_head != NULL" and "retransmit" is on, we've
	 * got to be in real retransmission mode.
	 *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
	 * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
	 * As long as no further losses occur, this seems reasonable.
	 */
	
	if (((!flag) || (flag&4)) && sk->send_head != NULL &&
	       (((flag&2) && sk->retransmits) ||
	       (sk->send_head->when + sk->rto < jiffies))) 
	{
		tcp_do_retransmit(sk, 1);
		reset_timer(sk, TIME_WRITE, sk->rto);
	}

	return(1);
}


/*
 *	This routine handles the data.  If there is room in the buffer,
 *	it will already have been moved into it.  If there is no
 *	room, then we will just have to discard the packet.
 *
 *	skb	- the received frame; it is either freed here or placed
 *		  on sk->receive_queue
 *	sk	- socket the frame arrived for
 *	th	- TCP header of the frame. th->ack_seq is overwritten below
 *		  with the sequence number that follows this frame.
 *	saddr	- passed on to tcp_send_ack()
 *	len	- the frame is trimmed to len - th->doff*4 bytes once the
 *		  header has been pulled off
 *
 *	Always returns 0.
 */

static int tcp_data(sk_buff *skb, struct sock *sk, struct tcphdr *th,
	 unsigned long saddr, unsigned short len)
{
	sk_buff *skb1, *skb2;
	int dup_dumped=0;
	unsigned long new_seq;

	skb->h.th=(struct tcphdr *)skb_pull(skb,th->doff*4,NULL);/* Pull off tcp header and options */
	
	skb_trim(skb,len -(th->doff*4));

	/* The bytes in the receive read/assembly queue has increased. Needed for the
	   low memory discard algorithm */
	   
	sk->bytes_rcv += skb->len;
	
	if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
	{
		/* 
		 *	Don't want to keep passing ack's back and forth. 
		 *	(someone sent us dataless, boring frame)
		 */
		if (!th->ack)
			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
		kfree_skb(skb, FREE_READ);
		return(0);
	}
	
	/*
	 *	We no longer have anyone receiving data on this connection.
	 */

	if(sk->shutdown & RCV_SHUTDOWN)
	{
		new_seq= th->seq + skb->len + th->syn;	/* Right edge of _data_ part of frame */
		
		if(after(new_seq,sk->acked_seq+1))	/* If the right edge of this frame is after the last copied byte
							   then it contains data we will never touch. We send an RST to 
							   ensure the far end knows it never got to the application */
		{
			sk->acked_seq = new_seq + th->fin;
			tcp_reset(sk->saddr, sk->daddr, skb->h.th,
				sk->prot, NULL, skb->dev, sk->opt.ip.tos, sk->opt.ip.ttl);
			tcp_statistics.TcpEstabResets++;
			tcp_set_state(sk,TCP_CLOSE);
			sk->err = EPIPE;
			sk->shutdown = SHUTDOWN_MASK;
			kfree_skb(skb, FREE_READ);
			if (!sk->dead)
				sk->state_change(sk);
			return(0);
		}
	}
	/*
	 * 	Now we have to walk the chain, and figure out where this one
	 * 	goes into it.  This is set up so that the last packet we received
	 * 	will be the first one we look at, that way if everything comes
	 * 	in order, there will be no performance loss, and if they come
	 * 	out of order we will be able to fit things in nicely.
	 */

	/* 
	 *	This starts at the last one, and then goes around backwards
	 *	(following ->prev). A lot of things would perform much better if
	 *	we pulled and trimmed all the surplus data off when we queued
	 *	the frames and also discarded any junk. - FIXME.
	 *
	 *	On leaving this section skb1 is NULL if the queue was empty or
	 *	a duplicate was replaced; otherwise it is the frame at which
	 *	the walk stopped.
	 */

	if (skb_peek(&sk->receive_queue) == NULL) 	/* Empty queue is easy case */
	{
		skb_queue_head(&sk->receive_queue,skb);
		skb1= NULL;
	} 
	else
	{
		for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
		{
			if(sk->debug)
			{
				printk("skb1=%p :", skb1);
				printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
				printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
				printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
						sk->acked_seq);
			}
			
			/*
			 *	Optimisation: Duplicate frame or extension of previous frame from
			 *	same sequence point (lost ack case).
			 *	The frame contains duplicate data or replaces a previous frame
			 *	discard the previous frame (safe as sk->inuse is set) and put
			 *	the new one in its place.
			 */
			 
			if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
			{
				skb_append(skb1,skb);
				skb_unlink(skb1);
				kfree_skb(skb1,FREE_READ);
				dup_dumped=1;
				skb1=NULL;
				break;
			}
			
			/*
			 *	Found where it fits
			 */
			 
			if (after(th->seq+1, skb1->h.th->seq))
			{
				skb_append(skb1,skb);
				break;
			}
			
			/*
			 *	See if we've hit the start. If so insert.
			 */
			 
			if (skb1 == skb_peek(&sk->receive_queue))
			{
				skb_queue_head(&sk->receive_queue, skb);
				break;
			}
		}
  	}

	/*
	 *	Figure out what the ack value for this frame is. SYN and FIN
	 *	each take up one sequence number in addition to the data.
	 */
	 
 	th->ack_seq = th->seq + skb->len;
 	if (th->syn) 
 		th->ack_seq++;
 	if (th->fin)
 		th->ack_seq++;

	/*
	 *	Check for accidents.
	 */
	 
	if (before(sk->acked_seq, sk->copied_seq)) 
	{
		printk("*** tcp.c:tcp_data bug acked < copied\n");
		sk->acked_seq = sk->copied_seq;
	}

	/*
	 *	Now figure out if we can ack anything.
	 */

	if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
	{
		/*
		 *	This lot is way way way too slow and complex for what it does.
		 */
		 
		if (before(th->seq, sk->acked_seq+1)) 
		{
			int newwindow;

			/*
			 *	The frame starts at or before the point acked
			 *	so far. If it reaches beyond that point, move
			 *	acked_seq forward and take the newly acked bytes
			 *	off the window, never letting it go negative.
			 */

			if (after(th->ack_seq, sk->acked_seq)) 
			{
				newwindow = sk->window-(th->ack_seq - sk->acked_seq);
				if (newwindow < 0)
					newwindow = 0;	
				sk->window = newwindow;
				sk->acked_seq = th->ack_seq;
			}
			skb->acked = 1;

			/* 
			 *	When we ack the fin, we turn on the RCV_SHUTDOWN flag.
			 */

			if (skb->h.th->fin) 
			{
				if (!sk->dead) 
					sk->state_change(sk);
				sk->shutdown |= RCV_SHUTDOWN;
			}
	  
			/*
			 *	Frames queued after this one may now be
			 *	contiguous as well. Ack each of them in turn,
			 *	stopping at the first gap.
			 */

			for(skb2 = skb->next;
			    skb2 != (sk_buff *)&sk->receive_queue;
			    skb2 = skb2->next) 
			{
				if (before(skb2->h.th->seq, sk->acked_seq+1)) 
				{
					if (after(skb2->h.th->ack_seq, sk->acked_seq))
					{
						newwindow = sk->window -
						 (skb2->h.th->ack_seq - sk->acked_seq);
						if (newwindow < 0)
							newwindow = 0;	
						sk->window = newwindow;
						sk->acked_seq = skb2->h.th->ack_seq;
					}
					skb2->acked = 1;
					/*
					 * 	When we ack the fin, we turn on
					 * 	the RCV_SHUTDOWN flag.
					 */
					if (skb2->h.th->fin) 
					{
						sk->shutdown |= RCV_SHUTDOWN;
						if (!sk->dead)
							sk->state_change(sk);
					}

					/*
					 *	Force an immediate ack.
					 */
					 
					sk->ack_backlog = sk->max_ack_backlog;
				}
				else
				{
					break;
				}
			}

			/*
			 *	This also takes care of updating the window.
			 *	This if statement needs to be simplified.
			 */
			if (!sk->delay_acks ||
			    sk->ack_backlog >= sk->max_ack_backlog || 
			    sk->bytes_rcv > sk->max_unacked || th->fin) {
				/*
				 *	An immediate ack is wanted. Nothing is done
				 *	here (a tcp_send_ack() call used to be); the
				 *	ack goes out at the bottom of this function,
				 *	where skb->acked is tested.
				 */
			}
			else 
			{
				sk->ack_backlog++;
				if(sk->debug)
					printk("Ack queued.\n");
				reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
			}
		}
	}

	/*
	 *	If this frame could not be acked we have missed a packet:
	 *	send an ack. Also start a timer to send another.
	 */
	 
	if (!skb->acked) 
	{
	
	/*
	 *	This is important.  If we don't have much room left,
	 *	we need to throw out a few packets so we have a good
	 *	window.  Note that mtu is used, not mss, because mss is really
	 *	for the send side.  He could be sending us stuff as large as mtu.
	 *
	 *	NOTE(review): the frame at the head of the queue may be the
	 *	one queued above, and th is used again after this loop -
	 *	confirm the loop cannot free the frame th belongs to.
	 */
		 
		while (sock_rspace(sk) < sk->mtu) 
		{
			skb1 = skb_peek(&sk->receive_queue);
			if (skb1 == NULL) 
			{
				printk("INET: tcp.c:tcp_data memory leak detected.\n");
				break;
			}

			/*
			 *	Don't throw out something that has been acked. 
			 */
		 
			if (skb1->acked) 
			{
				break;
			}
		
			skb_unlink(skb1);
			kfree_skb(skb1, FREE_READ);
		}
		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
		sk->ack_backlog++;
		reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
	}
	else
	{
		/* The frame was acked above: send the ack now. */
		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
	}

	/*
	 *	Now tell the user we may have some data. 
	 */
	 
	if (!sk->dead) 
	{
        	if(sk->debug)
        		printk("Data wakeup.\n");
		sk->data_ready(sk,0);
	} 
	return(0);
}

/*
 *	Look at the urgent pointer of a frame and, if it announces
 *	urgent data that is new to us, record where that data is and
 *	send SIGURG to the owner of the socket.
 */

static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
{
	unsigned long urg_at;

	/*
	 *	Turn the urgent pointer into a sequence number: a non-zero
	 *	pointer is taken back by one and then added to the sequence
	 *	number of the frame.
	 */
	urg_at = ntohs(th->urg_ptr);
	if (urg_at != 0)
		urg_at -= 1;
	urg_at += th->seq;

	/* Urgent data we have already seen and read is of no interest. */
	if (after(sk->copied_seq+1, urg_at))
		return;

	/* Neither is a pointer no newer than the one we already hold. */
	if (sk->urg_data && !after(urg_at, sk->urg_seq))
		return;

	/*
	 *	Signal the owner: a positive sk->proc names a process, a
	 *	negative one a process group. Zero means nobody is told.
	 */
	if (sk->proc > 0)
		kill_proc(sk->proc, SIGURG, 1);
	else if (sk->proc < 0)
		kill_pg(-sk->proc, SIGURG, 1);

	/* Remember the new urgent pointer; the byte itself comes later. */
	sk->urg_data = URG_NOTYET;
	sk->urg_seq = urg_at;
}

/*
 *	Urgent data handling for a received frame: note any new urgent
 *	pointer, then pick the urgent byte out of the frame if it lies
 *	inside it. Always returns 0.
 */
 
static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
	unsigned long saddr, unsigned long len)
{
	unsigned long offset;

	/* A frame with URG set may bring a new urgent pointer. */
	if (th->urg)
		tcp_check_urg(sk,th);

	/* Only go looking when an urgent byte is still awaited. */
	if (sk->urg_data == URG_NOTYET)
	{
		/*
		 *	Offset of the urgent byte measured from the start of
		 *	the TCP header. The arithmetic is unsigned, so the one
		 *	comparison with len rejects everything that does not
		 *	point into this frame.
		 */
		offset = sk->urg_seq - th->seq + th->doff*4;
		if (offset < len)
		{
			/* Got the right frame: keep the byte, wake the reader. */
			sk->urg_data = URG_VALID | ((unsigned char *) th)[offset];
			if (!sk->dead)
				sk->data_ready(sk,0);
		}
	}
	return 0;
}


/*
 *  Handle a FIN arriving from the other end.
 *
 *  SYN_RECV, SYN_SENT or ESTABLISHED: move to CLOSE_WAIT (and thence
 *  onto LAST_ACK and finally CLOSE; TIME_WAIT is never entered).
 *
 *  FIN_WAIT1: both ends are closing at once, move to CLOSING (and
 *  later onto TIME_WAIT).
 *
 *  FIN_WAIT2: move to TIME_WAIT.
 *
 *  CLOSE_WAIT, CLOSING or CLOSE: no change of state.
 *
 *  TIME_WAIT: only the TIME_WAIT timer is restarted.
 *
 *  Any other state: move to LAST_ACK.
 *
 *  An ack is added to the backlog except in the last two cases.
 *  Always returns 0.
 */
 
static int tcp_fin(sk_buff *skb, struct sock *sk, struct tcphdr *th, 
	 unsigned long saddr, struct device *dev)
{
	int queue_ack = 1;	/* cleared by the cases that add no ack */

	/*
	 *	The sequence number following the FIN: the data, plus one
	 *	each for a SYN and the FIN.
	 */
	sk->fin_seq = th->seq + skb->len + th->syn + th->fin;

	if (!sk->dead) 
		sk->state_change(sk);

	switch(sk->state) 
	{
		case TCP_CLOSE_WAIT:
		case TCP_CLOSING:
		case TCP_CLOSE:
			/*
			 * Either a retransmission of the FIN or we are
			 * already closed: nothing to do.
			 */
			break;

		case TCP_TIME_WAIT:
			/*
			 * A retransmission of the FIN: restart the
			 * TIME_WAIT timer and do nothing else.
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			queue_ack = 0;
			break;

		case TCP_SYN_RECV:
		case TCP_SYN_SENT:
		case TCP_ESTABLISHED:
			/*
			 * The other end closed first. tcp_data() has
			 * already handled sending the ack.
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
			tcp_set_state(sk,TCP_CLOSE_WAIT);
			if (th->rst)
				sk->shutdown = SHUTDOWN_MASK;
			break;

		case TCP_FIN_WAIT1:
			/*
			 * Simultaneous close: the received FIN must be
			 * acked and we enter CLOSING.
			 *
			 * XXX timeout not set properly
			 */
			tcp_statistics.TcpCurrEstab--;
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			tcp_set_state(sk,TCP_CLOSING);
			break;

		case TCP_FIN_WAIT2:
			/*
			 * Our FIN was acked earlier and now theirs has
			 * come in: ack it and enter TIME_WAIT.
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			sk->shutdown |= SHUTDOWN_MASK;
			tcp_set_state(sk,TCP_TIME_WAIT);
			break;

		default:
			/*
			 * Anything else goes to LAST_ACK with the timer
			 * started.
			 */
			tcp_set_state(sk,TCP_LAST_ACK);
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			queue_ack = 0;
			break;
	}

	if (queue_ack)
		sk->ack_backlog++;

	return(0);
}


/*
 *	Hand back the next established connection waiting on a
 *	listening socket.
 *
 *	Returns the new socket, or NULL with sk->err set to:
 *	  EINVAL	- sk is not listening
 *	  EAGAIN	- nothing is pending and O_NONBLOCK is set in flags
 *	  ERESTARTSYS	- a signal arrived while sleeping
 */
 
static struct sock *tcp_accept(struct sock *sk, int flags)
{
	struct sock *child;
	sk_buff *req;

	/*
	 *	Only a listening socket can have connections pending.
	 */
	
	if (sk->state != TCP_LISTEN) 
	{
		sk->err = EINVAL;
		return(NULL); 
	}

	/*
	 *	Interrupts are kept off while the queue is examined,
	 *	to avoid the race.
	 */

	cli();
	sk->inuse = 1;

	for (;;) 
	{
		req = tcp_dequeue_established(sk);
		if (req != NULL)
			break;

		/*
		 *	Nothing there yet. A non-blocking caller gives
		 *	up at once.
		 */

		if (flags & O_NONBLOCK) 
		{
			sti();
			release_sock(sk);
			sk->err = EAGAIN;
			return(NULL);
		}

		/*
		 *	Otherwise let go of the socket and sleep until
		 *	woken, giving up if that was due to a signal.
		 */

		release_sock(sk);
		interruptible_sleep_on(sk->sleep);
		if (current->signal & ~current->blocked) 
		{
			sti();
			sk->err = ERESTARTSYS;
			return(NULL);
		}

		/* Take the socket again before looking once more. */
		sk->inuse = 1;
	}
	sti();

	/*
	 *	The queued frame carries the socket of the new
	 *	connection; once that is noted the frame can go.
	 */

	child = req->sk;

	sk->err=0;		/* Not ideal - FIXME properly */

	kfree_skb(req, FREE_READ);
	sk->ack_backlog--;
	release_sock(sk);
	return(child);
}


/*
 *	This will initiate an outgoing connection. 
 *
 *	Builds a SYN carrying one 4-byte option, queues it for retransmission
 *	and hands it to the IP layer, leaving the socket in TCP_SYN_SENT.
 *
 *	Returns:
 *	  -EISCONN	 the socket is not in TCP_CLOSE
 *	  -EADDRNOTAVAIL ip_chk_addr() reports the target as broadcast/multicast
 *	  -EBUSY	 the target equals our own address and port
 *	  -ENETUNREACH	 ip_rt_route() found no route
 *	  -ENOBUFS	 no buffer could be allocated for the SYN
 *	  0		 otherwise; a failure reported by the IP output call is
 *			 stored in sk->err rather than returned.
 *
 *	NOTE(review): addr_len is never examined here - confirm that the
 *	caller has already validated the size and family of *usin.
 */
 
static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
{
	sk_buff *buff;
	struct device *dev=NULL;
	unsigned char *ptr;
	int tmp;
	struct tcphdr *t1;
	struct rtable *rt;

	if (sk->state != TCP_CLOSE) 
		return(-EISCONN);

	/*
	 *	Don't want a TCP connection going to a broadcast or multicast address 
	 */

	tmp=ip_chk_addr(usin->sin_addr.s_addr);
	
	if(tmp == IS_BROADCAST || tmp == IS_MULTICAST) 
	{ 
		return -EADDRNOTAVAIL;	/* So sayeth my BSD book */
	}
  
  	sk->saddr=ip_get_saddr(sk->saddr, usin->sin_addr.s_addr, sk->localroute);

	/*
	 *	Connect back to the same socket: Blows up so disallow it 
	 */

	if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
		return -EBUSY;

	rt=ip_rt_route(usin->sin_addr.s_addr, NULL, NULL);
	if(rt==NULL)
		return -ENETUNREACH;
		
	dev=rt->rt_dev;

	/*
	 *	Fill in the connection identity and starting sequence numbers
	 *	with the socket marked in use.
	 */
	sk->inuse = 1;
	sk->daddr = usin->sin_addr.s_addr;
	sk->write_seq = jiffies * SEQ_TICK - seq_offset;
	sk->window_seq = sk->write_seq;
	sk->rcv_ack_seq = sk->write_seq -1;
	sk->err = 0;
	sk->opt.ip.dummy_th.dest = usin->sin_port;
	/*
	 *	The socket is released across the GFP_KERNEL allocation and
	 *	marked in use again afterwards (presumably because the
	 *	allocation may sleep - confirm).
	 */
	release_sock(sk);

	buff = sock_wmalloc(sk,protocol_size(&proto_tcp)+4,0, GFP_KERNEL);
	if (buff == NULL) 
	{
		return(-ENOBUFS);
	}
	protocol_adjust(buff,&proto_tcp);
	sk->inuse = 1;
	buff->sk = sk;
	buff->free = 1;
	buff->localroute = sk->localroute;

	t1 = (struct tcphdr *)skb_push(buff,sizeof(struct tcphdr));
	
	/*
	 *	Put in the IP header and routing stuff. 
	 */
	 
	memcpy(t1,(void *)&(sk->opt.ip.dummy_th), sizeof(*t1));
	/* The SYN consumes one sequence number, hence the post-increment. */
	t1->seq = ntohl(sk->write_seq++);
	sk->sent_seq = sk->write_seq;
	buff->h.seq = sk->write_seq;
	t1->ack = 0;
	/*
	 *	NOTE(review): stored without a byte swap; the "512" in the
	 *	comment below presumably relies on a little-endian host
	 *	(0x0002 read in network order is 0x0200) - confirm.
	 */
	t1->window = 2;
	t1->res1=0;
	t1->res2=0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->urg_ptr = 0;
	/* 6 words: the header plus the 4-byte option appended below (assuming a 20-byte struct tcphdr). */
	t1->doff = 6;
	/* use 512 or whatever user asked for */
	
	/* rt cannot be NULL here (checked above); the tests below are defensive. */
	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		sk->window_clamp=rt->rt_window;
	else
		sk->window_clamp=0;

	/*
	 *	Segment size preference: user setting, then a per-route value,
	 *	then 576 - HEADER_SIZE when source and destination differ
	 *	under the mask, else MAX_WINDOW.
	 */
	if (sk->user_mss)
		sk->mtu = sk->user_mss;
	else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
		sk->mtu = rt->rt_mss;
	else 
	{
#ifdef CONFIG_INET_SNARL
		if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
#else
		if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
#endif
			sk->mtu = 576 - HEADER_SIZE;
		else
			sk->mtu = MAX_WINDOW;
	}
	/*
	 *	but not bigger than device MTU 
	 */

	if(sk->mtu <32)
		sk->mtu = 32;	/* Sanity limit */
		
	sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
	
	/*
	 *	Put in the TCP options to say MTU. 
	 *	Kind 2, length 4, then the 16-bit value high byte first
	 *	(the maximum segment size option of RFC 793).
	 */

	ptr = skb_put(buff,4);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = (sk->mtu) >> 8;
	ptr[3] = (sk->mtu) & 0xff;
	tcp_send_check(t1, sk->saddr, sk->daddr,
		  sizeof(struct tcphdr) + 4, sk);

	/*
	 *	This must go first otherwise a really quick response will get reset. 
	 */

	tcp_set_state(sk,TCP_SYN_SENT);
	sk->rto = TCP_TIMEOUT_INIT;
	reset_timer(sk, TIME_WRITE, sk->rto);	/* Timer for repeating the SYN until an answer */
	sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
	buff->thptr=t1;
	
	buff->dev=NULL;
	/* Queue for retransmission before handing the buffer to IP. */
	tcp_queue_retransmit(sk,buff);
	if((tmp=proto_ip.output(&proto_ip, buff, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip))==0)  
	{
		tcp_statistics.TcpActiveOpens++;
		tcp_statistics.TcpOutSegs++;
	}
	else
		sk->err=tmp;	/* reported through the socket, not the return value */
	release_sock(sk);
	return(0);
}

/*
 *	Decide whether an incoming segment is worth processing.
 *
 *	Returns 1 when at least part of the segment looks interesting, and
 *	also after answering an unacceptable segment with a reset while in
 *	SYN_SENT/SYN_RECV.  Returns 0 when the segment should be dropped:
 *	either it carried RST, or an ACK was sent to try to resync the peer.
 */
 
static int tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
	     struct options *opt, unsigned long saddr, struct device *dev)
{
	unsigned long seg_end;
	int acceptable;

	/* Sequence space used by the segment: payload bytes, plus one for a FIN. */
	seg_end = len - 4*th->doff;
	if (th->fin)
		seg_end++;

	if (seg_end && !sk->window)
	{
		/*
		 *	If we have a zero window, we can't have any data in the packet..
		 *	FIXME: It would appear permitted to indicate an URG frame this way. (Check)
		 */
		acceptable = 0;
	}
	else
	{
		seg_end += th->seq;

		/*
		 *	This isn't quite right.  sk->acked_seq could be more recent
		 *	than sk->window.  This is however close enough.  We will accept
		 *	slightly more packets than we should, but it should not cause
		 *	problems unless someone is trying to forge packets.
		 */
		if (!after(seg_end+1, sk->acked_seq))
			acceptable = 0;		/* all of it has been seen already */
		else if (!before(th->seq, sk->acked_seq + sk->window + 1))
			acceptable = 0;		/* it starts beyond the window */
		else
			acceptable = 1;
	}

	if (acceptable)
		return 1;

	/* Never answer a reset. */
	if (th->rst)
		return 0;

	/*
	 *	Unsynchronized and handed something that is not ours: reset the
	 *	bogus remote connection.  Nothing is done to our own end; we
	 *	will connect again and it will work (with luck).
	 */
	if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) 
	{
		tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->opt.ip.tos,sk->opt.ip.ttl);
		return 1;
	}

	/* Otherwise send an ACK to try to resync things. */
	tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
	return 0;
}


#ifdef TCP_FASTPATH
/*
 *	Is the end of the receive queue clear of fragments that are as yet
 *	unmerged into the data stream?
 *
 *	Returns 1 if
 *	a) The queue is empty, or
 *	b) The last frame on the queue has the acked flag set
 *	and 0 otherwise.
 *
 *	The 0 case used to fall off the end of the function without a
 *	return statement, leaving the value seen by the caller undefined.
 */

static inline int tcp_clean_end(struct sock *sk)
{
	sk_buff *skb=skb_peek(&sk->receive_queue);
	if(skb==NULL || sk->receive_queue.prev->acked)
		return 1;
	return 0;
}

#endif


/*
 *	Input entry point installed in proto_tcp.
 *
 *	Buffers delivered by the ICMP layer (below == pr_icmp) are passed to
 *	tcp_err() and 0 is returned.  Anything else is passed to tcp_rcv()
 *	with the addresses read through saddr/daddr, and its result returned.
 */
int tcp_input(struct protocol *p, struct protocol *below, sk_buff *skb, void *saddr, void *daddr)
{
	if (below == pr_icmp)
	{
		tcp_err(skb);
		return 0;
	}

	return tcp_rcv(skb, skb->dev, NULL, *(long *)daddr, skb->len, *(long *)saddr, 0, NULL);
}

/*
 *	Main TCP receive routine: find the socket for a segment and run the
 *	per-state processing on it.
 *
 *	skb	 - the received segment; its data is read as starting at the
 *		   TCP header.
 *	dev	 - receiving device; a NULL dev makes the routine return at once.
 *	opt	 - passed through to tcp_reset(), tcp_sequence() and
 *		   tcp_conn_request().
 *	daddr, saddr - addresses supplied by the caller; used for the socket
 *		   lookup, the checksum test and for replies.
 *	len	 - segment length; used for the checksum test and skb_trim().
 *	redo	 - when non-zero the checksum test, the no-socket reset, the
 *		   skb set-up and the backlog/inuse handling are all skipped.
 *	protocol - not referenced in this routine.
 *
 *	Every return statement in this routine returns 0.
 */
int tcp_rcv(sk_buff *skb, struct device *dev, struct options *opt,
	unsigned long daddr, unsigned short len,
	unsigned long saddr, int redo, struct inet_protocol * protocol)
{
	struct tcphdr *th;
	struct sock *sk;

	if (!skb) 
	{
		return(0);
	}

	if (!dev) 
	{
		return(0);
	}
  
	tcp_statistics.TcpInSegs++;
  
	/* Only frames addressed to this host are processed. */
	if(skb->pkt_type!=PACKET_HOST)
	{
	  	kfree_skb(skb,FREE_READ);
	  	return(0);
	}
  
	th = (struct tcphdr *)skb_data(skb);	/* IP header is already pulled off */

	/*
	 *	Find the socket.
	 */

	sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);

	/*
	 *	If this socket has got a reset its to all intents and purposes 
  	 *	really dead 
  	 */
  	 
	if (sk!=NULL && sk->zapped)
		sk=NULL;

	if (!redo) 
	{
		if (tcp_check(th, len, saddr, daddr )) 
		{
			skb->sk = NULL;
			kfree_skb(skb,FREE_READ);
			/*
			 * We don't release the socket because it was
			 * never marked in use.
			 */
			return(0);
		}
		/* From here on th->seq is held in host byte order. */
		th->seq = ntohl(th->seq);

		/* See if we know about the socket. */
		if (sk == NULL) 
		{
			if (!th->rst)
				tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
			skb->sk = NULL;
			kfree_skb(skb, FREE_READ);
			return(0);
		}

		skb_trim(skb,len);
		skb->sk = sk;
		skb->acked = 0;
		skb->used = 0;
		skb->free = 0;
		/* Addresses are stored swapped relative to the arguments. */
		skb->saddr = daddr;
		skb->daddr = saddr;
	
		/* We may need to add it to the backlog here. */
		cli();
		if (sk->inuse) 
		{
			skb_queue_head(&sk->back_log, skb);
			sti();
			return(0);
		}
		sk->inuse = 1;
		sti();
	}
	else
	{
		/* NOTE(review): skb is not freed on this path - confirm who owns it when redo is set. */
		if (!sk) 
		{
			return(0);
		}
	}


	/*
	 * NOTE(review): this return neither frees skb nor calls release_sock(),
	 * although sk->inuse may have been set above - confirm this is intended.
	 */
	if (!sk->prot) 
	{
		return(0);
	}


	/*
	 *	Charge the memory to the socket. 
	 */
	 
	if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
	{
		skb->sk = NULL;
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return(0);
	}

	sk->rmem_alloc += skb->mem_len;

#ifdef TCP_FASTPATH
/*
 *	Incoming data stream fastpath. 
 *
 *	We try to optimise two things.
 *	1) Spot general data arriving without funny options and skip extra checks and the switch.
 *	2) Spot the common case in raw data receive streams of a packet that has no funny options,
 *	fits exactly on the end of the current queue and may or may not have the ack bit set.
 *
 *	Case two especially is done inline in this routine so there are no long jumps causing heavy
 *	cache thrashing, no function call overhead (except for the ack sending if needed) and for
 *	speed although further optimizing here is possible.
 */
 
	/* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
	if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
	{	
		/* Packets in order. Fits window */
		if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
		{
			/* Ack is harder */
			if(th->ack && !tcp_ack(sk, th, saddr, len))
			{
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return 0;
			}
			/*
			 *	Set up variables
			 */
			
			/* Take off the tch header */
			 
			skb_pull(skb,th->doff*4,NULL);
			
			sk->bytes_rcv += skb->len;
			tcp_rx_hit2++;
			if(skb->len)
			{
				skb_queue_tail(&sk->receive_queue,skb);	/* We already know where to put it */
				if(sk->window >= skb->len)
					sk->window-=skb->len;			/* We know its effect on the window */
				else
					sk->window=0;
				sk->acked_seq = th->seq+skb->len;	/* Easy */
				skb->acked=1;				/* Guaranteed true */
				/* Ack at once unless delayed acks are enabled and still within their limits. */
				if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
					sk->bytes_rcv > sk->max_unacked)
				{
					tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
				}
				else
				{
					sk->ack_backlog++;
					reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
				}
				if(!sk->dead)
					sk->data_ready(sk,0);
				release_sock(sk);
				return 0;
			}
			/*
			 * NOTE(review): a segment with no data drops through to the
			 * generic path below after tcp_ack() has already run and the
			 * header has been pulled - confirm tcp_ack()/tcp_data() cope
			 * with being handed the segment in that condition.
			 */
		}
		/*
		 *	More generic case of arriving data stream in ESTABLISHED
		 */
		tcp_rx_hit1++;
		if(!tcp_sequence(sk, th, len, opt, saddr, dev))
		{
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return 0;
		}
		if(th->ack && !tcp_ack(sk, th, saddr, len))
		{
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return 0;
		}
		if(tcp_data(skb, sk, th, saddr, len))
			kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return 0;
	}
	tcp_rx_miss++;
#endif	

	/*
	 *	Now deal with all cases.
	 */
	 
	switch(sk->state) 
	{
	
		/*
		 * This should close the system down if it's waiting
		 * for an ack that is never going to be sent.
		 */
		case TCP_LAST_ACK:
			if (th->rst) 
			{
				sk->zapped=1;
				sk->err = ECONNRESET;
 				tcp_set_state(sk,TCP_CLOSE);
				sk->shutdown = SHUTDOWN_MASK;
				if (!sk->dead) 
				{
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			/* No reset: falls through to the common synchronized-state handling. */
		case TCP_ESTABLISHED:
		case TCP_CLOSE_WAIT:
		case TCP_CLOSING:
		case TCP_FIN_WAIT1:
		case TCP_FIN_WAIT2:
		case TCP_TIME_WAIT:
			if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
			{
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (th->rst) 
			{
				tcp_statistics.TcpEstabResets++;
				sk->zapped=1;
				/* This means the thing should really be closed. */
				sk->err = ECONNRESET;
				if (sk->state == TCP_CLOSE_WAIT) 
				{
					sk->err = EPIPE;
				}
	
				/*
				 * A reset with a fin just means that
				 * the data was not all read.
				 */
				tcp_set_state(sk,TCP_CLOSE);
				sk->shutdown = SHUTDOWN_MASK;
				if (!sk->dead) 
				{
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
			/* A SYN on a synchronized connection: close our end and reset the sender. */
			if (th->syn) 
			{
				tcp_statistics.TcpEstabResets++;
				sk->err = ECONNRESET;
				tcp_set_state(sk,TCP_CLOSE);
				sk->shutdown = SHUTDOWN_MASK;
				tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->opt.ip.tos,sk->opt.ip.ttl);
				if (!sk->dead) {
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			/*
			 * Each helper below returning non-zero (zero for tcp_ack)
			 * ends processing here and the buffer is freed.
			 */
			if (th->ack && !tcp_ack(sk, th, saddr, len)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			if (tcp_urg(sk, th, saddr, len)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

	
			if (tcp_data(skb, sk, th, saddr, len)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}	

			if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			release_sock(sk);
			return(0);
		
		case TCP_CLOSE:
			/* Dead sockets, or ones that already have a peer address, just drop the segment. */
			if (sk->dead || sk->daddr) {
				kfree_skb(skb, FREE_READ);
					release_sock(sk);
				return(0);
			}
	
			if (!th->rst) {
				if (!th->ack)
					th->ack_seq = 0;
				tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->opt.ip.tos,sk->opt.ip.ttl);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
				return(0);
	
		case TCP_LISTEN:
			/* Resets are ignored, ACKs are answered with a reset, SYNs start a connection. */
			if (th->rst) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
			if (th->ack) {
				tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->opt.ip.tos,sk->opt.ip.ttl);
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			if (th->syn) 
			{
				/*
				 * Now we just put the whole thing including
				 * the header and saddr, and protocol pointer
				 * into the buffer.  We can't respond until the
				 * user tells us to accept the connection.
				 *
				 * The buffer is not freed here; it is handed
				 * to tcp_conn_request().
				 */
				tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
				release_sock(sk);
				return(0);
			}

			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);

		case TCP_SYN_RECV:
			if (th->syn) {
				/* Probably a retransmitted syn */
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
	
			/* Falls through: SYN_RECV segments get the sequence check below. */
		default:
			if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
			{
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			/* Falls through: SYN_SENT enters here without the sequence check. */
		case TCP_SYN_SENT:
			if (th->rst) 
			{
				tcp_statistics.TcpAttemptFails++;
				sk->err = ECONNREFUSED;
				tcp_set_state(sk,TCP_CLOSE);
				sk->shutdown = SHUTDOWN_MASK;
				sk->zapped = 1;
				if (!sk->dead) 
				{
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
			if (!th->ack) 
			{
				/* A bare SYN moves the socket to SYN_RECV; the segment is dropped either way. */
				if (th->syn) 
				{
					tcp_set_state(sk,TCP_SYN_RECV);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			switch(sk->state) 
			{
				case TCP_SYN_SENT:
					if (!tcp_ack(sk, th, saddr, len)) 
					{
						tcp_statistics.TcpAttemptFails++;
						tcp_reset(daddr, saddr, th,
							sk->prot, opt,dev,sk->opt.ip.tos,sk->opt.ip.ttl);
						kfree_skb(skb, FREE_READ);
							release_sock(sk);
						return(0);
					}
	
					/*
					 * If the syn bit is also set, switch to
					 * tcp_syn_recv, and then to established.
					 */
					if (!th->syn) 
					{
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}
	
					/* Ack the syn and fall through. */
					sk->acked_seq = th->seq+1;
					sk->fin_seq = th->seq;
					tcp_send_ack(sk->sent_seq, th->seq+1,
						sk, th, sk->daddr);
		
				case TCP_SYN_RECV:
					/* When reached by fall-through from SYN_SENT, tcp_ack() runs a second time on this segment. */
					if (!tcp_ack(sk, th, saddr, len)) 
					{
						tcp_statistics.TcpAttemptFails++;
						tcp_reset(daddr, saddr, th,
							sk->prot, opt, dev,sk->opt.ip.tos,sk->opt.ip.ttl);
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}
	
					tcp_set_state(sk,TCP_ESTABLISHED);
	
					/*
					 * 	Now we need to finish filling out
					 * 	some of the tcp header.
					 * 
					 *	We need to check for mtu info. 
					 */
					tcp_options(sk, th);
					sk->opt.ip.dummy_th.dest = th->source;
					sk->copied_seq = sk->acked_seq-1;
					if (!sk->dead) 
					{
						sk->state_change(sk);
					}
	
					/*
					 * We've already processed his first
					 * ack.  In just about all cases that
					 * will have set max_window.  This is
					 * to protect us against the possibility
					 * that the initial window he sent was 0.
					 * This must occur after tcp_options, which
					 * sets sk->mtu.
					 */
					if (sk->max_window == 0) 
					{
						sk->max_window = 32;
						sk->mss = min(sk->max_window, sk->mtu);
					}

					/*
					 * Now process the rest like we were
					 * already in the established state.
					 */
					if (th->urg) 
					{
						if (tcp_urg(sk, th, saddr, len)) 
						{ 
							kfree_skb(skb, FREE_READ);
							release_sock(sk);
							return(0);
						}
					}
					if (tcp_data(skb, sk, th, saddr, len))
						kfree_skb(skb, FREE_READ);

					/*
					 * NOTE(review): skb may already have been freed
					 * just above when it is passed to tcp_fin() -
					 * confirm tcp_fin() does not touch it.
					 */
					if (th->fin)
						tcp_fin(skb, sk, th, saddr, dev);
					release_sock(sk);
					return(0);
			}
	
			/* Reached only when the state is neither SYN_SENT nor SYN_RECV. */
			if (th->urg) 
			{
				if (tcp_urg(sk, th, saddr, len)) 
				{
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}
			}
			if (tcp_data(skb, sk, th, saddr, len)) 
			{
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
	
			if (!th->fin) 
			{
				release_sock(sk);
				return(0);
			}
			tcp_fin(skb, sk, th, saddr, dev);
			release_sock(sk);
			return(0);
	}
}


/*
 * Send a header-only segment whose sequence number is one behind what
 * has already been sent.  It assumes the other end will answer it with
 * an ack.
 *
 * Nothing is sent when the socket has been zapped, when it is in a state
 * other than those listed below, or when no buffer can be had.
 */

static void tcp_write_wakeup(struct sock *sk)
{
	sk_buff *probe;
	struct tcphdr *hdr;

	/* After a valid reset we can send no more */
	if (sk->zapped)
		return;

	/*
	 * Write data can still be transmitted/retransmitted in these
	 * states only; in any other state just return.
	 */
	switch (sk->state)
	{
		case TCP_ESTABLISHED:
		case TCP_CLOSE_WAIT:
		case TCP_FIN_WAIT1:
		case TCP_LAST_ACK:
		case TCP_CLOSING:
			break;
		default:
			return;
	}

	probe = sock_wmalloc(sk,protocol_size(&proto_tcp),1, GFP_ATOMIC);
	if (probe == NULL)
		return;
	protocol_adjust(probe,&proto_tcp);

	hdr = (struct tcphdr *)skb_push(probe,sizeof(struct tcphdr));

	probe->free = 1;
	probe->sk = sk;
	probe->localroute = sk->localroute;

	/* Start from the socket's template header, then fill in the rest. */
	memcpy(hdr,(void *) &sk->opt.ip.dummy_th, sizeof(*hdr));

	/*
	 * Use a previous sequence.
	 * This should cause the other end to send an ack.
	 */
	hdr->seq = htonl(sk->sent_seq-1);

	/* A plain ACK: every other flag and the reserved bits are cleared. */
	hdr->ack = 1;
	hdr->res1 = 0;
	hdr->res2 = 0;
	hdr->rst = 0;
	hdr->urg = 0;
	hdr->psh = 0;
	hdr->fin = 0;
	hdr->syn = 0;

	hdr->ack_seq = ntohl(sk->acked_seq);
	hdr->window = ntohs(tcp_select_window(sk));
	hdr->doff = sizeof(*hdr)/4;
	tcp_send_check(hdr, sk->saddr, sk->daddr, sizeof(*hdr), sk);

	/*
	 * Send it and free it.
	 * This will prevent the timer from automatically being restarted.
	 */
	if (proto_ip.output(&proto_ip,probe, ETH_P_IP, IPPROTO_TCP, &sk->saddr, &sk->daddr, &sk->opt.ip) == 0)
		tcp_statistics.TcpOutSegs++;
}

/*
 *	Send one probe via tcp_write_wakeup() and back off: the retransmit
 *	timeout is doubled (capped at 120*HZ), the TIME_PROBE0 timer is
 *	re-armed with it and the retransmit counters are bumped.
 *	Does nothing once the socket has been zapped.
 */
void
tcp_send_probe0(struct sock *sk)
{
	/* After a valid reset we can send no more */
	if (!sk->zapped)
	{
		tcp_write_wakeup(sk);

		sk->backoff++;
		sk->rto = min(sk->rto << 1, 120*HZ);
		reset_timer (sk, TIME_PROBE0, sk->rto);

		/* Count it against both the socket and the protocol. */
		sk->retransmits++;
		sk->prot->retransmits ++;
	}
}

/*
 *	Set a TCP level socket option.
 *
 *	Levels other than SOL_TCP are passed on to ip_setsockopt().
 *	The option value is read as one integer from optval.
 *
 *	Returns 0 on success, -EINVAL for a NULL optval or an out of range
 *	TCP_MAXSEG, the verify_area() error if optval is not readable, and
 *	-ENOPROTOOPT for an option that is not handled here.
 */
  
int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
	int val,err;

	if (level != SOL_TCP)
		return ip_setsockopt(sk,level,optname,optval,optlen);

	if (optval == NULL)
		return -EINVAL;

	err = verify_area(VERIFY_READ, optval, sizeof(int));
	if (err)
		return err;

	val = get_fs_long((unsigned long *)optval);

	if (optname == TCP_MAXSEG)
	{
		/*
		 * values greater than interface MTU won't take effect.  however at
		 * the point when this call is done we typically don't yet know
		 * which interface is going to be used
		 */
		if (val<1||val>MAX_WINDOW)
			return -EINVAL;
		sk->user_mss = val;
		return 0;
	}

	if (optname == TCP_NODELAY)
	{
		/* Any non-zero value switches the flag on. */
		sk->nonagle = (val != 0);
		return 0;
	}

	return -ENOPROTOOPT;
}

/*
 *	Read back a TCP level socket option.
 *
 *	Levels other than SOL_TCP are passed on to ip_getsockopt().
 *	On success sizeof(int) is stored through optlen and the option
 *	value through optval, in that order, and 0 is returned.
 *	Returns -ENOPROTOOPT for an option that is not handled here, or the
 *	verify_area() error when either user pointer is not writable.
 */
int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
	int val,err;

	if (level != SOL_TCP)
		return ip_getsockopt(sk,level,optname,optval,optlen);

	/* Pick up the value first so unknown options fail before user memory is touched. */
	if (optname == TCP_MAXSEG)
		val = sk->user_mss;
	else if (optname == TCP_NODELAY)
		val = sk->nonagle;	/* Until Johannes stuff is in */
	else
		return -ENOPROTOOPT;

	/* Length goes out first ... */
	err = verify_area(VERIFY_WRITE, optlen, sizeof(int));
	if (err)
		return err;
	put_fs_long(sizeof(int),(unsigned long *) optlen);

	/* ... then the value itself. */
	err = verify_area(VERIFY_WRITE, optval, sizeof(int));
	if (err)
		return err;
	put_fs_long(val,(unsigned long *)optval);

	return 0;
}


/*
 *	The TCP entry points and parameters handed to the socket layer.
 *
 *	The initialiser is positional, so the order of the entries must
 *	match the member order of struct proto (declared outside this file
 *	section).  Do not reorder entries without checking that declaration.
 */
struct proto tcp_prot = {
	tcp_close,		/* Close a TCP socket		*/
	tcp_sendmsg,		/* Now sendmsg does it all 	*/
	tcp_recvmsg,		/* Receive a TCP message	*/
	tcp_connect,		/* Connect a TCP socket		*/
	tcp_accept,		/* Accept a TCP connection	*/
	tcp_retransmit,		/* Retransmit TCP frames	*/
	tcp_write_wakeup,	/* Write wakeup			*/
	tcp_read_wakeup,	/* Read wakeup			*/
	tcp_rcv,		/* Used for resubmitting tcp	*/
	tcp_select,		/* Select is harder for TCP	*/
	tcp_ioctl,		/* TCP ioctl calls		*/
	NULL,			/* No initialiser		*/
	tcp_shutdown,		/* Shutdown a TCP socket	*/
	tcp_setsockopt,		/* Set socket options		*/
	tcp_getsockopt,		/* Get socket options		*/
	128,			/* Maximum header size		*/
	0,			/* Number of retransmits	*/
	{NULL,},		/* Socket array 		*/
	"TCP"			/* Name of protocol		*/
};

/*
 *	TCP offers no bindings of its own: whatever is asked for, the
 *	answer is -EAFNOSUPPORT.  None of the arguments are looked at.
 */
static int tcp_get_binding(int protocol, int subid, unsigned char *key)
{
	(void)protocol;
	(void)subid;
	(void)key;

	return -EAFNOSUPPORT;
}

/*
 *	Descriptor that plugs TCP into the generic protocol layer; it is
 *	registered and bound to "IP" and "ICMP" by tcp_init().
 *
 *	The initialiser is positional and struct protocol is declared
 *	elsewhere, so the meaning of the individual slots is not visible
 *	here.  NOTE(review): the two sizeof(struct tcphdr) entries look like
 *	header-size fields and the function entries like defer/input/control/
 *	binding hooks - confirm against the struct protocol declaration.
 */
struct protocol proto_tcp=
{
	NULL,
	"TCP",				/* protocol name */
	sizeof(struct tcphdr),
	0,
	sizeof(struct tcphdr),
	0,
	NULL,
	protocol_defer,
	tcp_input,			/* receives buffers from the layers below */
	default_protocol_control,
	tcp_get_binding,		/* always answers -EAFNOSUPPORT */
	NULL
};

/*
 *	Called from IP init to initialise the TCP protocol layers.
 *
 *	Registers proto_tcp and binds it to the "IP" layer, then looks up
 *	"ICMP" (remembered in pr_icmp) and binds to that as well.  If either
 *	layer cannot be found a message is printed and set-up stops there;
 *	a missing ICMP layer leaves the IP registration in place.
 */

void tcp_init(void)
{
	struct protocol *ip_layer;

	ip_layer = protocol_find("IP");
	if (ip_layer == NULL)
	{
		printk("TCP: Cannot find IP in order to bind.\n");
		return;
	}

	protocol_register(&proto_tcp);
	protocol_bind(ip_layer,&proto_tcp, ETH_P_IP, IPPROTO_TCP);

	/* tcp_input() compares against pr_icmp to recognise ICMP deliveries. */
	pr_icmp = protocol_find("ICMP");
	if (pr_icmp != NULL)
	{
		protocol_bind(pr_icmp,&proto_tcp, ETH_P_IP, IPPROTO_TCP);
		return;
	}

	printk("TCP: Cannot find ICMP in order to bind.\n");
}
