/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Version:	@(#)ip.c	1.0.16b	9/1/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	Commented a couple of minor bits of surplus code
 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
 *					(just stops a compiler warning).
 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
 *					are junked rather than corrupting things.
 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
 *					We used to process them non broadcast and
 *					boy could that cause havoc.
 *		Alan Cox	:	ip_forward sets the free flag on the
 *					new frame it queues. Still crap because
 *					it copies the frame but at least it
 *					doesn't eat memory too.
 *		Alan Cox	:	Generic queue code and memory fixes.
 *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
 *		Gerhard Koerting:	Forward fragmented frames correctly.
 *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
 *		Gerhard Koerting:	IP interface addressing fix.
 *		Linus Torvalds	:	More robustness checks
 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
 *		Alan Cox	:	Save IP header pointer for later
 *		Alan Cox	:	ip option setting
 *		Alan Cox	:	Use ip_tos/ip_ttl settings
 *		Alan Cox	:	Fragmentation bogosity removed
 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
 *		Alan Cox	:	Silly ip bug when an overlength
 *					fragment turns up. Now frees the
 *					queue.
 *		Linus Torvalds/ :	Memory leakage on fragmentation
 *		Alan Cox	:	handling.
 *		Gerhard Koerting:	Forwarding uses IP priority hints
 *		Teemu Rantanen	:	Fragment problems.
 *		Alan Cox	:	General cleanup, comments and reformat
 *		Alan Cox	:	SNMP statistics
 *		Alan Cox	:	BSD address rule semantics. Also see
 *					UDP as there is a nasty checksum issue
 *					if you do things the wrong way.
 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
 *		Alan Cox	: 	IP options adjust sk->priority.
 *		Alan Cox	:	Naiive use of new buffering.
 *		Gerhard Koerting:	Fixed fragmentation
 *		Alan Cox	:	Restructured ip_output. Added initial option processing.
 *		Alan Cox	:	RFC1122 commentary.
 *
 * To Fix:
 *		IP option processing is mostly not needed. ip_forward needs to know about routing rules
 *		and time stamp but that's about all. Use the route mtu field here too
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <asm/segment.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include "snmp.h"
#include "ip.h"
#include "protocol.h"
#include "route.h"
#include "tcp.h"
#include <linux/skbuff.h>
#include "sock.h"
#include "arp.h"
#include "icmp.h"





/*
 *	Declared here, defined elsewhere. Neither symbol is referenced in the
 *	part of this file reviewed alongside these declarations.
 */
extern int last_retran;
extern void sort_send(struct sock *sk);

/* Smaller of two values. Each argument may be evaluated twice, so avoid side effects. */
#define min(a,b)	((a)<(b)?(a):(b))
/* True when x, an IPv4 address in network byte order, has 127 as its first octet. */
#define LOOPBACK(x)	(((x) & htonl(0xff000000)) == htonl(0x7f000000))

/*
 *	SNMP management statistics
 */

/*
 *	NOTE(review): ip_forward() further down this file is compiled under
 *	CONFIG_IP_FORWARD, while this initialiser tests CONFIG_IP_FORWARDING.
 *	Confirm which symbol the configuration actually defines.
 */
#ifdef CONFIG_IP_FORWARDING 
struct ip_mib ip_statistics={1,64,};	/* Forwarding=Yes, Default TTL=64 */
#else
struct ip_mib ip_statistics={0,64,};	/* Forwarding=No, Default TTL=64 */
#endif

#ifdef CONFIG_IP_MULTICAST

/*
 * struct for keeping the multicast list in
 */

/* One node of a singly linked list of sockets. */
struct ip_mc_socklist 
{
	struct sock *sk;		/* the socket this node refers to */
	struct ip_mc_socklist *next;	/* next node, or NULL */
};

/*
 * One node of the list headed by ip_mc_head. Each node names a device and
 * a multicast address and carries its own list of sockets.
 * NOTE(review): presumably "sockets that joined multiaddr on interface";
 * the code that maintains the list is not in view here - confirm.
 */
struct ip_mc_list 
{
	struct device *interface;
	struct in_addr multiaddr;
	struct ip_mc_socklist *list;
	struct ip_mc_list *next;
};
 
/* Head of the ip_mc_list chain; starts out empty. */
struct ip_mc_list *ip_mc_head=NULL;

#endif

/*
 *	Pick the source address to use for a datagram going to daddr.
 *
 *	A zero saddr is replaced by ip_my_addr(). If the result is a
 *	network 127 address while daddr is not, the routing code is asked
 *	to supply a source instead: ip_rt_local() when 'local' is non-zero,
 *	ip_rt_route() otherwise. Any other address is returned as it stands.
 */
unsigned long ip_get_saddr(unsigned long saddr,unsigned long daddr, int local)
{
	unsigned long chosen = saddr;

	if (chosen == 0)
		chosen = ip_my_addr();

	/* Only a loopback source aimed at a non-loopback target needs rewriting */
	if (!LOOPBACK(chosen) || LOOPBACK(daddr))
		return chosen;

	if (local)
		ip_rt_local(daddr, NULL, &chosen);
	else
		ip_rt_route(daddr, NULL, &chosen);

	return chosen;
}

/*
 *	Output entry point of the IP protocol layer: route the buffer, put
 *	an IP header on it (unless the caller supplied one) and hand it to
 *	ip_queue_xmit().
 *
 *	self, type	not used by this routine.
 *	skb		buffer to send. skb->localroute selects ip_rt_local()
 *			rather than ip_rt_route() for the lookup.
 *	subtype		IP protocol number for the header. IPPROTO_RAW means
 *			the buffer already begins with an IP header, and only
 *			its source address is overwritten.
 *	saddr		points to the source address, or NULL. NULL or a zero
 *			address selects ip_my_addr().
 *	daddr		points to the destination address. Must not be NULL.
 *	opt		points to a struct ip_opt, or NULL. NULL or a family
 *			other than AF_INET selects the built-in defaults.
 *
 *	Returns 0 once the buffer has been passed to ip_queue_xmit(), or
 *	-ENETUNREACH when no route exists. In the no-route case the buffer is
 *	released here, except that a socket-owned buffer whose free flag is
 *	not 1 is left to its owner.
 */
static int ip_output(struct protocol *self, sk_buff *skb, int type, int subtype, void *saddr, void *daddr, void *opt)
{
	int ttl;
	static struct ip_opt ip_default_opt=
	{ 
		AF_INET, 255, 0, {0,}, NULL
#ifdef CONFIG_IP_MULTICAST
			,1,1,""
#endif
	};
	struct ip_opt *ipopt=opt;
	unsigned long src;
	unsigned long dest= *(unsigned long *)daddr;	/* Destination ip address */
	struct iphdr *iph;
	struct rtable *rt;
	unsigned long srcloop;
	unsigned long raddr;

	/*
	 *	RFC 1122: 3.2.1.3   MUST not send to an address of 0.0.0.0 except for
	 *	the purposes of address discovery at boot time.
	 *
	 *	Find the source address and adjust to suit.
	 */
	 
	if(saddr!=NULL)
	{
		src= *(unsigned long *)saddr;	/* Get source ip address */
		if(src==0)
			src=ip_my_addr();
	}
	else
		src= ip_my_addr();

	/*
	 *	FIXME RFC1122: 3.2.1.8 MUST support being the source of a source route.
	 *	FIXME RFC1122: 3.2.1.8 MUST not send a frame with two source route options.
	 *
	 *	Get a suitable option set.
	 */

	if(ipopt==NULL || ipopt->family!=AF_INET)
		ipopt=&ip_default_opt;
		
	/*
	 *	Pick an initial TTL
	 */
		 
	ttl=ipopt->ttl;
#ifdef CONFIG_IP_MULTICAST	
	if(MULTICAST(dest))
		ttl=ipopt->mc_ttl;
#endif		

	/*
	 *	Find the routing table entry
	 */
	 
	if(skb->localroute)
		rt=ip_rt_local(dest, NULL, &srcloop);
	else
		rt=ip_rt_route(dest, NULL, &srcloop);	
	
	if(rt==NULL)
	{
		ip_statistics.IpOutNoRoutes++;
		if(skb->sk)
		{
			skb->sk->err=ENETUNREACH;
			if(skb->free==1)
				sock_wfree(skb->sk,skb,skb->mem_len);
		}
		else
			kfree_skb(skb, FREE_WRITE);
		return -ENETUNREACH;
	}

	/*
	 *	RFC 1122: 3.2.1.3	MUST not send 127.x.y.z outside the local host
	 *
	 *	Network 127 -> Remote gets changed to the relevant interface address.
	 *	127.x.y.z may _never_ appear on the cable.
	 *
	 *	This is done only after the route lookup is known to have
	 *	succeeded, so that srcloop is never read when the lookup failed.
	 */
	 
	if(LOOPBACK(src)&&!LOOPBACK(dest))
		src=srcloop;

	skb->dev=rt->rt_dev;
	raddr=rt->rt_gateway;
	
	/*
	 *	No gateway: Use local route
	 */
	 
	if(raddr==0)
		raddr=dest;
		
	skb->protocol=rt->rt_proto;
	skb->raddr=raddr;
#ifdef BROKEN	
	if(skb->sk)
		skb->sk->saddr=src;
#endif	
	if(subtype==IPPROTO_RAW)
	{
		/*
		 *	User provides the IP header.
		 */
		iph=(struct iphdr *)skb_data(skb);
		iph->saddr=src;
	}
	else
	{
		/*
		 *	Add an IP header.
		 */
		iph=(struct iphdr *)skb_push(skb, sizeof(*iph));
		iph->version = 4;	/* IP classic */
		iph->tos = ipopt->tos;	/* Service Type */
		iph->frag_off = 0;	/* Not a fragment */
		iph->ttl = ttl;		/* Time to live */
		iph->daddr = dest;	/* Destination */
		iph->saddr = src;	/* Source */
		iph->protocol = subtype;/* IP type */
		iph->ihl = 5;		/* No options (for now) */
	}

	/*
	 *	Pass it on to the ip output queuer.
	 */
	 
	ip_queue_xmit(skb->sk, skb, skb->free);
	return 0;
}	


/*
 *	This is a version of ip_compute_csum() optimized for IP headers, which
 *	always checksum on 4 octet boundaries.
 *
 *	buff	start of the data to sum.
 *	wlen	number of 32 bit words to sum (callers in this file pass
 *		the header's ihl field).
 *
 *	Returns the low 16 bits of the complement of the folded sum. With
 *	wlen == 0 nothing is summed and the result is 0xffff.
 *
 *	x86 only: the loop uses lodsl, so the data pointer is pinned to %esi
 *	and the word just loaded arrives in %eax (the "bogus" operand).
 */

static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
	unsigned long bogus;
	/*
	 * clc clears the carry before the first add. Each pass loads one
	 * word and adds it with carry; the carry survives decl into the
	 * next pass. After the loop the last carry is added in, the upper
	 * half of the sum is shifted down into %3 and added to the lower
	 * half, and the carry from that 16 bit add is folded back in.
	 */
	 __asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %3, %0\n\t"
		"decl %2\n\t"
		"jne 1b\n\t"
		"adcl $0, %0\n\t"
		"movl %0, %3\n\t"
		"shrl $16, %3\n\t"
		"addw %w3, %w0\n\t"
		"adcw $0, %w0"
	    : "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
	    : "0"  (sum),  "1" (buff),  "2" (wlen));
	}
	/* Only the low 16 bits carry the folded result */
	return (~sum) & 0xffff;
}

/*
 * This routine does all the checksum computations that don't
 * require anything special (like copying or special headers).
 *
 *	buff	start of the data to sum.
 *	len	number of bytes to sum; any length is accepted.
 *
 * Returns the low 16 bits of the complement of the accumulated sum.
 *
 * x86 only. The running sum lives in %ebx and the data pointer in %esi
 * across the three asm statements; each one advances buff through its
 * "=S" output so the next statement carries on where the last stopped.
 */

unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		/*
		 * %ecx counts len/4 words for the loop instruction. Once
		 * the loop is done the upper half of %ebx is added into
		 * %bx and the carry folded back in.
		 */
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %%eax, %%ebx\n\t"
		"loop 1b\n\t"
		"adcl $0, %%ebx\n\t"
		"movl %%ebx, %%eax\n\t"
		"shrl $16, %%eax\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum) , "=S" (buff)
		: "0" (sum), "c" (len >> 2) ,"1" (buff)
		: "ax", "cx", "si", "bx" );
	}
	/* A remaining 16 bit word, if bit 1 of len is set */
	if (len & 2)
	{
		__asm__("lodsw\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	/* A final odd byte; %ah is cleared so it is added as a value 0..255 */
	if (len & 1)
	{
		__asm__("lodsb\n\t"
		"movb $0, %%ah\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	/* The upper half of sum is not meaningful; it is masked off below */
	sum =~sum;
	return(sum & 0xffff);
}

/*
 *	Checksum the IP header at iph over its own stated length (ihl words)
 *	and return what ip_fast_csum() gives. ip_input() in this file treats a
 *	zero result from the same computation as "header is good".
 *	This version is still used in slhc.c.
 */

int ip_csum(struct iphdr *iph)
{
	unsigned char *hdr = (unsigned char *)iph;
	int words = iph->ihl;

	return ip_fast_csum(hdr, words);
}

/*
 *	Fill in the checksum field of an outgoing IP header. The field is
 *	zeroed first so that it does not contribute to the sum, which is
 *	taken over ihl 32 bit words.
 */

static void ip_send_check(struct iphdr *iph)
{
	unsigned char *hdr = (unsigned char *)iph;

	iph->check = 0;
	iph->check = ip_fast_csum(hdr, iph->ihl);
}

/************************ Fragment Handlers From NET2E **********************************/


/*
 *	This fragment handler is a bit of a heap. On the other hand it works quite
 *	happily and handles things quite well.
 */
 
/*
 *	Head of the doubly linked list of datagrams that are still being
 *	reassembled. ip_create() pushes new entries at the head, ip_free()
 *	unlinks them; both edit the list with interrupts disabled.
 */
static struct ipq *ipqueue = NULL;		/* IP fragment queue	*/

/*
 *	Allocate and fill in a descriptor for one received fragment.
 *
 *	offset, end	first data byte covered and the byte just past the
 *			last one; the stored length is end - offset.
 *	skb		buffer holding the fragment.
 *	ptr		where the fragment's data starts.
 *
 *	The descriptor is zeroed first, so its list links start out NULL.
 *	Returns NULL, after logging, when no memory is available.
 */
 
static struct ipfrag *ip_frag_create(int offset, int end, sk_buff *skb, unsigned char *ptr)
{
	struct ipfrag *frag = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);

	if (frag != NULL)
	{
		memset(frag, 0, sizeof(struct ipfrag));

		frag->skb = skb;
		frag->ptr = ptr;
		frag->offset = offset;
		frag->end = end;
		frag->len = end - offset;
		return frag;
	}

	printk("IP: frag_create: no memory left !\n");
	return NULL;
}
 
 
/*
 *	Find the correct entry in the "incomplete datagrams" queue for
 *	this IP datagram, and return the queue entry address if found.
 *
 *	An entry matches when id, source, destination and protocol of its
 *	saved header all equal those of iph. The timer of a matching entry
 *	is stopped before returning so that the entry cannot expire while
 *	the caller works on it; the caller must restart the timer or free
 *	the entry. Returns NULL when nothing matches.
 *
 *	The list is walked with interrupts disabled.
 */
 
static struct ipq *ip_find(struct iphdr *iph)
{
	struct ipq *qp;
 
	cli();
	for(qp = ipqueue; qp != NULL; qp = qp->next) 
	{
 		if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
			iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) 
		{
			del_timer(&qp->timer);	/* So it doesn't vanish on us. The timer will be reset anyway */
			break;
 		}
   	}
	sti();
	return(qp);	/* NULL when the walk ran off the end of the list */
}
 
 
/*
 *	Take an entry off the "incomplete datagrams" queue and release all
 *	that belongs to it: its timer, every queued fragment together with
 *	the fragment's buffer, the saved MAC header, the saved IP header
 *	(allocated 8 bytes larger than ihlen) and the entry itself.
 *
 *	Unlinking and freeing are done with interrupts disabled.
 */

static void ip_free(struct ipq *qp)
{
	struct ipfrag *frag;
	struct ipfrag *following;

	/* The timer must not fire on an entry that is going away */
	del_timer(&qp->timer);

	cli();

	/* Unlink from the doubly linked queue */
	if (qp->prev != NULL)
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}
	else
	{
		/* Entry was at the head */
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}

	/* Drop each fragment; remember its successor before freeing it */
	for (frag = qp->fragments; frag != NULL; frag = following)
	{
		following = frag->next;
		IS_SKB(frag->skb);
		kfree_skb(frag->skb,FREE_READ);
		kfree_s(frag, sizeof(struct ipfrag));
	}

	/* Saved headers, then the descriptor itself */
	kfree_s(qp->mac, qp->maclen);
	kfree_s(qp->iph, qp->ihlen + 8);
	kfree_s(qp, sizeof(struct ipq));

	sti();
}
 
 
/*
 *	Oops- a fragment queue timed out.  Kill it and send an ICMP reply. 
 */
 
static void ip_expire(unsigned long arg)
{
   	struct ipq *qp;
 
   	qp = (struct ipq *)arg;

   	/*
   	 *	Send an ICMP "Fragment Reassembly Timeout" message. 
   	 */

	ip_statistics.IpReasmTimeout++;
	ip_statistics.IpReasmFails++;   	 
   	/* This if is always true... shrug */
 	if(qp->fragments!=NULL)
 		icmp_send(qp->fragments->skb,qp->fragments->skb->h.iph,ICMP_TIME_EXCEEDED,
 				ICMP_EXC_FRAGTIME, qp->dev);
 
   	/* 
   	 *	Nuke the fragment queue. 
   	 */
	ip_free(qp);
}
 
 
/*
 * 	Add an entry to the 'ipq' queue for a newly received IP datagram.
 * 	We will (hopefully :-) receive all other fragments of this datagram
 * 	in time, so we just create a queue for this datagram, in which we
 * 	will insert the received fragments at their respective positions.
 *
 *	skb	buffer holding the fragment that triggered the creation.
 *	iph	its IP header; everything in the buffer in front of iph is
 *		saved as the MAC header.
 *	dev	device recorded in the entry (used for the ICMP error on
 *		timeout).
 *
 *	Returns the new entry, already linked at the head of the queue with
 *	its timer running, or NULL when memory ran out. On failure nothing
 *	is left allocated and skb is untouched.
 */

static struct ipq *ip_create(sk_buff *skb, struct iphdr *iph, struct device *dev)
{
  	struct ipq *qp;
  	int maclen;
  	int ihlen;

  	qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
  	if (qp == NULL) 
  	{
		printk("IP: create: no memory left !\n");
		return(NULL);
  	}
 	memset(qp, 0, sizeof(struct ipq));

  	/*
  	 *	Allocate memory for the MAC header. 
  	 *
  	 *	FIXME: We have a maximum MAC address size limit and define 
  	 *	elsewhere. We should use it here and avoid the 3 kmalloc() calls
  	 */
  	 
  	maclen = ((unsigned long) iph) - ((unsigned long) skb_data(skb));
  	qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC);
  	if (qp->mac == NULL) 
  	{
		printk("IP: create: no memory left !\n");
		kfree_s(qp, sizeof(struct ipq));
		return(NULL);
  	}

  	/* 
  	 *	Allocate memory for the IP header (plus 8 octets for ICMP). 
  	 */
  	 
  	ihlen = (iph->ihl * sizeof(unsigned long));
  	qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
  	if (qp->iph == NULL) 
  	{
		printk("IP: create: no memory left !\n");
		kfree_s(qp->mac, maclen);
		kfree_s(qp, sizeof(struct ipq));
		return(NULL);
  	}

  	/*
  	 *	Fill in the structure.
  	 *	NOTE(review): the second copy reads ihlen + 8 bytes starting at
  	 *	iph without checking that the fragment carries 8 data bytes.
  	 */
  	memcpy(qp->mac, skb_data(skb), maclen);
 	memcpy(qp->iph, iph, ihlen + 8);
  	qp->len = 0;			/* total length unknown until the last fragment */
  	qp->ihlen = ihlen;
  	qp->maclen = maclen;
  	qp->fragments = NULL;
  	qp->dev = dev;
	
  	/* Start a timer for this entry. */
  	qp->timer.expires = IP_FRAG_TIME;		/* about 30 seconds	*/
  	qp->timer.data = (unsigned long) qp;		/* pointer to queue	*/
  	qp->timer.function = ip_expire;			/* expire function	*/
  	init_timer(&qp->timer);
  	add_timer(&qp->timer);

  	/* Add this entry to the queue. */
  	qp->prev = NULL;
  	cli();
  	qp->next = ipqueue;
  	if (qp->next != NULL) 
  		qp->next->prev = qp;
  	ipqueue = qp;
  	sti();
  	return(qp);
}
 
 
/*
 *	Decide whether every piece of a datagram has arrived.
 *
 *	Returns 0 while the final fragment is still missing (qp->len is
 *	zero) or while some fragment starts beyond the end of the one
 *	before it in the list; returns 1 otherwise.
 */
 
static int ip_done(struct ipq *qp)
{
	struct ipfrag *frag;
	int covered = 0;	/* end of the previous fragment in the list */

	/* The total length is only known once the last fragment is in */
	if (qp->len == 0)
		return 0;

	for (frag = qp->fragments; frag != NULL; frag = frag->next)
	{
		if (frag->offset > covered)
			return 0;	/* hole in front of this fragment */
		covered = frag->end;
	}

	return 1;
}
 
 
/* 
 *	Build a new IP datagram from all its fragments. 
 *
 *	A fresh buffer is allocated, the saved MAC and IP headers are copied
 *	in, and each fragment's data is copied to its offset behind the IP
 *	header. The queue entry qp is freed on every path, success or not.
 *
 *	Returns the reassembled buffer with frag_off cleared and tot_len set
 *	from the number of data bytes copied, or NULL when the allocation
 *	fails or the fragment list does not fit.
 */
 
static sk_buff *ip_glue(struct ipq *qp)
{
	sk_buff *skb;
   	struct iphdr *iph;
   	struct ipfrag *fp;
   	unsigned char *ptr;
   	int count, len;
 
   	/*
   	 *	Allocate a new buffer for the datagram. 
   	 */
   	 
   	len = qp->maclen + qp->ihlen + qp->len;

	/* Use dev_alloc_skb as we want the forwarding slack. This is really
	   an input packet, it just had to be mended before we could use it 8) */
   	if ((skb = dev_alloc_skb(len)) == NULL) 
   	{
   		ip_statistics.IpReasmFails++;
 		printk("IP: queue_glue: no memory for gluing queue 0x%X\n", (int) qp);
 		ip_free(qp);
 		return(NULL);
   	}
 
   	/*
   	 *	Fill in the basic details.
   	 *	NOTE(review): only len - maclen bytes are claimed with skb_put()
   	 *	yet maclen + ihlen + qp->len bytes are written from the returned
   	 *	pointer below. Whether dev_alloc_skb() leaves room for that is
   	 *	not visible here - confirm.
   	 */
   	skb->h.raw = skb_put(skb,(len - qp->maclen));
   	skb->free = 1;
 
   	/* Copy the original MAC and IP headers into the new buffer. */
   	ptr = skb->h.raw;
   	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
   	ptr += qp->maclen;
   	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
   	ptr += qp->ihlen;
   	skb->h.raw += qp->maclen;	/* h.raw now addresses the IP header */
   	
   	count = 0;
 
   	/*
   	 *	Copy the data portions of all fragments into the new buffer.
   	 *	NOTE(review): the size check compares the running byte count
   	 *	with skb->len, but the copy destination is ptr + fp->offset;
   	 *	neither fp->offset nor a negative fp->len is checked here.
   	 */
   	fp = qp->fragments;
   	while(fp != NULL) 
   	{
   		if(count+fp->len>skb->len)
   		{
   			printk("Invalid fragment list: Fragment over size.\n");
   			ip_free(qp);
   			kfree_skb(skb,FREE_WRITE);
   			ip_statistics.IpReasmFails++;
   			return NULL;
   		}
 		memcpy((ptr + fp->offset), fp->ptr, fp->len);
 		count += fp->len;
 		fp = fp->next;
   	}
 
   	/* We glued together all fragments, so remove the queue entry. */
   	ip_free(qp);
 
   	/* Done with all fragments. Fixup the new IP header. */
   	iph = skb->h.iph;
   	iph->frag_off = 0;
   	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
   	skb->ip_hdr = iph;
   	
   	ip_statistics.IpReasmOKs++;
   	return(skb);
}
 

/*
 *	Process an incoming IP datagram fragment. 
 *
 *	iph	IP header of the received buffer.
 *	skb	the buffer itself; ownership passes to this routine.
 *	dev	device recorded in a newly created reassembly queue.
 *
 *	Returns
 *	  - skb itself when the datagram is not fragmented at all,
 *	  - the reassembled datagram when this fragment completed one,
 *	  - NULL when the fragment was queued to wait for the others, or
 *	    was discarded (no memory, or it carries no new data).
 *
 *	The fragment list of a queue is kept sorted by offset and free of
 *	overlaps: the front of the new fragment is trimmed against its
 *	predecessor, and the fronts of its successors are trimmed against
 *	the new fragment, dropping successors that end up empty.
 */
 
static sk_buff *ip_defrag(struct iphdr *iph, sk_buff *skb, struct device *dev)
{
	struct ipfrag *prev, *next, *tmp;
	struct ipfrag *tfp;
	struct ipq *qp;
	sk_buff *skb2;
	unsigned char *ptr;
	int flags, offset;
	int i, ihl, end;

	ip_statistics.IpReasmReqds++;
	
	/* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
   	qp = ip_find(iph);
 
   	/* Is this a non-fragmented datagram? */
   	offset = ntohs(iph->frag_off);
   	flags = offset & ~IP_OFFSET;
   	offset &= IP_OFFSET;
   	if (((flags & IP_MF) == 0) && (offset == 0)) 
   	{
		if (qp != NULL)
 			ip_free(qp);	/* Huh? How could this exist?? */
 		return(skb);
   	}

   	offset <<= 3;		/* offset is in 8-byte chunks */
 
   	/*
    	 * If the queue already existed, keep restarting its timer as long
   	 * as we still are receiving fragments.  Otherwise, create a fresh
    	 * queue entry.
    	 */

	if (qp != NULL) 
	{
		del_timer(&qp->timer);
		qp->timer.expires = IP_FRAG_TIME;	/* about 30 seconds */
		qp->timer.data = (unsigned long) qp;	/* pointer to queue */
		qp->timer.function = ip_expire;		/* expire function */
		init_timer(&qp->timer);
		add_timer(&qp->timer);
	} 
	else 
	{
		/*
		 *	If we failed to create it, then discard the frame
		 */
		if ((qp = ip_create(skb, iph, dev)) == NULL) 
		{
			skb->sk = NULL;
			kfree_skb(skb, FREE_READ);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
	}

   	/*
   	 *	Determine the position of this fragment. 
   	 */
   	 
   	ihl = (iph->ihl * sizeof(unsigned long));
   	end = offset + ntohs(iph->tot_len) - ihl;

   	/*
   	 *	Point into the IP datagram 'data' part.  Easy with
   	 *	the new sk_buff's isn't it !
   	 */

	skb_pull(skb,ihl,NULL);
   	ptr = skb_data(skb);
 
   	/*
   	 * 	Find out which fragments are in front and at the back of us
   	 * 	in the chain of fragments so far.  We must know where to put
   	 * 	this fragment, right?
   	 */
   	 
   	prev = NULL;
   	for(next = qp->fragments; next != NULL; next = next->next) 
   	{
 		if (next->offset > offset) 
 			break;	/* bingo! */
 		prev = next;
   	}	
 
   	/*
   	 * 	We found where to put this one.
   	 * 	Check for overlap with preceding fragment, and, if needed,
   	 * 	align things so that any overlaps are eliminated.
   	 */
   	if (prev != NULL && offset < prev->end) 
   	{
 		i = prev->end - offset;
 		offset += i;	/* ptr into datagram */
 		ptr += i;	/* ptr into fragment data */
   	}	

   	/*
   	 *	A fragment that now ends before it starts brings nothing new:
   	 *	either it lies wholly inside its predecessor or its header
   	 *	claims a total length shorter than the header itself. Queueing
   	 *	it would store a negative length that is later used as a copy
   	 *	size, so drop it and leave the queue as it was.
   	 */
   	if (end < offset)
   	{
   		skb->sk = NULL;
   		kfree_skb(skb, FREE_READ);
   		return NULL;
   	}

   	/* 
   	 *	Is this the final fragment? 
   	 */

   	if ((flags & IP_MF) == 0) 
   		qp->len = end;
 
   	/*
    	 * Look for overlap with succeeding segments.
    	 * If we can merge fragments, do it.
    	 *
    	 * The walk uses its own cursor so that 'next' keeps naming the
    	 * first fragment that survives behind the new one. It is moved
    	 * on only when the fragment it names is removed.
      	 */
   
   	for(tmp = next; tmp != NULL; tmp = tfp) 
   	{
 		tfp = tmp->next;
 		if (tmp->offset >= end) 
 			break;		/* no overlaps at all */
 
 		i = end - tmp->offset;			/* overlap is 'i' bytes */
 		tmp->len -= i;				/* so reduce size of	*/
 		tmp->offset += i;			/* next fragment	*/
 		tmp->ptr += i;
 		
 		/* 
 		 *	If we get a frag size of <= 0, remove it and the packet
 		 *	that it goes with.
 		 */
 		if (tmp->len <= 0) 
 		{
 			if (tmp->prev != NULL) 
 				tmp->prev->next = tmp->next;
 		  	else 
 		  		qp->fragments = tmp->next;
 		
 			/* tfp is tmp->next and may be NULL at the end of the list */
 			if (tfp != NULL) 
 				tfp->prev = tmp->prev;

 			if (next == tmp)
 				next = tfp;	/* our successor just went away */
 				
			kfree_skb(tmp->skb,FREE_READ); 			
 			kfree_s(tmp, sizeof(struct ipfrag));
 		}
   	}
 
   	/* 
   	 *	Insert this fragment in the chain of fragments. 
   	 */
   	 
   	tfp = ip_frag_create(offset, end, skb, ptr);
   	
   	/*
   	 *	No memory to save the fragment - so throw the lot
   	 */
   	
   	if (!tfp) 
   	{
   		skb->sk = NULL;
   		kfree_skb(skb, FREE_READ);
   		return NULL;
   	}
   	tfp->prev = prev;
   	tfp->next = next;
   	if (prev != NULL) 
   		prev->next = tfp;
     	else 
     		qp->fragments = tfp;
   
   	if (next != NULL) 
   		next->prev = tfp;
 
   	/*
    	 * 	OK, so we inserted this new fragment into the chain.
    	 * 	Check if we now have a full IP datagram which we can
    	 * 	bump up to the IP layer...
    	 */
   
   	if (ip_done(qp)) 
   	{
 		skb2 = ip_glue(qp);		/* glue together the fragments */
 		return(skb2);
   	}
   	return(NULL);
}
 
 
/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to the MAC header plus IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending by calling the
 *	ip_queue_xmit().  Note that this is recursion, and bad things will happen
 *	if this function causes a loop...
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 *
 *	**Protocol Violation**
 *	We copy all the options to each fragment. !FIXME!
 *
 *	sk	socket charged for the memory of each fragment, or NULL.
 *	skb	the datagram; its data must start at the IP header. It is
 *		not freed here - that is left to the caller.
 *	dev	output device; its mtu bounds the size of each fragment.
 *	is_frag	bit value 2: the datagram is itself a fragment, so offsets
 *		continue from the offset in its header. Bit value 1: more
 *		fragments follow it, so MF stays set on every piece.
 *
 *	Nothing is returned. When the datagram has DF set, or the mtu leaves
 *	less than 8 bytes of data per frame, an ICMP "fragmentation needed"
 *	error is sent and nothing is queued. If memory runs out part way
 *	through, the fragments queued so far have already been handed on.
 */
  
void ip_fragment(struct sock *sk, sk_buff *skb, struct device *dev, int is_frag)
{
   	struct iphdr *iph;
   	unsigned char *raw;
   	unsigned char *ptr;
   	sk_buff *skb2;
   	int left, mtu, hlen, len;
   	int offset;
   	unsigned long flags;
 
   	/* 
   	 *	Point into the IP datagram header.
   	 */

	raw = skb_data(skb);   	 
   	iph = (struct iphdr *) skb_data(skb);

	skb->ip_hdr = iph;
	 	
   	/* 
   	 *	Setup starting values. 
   	 */
   	 
   	hlen = (iph->ihl * sizeof(unsigned long));
   	left = ntohs(iph->tot_len) - hlen;	/* Data bytes still to send */
   	mtu = (dev->mtu - hlen);		/* Data space in each frame */
   	ptr = (raw + hlen);			/* Where to start from */
 	
   	/*
   	 *	Check for any "DF" flag. [DF means do not fragment]
   	 */
   	 
   	if (ntohs(iph->frag_off) & IP_DF) 
   	{
 		ip_statistics.IpFragFails++;
 		icmp_send(skb,iph,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev); 
 		return;
   	}
 
 	/*
 	 *	The protocol doesn't seem to say what to do in the case that the
 	 *	frame + options doesn't fit the mtu. As it used to fall down dead
 	 *	in this case we were fortunate it didn't happen
 	 */
 	 
 	if(mtu<8)
 	{
 		/* It's wrong but its better than nothing */
 		icmp_send(skb,iph,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
 		ip_statistics.IpFragFails++;
 		return;
 	}
 	
   	/* 
   	 *	Fragment the datagram. 
   	 */
   	 
   	/*
   	 *	The initial offset is 0 for a complete frame. When
   	 *	fragmenting fragments its wherever this one starts.
   	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
   		offset = 0;


	/*
	 *	Keep copying data until we run out.
	 */
	    		
   	while(left > 0) 
   	{
 		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary.
		   mtu >= 8 was checked above, so len stays at least 8 here. */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
 		/*
 		 *	Allocate buffer: data, IP header and whatever room
 		 *	the lower protocol asks for.
 		 */
 		 
 		if ((skb2 = alloc_skb(len + hlen + protocol_size(skb->protocol),GFP_ATOMIC)) == NULL) 
 		{
 			printk("IP: frag: no memory for new fragment!\n");
 			ip_statistics.IpFragFails++;
 			return;
 		}
 		protocol_adjust(skb2,skb->protocol);
 		
 		/*
 		 *	Set up data on packet
 		 */

 		if(skb->free==0)
 			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
 		skb2->free = 1;
 		skb2->h.raw=(char *) skb_data(skb2);
		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess. Interrupts are disabled around
		 *	the update and the saved state is restored after.
		 */
		 
		save_flags(flags);
 		if (sk) 
 		{
 			cli();
 			sk->wmem_alloc += skb2->mem_len;
 			skb2->sk=sk;
 		}
 		restore_flags(flags);
 		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */ 

 		/* 
 		 *	Copy the packet header into the new buffer. 
 		 */
 		 
 		memcpy(skb_put(skb2,hlen), raw, hlen);
 
 		/*
 		 *	Copy a block of the IP datagram. 
 		 */
 		memcpy(skb_put(skb2,len), ptr, len);
 		left -= len;

 		/*
 		 *	Fill in the new header fields. Assigning frag_off
 		 *	replaces any flag bits copied from the original.
 		 *	NOTE(review): tot_len and the header checksum are not
 		 *	touched here; presumably ip_queue_xmit() sets them -
 		 *	confirm.
 		 */
 		iph = (struct iphdr *)skb2->h.raw;
 		iph->frag_off = htons((offset >> 3));
 		/* 
 		 *	Added AC : If we are fragmenting a fragment thats not the
 		 *		   last fragment then keep MF on each bit 
 		 */
 		if (left > 0 || (is_frag & 1)) 
 			iph->frag_off |= htons(IP_MF);
 		ptr += len;
 		offset += len;
 
 		/* 
 		 *	Put this fragment into the sending queue. 
 		 */
 		 
 		ip_statistics.IpFragCreates++;
 		
 		skb2->dev=dev;
 		skb2->protocol=skb->protocol;
 		ip_queue_xmit(sk, skb2, 2);
   	}
   	ip_statistics.IpFragOKs++;
}
 


#ifdef CONFIG_IP_FORWARD

/* 	
 *	Forward an IP datagram to its next destination.
 *
 *	skb		the received datagram, with skb->h.iph at its IP
 *			header. Ownership passes to this routine: on every
 *			path the buffer is freed, sent, or handed to ARP.
 *	dev		device the datagram arrived on.
 *	is_frag		fragment flags, passed on to ip_fragment().
 *	target_addr	address to route towards.
 *	target_strict	non-zero when a strict source route forbids the use
 *			of a gateway.
 *
 *	ICMP errors are sent for an expired TTL, a missing route and a
 *	strict-route violation. A datagram that would leave on the device it
 *	arrived on, or whose output device is down, is dropped silently.
 */

static void ip_forward(sk_buff *skb, struct device *dev, int is_frag, unsigned long target_addr, int target_strict)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	sk_buff *skb2=skb;	/* The buffer we were given; skb may become a copy */
	struct rtable *rt;	/* Route we use */
	unsigned long raddr;	/* Router IP address */
	char haddr[MAX_ADDR_LEN];
	int pri=SOPRI_NORMAL;

  	/*
  	 *	According to the RFC, we must first decrease the TTL field. If
  	 *	that reaches zero, we must reply an ICMP control message telling
 	 *	that the packet's lifetime expired.
 	 *
 	 *	Exception:
 	 *	We may not generate an ICMP for an ICMP. icmp_send does the
 	 *	enforcement of this so we can forget it here. It is however
 	 *	sometimes VERY important.
 	 *
 	 *	The test comes before the decrement: a datagram that arrives
 	 *	with a TTL of zero must die too, and decrementing first would
 	 *	wrap an unsigned field round to its maximum.
 	 */

	iph = skb->h.iph;
	if (iph->ttl <= 1) 
	{
		iph->ttl = 0;
		/* Tell the sender its packet died... */
		icmp_send(skb,iph, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
		kfree_skb(skb, FREE_READ);
		return;
	}
	iph->ttl--;

	/* 
	 *	Re-compute the IP header checksum. 
	 *	This is inefficient. We know what has happened to the header
	 *	and could thus adjust the checksum as Phil Karn does in KA9Q
	 */
	 
	ip_send_check(iph);

	/*
	 * OK, the packet is still valid.  Fetch its destination address,
   	 * and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(target_addr, NULL, NULL);
	if (rt == NULL) 
	{
		/*
		 *	Tell the sender its packet cannot be delivered. Again
		 *	ICMP is screened later.
		 */
		icmp_send(skb, iph, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
		kfree_skb(skb, FREE_READ);
		return;
	}


	/*
	 * Gosh.  Not only is the packet valid; we even know how to
	 * forward it onto its final destination.  Can we say this
	 * is being plain lucky?
	 * If the router told us that there is no GW, use the dest.
	 * IP address itself- we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0) 
	{
	
		/*
		 *	Strict routing permits no gatewaying
		 */
		
		if(target_strict)
		{
			icmp_send(skb,iph, ICMP_DEST_UNREACH, ICMP_SR_FAILED, dev);
			kfree_skb(skb, FREE_READ);
			return;
		}

		/*
		 *	There is a gateway so find the correct route for it.
		 *	Gateways cannot in turn be gatewayed.
		 */
		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL) 
		{
			/* 
			 *	Tell the sender its packet cannot be delivered... 
			 */
			icmp_send(skb, iph, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
			kfree_skb(skb, FREE_READ);
			return;
		}
		if (rt->rt_gateway != 0) 
			raddr = rt->rt_gateway;
  	} 
  	else 
  		raddr = target_addr;
  		
  	/*
  	 *	Having picked a route we can now send the frame out.
  	 */

	dev2 = rt->rt_dev;

	/*
	 *	In IP you never forward a frame on the interface that it arrived
	 *	upon. We should generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 *	For now just dropping the packet is an acceptable compromise.
	 */

	if (dev == dev2)
	{
		kfree_skb(skb, FREE_READ);	
		return;
	}

	/*
	 *	The buffer is ours to dispose of, so an interface that is
	 *	down means the datagram is dropped, not left dangling.
	 */

	if (!(dev2->flags & IFF_UP)) 
	{
		kfree_skb(skb, FREE_READ);
		return;
	}

	ip_statistics.IpForwDatagrams++;

	/*
	 *	Record the next hop in the buffer, as ip_output() does.
	 *	Address resolution below and the fragmenter both take it
	 *	from skb->raddr.
	 */

	skb->raddr = raddr;

	/*
	 *	See if it needs fragmenting. Note in ip_rcv we tagged
	 *	the fragment type. This must be right so that
	 *	the fragmenter does the right thing.
	 *
	 *	NOTE(review): on this path skb->protocol is still whatever it
	 *	was on receipt, and ip_fragment() sizes and tags the fragments
	 *	from it. Confirm whether rt->rt_proto should be set first.
	 */

	if(skb->len > dev2->mtu)
	{
		ip_fragment(NULL,skb,dev2, is_frag);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 *	Map service types to priority. We lie about
	 *	throughput being low priority, but its a good
	 *	choice to help improve general usage.
	 */
	if(iph->tos & IPTOS_LOWDELAY)
		pri=SOPRI_INTERACTIVE;
	else if(iph->tos & IPTOS_THROUGHPUT)
		pri=SOPRI_BACKGROUND;
	skb->protocol=rt->rt_proto;
	skb->dev=dev2;
	skb->priority=pri;
	
	/*
	 *	This isn't meant to happen in normal use. It can happen
	 *	however and its just a speed cost.
	 */
	 
	/* If the header space is too small - copy */
	if(skb_data(skb)-skb->start < protocol_size(skb->protocol))
	{
		/*
		 *	Size the copy from the original buffer and carry the
		 *	routing state across. The new buffer's own fields
		 *	hold nothing useful yet.
		 */
		skb=alloc_skb(skb2->len+protocol_size(skb2->protocol), GFP_ATOMIC);
		if(skb==NULL)
		{
			kfree_skb(skb2, FREE_READ);
			return;
		}
		skb->protocol=skb2->protocol;
		skb->dev=skb2->dev;
		skb->priority=skb2->priority;
		skb->raddr=skb2->raddr;
		skb->saddr=skb2->saddr;
		protocol_adjust(skb, skb->protocol);
		memcpy(skb_put(skb, skb2->len),skb_data(skb2),skb2->len);
	}
		
	/* Now stick a MAC header on it. This needs a resolved ARP address */
	if(arp_find(haddr, skb->raddr, skb->dev, skb->saddr, skb)==1)
	{
		/*
		 *	We got no address.. ARP has now taken over the packet.
		 *	If that packet was a copy the original is still ours.
		 */
		if(skb!=skb2)
			kfree_skb(skb2, FREE_READ);
		return;
	}	
	skb->protocol->output(NULL, skb, ETH_P_IP, 0, NULL, haddr, NULL);
	if(skb!=skb2)
		kfree_skb(skb2, FREE_READ);
}


#endif

/*
 *	Receive one incoming IP datagram.
 *
 *	Entry:
 *		The next pending data in skb is the IP datagram header.
 *		The p, below, saddr and daddr arguments are not used here.
 *
 *	Steps:
 *		1. Validate the header (length, version, checksum, total
 *		   length) and trim any padding added by the medium.
 *		2. Scan IP options, picking up a source route next hop.
 *		3. If the destination is one of our addresses: reassemble
 *		   fragments, pull the IP header and demultiplex to the upper
 *		   protocol with skb->h.raw just past the IP header.
 *		4. Otherwise forward the frame (CONFIG_IP_FORWARD) or drop it.
 *
 *	Return:
 *		0 if the frame was passed up, queued for reassembly, forwarded
 *		or dropped; -EINVAL for a bad header or bad options;
 *		-EPROTONOSUPPORT if no upper protocol accepted it. In every
 *		case the buffer has been freed or handed on.
 */
 
int ip_input(struct protocol *p, struct protocol *below, sk_buff *skb, void *saddr, void *daddr)
{
	struct iphdr *iph = (struct iphdr *)skb_data(skb);
	unsigned char flag = 0;
	int brd=IS_MYADDR;			/* Packet type */
	int is_frag=0;
	int target_strict=0;
	unsigned long target_addr;

	skb->lower_head=iph;
	
	/*
	 *	An IP frame was received.
	 */

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
	 *	RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	The whole header (ihl words, options included) is inside
	 *		the buffer. This must be known before the checksum and
	 *		the option scan read that many bytes.
	 *	4.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	5.	The total length covers at least the header and is not
	 *		larger than what actually arrived, so the trim and the
	 *		header pull below cannot run past the data.
	 *	(6.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4
		|| skb->len<iph->ihl*4
		|| ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
		|| ntohs(iph->tot_len)<iph->ihl*4
		|| skb->len<ntohs(iph->tot_len))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_READ);
		return(-EINVAL);
	}
	
	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 */
	 
	skb_trim(skb,ntohs(iph->tot_len));

	/*
	 *	Next analyse the packet for options. Studies show under one packet in
	 *	a thousand have options....
	 */
	 
	target_addr=iph->daddr;	/* If nothing funny is up then the packet goes to its destination */
	   
	if (iph->ihl != 5) 
	{  
		/* Humph.. options. Lots of annoying fiddly bits */
		
		/*
		 *	This is straight from the RFC. It might even be right ;)
		 *
		 * 	RFC 1122: 3.2.1.8 STREAMID option is obsolete and MUST be ignored.
		 *	RFC 1122: 3.2.1.8 MUST NOT crash on a zero length option.
		 *	RFC 1122: 3.2.1.8 MUST support acting as final destination of a source route.
		 */
		 
		int opt_space=4*(iph->ihl-5);	/* Option bytes still to scan */
		int opt_size;			/* Length byte of the current option */
		unsigned char *opt_ptr=skb_data(skb)+sizeof(struct iphdr);
		
		while(opt_space>0)
		{
			/* NOOP and END are single byte options with no length field */
			if(*opt_ptr==IPOPT_NOOP)
			{
				opt_ptr++;
				opt_space--;
				continue;
			}
			if(*opt_ptr==IPOPT_END)
				break;	/* Done */
			/*
			 *	Everything else carries a length byte which must
			 *	cover at least type+length and fit in what is left.
			 */
			if(opt_space<2 || (opt_size=opt_ptr[1])<2 || opt_ptr[1]>opt_space)
			{
				/*
				 *	RFC 1122: 3.2.2.5  SHOULD send parameter problem reports.
				 */
				icmp_send(skb, iph, ICMP_PARAMETERPROB, 0, skb->dev);
				kfree_skb(skb, FREE_READ);
				return -EINVAL;
			}
			switch(opt_ptr[0])
			{
				case IPOPT_SEC:
					/* Should we drop this ?? */
					break;
				case IPOPT_SSRR:	/* These work almost the same way */
					target_strict=1;
					/* Fall through */
				case IPOPT_LSRR:
				case IPOPT_RR:
				/*
				 *	RFC 1122: 3.2.1.8 Support for RR is OPTIONAL.
				 */
					/* Only act on the route if the frame is addressed to us */
					if (iph->daddr!=skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0) 
						break;
					if((opt_size<3) || ( opt_ptr[0]==IPOPT_RR && opt_ptr[2] > opt_size-4 ))
					{
						if(ip_chk_addr(iph->daddr))
							icmp_send(skb, iph, ICMP_PARAMETERPROB, 0, skb->dev);
						kfree_skb(skb, FREE_READ);
						return -EINVAL;
					}
					/* Source route with no room for another entry: leave it alone */
					if(opt_ptr[2] > opt_size-4 )
						break;
					/* Bytes are [IPOPT_xxRR][Length][EntryPointer][Entry0][Entry1].... */
					/* This isn't going to be too portable - FIXME */
					/*
					 *	NOTE(review): the entry pointer is used as a raw
					 *	offset from the option start, is not advanced
					 *	after recording, and values below 3 overwrite the
					 *	option's own type/length bytes. The header
					 *	checksum is not recomputed here either. Confirm
					 *	against RFC 791 before relying on this path.
					 */
					if(opt_ptr[0]!=IPOPT_RR)
						target_addr=*(long *)(&opt_ptr[opt_ptr[2]]);	/* Get hop */
					*(long *)(&opt_ptr[opt_ptr[2]])=skb->dev->pa_addr;	/* Record hop */
					break;
				case IPOPT_TIMESTAMP:
				/*
				 *	RFC 1122: 3.2.1.8 The timestamp option is OPTIONAL but if implemented
				 *	MUST meet various rules (read the spec).
				 */
					printk("ICMP: Someone finish the timestamp routine ;)\n");
					break;
				default:
					break;
			}
			opt_ptr+=opt_size;
			opt_space-=opt_size;
		}
					
	}

	/*
	 *	Remember if the frame is fragmented. Bit 0 of is_frag means
	 *	"more fragments follow", bit 1 means "non zero fragment offset".
	 */
	 
	if(iph->frag_off)
	{
		/*
		 *	More-fragments flag (0x2000 in host order). Converting the
		 *	mask rather than hard coding the swapped value keeps this
		 *	correct on big endian machines too.
		 */
		if (iph->frag_off & htons(0x2000))
  			is_frag|=1;
  	  	/*
  		 *	Non zero offset, so not the first fragment.
  		 */
  		 
  		if (ntohs(iph->frag_off) & 0x1fff)
  			is_frag|=2;
  	}
  	
	/*
	 *	Is this our packet ?
	 */
	   	 
	if (iph->daddr==skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0) 
	{

	  	/*
	  	 * Reassemble IP fragments. 
	  	 */

	  	if(is_frag)
	  	{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,skb->dev);
			/* NULL: the fragment was consumed and the datagram is not complete yet */
	        	if(skb==NULL)
	        		return 0;
		        iph=skb->h.iph;
		}

		/*
		 *	Point into the IP datagram, just past the header so that it can be obtained by the upper
		 *	layers.
		 *
		 *	RFC 1122: SHOULD pass TOS value up to the transport layer.
		 */

		skb->ip_hdr = iph;
	
		/*
		 *	Pull the ip header off.
		 */
	 
		skb->h.raw = skb_pull(skb,iph->ihl*4,NULL);

		/*
		 *	Demultiplex it.
		 */
	 
		flag=protocol_pass_demultiplex(&proto_ip, &iph->protocol, skb, &iph->saddr, &iph->daddr);

		/*
		 * All protocols checked.
		 * If this packet was a broadcast, we may *not* reply to it.
		 * (This is now checked by icmp.c along with multicasting)
		 *
		 * RFC1122: 3.2.2.1 SHOULD send PROT_UNREACH if a protocol is not available.
		 */

		if (!flag)
		{
			icmp_send(skb, iph, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, skb->dev);
			kfree_skb(skb, FREE_READ);
			return -EPROTONOSUPPORT;
		}

		return(0);
	}
	
        /*
  	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday. 
  	 *	We do our best to avoid it by checking the most common case of packet to us
  	 *	arriving on the interface with the matching address.
  	 *
  	 *	This is inefficient. While finding out if it is for us we could also compute
  	 *	the routing table entry. This is where the great unified cache theory comes
  	 *	in as and when someone implements it
  	 */

	/*
	 *	Don't forward multicast or broadcast frames. This is a received
	 *	buffer, so it is released with FREE_READ like every other drop
	 *	in this function.
	 */
	
	if(skb->pkt_type!=PACKET_HOST)
	{
		kfree_skb(skb,FREE_READ);
		return 0;
	}
		
	/*
	 *	The packet is for another target. Forward the frame
	 */
	 
#ifdef CONFIG_IP_FORWARD
	ip_forward(skb, skb->dev, is_frag, target_addr, target_strict);
#else
	ip_statistics.IpInAddrErrors++;
	kfree_skb(skb, FREE_READ);
#endif			
	return(0);
}


/*
 *	Finish off an outgoing IP frame and hand it to the layer below.
 *
 *	sk	owning socket, or NULL (the buffer is then always freed).
 *	skb	buffer whose current data starts with the IP header;
 *		skb->dev must already be set.
 *	free	0: the buffer is not freed after transmit.
 *		1: the buffer is freed after transmit.
 *		2: as 1, but the header keeps its existing id (used so
 *		   that fragments are not renumbered).
 *
 *	This routine fills in the total length and the id, fragments the
 *	frame if it is larger than the device MTU, computes the header
 *	checksum, resolves the hardware address and calls the lower
 *	protocol's output routine.
 */
 
int ip_id_count=0;		/* Source of the id field for outgoing datagrams */
 
void ip_queue_xmit(struct sock *sk, sk_buff *skb, int free)
{
  	struct iphdr *iph;
	char haddr[MAX_ADDR_LEN];
	
  	/*
  	 *	Sanity check.
  	 *	NOTE(review): the buffer is not released on this path; confirm
  	 *	whether the caller is expected to keep ownership.
  	 */
  	if (skb->dev == NULL) 
  	{
		printk("IP: ip_queue_xmit dev = NULL\n");
		return;
  	}
  
  	IS_SKB(skb);
  	
  	/*
  	 *	Do some book-keeping in the packet for later
  	 */

  	skb->when = jiffies;
  
	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem
	 */
	 
	iph = (struct iphdr *)skb_data(skb);
	skb->ip_hdr = iph;
	/* Host to network order: htons(), not ntohs() */
	iph->tot_len = htons(skb->len);

	/*
	 *	No reassigning numbers to fragments...
	 */
	 
	if(free!=2)
		iph->id      = htons(ip_id_count++);
	else
		free=1;
		
	/* All buffers without an owner socket get freed */
  	if (sk == NULL) 
  		free = 1;
  	
  	skb->free = free;		

	/*
	 *	Do we need to fragment. Again this is inefficient. 
	 *	We need to somehow lock the original buffer and use
	 *	bits of it.
	 */
	 
	if(skb->len > skb->dev->mtu)
  	{
		/* The fragmenter sends the pieces; the original is then released */
		ip_fragment(sk,skb,skb->dev,0);
		IS_SKB(skb);
  		kfree_skb(skb,FREE_WRITE);
  		return;
	}
  
  	/*
  	 *	Add an IP checksum
  	 */
  	 
	ip_send_check(iph);
	
	/*
	 *	More debugging. You cannot queue a packet already on a list
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		printk("ip_queue_xmit: next != NULL\n");
		skb_unlink(skb);
	}

	/* Remember who owns the buffer */
	if(free)
		skb->sk = sk;
	
	/*
	 *	Send the packet. 
	 */
	 
	ip_statistics.IpOutRequests++;

	/*
	 *	RFC 1122: 2.4	MUST pass the IP tos value to the link layer. Well we sort of pass it but
	 *			we interpret it into something generic first. Not technically 100% exactly
	 *			perfect but far more sensible.
	 */
	
	if(sk!=NULL && sk->priority)
		skb->priority=sk->priority;

	/*
	 *	Now stick a MAC header on it. This needs a resolved ARP address 
	 */
	 
	if(arp_find(haddr, skb->raddr, skb->dev, skb->saddr, skb)==1)
	{
		/* We got no address.. ARP has now taken over the packet.. our job is done */
		return;
	}
		
	skb->protocol->output(skb->protocol, skb, ETH_P_IP, 0, NULL, haddr, NULL);
}




#ifdef CONFIG_IP_MULTICAST

/*
 *	Format the multicast group table as text for the IGMP daemon
 *	to read.
 *
 *	buffer	where the text is written
 *	start	set to the first byte of the requested window
 *	offset	position in the whole listing the reader wants
 *	length	number of bytes the reader wants
 *
 *	Returns the number of valid bytes at *start, never more than
 *	length. The list is walked with interrupts disabled.
 */
 
int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length)
{
	struct ip_mc_list *entry;
	unsigned long flags;
	off_t begin=0;		/* Listing position of buffer[0] */
	off_t pos=0;		/* Listing position reached so far */
	int len;

	len=sprintf(buffer,"Device    : Multicast\n");

	save_flags(flags);
	cli();

	for(entry=ip_mc_head; entry!=NULL; entry=entry->next)
	{
		len+=sprintf(buffer+len,"%-10s: %08lX\n", entry->interface->name, entry->multiaddr.s_addr);
		pos=begin+len;

		/* Everything so far lies before the window: throw it away */
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}

		/* Past the end of the window: no point formatting more */
		if(pos>offset+length)
			break;
	}

	restore_flags(flags);

	*start=buffer+(offset-begin);
	len-=(offset-begin);

	return (len>length) ? length : len;
}
	
	
	
/*
 *	Drop every multicast reference to this device. (Device has
 *	gone down/been removed etc).
 *
 *	Every group entry whose interface is dev is unlinked from the
 *	ip_mc_head list and freed together with its socket list. The
 *	walk runs with interrupts disabled.
 */

void ip_mc_dropdevice(struct device *dev)
{
	unsigned long flags;
	struct ip_mc_list *im;
	struct ip_mc_list **ih=&ip_mc_head;	/* The link that points at im */

	save_flags(flags);
	cli();
	while((im=*ih)!=NULL)
	{
		if(im->interface==dev)
		{
			struct ip_mc_socklist *is=im->list;

			/* Free the sockets hanging off this group */
			while(is!=NULL)
			{
				struct ip_mc_socklist *doomed=is;
				is=is->next;
				kfree_s(doomed,sizeof(*doomed));
			}

			/*
			 *	Unlink by rewriting the link that led here, so the
			 *	list never keeps a pointer to the freed entry. ih
			 *	stays put: it now refers to the successor.
			 */
			*ih=im->next;
			kfree_s(im,sizeof(*im));
		}
		else
		{
			ih=&im->next;
		}
	}
	restore_flags(flags);
}


/*
 *	Program a device's multicast filter from the group list.
 *
 *	p	first list entry to look at; the run of consecutive entries
 *		whose interface is dev is used.
 *	dev	device whose set_multicast_list method is called.
 *
 *	Only groups that still have at least one socket are passed to the
 *	driver. Returns the first entry after the run (possibly NULL), or
 *	NULL if the address table could not be allocated.
 */
 
static struct ip_mc_list *ip_mc_setdevice(struct ip_mc_list *p, struct device *dev)
{
	struct ip_mc_list *n;
	char *mlist;
	unsigned long taddr;
	int slots;
	int count=0;

	/*
	 *	Find the interface. Count the number of entries so we can allocate
	 *	our multicast list for the driver.
	 */
	 
	for(n=p; n!=NULL&&n->interface==dev;n=n->next)
		if(n->list)
			count++;

	/*
	 *	Nothing left to receive: clear the filter without asking for
	 *	a zero byte allocation. n already points past the run.
	 */

	if(count==0)
	{
		dev->set_multicast_list(dev,0,NULL);
		return n;
	}

	/*
	 *	Allocate a multicast list (FIXME: Assumes ethernet)
	 *	Six bytes of hardware address per group.
	 */
	 
	slots=count;
	mlist=(char *)kmalloc(6*slots,GFP_KERNEL);
	if(mlist==NULL)
	{
		printk("Insufficient memory to allocate multicast list\n");
		return NULL;
	}
			
	/*
	 *	Walk the list again and fill the table. The fill is bounded by
	 *	the number of slots allocated in case the list grew meanwhile.
	 */
	 
	count=0;
	for(n=p; n!=NULL&&(n->interface==dev);n=n->next) 
	{
		if(n->list && count<slots)
		{
			/*
			 *	Assemble an ethernet MAC binding matching the
			 *	multicast address: 01:00:5e followed by the low
			 *	23 bits of the group address.
			 */
			 
			mlist[count*6+0]=0x01;
			mlist[count*6+1]=0x00;
			mlist[count*6+2]=0x5e;
			taddr=ntohl(n->multiaddr.s_addr);
			mlist[count*6+5]=taddr&0xff;
			taddr=taddr>>8;
			mlist[count*6+4]=taddr&0xff;
			taddr=taddr>>8;
			mlist[count*6+3]=taddr&0x7f;
			count++;
		}
	}
	
	/*
	 *	Activate it.
	 */
	 
	dev->set_multicast_list(dev,count,mlist);
	kfree_s(mlist,6*slots);
	return n;
}

/*
 *	Remove all occurrences of a socket from the multicast list.
 *
 *	Works in three passes:
 *	1. take sk off the socket list of every group;
 *	2. if anything was removed, reprogram every device from the list;
 *	3. free the group entries that are left with no sockets.
 *
 *	The devices are reprogrammed before the empty entries are freed so
 *	that a device losing its last group is still visited and cleared.
 */

void ip_mc_dropsocket(struct sock *sk)
{
	struct ip_mc_list *l, *nextl, *p;
	struct ip_mc_socklist *t, *nextt, *s;
	struct rtable *rt;
	struct device *dev;
	static struct options optmem;
	unsigned long route_src;
	int dropped=0;
	
	/*
	 *	Walk the multicast list.
	 */

	for(l=ip_mc_head;l;l=l->next) 
	{
		s=NULL;		/* Last entry kept on this socket list */
		for(t=l->list;t;t=nextt) 
		{
			nextt=t->next;

			if(t->sk==sk)
			{
				/*
				 *	Found our socket. Remove it and free it. The
				 *	predecessor does not move: t is gone.
				 */
				if(!s)
					l->list=nextt;
				else
					s->next=nextt;
				kfree_s(t,sizeof(struct ip_mc_socklist));
				dropped=1;
			}
			else
				s=t;
		}
	}
	
	/* No drop then no changes to devices (normal case) */
	
	if(!dropped)
		return;

	/* Possible removals - rebuild all devices */
	
	l=ip_mc_head;
	while(l) 
	{
		dev=NULL;
		
		/*
		 *	If no specific interface is set.
		 */
		 
		if(l->interface==NULL) 
		{
			if((rt=ip_rt_route(l->multiaddr.s_addr,&optmem, &route_src))!=NULL)
			{
				dev=rt->rt_dev;
				rt->rt_use--;
			}
		}
		else
		{
			/*
			 *	Find the matching device
			 */
			 
			dev=l->interface;
		}
	
		/*
		 *	There may be no matching device if someone has been playing
		 *	with interface properties. This is fine.
		 */
		 
		if(dev)
			ip_mc_setdevice(l,dev);

		/*
		 *	Step over the run of entries just handled. This is worked
		 *	out here rather than from ip_mc_setdevice()'s return value,
		 *	which is NULL on allocation failure and equals l when the
		 *	entry has no interface of its own.
		 */

		nextl=l->next;
		if(l->interface!=NULL)
		{
			while(nextl!=NULL && nextl->interface==l->interface)
				nextl=nextl->next;
		}
		l=nextl;
	}

	/* Remove empty list entries */
	
	p=NULL;			/* Last entry kept on the list */
	for(l=ip_mc_head;l;l=nextl) 
	{
		nextl=l->next;
		if(!l->list)
		{
			if(!p)
				ip_mc_head=nextl;
			else
				p->next=nextl;
			kfree_s(l,sizeof(struct ip_mc_list));
		}
		else
			p=l;
	}
}

#endif


/*
 *	Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
 *	an IP socket.
 *
 *	We implement IP_TOS (type of service), IP_TTL (time to live) and, with
 *	CONFIG_IP_MULTICAST, IP_MULTICAST_TTL, IP_MULTICAST_IF, IP_ADD_MEMBERSHIP
 *	and IP_DROP_MEMBERSHIP.
 *
 *	optval is a user space pointer; at least sizeof(int) bytes must be
 *	readable whatever the option. optlen is not looked at.
 *
 *	Returns 0 on success or a negative errno: -EINVAL (NULL optval, value
 *	out of range, not a member), -EOPNOTSUPP (level is not SOL_IP),
 *	-ENOPROTOOPT (unknown option), -ENODEV (no usable device), -ENOMEM
 *	(out of memory), or whatever verify_area() reports.
 *
 *	Next release we will sort out IP_OPTIONS since for some people these are 
 *	kind of important.
 *	RFC 1122: 3.2.1.8 MUST provide means for transport layer to set IP options.
 */

int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
	int val,err;

	if (optval == NULL)
		return(-EINVAL);

	err=verify_area(VERIFY_READ, optval, sizeof(int));
	if(err)
		return err;

	val = get_fs_long((unsigned long *)optval);

	if(level!=SOL_IP)
		return -EOPNOTSUPP;

	switch(optname)
	{
		/*
		 *	RFC 1122: 3.2.1.6 MUST provide a method for the application to set the TOS
		 *	field.
		 */
		case IP_TOS:
			if(val<0||val>255)
				return -EINVAL;
			sk->opt.ip.tos=val;
			/* Only these two exact values change the queueing priority */
			if(val==IPTOS_LOWDELAY)
				sk->priority=SOPRI_INTERACTIVE;
			if(val==IPTOS_THROUGHPUT)
				sk->priority=SOPRI_BACKGROUND;
			return 0;
		case IP_TTL:
		/*
		 *	RFC 1122: 3.2.1.7 MUST not send a datagram with a TTL of 0.
		 */
			if(val<1||val>255)
				return -EINVAL;
			sk->opt.ip.ttl=val;
			return 0;
#ifdef CONFIG_IP_MULTICAST
		case IP_MULTICAST_TTL: 
		{
			unsigned char ucval;

			/* A single byte, so the upper bound needs no test */
			ucval=get_fs_byte((unsigned char *)optval);
			if(ucval<1)
				return -EINVAL;
			sk->opt.ip.mc_ttl=(int)ucval;
			return 0;
		}

		case IP_MULTICAST_IF: 
		{
			/* Not fully tested */
			struct in_addr addr;
			struct device *dev=NULL;
			
			/*
			 *	Check the arguments are allowable
			 */

			err=verify_area(VERIFY_READ, optval, sizeof(addr));
			if(err)
				return err;
				
			memcpy_fromfs(&addr,optval,sizeof(addr));
			
			/*
			 *	What address has been requested
			 */
			
			if(addr.s_addr==INADDR_ANY)	/* Default */
			{
				sk->opt.ip.mc_name[0]=0;
				return 0;
			}
			
			/*
			 *	Find the device: up, multicast capable and owning
			 *	the requested address.
			 */
			 
			for(dev = dev_base; dev; dev = dev->next)
			{
				if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
					(dev->pa_addr==addr.s_addr))
					break;
			}
			
			/*
			 *	Did we find one
			 */
			 
			if(dev) 
			{
				/* NOTE(review): unbounded copy; confirm mc_name can hold any device name */
				strcpy(sk->opt.ip.mc_name,dev->name);
				return 0;
			}
			return -ENODEV;
		}
		
		case IP_ADD_MEMBERSHIP: 
		{
		
/*
 *	FIXME: Add/Del membership should have a semaphore protecting them from re-entry
 */
			struct ip_mreq mreq;
			struct ip_mc_list *p, *n, *gtemp;
			struct ip_mc_socklist *member;
			static struct options optmem;
			unsigned long route_src;
			struct rtable *rt;
			struct ip_mc_list *l=NULL;
			struct device *dev=NULL;
			
			/*
			 *	Check the arguments.
			 */

			err=verify_area(VERIFY_READ, optval, sizeof(mreq));
			if(err)
				return err;

			memcpy_fromfs(&mreq,optval,sizeof(mreq));

			/* 
			 *	Get device for use later
			 */

			if(mreq.imr_interface.s_addr==INADDR_ANY) 
			{
				/*
				 *	Not set so scan.
				 */
				if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,&optmem, &route_src))!=NULL)
				{
					dev=rt->rt_dev;
					rt->rt_use--;
				}
			}
			else
			{
				/*
				 *	Find a suitable device.
				 */
				for(dev = dev_base; dev; dev = dev->next)
				{
					if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
						(dev->pa_addr==mreq.imr_interface.s_addr))
						break;
				}
			}
			
			/*
			 *	No device, no cookies.
			 */
			 
			if(!dev)
				return -ENODEV;

			/*
			 *	Get all the memory we might need before touching the
			 *	list. Both results are checked, and no allocation
			 *	happens between finding the insertion point and
			 *	linking the new entries in.
			 */

			gtemp=(struct ip_mc_list *)kmalloc(sizeof(struct ip_mc_list),GFP_KERNEL);
			member=(struct ip_mc_socklist *)kmalloc(sizeof(struct ip_mc_socklist),GFP_KERNEL);
			if(gtemp==NULL||member==NULL)
			{
				if(gtemp!=NULL)
					kfree_s(gtemp,sizeof(struct ip_mc_list));
				if(member!=NULL)
					kfree_s(member,sizeof(struct ip_mc_socklist));
				return -ENOMEM;
			}
				
			/* 
			 *	Find the first entry for the interface (p). l trails
			 *	one entry behind the search position throughout.
			 */

			for(p=ip_mc_head;p!=NULL&&p->interface!=dev;p=p->next)
				l=p;

			/* 
			 *	Find entry or end of list for interface 
			 */

			for(n=p; n!=NULL && (n->interface==dev)&& 
					(n->multiaddr.s_addr!=mreq.imr_multiaddr.s_addr);n=n->next)
				l=n;
		
			if((n==NULL)||(n->interface!=dev)) 
			{
				/*
				 *	Not already receiving group - stick it on the
				 *	list after l, i.e. at the end of this device's
				 *	run of entries (or at the head of an empty list).
				 */
				gtemp->interface=dev;
				gtemp->multiaddr.s_addr=mreq.imr_multiaddr.s_addr;
				gtemp->list=NULL;
				gtemp->next=n;
				if(l)
					l->next=gtemp;
				else
					ip_mc_head=gtemp;
				if(p==NULL)	/* First group on this interface */
					p=gtemp;
				n=gtemp;
			}
			else
			{
				/* Already receiving group: the spare entry is not needed */
				kfree_s(gtemp,sizeof(struct ip_mc_list));
			}

			/*
			 *	Put the socket at the front of the group's socket list.
			 */

			member->sk=sk;
			member->next=n->list;
			n->list=member;

			/*
			 *	Update the device.
			 */
			 
			ip_mc_setdevice(p,dev);

			/*
			 *	Done
			 */

			return 0;
		}
		
		case IP_DROP_MEMBERSHIP: 
		{
			struct ip_mreq mreq;
			struct ip_mc_list *p, *n;
			struct rtable *rt;
			static struct options optmem;
			unsigned long route_src;
			struct ip_mc_socklist *t, *s=NULL;
			struct ip_mc_list *l=NULL;
			struct device *dev=NULL;

			/*
			 *	Check the arguments
			 */
			 
			err=verify_area(VERIFY_READ, optval, sizeof(mreq));
			if(err)
				return err;

			memcpy_fromfs(&mreq,optval,sizeof(mreq));

			/*
			 *	Get device for use later 
			 */
 
			if(mreq.imr_interface.s_addr==INADDR_ANY) 
			{
				if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,&optmem, &route_src))!=NULL)
				{
					dev=rt->rt_dev;
					rt->rt_use--;
				}
			}
			else 
			{
				for(dev = dev_base; dev; dev = dev->next)
				{
					if((dev->flags&IFF_UP)&& (dev->flags&IFF_MULTICAST)&&
							(dev->pa_addr==mreq.imr_interface.s_addr))
						break;
				}
			}
			
			/*
			 *	Did we find a suitable device.
			 */
			 
			if(!dev)
				return -ENODEV;

			/*
			 *	Find the first entry for the interface (p); l trails
			 *	one entry behind.
			 */

			for(p=ip_mc_head;p&&p->interface!=dev;p=p->next)
			{
				l=p;
			}

			if(p==NULL)	/* Zero mc addresses on interface */
				return -EINVAL;

			/*
			 *	Find entry or end of list for interface 
			 */

			for(n=p;n&&n->interface==dev&&
				(n->multiaddr.s_addr!=mreq.imr_multiaddr.s_addr);n=n->next)
			{
				l=n;
			}
			
			/*
			 *	Not already receiving group 
			 */

			if((n==NULL)||(n->interface!=dev))
				return -EINVAL;

			/* 
			 *	Found group to delete 
			 */

			if(n->list==NULL) 
			{
				printk("setsockopt: multicast list with no entries");
				if(l==NULL)
					ip_mc_head=n->next;
				else
					l->next=n->next;

				/* 
				 *	First entry for interface going ... 
				 */

				if(n==p)
					p=n->next;

				kfree_s(n,sizeof(struct ip_mc_list));

				/*
				 *	List changed, redo device. p is NULL when the
				 *	entry removed was the last one on the list.
				 */

				if(p==NULL||p->interface!=dev)
					dev->set_multicast_list(dev,0,NULL);
				else
					ip_mc_setdevice(p,dev);

				return 0;
			}
			
			/*
			 *	Find our socket.
			 */

			for(t=n->list;t&&(t->sk!=sk);t=t->next)
				s=t;

			/* 
			 *	No entry for socket 
			 */

			if(!t)
				return -EINVAL;

			if(!s)
				n->list=t->next;
			else
				s->next=t->next;

			kfree_s(t,sizeof(struct ip_mc_socklist));

 			/*
 			 *	Last socket removed from list 
 			 */

			if(n->list==NULL) 
			{
				if(!l)
					ip_mc_head=n->next;
				else
			   		l->next=n->next;

				/*
				 *	First entry for interface going ... 
				 */

				if(n==p)
					p=n->next;

				/*
				 *	Free the entry.
				 */
				 
				kfree_s(n,sizeof(struct ip_mc_list));

				/*
				 *	List changed, redo device. p is NULL when the
				 *	entry removed was the last one on the list.
				 */

				if(p==NULL||p->interface!=dev)
 					dev->set_multicast_list(dev,0,NULL);
				else
					ip_mc_setdevice(p,dev);
			}

			return 0;
		}
#endif
		/* IP_OPTIONS and friends go here eventually */
		default:
			return(-ENOPROTOOPT);
	}
}

/*
 *	Read back an IP level socket option.
 *
 *	IP_TOS, IP_TTL and (with CONFIG_IP_MULTICAST) IP_MULTICAST_TTL are
 *	returned as an int, with sizeof(int) stored through optlen.
 *	IP_MULTICAST_IF returns the interface name held in the socket,
 *	without a terminating NUL, and stores its length through optlen.
 *
 *	Returns 0, -EOPNOTSUPP if level is not SOL_IP, -ENOPROTOOPT for an
 *	unknown option, or the verify_area() error for a bad user pointer.
 *
 *	Note for future reference. The GET of IP options gets the
 *	_received_ ones. The set sets the _sent_ ones.
 */
 
int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
	int value;
	int rc;

	if(level!=SOL_IP)
		return -EOPNOTSUPP;

#ifdef CONFIG_IP_MULTICAST
	/*
	 *	The one option that is a string rather than an int.
	 */
	if(optname==IP_MULTICAST_IF)
	{
		int name_len;

		rc=verify_area(VERIFY_WRITE, optlen, sizeof(int));
		if(rc)
			return rc;
		name_len=strlen(sk->opt.ip.mc_name);
		put_fs_long(name_len,(unsigned long *) optlen);
		rc=verify_area(VERIFY_WRITE, optval, name_len);
		if(rc)
			return rc;
		memcpy_tofs((void *)optval,sk->opt.ip.mc_name, name_len);
		return 0;
	}
#endif

	/*
	 *	Pick up the integer valued options.
	 */
	switch(optname)
	{
		case IP_TOS:
			value=sk->opt.ip.tos;
			break;
		case IP_TTL:
			value=sk->opt.ip.ttl;
			break;
#ifdef CONFIG_IP_MULTICAST
		case IP_MULTICAST_TTL:
			value=sk->opt.ip.mc_ttl;
			break;
#endif
		default:
			return(-ENOPROTOOPT);
	}

	/*
	 *	Hand back the length first, then the value.
	 */
	rc=verify_area(VERIFY_WRITE, optlen, sizeof(int));
	if(rc)
		return rc;
	put_fs_long(sizeof(int),(unsigned long *) optlen);

	rc=verify_area(VERIFY_WRITE, optval, sizeof(int));
	if(rc)
		return rc;
	put_fs_long(value,(unsigned long *)optval);

	return(0);
}

/*
 *	Bring up the IP layer: register the IP protocol descriptor
 *	(proto_ip), then call the initialisers of the protocols carried
 *	over IP in this order: raw, ICMP, TCP, UDP. The packet_init()
 *	call is currently commented out.
 */
 
void ip_init(void)
{
	/* IP itself goes first, before the sub-protocol initialisers run */
	protocol_register(&proto_ip);
	raw_init();
	icmp_init();
/*	packet_init();*/
	tcp_init();
	udp_init();
}

/*
 *	Handle a binding request to IP.
 *
 *	protocol	must be ETH_P_IP, otherwise -EAFNOSUPPORT.
 *	subid		sub-protocol number, 0..255, otherwise -EPROTOTYPE.
 *	key		receives subid as a single byte on success; it is
 *			left untouched on failure.
 *
 *	Returns 1 on success.
 */
 
static int ip_get_binding(int protocol, int subid, unsigned char *key)
{
	if(protocol==ETH_P_IP)
	{
		if(subid>=0&&subid<=255)
		{
			*key=(unsigned char)subid;
			return 1;
		}
		return -EPROTOTYPE;
	}
	return -EAFNOSUPPORT;
}

/*
 *	Protocol descriptor for IP. This is the object handed to
 *	protocol_register() by ip_init() and to
 *	protocol_pass_demultiplex() by ip_input().
 *
 *	The initialiser is positional: what each slot means is fixed by
 *	the declaration of struct protocol, which is not part of this
 *	file. NOTE(review): check the slot order against that declaration
 *	whenever struct protocol changes.
 */
 
struct protocol proto_ip=
{
	NULL,
	"IP",				/* Name of the protocol */
	sizeof(struct iphdr),		/* Two slots are set to the size of a basic IP header; */
	0,
	sizeof(struct iphdr),		/* presumably header space hints - confirm in struct protocol */
	0,
	ip_output,			/* IP's own output routine (defined outside this section) */
	protocol_defer,			/* Defer input to be slow (for now) */
	ip_input,			/* Receive routine for incoming datagrams */
	default_protocol_control,
	ip_get_binding,			/* Validates binding requests made to IP */
	NULL,
	NULL,
	0
};
