Commit 8de08f90 authored by David S. Miller

Merge bk://212.42.230.204:994/nf-2.6

into sunset.davemloft.net:/home/davem/src/BK/net-2.6
parents 0f634eb3 b20f3c6c
......@@ -23,13 +23,16 @@ enum tcp_conntrack {
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
+ /* This sender sent FIN first */
+ #define IP_CT_TCP_FLAG_CLOSE_INIT 0x03
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
- u_int8_t flags; /* per direction state flags */
+ u_int8_t flags; /* per direction options */
};
struct ip_ct_tcp
......
......@@ -822,10 +822,10 @@ static int translate_table(struct ebt_replace *repl,
/* this will get free'd in do_replace()/ebt_register_table()
if an error occurs */
newinfo->chainstack = (struct ebt_chainstack **)
- vmalloc(NR_CPUS * sizeof(struct ebt_chainstack));
+ vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack));
if (!newinfo->chainstack)
return -ENOMEM;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < num_possible_cpus(); i++) {
newinfo->chainstack[i] =
vmalloc(udc_cnt * sizeof(struct ebt_chainstack));
if (!newinfo->chainstack[i]) {
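The ebtables, arptables, iptables and ip6tables hunks in this commit all make the same change: per-CPU areas are sized by num_possible_cpus(), the number of CPUs that can ever come online on this boot, rather than the compile-time NR_CPUS ceiling. A minimal sketch of the pattern (alloc_chainstacks is a hypothetical helper, not in the patch); like the patch, it indexes slots by CPU number, which assumes possible CPU ids are contiguous from 0:

    #include <linux/cpumask.h>
    #include <linux/vmalloc.h>

    /* Hypothetical helper mirroring the translate_table() hunk above:
     * one chainstack per possible CPU instead of one per NR_CPUS. */
    static struct ebt_chainstack **alloc_chainstacks(unsigned int udc_cnt)
    {
    	struct ebt_chainstack **cs;
    	unsigned int i;

    	cs = vmalloc(num_possible_cpus() * sizeof(*cs));
    	if (!cs)
    		return NULL;
    	for (i = 0; i < num_possible_cpus(); i++) {
    		cs[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack));
    		if (!cs[i]) {
    			while (i-- > 0)
    				vfree(cs[i]);
    			vfree(cs);
    			return NULL;	/* caller maps this to -ENOMEM */
    		}
    	}
    	return cs;
    }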
......@@ -898,7 +898,7 @@ static void get_counters(struct ebt_counter *oldcounters,
memcpy(counters, oldcounters,
sizeof(struct ebt_counter) * nentries);
/* add other counters to those of cpu 0 */
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ for (cpu = 1; cpu < num_possible_cpus(); cpu++) {
counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
for (i = 0; i < nentries; i++) {
counters[i].pcnt += counter_base[i].pcnt;
......@@ -930,7 +930,7 @@ static int do_replace(void __user *user, unsigned int len)
BUGPRINT("Entries_size never zero\n");
return -EINVAL;
}
- countersize = COUNTER_OFFSET(tmp.nentries) * NR_CPUS;
+ countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus();
newinfo = (struct ebt_table_info *)
vmalloc(sizeof(struct ebt_table_info) + countersize);
if (!newinfo)
......@@ -1023,7 +1023,7 @@ static int do_replace(void __user *user, unsigned int len)
vfree(table->entries);
if (table->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(table->chainstack[i]);
vfree(table->chainstack);
}
......@@ -1043,7 +1043,7 @@ static int do_replace(void __user *user, unsigned int len)
vfree(counterstmp);
/* can be initialized in translate_table() */
if (newinfo->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack);
}
......@@ -1137,7 +1137,7 @@ int ebt_register_table(struct ebt_table *table)
return -EINVAL;
}
- countersize = COUNTER_OFFSET(table->table->nentries) * NR_CPUS;
+ countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus();
newinfo = (struct ebt_table_info *)
vmalloc(sizeof(struct ebt_table_info) + countersize);
ret = -ENOMEM;
......@@ -1191,7 +1191,7 @@ int ebt_register_table(struct ebt_table *table)
up(&ebt_mutex);
free_chainstack:
if (newinfo->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack);
}
......@@ -1215,7 +1215,7 @@ void ebt_unregister_table(struct ebt_table *table)
if (table->private->entries)
vfree(table->private->entries);
if (table->private->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(table->private->chainstack[i]);
vfree(table->private->chainstack);
}
......
......@@ -717,7 +717,7 @@ static int translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
......@@ -768,7 +768,7 @@ static void get_counters(const struct arpt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0;
ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
......@@ -886,7 +886,7 @@ static int do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(tmp.size) * NR_CPUS);
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......@@ -1159,7 +1159,7 @@ int arpt_register_table(struct arpt_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(repl->size) * NR_CPUS);
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo) {
ret = -ENOMEM;
return ret;
......
......@@ -400,8 +400,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
return -1;
}
DEBUGP("Setting vtag %x for dir %d\n",
- ih->init_tag, CTINFO2DIR(ctinfo));
- conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = ih->init_tag;
+ ih->init_tag, !CTINFO2DIR(ctinfo));
+ conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
}
conntrack->proto.sctp.state = newconntrack;
......
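The two-line SCTP fix above is about direction: the initiate tag carried in an INIT chunk is the verification tag the peer will put on packets flowing the other way, so it must be stored under the reply direction rather than hard-coded as IP_CT_DIR_ORIGINAL. Restated as an illustrative sketch:

    /* Illustrative restatement of the hunk above. */
    enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

    /* was: conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = ih->init_tag; */
    conntrack->proto.sctp.vtag[!dir] = ih->init_tag;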
......@@ -254,7 +254,7 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sSS -> sSR Standard open.
* sSR -> sSR Retransmitted SYN/ACK.
* sES -> sIG Late retransmitted SYN/ACK?
- * sFW -> sIG
+ * sFW -> sIG Might be SYN/ACK answering ignored SYN
* sCW -> sIG
* sLA -> sIG
* sTW -> sIG
......@@ -273,10 +273,10 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
- /*ack*/ { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+ /*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
- * sSS -> sIG Might be a half-open connection.
- * sSR -> sIV Simultaneous open.
+ * sSS -> sIV Might be a half-open connection.
+ * sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
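The rows above belong to the tcp_conntracks transition table, consulted as a three-way lookup: packet direction, then the segment type derived from the TCP flags by get_conntrack_index(), then the currently tracked state. A sketch of the lookup as tcp_packet() performs it further down:

    /* Sketch: how a corrected row such as the ack row above takes effect. */
    enum tcp_conntrack new_state;

    new_state = tcp_conntracks[dir][index][old_state];
    /* e.g. index == TCP_ACK_SET with old_state == sSR now yields sSR
     * (previously sIV), accepting an ACK that answers a late resent SYN. */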
......@@ -352,14 +352,19 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
http://www.nluug.nl/events/sane2000/papers.html
http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
- The boundaries and the conditions are slightly changed:
+ The boundaries and the conditions are changed according to RFC793:
+ the packet must intersect the window (i.e. segments may be
+ after the right or before the left edge) and thus receivers may ACK
+ segments after the right edge of the window.
td_maxend = max(sack + max(win,1)) seen in reply packets
td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_maxwin += seq + len - sender.td_maxend
+ if seq + len > sender.td_maxend
td_end = max(seq + len) seen in sent packets
- I. Upper bound for valid data: seq + len <= sender.td_maxend
- II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin
+ I. Upper bound for valid data: seq <= sender.td_maxend
+ II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
III. Upper bound for valid ack: sack <= receiver.td_end
IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
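Conditions I-IV map one-to-one onto the kernel's modular 32-bit sequence-number comparisons. A self-contained sketch, assuming the before()/after() helpers from <net/tcp.h> (x strictly precedes/follows y modulo 2^32); it is not the patch's code, but the same test that tcp_in_window() applies below, with MAXACKWINDOW passed as a plain parameter:

    #include <net/tcp.h>	/* before(), after() */

    /* Sketch of the I-IV window check. */
    static int segment_in_window(__u32 seq, __u32 end, __u32 ack, __u32 sack,
    			     __u32 maxackwindow,
    			     const struct ip_ct_tcp_state *sender,
    			     const struct ip_ct_tcp_state *receiver)
    {
    	return before(seq, sender->td_maxend + 1)			/* I   */
    		&& after(end, sender->td_end - receiver->td_maxwin - 1)	/* II  */
    		&& before(sack, receiver->td_end + 1)			/* III */
    		&& after(ack, receiver->td_end - maxackwindow);		/* IV  */
    }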
......@@ -373,7 +378,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq,
size_t len,
struct iphdr *iph,
struct tcphdr *tcph)
{
return (seq + len - (iph->ihl + tcph->doff)*4
+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
}
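A quick worked example of segment_seq_plus_len(): at this point skb->len still covers the IP and TCP headers, so they are subtracted before SYN and FIN are each counted as one sequence number.

    /* For a bare SYN (no payload), skb->len == (iph->ihl + tcph->doff)*4,
     * so end = seq + 0 + 1 (SYN) + 0 (FIN) = seq + 1: the SYN consumes
     * one sequence number, exactly as a FIN does on close. */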
......@@ -444,22 +449,33 @@ static void tcp_options(const struct sk_buff *skb,
}
}
- static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
+ static void tcp_sack(const struct sk_buff *skb,
+ struct iphdr *iph,
+ struct tcphdr *tcph,
+ __u32 *sack)
{
- __u32 tmp;
+ unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
unsigned char *ptr;
int length = (tcph->doff*4) - sizeof(struct tcphdr);
+ __u32 tmp;
+ if (!length)
+ return;
+ ptr = skb_header_pointer(skb,
+ (iph->ihl * 4) + sizeof(struct tcphdr),
+ length, buff);
+ BUG_ON(ptr == NULL);
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__u32 *)(tcph + 1) ==
+ && *(__u32 *)ptr ==
__constant_ntohl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
| TCPOLEN_TIMESTAMP))
return;
- ptr = (unsigned char *)(tcph + 1);
while (length > 0) {
int opcode=*ptr++;
int opsize, i;
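The rewritten tcp_sack() shows the commit's safe-access idiom: instead of dereferencing tcph + 1 directly, skb_header_pointer() returns a pointer into the skb when the requested range is linear, and otherwise copies it into the caller's stack buffer; NULL means the packet is too short. The hunk can BUG() on NULL because the option length was already validated; a stand-alone sketch would bail out instead (offset and length names here are illustrative):

    unsigned char buff[15 * 4 - sizeof(struct tcphdr)];	/* max option block */
    const unsigned char *ptr;

    ptr = skb_header_pointer(skb, iph->ihl * 4 + sizeof(struct tcphdr),
    			 length, buff);
    if (ptr == NULL)
    	return;	/* options would run past the end of the packet */
    /* parsing 'length' bytes at ptr is now safe, linear skb or not */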
......@@ -500,7 +516,7 @@ static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
static int tcp_in_window(struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
- unsigned int *index,
+ unsigned int index,
const struct sk_buff *skb,
struct iphdr *iph,
struct tcphdr *tcph)
......@@ -519,7 +535,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
end = segment_seq_plus_len(seq, skb->len, iph, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
- tcp_sack(tcph, &sack);
+ tcp_sack(skb, iph, tcph, &sack);
DEBUGP("tcp_in_window: START\n");
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
......@@ -598,20 +614,23 @@ static int tcp_in_window(struct ip_ct_tcp *state,
ack = sack = receiver->td_end;
}
- if (seq == end)
+ if (seq == end
+ && (!tcph->rst
+ || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
/*
* Packets contains no data: we assume it is valid
* and check the ack value only.
+ * However RST segments are always validated by their
+ * SEQ number, except when seq == 0 (reset sent answering
+ * SYN.
*/
seq = end = sender->td_end;
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
"seq=%u ack=%u sack =%u win=%u end=%u\n",
NIPQUAD(iph->saddr), ntohs(tcph->source),
NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end,
- after(end, sender->td_maxend) && before(seq, sender->td_maxend)
- ? sender->td_maxend : end);
+ seq, ack, sack, win, end);
DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
......@@ -619,24 +638,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
- /* Ignore data over the right edge of the receiver's window. */
- if (after(end, sender->td_maxend) &&
- before(seq, sender->td_maxend)) {
- end = sender->td_maxend;
- if (*index == TCP_FIN_SET)
- *index = TCP_ACK_SET;
- }
DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(end, sender->td_maxend + 1)
- || before(seq, sender->td_maxend + 1),
- after(seq, sender->td_end - receiver->td_maxwin - 1)
- || after(end, sender->td_end - receiver->td_maxwin - 1),
+ before(seq, sender->td_maxend + 1),
+ after(end, sender->td_end - receiver->td_maxwin - 1),
before(sack, receiver->td_end + 1),
after(ack, receiver->td_end - MAXACKWINDOW(sender)));
if (sender->loose || receiver->loose ||
- (before(end, sender->td_maxend + 1) &&
- after(seq, sender->td_end - receiver->td_maxwin - 1) &&
+ (before(seq, sender->td_maxend + 1) &&
+ after(end, sender->td_end - receiver->td_maxwin - 1) &&
before(sack, receiver->td_end + 1) &&
after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
/*
......@@ -653,6 +663,11 @@ static int tcp_in_window(struct ip_ct_tcp *state,
sender->td_maxwin = swin;
if (after(end, sender->td_end))
sender->td_end = end;
+ /*
+ * Update receiver data.
+ */
+ if (after(end, sender->td_maxend))
+ receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
if (win == 0)
......@@ -662,7 +677,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
/*
* Check retransmissions.
*/
- if (*index == TCP_ACK_SET) {
+ if (index == TCP_ACK_SET) {
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
......@@ -687,16 +702,16 @@ static int tcp_in_window(struct ip_ct_tcp *state,
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: %s ",
- before(end, sender->td_maxend + 1) ?
- after(seq, sender->td_end - receiver->td_maxwin - 1) ?
+ before(seq, sender->td_maxend + 1) ?
+ after(end, sender->td_end - receiver->td_maxwin - 1) ?
before(sack, receiver->td_end + 1) ?
after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
: "ACK is under the lower bound (possibly overly delayed ACK)"
: "ACK is over the upper bound (ACKed data has never seen yet)"
: "SEQ is under the lower bound (retransmitted already ACKed data)"
: "ACK is under the lower bound (possible overly delayed ACK)"
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
- res = ip_ct_tcp_be_liberal && !tcph->rst;
+ res = ip_ct_tcp_be_liberal;
}
DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
......@@ -849,13 +864,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
/* Either SYN in ORIGINAL
- * or SYN/ACK in REPLY
- * or ACK in REPLY direction (half-open connection). */
+ * or SYN/ACK in REPLY. */
if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir
- && after(ntohl(th->ack_seq),
- conntrack->proto.tcp.last_seq)) {
+ && ntohl(th->ack_seq) ==
+ conntrack->proto.tcp.last_end) {
/* This SYN/ACK acknowledges a SYN that we earlier
* ignored as invalid. This means that the client and
* the server are both in sync, while the firewall is
......@@ -875,6 +889,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->proto.tcp.last_index = index;
conntrack->proto.tcp.last_dir = dir;
conntrack->proto.tcp.last_seq = ntohl(th->seq);
+ conntrack->proto.tcp.last_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
......@@ -892,7 +908,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
"ip_ct_tcp: invalid state ");
return -NF_ACCEPT;
case TCP_CONNTRACK_SYN_SENT:
- if (old_state >= TCP_CONNTRACK_TIME_WAIT) {
+ if (old_state < TCP_CONNTRACK_TIME_WAIT)
+ break;
+ if ((conntrack->proto.tcp.seen[dir].flags &
+ IP_CT_TCP_FLAG_CLOSE_INIT)
+ || after(ntohl(th->seq),
+ conntrack->proto.tcp.seen[dir].td_end)) {
/* Attempt to reopen a closed connection.
* Delete this connection and look up again. */
WRITE_UNLOCK(&tcp_lock);
......@@ -900,23 +921,23 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->timeout.function((unsigned long)
conntrack);
return -NF_REPEAT;
+ } else {
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid SYN");
+ return -NF_ACCEPT;
+ }
break;
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET)
- || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_ACK_SET))
- && after(ntohl(th->ack_seq),
- conntrack->proto.tcp.last_seq)) {
- /* Ignore RST closing down invalid SYN or ACK
- we had let trough. */
- WRITE_UNLOCK(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- "ip_ct_tcp: invalid RST (ignored) ");
- return NF_ACCEPT;
+ && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+ /* RST sent to invalid SYN we had let trough
+ * SYN was in window then, tear down connection.
+ * We skip window checking, because packet might ACK
+ * segments we ignored in the SYN. */
+ goto in_window;
}
/* Just fall trough */
default:
......@@ -924,16 +945,14 @@ static int tcp_packet(struct ip_conntrack *conntrack,
break;
}
- if (!tcp_in_window(&conntrack->proto.tcp, dir, &index,
+ if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
skb, iph, th)) {
WRITE_UNLOCK(&tcp_lock);
return -NF_ACCEPT;
}
- /* From now on we have got in-window packets */
- /* If FIN was trimmed off, we don't change state. */
+ in_window:
+ /* From now on we have got in-window packets */
conntrack->proto.tcp.last_index = index;
new_state = tcp_conntracks[dir][index][old_state];
DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
......@@ -944,6 +963,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
old_state, new_state);
conntrack->proto.tcp.state = new_state;
+ if (old_state != new_state
+ && (new_state == TCP_CONNTRACK_FIN_WAIT
+ || new_state == TCP_CONNTRACK_CLOSE))
+ conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
&& *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
......@@ -974,7 +997,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int tcp_new(struct ip_conntrack *conntrack,
const struct sk_buff *skb)
{
......
......@@ -77,34 +77,70 @@ seq_print_counters(struct seq_file *s,
#define seq_print_counters(x, y) 0
#endif
- static void *ct_seq_start(struct seq_file *s, loff_t *pos)
+ struct ct_iter_state {
+ unsigned int bucket;
+ };
+ static struct list_head *ct_get_first(struct seq_file *seq)
{
- if (*pos >= ip_conntrack_htable_size)
- return NULL;
- return &ip_conntrack_hash[*pos];
+ struct ct_iter_state *st = seq->private;
+ for (st->bucket = 0;
+ st->bucket < ip_conntrack_htable_size;
+ st->bucket++) {
+ if (!list_empty(&ip_conntrack_hash[st->bucket]))
+ return ip_conntrack_hash[st->bucket].next;
+ }
+ return NULL;
}
- static void ct_seq_stop(struct seq_file *s, void *v)
+ static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
{
+ struct ct_iter_state *st = seq->private;
+ head = head->next;
+ while (head == &ip_conntrack_hash[st->bucket]) {
+ if (++st->bucket >= ip_conntrack_htable_size)
+ return NULL;
+ head = ip_conntrack_hash[st->bucket].next;
+ }
+ return head;
}
+ static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+ {
+ struct list_head *head = ct_get_first(seq);
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+ }
+ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ READ_LOCK(&ip_conntrack_lock);
+ return ct_get_idx(seq, *pos);
+ }
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
- if (*pos >= ip_conntrack_htable_size)
- return NULL;
- return &ip_conntrack_hash[*pos];
+ return ct_get_next(s, v);
}
- /* return 0 on success, 1 in case of error */
- static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
- struct seq_file *s)
+ static void ct_seq_stop(struct seq_file *s, void *v)
{
+ READ_UNLOCK(&ip_conntrack_lock);
}
+ static int ct_seq_show(struct seq_file *s, void *v)
+ {
+ const struct ip_conntrack_tuple_hash *hash = v;
const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto;
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
IP_NF_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
......@@ -115,63 +151,50 @@ static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
.tuple.dst.protonum);
IP_NF_ASSERT(proto);
if (seq_printf(s, "%-8s %u %lu ",
if (seq_printf(s, "%-8s %u %ld ",
proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
timer_pending(&conntrack->timeout)
- ? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
- return 1;
+ ? (long)(conntrack->timeout.expires - jiffies)/HZ
+ : 0) != 0)
+ return -ENOSPC;
if (proto->print_conntrack(s, conntrack))
- return 1;
+ return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
proto))
- return 1;
+ return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
- return 1;
+ return -ENOSPC;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
if (seq_printf(s, "[UNREPLIED] "))
- return 1;
+ return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
proto))
- return 1;
+ return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
- return 1;
+ return -ENOSPC;
if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
if (seq_printf(s, "[ASSURED] "))
- return 1;
+ return -ENOSPC;
#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
if (seq_printf(s, "mark=%ld ", conntrack->mark))
return 1;
if (seq_printf(s, "mark=%lu ", conntrack->mark))
return -ENOSPC;
#endif
if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
- return 1;
+ return -ENOSPC;
return 0;
}
- static int ct_seq_show(struct seq_file *s, void *v)
- {
- struct list_head *list = v;
- int ret = 0;
- /* FIXME: Simply truncates if hash chain too long. */
- READ_LOCK(&ip_conntrack_lock);
- if (LIST_FIND(list, ct_seq_real_show,
- struct ip_conntrack_tuple_hash *, s))
- ret = -ENOSPC;
- READ_UNLOCK(&ip_conntrack_lock);
- return ret;
- }
static struct seq_operations ct_seq_ops = {
.start = ct_seq_start,
.next = ct_seq_next,
......@@ -181,7 +204,23 @@ static struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_seq_ops);
+ struct seq_file *seq;
+ struct ct_iter_state *st;
+ int ret;
+ st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
+ if (st == NULL)
+ return -ENOMEM;
+ ret = seq_open(file, &ct_seq_ops);
+ if (ret)
+ goto out_free;
+ seq = file->private_data;
+ seq->private = st;
+ memset(st, 0, sizeof(struct ct_iter_state));
+ return ret;
+ out_free:
+ kfree(st);
+ return ret;
}
static struct file_operations ct_file_ops = {
......@@ -189,7 +228,7 @@ static struct file_operations ct_file_ops = {
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release
+ .release = seq_release_private,
};
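The .release change is what keeps the new iterator from leaking: seq_release_private() frees seq->private (the ct_iter_state kmalloc'ed in ct_open() above) before releasing the seq_file, whereas plain seq_release() would leave it dangling. Later kernels wrap the open side of this pattern too; a sketch assuming the newer seq_open_private() helper is available:

    /* Equivalent of ct_open() above on kernels providing seq_open_private():
     * allocates a zeroed ct_iter_state, attaches it as seq->private, and
     * pairs with the seq_release_private() used in ct_file_ops. */
    static int ct_open_sketch(struct inode *inode, struct file *file)
    {
    	return seq_open_private(file, &ct_seq_ops,
    				sizeof(struct ct_iter_state));
    }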
/* expects */
......@@ -235,8 +274,8 @@ static int exp_seq_show(struct seq_file *s, void *v)
struct ip_conntrack_expect *expect = v;
if (expect->timeout.function)
seq_printf(s, "%lu ", timer_pending(&expect->timeout)
? (expect->timeout.expires - jiffies)/HZ : 0);
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
else
seq_printf(s, "- ");
......
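Every %lu to %ld change in these /proc hunks fixes the same artifact: expires - jiffies is computed in unsigned long, so a timer that has already fired prints as an enormous positive count instead of a small negative one; casting to long before the division restores the intended sign. Illustration (assumes 32-bit unsigned long and HZ=100):

    unsigned long expires = jiffies - HZ;	/* fired one second ago */

    printk("%lu\n", (expires - jiffies) / HZ);	  /* 42949671: wrapped */
    printk("%ld\n", (long)(expires - jiffies) / HZ);  /* -1: as intended */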
......@@ -14,6 +14,9 @@
* Zander).
* 2000-08-01: Added Nick Williams' MAC support.
* 2002-06-25: Code cleanup.
+ * 2005-01-10: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
*
*/
#include <linux/module.h>
......@@ -59,6 +62,8 @@ static DEFINE_RWLOCK(queue_lock);
static int peer_pid;
static unsigned int copy_range;
static unsigned int queue_total;
+ static unsigned int queue_dropped = 0;
+ static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem);
......@@ -70,18 +75,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
kfree(entry);
}
- static inline int
+ static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
- if (queue_total >= queue_maxlen) {
- if (net_ratelimit())
- printk(KERN_WARNING "ip_queue: full at %d entries, "
- "dropping packet(s).\n", queue_total);
- return -ENOSPC;
- }
list_add(&entry->list, &queue_list);
queue_total++;
- return 0;
}
/*
......@@ -308,14 +306,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
if (!peer_pid)
goto err_out_free_nskb;
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip_queue: full at %d entries, "
+ "dropping packets(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0)
- goto err_out_unlock;
- status = __ipq_enqueue_entry(entry);
- if (status < 0)
+ if (status < 0) {
+ queue_user_dropped++;
+ goto err_out_unlock;
+ }
+ __ipq_enqueue_entry(entry);
write_unlock_bh(&queue_lock);
return status;
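Read together, the ip_queue hunks reorder the enqueue path exactly as the changelog line above promises: the kernel-side queue limit is checked before netlink_unicast() hands the packet to user space, so user space never receives a packet that is about to be dropped, and __ipq_enqueue_entry() shrinks to infallible bookkeeping. A consolidated sketch of the resulting flow (names as in the hunks):

    write_lock_bh(&queue_lock);

    if (queue_total >= queue_maxlen) {	/* 1: refuse before delivery */
    	queue_dropped++;
    	status = -ENOSPC;
    	goto err_out_free_nskb;
    }

    /* 2: netlink_unicast() frees nskb or attaches it to a socket */
    status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
    if (status < 0) {
    	queue_user_dropped++;		/* user-space socket overflowed */
    	goto err_out_unlock;
    }

    __ipq_enqueue_entry(entry);		/* 3: can no longer fail */
    write_unlock_bh(&queue_lock);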
......@@ -637,12 +645,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
"Queue max. length : %u\n",
"Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netlink dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
- queue_maxlen);
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
read_unlock_bh(&queue_lock);
......
......@@ -923,7 +923,7 @@ translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
......@@ -945,7 +945,7 @@ replace_table(struct ipt_table *table,
struct ipt_entry *table_base;
unsigned int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < num_possible_cpus(); i++) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
......@@ -992,7 +992,7 @@ get_counters(const struct ipt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0;
IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
......@@ -1130,7 +1130,7 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(tmp.size) * NR_CPUS);
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......@@ -1460,7 +1460,7 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(repl->size) * NR_CPUS);
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......
......@@ -198,16 +198,16 @@ static void dump_packet(const struct ipt_log_info *info,
static size_t required_len[NR_ICMP_TYPES+1]
= { [ICMP_ECHOREPLY] = 4,
[ICMP_DEST_UNREACH]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_SOURCE_QUENCH]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_REDIRECT]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_ECHO] = 4,
[ICMP_TIME_EXCEEDED]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_PARAMETERPROB]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_TIMESTAMP] = 20,
[ICMP_TIMESTAMPREPLY] = 20,
[ICMP_ADDRESS] = 12,
......
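The required_len changes relax what dump_packet() demands of the ICMP error types: an 8-byte ICMP header plus the embedded IP header, no longer insisting on 8 bytes of the embedded transport header as well; the ipt_REJECT hunk below removes the matching 8-byte check on the input side. Worked minimum for ICMP_DEST_UNREACH:

    /* Minimum length after the change (illustrative arithmetic):
     *   8 (ICMP header) + sizeof(struct iphdr) (20 without IP options)
     *   = 28 bytes, versus 36 before, where the extra 8 bytes covered
     *   the start of the embedded transport header. */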
......@@ -252,10 +252,6 @@ static void send_unreach(struct sk_buff *skb_in, int code)
if (iph->frag_off&htons(IP_OFFSET))
return;
- /* Ensure we have at least 8 bytes of proto header. */
- if (skb_in->len < skb_in->nh.iph->ihl*4 + 8)
- return;
/* If we send an ICMP error to an ICMP error a mess would result.. */
if (iph->protocol == IPPROTO_ICMP) {
struct icmphdr ihdr;
......
......@@ -625,7 +625,7 @@ static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s)
rateinfo_recalc(ent, jiffies);
return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n",
- (ent->expires - jiffies)/HZ,
+ (long)(ent->expires - jiffies)/HZ,
NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port),
NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port),
ent->rateinfo.credit, ent->rateinfo.credit_cap,
......
......@@ -20,6 +20,9 @@
* Few changes needed, mainly the hard_routing code and
* the netlink socket protocol (we're NETLINK_IP6_FW).
* 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
+ * 2005-02-04: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
......@@ -64,6 +67,8 @@ static DEFINE_RWLOCK(queue_lock);
static int peer_pid;
static unsigned int copy_range;
static unsigned int queue_total;
+ static unsigned int queue_dropped = 0;
+ static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem);
......@@ -75,18 +80,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
kfree(entry);
}
- static inline int
+ static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
- if (queue_total >= queue_maxlen) {
- if (net_ratelimit())
- printk(KERN_WARNING "ip6_queue: full at %d entries, "
- "dropping packet(s).\n", queue_total);
- return -ENOSPC;
- }
list_add(&entry->list, &queue_list);
queue_total++;
- return 0;
}
/*
......@@ -312,14 +310,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
if (!peer_pid)
goto err_out_free_nskb;
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip6_queue: full at %d entries, "
+ "dropping packet(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0)
+ if (status < 0) {
+ queue_user_dropped++;
+ goto err_out_unlock;
+ }
- status = __ipq_enqueue_entry(entry);
- if (status < 0)
- goto err_out_unlock;
+ __ipq_enqueue_entry(entry);
write_unlock_bh(&queue_lock);
return status;
......@@ -639,12 +647,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
"Queue max. length : %u\n",
"Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netfilter dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
- queue_maxlen);
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
read_unlock_bh(&queue_lock);
......
......@@ -952,7 +952,7 @@ translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
......@@ -974,7 +974,7 @@ replace_table(struct ip6t_table *table,
struct ip6t_entry *table_base;
unsigned int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < num_possible_cpus(); i++) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
......@@ -1021,7 +1021,7 @@ get_counters(const struct ip6t_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0;
IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
......@@ -1155,7 +1155,7 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(tmp.size) * NR_CPUS);
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......@@ -1469,7 +1469,7 @@ int ip6t_register_table(struct ip6t_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(repl->size) * NR_CPUS);
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......