Commit 8de08f90 authored by David S. Miller

Merge bk://212.42.230.204:994/nf-2.6

into sunset.davemloft.net:/home/davem/src/BK/net-2.6
parents 0f634eb3 b20f3c6c
...@@ -23,13 +23,16 @@ enum tcp_conntrack { ...@@ -23,13 +23,16 @@ enum tcp_conntrack {
/* SACK is permitted by the sender */ /* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02 #define IP_CT_TCP_FLAG_SACK_PERM 0x02
/*
 * This sender sent FIN first.
 *
 * The value is OR-ed into and tested against the per-direction
 * ip_ct_tcp_state.flags bitmask, so it must be a single bit of its own.
 * 0x03 overlapped 0x02 (IP_CT_TCP_FLAG_SACK_PERM) and 0x01, which made
 * "flags & IP_CT_TCP_FLAG_CLOSE_INIT" true for any direction that had
 * merely negotiated SACK, and made "flags |= IP_CT_TCP_FLAG_CLOSE_INIT"
 * silently turn the SACK-permitted bit on as well.
 */
#define IP_CT_TCP_FLAG_CLOSE_INIT 0x04
struct ip_ct_tcp_state { struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */ u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */ u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */ u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */ u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */ u_int8_t loose; /* used when connection picked up from the middle */
u_int8_t flags; /* per direction state flags */ u_int8_t flags; /* per direction options */
}; };
struct ip_ct_tcp struct ip_ct_tcp
......
...@@ -822,10 +822,10 @@ static int translate_table(struct ebt_replace *repl, ...@@ -822,10 +822,10 @@ static int translate_table(struct ebt_replace *repl,
/* this will get free'd in do_replace()/ebt_register_table() /* this will get free'd in do_replace()/ebt_register_table()
if an error occurs */ if an error occurs */
newinfo->chainstack = (struct ebt_chainstack **) newinfo->chainstack = (struct ebt_chainstack **)
vmalloc(NR_CPUS * sizeof(struct ebt_chainstack)); vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack));
if (!newinfo->chainstack) if (!newinfo->chainstack)
return -ENOMEM; return -ENOMEM;
for (i = 0; i < NR_CPUS; i++) { for (i = 0; i < num_possible_cpus(); i++) {
newinfo->chainstack[i] = newinfo->chainstack[i] =
vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); vmalloc(udc_cnt * sizeof(struct ebt_chainstack));
if (!newinfo->chainstack[i]) { if (!newinfo->chainstack[i]) {
...@@ -898,7 +898,7 @@ static void get_counters(struct ebt_counter *oldcounters, ...@@ -898,7 +898,7 @@ static void get_counters(struct ebt_counter *oldcounters,
memcpy(counters, oldcounters, memcpy(counters, oldcounters,
sizeof(struct ebt_counter) * nentries); sizeof(struct ebt_counter) * nentries);
/* add other counters to those of cpu 0 */ /* add other counters to those of cpu 0 */
for (cpu = 1; cpu < NR_CPUS; cpu++) { for (cpu = 1; cpu < num_possible_cpus(); cpu++) {
counter_base = COUNTER_BASE(oldcounters, nentries, cpu); counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
for (i = 0; i < nentries; i++) { for (i = 0; i < nentries; i++) {
counters[i].pcnt += counter_base[i].pcnt; counters[i].pcnt += counter_base[i].pcnt;
...@@ -930,7 +930,7 @@ static int do_replace(void __user *user, unsigned int len) ...@@ -930,7 +930,7 @@ static int do_replace(void __user *user, unsigned int len)
BUGPRINT("Entries_size never zero\n"); BUGPRINT("Entries_size never zero\n");
return -EINVAL; return -EINVAL;
} }
countersize = COUNTER_OFFSET(tmp.nentries) * NR_CPUS; countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus();
newinfo = (struct ebt_table_info *) newinfo = (struct ebt_table_info *)
vmalloc(sizeof(struct ebt_table_info) + countersize); vmalloc(sizeof(struct ebt_table_info) + countersize);
if (!newinfo) if (!newinfo)
...@@ -1023,7 +1023,7 @@ static int do_replace(void __user *user, unsigned int len) ...@@ -1023,7 +1023,7 @@ static int do_replace(void __user *user, unsigned int len)
vfree(table->entries); vfree(table->entries);
if (table->chainstack) { if (table->chainstack) {
for (i = 0; i < NR_CPUS; i++) for (i = 0; i < num_possible_cpus(); i++)
vfree(table->chainstack[i]); vfree(table->chainstack[i]);
vfree(table->chainstack); vfree(table->chainstack);
} }
...@@ -1043,7 +1043,7 @@ static int do_replace(void __user *user, unsigned int len) ...@@ -1043,7 +1043,7 @@ static int do_replace(void __user *user, unsigned int len)
vfree(counterstmp); vfree(counterstmp);
/* can be initialized in translate_table() */ /* can be initialized in translate_table() */
if (newinfo->chainstack) { if (newinfo->chainstack) {
for (i = 0; i < NR_CPUS; i++) for (i = 0; i < num_possible_cpus(); i++)
vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack); vfree(newinfo->chainstack);
} }
...@@ -1137,7 +1137,7 @@ int ebt_register_table(struct ebt_table *table) ...@@ -1137,7 +1137,7 @@ int ebt_register_table(struct ebt_table *table)
return -EINVAL; return -EINVAL;
} }
countersize = COUNTER_OFFSET(table->table->nentries) * NR_CPUS; countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus();
newinfo = (struct ebt_table_info *) newinfo = (struct ebt_table_info *)
vmalloc(sizeof(struct ebt_table_info) + countersize); vmalloc(sizeof(struct ebt_table_info) + countersize);
ret = -ENOMEM; ret = -ENOMEM;
...@@ -1191,7 +1191,7 @@ int ebt_register_table(struct ebt_table *table) ...@@ -1191,7 +1191,7 @@ int ebt_register_table(struct ebt_table *table)
up(&ebt_mutex); up(&ebt_mutex);
free_chainstack: free_chainstack:
if (newinfo->chainstack) { if (newinfo->chainstack) {
for (i = 0; i < NR_CPUS; i++) for (i = 0; i < num_possible_cpus(); i++)
vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack); vfree(newinfo->chainstack);
} }
...@@ -1215,7 +1215,7 @@ void ebt_unregister_table(struct ebt_table *table) ...@@ -1215,7 +1215,7 @@ void ebt_unregister_table(struct ebt_table *table)
if (table->private->entries) if (table->private->entries)
vfree(table->private->entries); vfree(table->private->entries);
if (table->private->chainstack) { if (table->private->chainstack) {
for (i = 0; i < NR_CPUS; i++) for (i = 0; i < num_possible_cpus(); i++)
vfree(table->private->chainstack[i]); vfree(table->private->chainstack[i]);
vfree(table->private->chainstack); vfree(table->private->chainstack);
} }
......
...@@ -717,7 +717,7 @@ static int translate_table(const char *name, ...@@ -717,7 +717,7 @@ static int translate_table(const char *name,
} }
/* And one copy for every other CPU */ /* And one copy for every other CPU */
for (i = 1; i < NR_CPUS; i++) { for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries, newinfo->entries,
SMP_ALIGN(newinfo->size)); SMP_ALIGN(newinfo->size));
...@@ -768,7 +768,7 @@ static void get_counters(const struct arpt_table_info *t, ...@@ -768,7 +768,7 @@ static void get_counters(const struct arpt_table_info *t,
unsigned int cpu; unsigned int cpu;
unsigned int i; unsigned int i;
for (cpu = 0; cpu < NR_CPUS; cpu++) { for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0; i = 0;
ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size, t->size,
...@@ -886,7 +886,7 @@ static int do_replace(void __user *user, unsigned int len) ...@@ -886,7 +886,7 @@ static int do_replace(void __user *user, unsigned int len)
return -ENOMEM; return -ENOMEM;
newinfo = vmalloc(sizeof(struct arpt_table_info) newinfo = vmalloc(sizeof(struct arpt_table_info)
+ SMP_ALIGN(tmp.size) * NR_CPUS); + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo) if (!newinfo)
return -ENOMEM; return -ENOMEM;
...@@ -1159,7 +1159,7 @@ int arpt_register_table(struct arpt_table *table, ...@@ -1159,7 +1159,7 @@ int arpt_register_table(struct arpt_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } }; = { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct arpt_table_info) newinfo = vmalloc(sizeof(struct arpt_table_info)
+ SMP_ALIGN(repl->size) * NR_CPUS); + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo) { if (!newinfo) {
ret = -ENOMEM; ret = -ENOMEM;
return ret; return ret;
......
...@@ -400,8 +400,8 @@ static int sctp_packet(struct ip_conntrack *conntrack, ...@@ -400,8 +400,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
return -1; return -1;
} }
DEBUGP("Setting vtag %x for dir %d\n", DEBUGP("Setting vtag %x for dir %d\n",
ih->init_tag, CTINFO2DIR(ctinfo)); ih->init_tag, !CTINFO2DIR(ctinfo));
conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = ih->init_tag; conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
} }
conntrack->proto.sctp.state = newconntrack; conntrack->proto.sctp.state = newconntrack;
......
...@@ -254,7 +254,7 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { ...@@ -254,7 +254,7 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sSS -> sSR Standard open. * sSS -> sSR Standard open.
* sSR -> sSR Retransmitted SYN/ACK. * sSR -> sSR Retransmitted SYN/ACK.
* sES -> sIG Late retransmitted SYN/ACK? * sES -> sIG Late retransmitted SYN/ACK?
* sFW -> sIG * sFW -> sIG Might be SYN/ACK answering ignored SYN
* sCW -> sIG * sCW -> sIG
* sLA -> sIG * sLA -> sIG
* sTW -> sIG * sTW -> sIG
...@@ -273,10 +273,10 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { ...@@ -273,10 +273,10 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sCL * sCL -> sCL
*/ */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*ack*/ { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/* /*
* sSS -> sIG Might be a half-open connection. * sSS -> sIV Might be a half-open connection.
* sSR -> sIV Simultaneous open. * sSR -> sSR Might answer late resent SYN.
* sES -> sES :-) * sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK. * sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW * sCW -> sCW
...@@ -352,14 +352,19 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) ...@@ -352,14 +352,19 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
http://www.nluug.nl/events/sane2000/papers.html http://www.nluug.nl/events/sane2000/papers.html
http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
The boundaries and the conditions are slightly changed: The boundaries and the conditions are changed according to RFC793:
the packet must intersect the window (i.e. segments may be
after the right or before the left edge) and thus receivers may ACK
segments after the right edge of the window.
td_maxend = max(sack + max(win,1)) seen in reply packets td_maxend = max(sack + max(win,1)) seen in reply packets
td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
td_maxwin += seq + len - sender.td_maxend
if seq + len > sender.td_maxend
td_end = max(seq + len) seen in sent packets td_end = max(seq + len) seen in sent packets
I. Upper bound for valid data: seq + len <= sender.td_maxend I. Upper bound for valid data: seq <= sender.td_maxend
II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
III. Upper bound for valid ack: sack <= receiver.td_end III. Upper bound for valid ack: sack <= receiver.td_end
IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
...@@ -373,7 +378,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq, ...@@ -373,7 +378,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq,
size_t len, size_t len,
struct iphdr *iph, struct iphdr *iph,
struct tcphdr *tcph) struct tcphdr *tcph)
{ {
return (seq + len - (iph->ihl + tcph->doff)*4 return (seq + len - (iph->ihl + tcph->doff)*4
+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
} }
...@@ -444,22 +449,33 @@ static void tcp_options(const struct sk_buff *skb, ...@@ -444,22 +449,33 @@ static void tcp_options(const struct sk_buff *skb,
} }
} }
static void tcp_sack(struct tcphdr *tcph, __u32 *sack) static void tcp_sack(const struct sk_buff *skb,
struct iphdr *iph,
struct tcphdr *tcph,
__u32 *sack)
{ {
__u32 tmp; unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
unsigned char *ptr; unsigned char *ptr;
int length = (tcph->doff*4) - sizeof(struct tcphdr); int length = (tcph->doff*4) - sizeof(struct tcphdr);
__u32 tmp;
if (!length)
return;
ptr = skb_header_pointer(skb,
(iph->ihl * 4) + sizeof(struct tcphdr),
length, buff);
BUG_ON(ptr == NULL);
/* Fast path for timestamp-only option */ /* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4 if (length == TCPOLEN_TSTAMP_ALIGNED*4
&& *(__u32 *)(tcph + 1) == && *(__u32 *)ptr ==
__constant_ntohl((TCPOPT_NOP << 24) __constant_ntohl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | (TCPOPT_TIMESTAMP << 8)
| TCPOLEN_TIMESTAMP)) | TCPOLEN_TIMESTAMP))
return; return;
ptr = (unsigned char *)(tcph + 1);
while (length > 0) { while (length > 0) {
int opcode=*ptr++; int opcode=*ptr++;
int opsize, i; int opsize, i;
...@@ -500,7 +516,7 @@ static void tcp_sack(struct tcphdr *tcph, __u32 *sack) ...@@ -500,7 +516,7 @@ static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
static int tcp_in_window(struct ip_ct_tcp *state, static int tcp_in_window(struct ip_ct_tcp *state,
enum ip_conntrack_dir dir, enum ip_conntrack_dir dir,
unsigned int *index, unsigned int index,
const struct sk_buff *skb, const struct sk_buff *skb,
struct iphdr *iph, struct iphdr *iph,
struct tcphdr *tcph) struct tcphdr *tcph)
...@@ -519,7 +535,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, ...@@ -519,7 +535,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
end = segment_seq_plus_len(seq, skb->len, iph, tcph); end = segment_seq_plus_len(seq, skb->len, iph, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
tcp_sack(tcph, &sack); tcp_sack(skb, iph, tcph, &sack);
DEBUGP("tcp_in_window: START\n"); DEBUGP("tcp_in_window: START\n");
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
...@@ -598,20 +614,23 @@ static int tcp_in_window(struct ip_ct_tcp *state, ...@@ -598,20 +614,23 @@ static int tcp_in_window(struct ip_ct_tcp *state,
ack = sack = receiver->td_end; ack = sack = receiver->td_end;
} }
if (seq == end) if (seq == end
&& (!tcph->rst
|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
/* /*
* Packets contains no data: we assume it is valid * Packets contains no data: we assume it is valid
* and check the ack value only. * and check the ack value only.
* However, RST segments are always validated by their
* SEQ number, except when seq == 0 (reset sent answering
* SYN).
*/ */
seq = end = sender->td_end; seq = end = sender->td_end;
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n", "seq=%u ack=%u sack =%u win=%u end=%u\n",
NIPQUAD(iph->saddr), ntohs(tcph->source), NIPQUAD(iph->saddr), ntohs(tcph->source),
NIPQUAD(iph->daddr), ntohs(tcph->dest), NIPQUAD(iph->daddr), ntohs(tcph->dest),
seq, ack, sack, win, end, seq, ack, sack, win, end);
after(end, sender->td_maxend) && before(seq, sender->td_maxend)
? sender->td_maxend : end);
DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n", "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_end, sender->td_maxend, sender->td_maxwin,
...@@ -619,24 +638,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, ...@@ -619,24 +638,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale); receiver->td_scale);
/* Ignore data over the right edge of the receiver's window. */
if (after(end, sender->td_maxend) &&
before(seq, sender->td_maxend)) {
end = sender->td_maxend;
if (*index == TCP_FIN_SET)
*index = TCP_ACK_SET;
}
DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
before(end, sender->td_maxend + 1) before(seq, sender->td_maxend + 1),
|| before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1),
after(seq, sender->td_end - receiver->td_maxwin - 1)
|| after(end, sender->td_end - receiver->td_maxwin - 1),
before(sack, receiver->td_end + 1), before(sack, receiver->td_end + 1),
after(ack, receiver->td_end - MAXACKWINDOW(sender))); after(ack, receiver->td_end - MAXACKWINDOW(sender)));
if (sender->loose || receiver->loose || if (sender->loose || receiver->loose ||
(before(end, sender->td_maxend + 1) && (before(seq, sender->td_maxend + 1) &&
after(seq, sender->td_end - receiver->td_maxwin - 1) && after(end, sender->td_end - receiver->td_maxwin - 1) &&
before(sack, receiver->td_end + 1) && before(sack, receiver->td_end + 1) &&
after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
/* /*
...@@ -653,6 +663,11 @@ static int tcp_in_window(struct ip_ct_tcp *state, ...@@ -653,6 +663,11 @@ static int tcp_in_window(struct ip_ct_tcp *state,
sender->td_maxwin = swin; sender->td_maxwin = swin;
if (after(end, sender->td_end)) if (after(end, sender->td_end))
sender->td_end = end; sender->td_end = end;
/*
* Update receiver data.
*/
if (after(end, sender->td_maxend))
receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) { if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win; receiver->td_maxend = sack + win;
if (win == 0) if (win == 0)
...@@ -662,7 +677,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, ...@@ -662,7 +677,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
/* /*
* Check retransmissions. * Check retransmissions.
*/ */
if (*index == TCP_ACK_SET) { if (index == TCP_ACK_SET) {
if (state->last_dir == dir if (state->last_dir == dir
&& state->last_seq == seq && state->last_seq == seq
&& state->last_ack == ack && state->last_ack == ack
...@@ -687,16 +702,16 @@ static int tcp_in_window(struct ip_ct_tcp *state, ...@@ -687,16 +702,16 @@ static int tcp_in_window(struct ip_ct_tcp *state,
if (LOG_INVALID(IPPROTO_TCP)) if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: %s ", "ip_ct_tcp: %s ",
before(end, sender->td_maxend + 1) ? before(seq, sender->td_maxend + 1) ?
after(seq, sender->td_end - receiver->td_maxwin - 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ?
before(sack, receiver->td_end + 1) ? before(sack, receiver->td_end + 1) ?
after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
: "ACK is under the lower bound (possibly overly delayed ACK)" : "ACK is under the lower bound (possible overly delayed ACK)"
: "ACK is over the upper bound (ACKed data has never seen yet)" : "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (retransmitted already ACKed data)" : "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)"); : "SEQ is over the upper bound (over the window of the receiver)");
res = ip_ct_tcp_be_liberal && !tcph->rst; res = ip_ct_tcp_be_liberal;
} }
DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
...@@ -849,13 +864,12 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -849,13 +864,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
switch (new_state) { switch (new_state) {
case TCP_CONNTRACK_IGNORE: case TCP_CONNTRACK_IGNORE:
/* Either SYN in ORIGINAL /* Either SYN in ORIGINAL
* or SYN/ACK in REPLY * or SYN/ACK in REPLY. */
* or ACK in REPLY direction (half-open connection). */
if (index == TCP_SYNACK_SET if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir && conntrack->proto.tcp.last_dir != dir
&& after(ntohl(th->ack_seq), && ntohl(th->ack_seq) ==
conntrack->proto.tcp.last_seq)) { conntrack->proto.tcp.last_end) {
/* This SYN/ACK acknowledges a SYN that we earlier /* This SYN/ACK acknowledges a SYN that we earlier
* ignored as invalid. This means that the client and * ignored as invalid. This means that the client and
* the server are both in sync, while the firewall is * the server are both in sync, while the firewall is
...@@ -875,6 +889,8 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -875,6 +889,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->proto.tcp.last_index = index; conntrack->proto.tcp.last_index = index;
conntrack->proto.tcp.last_dir = dir; conntrack->proto.tcp.last_dir = dir;
conntrack->proto.tcp.last_seq = ntohl(th->seq); conntrack->proto.tcp.last_seq = ntohl(th->seq);
conntrack->proto.tcp.last_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
WRITE_UNLOCK(&tcp_lock); WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP)) if (LOG_INVALID(IPPROTO_TCP))
...@@ -892,7 +908,12 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -892,7 +908,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
"ip_ct_tcp: invalid state "); "ip_ct_tcp: invalid state ");
return -NF_ACCEPT; return -NF_ACCEPT;
case TCP_CONNTRACK_SYN_SENT: case TCP_CONNTRACK_SYN_SENT:
if (old_state >= TCP_CONNTRACK_TIME_WAIT) { if (old_state < TCP_CONNTRACK_TIME_WAIT)
break;
if ((conntrack->proto.tcp.seen[dir].flags &
IP_CT_TCP_FLAG_CLOSE_INIT)
|| after(ntohl(th->seq),
conntrack->proto.tcp.seen[dir].td_end)) {
/* Attempt to reopen a closed connection. /* Attempt to reopen a closed connection.
* Delete this connection and look up again. */ * Delete this connection and look up again. */
WRITE_UNLOCK(&tcp_lock); WRITE_UNLOCK(&tcp_lock);
...@@ -900,23 +921,23 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -900,23 +921,23 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->timeout.function((unsigned long) conntrack->timeout.function((unsigned long)
conntrack); conntrack);
return -NF_REPEAT; return -NF_REPEAT;
} } else {
break;
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
&& ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
&& conntrack->proto.tcp.last_index <= TCP_SYNACK_SET)
|| (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
&& conntrack->proto.tcp.last_index == TCP_ACK_SET))
&& after(ntohl(th->ack_seq),
conntrack->proto.tcp.last_seq)) {
/* Ignore RST closing down invalid SYN or ACK
we had let trough. */
WRITE_UNLOCK(&tcp_lock); WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP)) if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid RST (ignored) "); "ip_ct_tcp: invalid SYN");
return NF_ACCEPT; return -NF_ACCEPT;
}
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
&& test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
/* RST sent in reply to an invalid SYN we had let through.
* The SYN was in window then, so tear down the connection.
* We skip window checking, because the packet might ACK
* segments we ignored in the SYN. */
goto in_window;
} }
/* Just fall trough */ /* Just fall trough */
default: default:
...@@ -924,16 +945,14 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -924,16 +945,14 @@ static int tcp_packet(struct ip_conntrack *conntrack,
break; break;
} }
if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
skb, iph, th)) { skb, iph, th)) {
WRITE_UNLOCK(&tcp_lock); WRITE_UNLOCK(&tcp_lock);
return -NF_ACCEPT; return -NF_ACCEPT;
} }
in_window:
/* From now on we have got in-window packets */ /* From now on we have got in-window packets */
/* If FIN was trimmed off, we don't change state. */
conntrack->proto.tcp.last_index = index; conntrack->proto.tcp.last_index = index;
new_state = tcp_conntracks[dir][index][old_state];
DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
...@@ -944,6 +963,10 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -944,6 +963,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
old_state, new_state); old_state, new_state);
conntrack->proto.tcp.state = new_state; conntrack->proto.tcp.state = new_state;
if (old_state != new_state
&& (new_state == TCP_CONNTRACK_FIN_WAIT
|| new_state == TCP_CONNTRACK_CLOSE))
conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
&& *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
...@@ -974,7 +997,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, ...@@ -974,7 +997,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
return NF_ACCEPT; return NF_ACCEPT;
} }
/* Called when a new connection for this protocol found. */ /* Called when a new connection for this protocol found. */
static int tcp_new(struct ip_conntrack *conntrack, static int tcp_new(struct ip_conntrack *conntrack,
const struct sk_buff *skb) const struct sk_buff *skb)
{ {
......
...@@ -77,34 +77,70 @@ seq_print_counters(struct seq_file *s, ...@@ -77,34 +77,70 @@ seq_print_counters(struct seq_file *s,
#define seq_print_counters(x, y) 0 #define seq_print_counters(x, y) 0
#endif #endif
static void *ct_seq_start(struct seq_file *s, loff_t *pos) struct ct_iter_state {
unsigned int bucket;
};
static struct list_head *ct_get_first(struct seq_file *seq)
{ {
if (*pos >= ip_conntrack_htable_size) struct ct_iter_state *st = seq->private;
for (st->bucket = 0;
st->bucket < ip_conntrack_htable_size;
st->bucket++) {
if (!list_empty(&ip_conntrack_hash[st->bucket]))
return ip_conntrack_hash[st->bucket].next;
}
return NULL; return NULL;
return &ip_conntrack_hash[*pos];
} }
static void ct_seq_stop(struct seq_file *s, void *v) static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
{ {
struct ct_iter_state *st = seq->private;
head = head->next;
while (head == &ip_conntrack_hash[st->bucket]) {
if (++st->bucket >= ip_conntrack_htable_size)
return NULL;
head = ip_conntrack_hash[st->bucket].next;
}
return head;
}
/*
 * Return the entry found pos steps into the walk that starts at
 * ct_get_first() and advances with ct_get_next(), or NULL if the walk
 * runs out before pos steps have been taken.
 */
static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
{
	struct list_head *cur;

	/* One ct_get_next() call per unit of pos; stop early at the end. */
	for (cur = ct_get_first(seq); cur != NULL && pos != 0; pos--)
		cur = ct_get_next(seq, cur);

	return pos == 0 ? cur : NULL;
}
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
READ_LOCK(&ip_conntrack_lock);
return ct_get_idx(seq, *pos);
} }
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{ {
(*pos)++; (*pos)++;
if (*pos >= ip_conntrack_htable_size) return ct_get_next(s, v);
return NULL;
return &ip_conntrack_hash[*pos];
} }
/* return 0 on success, 1 in case of error */ static void ct_seq_stop(struct seq_file *s, void *v)
static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
struct seq_file *s)
{ {
READ_UNLOCK(&ip_conntrack_lock);
}
static int ct_seq_show(struct seq_file *s, void *v)
{
const struct ip_conntrack_tuple_hash *hash = v;
const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto; struct ip_conntrack_protocol *proto;
MUST_BE_READ_LOCKED(&ip_conntrack_lock); MUST_BE_READ_LOCKED(&ip_conntrack_lock);
IP_NF_ASSERT(conntrack); IP_NF_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */ /* we only want to print DIR_ORIGINAL */
...@@ -115,63 +151,50 @@ static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash, ...@@ -115,63 +151,50 @@ static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
.tuple.dst.protonum); .tuple.dst.protonum);
IP_NF_ASSERT(proto); IP_NF_ASSERT(proto);
if (seq_printf(s, "%-8s %u %lu ", if (seq_printf(s, "%-8s %u %ld ",
proto->name, proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
timer_pending(&conntrack->timeout) timer_pending(&conntrack->timeout)
? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0) ? (long)(conntrack->timeout.expires - jiffies)/HZ
return 1; : 0) != 0)
return -ENOSPC;
if (proto->print_conntrack(s, conntrack)) if (proto->print_conntrack(s, conntrack))
return 1; return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
proto)) proto))
return 1; return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
return 1; return -ENOSPC;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
if (seq_printf(s, "[UNREPLIED] ")) if (seq_printf(s, "[UNREPLIED] "))
return 1; return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
proto)) proto))
return 1; return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
return 1; return -ENOSPC;
if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
if (seq_printf(s, "[ASSURED] ")) if (seq_printf(s, "[ASSURED] "))
return 1; return -ENOSPC;
#if defined(CONFIG_IP_NF_CONNTRACK_MARK) #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
if (seq_printf(s, "mark=%ld ", conntrack->mark)) if (seq_printf(s, "mark=%lu ", conntrack->mark))
return 1; return -ENOSPC;
#endif #endif
if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
return 1; return -ENOSPC;
return 0; return 0;
} }
static int ct_seq_show(struct seq_file *s, void *v)
{
struct list_head *list = v;
int ret = 0;
/* FIXME: Simply truncates if hash chain too long. */
READ_LOCK(&ip_conntrack_lock);
if (LIST_FIND(list, ct_seq_real_show,
struct ip_conntrack_tuple_hash *, s))
ret = -ENOSPC;
READ_UNLOCK(&ip_conntrack_lock);
return ret;
}
static struct seq_operations ct_seq_ops = { static struct seq_operations ct_seq_ops = {
.start = ct_seq_start, .start = ct_seq_start,
.next = ct_seq_next, .next = ct_seq_next,
...@@ -181,7 +204,23 @@ static struct seq_operations ct_seq_ops = { ...@@ -181,7 +204,23 @@ static struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file) static int ct_open(struct inode *inode, struct file *file)
{ {
return seq_open(file, &ct_seq_ops); struct seq_file *seq;
struct ct_iter_state *st;
int ret;
st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
if (st == NULL)
return -ENOMEM;
ret = seq_open(file, &ct_seq_ops);
if (ret)
goto out_free;
seq = file->private_data;
seq->private = st;
memset(st, 0, sizeof(struct ct_iter_state));
return ret;
out_free:
kfree(st);
return ret;
} }
static struct file_operations ct_file_ops = { static struct file_operations ct_file_ops = {
...@@ -189,7 +228,7 @@ static struct file_operations ct_file_ops = { ...@@ -189,7 +228,7 @@ static struct file_operations ct_file_ops = {
.open = ct_open, .open = ct_open,
.read = seq_read, .read = seq_read,
.llseek = seq_lseek, .llseek = seq_lseek,
.release = seq_release .release = seq_release_private,
}; };
/* expects */ /* expects */
...@@ -235,8 +274,8 @@ static int exp_seq_show(struct seq_file *s, void *v) ...@@ -235,8 +274,8 @@ static int exp_seq_show(struct seq_file *s, void *v)
struct ip_conntrack_expect *expect = v; struct ip_conntrack_expect *expect = v;
if (expect->timeout.function) if (expect->timeout.function)
seq_printf(s, "%lu ", timer_pending(&expect->timeout) seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (expect->timeout.expires - jiffies)/HZ : 0); ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
else else
seq_printf(s, "- "); seq_printf(s, "- ");
......
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
* Zander). * Zander).
* 2000-08-01: Added Nick Williams' MAC support. * 2000-08-01: Added Nick Williams' MAC support.
* 2002-06-25: Code cleanup. * 2002-06-25: Code cleanup.
* 2005-01-10: Added /proc counter for dropped packets; fixed so
* packets aren't delivered to user space if they're going
* to be dropped.
* *
*/ */
#include <linux/module.h> #include <linux/module.h>
...@@ -59,6 +62,8 @@ static DEFINE_RWLOCK(queue_lock); ...@@ -59,6 +62,8 @@ static DEFINE_RWLOCK(queue_lock);
static int peer_pid; static int peer_pid;
static unsigned int copy_range; static unsigned int copy_range;
static unsigned int queue_total; static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl; static struct sock *ipqnl;
static LIST_HEAD(queue_list); static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem); static DECLARE_MUTEX(ipqnl_sem);
...@@ -70,18 +75,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) ...@@ -70,18 +75,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
kfree(entry); kfree(entry);
} }
static inline int static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry) __ipq_enqueue_entry(struct ipq_queue_entry *entry)
{ {
if (queue_total >= queue_maxlen) {
if (net_ratelimit())
printk(KERN_WARNING "ip_queue: full at %d entries, "
"dropping packet(s).\n", queue_total);
return -ENOSPC;
}
list_add(&entry->list, &queue_list); list_add(&entry->list, &queue_list);
queue_total++; queue_total++;
return 0;
} }
/* /*
...@@ -308,14 +306,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) ...@@ -308,14 +306,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
if (!peer_pid) if (!peer_pid)
goto err_out_free_nskb; goto err_out_free_nskb;
if (queue_total >= queue_maxlen) {
queue_dropped++;
status = -ENOSPC;
if (net_ratelimit())
printk (KERN_WARNING "ip_queue: full at %d entries, "
"dropping packets(s). Dropped: %d\n", queue_total,
queue_dropped);
goto err_out_free_nskb;
}
/* netlink_unicast will either free the nskb or attach it to a socket */ /* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
if (status < 0) if (status < 0) {
queue_user_dropped++;
goto err_out_unlock; goto err_out_unlock;
}
status = __ipq_enqueue_entry(entry); __ipq_enqueue_entry(entry);
if (status < 0)
goto err_out_unlock;
write_unlock_bh(&queue_lock); write_unlock_bh(&queue_lock);
return status; return status;
...@@ -637,12 +645,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) ...@@ -637,12 +645,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
"Copy mode : %hu\n" "Copy mode : %hu\n"
"Copy range : %u\n" "Copy range : %u\n"
"Queue length : %u\n" "Queue length : %u\n"
"Queue max. length : %u\n", "Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netlink dropped : %u\n",
peer_pid, peer_pid,
copy_mode, copy_mode,
copy_range, copy_range,
queue_total, queue_total,
queue_maxlen); queue_maxlen,
queue_dropped,
queue_user_dropped);
read_unlock_bh(&queue_lock); read_unlock_bh(&queue_lock);
......
...@@ -923,7 +923,7 @@ translate_table(const char *name, ...@@ -923,7 +923,7 @@ translate_table(const char *name,
} }
/* And one copy for every other CPU */ /* And one copy for every other CPU */
for (i = 1; i < NR_CPUS; i++) { for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries, newinfo->entries,
SMP_ALIGN(newinfo->size)); SMP_ALIGN(newinfo->size));
...@@ -945,7 +945,7 @@ replace_table(struct ipt_table *table, ...@@ -945,7 +945,7 @@ replace_table(struct ipt_table *table,
struct ipt_entry *table_base; struct ipt_entry *table_base;
unsigned int i; unsigned int i;
for (i = 0; i < NR_CPUS; i++) { for (i = 0; i < num_possible_cpus(); i++) {
table_base = table_base =
(void *)newinfo->entries (void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i); + TABLE_OFFSET(newinfo, i);
...@@ -992,7 +992,7 @@ get_counters(const struct ipt_table_info *t, ...@@ -992,7 +992,7 @@ get_counters(const struct ipt_table_info *t,
unsigned int cpu; unsigned int cpu;
unsigned int i; unsigned int i;
for (cpu = 0; cpu < NR_CPUS; cpu++) { for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0; i = 0;
IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size, t->size,
...@@ -1130,7 +1130,7 @@ do_replace(void __user *user, unsigned int len) ...@@ -1130,7 +1130,7 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM; return -ENOMEM;
newinfo = vmalloc(sizeof(struct ipt_table_info) newinfo = vmalloc(sizeof(struct ipt_table_info)
+ SMP_ALIGN(tmp.size) * NR_CPUS); + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo) if (!newinfo)
return -ENOMEM; return -ENOMEM;
...@@ -1460,7 +1460,7 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) ...@@ -1460,7 +1460,7 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
= { 0, 0, 0, { 0 }, { 0 }, { } }; = { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ipt_table_info) newinfo = vmalloc(sizeof(struct ipt_table_info)
+ SMP_ALIGN(repl->size) * NR_CPUS); + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo) if (!newinfo)
return -ENOMEM; return -ENOMEM;
......
...@@ -198,16 +198,16 @@ static void dump_packet(const struct ipt_log_info *info, ...@@ -198,16 +198,16 @@ static void dump_packet(const struct ipt_log_info *info,
static size_t required_len[NR_ICMP_TYPES+1] static size_t required_len[NR_ICMP_TYPES+1]
= { [ICMP_ECHOREPLY] = 4, = { [ICMP_ECHOREPLY] = 4,
[ICMP_DEST_UNREACH] [ICMP_DEST_UNREACH]
= 8 + sizeof(struct iphdr) + 8, = 8 + sizeof(struct iphdr),
[ICMP_SOURCE_QUENCH] [ICMP_SOURCE_QUENCH]
= 8 + sizeof(struct iphdr) + 8, = 8 + sizeof(struct iphdr),
[ICMP_REDIRECT] [ICMP_REDIRECT]
= 8 + sizeof(struct iphdr) + 8, = 8 + sizeof(struct iphdr),
[ICMP_ECHO] = 4, [ICMP_ECHO] = 4,
[ICMP_TIME_EXCEEDED] [ICMP_TIME_EXCEEDED]
= 8 + sizeof(struct iphdr) + 8, = 8 + sizeof(struct iphdr),
[ICMP_PARAMETERPROB] [ICMP_PARAMETERPROB]
= 8 + sizeof(struct iphdr) + 8, = 8 + sizeof(struct iphdr),
[ICMP_TIMESTAMP] = 20, [ICMP_TIMESTAMP] = 20,
[ICMP_TIMESTAMPREPLY] = 20, [ICMP_TIMESTAMPREPLY] = 20,
[ICMP_ADDRESS] = 12, [ICMP_ADDRESS] = 12,
......
...@@ -252,10 +252,6 @@ static void send_unreach(struct sk_buff *skb_in, int code) ...@@ -252,10 +252,6 @@ static void send_unreach(struct sk_buff *skb_in, int code)
if (iph->frag_off&htons(IP_OFFSET)) if (iph->frag_off&htons(IP_OFFSET))
return; return;
/* Ensure we have at least 8 bytes of proto header. */
if (skb_in->len < skb_in->nh.iph->ihl*4 + 8)
return;
/* If we send an ICMP error to an ICMP error a mess would result.. */ /* If we send an ICMP error to an ICMP error a mess would result.. */
if (iph->protocol == IPPROTO_ICMP) { if (iph->protocol == IPPROTO_ICMP) {
struct icmphdr ihdr; struct icmphdr ihdr;
......
...@@ -625,7 +625,7 @@ static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s) ...@@ -625,7 +625,7 @@ static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s)
rateinfo_recalc(ent, jiffies); rateinfo_recalc(ent, jiffies);
return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n", return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n",
(ent->expires - jiffies)/HZ, (long)(ent->expires - jiffies)/HZ,
NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port), NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port),
NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port), NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port),
ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.credit, ent->rateinfo.credit_cap,
......
...@@ -20,6 +20,9 @@ ...@@ -20,6 +20,9 @@
* Few changes needed, mainly the hard_routing code and * Few changes needed, mainly the hard_routing code and
* the netlink socket protocol (we're NETLINK_IP6_FW). * the netlink socket protocol (we're NETLINK_IP6_FW).
* 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c] * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
* 2005-02-04: Added /proc counter for dropped packets; fixed so
* packets aren't delivered to user space if they're going
* to be dropped.
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
...@@ -64,6 +67,8 @@ static DEFINE_RWLOCK(queue_lock); ...@@ -64,6 +67,8 @@ static DEFINE_RWLOCK(queue_lock);
static int peer_pid; static int peer_pid;
static unsigned int copy_range; static unsigned int copy_range;
static unsigned int queue_total; static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl; static struct sock *ipqnl;
static LIST_HEAD(queue_list); static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem); static DECLARE_MUTEX(ipqnl_sem);
...@@ -75,18 +80,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) ...@@ -75,18 +80,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
kfree(entry); kfree(entry);
} }
static inline int static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry) __ipq_enqueue_entry(struct ipq_queue_entry *entry)
{ {
if (queue_total >= queue_maxlen) {
if (net_ratelimit())
printk(KERN_WARNING "ip6_queue: full at %d entries, "
"dropping packet(s).\n", queue_total);
return -ENOSPC;
}
list_add(&entry->list, &queue_list); list_add(&entry->list, &queue_list);
queue_total++; queue_total++;
return 0;
} }
/* /*
...@@ -312,14 +310,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) ...@@ -312,14 +310,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
if (!peer_pid) if (!peer_pid)
goto err_out_free_nskb; goto err_out_free_nskb;
if (queue_total >= queue_maxlen) {
queue_dropped++;
status = -ENOSPC;
if (net_ratelimit())
printk (KERN_WARNING "ip6_queue: fill at %d entries, "
"dropping packet(s). Dropped: %d\n", queue_total,
queue_dropped);
goto err_out_free_nskb;
}
/* netlink_unicast will either free the nskb or attach it to a socket */ /* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
if (status < 0) if (status < 0) {
queue_user_dropped++;
goto err_out_unlock; goto err_out_unlock;
}
status = __ipq_enqueue_entry(entry); __ipq_enqueue_entry(entry);
if (status < 0)
goto err_out_unlock;
write_unlock_bh(&queue_lock); write_unlock_bh(&queue_lock);
return status; return status;
...@@ -639,12 +647,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) ...@@ -639,12 +647,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
"Copy mode : %hu\n" "Copy mode : %hu\n"
"Copy range : %u\n" "Copy range : %u\n"
"Queue length : %u\n" "Queue length : %u\n"
"Queue max. length : %u\n", "Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netfilter dropped : %u\n",
peer_pid, peer_pid,
copy_mode, copy_mode,
copy_range, copy_range,
queue_total, queue_total,
queue_maxlen); queue_maxlen,
queue_dropped,
queue_user_dropped);
read_unlock_bh(&queue_lock); read_unlock_bh(&queue_lock);
......
...@@ -952,7 +952,7 @@ translate_table(const char *name, ...@@ -952,7 +952,7 @@ translate_table(const char *name,
} }
/* And one copy for every other CPU */ /* And one copy for every other CPU */
for (i = 1; i < NR_CPUS; i++) { for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries, newinfo->entries,
SMP_ALIGN(newinfo->size)); SMP_ALIGN(newinfo->size));
...@@ -974,7 +974,7 @@ replace_table(struct ip6t_table *table, ...@@ -974,7 +974,7 @@ replace_table(struct ip6t_table *table,
struct ip6t_entry *table_base; struct ip6t_entry *table_base;
unsigned int i; unsigned int i;
for (i = 0; i < NR_CPUS; i++) { for (i = 0; i < num_possible_cpus(); i++) {
table_base = table_base =
(void *)newinfo->entries (void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i); + TABLE_OFFSET(newinfo, i);
...@@ -1021,7 +1021,7 @@ get_counters(const struct ip6t_table_info *t, ...@@ -1021,7 +1021,7 @@ get_counters(const struct ip6t_table_info *t,
unsigned int cpu; unsigned int cpu;
unsigned int i; unsigned int i;
for (cpu = 0; cpu < NR_CPUS; cpu++) { for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0; i = 0;
IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size, t->size,
...@@ -1155,7 +1155,7 @@ do_replace(void __user *user, unsigned int len) ...@@ -1155,7 +1155,7 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM; return -ENOMEM;
newinfo = vmalloc(sizeof(struct ip6t_table_info) newinfo = vmalloc(sizeof(struct ip6t_table_info)
+ SMP_ALIGN(tmp.size) * NR_CPUS); + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo) if (!newinfo)
return -ENOMEM; return -ENOMEM;
...@@ -1469,7 +1469,7 @@ int ip6t_register_table(struct ip6t_table *table, ...@@ -1469,7 +1469,7 @@ int ip6t_register_table(struct ip6t_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } }; = { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ip6t_table_info) newinfo = vmalloc(sizeof(struct ip6t_table_info)
+ SMP_ALIGN(repl->size) * NR_CPUS); + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo) if (!newinfo)
return -ENOMEM; return -ENOMEM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment