Commit 8de08f90 authored by David S. Miller

Merge bk://212.42.230.204:994/nf-2.6

into sunset.davemloft.net:/home/davem/src/BK/net-2.6
parents 0f634eb3 b20f3c6c
......@@ -23,13 +23,16 @@ enum tcp_conntrack {
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
+ /* This sender sent FIN first */
+ #define IP_CT_TCP_FLAG_CLOSE_INIT 0x03
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
- u_int8_t flags; /* per direction state flags */
+ u_int8_t flags; /* per direction options */
};
struct ip_ct_tcp
......
......@@ -822,10 +822,10 @@ static int translate_table(struct ebt_replace *repl,
/* this will get free'd in do_replace()/ebt_register_table()
if an error occurs */
newinfo->chainstack = (struct ebt_chainstack **)
- vmalloc(NR_CPUS * sizeof(struct ebt_chainstack));
+ vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack));
if (!newinfo->chainstack)
return -ENOMEM;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < num_possible_cpus(); i++) {
newinfo->chainstack[i] =
vmalloc(udc_cnt * sizeof(struct ebt_chainstack));
if (!newinfo->chainstack[i]) {
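The ebtables, arptables, iptables and ip6tables hunks in this commit all make the same change: per-CPU areas are sized by num_possible_cpus(), the number of CPUs that can ever come online on this boot, rather than the compile-time NR_CPUS ceiling. A minimal sketch of the pattern (alloc_chainstacks is a hypothetical helper, not in the patch); like the patch, it indexes slots by CPU number, which assumes possible CPU ids are contiguous from 0:

    #include <linux/cpumask.h>
    #include <linux/vmalloc.h>

    /* Hypothetical helper mirroring the translate_table() hunk above:
     * one chainstack per possible CPU instead of one per NR_CPUS. */
    static struct ebt_chainstack **alloc_chainstacks(unsigned int udc_cnt)
    {
    	struct ebt_chainstack **cs;
    	unsigned int i;

    	cs = vmalloc(num_possible_cpus() * sizeof(*cs));
    	if (!cs)
    		return NULL;
    	for (i = 0; i < num_possible_cpus(); i++) {
    		cs[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack));
    		if (!cs[i]) {
    			while (i-- > 0)
    				vfree(cs[i]);
    			vfree(cs);
    			return NULL;	/* caller maps this to -ENOMEM */
    		}
    	}
    	return cs;
    }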
......@@ -898,7 +898,7 @@ static void get_counters(struct ebt_counter *oldcounters,
memcpy(counters, oldcounters,
sizeof(struct ebt_counter) * nentries);
/* add other counters to those of cpu 0 */
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ for (cpu = 1; cpu < num_possible_cpus(); cpu++) {
counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
for (i = 0; i < nentries; i++) {
counters[i].pcnt += counter_base[i].pcnt;
......@@ -930,7 +930,7 @@ static int do_replace(void __user *user, unsigned int len)
BUGPRINT("Entries_size never zero\n");
return -EINVAL;
}
- countersize = COUNTER_OFFSET(tmp.nentries) * NR_CPUS;
+ countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus();
newinfo = (struct ebt_table_info *)
vmalloc(sizeof(struct ebt_table_info) + countersize);
if (!newinfo)
......@@ -1023,7 +1023,7 @@ static int do_replace(void __user *user, unsigned int len)
vfree(table->entries);
if (table->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(table->chainstack[i]);
vfree(table->chainstack);
}
......@@ -1043,7 +1043,7 @@ static int do_replace(void __user *user, unsigned int len)
vfree(counterstmp);
/* can be initialized in translate_table() */
if (newinfo->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack);
}
......@@ -1137,7 +1137,7 @@ int ebt_register_table(struct ebt_table *table)
return -EINVAL;
}
- countersize = COUNTER_OFFSET(table->table->nentries) * NR_CPUS;
+ countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus();
newinfo = (struct ebt_table_info *)
vmalloc(sizeof(struct ebt_table_info) + countersize);
ret = -ENOMEM;
......@@ -1191,7 +1191,7 @@ int ebt_register_table(struct ebt_table *table)
up(&ebt_mutex);
free_chainstack:
if (newinfo->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack);
}
......@@ -1215,7 +1215,7 @@ void ebt_unregister_table(struct ebt_table *table)
if (table->private->entries)
vfree(table->private->entries);
if (table->private->chainstack) {
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < num_possible_cpus(); i++)
vfree(table->private->chainstack[i]);
vfree(table->private->chainstack);
}
......
......@@ -717,7 +717,7 @@ static int translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
......@@ -768,7 +768,7 @@ static void get_counters(const struct arpt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0;
ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
......@@ -886,7 +886,7 @@ static int do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(tmp.size) * NR_CPUS);
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......@@ -1159,7 +1159,7 @@ int arpt_register_table(struct arpt_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(repl->size) * NR_CPUS);
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo) {
ret = -ENOMEM;
return ret;
......
......@@ -400,8 +400,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
return -1;
}
DEBUGP("Setting vtag %x for dir %d\n",
- ih->init_tag, CTINFO2DIR(ctinfo));
- conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = ih->init_tag;
+ ih->init_tag, !CTINFO2DIR(ctinfo));
+ conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
}
conntrack->proto.sctp.state = newconntrack;
......
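The two-line SCTP fix above is about direction: the initiate tag carried in an INIT chunk is the verification tag the peer will put on packets flowing the other way, so it must be stored under the reply direction rather than hard-coded as IP_CT_DIR_ORIGINAL. Restated as an illustrative sketch:

    /* Illustrative restatement of the hunk above. */
    enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

    /* was: conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = ih->init_tag; */
    conntrack->proto.sctp.vtag[!dir] = ih->init_tag;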
......@@ -254,7 +254,7 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sSS -> sSR Standard open.
* sSR -> sSR Retransmitted SYN/ACK.
* sES -> sIG Late retransmitted SYN/ACK?
- * sFW -> sIG
+ * sFW -> sIG Might be SYN/ACK answering ignored SYN
* sCW -> sIG
* sLA -> sIG
* sTW -> sIG
......@@ -273,10 +273,10 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
- /*ack*/ { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+ /*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
- * sSS -> sIG Might be a half-open connection.
- * sSR -> sIV Simultaneous open.
+ * sSS -> sIV Might be a half-open connection.
+ * sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
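The rows above belong to the tcp_conntracks transition table, consulted as a three-way lookup: packet direction, then the segment type derived from the TCP flags by get_conntrack_index(), then the currently tracked state. A sketch of the lookup as tcp_packet() performs it further down:

    /* Sketch: how a corrected row such as the ack row above takes effect. */
    enum tcp_conntrack new_state;

    new_state = tcp_conntracks[dir][index][old_state];
    /* e.g. index == TCP_ACK_SET with old_state == sSR now yields sSR
     * (previously sIV), accepting an ACK that answers a late resent SYN. */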
......@@ -352,14 +352,19 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
http://www.nluug.nl/events/sane2000/papers.html
http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
- The boundaries and the conditions are slightly changed:
+ The boundaries and the conditions are changed according to RFC793:
+ the packet must intersect the window (i.e. segments may be
+ after the right or before the left edge) and thus receivers may ACK
+ segments after the right edge of the window.
td_maxend = max(sack + max(win,1)) seen in reply packets
td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_maxwin += seq + len - sender.td_maxend
+ if seq + len > sender.td_maxend
td_end = max(seq + len) seen in sent packets
- I. Upper bound for valid data: seq + len <= sender.td_maxend
- II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin
+ I. Upper bound for valid data: seq <= sender.td_maxend
+ II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
III. Upper bound for valid ack: sack <= receiver.td_end
IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
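Conditions I-IV map one-to-one onto the kernel's modular 32-bit sequence-number comparisons. A self-contained sketch, assuming the before()/after() helpers from <net/tcp.h> (x strictly precedes/follows y modulo 2^32); it is not the patch's code, but the same test that tcp_in_window() applies below, with MAXACKWINDOW passed as a plain parameter:

    #include <net/tcp.h>	/* before(), after() */

    /* Sketch of the I-IV window check. */
    static int segment_in_window(__u32 seq, __u32 end, __u32 ack, __u32 sack,
    			     __u32 maxackwindow,
    			     const struct ip_ct_tcp_state *sender,
    			     const struct ip_ct_tcp_state *receiver)
    {
    	return before(seq, sender->td_maxend + 1)			/* I   */
    		&& after(end, sender->td_end - receiver->td_maxwin - 1)	/* II  */
    		&& before(sack, receiver->td_end + 1)			/* III */
    		&& after(ack, receiver->td_end - maxackwindow);		/* IV  */
    }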
......@@ -373,7 +378,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq,
size_t len,
struct iphdr *iph,
struct tcphdr *tcph)
{
return (seq + len - (iph->ihl + tcph->doff)*4
+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
}
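A quick worked example of segment_seq_plus_len(): at this point skb->len still covers the IP and TCP headers, so they are subtracted before SYN and FIN are each counted as one sequence number.

    /* For a bare SYN (no payload), skb->len == (iph->ihl + tcph->doff)*4,
     * so end = seq + 0 + 1 (SYN) + 0 (FIN) = seq + 1: the SYN consumes
     * one sequence number, exactly as a FIN does on close. */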
......@@ -444,22 +449,33 @@ static void tcp_options(const struct sk_buff *skb,
}
}
- static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
+ static void tcp_sack(const struct sk_buff *skb,
+ struct iphdr *iph,
+ struct tcphdr *tcph,
+ __u32 *sack)
{
- __u32 tmp;
+ unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
unsigned char *ptr;
int length = (tcph->doff*4) - sizeof(struct tcphdr);
+ __u32 tmp;
+ if (!length)
+ return;
+ ptr = skb_header_pointer(skb,
+ (iph->ihl * 4) + sizeof(struct tcphdr),
+ length, buff);
+ BUG_ON(ptr == NULL);
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__u32 *)(tcph + 1) ==
+ && *(__u32 *)ptr ==
__constant_ntohl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
| TCPOLEN_TIMESTAMP))
return;
- ptr = (unsigned char *)(tcph + 1);
while (length > 0) {
int opcode=*ptr++;
int opsize, i;
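The rewritten tcp_sack() shows the commit's safe-access idiom: instead of dereferencing tcph + 1 directly, skb_header_pointer() returns a pointer into the skb when the requested range is linear, and otherwise copies it into the caller's stack buffer; NULL means the packet is too short. The hunk can BUG() on NULL because the option length was already validated; a stand-alone sketch would bail out instead (offset and length names here are illustrative):

    unsigned char buff[15 * 4 - sizeof(struct tcphdr)];	/* max option block */
    const unsigned char *ptr;

    ptr = skb_header_pointer(skb, iph->ihl * 4 + sizeof(struct tcphdr),
    			 length, buff);
    if (ptr == NULL)
    	return;	/* options would run past the end of the packet */
    /* parsing 'length' bytes at ptr is now safe, linear skb or not */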
......@@ -500,7 +516,7 @@ static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
static int tcp_in_window(struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
- unsigned int *index,
+ unsigned int index,
const struct sk_buff *skb,
struct iphdr *iph,
struct tcphdr *tcph)
......@@ -519,7 +535,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
end = segment_seq_plus_len(seq, skb->len, iph, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
- tcp_sack(tcph, &sack);
+ tcp_sack(skb, iph, tcph, &sack);
DEBUGP("tcp_in_window: START\n");
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
......@@ -598,20 +614,23 @@ static int tcp_in_window(struct ip_ct_tcp *state,
ack = sack = receiver->td_end;
}
- if (seq == end)
+ if (seq == end
+ && (!tcph->rst
+ || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
/*
* Packets contains no data: we assume it is valid
* and check the ack value only.
+ * However RST segments are always validated by their
+ * SEQ number, except when seq == 0 (reset sent answering
+ * SYN.
*/
seq = end = sender->td_end;
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
"seq=%u ack=%u sack =%u win=%u end=%u\n",
NIPQUAD(iph->saddr), ntohs(tcph->source),
NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end,
- after(end, sender->td_maxend) && before(seq, sender->td_maxend)
- ? sender->td_maxend : end);
+ seq, ack, sack, win, end);
DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
......@@ -619,24 +638,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
- /* Ignore data over the right edge of the receiver's window. */
- if (after(end, sender->td_maxend) &&
- before(seq, sender->td_maxend)) {
- end = sender->td_maxend;
- if (*index == TCP_FIN_SET)
- *index = TCP_ACK_SET;
- }
DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(end, sender->td_maxend + 1)
- || before(seq, sender->td_maxend + 1),
- after(seq, sender->td_end - receiver->td_maxwin - 1)
- || after(end, sender->td_end - receiver->td_maxwin - 1),
+ before(seq, sender->td_maxend + 1),
+ after(end, sender->td_end - receiver->td_maxwin - 1),
before(sack, receiver->td_end + 1),
after(ack, receiver->td_end - MAXACKWINDOW(sender)));
if (sender->loose || receiver->loose ||
- (before(end, sender->td_maxend + 1) &&
- after(seq, sender->td_end - receiver->td_maxwin - 1) &&
+ (before(seq, sender->td_maxend + 1) &&
+ after(end, sender->td_end - receiver->td_maxwin - 1) &&
before(sack, receiver->td_end + 1) &&
after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
/*
......@@ -653,6 +663,11 @@ static int tcp_in_window(struct ip_ct_tcp *state,
sender->td_maxwin = swin;
if (after(end, sender->td_end))
sender->td_end = end;
+ /*
+ * Update receiver data.
+ */
+ if (after(end, sender->td_maxend))
+ receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
if (win == 0)
......@@ -662,7 +677,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
/*
* Check retransmissions.
*/
- if (*index == TCP_ACK_SET) {
+ if (index == TCP_ACK_SET) {
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
......@@ -687,16 +702,16 @@ static int tcp_in_window(struct ip_ct_tcp *state,
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: %s ",
- before(end, sender->td_maxend + 1) ?
- after(seq, sender->td_end - receiver->td_maxwin - 1) ?
+ before(seq, sender->td_maxend + 1) ?
+ after(end, sender->td_end - receiver->td_maxwin - 1) ?
before(sack, receiver->td_end + 1) ?
after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
: "ACK is under the lower bound (possibly overly delayed ACK)"
: "ACK is over the upper bound (ACKed data has never seen yet)"
: "SEQ is under the lower bound (retransmitted already ACKed data)"
: "ACK is under the lower bound (possible overly delayed ACK)"
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
- res = ip_ct_tcp_be_liberal && !tcph->rst;
+ res = ip_ct_tcp_be_liberal;
}
DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
......@@ -849,13 +864,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
/* Either SYN in ORIGINAL
- * or SYN/ACK in REPLY
- * or ACK in REPLY direction (half-open connection). */
+ * or SYN/ACK in REPLY. */
if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir
- && after(ntohl(th->ack_seq),
- conntrack->proto.tcp.last_seq)) {
+ && ntohl(th->ack_seq) ==
+ conntrack->proto.tcp.last_end) {
/* This SYN/ACK acknowledges a SYN that we earlier
* ignored as invalid. This means that the client and
* the server are both in sync, while the firewall is
......@@ -875,6 +889,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->proto.tcp.last_index = index;
conntrack->proto.tcp.last_dir = dir;
conntrack->proto.tcp.last_seq = ntohl(th->seq);
+ conntrack->proto.tcp.last_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
......@@ -892,7 +908,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
"ip_ct_tcp: invalid state ");
return -NF_ACCEPT;
case TCP_CONNTRACK_SYN_SENT:
- if (old_state >= TCP_CONNTRACK_TIME_WAIT) {
+ if (old_state < TCP_CONNTRACK_TIME_WAIT)
+ break;
+ if ((conntrack->proto.tcp.seen[dir].flags &
+ IP_CT_TCP_FLAG_CLOSE_INIT)
+ || after(ntohl(th->seq),
+ conntrack->proto.tcp.seen[dir].td_end)) {
/* Attempt to reopen a closed connection.
* Delete this connection and look up again. */
WRITE_UNLOCK(&tcp_lock);
......@@ -900,23 +921,23 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->timeout.function((unsigned long)
conntrack);
return -NF_REPEAT;
+ } else {
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ "ip_ct_tcp: invalid SYN");
+ return -NF_ACCEPT;
+ }
break;
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET)
- || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_ACK_SET))
- && after(ntohl(th->ack_seq),
- conntrack->proto.tcp.last_seq)) {
- /* Ignore RST closing down invalid SYN or ACK
- we had let trough. */
- WRITE_UNLOCK(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- "ip_ct_tcp: invalid RST (ignored) ");
- return NF_ACCEPT;
+ && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+ /* RST sent to invalid SYN we had let trough
+ * SYN was in window then, tear down connection.
+ * We skip window checking, because packet might ACK
+ * segments we ignored in the SYN. */
+ goto in_window;
}
/* Just fall trough */
default:
......@@ -924,16 +945,14 @@ static int tcp_packet(struct ip_conntrack *conntrack,
break;
}
- if (!tcp_in_window(&conntrack->proto.tcp, dir, &index,
+ if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
skb, iph, th)) {
WRITE_UNLOCK(&tcp_lock);
return -NF_ACCEPT;
}
- /* From now on we have got in-window packets */
- /* If FIN was trimmed off, we don't change state. */
+ in_window:
+ /* From now on we have got in-window packets */
conntrack->proto.tcp.last_index = index;
new_state = tcp_conntracks[dir][index][old_state];
DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
......@@ -944,6 +963,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
old_state, new_state);
conntrack->proto.tcp.state = new_state;
+ if (old_state != new_state
+ && (new_state == TCP_CONNTRACK_FIN_WAIT
+ || new_state == TCP_CONNTRACK_CLOSE))
+ conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
&& *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
......@@ -974,7 +997,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int tcp_new(struct ip_conntrack *conntrack,
const struct sk_buff *skb)
{
......
......@@ -77,34 +77,70 @@ seq_print_counters(struct seq_file *s,
#define seq_print_counters(x, y) 0
#endif
- static void *ct_seq_start(struct seq_file *s, loff_t *pos)
+ struct ct_iter_state {
+ unsigned int bucket;
+ };
+ static struct list_head *ct_get_first(struct seq_file *seq)
{
- if (*pos >= ip_conntrack_htable_size)
- return NULL;
- return &ip_conntrack_hash[*pos];
+ struct ct_iter_state *st = seq->private;
+ for (st->bucket = 0;
+ st->bucket < ip_conntrack_htable_size;
+ st->bucket++) {
+ if (!list_empty(&ip_conntrack_hash[st->bucket]))
+ return ip_conntrack_hash[st->bucket].next;
+ }
+ return NULL;
}
- static void ct_seq_stop(struct seq_file *s, void *v)
+ static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
{
+ struct ct_iter_state *st = seq->private;
+ head = head->next;
+ while (head == &ip_conntrack_hash[st->bucket]) {
+ if (++st->bucket >= ip_conntrack_htable_size)
+ return NULL;
+ head = ip_conntrack_hash[st->bucket].next;
+ }
+ return head;
}
+ static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+ {
+ struct list_head *head = ct_get_first(seq);
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+ }
+ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ READ_LOCK(&ip_conntrack_lock);
+ return ct_get_idx(seq, *pos);
+ }
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
- if (*pos >= ip_conntrack_htable_size)
- return NULL;
- return &ip_conntrack_hash[*pos];
+ return ct_get_next(s, v);
}
- /* return 0 on success, 1 in case of error */
- static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
- struct seq_file *s)
+ static void ct_seq_stop(struct seq_file *s, void *v)
{
+ READ_UNLOCK(&ip_conntrack_lock);
}
+ static int ct_seq_show(struct seq_file *s, void *v)
+ {
+ const struct ip_conntrack_tuple_hash *hash = v;
const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto;
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
IP_NF_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
......@@ -115,63 +151,50 @@ static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
.tuple.dst.protonum);
IP_NF_ASSERT(proto);
if (seq_printf(s, "%-8s %u %lu ",
if (seq_printf(s, "%-8s %u %ld ",
proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
timer_pending(&conntrack->timeout)
- ? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
- return 1;
+ ? (long)(conntrack->timeout.expires - jiffies)/HZ
+ : 0) != 0)
+ return -ENOSPC;
if (proto->print_conntrack(s, conntrack))
- return 1;
+ return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
proto))
- return 1;
+ return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
- return 1;
+ return -ENOSPC;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
if (seq_printf(s, "[UNREPLIED] "))
- return 1;
+ return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
proto))
- return 1;
+ return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
- return 1;
+ return -ENOSPC;
if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
if (seq_printf(s, "[ASSURED] "))
- return 1;
+ return -ENOSPC;
#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
if (seq_printf(s, "mark=%ld ", conntrack->mark))
return 1;
if (seq_printf(s, "mark=%lu ", conntrack->mark))
return -ENOSPC;
#endif
if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
- return 1;
+ return -ENOSPC;
return 0;
}
- static int ct_seq_show(struct seq_file *s, void *v)
- {
- struct list_head *list = v;
- int ret = 0;
- /* FIXME: Simply truncates if hash chain too long. */
- READ_LOCK(&ip_conntrack_lock);
- if (LIST_FIND(list, ct_seq_real_show,
- struct ip_conntrack_tuple_hash *, s))
- ret = -ENOSPC;
- READ_UNLOCK(&ip_conntrack_lock);
- return ret;
- }
static struct seq_operations ct_seq_ops = {
.start = ct_seq_start,
.next = ct_seq_next,
......@@ -181,7 +204,23 @@ static struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_seq_ops);
+ struct seq_file *seq;
+ struct ct_iter_state *st;
+ int ret;
+ st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
+ if (st == NULL)
+ return -ENOMEM;
+ ret = seq_open(file, &ct_seq_ops);
+ if (ret)
+ goto out_free;
+ seq = file->private_data;
+ seq->private = st;
+ memset(st, 0, sizeof(struct ct_iter_state));
+ return ret;
+ out_free:
+ kfree(st);
+ return ret;
}
static struct file_operations ct_file_ops = {
......@@ -189,7 +228,7 @@ static struct file_operations ct_file_ops = {
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release
+ .release = seq_release_private,
};
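The .release change is what keeps the new iterator from leaking: seq_release_private() frees seq->private (the ct_iter_state kmalloc'ed in ct_open() above) before releasing the seq_file, whereas plain seq_release() would leave it dangling. Later kernels wrap the open side of this pattern too; a sketch assuming the newer seq_open_private() helper is available:

    /* Equivalent of ct_open() above on kernels providing seq_open_private():
     * allocates a zeroed ct_iter_state, attaches it as seq->private, and
     * pairs with the seq_release_private() used in ct_file_ops. */
    static int ct_open_sketch(struct inode *inode, struct file *file)
    {
    	return seq_open_private(file, &ct_seq_ops,
    				sizeof(struct ct_iter_state));
    }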
/* expects */
......@@ -235,8 +274,8 @@ static int exp_seq_show(struct seq_file *s, void *v)
struct ip_conntrack_expect *expect = v;
if (expect->timeout.function)
seq_printf(s, "%lu ", timer_pending(&expect->timeout)
? (expect->timeout.expires - jiffies)/HZ : 0);
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
else
seq_printf(s, "- ");
......
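Every %lu to %ld change in these /proc hunks fixes the same artifact: expires - jiffies is computed in unsigned long, so a timer that has already fired prints as an enormous positive count instead of a small negative one; casting to long before the division restores the intended sign. Illustration (assumes 32-bit unsigned long and HZ=100):

    unsigned long expires = jiffies - HZ;	/* fired one second ago */

    printk("%lu\n", (expires - jiffies) / HZ);	  /* 42949671: wrapped */
    printk("%ld\n", (long)(expires - jiffies) / HZ);  /* -1: as intended */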
......@@ -14,6 +14,9 @@
* Zander).
* 2000-08-01: Added Nick Williams' MAC support.
* 2002-06-25: Code cleanup.
+ * 2005-01-10: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
*
*/
#include <linux/module.h>
......@@ -59,6 +62,8 @@ static DEFINE_RWLOCK(queue_lock);
static int peer_pid;
static unsigned int copy_range;
static unsigned int queue_total;
+ static unsigned int queue_dropped = 0;
+ static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem);
......@@ -70,18 +75,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
kfree(entry);
}
- static inline int
+ static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
- if (queue_total >= queue_maxlen) {
- if (net_ratelimit())
- printk(KERN_WARNING "ip_queue: full at %d entries, "
- "dropping packet(s).\n", queue_total);
- return -ENOSPC;
- }
list_add(&entry->list, &queue_list);
queue_total++;
- return 0;
}
/*
......@@ -308,14 +306,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
if (!peer_pid)
goto err_out_free_nskb;
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip_queue: full at %d entries, "
+ "dropping packets(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0)
- goto err_out_unlock;
- status = __ipq_enqueue_entry(entry);
- if (status < 0)
+ if (status < 0) {
+ queue_user_dropped++;
+ goto err_out_unlock;
+ }
+ __ipq_enqueue_entry(entry);
write_unlock_bh(&queue_lock);
return status;
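Read together, the ip_queue hunks reorder the enqueue path exactly as the changelog line above promises: the kernel-side queue limit is checked before netlink_unicast() hands the packet to user space, so user space never receives a packet that is about to be dropped, and __ipq_enqueue_entry() shrinks to infallible bookkeeping. A consolidated sketch of the resulting flow (names as in the hunks):

    write_lock_bh(&queue_lock);

    if (queue_total >= queue_maxlen) {	/* 1: refuse before delivery */
    	queue_dropped++;
    	status = -ENOSPC;
    	goto err_out_free_nskb;
    }

    /* 2: netlink_unicast() frees nskb or attaches it to a socket */
    status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
    if (status < 0) {
    	queue_user_dropped++;		/* user-space socket overflowed */
    	goto err_out_unlock;
    }

    __ipq_enqueue_entry(entry);		/* 3: can no longer fail */
    write_unlock_bh(&queue_lock);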
......@@ -637,12 +645,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
"Queue max. length : %u\n",
"Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netlink dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
- queue_maxlen);
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
read_unlock_bh(&queue_lock);
......
......@@ -923,7 +923,7 @@ translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
......@@ -945,7 +945,7 @@ replace_table(struct ipt_table *table,
struct ipt_entry *table_base;
unsigned int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < num_possible_cpus(); i++) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
......@@ -992,7 +992,7 @@ get_counters(const struct ipt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0;
IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
......@@ -1130,7 +1130,7 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(tmp.size) * NR_CPUS);
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......@@ -1460,7 +1460,7 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(repl->size) * NR_CPUS);
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......
......@@ -198,16 +198,16 @@ static void dump_packet(const struct ipt_log_info *info,
static size_t required_len[NR_ICMP_TYPES+1]
= { [ICMP_ECHOREPLY] = 4,
[ICMP_DEST_UNREACH]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_SOURCE_QUENCH]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_REDIRECT]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_ECHO] = 4,
[ICMP_TIME_EXCEEDED]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_PARAMETERPROB]
- = 8 + sizeof(struct iphdr) + 8,
+ = 8 + sizeof(struct iphdr),
[ICMP_TIMESTAMP] = 20,
[ICMP_TIMESTAMPREPLY] = 20,
[ICMP_ADDRESS] = 12,
......
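The required_len changes relax what dump_packet() demands of the ICMP error types: an 8-byte ICMP header plus the embedded IP header, no longer insisting on 8 bytes of the embedded transport header as well; the ipt_REJECT hunk below removes the matching 8-byte check on the input side. Worked minimum for ICMP_DEST_UNREACH:

    /* Minimum length after the change (illustrative arithmetic):
     *   8 (ICMP header) + sizeof(struct iphdr) (20 without IP options)
     *   = 28 bytes, versus 36 before, where the extra 8 bytes covered
     *   the start of the embedded transport header. */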
......@@ -252,10 +252,6 @@ static void send_unreach(struct sk_buff *skb_in, int code)
if (iph->frag_off&htons(IP_OFFSET))
return;
- /* Ensure we have at least 8 bytes of proto header. */
- if (skb_in->len < skb_in->nh.iph->ihl*4 + 8)
- return;
/* If we send an ICMP error to an ICMP error a mess would result.. */
if (iph->protocol == IPPROTO_ICMP) {
struct icmphdr ihdr;
......
......@@ -625,7 +625,7 @@ static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s)
rateinfo_recalc(ent, jiffies);
return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n",
- (ent->expires - jiffies)/HZ,
+ (long)(ent->expires - jiffies)/HZ,
NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port),
NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port),
ent->rateinfo.credit, ent->rateinfo.credit_cap,
......
......@@ -20,6 +20,9 @@
* Few changes needed, mainly the hard_routing code and
* the netlink socket protocol (we're NETLINK_IP6_FW).
* 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
+ * 2005-02-04: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
......@@ -64,6 +67,8 @@ static DEFINE_RWLOCK(queue_lock);
static int peer_pid;
static unsigned int copy_range;
static unsigned int queue_total;
+ static unsigned int queue_dropped = 0;
+ static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem);
......@@ -75,18 +80,11 @@ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
kfree(entry);
}
- static inline int
+ static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
- if (queue_total >= queue_maxlen) {
- if (net_ratelimit())
- printk(KERN_WARNING "ip6_queue: full at %d entries, "
- "dropping packet(s).\n", queue_total);
- return -ENOSPC;
- }
list_add(&entry->list, &queue_list);
queue_total++;
- return 0;
}
/*
......@@ -312,14 +310,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
if (!peer_pid)
goto err_out_free_nskb;
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip6_queue: full at %d entries, "
+ "dropping packet(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0)
+ if (status < 0) {
+ queue_user_dropped++;
+ goto err_out_unlock;
+ }
- status = __ipq_enqueue_entry(entry);
- if (status < 0)
- goto err_out_unlock;
+ __ipq_enqueue_entry(entry);
write_unlock_bh(&queue_lock);
return status;
......@@ -639,12 +647,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
"Queue max. length : %u\n",
"Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netfilter dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
- queue_maxlen);
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
read_unlock_bh(&queue_lock);
......
......@@ -952,7 +952,7 @@ translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < num_possible_cpus(); i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
......@@ -974,7 +974,7 @@ replace_table(struct ip6t_table *table,
struct ip6t_entry *table_base;
unsigned int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < num_possible_cpus(); i++) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
......@@ -1021,7 +1021,7 @@ get_counters(const struct ip6t_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
i = 0;
IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
......@@ -1155,7 +1155,7 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(tmp.size) * NR_CPUS);
+ + SMP_ALIGN(tmp.size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......@@ -1469,7 +1469,7 @@ int ip6t_register_table(struct ip6t_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(repl->size) * NR_CPUS);
+ + SMP_ALIGN(repl->size) * num_possible_cpus());
if (!newinfo)
return -ENOMEM;
......