Commit 295ff7ed authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[TIMEWAIT]: Introduce inet_timewait_death_row

This groups all of the tables and variables associated with the TCP timewait
scheduling/recycling/killing code, which can now be isolated from the
TCP-specific code and used by other transport protocols, such as DCCP.

Next changeset will move this code to net/ipv4/inet_timewait_sock.c
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0b4e03bf
...@@ -19,13 +19,69 @@ ...@@ -19,13 +19,69 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/timer.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/workqueue.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/tcp_states.h> #include <net/tcp_states.h>
#include <asm/atomic.h> #include <asm/atomic.h>
struct inet_hashinfo;
/* log2 of the number of slots in the short-time (recycle) timewait calendar. */
#define INET_TWDR_RECYCLE_SLOTS_LOG 5
/* Number of slots in that calendar: 1 << 5 == 32. */
#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
/*
* INET_TWDR_RECYCLE_TICK is the log2 of the width of one calendar slot,
* in jiffies (it is only ever used as a shift count).
*
* Timeouts longer than about 4 seconds take the "slow" path, where no
* recycling is required, so the tick is selected to make the whole calendar
* cover a range of about 4 seconds: the branch taken for HZ <= 2^k defines
* the tick as (k + 2 - INET_TWDR_RECYCLE_SLOTS_LOG), hence
* INET_TWDR_RECYCLE_SLOTS << INET_TWDR_RECYCLE_TICK == 2^(k + 2) jiffies,
* i.e. 4 * 2^k jiffies.
*
* HZ values outside (16, 4096] are rejected at compile time.
*/
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#else
# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#endif
/* TIME_WAIT reaping mechanism. */
/*
* Number of slots in the slow-path table (cells[] below). Slot indices are
* wrapped with "& (INET_TWDR_TWKILL_SLOTS - 1)", which is why the value has
* to stay a power of 2.
*/
#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
/*
* Once more than this many sockets have been killed in one pass over a
* slot, inet_twdr_do_twkill_work() stops and reports "quota exceeded".
*/
#define INET_TWDR_TWKILL_QUOTA 100
/*
* Groups the tables, timers and tunables of the timewait
* scheduling/recycling/killing code, so that the code takes a pointer to
* one of these instead of touching TCP-private globals. TCP's instance is
* tcp_death_row.
*/
struct inet_timewait_death_row {
/* Short-time timewait calendar */
/*
* Current position in twcal_row[]; -1 while the calendar is idle
* (inet_twsk_schedule() resets it to 0 and arms twcal_timer,
* inet_twdr_twcal_tick() puts it back to -1 once nothing is pending).
*/
int twcal_hand;
/* jiffies value that corresponds to the slot at twcal_hand. */
int twcal_jiffie;
/* Timer driving the calendar; tcp_death_row runs inet_twdr_twcal_tick(). */
struct timer_list twcal_timer;
/* Calendar slots, each INET_TWDR_RECYCLE_TICK (log2 jiffies) wide. */
struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS];
/* Held around the updates of the lists and counters of this structure. */
spinlock_t death_lock;
/* Number of timewait sockets currently queued in twcal_row[] or cells[]. */
int tw_count;
/* Delay, in jiffies, between two runs of tw_timer. */
int period;
/*
* Bitmask of cells[] slots on which the timer ran out of quota; those
* slots are finished later by twkill_work.
*/
u32 thread_slots;
/* Deferred work; tcp_death_row runs inet_twdr_twkill_work(). */
struct work_struct twkill_work;
/* Slow-path timer; tcp_death_row runs inet_twdr_hangman(). */
struct timer_list tw_timer;
/* Index of the cells[] slot that the next tw_timer run will reap. */
int slot;
/* Slow-path table used for timeouts too long for the calendar. */
struct hlist_head cells[INET_TWDR_TWKILL_SLOTS];
/* Hash tables handed to __inet_twsk_kill() when a socket is reaped. */
struct inet_hashinfo *hashinfo;
/* Backs the "tcp_tw_recycle" sysctl for tcp_death_row. */
int sysctl_tw_recycle;
/*
* Backs the "tcp_max_tw_buckets" sysctl for tcp_death_row;
* tcp_time_wait() only allocates a timewait socket while tw_count is
* below this value.
*/
int sysctl_max_tw_buckets;
};
#if (BITS_PER_LONG == 64) #if (BITS_PER_LONG == 64)
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
#else #else
...@@ -33,7 +89,6 @@ ...@@ -33,7 +89,6 @@
#endif #endif
struct inet_bind_bucket; struct inet_bind_bucket;
struct inet_hashinfo;
/* /*
* This is a TIME_WAIT sock. It works around the memory consumption * This is a TIME_WAIT sock. It works around the memory consumption
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <net/inet_connection_sock.h> #include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h> #include <net/inet_hashtables.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <net/request_sock.h> #include <net/request_sock.h>
...@@ -42,9 +43,9 @@ ...@@ -42,9 +43,9 @@
extern struct inet_hashinfo tcp_hashinfo; extern struct inet_hashinfo tcp_hashinfo;
extern atomic_t tcp_orphan_count; extern atomic_t tcp_orphan_count;
extern int tcp_tw_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo); extern void tcp_time_wait(struct sock *sk, int state, int timeo);
extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr);
#define MAX_TCP_HEADER (128 + MAX_HEADER) #define MAX_TCP_HEADER (128 + MAX_HEADER)
...@@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); ...@@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
* timestamps. It must be less than * timestamps. It must be less than
* minimal timewait lifetime. * minimal timewait lifetime.
*/ */
#define TCP_TW_RECYCLE_SLOTS_LOG 5
#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
/* If time > 4sec, it is "slow" path, no recycling is required,
so that we select tick to get range about 4 seconds.
*/
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
#else
# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
#endif
/* /*
* TCP option * TCP option
*/ */
...@@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); ...@@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
#define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_CORK 2 /* Socket is corked */
#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */ /* sysctl variables for tcp */
extern int sysctl_tcp_timestamps; extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack; extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_tw_recycle;
extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_keepalive_intvl;
...@@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg; ...@@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_max_tw_buckets;
extern int sysctl_tcp_fack; extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering; extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn; extern int sysctl_tcp_ecn;
......
...@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) ...@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq); socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_tw_count, atomic_read(&tcp_sockets_allocated), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated)); atomic_read(&tcp_memory_allocated));
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
......
...@@ -259,7 +259,7 @@ ctl_table ipv4_table[] = { ...@@ -259,7 +259,7 @@ ctl_table ipv4_table[] = {
{ {
.ctl_name = NET_TCP_MAX_TW_BUCKETS, .ctl_name = NET_TCP_MAX_TW_BUCKETS,
.procname = "tcp_max_tw_buckets", .procname = "tcp_max_tw_buckets",
.data = &sysctl_tcp_max_tw_buckets, .data = &tcp_death_row.sysctl_max_tw_buckets,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
...@@ -363,7 +363,7 @@ ctl_table ipv4_table[] = { ...@@ -363,7 +363,7 @@ ctl_table ipv4_table[] = {
{ {
.ctl_name = NET_TCP_TW_RECYCLE, .ctl_name = NET_TCP_TW_RECYCLE,
.procname = "tcp_tw_recycle", .procname = "tcp_tw_recycle",
.data = &sysctl_tcp_tw_recycle, .data = &tcp_death_row.sysctl_tw_recycle,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
......
...@@ -2109,12 +2109,12 @@ void __init tcp_init(void) ...@@ -2109,12 +2109,12 @@ void __init tcp_init(void)
if (order >= 4) { if (order >= 4) {
sysctl_local_port_range[0] = 32768; sysctl_local_port_range[0] = 32768;
sysctl_local_port_range[1] = 61000; sysctl_local_port_range[1] = 61000;
sysctl_tcp_max_tw_buckets = 180000; tcp_death_row.sysctl_max_tw_buckets = 180000;
sysctl_tcp_max_orphans = 4096 << (order - 4); sysctl_tcp_max_orphans = 4096 << (order - 4);
sysctl_max_syn_backlog = 1024; sysctl_max_syn_backlog = 1024;
} else if (order < 3) { } else if (order < 3) {
sysctl_local_port_range[0] = 1024 * (3 - order); sysctl_local_port_range[0] = 1024 * (3 - order);
sysctl_tcp_max_tw_buckets >>= (3 - order); tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
sysctl_tcp_max_orphans >>= (3 - order); sysctl_tcp_max_orphans >>= (3 - order);
sysctl_max_syn_backlog = 128; sysctl_max_syn_backlog = 128;
} }
......
...@@ -199,7 +199,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, ...@@ -199,7 +199,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) { } else if (tw) {
/* Silly. Should hash-dance instead... */ /* Silly. Should hash-dance instead... */
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw); inet_twsk_put(tw);
...@@ -291,7 +291,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) ...@@ -291,7 +291,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
spin_unlock(&head->lock); spin_unlock(&head->lock);
if (tw) { if (tw) {
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);;
inet_twsk_put(tw); inet_twsk_put(tw);
} }
...@@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0; tp->write_seq = 0;
} }
if (sysctl_tcp_tw_recycle && if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt); struct inet_peer *peer = rt_get_peer(rt);
...@@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* are made in the function processing timewait state. * are made in the function processing timewait state.
*/ */
if (tmp_opt.saw_tstamp && if (tmp_opt.saw_tstamp &&
sysctl_tcp_tw_recycle && tcp_death_row.sysctl_tw_recycle &&
(dst = inet_csk_route_req(sk, req)) != NULL && (dst = inet_csk_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
peer->v4daddr == saddr) { peer->v4daddr == saddr) {
...@@ -1305,7 +1305,8 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -1305,7 +1305,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
ntohs(th->dest), ntohs(th->dest),
inet_iif(skb)); inet_iif(skb));
if (sk2) { if (sk2) {
tcp_tw_deschedule((struct inet_timewait_sock *)sk); inet_twsk_deschedule((struct inet_timewait_sock *)sk,
&tcp_death_row);
inet_twsk_put((struct inet_timewait_sock *)sk); inet_twsk_put((struct inet_timewait_sock *)sk);
sk = sk2; sk = sk2;
goto process; goto process;
......
...@@ -35,13 +35,37 @@ ...@@ -35,13 +35,37 @@
#define SYNC_INIT 1 #define SYNC_INIT 1
#endif #endif
int sysctl_tcp_tw_recycle; /* New-style handling of TIME_WAIT sockets. */
int sysctl_tcp_max_tw_buckets = NR_FILE*2;
static void inet_twdr_hangman(unsigned long data);
static void inet_twdr_twkill_work(void *data);
static void inet_twdr_twcal_tick(unsigned long data);
int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_abort_on_overflow; int sysctl_tcp_abort_on_overflow;
static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); struct inet_timewait_death_row tcp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
.period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
.death_lock = SPIN_LOCK_UNLOCKED,
.hashinfo = &tcp_hashinfo,
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&tcp_death_row),
.twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
inet_twdr_twkill_work,
&tcp_death_row),
/* Short-time timewait calendar */
.twcal_hand = -1,
.twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
(unsigned long)&tcp_death_row),
};
EXPORT_SYMBOL_GPL(tcp_death_row);
static void inet_twsk_schedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr,
const int timeo);
static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{ {
...@@ -52,10 +76,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) ...@@ -52,10 +76,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
return (seq == e_win && seq == end_seq); return (seq == e_win && seq == end_seq);
} }
/* New-style handling of TIME_WAIT sockets. */
int tcp_tw_count;
/* /*
* * Main purpose of TIME-WAIT state is to close connection gracefully, * * Main purpose of TIME-WAIT state is to close connection gracefully,
* when one of ends sits in LAST-ACK or CLOSING retransmitting FIN * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
...@@ -132,7 +152,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -132,7 +152,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (!th->fin || if (!th->fin ||
TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst: kill_with_rst:
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
inet_twsk_put(tw); inet_twsk_put(tw);
return TCP_TW_RST; return TCP_TW_RST;
} }
...@@ -151,11 +171,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -151,11 +171,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* do not undertsnad recycling in any case, it not * do not undertsnad recycling in any case, it not
* a big problem in practice. --ANK */ * a big problem in practice. --ANK */
if (tw->tw_family == AF_INET && if (tw->tw_family == AF_INET &&
sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
tcp_v4_tw_remember_stamp(tw)) tcp_v4_tw_remember_stamp(tw))
tcp_tw_schedule(tw, tw->tw_timeout); inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout);
else else
tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK; return TCP_TW_ACK;
} }
...@@ -188,12 +208,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -188,12 +208,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
*/ */
if (sysctl_tcp_rfc1337 == 0) { if (sysctl_tcp_rfc1337 == 0) {
kill: kill:
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
inet_twsk_put(tw); inet_twsk_put(tw);
return TCP_TW_SUCCESS; return TCP_TW_SUCCESS;
} }
} }
tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
if (tmp_opt.saw_tstamp) { if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval; tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
...@@ -243,7 +263,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -243,7 +263,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Do not reschedule in the last case. * Do not reschedule in the last case.
*/ */
if (paws_reject || th->ack) if (paws_reject || th->ack)
tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
/* Send ACK. Note, we do not put the bucket, /* Send ACK. Note, we do not put the bucket,
* it will be released by caller. * it will be released by caller.
...@@ -263,10 +283,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) ...@@ -263,10 +283,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
int recycle_ok = 0; int recycle_ok = 0;
if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tp->af_specific->remember_stamp(sk); recycle_ok = tp->af_specific->remember_stamp(sk);
if (tcp_tw_count < sysctl_tcp_max_tw_buckets) if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
tw = inet_twsk_alloc(sk, state); tw = inet_twsk_alloc(sk, state);
if (tw != NULL) { if (tw != NULL) {
...@@ -306,7 +326,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) ...@@ -306,7 +326,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
timeo = TCP_TIMEWAIT_LEN; timeo = TCP_TIMEWAIT_LEN;
} }
tcp_tw_schedule(tw, timeo); inet_twsk_schedule(tw, &tcp_death_row, timeo);
inet_twsk_put(tw); inet_twsk_put(tw);
} else { } else {
/* Sorry, if we're out of memory, just CLOSE this /* Sorry, if we're out of memory, just CLOSE this
...@@ -321,26 +341,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) ...@@ -321,26 +341,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcp_done(sk); tcp_done(sk);
} }
/* Kill off TIME_WAIT sockets once their lifetime has expired. */
static int tcp_tw_death_row_slot;
static void tcp_twkill(unsigned long);
/* TIME_WAIT reaping mechanism. */
#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
#define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)
#define TCP_TWKILL_QUOTA 100
static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS];
static DEFINE_SPINLOCK(tw_death_lock);
static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0);
static void twkill_work(void *);
static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL);
static u32 twkill_thread_slots;
/* Returns non-zero if quota exceeded. */ /* Returns non-zero if quota exceeded. */
static int tcp_do_twkill_work(int slot, unsigned int quota) static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
const int slot)
{ {
struct inet_timewait_sock *tw; struct inet_timewait_sock *tw;
struct hlist_node *node; struct hlist_node *node;
...@@ -356,19 +359,19 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) ...@@ -356,19 +359,19 @@ static int tcp_do_twkill_work(int slot, unsigned int quota)
killed = 0; killed = 0;
ret = 0; ret = 0;
rescan: rescan:
inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
__inet_twsk_del_dead_node(tw); __inet_twsk_del_dead_node(tw);
spin_unlock(&tw_death_lock); spin_unlock(&twdr->death_lock);
__inet_twsk_kill(tw, &tcp_hashinfo); __inet_twsk_kill(tw, twdr->hashinfo);
inet_twsk_put(tw); inet_twsk_put(tw);
killed++; killed++;
spin_lock(&tw_death_lock); spin_lock(&twdr->death_lock);
if (killed > quota) { if (killed > INET_TWDR_TWKILL_QUOTA) {
ret = 1; ret = 1;
break; break;
} }
/* While we dropped tw_death_lock, another cpu may have /* While we dropped twdr->death_lock, another cpu may have
* killed off the next TW bucket in the list, therefore * killed off the next TW bucket in the list, therefore
* do a fresh re-read of the hlist head node with the * do a fresh re-read of the hlist head node with the
* lock reacquired. We still use the hlist traversal * lock reacquired. We still use the hlist traversal
...@@ -377,67 +380,68 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) ...@@ -377,67 +380,68 @@ static int tcp_do_twkill_work(int slot, unsigned int quota)
goto rescan; goto rescan;
} }
tcp_tw_count -= killed; twdr->tw_count -= killed;
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
return ret; return ret;
} }
static void tcp_twkill(unsigned long dummy) static void inet_twdr_hangman(unsigned long data)
{ {
int need_timer, ret; struct inet_timewait_death_row *twdr;
int unsigned need_timer;
spin_lock(&tw_death_lock); twdr = (struct inet_timewait_death_row *)data;
spin_lock(&twdr->death_lock);
if (tcp_tw_count == 0) if (twdr->tw_count == 0)
goto out; goto out;
need_timer = 0; need_timer = 0;
ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA); if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
if (ret) { twdr->thread_slots |= (1 << twdr->slot);
twkill_thread_slots |= (1 << tcp_tw_death_row_slot);
mb(); mb();
schedule_work(&tcp_twkill_work); schedule_work(&twdr->twkill_work);
need_timer = 1; need_timer = 1;
} else { } else {
/* We purged the entire slot, anything left? */ /* We purged the entire slot, anything left? */
if (tcp_tw_count) if (twdr->tw_count)
need_timer = 1; need_timer = 1;
} }
tcp_tw_death_row_slot = twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
if (need_timer) if (need_timer)
mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD); mod_timer(&twdr->tw_timer, jiffies + twdr->period);
out: out:
spin_unlock(&tw_death_lock); spin_unlock(&twdr->death_lock);
} }
extern void twkill_slots_invalid(void); extern void twkill_slots_invalid(void);
static void twkill_work(void *dummy) static void inet_twdr_twkill_work(void *data)
{ {
struct inet_timewait_death_row *twdr = data;
int i; int i;
if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8)) if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
twkill_slots_invalid(); twkill_slots_invalid();
while (twkill_thread_slots) { while (twdr->thread_slots) {
spin_lock_bh(&tw_death_lock); spin_lock_bh(&twdr->death_lock);
for (i = 0; i < TCP_TWKILL_SLOTS; i++) { for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
if (!(twkill_thread_slots & (1 << i))) if (!(twdr->thread_slots & (1 << i)))
continue; continue;
while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) { while (inet_twdr_do_twkill_work(twdr, i) != 0) {
if (need_resched()) { if (need_resched()) {
spin_unlock_bh(&tw_death_lock); spin_unlock_bh(&twdr->death_lock);
schedule(); schedule();
spin_lock_bh(&tw_death_lock); spin_lock_bh(&twdr->death_lock);
} }
} }
twkill_thread_slots &= ~(1 << i); twdr->thread_slots &= ~(1 << i);
} }
spin_unlock_bh(&tw_death_lock); spin_unlock_bh(&twdr->death_lock);
} }
} }
...@@ -446,28 +450,22 @@ static void twkill_work(void *dummy) ...@@ -446,28 +450,22 @@ static void twkill_work(void *dummy)
*/ */
/* This is for handling early-kills of TIME_WAIT sockets. */ /* This is for handling early-kills of TIME_WAIT sockets. */
void tcp_tw_deschedule(struct inet_timewait_sock *tw) void inet_twsk_deschedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr)
{ {
spin_lock(&tw_death_lock); spin_lock(&twdr->death_lock);
if (inet_twsk_del_dead_node(tw)) { if (inet_twsk_del_dead_node(tw)) {
inet_twsk_put(tw); inet_twsk_put(tw);
if (--tcp_tw_count == 0) if (--twdr->tw_count == 0)
del_timer(&tcp_tw_timer); del_timer(&twdr->tw_timer);
} }
spin_unlock(&tw_death_lock); spin_unlock(&twdr->death_lock);
__inet_twsk_kill(tw, &tcp_hashinfo); __inet_twsk_kill(tw, twdr->hashinfo);
} }
/* Short-time timewait calendar */ static void inet_twsk_schedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr,
static int tcp_twcal_hand = -1; const int timeo)
static int tcp_twcal_jiffie;
static void tcp_twcal_tick(unsigned long);
static struct timer_list tcp_twcal_timer =
TIMER_INITIALIZER(tcp_twcal_tick, 0, 0);
static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];
static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo)
{ {
struct hlist_head *list; struct hlist_head *list;
int slot; int slot;
...@@ -496,100 +494,106 @@ static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) ...@@ -496,100 +494,106 @@ static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo)
* is greater than TS tick!) and detect old duplicates with help * is greater than TS tick!) and detect old duplicates with help
* of PAWS. * of PAWS.
*/ */
slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK; slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
spin_lock(&tw_death_lock); spin_lock(&twdr->death_lock);
/* Unlink it, if it was scheduled */ /* Unlink it, if it was scheduled */
if (inet_twsk_del_dead_node(tw)) if (inet_twsk_del_dead_node(tw))
tcp_tw_count--; twdr->tw_count--;
else else
atomic_inc(&tw->tw_refcnt); atomic_inc(&tw->tw_refcnt);
if (slot >= TCP_TW_RECYCLE_SLOTS) { if (slot >= INET_TWDR_RECYCLE_SLOTS) {
/* Schedule to slow timer */ /* Schedule to slow timer */
if (timeo >= TCP_TIMEWAIT_LEN) { if (timeo >= TCP_TIMEWAIT_LEN) {
slot = TCP_TWKILL_SLOTS-1; slot = INET_TWDR_TWKILL_SLOTS - 1;
} else { } else {
slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD; slot = (timeo + twdr->period - 1) / twdr->period;
if (slot >= TCP_TWKILL_SLOTS) if (slot >= INET_TWDR_TWKILL_SLOTS)
slot = TCP_TWKILL_SLOTS-1; slot = INET_TWDR_TWKILL_SLOTS - 1;
} }
tw->tw_ttd = jiffies + timeo; tw->tw_ttd = jiffies + timeo;
slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1); slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
list = &tcp_tw_death_row[slot]; list = &twdr->cells[slot];
} else { } else {
tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK); tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
if (tcp_twcal_hand < 0) { if (twdr->twcal_hand < 0) {
tcp_twcal_hand = 0; twdr->twcal_hand = 0;
tcp_twcal_jiffie = jiffies; twdr->twcal_jiffie = jiffies;
tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK); twdr->twcal_timer.expires = twdr->twcal_jiffie +
add_timer(&tcp_twcal_timer); (slot << INET_TWDR_RECYCLE_TICK);
add_timer(&twdr->twcal_timer);
} else { } else {
if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK))) if (time_after(twdr->twcal_timer.expires,
mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK)); jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1); mod_timer(&twdr->twcal_timer,
jiffies + (slot << INET_TWDR_RECYCLE_TICK));
slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
} }
list = &tcp_twcal_row[slot]; list = &twdr->twcal_row[slot];
} }
hlist_add_head(&tw->tw_death_node, list); hlist_add_head(&tw->tw_death_node, list);
if (tcp_tw_count++ == 0) if (twdr->tw_count++ == 0)
mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); mod_timer(&twdr->tw_timer, jiffies + twdr->period);
spin_unlock(&tw_death_lock); spin_unlock(&twdr->death_lock);
} }
void tcp_twcal_tick(unsigned long dummy) void inet_twdr_twcal_tick(unsigned long data)
{ {
struct inet_timewait_death_row *twdr;
int n, slot; int n, slot;
unsigned long j; unsigned long j;
unsigned long now = jiffies; unsigned long now = jiffies;
int killed = 0; int killed = 0;
int adv = 0; int adv = 0;
spin_lock(&tw_death_lock); twdr = (struct inet_timewait_death_row *)data;
if (tcp_twcal_hand < 0)
spin_lock(&twdr->death_lock);
if (twdr->twcal_hand < 0)
goto out; goto out;
slot = tcp_twcal_hand; slot = twdr->twcal_hand;
j = tcp_twcal_jiffie; j = twdr->twcal_jiffie;
for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) { for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
if (time_before_eq(j, now)) { if (time_before_eq(j, now)) {
struct hlist_node *node, *safe; struct hlist_node *node, *safe;
struct inet_timewait_sock *tw; struct inet_timewait_sock *tw;
inet_twsk_for_each_inmate_safe(tw, node, safe, inet_twsk_for_each_inmate_safe(tw, node, safe,
&tcp_twcal_row[slot]) { &twdr->twcal_row[slot]) {
__inet_twsk_del_dead_node(tw); __inet_twsk_del_dead_node(tw);
__inet_twsk_kill(tw, &tcp_hashinfo); __inet_twsk_kill(tw, twdr->hashinfo);
inet_twsk_put(tw); inet_twsk_put(tw);
killed++; killed++;
} }
} else { } else {
if (!adv) { if (!adv) {
adv = 1; adv = 1;
tcp_twcal_jiffie = j; twdr->twcal_jiffie = j;
tcp_twcal_hand = slot; twdr->twcal_hand = slot;
} }
if (!hlist_empty(&tcp_twcal_row[slot])) { if (!hlist_empty(&twdr->twcal_row[slot])) {
mod_timer(&tcp_twcal_timer, j); mod_timer(&twdr->twcal_timer, j);
goto out; goto out;
} }
} }
j += (1<<TCP_TW_RECYCLE_TICK); j += 1 << INET_TWDR_RECYCLE_TICK;
slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1); slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
} }
tcp_twcal_hand = -1; twdr->twcal_hand = -1;
out: out:
if ((tcp_tw_count -= killed) == 0) if ((twdr->tw_count -= killed) == 0)
del_timer(&tcp_tw_timer); del_timer(&twdr->tw_timer);
NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
spin_unlock(&tw_death_lock); spin_unlock(&twdr->death_lock);
} }
/* This is not only more efficient than what we used to do, it eliminates /* This is not only more efficient than what we used to do, it eliminates
...@@ -929,4 +933,4 @@ EXPORT_SYMBOL(tcp_check_req); ...@@ -929,4 +933,4 @@ EXPORT_SYMBOL(tcp_check_req);
EXPORT_SYMBOL(tcp_child_process); EXPORT_SYMBOL(tcp_child_process);
EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_create_openreq_child);
EXPORT_SYMBOL(tcp_timewait_state_process); EXPORT_SYMBOL(tcp_timewait_state_process);
EXPORT_SYMBOL(tcp_tw_deschedule); EXPORT_SYMBOL(inet_twsk_deschedule);
...@@ -521,7 +521,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, ...@@ -521,7 +521,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) { } else if (tw) {
/* Silly. Should hash-dance instead... */ /* Silly. Should hash-dance instead... */
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw); inet_twsk_put(tw);
...@@ -611,7 +611,7 @@ static int tcp_v6_hash_connect(struct sock *sk) ...@@ -611,7 +611,7 @@ static int tcp_v6_hash_connect(struct sock *sk)
spin_unlock(&head->lock); spin_unlock(&head->lock);
if (tw) { if (tw) {
tcp_tw_deschedule(tw); inet_twsk_deschedule(tw, &tcp_death_row);
inet_twsk_put(tw); inet_twsk_put(tw);
} }
...@@ -1820,8 +1820,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) ...@@ -1820,8 +1820,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
if (sk2 != NULL) { if (sk2 != NULL) {
tcp_tw_deschedule((struct inet_timewait_sock *)sk); struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_put((struct inet_timewait_sock *)sk); inet_twsk_deschedule(tw, &tcp_death_row);
inet_twsk_put(tw);
sk = sk2; sk = sk2;
goto process; goto process;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment