[NET] Generalise tcp memory pressure handling

Signed-off-by: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
parent fb5f695c
...@@ -491,10 +491,11 @@ extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p); ...@@ -491,10 +491,11 @@ extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p); extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err); extern int sk_stream_error(struct sock *sk, int flags, int err);
extern void sk_stream_kill_queues(struct sock *sk);
extern int sk_wait_data(struct sock *sk, long *timeo); extern int sk_wait_data(struct sock *sk, long *timeo);
/* IP protocol blocks we attach to sockets. /* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface * socket layer -> transport layer interface
* transport -> network interface is defined by struct inet_proto * transport -> network interface is defined by struct inet_proto
*/ */
...@@ -538,6 +539,21 @@ struct proto { ...@@ -538,6 +539,21 @@ struct proto {
void (*unhash)(struct sock *sk); void (*unhash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum); int (*get_port)(struct sock *sk, unsigned short snum);
/* Memory pressure */
void (*enter_memory_pressure)(void);
atomic_t memory_allocated; /* Current allocated memory. */
atomic_t sockets_allocated; /* Current number of sockets. */
/*
 * Pressure flag: try to collapse.
 * Technical note: it is read and written by multiple contexts
 * non-atomically. All of sk_stream_mem_schedule() is of this nature:
 * accounting is strict, actions are advisory and have some latency.
 */
int memory_pressure;
int sysctl_mem[3];
int sysctl_wmem[3];
int sysctl_rmem[3];
char name[32]; char name[32];
struct { struct {
...@@ -628,6 +644,22 @@ static inline struct inode *SOCK_INODE(struct socket *socket) ...@@ -628,6 +644,22 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
return &container_of(socket, struct socket_alloc, socket)->vfs_inode; return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
} }
extern void __sk_stream_mem_reclaim(struct sock *sk);
extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE)
/*
 * Convert a byte count into the number of SK_STREAM_MEM_QUANTUM-sized
 * units needed to hold it; any partial unit is rounded up to a full one.
 */
static inline int sk_stream_pages(int amt)
{
	const int rounded_up = amt + SK_STREAM_MEM_QUANTUM - 1;

	return rounded_up / SK_STREAM_MEM_QUANTUM;
}
/*
 * Fast-path wrapper around __sk_stream_mem_reclaim(): the out-of-line
 * reclaim is only invoked once the socket holds at least one full
 * SK_STREAM_MEM_QUANTUM in sk->sk_forward_alloc.
 */
static inline void sk_stream_mem_reclaim(struct sock *sk)
{
	if (sk->sk_forward_alloc < SK_STREAM_MEM_QUANTUM)
		return;

	__sk_stream_mem_reclaim(sk);
}
/* Used by processes to "lock" a socket state, so that /* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it * interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming * from under us. It essentially blocks any incoming
......
...@@ -594,9 +594,6 @@ extern int sysctl_tcp_fack; ...@@ -594,9 +594,6 @@ extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering; extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn; extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack; extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win; extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_tw_reuse;
...@@ -614,10 +611,6 @@ extern int sysctl_tcp_bic_low_window; ...@@ -614,10 +611,6 @@ extern int sysctl_tcp_bic_low_window;
extern int sysctl_tcp_default_win_scale; extern int sysctl_tcp_default_win_scale;
extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_moderate_rcvbuf;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
struct open_request; struct open_request;
struct or_calltable { struct or_calltable {
...@@ -1867,24 +1860,7 @@ static __inline__ void tcp_openreq_init(struct open_request *req, ...@@ -1867,24 +1860,7 @@ static __inline__ void tcp_openreq_init(struct open_request *req,
req->rmt_port = skb->h.th->source; req->rmt_port = skb->h.th->source;
} }
#define TCP_MEM_QUANTUM ((int)PAGE_SIZE) extern void tcp_enter_memory_pressure(void);
extern void __tcp_mem_reclaim(struct sock *sk);
extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
static inline void tcp_mem_reclaim(struct sock *sk)
{
if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM)
__tcp_mem_reclaim(sk);
}
static inline void tcp_enter_memory_pressure(void)
{
if (!tcp_memory_pressure) {
NET_INC_STATS(TCPMemoryPressures);
tcp_memory_pressure = 1;
}
}
static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp) static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
{ {
...@@ -1893,7 +1869,7 @@ static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, ...@@ -1893,7 +1869,7 @@ static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem,
if (skb) { if (skb) {
skb->truesize += mem; skb->truesize += mem;
if (sk->sk_forward_alloc >= (int)skb->truesize || if (sk->sk_forward_alloc >= (int)skb->truesize ||
tcp_mem_schedule(sk, skb->truesize, 0)) { sk_stream_mem_schedule(sk, skb->truesize, 0)) {
skb_reserve(skb, MAX_TCP_HEADER); skb_reserve(skb, MAX_TCP_HEADER);
return skb; return skb;
} }
...@@ -1913,7 +1889,7 @@ static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp) ...@@ -1913,7 +1889,7 @@ static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
static inline struct page * tcp_alloc_page(struct sock *sk) static inline struct page * tcp_alloc_page(struct sock *sk)
{ {
if (sk->sk_forward_alloc >= (int)PAGE_SIZE || if (sk->sk_forward_alloc >= (int)PAGE_SIZE ||
tcp_mem_schedule(sk, PAGE_SIZE, 0)) { sk_stream_mem_schedule(sk, PAGE_SIZE, 0)) {
struct page *page = alloc_pages(sk->sk_allocation, 0); struct page *page = alloc_pages(sk->sk_allocation, 0);
if (page) if (page)
return page; return page;
...@@ -1929,7 +1905,7 @@ static inline void tcp_writequeue_purge(struct sock *sk) ...@@ -1929,7 +1905,7 @@ static inline void tcp_writequeue_purge(struct sock *sk)
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
sk_stream_free_skb(sk, skb); sk_stream_free_skb(sk, skb);
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
} }
extern void tcp_listen_wlock(void); extern void tcp_listen_wlock(void);
......
...@@ -188,3 +188,100 @@ int sk_stream_error(struct sock *sk, int flags, int err) ...@@ -188,3 +188,100 @@ int sk_stream_error(struct sock *sk, int flags, int err)
} }
EXPORT_SYMBOL(sk_stream_error); EXPORT_SYMBOL(sk_stream_error);
/*
 * Give the whole quanta held in sk->sk_forward_alloc back to the
 * protocol-wide memory_allocated counter and keep only the low bits of
 * sk_forward_alloc on the socket. If the protocol is flagged as under
 * memory pressure and the global counter has dropped below sysctl_mem[0],
 * the pressure flag is cleared.
 */
void __sk_stream_mem_reclaim(struct sock *sk)
{
	int quanta;

	/* Nothing to hand back while less than one full quantum is held. */
	if (sk->sk_forward_alloc < SK_STREAM_MEM_QUANTUM)
		return;

	quanta = sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM;
	atomic_sub(quanta, &sk->sk_prot->memory_allocated);

	/*
	 * Mask off everything at or above the quantum bit.
	 * NOTE(review): this equals the remainder only if the quantum is a
	 * power of two - presumably true for PAGE_SIZE; confirm.
	 */
	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;

	if (!sk->sk_prot->memory_pressure)
		return;

	if (atomic_read(&sk->sk_prot->memory_allocated) <
	    sk->sk_prot->sysctl_mem[0])
		sk->sk_prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_stream_mem_reclaim);
/*
 * Try to charge `size` bytes to socket `sk` against the memory accounting
 * of its protocol (sk->sk_prot).
 *
 * `size` is rounded up to whole SK_STREAM_MEM_QUANTUM units. That amount is
 * added to sk->sk_forward_alloc and to sk->sk_prot->memory_allocated before
 * any limit is examined, and is subtracted again if the charge is refused.
 *
 * `kind` selects which per-socket floor is consulted: non-zero compares
 * sk->sk_rmem_alloc with sysctl_rmem[0], zero compares sk->sk_wmem_queued
 * with sysctl_wmem[0].
 *
 * Returns 1 if the charge stands, 0 if it was undone.
 */
int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
{
int amt = sk_stream_pages(size);
/* Charge optimistically; rolled back at the bottom on refusal. */
sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
atomic_add(amt, &sk->sk_prot->memory_allocated);
/* Under limit. */
if (atomic_read(&sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
/* Below sysctl_mem[0]: clear any stale pressure flag and accept. */
if (sk->sk_prot->memory_pressure)
sk->sk_prot->memory_pressure = 0;
return 1;
}
/* Over hard limit. */
if (atomic_read(&sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
sk->sk_prot->enter_memory_pressure();
goto suppress_allocation;
}
/* Under pressure. */
if (atomic_read(&sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
sk->sk_prot->enter_memory_pressure();
/* A socket still below its per-direction floor ([0] entry) is always allowed. */
if (kind) {
if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
return 1;
} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
return 1;
/*
 * Accept if the pressure flag is not set, or if sysctl_mem[2] is larger
 * than sockets_allocated multiplied by the quanta this socket accounts for
 * (sk_wmem_queued + sk_rmem_alloc + sk_forward_alloc).
 */
if (!sk->sk_prot->memory_pressure ||
sk->sk_prot->sysctl_mem[2] > atomic_read(&sk->sk_prot->sockets_allocated) *
sk_stream_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
return 1;
suppress_allocation:
/* Only the kind == 0 path moderates the send buffer and may still accept. */
if (!kind) {
sk_stream_moderate_sndbuf(sk);
/* Fail only if socket is _under_ its sndbuf.
 * In this case we cannot block, so that we have to fail.
 */
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
return 1;
}
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
atomic_sub(amt, &sk->sk_prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(sk_stream_mem_schedule);
/*
 * Tear down the queues of a stream socket: purge the receive and error
 * queues, reclaim forward-allocated memory through sk_stream_mem_reclaim(),
 * and check with BUG_TRAP that the write queue is already empty and that
 * sk_wmem_queued and sk_forward_alloc are both zero afterwards.
 */
void sk_stream_kill_queues(struct sock *sk)
{
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the error queue. */
__skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue: it is not purged here, only checked to be empty. */
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
sk_stream_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
 * when we get here. All user references to this socket
 * have gone away; only the net layer can touch it.
 */
}
EXPORT_SYMBOL(sk_stream_kill_queues);
...@@ -65,8 +65,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) ...@@ -65,8 +65,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq); socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_tw_count, atomic_read(&tcp_sockets_allocated), tcp_tw_count, atomic_read(&tcp_prot.sockets_allocated),
atomic_read(&tcp_memory_allocated)); atomic_read(&tcp_prot.memory_allocated));
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
......
...@@ -508,24 +508,24 @@ ctl_table ipv4_table[] = { ...@@ -508,24 +508,24 @@ ctl_table ipv4_table[] = {
{ {
.ctl_name = NET_TCP_MEM, .ctl_name = NET_TCP_MEM,
.procname = "tcp_mem", .procname = "tcp_mem",
.data = &sysctl_tcp_mem, .data = &tcp_prot.sysctl_mem,
.maxlen = sizeof(sysctl_tcp_mem), .maxlen = sizeof(tcp_prot.sysctl_mem),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
}, },
{ {
.ctl_name = NET_TCP_WMEM, .ctl_name = NET_TCP_WMEM,
.procname = "tcp_wmem", .procname = "tcp_wmem",
.data = &sysctl_tcp_wmem, .data = &tcp_prot.sysctl_wmem,
.maxlen = sizeof(sysctl_tcp_wmem), .maxlen = sizeof(tcp_prot.sysctl_wmem),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
}, },
{ {
.ctl_name = NET_TCP_RMEM, .ctl_name = NET_TCP_RMEM,
.procname = "tcp_rmem", .procname = "tcp_rmem",
.data = &sysctl_tcp_rmem, .data = &tcp_prot.sysctl_rmem,
.maxlen = sizeof(sysctl_tcp_rmem), .maxlen = sizeof(tcp_prot.sysctl_rmem),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec .proc_handler = &proc_dointvec
}, },
......
...@@ -278,85 +278,11 @@ atomic_t tcp_orphan_count = ATOMIC_INIT(0); ...@@ -278,85 +278,11 @@ atomic_t tcp_orphan_count = ATOMIC_INIT(0);
int sysctl_tcp_default_win_scale; int sysctl_tcp_default_win_scale;
int sysctl_tcp_mem[3]; void tcp_enter_memory_pressure(void)
int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
atomic_t tcp_memory_allocated; /* Current allocated memory. */
atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */
/* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
* All the tcp_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency. */
int tcp_memory_pressure;
#define TCP_PAGES(amt) (((amt) + TCP_MEM_QUANTUM - 1) / TCP_MEM_QUANTUM)
int tcp_mem_schedule(struct sock *sk, int size, int kind)
{
int amt = TCP_PAGES(size);
sk->sk_forward_alloc += amt * TCP_MEM_QUANTUM;
atomic_add(amt, &tcp_memory_allocated);
/* Under limit. */
if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
if (tcp_memory_pressure)
tcp_memory_pressure = 0;
return 1;
}
/* Over hard limit. */
if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) {
tcp_enter_memory_pressure();
goto suppress_allocation;
}
/* Under pressure. */
if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1])
tcp_enter_memory_pressure();
if (kind) {
if (atomic_read(&sk->sk_rmem_alloc) < sysctl_tcp_rmem[0])
return 1;
} else if (sk->sk_wmem_queued < sysctl_tcp_wmem[0])
return 1;
if (!tcp_memory_pressure ||
sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) *
TCP_PAGES(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
return 1;
suppress_allocation:
if (!kind) {
sk_stream_moderate_sndbuf(sk);
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
return 1;
}
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * TCP_MEM_QUANTUM;
atomic_sub(amt, &tcp_memory_allocated);
return 0;
}
void __tcp_mem_reclaim(struct sock *sk)
{ {
if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM) { if (!tcp_prot.memory_pressure) {
atomic_sub(sk->sk_forward_alloc / TCP_MEM_QUANTUM, NET_INC_STATS(TCPMemoryPressures);
&tcp_memory_allocated); tcp_prot.memory_pressure = 1;
sk->sk_forward_alloc &= TCP_MEM_QUANTUM - 1;
if (tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
tcp_memory_pressure = 0;
} }
} }
...@@ -1624,29 +1550,6 @@ void tcp_shutdown(struct sock *sk, int how) ...@@ -1624,29 +1550,6 @@ void tcp_shutdown(struct sock *sk, int how)
} }
} }
static __inline__ void tcp_kill_sk_queues(struct sock *sk)
{
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the error queue. */
__skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue. */
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
tcp_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
* have gone away, only the net layer knows can touch it.
*/
}
/* /*
* At this point, there should be no process reference to this * At this point, there should be no process reference to this
* socket, and thus no user references at all. Therefore we * socket, and thus no user references at all. Therefore we
...@@ -1674,7 +1577,7 @@ void tcp_destroy_sock(struct sock *sk) ...@@ -1674,7 +1577,7 @@ void tcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk); sk->sk_prot->destroy(sk);
tcp_kill_sk_queues(sk); sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk); xfrm_sk_free_policy(sk);
...@@ -1717,7 +1620,7 @@ void tcp_close(struct sock *sk, long timeout) ...@@ -1717,7 +1620,7 @@ void tcp_close(struct sock *sk, long timeout)
__kfree_skb(skb); __kfree_skb(skb);
} }
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
* 3.10, we send a RST here because data was lost. To * 3.10, we send a RST here because data was lost. To
...@@ -1816,10 +1719,10 @@ void tcp_close(struct sock *sk, long timeout) ...@@ -1816,10 +1719,10 @@ void tcp_close(struct sock *sk, long timeout)
} }
} }
if (sk->sk_state != TCP_CLOSE) { if (sk->sk_state != TCP_CLOSE) {
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
(sk->sk_wmem_queued > SOCK_MIN_SNDBUF && (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { atomic_read(&tcp_prot.memory_allocated) > tcp_prot.sysctl_mem[2])) {
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_INFO "TCP: too many of orphaned " printk(KERN_INFO "TCP: too many of orphaned "
"sockets\n"); "sockets\n");
...@@ -2366,15 +2269,15 @@ void __init tcp_init(void) ...@@ -2366,15 +2269,15 @@ void __init tcp_init(void)
} }
tcp_port_rover = sysctl_local_port_range[0] - 1; tcp_port_rover = sysctl_local_port_range[0] - 1;
sysctl_tcp_mem[0] = 768 << order; tcp_prot.sysctl_mem[0] = 768 << order;
sysctl_tcp_mem[1] = 1024 << order; tcp_prot.sysctl_mem[1] = 1024 << order;
sysctl_tcp_mem[2] = 1536 << order; tcp_prot.sysctl_mem[2] = 1536 << order;
if (order < 3) { if (order < 3) {
sysctl_tcp_wmem[2] = 64 * 1024; tcp_prot.sysctl_wmem[2] = 64 * 1024;
sysctl_tcp_rmem[0] = PAGE_SIZE; tcp_prot.sysctl_rmem[0] = PAGE_SIZE;
sysctl_tcp_rmem[1] = 43689; tcp_prot.sysctl_rmem[1] = 43689;
sysctl_tcp_rmem[2] = 2 * 43689; tcp_prot.sysctl_rmem[2] = 2 * 43689;
} }
printk(KERN_INFO "TCP: Hash tables configured " printk(KERN_INFO "TCP: Hash tables configured "
...@@ -2384,9 +2287,6 @@ void __init tcp_init(void) ...@@ -2384,9 +2287,6 @@ void __init tcp_init(void)
tcpdiag_init(); tcpdiag_init();
} }
EXPORT_SYMBOL(__tcp_mem_reclaim);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
EXPORT_SYMBOL(tcp_accept); EXPORT_SYMBOL(tcp_accept);
EXPORT_SYMBOL(tcp_close); EXPORT_SYMBOL(tcp_close);
EXPORT_SYMBOL(tcp_close_state); EXPORT_SYMBOL(tcp_close_state);
...@@ -2402,6 +2302,5 @@ EXPORT_SYMBOL(tcp_sendmsg); ...@@ -2402,6 +2302,5 @@ EXPORT_SYMBOL(tcp_sendmsg);
EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown); EXPORT_SYMBOL(tcp_shutdown);
EXPORT_SYMBOL(tcp_sockets_allocated);
EXPORT_SYMBOL(tcp_statistics); EXPORT_SYMBOL(tcp_statistics);
EXPORT_SYMBOL(tcp_timewait_cachep); EXPORT_SYMBOL(tcp_timewait_cachep);
...@@ -207,7 +207,7 @@ static void tcp_fixup_sndbuf(struct sock *sk) ...@@ -207,7 +207,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
sizeof(struct sk_buff); sizeof(struct sk_buff);
if (sk->sk_sndbuf < 3 * sndmem) if (sk->sk_sndbuf < 3 * sndmem)
sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); sk->sk_sndbuf = min(3 * sndmem, tcp_prot.sysctl_wmem[2]);
} }
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
...@@ -259,7 +259,7 @@ tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) ...@@ -259,7 +259,7 @@ tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
/* Check #1 */ /* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp && if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) && (int)tp->rcv_ssthresh < tcp_space(sk) &&
!tcp_memory_pressure) { !tcp_prot.memory_pressure) {
int incr; int incr;
/* Check #2. Increase window, if skb with such overhead /* Check #2. Increase window, if skb with such overhead
...@@ -291,7 +291,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) ...@@ -291,7 +291,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
while (tcp_win_from_space(rcvmem) < tp->advmss) while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128; rcvmem += 128;
if (sk->sk_rcvbuf < 4 * rcvmem) if (sk->sk_rcvbuf < 4 * rcvmem)
sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); sk->sk_rcvbuf = min(4 * rcvmem, tcp_prot.sysctl_rmem[2]);
} }
/* 4. Try to fixup all. It is made iimediately after connection enters /* 4. Try to fixup all. It is made iimediately after connection enters
...@@ -347,12 +347,12 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp) ...@@ -347,12 +347,12 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
* do not clamp window. Try to expand rcvbuf instead. * do not clamp window. Try to expand rcvbuf instead.
*/ */
if (ofo_win) { if (ofo_win) {
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && if (sk->sk_rcvbuf < tcp_prot.sysctl_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure && !tcp_prot.memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) atomic_read(&tcp_prot.memory_allocated) < tcp_prot.sysctl_mem[0])
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]); tcp_prot.sysctl_rmem[2]);
} }
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
app_win += ofo_win; app_win += ofo_win;
...@@ -473,7 +473,7 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -473,7 +473,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvmem = (tp->advmss + MAX_TCP_HEADER + rcvmem = (tp->advmss + MAX_TCP_HEADER +
16 + sizeof(struct sk_buff)); 16 + sizeof(struct sk_buff));
space *= rcvmem; space *= rcvmem;
space = min(space, sysctl_tcp_rmem[2]); space = min(space, tcp_prot.sysctl_rmem[2]);
if (space > sk->sk_rcvbuf) if (space > sk->sk_rcvbuf)
sk->sk_rcvbuf = space; sk->sk_rcvbuf = space;
} }
...@@ -527,7 +527,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b ...@@ -527,7 +527,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
* restart window, so that we send ACKs quickly. * restart window, so that we send ACKs quickly.
*/ */
tcp_incr_quickack(tp); tcp_incr_quickack(tp);
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
} }
} }
tp->ack.lrcvtime = now; tp->ack.lrcvtime = now;
...@@ -3158,7 +3158,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) ...@@ -3158,7 +3158,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
__skb_queue_purge(&tp->out_of_order_queue); __skb_queue_purge(&tp->out_of_order_queue);
if (tp->sack_ok) if (tp->sack_ok)
tcp_sack_reset(tp); tcp_sack_reset(tp);
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
if (!sock_flag(sk, SOCK_DEAD)) { if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk); sk->sk_state_change(sk);
...@@ -3393,7 +3393,7 @@ static void tcp_ofo_queue(struct sock *sk) ...@@ -3393,7 +3393,7 @@ static void tcp_ofo_queue(struct sock *sk)
static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb) static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb)
{ {
return (int)skb->truesize <= sk->sk_forward_alloc || return (int)skb->truesize <= sk->sk_forward_alloc ||
tcp_mem_schedule(sk, skb->truesize, 1); sk_stream_mem_schedule(sk, skb->truesize, 1);
} }
static int tcp_prune_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk);
...@@ -3760,14 +3760,14 @@ static int tcp_prune_queue(struct sock *sk) ...@@ -3760,14 +3760,14 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk, tp); tcp_clamp_window(sk, tp);
else if (tcp_memory_pressure) else if (tcp_prot.memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk); tcp_collapse_ofo_queue(sk);
tcp_collapse(sk, sk->sk_receive_queue.next, tcp_collapse(sk, sk->sk_receive_queue.next,
(struct sk_buff*)&sk->sk_receive_queue, (struct sk_buff*)&sk->sk_receive_queue,
tp->copied_seq, tp->rcv_nxt); tp->copied_seq, tp->rcv_nxt);
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0; return 0;
...@@ -3788,7 +3788,7 @@ static int tcp_prune_queue(struct sock *sk) ...@@ -3788,7 +3788,7 @@ static int tcp_prune_queue(struct sock *sk)
*/ */
if (tp->sack_ok) if (tp->sack_ok)
tcp_sack_reset(tp); tcp_sack_reset(tp);
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
} }
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
...@@ -3840,15 +3840,15 @@ static void tcp_new_space(struct sock *sk) ...@@ -3840,15 +3840,15 @@ static void tcp_new_space(struct sock *sk)
if (tp->packets_out < tp->snd_cwnd && if (tp->packets_out < tp->snd_cwnd &&
!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
!tcp_memory_pressure && !tcp_prot.memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { atomic_read(&tcp_prot.memory_allocated) < tcp_prot.sysctl_mem[0]) {
int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) + int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd, demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1); tp->reordering + 1);
sndmem *= 2*demanded; sndmem *= 2*demanded;
if (sndmem > sk->sk_sndbuf) if (sndmem > sk->sk_sndbuf)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); sk->sk_sndbuf = min(sndmem, tcp_prot.sysctl_wmem[2]);
tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_stamp = tcp_time_stamp;
} }
......
...@@ -2086,10 +2086,10 @@ static int tcp_v4_init_sock(struct sock *sk) ...@@ -2086,10 +2086,10 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->af_specific = &ipv4_specific; tp->af_specific = &ipv4_specific;
sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_sndbuf = tcp_prot.sysctl_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; sk->sk_rcvbuf = tcp_prot.sysctl_rmem[1];
atomic_inc(&tcp_sockets_allocated); atomic_inc(&tcp_prot.sockets_allocated);
return 0; return 0;
} }
...@@ -2113,7 +2113,7 @@ static int tcp_v4_destroy_sock(struct sock *sk) ...@@ -2113,7 +2113,7 @@ static int tcp_v4_destroy_sock(struct sock *sk)
if (tp->bind_hash) if (tp->bind_hash)
tcp_put_port(sk); tcp_put_port(sk);
atomic_dec(&tcp_sockets_allocated); atomic_dec(&tcp_prot.sockets_allocated);
return 0; return 0;
} }
...@@ -2582,23 +2582,26 @@ void tcp4_proc_exit(void) ...@@ -2582,23 +2582,26 @@ void tcp4_proc_exit(void)
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
struct proto tcp_prot = { struct proto tcp_prot = {
.name = "TCP", .name = "TCP",
.close = tcp_close, .close = tcp_close,
.connect = tcp_v4_connect, .connect = tcp_v4_connect,
.disconnect = tcp_disconnect, .disconnect = tcp_disconnect,
.accept = tcp_accept, .accept = tcp_accept,
.ioctl = tcp_ioctl, .ioctl = tcp_ioctl,
.init = tcp_v4_init_sock, .init = tcp_v4_init_sock,
.destroy = tcp_v4_destroy_sock, .destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown, .shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt, .setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt, .getsockopt = tcp_getsockopt,
.sendmsg = tcp_sendmsg, .sendmsg = tcp_sendmsg,
.recvmsg = tcp_recvmsg, .recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v4_do_rcv, .backlog_rcv = tcp_v4_do_rcv,
.hash = tcp_v4_hash, .hash = tcp_v4_hash,
.unhash = tcp_unhash, .unhash = tcp_unhash,
.get_port = tcp_v4_get_port, .get_port = tcp_v4_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sysctl_wmem = { 4 * 1024, 16 * 1024, 128 * 1024 },
.sysctl_rmem = { 4 * 1024, 87380, 87380 * 2 },
}; };
......
...@@ -801,7 +801,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, ...@@ -801,7 +801,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
#ifdef INET_REFCNT_DEBUG #ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr); atomic_inc(&inet_sock_nr);
#endif #endif
atomic_inc(&tcp_sockets_allocated); atomic_inc(&tcp_prot.sockets_allocated);
if (sock_flag(newsk, SOCK_KEEPOPEN)) if (sock_flag(newsk, SOCK_KEEPOPEN))
tcp_reset_keepalive_timer(newsk, tcp_reset_keepalive_timer(newsk,
......
...@@ -672,7 +672,7 @@ u32 __tcp_select_window(struct sock *sk) ...@@ -672,7 +672,7 @@ u32 __tcp_select_window(struct sock *sk)
if (free_space < full_space/2) { if (free_space < full_space/2) {
tp->ack.quick = 0; tp->ack.quick = 0;
if (tcp_memory_pressure) if (tcp_prot.memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
if (free_space < mss) if (free_space < mss)
......
...@@ -113,7 +113,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) ...@@ -113,7 +113,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if (orphans >= sysctl_tcp_max_orphans || if (orphans >= sysctl_tcp_max_orphans ||
(sk->sk_wmem_queued > SOCK_MIN_SNDBUF && (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { atomic_read(&tcp_prot.memory_allocated) > tcp_prot.sysctl_mem[2])) {
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_INFO "Out of socket memory\n"); printk(KERN_INFO "Out of socket memory\n");
...@@ -217,7 +217,7 @@ static void tcp_delack_timer(unsigned long data) ...@@ -217,7 +217,7 @@ static void tcp_delack_timer(unsigned long data)
goto out_unlock; goto out_unlock;
} }
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER)) if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
goto out; goto out;
...@@ -257,8 +257,8 @@ static void tcp_delack_timer(unsigned long data) ...@@ -257,8 +257,8 @@ static void tcp_delack_timer(unsigned long data)
TCP_CHECK_TIMER(sk); TCP_CHECK_TIMER(sk);
out: out:
if (tcp_memory_pressure) if (tcp_prot.memory_pressure)
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
out_unlock: out_unlock:
bh_unlock_sock(sk); bh_unlock_sock(sk);
sock_put(sk); sock_put(sk);
...@@ -448,7 +448,7 @@ static void tcp_write_timer(unsigned long data) ...@@ -448,7 +448,7 @@ static void tcp_write_timer(unsigned long data)
TCP_CHECK_TIMER(sk); TCP_CHECK_TIMER(sk);
out: out:
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
out_unlock: out_unlock:
bh_unlock_sock(sk); bh_unlock_sock(sk);
sock_put(sk); sock_put(sk);
...@@ -633,7 +633,7 @@ static void tcp_keepalive_timer (unsigned long data) ...@@ -633,7 +633,7 @@ static void tcp_keepalive_timer (unsigned long data)
} }
TCP_CHECK_TIMER(sk); TCP_CHECK_TIMER(sk);
tcp_mem_reclaim(sk); sk_stream_mem_reclaim(sk);
resched: resched:
tcp_reset_keepalive_timer (sk, elapsed); tcp_reset_keepalive_timer (sk, elapsed);
......
...@@ -1883,10 +1883,10 @@ static int tcp_v6_init_sock(struct sock *sk) ...@@ -1883,10 +1883,10 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_write_space = sk_stream_write_space; sk->sk_write_space = sk_stream_write_space;
sk->sk_use_write_queue = 1; sk->sk_use_write_queue = 1;
sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_sndbuf = tcp_prot.sysctl_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; sk->sk_rcvbuf = tcp_prot.sysctl_rmem[1];
atomic_inc(&tcp_sockets_allocated); atomic_inc(&tcp_prot.sockets_allocated);
return 0; return 0;
} }
...@@ -1910,7 +1910,7 @@ static int tcp_v6_destroy_sock(struct sock *sk) ...@@ -1910,7 +1910,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
if (tcp_sk(sk)->bind_hash) if (tcp_sk(sk)->bind_hash)
tcp_put_port(sk); tcp_put_port(sk);
atomic_dec(&tcp_sockets_allocated); atomic_dec(&tcp_prot.sockets_allocated);
return inet6_destroy_sock(sk); return inet6_destroy_sock(sk);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment