Commit 334cdf67 authored by David S. Miller

Merge bk://kernel.bkbits.net/acme/net-2.6

into nuts.davemloft.net:/disk1/BK/acme-2.6
parents f4897eb3 3ac2a2d4
@@ -129,8 +129,6 @@ struct inet_opt {
 	int			mc_index;	/* Multicast device index */
 	__u32			mc_addr;
 	struct ip_mc_socklist	*mc_list;	/* Group array */
-	struct page		*sndmsg_page;	/* Cached page for sendmsg */
-	u32			sndmsg_off;	/* Cached offset for sendmsg */
 	/*
 	 * Following members are used to retain the information to build
	 * an ip header on each ip fragmentation while the socket is corked.
...
@@ -664,13 +664,15 @@ static inline int skb_pagelen(const struct sk_buff *skb)
 	return len + skb_headlen(skb);
 }
 
-static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
+static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
+				      struct page *page, int off, int size)
 {
 	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
 	frag->page = page;
 	frag->page_offset = off;
 	frag->size = size;
-	skb_shinfo(skb)->nr_frags = i+1;
+	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
 #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
...
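The frag-filling helper above is what the later TCP and IP hunks switch to in place of a private copy. As a minimal illustration of what it does, here is a userspace model of filling one frag descriptor slot; the struct names are simplified stand-ins for the kernel's skb_frag_t and skb_shared_info, not real kernel API:

	#include <stdio.h>

	#define MAX_SKB_FRAGS 18

	/* Simplified stand-in for the kernel's skb_frag_t. */
	struct frag {
		void *page;		/* backing page */
		int   page_offset;	/* start of data within the page */
		int   size;		/* bytes of data in this frag */
	};

	struct shinfo {
		int nr_frags;
		struct frag frags[MAX_SKB_FRAGS];
	};

	/* Mirrors skb_fill_page_desc: record a page slice in slot i and
	 * bump nr_frags so the slot becomes visible. */
	static void fill_page_desc(struct shinfo *sh, int i,
				   void *page, int off, int size)
	{
		struct frag *f = &sh->frags[i];

		f->page = page;
		f->page_offset = off;
		f->size = size;
		sh->nr_frags = i + 1;
	}

	int main(void)
	{
		static char page[4096];
		struct shinfo sh = { 0 };

		fill_page_desc(&sh, sh.nr_frags, page, 0, 512);
		printf("nr_frags=%d size=%d\n", sh.nr_frags, sh.frags[0].size);
		return 0;
	}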
@@ -34,8 +34,7 @@
 
 /* This is used to register protocols. */
-struct inet_protocol
-{
+struct net_protocol {
 	int	(*handler)(struct sk_buff *skb);
 	void	(*err_handler)(struct sk_buff *skb, u32 info);
 	int	no_policy;
@@ -78,15 +77,15 @@ struct inet_protosw {
 #define INET_PROTOSW_REUSE 0x01	     /* Are ports automatically reusable? */
 #define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
 
-extern struct inet_protocol *inet_protocol_base;
-extern struct inet_protocol *inet_protos[MAX_INET_PROTOS];
+extern struct net_protocol *inet_protocol_base;
+extern struct net_protocol *inet_protos[MAX_INET_PROTOS];
 
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 extern struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
 #endif
 
-extern int	inet_add_protocol(struct inet_protocol *prot, unsigned char num);
-extern int	inet_del_protocol(struct inet_protocol *prot, unsigned char num);
+extern int	inet_add_protocol(struct net_protocol *prot, unsigned char num);
+extern int	inet_del_protocol(struct net_protocol *prot, unsigned char num);
 extern void	inet_register_protosw(struct inet_protosw *p);
 extern void	inet_unregister_protosw(struct inet_protosw *p);
...
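inet_protos[] is a hash table keyed by `protocol & (MAX_INET_PROTOS - 1)`, as the ICMP and IP-input hunks further down show. A small userspace model of that registration and demux, assuming a power-of-two table size; all names here are illustrative stand-ins, not the kernel's functions:

	#include <stdio.h>

	#define MAX_INET_PROTOS 256	/* power of two, so & works as a modulus */

	struct net_protocol {
		int (*handler)(const char *pkt);
	};

	static struct net_protocol *inet_protos[MAX_INET_PROTOS];

	static int add_protocol(struct net_protocol *prot, unsigned char num)
	{
		int hash = num & (MAX_INET_PROTOS - 1);

		if (inet_protos[hash])
			return -1;	/* slot already taken */
		inet_protos[hash] = prot;
		return 0;
	}

	static int tcp_handler(const char *pkt)
	{
		printf("tcp got: %s\n", pkt);
		return 0;
	}

	int main(void)
	{
		static struct net_protocol tcp = { .handler = tcp_handler };
		int hash = 6 & (MAX_INET_PROTOS - 1);	/* IPPROTO_TCP == 6 */

		add_protocol(&tcp, 6);
		if (inet_protos[hash])
			inet_protos[hash]->handler("segment");
		return 0;
	}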
@@ -167,6 +167,8 @@ struct sock_common {
  * @sk_socket - Identd and reporting IO signals
  * @sk_user_data - RPC layer private data
  * @sk_owner - module that owns this socket
+ * @sk_sndmsg_page - cached page for sendmsg
+ * @sk_sndmsg_off - cached offset for sendmsg
  * @sk_send_head - front of stuff to transmit
  * @sk_write_pending - a write to stream socket waits to start
  * @sk_queue_shrunk - write queue has been shrunk recently
@@ -249,8 +251,10 @@ struct sock {
 	struct timeval		sk_stamp;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
-	struct sk_buff		*sk_send_head;
 	struct module		*sk_owner;
+	struct page		*sk_sndmsg_page;
+	__u32			sk_sndmsg_off;
+	struct sk_buff		*sk_send_head;
 	int			sk_write_pending;
 	void			*sk_security;
 	__u8			sk_queue_shrunk;
@@ -487,10 +491,11 @@ extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
 extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
 extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
 extern int sk_stream_error(struct sock *sk, int flags, int err);
+extern void sk_stream_kill_queues(struct sock *sk);
 
 extern int sk_wait_data(struct sock *sk, long *timeo);
 
-/* IP protocol blocks we attach to sockets.
+/* Networking protocol blocks we attach to sockets.
  * socket layer -> transport layer interface
  * transport -> network interface is defined by struct inet_proto
  */
@@ -534,6 +539,21 @@ struct proto {
 	void		(*unhash)(struct sock *sk);
 	int		(*get_port)(struct sock *sk, unsigned short snum);
 
+	/* Memory pressure */
+	void		(*enter_memory_pressure)(void);
+	atomic_t	memory_allocated;	/* Current allocated memory. */
+	atomic_t	sockets_allocated;	/* Current number of sockets. */
+	/*
+	 * Pressure flag: try to collapse.
+	 * Technical note: it is used by multiple contexts non atomically.
+	 * All the sk_stream_mem_schedule() is of this nature: accounting
+	 * is strict, actions are advisory and have some latency.
+	 */
+	int		memory_pressure;
+	int		sysctl_mem[3];
+	int		sysctl_wmem[3];
+	int		sysctl_rmem[3];
+
 	char		name[32];
 
 	struct {
@@ -624,6 +644,22 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
 }
 
+extern void __sk_stream_mem_reclaim(struct sock *sk);
+extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
+
+#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE)
+
+static inline int sk_stream_pages(int amt)
+{
+	return (amt + SK_STREAM_MEM_QUANTUM - 1) / SK_STREAM_MEM_QUANTUM;
+}
+
+static inline void sk_stream_mem_reclaim(struct sock *sk)
+{
+	if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM)
+		__sk_stream_mem_reclaim(sk);
+}
+
 /* Used by processes to "lock" a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
...
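The helpers above account for memory in whole-page quanta: sk_stream_pages() rounds a byte count up to pages, and reclaim only fires once a socket holds at least one full unreturned quantum in sk_forward_alloc. A small userspace sketch of that rounding, assuming 4096-byte pages for the worked values:

	#include <stdio.h>

	#define SK_STREAM_MEM_QUANTUM 4096	/* stands in for PAGE_SIZE */

	/* Round a byte count up to whole quanta, as sk_stream_pages() does. */
	static int sk_stream_pages(int amt)
	{
		return (amt + SK_STREAM_MEM_QUANTUM - 1) / SK_STREAM_MEM_QUANTUM;
	}

	int main(void)
	{
		/* 1 byte and 4096 bytes both cost one page; 4097 costs two. */
		printf("%d %d %d\n",
		       sk_stream_pages(1),	/* 1 */
		       sk_stream_pages(4096),	/* 1 */
		       sk_stream_pages(4097));	/* 2 */
		return 0;
	}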
@@ -594,9 +594,6 @@ extern int sysctl_tcp_fack;
 extern int sysctl_tcp_reordering;
 extern int sysctl_tcp_ecn;
 extern int sysctl_tcp_dsack;
-extern int sysctl_tcp_mem[3];
-extern int sysctl_tcp_wmem[3];
-extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
@@ -614,10 +611,6 @@ extern int sysctl_tcp_bic_low_window;
 extern int sysctl_tcp_default_win_scale;
 extern int sysctl_tcp_moderate_rcvbuf;
 
-extern atomic_t tcp_memory_allocated;
-extern atomic_t tcp_sockets_allocated;
-extern int tcp_memory_pressure;
-
 struct open_request;
 
 struct or_calltable {
@@ -1867,24 +1860,7 @@ static __inline__ void tcp_openreq_init(struct open_request *req,
 	req->rmt_port = skb->h.th->source;
 }
 
-#define TCP_MEM_QUANTUM ((int)PAGE_SIZE)
-
-extern void __tcp_mem_reclaim(struct sock *sk);
-extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
-
-static inline void tcp_mem_reclaim(struct sock *sk)
-{
-	if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM)
-		__tcp_mem_reclaim(sk);
-}
-
-static inline void tcp_enter_memory_pressure(void)
-{
-	if (!tcp_memory_pressure) {
-		NET_INC_STATS(TCPMemoryPressures);
-		tcp_memory_pressure = 1;
-	}
-}
+extern void tcp_enter_memory_pressure(void);
 
 static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
 {
@@ -1893,7 +1869,7 @@ static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem,
 	if (skb) {
 		skb->truesize += mem;
 		if (sk->sk_forward_alloc >= (int)skb->truesize ||
-		    tcp_mem_schedule(sk, skb->truesize, 0)) {
+		    sk_stream_mem_schedule(sk, skb->truesize, 0)) {
 			skb_reserve(skb, MAX_TCP_HEADER);
 			return skb;
 		}
@@ -1913,7 +1889,7 @@ static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
 static inline struct page * tcp_alloc_page(struct sock *sk)
 {
 	if (sk->sk_forward_alloc >= (int)PAGE_SIZE ||
-	    tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
+	    sk_stream_mem_schedule(sk, PAGE_SIZE, 0)) {
 		struct page *page = alloc_pages(sk->sk_allocation, 0);
 		if (page)
 			return page;
@@ -1929,7 +1905,7 @@ static inline void tcp_writequeue_purge(struct sock *sk)
 	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
 		sk_stream_free_skb(sk, skb);
-	tcp_mem_reclaim(sk);
+	sk_stream_mem_reclaim(sk);
 }
 
 extern void tcp_listen_wlock(void);
...
@@ -650,6 +650,14 @@ void sk_free(struct sock *sk)
 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
+	/*
+	 * If sendmsg cached page exists, toss it.
+	 */
+	if (sk->sk_sndmsg_page) {
+		__free_page(sk->sk_sndmsg_page);
+		sk->sk_sndmsg_page = NULL;
+	}
+
 	security_sk_free(sk);
 	kmem_cache_free(sk->sk_slab, sk);
 	module_put(owner);
@@ -1175,6 +1183,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_error_report	=	sock_def_error_report;
 	sk->sk_destruct		=	sock_def_destruct;
 
+	sk->sk_sndmsg_page	=	NULL;
+	sk->sk_sndmsg_off	=	0;
+
 	sk->sk_peercred.pid	=	0;
 	sk->sk_peercred.uid	=	-1;
 	sk->sk_peercred.gid	=	-1;
...
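The cached page lets successive sendmsg calls keep appending small copies to one partially filled page instead of taking a fresh page per call; it is initialized to NULL in sock_init_data() and freed once in sk_free(). A rough userspace sketch of that lifecycle, with plain malloc standing in for page allocation and none of the kernel's refcounting:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define PAGE_SIZE 4096

	struct sock {
		char   *sndmsg_page;	/* cached, partially filled "page" */
		size_t  sndmsg_off;	/* first free byte within it */
	};

	/* Append data, reusing the cached page while it has room. */
	static int append(struct sock *sk, const char *data, size_t len)
	{
		if (!sk->sndmsg_page) {
			sk->sndmsg_page = malloc(PAGE_SIZE);
			if (!sk->sndmsg_page)
				return -1;
			sk->sndmsg_off = 0;
		}
		if (sk->sndmsg_off + len > PAGE_SIZE)
			return -1;	/* the real code grabs a fresh page here */
		memcpy(sk->sndmsg_page + sk->sndmsg_off, data, len);
		sk->sndmsg_off += len;
		return 0;
	}

	int main(void)
	{
		struct sock sk = { NULL, 0 };

		append(&sk, "abc", 3);
		append(&sk, "def", 3);	/* lands in the same cached page */
		printf("off=%zu\n", sk.sndmsg_off);	/* 6 */
		free(sk.sndmsg_page);	/* what sk_free() does at teardown */
		return 0;
	}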
@@ -188,3 +188,100 @@ int sk_stream_error(struct sock *sk, int flags, int err)
 }
 
 EXPORT_SYMBOL(sk_stream_error);
+
+void __sk_stream_mem_reclaim(struct sock *sk)
+{
+	if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) {
+		atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+			   &sk->sk_prot->memory_allocated);
+		sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
+		if (sk->sk_prot->memory_pressure &&
+		    (atomic_read(&sk->sk_prot->memory_allocated) <
+		     sk->sk_prot->sysctl_mem[0]))
+			sk->sk_prot->memory_pressure = 0;
+	}
+}
+
+EXPORT_SYMBOL(__sk_stream_mem_reclaim);
+
+int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
+{
+	int amt = sk_stream_pages(size);
+
+	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
+	atomic_add(amt, &sk->sk_prot->memory_allocated);
+
+	/* Under limit. */
+	if (atomic_read(&sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
+		if (sk->sk_prot->memory_pressure)
+			sk->sk_prot->memory_pressure = 0;
+		return 1;
+	}
+
+	/* Over hard limit. */
+	if (atomic_read(&sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
+		sk->sk_prot->enter_memory_pressure();
+		goto suppress_allocation;
+	}
+
+	/* Under pressure. */
+	if (atomic_read(&sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
+		sk->sk_prot->enter_memory_pressure();
+
+	if (kind) {
+		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
+			return 1;
+	} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
+		return 1;
+
+	if (!sk->sk_prot->memory_pressure ||
+	    sk->sk_prot->sysctl_mem[2] > atomic_read(&sk->sk_prot->sockets_allocated) *
+				sk_stream_pages(sk->sk_wmem_queued +
+						atomic_read(&sk->sk_rmem_alloc) +
+						sk->sk_forward_alloc))
+		return 1;
+
+suppress_allocation:
+	if (!kind) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
+	atomic_sub(amt, &sk->sk_prot->memory_allocated);
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_mem_schedule);
+
+void sk_stream_kill_queues(struct sock *sk)
+{
+	/* First the read buffer. */
+	__skb_queue_purge(&sk->sk_receive_queue);
+
+	/* Next, the error queue. */
+	__skb_queue_purge(&sk->sk_error_queue);
+
+	/* Next, the write queue. */
+	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
+
+	/* Account for returned memory. */
+	sk_stream_mem_reclaim(sk);
+
+	BUG_TRAP(!sk->sk_wmem_queued);
+	BUG_TRAP(!sk->sk_forward_alloc);
+
+	/* It is _impossible_ for the backlog to contain anything
	 * when we get here.  All user references to this socket
	 * have gone away; only the net layer can touch it.
	 */
+}
+
+EXPORT_SYMBOL(sk_stream_kill_queues);
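sk_stream_mem_schedule() charges the pages optimistically first, then classifies global usage against the three sysctl_mem thresholds: below mem[0] everything is granted and the pressure flag clears, above mem[2] the charge is normally revoked, and in between allocations succeed only subject to per-socket and fair-share checks. A compressed userspace model of that decision follows; it is single-threaded, uses plain ints instead of atomics, omits the per-socket checks, and its thresholds are illustrative only:

	#include <stdio.h>

	#define QUANTUM 4096

	/* Illustrative thresholds, in pages: low / pressure / hard. */
	static int sysctl_mem[3] = { 4, 8, 12 };
	static int memory_allocated;	/* pages charged so far */
	static int memory_pressure;

	static int pages(int bytes)
	{
		return (bytes + QUANTUM - 1) / QUANTUM;
	}

	/* Returns 1 if `bytes` may be allocated, 0 if the charge is revoked. */
	static int mem_schedule(int bytes)
	{
		int amt = pages(bytes);

		memory_allocated += amt;		/* charge optimistically */

		if (memory_allocated < sysctl_mem[0]) {	/* under limit */
			memory_pressure = 0;
			return 1;
		}
		if (memory_allocated > sysctl_mem[2]) {	/* over hard limit */
			memory_pressure = 1;
			memory_allocated -= amt;	/* undo the charge; the real
							 * code may still grant small
							 * writes below sndbuf */
			return 0;
		}
		if (memory_allocated > sysctl_mem[1])	/* entering pressure */
			memory_pressure = 1;
		return 1;	/* real code also applies per-socket checks here */
	}

	int main(void)
	{
		for (int i = 0; i < 14; i++)
			printf("alloc %2d: ok=%d pressure=%d total=%d\n",
			       i, mem_schedule(QUANTUM), memory_pressure,
			       memory_allocated);
		return 0;
	}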
@@ -1041,24 +1041,24 @@ void inet_unregister_protosw(struct inet_protosw *p)
 }
 
 #ifdef CONFIG_IP_MULTICAST
-static struct inet_protocol igmp_protocol = {
+static struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
 };
 #endif
 
-static struct inet_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
 	.handler =	tcp_v4_rcv,
 	.err_handler =	tcp_v4_err,
 	.no_policy =	1,
 };
 
-static struct inet_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
 	.handler =	udp_rcv,
 	.err_handler =	udp_err,
 	.no_policy =	1,
 };
 
-static struct inet_protocol icmp_protocol = {
+static struct net_protocol icmp_protocol = {
 	.handler =	icmp_rcv,
 };
...
@@ -343,7 +343,7 @@ static struct xfrm_type ah_type =
 	.output		= ah_output
 };
 
-static struct inet_protocol ah4_protocol = {
+static struct net_protocol ah4_protocol = {
 	.handler	=	xfrm4_rcv,
 	.err_handler	=	ah4_err,
 	.no_policy	=	1,
...
@@ -595,7 +595,7 @@ static struct xfrm_type esp_type =
 	.output		= esp_output
 };
 
-static struct inet_protocol esp4_protocol = {
+static struct net_protocol esp4_protocol = {
 	.handler	=	xfrm4_rcv,
 	.err_handler	=	esp4_err,
 	.no_policy	=	1,
...
@@ -592,7 +592,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	struct iphdr *iph;
 	struct icmphdr *icmph;
 	int hash, protocol;
-	struct inet_protocol *ipprot;
+	struct net_protocol *ipprot;
 	struct sock *raw_sk;
 	u32 info = 0;
...
@@ -1228,7 +1228,7 @@ int __init ipgre_fb_tunnel_init(struct net_device *dev)
 }
 
-static struct inet_protocol ipgre_protocol = {
+static struct net_protocol ipgre_protocol = {
 	.handler	=	ipgre_rcv,
 	.err_handler	=	ipgre_err,
 };
...
@@ -219,7 +219,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 	int protocol = skb->nh.iph->protocol;
 	int hash;
 	struct sock *raw_sk;
-	struct inet_protocol *ipprot;
+	struct net_protocol *ipprot;
 
 resubmit:
 	hash = protocol & (MAX_INET_PROTOS - 1);
...
@@ -766,8 +766,8 @@ int ip_append_data(struct sock *sk,
 		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
 		inet->cork.rt = rt;
 		inet->cork.length = 0;
-		inet->sndmsg_page = NULL;
-		inet->sndmsg_off = 0;
+		sk->sk_sndmsg_page = NULL;
+		sk->sk_sndmsg_off = 0;
 		if ((exthdrlen = rt->u.dst.header_len) != 0) {
 			length += exthdrlen;
 			transhdrlen += exthdrlen;
@@ -915,8 +915,8 @@ int ip_append_data(struct sock *sk,
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-			struct page *page = inet->sndmsg_page;
-			int off = inet->sndmsg_off;
+			struct page *page = sk->sk_sndmsg_page;
+			int off = sk->sk_sndmsg_off;
 			unsigned int left;
 
 			if (page && (left = PAGE_SIZE - off) > 0) {
@@ -928,7 +928,7 @@ int ip_append_data(struct sock *sk,
 						goto error;
 					}
 					get_page(page);
-					skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
 					frag = &skb_shinfo(skb)->frags[i];
 				}
 			} else if (i < MAX_SKB_FRAGS) {
@@ -939,8 +939,8 @@ int ip_append_data(struct sock *sk,
 					err = -ENOMEM;
 					goto error;
 				}
-				inet->sndmsg_page = page;
-				inet->sndmsg_off = 0;
+				sk->sk_sndmsg_page = page;
+				sk->sk_sndmsg_off = 0;
 				skb_fill_page_desc(skb, i, page, 0, 0);
 				frag = &skb_shinfo(skb)->frags[i];
@@ -954,7 +954,7 @@ int ip_append_data(struct sock *sk,
 				err = -EFAULT;
 				goto error;
 			}
-			inet->sndmsg_off += copy;
+			sk->sk_sndmsg_off += copy;
 			frag->size += copy;
 			skb->len += copy;
 			skb->data_len += copy;
...
@@ -409,7 +409,7 @@ static struct xfrm_type ipcomp_type = {
 	.output		= ipcomp_output
 };
 
-static struct inet_protocol ipcomp4_protocol = {
+static struct net_protocol ipcomp4_protocol = {
 	.handler	=	xfrm4_rcv,
 	.err_handler	=	ipcomp4_err,
 	.no_policy	=	1,
...
@@ -109,7 +109,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 
-static struct inet_protocol pim_protocol;
+static struct net_protocol pim_protocol;
 
 static struct timer_list ipmr_expire_timer;
@@ -1867,7 +1867,7 @@ static struct file_operations ipmr_mfc_fops = {
 #endif
 
 #ifdef CONFIG_IP_PIMSM_V2
-static struct inet_protocol pim_protocol = {
+static struct net_protocol pim_protocol = {
 	.handler	=	pim_rcv,
 };
 #endif
...
@@ -65,8 +65,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
-		   tcp_tw_count, atomic_read(&tcp_sockets_allocated),
-		   atomic_read(&tcp_memory_allocated));
+		   tcp_tw_count, atomic_read(&tcp_prot.sockets_allocated),
+		   atomic_read(&tcp_prot.memory_allocated));
 	seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
 	seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
 	seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
...
@@ -48,14 +48,14 @@
 #include <net/ipip.h>
 #include <linux/igmp.h>
 
-struct inet_protocol *inet_protos[MAX_INET_PROTOS];
+struct net_protocol *inet_protos[MAX_INET_PROTOS];
 static spinlock_t inet_proto_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  *	Add a protocol handler to the hash tables
  */
 
-int inet_add_protocol(struct inet_protocol *prot, unsigned char protocol)
+int inet_add_protocol(struct net_protocol *prot, unsigned char protocol)
 {
 	int hash, ret;
@@ -77,7 +77,7 @@ int inet_add_protocol(struct inet_protocol *prot, unsigned char protocol)
  *	Remove a protocol from the hash tables.
  */
 
-int inet_del_protocol(struct inet_protocol *prot, unsigned char protocol)
+int inet_del_protocol(struct net_protocol *prot, unsigned char protocol)
 {
 	int hash, ret;
...
@@ -508,24 +508,24 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_TCP_MEM,
 		.procname	= "tcp_mem",
-		.data		= &sysctl_tcp_mem,
-		.maxlen		= sizeof(sysctl_tcp_mem),
+		.data		= &tcp_prot.sysctl_mem,
+		.maxlen		= sizeof(tcp_prot.sysctl_mem),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
 	{
 		.ctl_name	= NET_TCP_WMEM,
 		.procname	= "tcp_wmem",
-		.data		= &sysctl_tcp_wmem,
-		.maxlen		= sizeof(sysctl_tcp_wmem),
+		.data		= &tcp_prot.sysctl_wmem,
+		.maxlen		= sizeof(tcp_prot.sysctl_wmem),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
 	{
 		.ctl_name	= NET_TCP_RMEM,
 		.procname	= "tcp_rmem",
-		.data		= &sysctl_tcp_rmem,
-		.maxlen		= sizeof(sysctl_tcp_rmem),
+		.data		= &tcp_prot.sysctl_rmem,
+		.maxlen		= sizeof(tcp_prot.sysctl_rmem),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
...
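Because .maxlen is the size of an int[3] and the handler is proc_dointvec, each of these sysctls reads and writes three integers in one pass; only the backing storage moved into tcp_prot. A userspace model of a vector-of-ints control entry, with illustrative names and values:

	#include <stdio.h>

	struct proto {
		int sysctl_mem[3];	/* low / pressure / high, as in tcp_prot */
	};

	struct ctl_entry {
		const char *procname;
		int *data;		/* points into the owning struct proto */
		int  nvals;		/* sizeof(int[3]) / sizeof(int) */
	};

	static struct proto tcp_prot = { .sysctl_mem = { 768, 1024, 1536 } };

	static struct ctl_entry tcp_mem_entry = {
		.procname = "tcp_mem",
		.data = tcp_prot.sysctl_mem,
		.nvals = 3,
	};

	int main(void)
	{
		/* A "read" of the sysctl dumps all three values at once. */
		printf("%s:", tcp_mem_entry.procname);
		for (int i = 0; i < tcp_mem_entry.nvals; i++)
			printf(" %d", tcp_mem_entry.data[i]);
		printf("\n");
		return 0;
	}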
@@ -278,85 +278,11 @@ atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
 int sysctl_tcp_default_win_scale = 7;
 
-int sysctl_tcp_mem[3];
-int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
-int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
-
-atomic_t tcp_memory_allocated;	/* Current allocated memory. */
-atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
-
-/* Pressure flag: try to collapse.
- * Technical note: it is used by multiple contexts non atomically.
- * All the tcp_mem_schedule() is of this nature: accounting
- * is strict, actions are advisory and have some latency. */
-int tcp_memory_pressure;
-
-#define TCP_PAGES(amt) (((amt) + TCP_MEM_QUANTUM - 1) / TCP_MEM_QUANTUM)
-
-int tcp_mem_schedule(struct sock *sk, int size, int kind)
-{
-	int amt = TCP_PAGES(size);
-
-	sk->sk_forward_alloc += amt * TCP_MEM_QUANTUM;
-	atomic_add(amt, &tcp_memory_allocated);
-
-	/* Under limit. */
-	if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
-		if (tcp_memory_pressure)
-			tcp_memory_pressure = 0;
-		return 1;
-	}
-
-	/* Over hard limit. */
-	if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) {
-		tcp_enter_memory_pressure();
-		goto suppress_allocation;
-	}
-
-	/* Under pressure. */
-	if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1])
-		tcp_enter_memory_pressure();
-
-	if (kind) {
-		if (atomic_read(&sk->sk_rmem_alloc) < sysctl_tcp_rmem[0])
-			return 1;
-	} else if (sk->sk_wmem_queued < sysctl_tcp_wmem[0])
-		return 1;
-
-	if (!tcp_memory_pressure ||
-	    sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) *
-				TCP_PAGES(sk->sk_wmem_queued +
-					  atomic_read(&sk->sk_rmem_alloc) +
-					  sk->sk_forward_alloc))
-		return 1;
-
-suppress_allocation:
-	if (!kind) {
-		sk_stream_moderate_sndbuf(sk);
-
-		/* Fail only if socket is _under_ its sndbuf.
-		 * In this case we cannot block, so that we have to fail.
-		 */
-		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
-			return 1;
-	}
-
-	/* Alas. Undo changes. */
-	sk->sk_forward_alloc -= amt * TCP_MEM_QUANTUM;
-	atomic_sub(amt, &tcp_memory_allocated);
-	return 0;
-}
-
-void __tcp_mem_reclaim(struct sock *sk)
-{
-	if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM) {
-		atomic_sub(sk->sk_forward_alloc / TCP_MEM_QUANTUM,
-			   &tcp_memory_allocated);
-		sk->sk_forward_alloc &= TCP_MEM_QUANTUM - 1;
-		if (tcp_memory_pressure &&
-		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
-			tcp_memory_pressure = 0;
-	}
-}
+void tcp_enter_memory_pressure(void)
+{
+	if (!tcp_prot.memory_pressure) {
+		NET_INC_STATS(TCPMemoryPressures);
+		tcp_prot.memory_pressure = 1;
+	}
+}
@@ -628,16 +554,6 @@ static void tcp_listen_stop (struct sock *sk)
 	BUG_TRAP(!sk->sk_ack_backlog);
 }
 
-static inline void fill_page_desc(struct sk_buff *skb, int i,
-				  struct page *page, int off, int size)
-{
-	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-	frag->page = page;
-	frag->page_offset = off;
-	frag->size = size;
-	skb_shinfo(skb)->nr_frags = i + 1;
-}
-
 static inline void tcp_mark_push(struct tcp_opt *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -740,7 +656,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 			skb_shinfo(skb)->frags[i - 1].size += copy;
 		} else if (i < MAX_SKB_FRAGS) {
 			get_page(page);
-			fill_page_desc(skb, i, page, offset, copy);
+			skb_fill_page_desc(skb, i, page, offset, copy);
 		} else {
 			tcp_mark_push(tp, skb);
 			goto new_segment;
@@ -816,8 +732,8 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 	return res;
 }
 
-#define TCP_PAGE(sk)	(inet_sk(sk)->sndmsg_page)
-#define TCP_OFF(sk)	(inet_sk(sk)->sndmsg_off)
+#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
+#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
 static inline int select_size(struct sock *sk, struct tcp_opt *tp)
 {
@@ -980,7 +896,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 					skb_shinfo(skb)->frags[i - 1].size +=
 									copy;
 				} else {
-					fill_page_desc(skb, i, page, off, copy);
+					skb_fill_page_desc(skb, i, page, off, copy);
 					if (TCP_PAGE(sk)) {
 						get_page(page);
 					} else if (off + copy < PAGE_SIZE) {
@@ -1634,29 +1550,6 @@ void tcp_shutdown(struct sock *sk, int how)
 	}
 }
 
-static __inline__ void tcp_kill_sk_queues(struct sock *sk)
-{
-	/* First the read buffer. */
-	__skb_queue_purge(&sk->sk_receive_queue);
-
-	/* Next, the error queue. */
-	__skb_queue_purge(&sk->sk_error_queue);
-
-	/* Next, the write queue. */
-	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
-
-	/* Account for returned memory. */
-	tcp_mem_reclaim(sk);
-
-	BUG_TRAP(!sk->sk_wmem_queued);
-	BUG_TRAP(!sk->sk_forward_alloc);
-
-	/* It is _impossible_ for the backlog to contain anything
-	 * when we get here.  All user references to this socket
-	 * have gone away; only the net layer can touch it.
-	 */
-}
-
 /*
  *	At this point, there should be no process reference to this
  *	socket, and thus no user references at all.  Therefore we
@@ -1684,7 +1577,7 @@ void tcp_destroy_sock(struct sock *sk)
 		sk->sk_prot->destroy(sk);
 
-	tcp_kill_sk_queues(sk);
+	sk_stream_kill_queues(sk);
 
 	xfrm_sk_free_policy(sk);
@@ -1727,7 +1620,7 @@ void tcp_close(struct sock *sk, long timeout)
 		__kfree_skb(skb);
 	}
 
-	tcp_mem_reclaim(sk);
+	sk_stream_mem_reclaim(sk);
 
 	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
 	 * 3.10, we send a RST here because data was lost.  To
@@ -1826,10 +1719,10 @@ void tcp_close(struct sock *sk, long timeout)
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
-		tcp_mem_reclaim(sk);
+		sk_stream_mem_reclaim(sk);
 		if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
 		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+		     atomic_read(&tcp_prot.memory_allocated) > tcp_prot.sysctl_mem[2])) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many orphaned "
 				       "sockets\n");
@@ -2376,15 +2269,15 @@ void __init tcp_init(void)
 	}
 	tcp_port_rover = sysctl_local_port_range[0] - 1;
 
-	sysctl_tcp_mem[0] =  768 << order;
-	sysctl_tcp_mem[1] = 1024 << order;
-	sysctl_tcp_mem[2] = 1536 << order;
+	tcp_prot.sysctl_mem[0] =  768 << order;
+	tcp_prot.sysctl_mem[1] = 1024 << order;
+	tcp_prot.sysctl_mem[2] = 1536 << order;
 
 	if (order < 3) {
-		sysctl_tcp_wmem[2] = 64 * 1024;
-		sysctl_tcp_rmem[0] = PAGE_SIZE;
-		sysctl_tcp_rmem[1] = 43689;
-		sysctl_tcp_rmem[2] = 2 * 43689;
+		tcp_prot.sysctl_wmem[2] = 64 * 1024;
+		tcp_prot.sysctl_rmem[0] = PAGE_SIZE;
+		tcp_prot.sysctl_rmem[1] = 43689;
+		tcp_prot.sysctl_rmem[2] = 2 * 43689;
 	}
 
 	printk(KERN_INFO "TCP: Hash tables configured "
@@ -2394,9 +2287,6 @@ void __init tcp_init(void)
 	tcpdiag_init();
 }
 
-EXPORT_SYMBOL(__tcp_mem_reclaim);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
 EXPORT_SYMBOL(tcp_accept);
 EXPORT_SYMBOL(tcp_close);
 EXPORT_SYMBOL(tcp_close_state);
@@ -2412,6 +2302,5 @@ EXPORT_SYMBOL(tcp_sendmsg);
 EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
-EXPORT_SYMBOL(tcp_sockets_allocated);
 EXPORT_SYMBOL(tcp_statistics);
 EXPORT_SYMBOL(tcp_timewait_cachep);
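As a worked example of the tcp_init() scaling above: `order` grows with the size of the connection hash tables, so on a machine where order comes out to 2, tcp_prot.sysctl_mem becomes { 3072, 4096, 6144 } pages (768 << 2 = 3072, 1536 << 2 = 6144), i.e. a hard limit of about 24 MiB with 4 KiB pages. Small machines (order < 3) additionally cap tcp_wmem[2] at 64 KiB and shrink the tcp_rmem defaults.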
@@ -207,7 +207,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
 		     sizeof(struct sk_buff);
 
 	if (sk->sk_sndbuf < 3 * sndmem)
-		sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+		sk->sk_sndbuf = min(3 * sndmem, tcp_prot.sysctl_wmem[2]);
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -259,7 +259,7 @@ tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_memory_pressure) {
+	    !tcp_prot.memory_pressure) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -291,7 +291,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	while (tcp_win_from_space(rcvmem) < tp->advmss)
 		rcvmem += 128;
 	if (sk->sk_rcvbuf < 4 * rcvmem)
-		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
+		sk->sk_rcvbuf = min(4 * rcvmem, tcp_prot.sysctl_rmem[2]);
 }
 
 /* 4. Try to fixup all. It is made immediately after connection enters
@@ -347,12 +347,12 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
 	 * do not clamp window. Try to expand rcvbuf instead.
 	 */
 	if (ofo_win) {
-		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+		if (sk->sk_rcvbuf < tcp_prot.sysctl_rmem[2] &&
 		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-		    !tcp_memory_pressure &&
-		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
+		    !tcp_prot.memory_pressure &&
+		    atomic_read(&tcp_prot.memory_allocated) < tcp_prot.sysctl_mem[0])
 			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-					    sysctl_tcp_rmem[2]);
+					    tcp_prot.sysctl_rmem[2]);
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
 		app_win += ofo_win;
@@ -477,7 +477,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		while (tcp_win_from_space(rcvmem) < tp->advmss)
 			rcvmem += 128;
 		space *= rcvmem;
-		space = min(space, sysctl_tcp_rmem[2]);
+		space = min(space, tcp_prot.sysctl_rmem[2]);
 		if (space > sk->sk_rcvbuf) {
 			sk->sk_rcvbuf = space;
@@ -535,7 +535,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
 			 * restart window, so that we send ACKs quickly.
 			 */
 			tcp_incr_quickack(tp);
-			tcp_mem_reclaim(sk);
+			sk_stream_mem_reclaim(sk);
 		}
 	}
 	tp->ack.lrcvtime = now;
@@ -3166,7 +3166,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 		__skb_queue_purge(&tp->out_of_order_queue);
 		if (tp->sack_ok)
 			tcp_sack_reset(tp);
-		tcp_mem_reclaim(sk);
+		sk_stream_mem_reclaim(sk);
 
 		if (!sock_flag(sk, SOCK_DEAD)) {
 			sk->sk_state_change(sk);
@@ -3401,7 +3401,7 @@ static void tcp_ofo_queue(struct sock *sk)
 static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
 	return (int)skb->truesize <= sk->sk_forward_alloc ||
-		tcp_mem_schedule(sk, skb->truesize, 1);
+		sk_stream_mem_schedule(sk, skb->truesize, 1);
 }
 
 static int tcp_prune_queue(struct sock *sk);
@@ -3768,14 +3768,14 @@ static int tcp_prune_queue(struct sock *sk)
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk, tp);
-	else if (tcp_memory_pressure)
+	else if (tcp_prot.memory_pressure)
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
 	tcp_collapse(sk, sk->sk_receive_queue.next,
 		     (struct sk_buff*)&sk->sk_receive_queue,
 		     tp->copied_seq, tp->rcv_nxt);
-	tcp_mem_reclaim(sk);
+	sk_stream_mem_reclaim(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
@@ -3796,7 +3796,7 @@ static int tcp_prune_queue(struct sock *sk)
 		 */
 		if (tp->sack_ok)
 			tcp_sack_reset(tp);
-		tcp_mem_reclaim(sk);
+		sk_stream_mem_reclaim(sk);
 	}
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
@@ -3848,15 +3848,15 @@ static void tcp_new_space(struct sock *sk)
 	if (tp->packets_out < tp->snd_cwnd &&
 	    !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
-	    !tcp_memory_pressure &&
-	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+	    !tcp_prot.memory_pressure &&
+	    atomic_read(&tcp_prot.memory_allocated) < tcp_prot.sysctl_mem[0]) {
 		int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
 			     MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 						   tp->reordering + 1);
 
 		sndmem *= 2*demanded;
 		if (sndmem > sk->sk_sndbuf)
-			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+			sk->sk_sndbuf = min(sndmem, tcp_prot.sysctl_wmem[2]);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
...
@@ -2086,10 +2086,10 @@ static int tcp_v4_init_sock(struct sock *sk)
 
 	tp->af_specific = &ipv4_specific;
 
-	sk->sk_sndbuf = sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+	sk->sk_sndbuf = tcp_prot.sysctl_wmem[1];
+	sk->sk_rcvbuf = tcp_prot.sysctl_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	atomic_inc(&tcp_prot.sockets_allocated);
 
 	return 0;
 }
@@ -2113,11 +2113,7 @@ static int tcp_v4_destroy_sock(struct sock *sk)
 	if (tp->bind_hash)
 		tcp_put_port(sk);
 
-	/* If sendmsg cached page exists, toss it. */
-	if (inet_sk(sk)->sndmsg_page)
-		__free_page(inet_sk(sk)->sndmsg_page);
-
-	atomic_dec(&tcp_sockets_allocated);
+	atomic_dec(&tcp_prot.sockets_allocated);
 
 	return 0;
 }
@@ -2603,6 +2599,9 @@ struct proto tcp_prot = {
 	.hash		=	tcp_v4_hash,
 	.unhash		=	tcp_unhash,
 	.get_port	=	tcp_v4_get_port,
+	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.sysctl_wmem	=	{ 4 * 1024, 16 * 1024, 128 * 1024 },
+	.sysctl_rmem	=	{ 4 * 1024, 87380, 87380 * 2 },
 };
...
@@ -801,7 +801,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 #ifdef INET_REFCNT_DEBUG
 		atomic_inc(&inet_sock_nr);
 #endif
-		atomic_inc(&tcp_sockets_allocated);
+		atomic_inc(&tcp_prot.sockets_allocated);
 
 		if (sock_flag(newsk, SOCK_KEEPOPEN))
 			tcp_reset_keepalive_timer(newsk,
...
@@ -672,7 +672,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < full_space/2) {
 		tp->ack.quick = 0;
 
-		if (tcp_memory_pressure)
+		if (tcp_prot.memory_pressure)
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
 
 		if (free_space < mss)
...
@@ -113,7 +113,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
 	if (orphans >= sysctl_tcp_max_orphans ||
 	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+	     atomic_read(&tcp_prot.memory_allocated) > tcp_prot.sysctl_mem[2])) {
 		if (net_ratelimit())
 			printk(KERN_INFO "Out of socket memory\n");
@@ -217,7 +217,7 @@ static void tcp_delack_timer(unsigned long data)
 		goto out_unlock;
 	}
 
-	tcp_mem_reclaim(sk);
+	sk_stream_mem_reclaim(sk);
 
 	if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
 		goto out;
@@ -257,8 +257,8 @@ static void tcp_delack_timer(unsigned long data)
 	TCP_CHECK_TIMER(sk);
 
 out:
-	if (tcp_memory_pressure)
-		tcp_mem_reclaim(sk);
+	if (tcp_prot.memory_pressure)
+		sk_stream_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -448,7 +448,7 @@ static void tcp_write_timer(unsigned long data)
 	TCP_CHECK_TIMER(sk);
 
 out:
-	tcp_mem_reclaim(sk);
+	sk_stream_mem_reclaim(sk);
out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -633,7 +633,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	tcp_mem_reclaim(sk);
+	sk_stream_mem_reclaim(sk);
 
 resched:
 	tcp_reset_keepalive_timer (sk, elapsed);
...
@@ -167,7 +167,7 @@ static struct xfrm_type ipip_type = {
 	.output		= ipip_output
 };
 
-static struct inet_protocol ipip_protocol = {
+static struct net_protocol ipip_protocol = {
 	.handler	=	ipip_rcv,
 	.err_handler	=	ipip_err,
 	.no_policy	=	1,
...
@@ -852,8 +852,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
 		np->cork.hop_limit = hlimit;
 		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
 		inet->cork.length = 0;
-		inet->sndmsg_page = NULL;
-		inet->sndmsg_off = 0;
+		sk->sk_sndmsg_page = NULL;
+		sk->sk_sndmsg_off = 0;
 		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
@@ -969,8 +969,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-			struct page *page = inet->sndmsg_page;
-			int off = inet->sndmsg_off;
+			struct page *page = sk->sk_sndmsg_page;
+			int off = sk->sk_sndmsg_off;
 			unsigned int left;
 
 			if (page && (left = PAGE_SIZE - off) > 0) {
@@ -982,7 +982,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
 						goto error;
 					}
 					get_page(page);
-					skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
 					frag = &skb_shinfo(skb)->frags[i];
 				}
 			} else if(i < MAX_SKB_FRAGS) {
@@ -993,8 +993,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
 					err = -ENOMEM;
 					goto error;
 				}
-				inet->sndmsg_page = page;
-				inet->sndmsg_off = 0;
+				sk->sk_sndmsg_page = page;
+				sk->sk_sndmsg_off = 0;
 				skb_fill_page_desc(skb, i, page, 0, 0);
 				frag = &skb_shinfo(skb)->frags[i];
@@ -1008,7 +1008,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
 				err = -EFAULT;
 				goto error;
 			}
-			inet->sndmsg_off += copy;
+			sk->sk_sndmsg_off += copy;
 			frag->size += copy;
 			skb->len += copy;
 			skb->data_len += copy;
...
@@ -788,7 +788,7 @@ int __init ipip6_fb_tunnel_init(struct net_device *dev)
 	return 0;
 }
 
-static struct inet_protocol sit_protocol = {
+static struct net_protocol sit_protocol = {
 	.handler	=	ipip6_rcv,
 	.err_handler	=	ipip6_err,
 };
...
@@ -1882,10 +1882,10 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_write_space = sk_stream_write_space;
 	sk->sk_use_write_queue = 1;
 
-	sk->sk_sndbuf = sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+	sk->sk_sndbuf = tcp_prot.sysctl_wmem[1];
+	sk->sk_rcvbuf = tcp_prot.sysctl_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	atomic_inc(&tcp_prot.sockets_allocated);
 
 	return 0;
 }
@@ -1893,7 +1893,6 @@ static int tcp_v6_init_sock(struct sock *sk)
 static int tcp_v6_destroy_sock(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
-	struct inet_opt *inet = inet_sk(sk);
 
 	tcp_clear_xmit_timers(sk);
@@ -1910,11 +1909,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 	if (tcp_sk(sk)->bind_hash)
 		tcp_put_port(sk);
 
-	/* If sendmsg cached page exists, toss it. */
-	if (inet->sndmsg_page != NULL)
-		__free_page(inet->sndmsg_page);
-
-	atomic_dec(&tcp_sockets_allocated);
+	atomic_dec(&tcp_prot.sockets_allocated);
 
 	return inet6_destroy_sock(sk);
 }
...
@@ -875,7 +875,7 @@ static struct inet_protosw sctp_stream_protosw = {
 };
 
 /* Register with IP layer.  */
-static struct inet_protocol sctp_protocol = {
+static struct net_protocol sctp_protocol = {
 	.handler     = sctp_rcv,
 	.err_handler = sctp_v4_err,
 	.no_policy   = 1,
...