Commit 334cdf67 authored by David S. Miller

Merge bk://kernel.bkbits.net/acme/net-2.6

into nuts.davemloft.net:/disk1/BK/acme-2.6
parents f4897eb3 3ac2a2d4
......@@ -129,8 +129,6 @@ struct inet_opt {
int mc_index; /* Multicast device index */
__u32 mc_addr;
struct ip_mc_socklist *mc_list; /* Group array */
struct page *sndmsg_page; /* Cached page for sendmsg */
u32 sndmsg_off; /* Cached offset for sendmsg */
/*
* Following members are used to retain the information to build
* an ip header on each ip fragmentation while the socket is corked.
......
......@@ -664,13 +664,15 @@ static inline int skb_pagelen(const struct sk_buff *skb)
return len + skb_headlen(skb);
}
static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size)
{
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
frag->page = page;
frag->page_offset = off;
frag->size = size;
skb_shinfo(skb)->nr_frags = i+1;
skb_shinfo(skb)->nr_frags = i + 1;
}
#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
......
......@@ -34,8 +34,7 @@
/* This is used to register protocols. */
struct inet_protocol
{
struct net_protocol {
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
int no_policy;
......@@ -78,15 +77,15 @@ struct inet_protosw {
#define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */
#define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */
extern struct inet_protocol *inet_protocol_base;
extern struct inet_protocol *inet_protos[MAX_INET_PROTOS];
extern struct net_protocol *inet_protocol_base;
extern struct net_protocol *inet_protos[MAX_INET_PROTOS];
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
extern struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
#endif
extern int inet_add_protocol(struct inet_protocol *prot, unsigned char num);
extern int inet_del_protocol(struct inet_protocol *prot, unsigned char num);
extern int inet_add_protocol(struct net_protocol *prot, unsigned char num);
extern int inet_del_protocol(struct net_protocol *prot, unsigned char num);
extern void inet_register_protosw(struct inet_protosw *p);
extern void inet_unregister_protosw(struct inet_protosw *p);
......
......@@ -167,6 +167,8 @@ struct sock_common {
* @sk_socket - Identd and reporting IO signals
* @sk_user_data - RPC layer private data
* @sk_owner - module that owns this socket
* @sk_sndmsg_page - cached page for sendmsg
* @sk_sndmsg_off - cached offset for sendmsg
* @sk_send_head - front of stuff to transmit
* @sk_write_pending - a write to stream socket waits to start
* @sk_queue_shrunk - write queue has been shrunk recently
......@@ -249,8 +251,10 @@ struct sock {
struct timeval sk_stamp;
struct socket *sk_socket;
void *sk_user_data;
struct sk_buff *sk_send_head;
struct module *sk_owner;
struct page *sk_sndmsg_page;
__u32 sk_sndmsg_off;
struct sk_buff *sk_send_head;
int sk_write_pending;
void *sk_security;
__u8 sk_queue_shrunk;
......@@ -487,10 +491,11 @@ extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err);
extern void sk_stream_kill_queues(struct sock *sk);
extern int sk_wait_data(struct sock *sk, long *timeo);
/* IP protocol blocks we attach to sockets.
/* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface
* transport -> network interface is defined by struct inet_proto
*/
......@@ -534,6 +539,21 @@ struct proto {
void (*unhash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
/* Memory pressure */
void (*enter_memory_pressure)(void);
atomic_t memory_allocated; /* Current allocated memory. */
atomic_t sockets_allocated; /* Current number of sockets. */
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
* All the sk_stream_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
int memory_pressure;
int sysctl_mem[3];
int sysctl_wmem[3];
int sysctl_rmem[3];
char name[32];
struct {
......@@ -624,6 +644,22 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
}
extern void __sk_stream_mem_reclaim(struct sock *sk);
extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE)
/*
 * Number of SK_STREAM_MEM_QUANTUM-sized units needed to hold @amt bytes,
 * rounded up.
 */
static inline int sk_stream_pages(int amt)
{
	const int quantum = SK_STREAM_MEM_QUANTUM;

	return (amt + quantum - 1) / quantum;
}
/*
 * Call __sk_stream_mem_reclaim() only when the socket holds at least one
 * full quantum of forward-allocated memory; otherwise do nothing.
 */
static inline void sk_stream_mem_reclaim(struct sock *sk)
{
	if (sk->sk_forward_alloc < SK_STREAM_MEM_QUANTUM)
		return;

	__sk_stream_mem_reclaim(sk);
}
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
......
......@@ -594,9 +594,6 @@ extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
......@@ -614,10 +611,6 @@ extern int sysctl_tcp_bic_low_window;
extern int sysctl_tcp_default_win_scale;
extern int sysctl_tcp_moderate_rcvbuf;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
struct open_request;
struct or_calltable {
......@@ -1867,24 +1860,7 @@ static __inline__ void tcp_openreq_init(struct open_request *req,
req->rmt_port = skb->h.th->source;
}
#define TCP_MEM_QUANTUM ((int)PAGE_SIZE)
extern void __tcp_mem_reclaim(struct sock *sk);
extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
/*
 * Call __tcp_mem_reclaim() only when the socket holds at least one full
 * TCP_MEM_QUANTUM of forward-allocated memory; otherwise do nothing.
 */
static inline void tcp_mem_reclaim(struct sock *sk)
{
	if (sk->sk_forward_alloc < TCP_MEM_QUANTUM)
		return;

	__tcp_mem_reclaim(sk);
}
/*
 * Raise the TCP memory-pressure flag. The TCPMemoryPressures counter is
 * bumped only on the transition from clear to set.
 */
static inline void tcp_enter_memory_pressure(void)
{
	if (tcp_memory_pressure)
		return;

	NET_INC_STATS(TCPMemoryPressures);
	tcp_memory_pressure = 1;
}
extern void tcp_enter_memory_pressure(void);
static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
{
......@@ -1893,7 +1869,7 @@ static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem,
if (skb) {
skb->truesize += mem;
if (sk->sk_forward_alloc >= (int)skb->truesize ||
tcp_mem_schedule(sk, skb->truesize, 0)) {
sk_stream_mem_schedule(sk, skb->truesize, 0)) {
skb_reserve(skb, MAX_TCP_HEADER);
return skb;
}
......@@ -1913,7 +1889,7 @@ static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
static inline struct page * tcp_alloc_page(struct sock *sk)
{
if (sk->sk_forward_alloc >= (int)PAGE_SIZE ||
tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
sk_stream_mem_schedule(sk, PAGE_SIZE, 0)) {
struct page *page = alloc_pages(sk->sk_allocation, 0);
if (page)
return page;
......@@ -1929,7 +1905,7 @@ static inline void tcp_writequeue_purge(struct sock *sk)
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
sk_stream_free_skb(sk, skb);
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
}
extern void tcp_listen_wlock(void);
......
......@@ -650,6 +650,14 @@ void sk_free(struct sock *sk)
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
__FUNCTION__, atomic_read(&sk->sk_omem_alloc));
/*
* If sendmsg cached page exists, toss it.
*/
if (sk->sk_sndmsg_page) {
__free_page(sk->sk_sndmsg_page);
sk->sk_sndmsg_page = NULL;
}
security_sk_free(sk);
kmem_cache_free(sk->sk_slab, sk);
module_put(owner);
......@@ -1175,6 +1183,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_error_report = sock_def_error_report;
sk->sk_destruct = sock_def_destruct;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
sk->sk_peercred.pid = 0;
sk->sk_peercred.uid = -1;
sk->sk_peercred.gid = -1;
......
......@@ -188,3 +188,100 @@ int sk_stream_error(struct sock *sk, int flags, int err)
}
EXPORT_SYMBOL(sk_stream_error);
/*
 * Return every whole quantum of the socket's forward allocation to the
 * protocol-wide counter, keeping only the sub-quantum remainder on the
 * socket.  If the protocol was flagged as under memory pressure and the
 * counter is now below sysctl_mem[0], the flag is cleared.
 */
void __sk_stream_mem_reclaim(struct sock *sk)
{
	/* Less than one full quantum held: nothing to give back. */
	if (sk->sk_forward_alloc < SK_STREAM_MEM_QUANTUM)
		return;

	atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
		   &sk->sk_prot->memory_allocated);

	/* Keep only the remainder below one quantum. */
	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;

	if (sk->sk_prot->memory_pressure &&
	    (atomic_read(&sk->sk_prot->memory_allocated) <
	     sk->sk_prot->sysctl_mem[0]))
		sk->sk_prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_stream_mem_reclaim);
/*
 * Try to charge @size bytes (rounded up to whole quanta) to @sk and to the
 * protocol-wide memory_allocated counter of sk->sk_prot.
 *
 * @sk:   socket being charged
 * @size: number of bytes requested
 * @kind: non-zero selects the receive-side checks (sk_rmem_alloc against
 *        sysctl_rmem[0]); zero selects the send-side checks (sk_wmem_queued
 *        against sysctl_wmem[0] and the sndbuf test on the suppress path)
 *
 * The charge is applied first and rolled back only if the request is
 * refused.  Returns 1 when the allocation may proceed (charge kept) and
 * 0 when it is refused (charge undone).
 */
int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
{
int amt = sk_stream_pages(size);
/* Charge optimistically; undone at the bottom if we end up refusing. */
sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
atomic_add(amt, &sk->sk_prot->memory_allocated);
/* Under limit. */
if (atomic_read(&sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
if (sk->sk_prot->memory_pressure)
sk->sk_prot->memory_pressure = 0;
return 1;
}
/* Over hard limit. */
if (atomic_read(&sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
sk->sk_prot->enter_memory_pressure();
goto suppress_allocation;
}
/* Under pressure. */
if (atomic_read(&sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
sk->sk_prot->enter_memory_pressure();
/* A socket below its per-direction minimum is always allowed. */
if (kind) {
if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
return 1;
} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
return 1;
/*
 * Allow when the protocol is not under pressure, or when this socket's
 * total usage in quanta multiplied by the number of allocated sockets
 * stays below the hard limit sysctl_mem[2].
 */
if (!sk->sk_prot->memory_pressure ||
sk->sk_prot->sysctl_mem[2] > atomic_read(&sk->sk_prot->sockets_allocated) *
sk_stream_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
return 1;
suppress_allocation:
if (!kind) {
sk_stream_moderate_sndbuf(sk);
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
return 1;
}
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
atomic_sub(amt, &sk->sk_prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(sk_stream_mem_schedule);
/*
 * Drop everything queued on a stream socket that is being torn down:
 * purge the receive and error queues, check that the write queue is
 * already empty, reclaim forward-allocated memory, and then check that
 * no queued-write or forward-allocation accounting remains.
 */
void sk_stream_kill_queues(struct sock *sk)
{
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the error queue. */
__skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue. */
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
sk_stream_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
* have gone away; only the net layer can touch it.
*/
}
EXPORT_SYMBOL(sk_stream_kill_queues);
......@@ -1041,24 +1041,24 @@ void inet_unregister_protosw(struct inet_protosw *p)
}
#ifdef CONFIG_IP_MULTICAST
static struct inet_protocol igmp_protocol = {
static struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
};
#endif
static struct inet_protocol tcp_protocol = {
static struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
};
static struct inet_protocol udp_protocol = {
static struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
};
static struct inet_protocol icmp_protocol = {
static struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
};
......
......@@ -343,7 +343,7 @@ static struct xfrm_type ah_type =
.output = ah_output
};
static struct inet_protocol ah4_protocol = {
static struct net_protocol ah4_protocol = {
.handler = xfrm4_rcv,
.err_handler = ah4_err,
.no_policy = 1,
......
......@@ -595,7 +595,7 @@ static struct xfrm_type esp_type =
.output = esp_output
};
static struct inet_protocol esp4_protocol = {
static struct net_protocol esp4_protocol = {
.handler = xfrm4_rcv,
.err_handler = esp4_err,
.no_policy = 1,
......
......@@ -592,7 +592,7 @@ static void icmp_unreach(struct sk_buff *skb)
struct iphdr *iph;
struct icmphdr *icmph;
int hash, protocol;
struct inet_protocol *ipprot;
struct net_protocol *ipprot;
struct sock *raw_sk;
u32 info = 0;
......
......@@ -1228,7 +1228,7 @@ int __init ipgre_fb_tunnel_init(struct net_device *dev)
}
static struct inet_protocol ipgre_protocol = {
static struct net_protocol ipgre_protocol = {
.handler = ipgre_rcv,
.err_handler = ipgre_err,
};
......
......@@ -219,7 +219,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
int protocol = skb->nh.iph->protocol;
int hash;
struct sock *raw_sk;
struct inet_protocol *ipprot;
struct net_protocol *ipprot;
resubmit:
hash = protocol & (MAX_INET_PROTOS - 1);
......
......@@ -766,8 +766,8 @@ int ip_append_data(struct sock *sk,
inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
inet->cork.rt = rt;
inet->cork.length = 0;
inet->sndmsg_page = NULL;
inet->sndmsg_off = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
if ((exthdrlen = rt->u.dst.header_len) != 0) {
length += exthdrlen;
transhdrlen += exthdrlen;
......@@ -915,8 +915,8 @@ int ip_append_data(struct sock *sk,
} else {
int i = skb_shinfo(skb)->nr_frags;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
struct page *page = inet->sndmsg_page;
int off = inet->sndmsg_off;
struct page *page = sk->sk_sndmsg_page;
int off = sk->sk_sndmsg_off;
unsigned int left;
if (page && (left = PAGE_SIZE - off) > 0) {
......@@ -928,7 +928,7 @@ int ip_append_data(struct sock *sk,
goto error;
}
get_page(page);
skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
......@@ -939,8 +939,8 @@ int ip_append_data(struct sock *sk,
err = -ENOMEM;
goto error;
}
inet->sndmsg_page = page;
inet->sndmsg_off = 0;
sk->sk_sndmsg_page = page;
sk->sk_sndmsg_off = 0;
skb_fill_page_desc(skb, i, page, 0, 0);
frag = &skb_shinfo(skb)->frags[i];
......@@ -954,7 +954,7 @@ int ip_append_data(struct sock *sk,
err = -EFAULT;
goto error;
}
inet->sndmsg_off += copy;
sk->sk_sndmsg_off += copy;
frag->size += copy;
skb->len += copy;
skb->data_len += copy;
......
......@@ -409,7 +409,7 @@ static struct xfrm_type ipcomp_type = {
.output = ipcomp_output
};
static struct inet_protocol ipcomp4_protocol = {
static struct net_protocol ipcomp4_protocol = {
.handler = xfrm4_rcv,
.err_handler = ipcomp4_err,
.no_policy = 1,
......
......@@ -109,7 +109,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
static struct inet_protocol pim_protocol;
static struct net_protocol pim_protocol;
static struct timer_list ipmr_expire_timer;
......@@ -1867,7 +1867,7 @@ static struct file_operations ipmr_mfc_fops = {
#endif
#ifdef CONFIG_IP_PIMSM_V2
static struct inet_protocol pim_protocol = {
static struct net_protocol pim_protocol = {
.handler = pim_rcv,
};
#endif
......
......@@ -65,8 +65,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
tcp_tw_count, atomic_read(&tcp_prot.sockets_allocated),
atomic_read(&tcp_prot.memory_allocated));
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
......
......@@ -48,14 +48,14 @@
#include <net/ipip.h>
#include <linux/igmp.h>
struct inet_protocol *inet_protos[MAX_INET_PROTOS];
struct net_protocol *inet_protos[MAX_INET_PROTOS];
static spinlock_t inet_proto_lock = SPIN_LOCK_UNLOCKED;
/*
* Add a protocol handler to the hash tables
*/
int inet_add_protocol(struct inet_protocol *prot, unsigned char protocol)
int inet_add_protocol(struct net_protocol *prot, unsigned char protocol)
{
int hash, ret;
......@@ -77,7 +77,7 @@ int inet_add_protocol(struct inet_protocol *prot, unsigned char protocol)
* Remove a protocol from the hash tables.
*/
int inet_del_protocol(struct inet_protocol *prot, unsigned char protocol)
int inet_del_protocol(struct net_protocol *prot, unsigned char protocol)
{
int hash, ret;
......
......@@ -508,24 +508,24 @@ ctl_table ipv4_table[] = {
{
.ctl_name = NET_TCP_MEM,
.procname = "tcp_mem",
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.data = &tcp_prot.sysctl_mem,
.maxlen = sizeof(tcp_prot.sysctl_mem),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_TCP_WMEM,
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
.data = &tcp_prot.sysctl_wmem,
.maxlen = sizeof(tcp_prot.sysctl_wmem),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_TCP_RMEM,
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
.data = &tcp_prot.sysctl_rmem,
.maxlen = sizeof(tcp_prot.sysctl_rmem),
.mode = 0644,
.proc_handler = &proc_dointvec
},
......
......@@ -278,85 +278,11 @@ atomic_t tcp_orphan_count = ATOMIC_INIT(0);
int sysctl_tcp_default_win_scale = 7;
int sysctl_tcp_mem[3];
int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
atomic_t tcp_memory_allocated; /* Current allocated memory. */
atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */
/* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
* All the tcp_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency. */
int tcp_memory_pressure;
#define TCP_PAGES(amt) (((amt) + TCP_MEM_QUANTUM - 1) / TCP_MEM_QUANTUM)
/*
 * Try to charge @size bytes (rounded up to whole TCP_MEM_QUANTUM units) to
 * @sk and to the global tcp_memory_allocated counter.
 *
 * @sk:   socket being charged
 * @size: number of bytes requested
 * @kind: non-zero selects the receive-side check (sk_rmem_alloc against
 *        sysctl_tcp_rmem[0]); zero selects the send-side checks
 *        (sk_wmem_queued against sysctl_tcp_wmem[0] and the sndbuf test)
 *
 * The charge is applied first and rolled back only if the request is
 * refused.  Returns 1 when the allocation may proceed and 0 when it is
 * refused.
 */
int tcp_mem_schedule(struct sock *sk, int size, int kind)
{
int amt = TCP_PAGES(size);
/* Charge optimistically; undone at the bottom if we end up refusing. */
sk->sk_forward_alloc += amt * TCP_MEM_QUANTUM;
atomic_add(amt, &tcp_memory_allocated);
/* Under limit. */
if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
if (tcp_memory_pressure)
tcp_memory_pressure = 0;
return 1;
}
/* Over hard limit. */
if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) {
tcp_enter_memory_pressure();
goto suppress_allocation;
}
/* Under pressure. */
if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1])
tcp_enter_memory_pressure();
/* A socket below its per-direction minimum is always allowed. */
if (kind) {
if (atomic_read(&sk->sk_rmem_alloc) < sysctl_tcp_rmem[0])
return 1;
} else if (sk->sk_wmem_queued < sysctl_tcp_wmem[0])
return 1;
/*
 * Allow when TCP is not under pressure, or when this socket's total
 * usage in quanta multiplied by the number of allocated TCP sockets
 * stays below the hard limit sysctl_tcp_mem[2].
 */
if (!tcp_memory_pressure ||
sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) *
TCP_PAGES(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
return 1;
suppress_allocation:
if (!kind) {
sk_stream_moderate_sndbuf(sk);
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
return 1;
}
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * TCP_MEM_QUANTUM;
atomic_sub(amt, &tcp_memory_allocated);
return 0;
}
void __tcp_mem_reclaim(struct sock *sk)
void tcp_enter_memory_pressure(void)
{
if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM) {
atomic_sub(sk->sk_forward_alloc / TCP_MEM_QUANTUM,
&tcp_memory_allocated);
sk->sk_forward_alloc &= TCP_MEM_QUANTUM - 1;
if (tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
tcp_memory_pressure = 0;
if (!tcp_prot.memory_pressure) {
NET_INC_STATS(TCPMemoryPressures);
tcp_prot.memory_pressure = 1;
}
}
......@@ -628,16 +554,6 @@ static void tcp_listen_stop (struct sock *sk)
BUG_TRAP(!sk->sk_ack_backlog);
}
/*
 * Fill fragment slot @i of @skb with @page, starting at byte @off and
 * covering @size bytes, and set the fragment count to @i + 1.
 */
static inline void fill_page_desc(struct sk_buff *skb, int i,
				  struct page *page, int off, int size)
{
	skb_frag_t *slot = &skb_shinfo(skb)->frags[i];

	slot->size = size;
	slot->page_offset = off;
	slot->page = page;

	skb_shinfo(skb)->nr_frags = i + 1;
}
static inline void tcp_mark_push(struct tcp_opt *tp, struct sk_buff *skb)
{
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
......@@ -740,7 +656,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
skb_shinfo(skb)->frags[i - 1].size += copy;
} else if (i < MAX_SKB_FRAGS) {
get_page(page);
fill_page_desc(skb, i, page, offset, copy);
skb_fill_page_desc(skb, i, page, offset, copy);
} else {
tcp_mark_push(tp, skb);
goto new_segment;
......@@ -816,8 +732,8 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
return res;
}
#define TCP_PAGE(sk) (inet_sk(sk)->sndmsg_page)
#define TCP_OFF(sk) (inet_sk(sk)->sndmsg_off)
#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
#define TCP_OFF(sk) (sk->sk_sndmsg_off)
static inline int select_size(struct sock *sk, struct tcp_opt *tp)
{
......@@ -980,7 +896,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
skb_shinfo(skb)->frags[i - 1].size +=
copy;
} else {
fill_page_desc(skb, i, page, off, copy);
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
get_page(page);
} else if (off + copy < PAGE_SIZE) {
......@@ -1634,29 +1550,6 @@ void tcp_shutdown(struct sock *sk, int how)
}
}
/*
 * Drop everything queued on a TCP socket that is being torn down: purge
 * the receive and error queues, check that the write queue is already
 * empty, reclaim forward-allocated memory, and then check that no
 * queued-write or forward-allocation accounting remains.
 */
static __inline__ void tcp_kill_sk_queues(struct sock *sk)
{
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the error queue. */
__skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue. */
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
tcp_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
* have gone away; only the net layer can touch it.
*/
}
/*
* At this point, there should be no process reference to this
* socket, and thus no user references at all. Therefore we
......@@ -1684,7 +1577,7 @@ void tcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
tcp_kill_sk_queues(sk);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
......@@ -1727,7 +1620,7 @@ void tcp_close(struct sock *sk, long timeout)
__kfree_skb(skb);
}
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
* 3.10, we send a RST here because data was lost. To
......@@ -1826,10 +1719,10 @@ void tcp_close(struct sock *sk, long timeout)
}
}
if (sk->sk_state != TCP_CLOSE) {
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
atomic_read(&tcp_prot.memory_allocated) > tcp_prot.sysctl_mem[2])) {
if (net_ratelimit())
printk(KERN_INFO "TCP: too many of orphaned "
"sockets\n");
......@@ -2376,15 +2269,15 @@ void __init tcp_init(void)
}
tcp_port_rover = sysctl_local_port_range[0] - 1;
sysctl_tcp_mem[0] = 768 << order;
sysctl_tcp_mem[1] = 1024 << order;
sysctl_tcp_mem[2] = 1536 << order;
tcp_prot.sysctl_mem[0] = 768 << order;
tcp_prot.sysctl_mem[1] = 1024 << order;
tcp_prot.sysctl_mem[2] = 1536 << order;
if (order < 3) {
sysctl_tcp_wmem[2] = 64 * 1024;
sysctl_tcp_rmem[0] = PAGE_SIZE;
sysctl_tcp_rmem[1] = 43689;
sysctl_tcp_rmem[2] = 2 * 43689;
tcp_prot.sysctl_wmem[2] = 64 * 1024;
tcp_prot.sysctl_rmem[0] = PAGE_SIZE;
tcp_prot.sysctl_rmem[1] = 43689;
tcp_prot.sysctl_rmem[2] = 2 * 43689;
}
printk(KERN_INFO "TCP: Hash tables configured "
......@@ -2394,9 +2287,6 @@ void __init tcp_init(void)
tcpdiag_init();
}
EXPORT_SYMBOL(__tcp_mem_reclaim);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
EXPORT_SYMBOL(tcp_accept);
EXPORT_SYMBOL(tcp_close);
EXPORT_SYMBOL(tcp_close_state);
......@@ -2412,6 +2302,5 @@ EXPORT_SYMBOL(tcp_sendmsg);
EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown);
EXPORT_SYMBOL(tcp_sockets_allocated);
EXPORT_SYMBOL(tcp_statistics);
EXPORT_SYMBOL(tcp_timewait_cachep);
......@@ -207,7 +207,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
sizeof(struct sk_buff);
if (sk->sk_sndbuf < 3 * sndmem)
sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
sk->sk_sndbuf = min(3 * sndmem, tcp_prot.sysctl_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
......@@ -259,7 +259,7 @@ tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
!tcp_memory_pressure) {
!tcp_prot.memory_pressure) {
int incr;
/* Check #2. Increase window, if skb with such overhead
......@@ -291,7 +291,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128;
if (sk->sk_rcvbuf < 4 * rcvmem)
sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
sk->sk_rcvbuf = min(4 * rcvmem, tcp_prot.sysctl_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
......@@ -347,12 +347,12 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
* do not clamp window. Try to expand rcvbuf instead.
*/
if (ofo_win) {
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
if (sk->sk_rcvbuf < tcp_prot.sysctl_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
!tcp_prot.memory_pressure &&
atomic_read(&tcp_prot.memory_allocated) < tcp_prot.sysctl_mem[0])
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
tcp_prot.sysctl_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
app_win += ofo_win;
......@@ -477,7 +477,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128;
space *= rcvmem;
space = min(space, sysctl_tcp_rmem[2]);
space = min(space, tcp_prot.sysctl_rmem[2]);
if (space > sk->sk_rcvbuf) {
sk->sk_rcvbuf = space;
......@@ -535,7 +535,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
* restart window, so that we send ACKs quickly.
*/
tcp_incr_quickack(tp);
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
}
}
tp->ack.lrcvtime = now;
......@@ -3166,7 +3166,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
__skb_queue_purge(&tp->out_of_order_queue);
if (tp->sack_ok)
tcp_sack_reset(tp);
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
......@@ -3401,7 +3401,7 @@ static void tcp_ofo_queue(struct sock *sk)
static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb)
{
return (int)skb->truesize <= sk->sk_forward_alloc ||
tcp_mem_schedule(sk, skb->truesize, 1);
sk_stream_mem_schedule(sk, skb->truesize, 1);
}
static int tcp_prune_queue(struct sock *sk);
......@@ -3768,14 +3768,14 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk, tp);
else if (tcp_memory_pressure)
else if (tcp_prot.memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
tcp_collapse(sk, sk->sk_receive_queue.next,
(struct sk_buff*)&sk->sk_receive_queue,
tp->copied_seq, tp->rcv_nxt);
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
......@@ -3796,7 +3796,7 @@ static int tcp_prune_queue(struct sock *sk)
*/
if (tp->sack_ok)
tcp_sack_reset(tp);
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
}
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
......@@ -3848,15 +3848,15 @@ static void tcp_new_space(struct sock *sk)
if (tp->packets_out < tp->snd_cwnd &&
!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
!tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
!tcp_prot.memory_pressure &&
atomic_read(&tcp_prot.memory_allocated) < tcp_prot.sysctl_mem[0]) {
int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
sndmem *= 2*demanded;
if (sndmem > sk->sk_sndbuf)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
sk->sk_sndbuf = min(sndmem, tcp_prot.sysctl_wmem[2]);
tp->snd_cwnd_stamp = tcp_time_stamp;
}
......
......@@ -2086,10 +2086,10 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->af_specific = &ipv4_specific;
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
sk->sk_sndbuf = tcp_prot.sysctl_wmem[1];
sk->sk_rcvbuf = tcp_prot.sysctl_rmem[1];
atomic_inc(&tcp_sockets_allocated);
atomic_inc(&tcp_prot.sockets_allocated);
return 0;
}
......@@ -2113,11 +2113,7 @@ static int tcp_v4_destroy_sock(struct sock *sk)
if (tp->bind_hash)
tcp_put_port(sk);
/* If sendmsg cached page exists, toss it. */
if (inet_sk(sk)->sndmsg_page)
__free_page(inet_sk(sk)->sndmsg_page);
atomic_dec(&tcp_sockets_allocated);
atomic_dec(&tcp_prot.sockets_allocated);
return 0;
}
......@@ -2603,6 +2599,9 @@ struct proto tcp_prot = {
.hash = tcp_v4_hash,
.unhash = tcp_unhash,
.get_port = tcp_v4_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sysctl_wmem = { 4 * 1024, 16 * 1024, 128 * 1024 },
.sysctl_rmem = { 4 * 1024, 87380, 87380 * 2 },
};
......
......@@ -801,7 +801,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
#ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr);
#endif
atomic_inc(&tcp_sockets_allocated);
atomic_inc(&tcp_prot.sockets_allocated);
if (sock_flag(newsk, SOCK_KEEPOPEN))
tcp_reset_keepalive_timer(newsk,
......
......@@ -672,7 +672,7 @@ u32 __tcp_select_window(struct sock *sk)
if (free_space < full_space/2) {
tp->ack.quick = 0;
if (tcp_memory_pressure)
if (tcp_prot.memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
if (free_space < mss)
......
......@@ -113,7 +113,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if (orphans >= sysctl_tcp_max_orphans ||
(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
atomic_read(&tcp_prot.memory_allocated) > tcp_prot.sysctl_mem[2])) {
if (net_ratelimit())
printk(KERN_INFO "Out of socket memory\n");
......@@ -217,7 +217,7 @@ static void tcp_delack_timer(unsigned long data)
goto out_unlock;
}
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
goto out;
......@@ -257,8 +257,8 @@ static void tcp_delack_timer(unsigned long data)
TCP_CHECK_TIMER(sk);
out:
if (tcp_memory_pressure)
tcp_mem_reclaim(sk);
if (tcp_prot.memory_pressure)
sk_stream_mem_reclaim(sk);
out_unlock:
bh_unlock_sock(sk);
sock_put(sk);
......@@ -448,7 +448,7 @@ static void tcp_write_timer(unsigned long data)
TCP_CHECK_TIMER(sk);
out:
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
out_unlock:
bh_unlock_sock(sk);
sock_put(sk);
......@@ -633,7 +633,7 @@ static void tcp_keepalive_timer (unsigned long data)
}
TCP_CHECK_TIMER(sk);
tcp_mem_reclaim(sk);
sk_stream_mem_reclaim(sk);
resched:
tcp_reset_keepalive_timer (sk, elapsed);
......
......@@ -167,7 +167,7 @@ static struct xfrm_type ipip_type = {
.output = ipip_output
};
static struct inet_protocol ipip_protocol = {
static struct net_protocol ipip_protocol = {
.handler = ipip_rcv,
.err_handler = ipip_err,
.no_policy = 1,
......
......@@ -852,8 +852,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
np->cork.hop_limit = hlimit;
inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
inet->cork.length = 0;
inet->sndmsg_page = NULL;
inet->sndmsg_off = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
......@@ -969,8 +969,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
} else {
int i = skb_shinfo(skb)->nr_frags;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
struct page *page = inet->sndmsg_page;
int off = inet->sndmsg_off;
struct page *page = sk->sk_sndmsg_page;
int off = sk->sk_sndmsg_off;
unsigned int left;
if (page && (left = PAGE_SIZE - off) > 0) {
......@@ -982,7 +982,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
goto error;
}
get_page(page);
skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
} else if(i < MAX_SKB_FRAGS) {
......@@ -993,8 +993,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
err = -ENOMEM;
goto error;
}
inet->sndmsg_page = page;
inet->sndmsg_off = 0;
sk->sk_sndmsg_page = page;
sk->sk_sndmsg_off = 0;
skb_fill_page_desc(skb, i, page, 0, 0);
frag = &skb_shinfo(skb)->frags[i];
......@@ -1008,7 +1008,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
err = -EFAULT;
goto error;
}
inet->sndmsg_off += copy;
sk->sk_sndmsg_off += copy;
frag->size += copy;
skb->len += copy;
skb->data_len += copy;
......
......@@ -788,7 +788,7 @@ int __init ipip6_fb_tunnel_init(struct net_device *dev)
return 0;
}
static struct inet_protocol sit_protocol = {
static struct net_protocol sit_protocol = {
.handler = ipip6_rcv,
.err_handler = ipip6_err,
};
......
......@@ -1882,10 +1882,10 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_write_space = sk_stream_write_space;
sk->sk_use_write_queue = 1;
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
sk->sk_sndbuf = tcp_prot.sysctl_wmem[1];
sk->sk_rcvbuf = tcp_prot.sysctl_rmem[1];
atomic_inc(&tcp_sockets_allocated);
atomic_inc(&tcp_prot.sockets_allocated);
return 0;
}
......@@ -1893,7 +1893,6 @@ static int tcp_v6_init_sock(struct sock *sk)
static int tcp_v6_destroy_sock(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct inet_opt *inet = inet_sk(sk);
tcp_clear_xmit_timers(sk);
......@@ -1910,11 +1909,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
if (tcp_sk(sk)->bind_hash)
tcp_put_port(sk);
/* If sendmsg cached page exists, toss it. */
if (inet->sndmsg_page != NULL)
__free_page(inet->sndmsg_page);
atomic_dec(&tcp_sockets_allocated);
atomic_dec(&tcp_prot.sockets_allocated);
return inet6_destroy_sock(sk);
}
......
......@@ -875,7 +875,7 @@ static struct inet_protosw sctp_stream_protosw = {
};
/* Register with IP layer. */
static struct inet_protocol sctp_protocol = {
static struct net_protocol sctp_protocol = {
.handler = sctp_rcv,
.err_handler = sctp_v4_err,
.no_policy = 1,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment