Commit d1a4c0b3 authored by Glauber Costa, committed by David S. Miller

tcp memory pressure controls

This patch introduces memory pressure controls for the tcp
protocol. It uses the generic socket memory pressure code
introduced in earlier patches and fills in the necessary
data in the cg_proto struct.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e1aab161
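The diff below only fills in TCP's side of the generic socket accounting added in the parent commit (e1aab161). For orientation, here is a minimal, userspace-compilable sketch of the dispatch those cg_proto fields are meant to feed. The struct and field names mirror the patch (cg_proto, memory_pressure, sysctl_mem, sk_cgrp), but the definitions and the sk_under_memory_pressure() helper here are simplified stand-ins, not the kernel's code.

/*
 * Toy userspace model (not the kernel code) of how generic socket
 * accounting is expected to pick between the global per-protocol knobs
 * and the per-cgroup cg_proto knobs that this patch fills in for TCP.
 */
#include <stdio.h>
#include <stddef.h>

struct cg_proto {                 /* per-cgroup view of one protocol */
	int  *memory_pressure;    /* e.g. &tcp_memcontrol.tcp_memory_pressure */
	long *sysctl_mem;         /* e.g. tcp_memcontrol.tcp_prot_mem */
};

struct proto {                    /* global view (sysctl_tcp_mem etc.) */
	int  *memory_pressure;
	long *sysctl_mem;
};

struct sock {
	struct proto    *sk_prot;
	struct cg_proto *sk_cgrp; /* NULL for sockets in the root cgroup */
};

/* Prefer the cgroup's pressure flag when the socket is accounted to one. */
static int sk_under_memory_pressure(const struct sock *sk)
{
	if (sk->sk_cgrp && sk->sk_cgrp->memory_pressure)
		return *sk->sk_cgrp->memory_pressure;
	return sk->sk_prot->memory_pressure ?
	       *sk->sk_prot->memory_pressure : 0;
}

int main(void)
{
	int global_pressure = 0, cgroup_pressure = 1;
	long global_mem[3] = { 100, 200, 300 };

	struct proto    tcp    = { &global_pressure, global_mem };
	struct cg_proto cg_tcp = { &cgroup_pressure, global_mem };
	struct sock in_root = { &tcp, NULL };    /* root: global knobs  */
	struct sock in_cg   = { &tcp, &cg_tcp }; /* child: cgroup knobs */

	printf("root socket under pressure:   %d\n",
	       sk_under_memory_pressure(&in_root));  /* prints 0 */
	printf("cgroup socket under pressure: %d\n",
	       sk_under_memory_pressure(&in_cg));    /* prints 1 */
	return 0;
}

The point of the dispatch is that a socket accounted to a cgroup answers pressure and limit queries from its cgroup's knobs, while everything else keeps using the global per-protocol state.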
......@@ -293,6 +293,8 @@ to trigger slab reclaim when those limits are reached.
thresholds. The Memory Controller allows them to be controlled individually
per cgroup, instead of globally.
* tcp memory pressure: sockets memory pressure for the tcp protocol.
3. User Interface
0. Configuration
......
......@@ -86,6 +86,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
static inline
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
......
......@@ -64,6 +64,8 @@
#include <net/dst.h>
#include <net/checksum.h>
int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
/*
* This structure really needs to be cleaned up.
* Most of it is for TCP, and not used by any of
......
#ifndef _TCP_MEMCG_H
#define _TCP_MEMCG_H
struct tcp_memcontrol {
struct cg_proto cg_proto;
/* per-cgroup tcp memory pressure knobs */
struct res_counter tcp_memory_allocated;
struct percpu_counter tcp_sockets_allocated;
/* those two are read-mostly, leave them at the end */
long tcp_prot_mem[3];
int tcp_memory_pressure;
};
struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
#endif /* _TCP_MEMCG_H */
......@@ -50,6 +50,8 @@
#include <linux/cpu.h>
#include <linux/oom.h>
#include "internal.h"
#include <net/sock.h>
#include <net/tcp_memcontrol.h>
#include <asm/uaccess.h>
......@@ -295,6 +297,10 @@ struct mem_cgroup {
*/
struct mem_cgroup_stat_cpu nocpu_base;
spinlock_t pcp_counter_lock;
#ifdef CONFIG_INET
struct tcp_memcontrol tcp_mem;
#endif
};
/* Stuffs for move charges at task migration. */
......@@ -384,6 +390,7 @@ static void mem_cgroup_put(struct mem_cgroup *memcg);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
#ifdef CONFIG_INET
#include <net/sock.h>
#include <net/ip.h>
static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
void sock_update_memcg(struct sock *sk)
......@@ -418,6 +425,15 @@ void sock_release_memcg(struct sock *sk)
mem_cgroup_put(memcg);
}
}
struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
{
if (!memcg || mem_cgroup_is_root(memcg))
return NULL;
return &memcg->tcp_mem.cg_proto;
}
EXPORT_SYMBOL(tcp_proto_cgroup);
#endif /* CONFIG_INET */
#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
......@@ -800,7 +816,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
preempt_enable();
}
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
return container_of(cgroup_subsys_state(cont,
mem_cgroup_subsys_id), struct mem_cgroup,
......@@ -4732,14 +4748,34 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
ARRAY_SIZE(kmem_cgroup_files));
/*
* Part of this would be better living in a separate allocation
* function, leaving us with just the cgroup tree population work.
* We, however, depend on state such as the network's proto_list,
* which is only initialized after cgroup creation. The least
* cumbersome way to deal with that is to defer it all to populate
* time.
*/
if (!ret)
ret = mem_cgroup_sockets_init(cont, ss);
return ret;
};
static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
mem_cgroup_sockets_destroy(cont, ss);
}
#else
static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
{
return 0;
}
static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
}
#endif
static struct cftype mem_cgroup_files[] = {
......@@ -5098,6 +5134,8 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
kmem_cgroup_destroy(ss, cont);
mem_cgroup_put(memcg);
}
......
......@@ -136,6 +136,46 @@
#include <net/tcp.h>
#endif
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct proto *proto;
int ret = 0;
read_lock(&proto_list_lock);
list_for_each_entry(proto, &proto_list, node) {
if (proto->init_cgroup) {
ret = proto->init_cgroup(cgrp, ss);
if (ret)
goto out;
}
}
read_unlock(&proto_list_lock);
return ret;
out:
list_for_each_entry_continue_reverse(proto, &proto_list, node)
if (proto->destroy_cgroup)
proto->destroy_cgroup(cgrp, ss);
read_unlock(&proto_list_lock);
return ret;
}
void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct proto *proto;
read_lock(&proto_list_lock);
list_for_each_entry_reverse(proto, &proto_list, node)
if (proto->destroy_cgroup)
proto->destroy_cgroup(cgrp, ss);
read_unlock(&proto_list_lock);
}
#endif
/*
* Each address family might have different locking rules, so we have
* one slock key per address family:
......@@ -2291,9 +2331,6 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
......
......@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
......
......@@ -73,6 +73,7 @@
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
......@@ -1917,6 +1918,7 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();
......@@ -1974,6 +1976,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
}
sk_sockets_allocated_dec(sk);
sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
......@@ -2634,10 +2637,14 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
.init_cgroup = tcp_init_cgroup,
.destroy_cgroup = tcp_destroy_cgroup,
.proto_cgroup = tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);
static int __net_init tcp_sk_init(struct net *net)
{
return inet_ctl_sock_create(&net->ipv4.tcp_sock,
......
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <linux/memcontrol.h>
#include <linux/module.h>
static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
/* set the cgroup's pressure flag, if it has one */
if (sk->sk_cgrp->memory_pressure)
*sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
/*
* The root cgroup does not use res_counters, but rather
* relies on the data already collected by the network
* subsystem.
*/
struct res_counter *res_parent = NULL;
struct cg_proto *cg_proto, *parent_cg;
struct tcp_memcontrol *tcp;
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
tcp = tcp_from_cgproto(cg_proto);
tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
tcp->tcp_memory_pressure = 0;
parent_cg = tcp_prot.proto_cgroup(parent);
if (parent_cg)
res_parent = parent_cg->memory_allocated;
res_counter_init(&tcp->tcp_memory_allocated, res_parent);
percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
cg_proto->sysctl_mem = tcp->tcp_prot_mem;
cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
cg_proto->memcg = memcg;
return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);
void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct cg_proto *cg_proto;
struct tcp_memcontrol *tcp;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return;
tcp = tcp_from_cgproto(cg_proto);
percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
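One detail worth calling out in tcp_init_cgroup() above: hierarchy support comes entirely from handing the parent cgroup's tcp_memory_allocated counter to res_counter_init(), so charges made against a child propagate up the parent chain, while the root (for which tcp_proto_cgroup() returns NULL, leaving res_parent NULL) keeps relying on the global protocol accounting. Below is a toy, userspace-compilable model of that chaining; the counter struct and its helpers are illustrative stand-ins for res_counter, not the kernel's implementation.

/*
 * Toy model (not the kernel's res_counter) of parent chaining: each
 * non-root counter points at its parent's, so a charge in a child is
 * also accounted in every ancestor, while the root has no counter of
 * its own.
 */
#include <stdio.h>
#include <stddef.h>

struct counter {
	long usage;
	struct counter *parent;   /* NULL for a top-level counter */
};

static void counter_init(struct counter *c, struct counter *parent)
{
	c->usage = 0;
	c->parent = parent;
}

/* Charge walks up the hierarchy, much like res_counter_charge(). */
static void counter_charge(struct counter *c, long pages)
{
	for (; c; c = c->parent)
		c->usage += pages;
}

int main(void)
{
	struct counter parent_mem, child_mem;

	counter_init(&parent_mem, NULL);          /* first level under root */
	counter_init(&child_mem, &parent_mem);    /* nested cgroup          */

	counter_charge(&child_mem, 4);            /* e.g. 4 pages of skb data */
	printf("child=%ld parent=%ld\n", child_mem.usage, parent_mem.usage);
	return 0;  /* prints: child=4 parent=4 */
}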
......@@ -62,6 +62,7 @@
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <asm/uaccess.h>
......@@ -1994,6 +1995,7 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();
......@@ -2227,6 +2229,9 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
};
static const struct inet6_protocol tcpv6_protocol = {
......