Commit e805605c authored by Johannes Weiner, committed by Linus Torvalds

net: tcp_memcontrol: sanitize tcp memory accounting callbacks

There won't be a tcp control soft limit, so integrating the memcg code
into the global skmem limiting scheme complicates things unnecessarily.
Replace this with simple and clear charge and uncharge calls--hidden
behind a jump label--to account skb memory.

Note that this is not purely aesthetic: as a result of shoehorning the
per-memcg code into the same memory accounting functions that handle the
global level, the old code would compare the per-memcg consumption
against the smaller of the per-memcg limit and the global limit.  This
allowed the total consumption of multiple sockets to exceed the global
limit, as long as the individual sockets stayed within bounds.  After
this change, the code will always compare the per-memcg consumption to
the per-memcg limit, and the global consumption to the global limit, and
thus close this loophole.

Without a soft limit, the per-memcg memory pressure state in sockets is
generally questionable.  However, we did it until now, so we continue to
enter it when the hard limit is hit, and packets are dropped, to let
other sockets in the cgroup know that they shouldn't grow their transmit
windows, either.  However, keep it simple in the new callback model and
leave memory pressure lazily when the next packet is accepted (as
opposed to doing it synchronously when packets are processed).  When
packets are dropped, network performance will already be in the toilet,
so that should be a reasonable trade-off.

As described above, consumption is now checked on the per-memcg level
and the global level separately.  Likewise, memory pressure states are
maintained on both the per-memcg level and the global level, and a
socket is considered under pressure when either level asserts as much.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 80f23124
...@@ -660,12 +660,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) ...@@ -660,12 +660,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
} }
#endif /* CONFIG_MEMCG */ #endif /* CONFIG_MEMCG */
enum {
UNDER_LIMIT,
SOFT_LIMIT,
OVER_LIMIT,
};
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
...@@ -694,6 +688,19 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, ...@@ -694,6 +688,19 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
struct sock; struct sock;
void sock_update_memcg(struct sock *sk); void sock_update_memcg(struct sock *sk);
void sock_release_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk);
bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages);
void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages);
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
/*
 * Report the memory pressure state recorded for @proto by reading its
 * memory_pressure field.
 */
static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
{
	return proto->memory_pressure;
}
#else
/*
 * Stub used when CONFIG_MEMCG_KMEM and CONFIG_INET are not both set:
 * never reports pressure.  It must carry the same name as the real
 * helper above so that callers build in either configuration.
 */
static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
{
	return false;
}
#endif
#ifdef CONFIG_MEMCG_KMEM #ifdef CONFIG_MEMCG_KMEM
extern struct static_key memcg_kmem_enabled_key; extern struct static_key memcg_kmem_enabled_key;
......
...@@ -1129,8 +1129,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) ...@@ -1129,8 +1129,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
if (!sk->sk_prot->memory_pressure) if (!sk->sk_prot->memory_pressure)
return false; return false;
if (mem_cgroup_sockets_enabled && sk->sk_cgrp) if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
return !!sk->sk_cgrp->memory_pressure; mem_cgroup_under_socket_pressure(sk->sk_cgrp))
return true;
return !!*sk->sk_prot->memory_pressure; return !!*sk->sk_prot->memory_pressure;
} }
...@@ -1144,9 +1145,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk) ...@@ -1144,9 +1145,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
if (*memory_pressure) if (*memory_pressure)
*memory_pressure = 0; *memory_pressure = 0;
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
sk->sk_cgrp->memory_pressure = 0;
} }
static inline void sk_enter_memory_pressure(struct sock *sk) static inline void sk_enter_memory_pressure(struct sock *sk)
...@@ -1154,76 +1152,30 @@ static inline void sk_enter_memory_pressure(struct sock *sk) ...@@ -1154,76 +1152,30 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
if (!sk->sk_prot->enter_memory_pressure) if (!sk->sk_prot->enter_memory_pressure)
return; return;
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
sk->sk_cgrp->memory_pressure = 1;
sk->sk_prot->enter_memory_pressure(sk); sk->sk_prot->enter_memory_pressure(sk);
} }
static inline long sk_prot_mem_limits(const struct sock *sk, int index) static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{ {
long limit = sk->sk_prot->sysctl_mem[index]; return sk->sk_prot->sysctl_mem[index];
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit);
return limit;
}
static inline void memcg_memory_allocated_add(struct cg_proto *prot,
unsigned long amt,
int *parent_status)
{
struct page_counter *counter;
if (page_counter_try_charge(&prot->memory_allocated, amt, &counter))
return;
page_counter_charge(&prot->memory_allocated, amt);
*parent_status = OVER_LIMIT;
}
static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
unsigned long amt)
{
page_counter_uncharge(&prot->memory_allocated, amt);
} }
static inline long static inline long
sk_memory_allocated(const struct sock *sk) sk_memory_allocated(const struct sock *sk)
{ {
struct proto *prot = sk->sk_prot; return atomic_long_read(sk->sk_prot->memory_allocated);
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
return page_counter_read(&sk->sk_cgrp->memory_allocated);
return atomic_long_read(prot->memory_allocated);
} }
static inline long static inline long
sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) sk_memory_allocated_add(struct sock *sk, int amt)
{ {
struct proto *prot = sk->sk_prot; return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
/* update the root cgroup regardless */
atomic_long_add_return(amt, prot->memory_allocated);
return page_counter_read(&sk->sk_cgrp->memory_allocated);
}
return atomic_long_add_return(amt, prot->memory_allocated);
} }
static inline void static inline void
sk_memory_allocated_sub(struct sock *sk, int amt) sk_memory_allocated_sub(struct sock *sk, int amt)
{ {
struct proto *prot = sk->sk_prot; atomic_long_sub(amt, sk->sk_prot->memory_allocated);
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
memcg_memory_allocated_sub(sk->sk_cgrp, amt);
atomic_long_sub(amt, prot->memory_allocated);
} }
static inline void sk_sockets_allocated_dec(struct sock *sk) static inline void sk_sockets_allocated_dec(struct sock *sk)
......
...@@ -289,8 +289,9 @@ extern int tcp_memory_pressure; ...@@ -289,8 +289,9 @@ extern int tcp_memory_pressure;
/* optimized version of sk_under_memory_pressure() for TCP sockets */ /* optimized version of sk_under_memory_pressure() for TCP sockets */
static inline bool tcp_under_memory_pressure(const struct sock *sk) static inline bool tcp_under_memory_pressure(const struct sock *sk)
{ {
if (mem_cgroup_sockets_enabled && sk->sk_cgrp) if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
return !!sk->sk_cgrp->memory_pressure; mem_cgroup_under_socket_pressure(sk->sk_cgrp))
return true;
return tcp_memory_pressure; return tcp_memory_pressure;
} }
......
...@@ -338,6 +338,38 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) ...@@ -338,6 +338,38 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
} }
EXPORT_SYMBOL(tcp_proto_cgroup); EXPORT_SYMBOL(tcp_proto_cgroup);
/**
 * mem_cgroup_charge_skmem - charge socket memory
 * @proto: proto to charge
 * @nr_pages: number of pages to charge
 *
 * First attempts a limit-respecting charge of @nr_pages against
 * @proto's memory_allocated counter.  If that attempt fails, the pages
 * are charged unconditionally instead.  @proto's memory_pressure flag is
 * cleared when the charge fit and set when it had to be forced.
 *
 * Returns %true if the charge fit, %false if it was forced.
 */
bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
{
	struct page_counter *fail;
	bool fits;

	fits = page_counter_try_charge(&proto->memory_allocated, nr_pages,
				       &fail);
	if (!fits)
		page_counter_charge(&proto->memory_allocated, nr_pages);

	proto->memory_pressure = fits ? 0 : 1;
	return fits;
}
/**
 * mem_cgroup_uncharge_skmem - uncharge socket memory
 * @proto: proto to uncharge
 * @nr_pages: number of pages to uncharge
 *
 * Uncharges @nr_pages from @proto's memory_allocated page counter.
 */
void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
{
page_counter_uncharge(&proto->memory_allocated, nr_pages);
}
#endif #endif
#ifdef CONFIG_MEMCG_KMEM #ifdef CONFIG_MEMCG_KMEM
......
...@@ -2084,27 +2084,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) ...@@ -2084,27 +2084,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
struct proto *prot = sk->sk_prot; struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size); int amt = sk_mem_pages(size);
long allocated; long allocated;
int parent_status = UNDER_LIMIT;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
allocated = sk_memory_allocated_add(sk, amt, &parent_status); allocated = sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
!mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
goto suppress_allocation;
/* Under limit. */ /* Under limit. */
if (parent_status == UNDER_LIMIT && if (allocated <= sk_prot_mem_limits(sk, 0)) {
allocated <= sk_prot_mem_limits(sk, 0)) {
sk_leave_memory_pressure(sk); sk_leave_memory_pressure(sk);
return 1; return 1;
} }
/* Under pressure. (we or our parents) */ /* Under pressure. */
if ((parent_status > SOFT_LIMIT) || if (allocated > sk_prot_mem_limits(sk, 1))
allocated > sk_prot_mem_limits(sk, 1))
sk_enter_memory_pressure(sk); sk_enter_memory_pressure(sk);
/* Over hard limit (we or our parents) */ /* Over hard limit. */
if ((parent_status == OVER_LIMIT) || if (allocated > sk_prot_mem_limits(sk, 2))
(allocated > sk_prot_mem_limits(sk, 2)))
goto suppress_allocation; goto suppress_allocation;
/* guarantee minimum buffer size under pressure */ /* guarantee minimum buffer size under pressure */
...@@ -2153,6 +2153,9 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) ...@@ -2153,6 +2153,9 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
sk_memory_allocated_sub(sk, amt); sk_memory_allocated_sub(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
return 0; return 0;
} }
EXPORT_SYMBOL(__sk_mem_schedule); EXPORT_SYMBOL(__sk_mem_schedule);
...@@ -2168,6 +2171,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount) ...@@ -2168,6 +2171,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
sk_memory_allocated_sub(sk, amount); sk_memory_allocated_sub(sk, amount);
sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
if (sk_under_memory_pressure(sk) && if (sk_under_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk); sk_leave_memory_pressure(sk);
......
...@@ -2813,13 +2813,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk) ...@@ -2813,13 +2813,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
*/ */
void sk_forced_mem_schedule(struct sock *sk, int size) void sk_forced_mem_schedule(struct sock *sk, int size)
{ {
int amt, status; int amt;
if (size <= sk->sk_forward_alloc) if (size <= sk->sk_forward_alloc)
return; return;
amt = sk_mem_pages(size); amt = sk_mem_pages(size);
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
sk_memory_allocated_add(sk, amt, &status); sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
} }
/* Send a FIN. The caller locks the socket for us. /* Send a FIN. The caller locks the socket for us.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment