Commit 4b1327be authored by Wei Wang, committed by David S. Miller

net-memcg: pass in gfp_t mask to mem_cgroup_charge_skmem()

Add gfp_t mask as an input parameter to mem_cgroup_charge_skmem(),
to give more control to the networking stack and enable it to change
memcg charging behavior. In the future, the networking stack may decide
to avoid oom-kills when fallbacks are more appropriate.

One behavior change in mem_cgroup_charge_skmem() by this patch is to
avoid force charging by default and let the caller decide when and if
force charging is needed through the presence or absence of
__GFP_NOFAIL.
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ab44035d
...@@ -1581,7 +1581,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) ...@@ -1581,7 +1581,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* CONFIG_CGROUP_WRITEBACK */
struct sock; struct sock;
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
gfp_t gfp_mask);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key; extern struct static_key_false memcg_sockets_enabled_key;
......
...@@ -2400,6 +2400,11 @@ static inline gfp_t gfp_any(void) ...@@ -2400,6 +2400,11 @@ static inline gfp_t gfp_any(void)
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
} }
static inline gfp_t gfp_memcg_charge(void)
{
return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
}
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{ {
return noblock ? 0 : sk->sk_rcvtimeo; return noblock ? 0 : sk->sk_rcvtimeo;
......
...@@ -7048,14 +7048,14 @@ void mem_cgroup_sk_free(struct sock *sk) ...@@ -7048,14 +7048,14 @@ void mem_cgroup_sk_free(struct sock *sk)
* mem_cgroup_charge_skmem - charge socket memory * mem_cgroup_charge_skmem - charge socket memory
* @memcg: memcg to charge * @memcg: memcg to charge
* @nr_pages: number of pages to charge * @nr_pages: number of pages to charge
* @gfp_mask: reclaim mode
* *
* Charges @nr_pages to @memcg. Returns %true if the charge fit within * Charges @nr_pages to @memcg. Returns %true if the charge fit within
* @memcg's configured limit, %false if the charge had to be forced. * @memcg's configured limit, %false if it doesn't.
*/ */
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
gfp_t gfp_mask)
{ {
gfp_t gfp_mask = GFP_KERNEL;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
struct page_counter *fail; struct page_counter *fail;
...@@ -7063,21 +7063,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) ...@@ -7063,21 +7063,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
memcg->tcpmem_pressure = 0; memcg->tcpmem_pressure = 0;
return true; return true;
} }
page_counter_charge(&memcg->tcpmem, nr_pages);
memcg->tcpmem_pressure = 1; memcg->tcpmem_pressure = 1;
if (gfp_mask & __GFP_NOFAIL) {
page_counter_charge(&memcg->tcpmem, nr_pages);
return true;
}
return false; return false;
} }
/* Don't block in the packet receive path */ if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
if (in_softirq())
gfp_mask = GFP_NOWAIT;
mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
if (try_charge(memcg, gfp_mask, nr_pages) == 0)
return true; return true;
}
try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
return false; return false;
} }
......
...@@ -2728,10 +2728,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) ...@@ -2728,10 +2728,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{ {
struct proto *prot = sk->sk_prot; struct proto *prot = sk->sk_prot;
long allocated = sk_memory_allocated_add(sk, amt); long allocated = sk_memory_allocated_add(sk, amt);
bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
bool charged = true; bool charged = true;
if (mem_cgroup_sockets_enabled && sk->sk_memcg && if (memcg_charge &&
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt))) !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
gfp_memcg_charge())))
goto suppress_allocation; goto suppress_allocation;
/* Under limit. */ /* Under limit. */
...@@ -2785,16 +2787,22 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) ...@@ -2785,16 +2787,22 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* Fail only if socket is _under_ its sndbuf. /* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail. * In this case we cannot block, so that we have to fail.
*/ */
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
/* Force charge with __GFP_NOFAIL */
if (memcg_charge && !charged) {
mem_cgroup_charge_skmem(sk->sk_memcg, amt,
gfp_memcg_charge() | __GFP_NOFAIL);
}
return 1; return 1;
} }
}
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
trace_sock_exceed_buf_limit(sk, prot, allocated, kind); trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
sk_memory_allocated_sub(sk, amt); sk_memory_allocated_sub(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg) if (memcg_charge && charged)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
return 0; return 0;
......
...@@ -534,7 +534,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) ...@@ -534,7 +534,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
atomic_read(&newsk->sk_rmem_alloc)); atomic_read(&newsk->sk_rmem_alloc));
mem_cgroup_sk_alloc(newsk); mem_cgroup_sk_alloc(newsk);
if (newsk->sk_memcg && amt) if (newsk->sk_memcg && amt)
mem_cgroup_charge_skmem(newsk->sk_memcg, amt); mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
GFP_KERNEL | __GFP_NOFAIL);
release_sock(newsk); release_sock(newsk);
} }
......
...@@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) ...@@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
sk_memory_allocated_add(sk, amt); sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg) if (mem_cgroup_sockets_enabled && sk->sk_memcg)
mem_cgroup_charge_skmem(sk->sk_memcg, amt); mem_cgroup_charge_skmem(sk->sk_memcg, amt,
gfp_memcg_charge() | __GFP_NOFAIL);
} }
/* Send a FIN. The caller locks the socket for us. /* Send a FIN. The caller locks the socket for us.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment