Commit 4890b686 authored by Eric Dumazet, committed by Jakub Kicinski

net: keep sk->sk_forward_alloc as small as possible

Currently, tcp_memory_allocated can hit tcp_mem[] limits quite fast.

Each TCP socket can forward allocate up to 2 MB of memory, even after
the flow has become less active.

10,000 sockets can have reserved 20 GB of memory,
and we have no shrinker in place to reclaim that.

Instead of trying to reclaim the extra allocations in some places,
just keep sk->sk_forward_alloc values as small as possible.

This should not impact performance too much now that we have per-cpu
reserves: changes to tcp_memory_allocated should not be too frequent.

For sockets not using SO_RESERVE_MEM:
 - idle sockets (no packets in tx/rx queues) have zero forward alloc.
 - non-idle sockets have a forward alloc smaller than one page (see the
   sk_mem_reclaim() sketch below).
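
For reference, the sub-page bound comes from sk_mem_reclaim(), which was
reworked earlier in this series and is not part of this diff. A minimal
sketch of how that helper reads at this point; treat the exact body and
the comment as an approximation, not the patched code itself:

static inline void sk_mem_reclaim(struct sock *sk)
{
	int reclaimable;

	if (!sk_has_account(sk))
		return;

	/* SO_RESERVE_MEM keeps its reserved amount; anything beyond it is
	 * returned to the memory pools once it reaches a full page.
	 */
	reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);

	if (reclaimable >= (int)PAGE_SIZE)
		__sk_mem_reclaim(sk, reclaimable);
}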

Note:

 - Removal of SK_RECLAIM_CHUNK and SK_RECLAIM_THRESHOLD
   is left to the MPTCP maintainers as a follow-up.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 7c80b038
@@ -1627,19 +1627,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk)
 	sk_mem_reclaim(sk);
 }
 
-static inline void sk_mem_reclaim_partial(struct sock *sk)
-{
-	int reclaimable;
-
-	if (!sk_has_account(sk))
-		return;
-
-	reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
-	if (reclaimable > (int)PAGE_SIZE)
-		__sk_mem_reclaim(sk, reclaimable - 1);
-}
-
 static inline void sk_mem_charge(struct sock *sk, int size)
 {
 	if (!sk_has_account(sk))
@@ -1647,29 +1634,17 @@ static inline void sk_mem_charge(struct sock *sk, int size)
 	sk->sk_forward_alloc -= size;
 }
 
-/* the following macros control memory reclaiming in sk_mem_uncharge()
+/* the following macros control memory reclaiming in mptcp_rmem_uncharge()
  */
 #define SK_RECLAIM_THRESHOLD	(1 << 21)
 #define SK_RECLAIM_CHUNK	(1 << 20)
 
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
-	int reclaimable;
-
 	if (!sk_has_account(sk))
 		return;
 	sk->sk_forward_alloc += size;
-	reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
-	/* Avoid a possible overflow.
-	 * TCP send queues can make this happen, if sk_mem_reclaim()
-	 * is not called and more than 2 GBytes are released at once.
-	 *
-	 * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
-	 * no need to hold that much forward allocation anyway.
-	 */
-	if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
-		__sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
+	sk_mem_reclaim(sk);
 }
 
 /*
......
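
Net effect of the sock.h hunks above, reassembled from the right-hand side of
the diff (the inline comment is editorial, not from the patch): sk_mem_uncharge()
drops its private 2 MB / 1 MB reclaim heuristic and simply delegates to
sk_mem_reclaim() on every uncharge.

static inline void sk_mem_uncharge(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return;
	sk->sk_forward_alloc += size;

	/* With per-cpu reserves in front of tcp_memory_allocated,
	 * reclaiming on every uncharge is cheap, and it keeps
	 * sk_forward_alloc below one page for non-reserve sockets.
	 */
	sk_mem_reclaim(sk);
}

SK_RECLAIM_THRESHOLD and SK_RECLAIM_CHUNK survive only because MPTCP's own
receive-memory accounting still relies on them, which is why the comment now
points at mptcp_rmem_uncharge() and their removal is deferred to the MPTCP
maintainers, as noted in the commit message.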
@@ -320,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram);
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
 	consume_skb(skb);
-	sk_mem_reclaim_partial(sk);
 }
 EXPORT_SYMBOL(skb_free_datagram);
@@ -336,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
 	slow = lock_sock_fast(sk);
 	sk_peek_offset_bwd(sk, len);
 	skb_orphan(skb);
-	sk_mem_reclaim_partial(sk);
 	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
@@ -396,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 				      NULL);
 	kfree_skb(skb);
-	sk_mem_reclaim_partial(sk);
 	return err;
 }
 EXPORT_SYMBOL(skb_kill_datagram);
......
@@ -858,9 +858,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 {
 	struct sk_buff *skb;
 
-	if (unlikely(tcp_under_memory_pressure(sk)))
-		sk_mem_reclaim_partial(sk);
-
 	skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
 	if (likely(skb)) {
 		bool mem_scheduled;
@@ -2764,8 +2761,6 @@ void __tcp_close(struct sock *sk, long timeout)
 		__kfree_skb(skb);
 	}
 
-	sk_mem_reclaim(sk);
-
 	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
 	if (sk->sk_state == TCP_CLOSE)
 		goto adjudge_to_death;
@@ -2873,7 +2868,6 @@ void __tcp_close(struct sock *sk, long timeout)
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
-		sk_mem_reclaim(sk);
 		if (tcp_check_oom(sk, 0)) {
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -2951,7 +2945,6 @@ void tcp_write_queue_purge(struct sock *sk)
 	}
 	tcp_rtx_queue_purge(sk);
 	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
-	sk_mem_reclaim(sk);
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
 	tcp_sk(sk)->packets_out = 0;
 	inet_csk(sk)->icsk_backoff = 0;
......
@@ -805,7 +805,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 			 * restart window, so that we send ACKs quickly.
 			 */
 			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
-			sk_mem_reclaim(sk);
 		}
 	}
 	icsk->icsk_ack.lrcvtime = now;
@@ -4390,7 +4389,6 @@ void tcp_fin(struct sock *sk)
 	skb_rbtree_purge(&tp->out_of_order_queue);
 	if (tcp_is_sack(tp))
 		tcp_sack_reset(&tp->rx_opt);
-	sk_mem_reclaim(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		sk->sk_state_change(sk);
@@ -5336,7 +5334,6 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 		tcp_drop_reason(sk, rb_to_skb(node),
 				SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
 		if (!prev || goal <= 0) {
-			sk_mem_reclaim(sk);
 			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 			    !tcp_under_memory_pressure(sk))
 				break;
@@ -5383,7 +5380,6 @@ static int tcp_prune_queue(struct sock *sk)
 			     skb_peek(&sk->sk_receive_queue),
 			     NULL,
 			     tp->copied_seq, tp->rcv_nxt);
-	sk_mem_reclaim(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
......
@@ -290,15 +290,13 @@ void tcp_delack_timer_handler(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	sk_mem_reclaim_partial(sk);
-
 	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
 	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
-		goto out;
+		return;
 
 	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
 		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
-		goto out;
+		return;
 	}
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
@@ -317,10 +315,6 @@
 		tcp_send_ack(sk);
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
-
-out:
-	if (tcp_under_memory_pressure(sk))
-		sk_mem_reclaim(sk);
 }
@@ -600,11 +594,11 @@ void tcp_write_timer_handler(struct sock *sk)
 	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
 	    !icsk->icsk_pending)
-		goto out;
+		return;
 
 	if (time_after(icsk->icsk_timeout, jiffies)) {
 		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
-		goto out;
+		return;
 	}
 
 	tcp_mstamp_refresh(tcp_sk(sk));
@@ -626,9 +620,6 @@
 		tcp_probe_timer(sk);
 		break;
 	}
-
-out:
-	sk_mem_reclaim(sk);
 }
 
 static void tcp_write_timer(struct timer_list *t)
@@ -743,8 +734,6 @@ static void tcp_keepalive_timer (struct timer_list *t)
 		elapsed = keepalive_time_when(tp) - elapsed;
 	}
 
-	sk_mem_reclaim(sk);
-
 resched:
 	inet_csk_reset_keepalive_timer (sk, elapsed);
 	goto out;
......
@@ -278,8 +278,6 @@ static void iucv_sock_destruct(struct sock *sk)
 	skb_queue_purge(&sk->sk_receive_queue);
 	skb_queue_purge(&sk->sk_error_queue);
 
-	sk_mem_reclaim(sk);
-
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		pr_err("Attempt to release alive iucv socket %p\n", sk);
 		return;
......
@@ -975,7 +975,7 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk)
 	if (reclaimable > (int)PAGE_SIZE)
 		__mptcp_rmem_reclaim(sk, reclaimable - 1);
 
-	sk_mem_reclaim_partial(sk);
+	sk_mem_reclaim(sk);
 }
 
 static void mptcp_mem_reclaim_partial(struct sock *sk)
......
@@ -6590,8 +6590,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 			pr_debug("%s: under pressure, reneging for tsn:%u\n",
 				 __func__, tsn);
 			deliver = SCTP_CMD_RENEGE;
-		} else {
-			sk_mem_reclaim(sk);
 		}
 	}
......
@@ -1824,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 	if (sctp_wspace(asoc) < (int)msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-	if (sk_under_memory_pressure(sk))
-		sk_mem_reclaim(sk);
-
 	if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
 		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -9195,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
-		if (sk_under_memory_pressure(sk))
-			sk_mem_reclaim(sk);
 		if ((int)msg_len <= sctp_wspace(asoc) &&
 		    sk_wmem_schedule(sk, msg_len))
 			break;
......
@@ -979,8 +979,6 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 	if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
 		sctp_intl_start_pd(ulpq, gfp);
 
-	sk_mem_reclaim(asoc->base.sk);
 }
 
 static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
......
@@ -1100,12 +1100,8 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		else if (retval == 1)
 			sctp_ulpq_reasm_drain(ulpq);
 	}
 
-	sk_mem_reclaim(asoc->base.sk);
 }
 
 /* Notify the application if an association is aborted and in
  * partial delivery mode. Send up any pending received messages.
  */
......