Commit 62748f32 authored by Eric Dumazet, committed by David S. Miller

net: introduce SO_MAX_PACING_RATE

As mentioned in commit afe4fd06 ("pkt_sched: fq: Fair Queue packet
scheduler"), this patch adds a new socket option.

SO_MAX_PACING_RATE offers the application the ability to cap the
rate computed by the transport layer. The value is in bytes per second.

u32 val = 1000000;
setsockopt(sockfd, SOL_SOCKET, SO_MAX_PACING_RATE, &val, sizeof(val));

To be effectively paced, a flow must use the FQ packet scheduler.

Note that a packet scheduler takes into account the headers for its
computations. The effective payload rate depends on MSS and retransmits
if any.

I chose to make this pacing rate a SOL_SOCKET option rather than a
TCP one because it can also be used by other protocols.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4aa0a03f
...@@ -81,6 +81,8 @@ ...@@ -81,6 +81,8 @@
#define SO_SELECT_ERR_QUEUE 45 #define SO_SELECT_ERR_QUEUE 45
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -76,4 +76,6 @@ ...@@ -76,4 +76,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* __ASM_AVR32_SOCKET_H */ #endif /* __ASM_AVR32_SOCKET_H */
...@@ -78,6 +78,8 @@ ...@@ -78,6 +78,8 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -76,5 +76,7 @@ ...@@ -76,5 +76,7 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -76,4 +76,6 @@ ...@@ -76,4 +76,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -85,4 +85,6 @@ ...@@ -85,4 +85,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_IA64_SOCKET_H */ #endif /* _ASM_IA64_SOCKET_H */
...@@ -76,4 +76,6 @@ ...@@ -76,4 +76,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_M32R_SOCKET_H */ #endif /* _ASM_M32R_SOCKET_H */
...@@ -94,4 +94,6 @@ ...@@ -94,4 +94,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -76,4 +76,6 @@ ...@@ -76,4 +76,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -75,6 +75,8 @@ ...@@ -75,6 +75,8 @@
#define SO_BUSY_POLL 0x4027 #define SO_BUSY_POLL 0x4027
#define SO_MAX_PACING_RATE 0x4048
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we /* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here. * have to define SOCK_NONBLOCK to a different value here.
*/ */
......
...@@ -83,4 +83,6 @@ ...@@ -83,4 +83,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_POWERPC_SOCKET_H */ #endif /* _ASM_POWERPC_SOCKET_H */
...@@ -82,4 +82,6 @@ ...@@ -82,4 +82,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -72,6 +72,8 @@ ...@@ -72,6 +72,8 @@
#define SO_BUSY_POLL 0x0030 #define SO_BUSY_POLL 0x0030
#define SO_MAX_PACING_RATE 0x0031
/* Security levels - as per NRL IPv6 - don't actually do anything */ /* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
......
...@@ -87,4 +87,6 @@ ...@@ -87,4 +87,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* _XTENSA_SOCKET_H */ #endif /* _XTENSA_SOCKET_H */
...@@ -363,6 +363,7 @@ struct sock { ...@@ -363,6 +363,7 @@ struct sock {
int sk_wmem_queued; int sk_wmem_queued;
gfp_t sk_allocation; gfp_t sk_allocation;
u32 sk_pacing_rate; /* bytes per second */ u32 sk_pacing_rate; /* bytes per second */
u32 sk_max_pacing_rate;
netdev_features_t sk_route_caps; netdev_features_t sk_route_caps;
netdev_features_t sk_route_nocaps; netdev_features_t sk_route_nocaps;
int sk_gso_type; int sk_gso_type;
......
...@@ -78,4 +78,6 @@ ...@@ -78,4 +78,6 @@
#define SO_BUSY_POLL 46 #define SO_BUSY_POLL 46
#define SO_MAX_PACING_RATE 47
#endif /* __ASM_GENERIC_SOCKET_H */ #endif /* __ASM_GENERIC_SOCKET_H */
...@@ -914,6 +914,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ...@@ -914,6 +914,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
} }
break; break;
#endif #endif
case SO_MAX_PACING_RATE:
sk->sk_max_pacing_rate = val;
sk->sk_pacing_rate = min(sk->sk_pacing_rate,
sk->sk_max_pacing_rate);
break;
default: default:
ret = -ENOPROTOOPT; ret = -ENOPROTOOPT;
break; break;
...@@ -1177,6 +1184,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1177,6 +1184,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break; break;
#endif #endif
case SO_MAX_PACING_RATE:
v.val = sk->sk_max_pacing_rate;
break;
default: default:
return -ENOPROTOOPT; return -ENOPROTOOPT;
} }
...@@ -2319,6 +2330,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) ...@@ -2319,6 +2330,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_ll_usec = sysctl_net_busy_read; sk->sk_ll_usec = sysctl_net_busy_read;
#endif #endif
sk->sk_max_pacing_rate = ~0U;
/* /*
* Before updating sk_refcnt, we must commit prior changes to memory * Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details) * (Documentation/RCU/rculist_nulls.txt for details)
......
...@@ -735,7 +735,7 @@ static void tcp_update_pacing_rate(struct sock *sk) ...@@ -735,7 +735,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
if (tp->srtt > 8 + 2) if (tp->srtt > 8 + 2)
do_div(rate, tp->srtt); do_div(rate, tp->srtt);
sk->sk_pacing_rate = min_t(u64, rate, ~0U); sk->sk_pacing_rate = min_t(u64, rate, sk->sk_max_pacing_rate);
} }
/* Calculate rto without backoff. This is the second half of Van Jacobson's /* Calculate rto without backoff. This is the second half of Van Jacobson's
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment