Commit 8c2bd38b authored by Eric Dumazet, committed by Jakub Kicinski

icmp: change the order of rate limits

ICMP messages are ratelimited. After the blamed commits, the two
rate limiters are applied in this order:

1) host-wide ratelimit (icmp_global_allow())

2) per-destination ratelimit (inetpeer based)

In order to avoid side-channel attacks, we need to apply
the per-destination check first.

This patch makes the following changes:

1) icmp_global_allow() checks whether the host-wide limit is reached,
   but credits are not yet consumed; consumption is deferred to step 3)

2) The per-destination limit is checked/updated.
   This might add a new node to the inetpeer tree.

3) icmp_global_consume() consumes tokens if prior operations succeeded.

This means that the host-wide ratelimit is still effective
in keeping the inetpeer tree small even under DDoS, as sketched below.
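
For illustration only (not part of the patch), a minimal sketch of the
resulting call order; icmp_may_send() and peer_allow() are hypothetical
stand-ins for the calling code and the inetpeer-based check:

    #include <stdbool.h>

    /* Hypothetical stand-ins for the kernel helpers; only ordering matters. */
    extern bool icmp_global_allow(void);   /* 1) check host-wide budget only */
    extern bool peer_allow(void);          /* 2) per-destination (inetpeer) limit */
    extern void icmp_global_consume(void); /* 3) burn host-wide credits */

    static bool icmp_may_send(void)
    {
            if (!icmp_global_allow())
                    return false;   /* host-wide budget exhausted */
            if (!peer_allow())
                    return false;   /* destination limited: no credits burned */
            icmp_global_consume();  /* consume only after both checks pass */
            return true;
    }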

As a bonus, I removed icmp_global.lock as the fast path
can use a lock-free operation.
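
A standalone sketch of that lock-free token-bucket refill, using C11
atomics in place of the kernel's atomic_t/cmpxchg helpers; bucket_allow(),
BURST and the tick-based arguments are illustrative assumptions, not
kernel API:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define BURST 50                 /* cf. sysctl_icmp_msgs_burst */

    static atomic_int credit;        /* may go negative after a consume */
    static atomic_uint stamp;        /* time of last refill, in ticks */

    static bool bucket_allow(unsigned int now, int refill)
    {
            unsigned int oldstamp;
            int old, new;

            if (atomic_load(&credit) > 0)
                    return true;     /* lock-free fast path */

            oldstamp = atomic_load(&stamp);
            if (now == oldstamp || refill <= 0)
                    return false;    /* nothing accrued since last refill */

            /* Whoever wins the stamp update performs the refill;
             * losers simply ride on the winner's credits.
             */
            if (atomic_compare_exchange_strong(&stamp, &oldstamp, now)) {
                    old = atomic_load(&credit);
                    do {
                            new = old + refill;
                            if (new > BURST)
                                    new = BURST;
                    } while (!atomic_compare_exchange_weak(&credit, &old, new));
            }
            return true;
    }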

Fixes: c0303efe ("net: reduce cycles spend on ICMP replies that gets rate limited")
Fixes: 4cdf507d ("icmp: add a global rate limitation")
Reported-by: Keyu Man <keyu.man@email.ucr.edu>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent b26b6449
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -795,6 +795,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 }
 
 bool icmp_global_allow(void);
+void icmp_global_consume(void);
+
 extern int sysctl_icmp_msgs_per_sec;
 extern int sysctl_icmp_msgs_burst;
 
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -224,57 +224,59 @@ int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
 int sysctl_icmp_msgs_burst __read_mostly = 50;
 
 static struct {
-        spinlock_t      lock;
-        u32             credit;
+        atomic_t        credit;
         u32             stamp;
-} icmp_global = {
-        .lock   = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
-};
+} icmp_global;
 
 /**
  * icmp_global_allow - Are we allowed to send one more ICMP message ?
  *
  * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
  * Returns false if we reached the limit and can not send another packet.
- * Note: called with BH disabled
+ * Works in tandem with icmp_global_consume().
  */
 bool icmp_global_allow(void)
 {
-        u32 credit, delta, incr = 0, now = (u32)jiffies;
-        bool rc = false;
+        u32 delta, now, oldstamp;
+        int incr, new, old;
 
-        /* Check if token bucket is empty and cannot be refilled
-         * without taking the spinlock. The READ_ONCE() are paired
-         * with the following WRITE_ONCE() in this same function.
+        /* Note: many cpus could find this condition true.
+         * Then later icmp_global_consume() could consume more credits,
+         * this is an acceptable race.
          */
-        if (!READ_ONCE(icmp_global.credit)) {
-                delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
-                if (delta < HZ / 50)
-                        return false;
-        }
+        if (atomic_read(&icmp_global.credit) > 0)
+                return true;
 
-        spin_lock(&icmp_global.lock);
-        delta = min_t(u32, now - icmp_global.stamp, HZ);
-        if (delta >= HZ / 50) {
-                incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
-                if (incr)
-                        WRITE_ONCE(icmp_global.stamp, now);
-        }
-        credit = min_t(u32, icmp_global.credit + incr,
-                       READ_ONCE(sysctl_icmp_msgs_burst));
-        if (credit) {
-                /* We want to use a credit of one in average, but need to randomize
-                 * it for security reasons.
-                 */
-                credit = max_t(int, credit - get_random_u32_below(3), 0);
-                rc = true;
+        now = jiffies;
+        oldstamp = READ_ONCE(icmp_global.stamp);
+        delta = min_t(u32, now - oldstamp, HZ);
+        if (delta < HZ / 50)
+                return false;
+
+        incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
+        if (!incr)
+                return false;
+
+        if (cmpxchg(&icmp_global.stamp, oldstamp, now) == oldstamp) {
+                old = atomic_read(&icmp_global.credit);
+                do {
+                        new = min(old + incr, READ_ONCE(sysctl_icmp_msgs_burst));
+                } while (!atomic_try_cmpxchg(&icmp_global.credit, &old, new));
         }
-        WRITE_ONCE(icmp_global.credit, credit);
-        spin_unlock(&icmp_global.lock);
-        return rc;
+        return true;
 }
 EXPORT_SYMBOL(icmp_global_allow);
 
+void icmp_global_consume(void)
+{
+        int credits = get_random_u32_below(3);
+
+        /* Note: this might make icmp_global.credit negative. */
+        if (credits)
+                atomic_sub(credits, &icmp_global.credit);
+}
+EXPORT_SYMBOL(icmp_global_consume);
+
 static bool icmpv4_mask_allow(struct net *net, int type, int code)
 {
         if (type > NR_ICMP_TYPES)
@@ -291,14 +293,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
         return false;
 }
 
-static bool icmpv4_global_allow(struct net *net, int type, int code)
+static bool icmpv4_global_allow(struct net *net, int type, int code,
+                                bool *apply_ratelimit)
 {
         if (icmpv4_mask_allow(net, type, code))
                 return true;
 
-        if (icmp_global_allow())
+        if (icmp_global_allow()) {
+                *apply_ratelimit = true;
                 return true;
-
+        }
         __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
         return false;
 }
@@ -308,15 +312,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
  */
 
 static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
-                               struct flowi4 *fl4, int type, int code)
+                               struct flowi4 *fl4, int type, int code,
+                               bool apply_ratelimit)
 {
         struct dst_entry *dst = &rt->dst;
         struct inet_peer *peer;
         bool rc = true;
         int vif;
 
-        if (icmpv4_mask_allow(net, type, code))
-                goto out;
+        if (!apply_ratelimit)
+                return true;
 
         /* No rate limit on loopback */
         if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
@@ -331,6 +336,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 out:
         if (!rc)
                 __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
+        else
+                icmp_global_consume();
         return rc;
 }
 
@@ -402,6 +409,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
         struct ipcm_cookie ipc;
         struct rtable *rt = skb_rtable(skb);
         struct net *net = dev_net(rt->dst.dev);
+        bool apply_ratelimit = false;
         struct flowi4 fl4;
         struct sock *sk;
         struct inet_sock *inet;
@@ -413,11 +421,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
         if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
                 return;
 
-        /* Needed by both icmp_global_allow and icmp_xmit_lock */
+        /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
         local_bh_disable();
 
-        /* global icmp_msgs_per_sec */
-        if (!icmpv4_global_allow(net, type, code))
+        /* is global icmp_msgs_per_sec exhausted ? */
+        if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
                 goto out_bh_enable;
 
         sk = icmp_xmit_lock(net);
@@ -450,7 +458,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
         rt = ip_route_output_key(net, &fl4);
         if (IS_ERR(rt))
                 goto out_unlock;
-        if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+        if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
                 icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
         ip_rt_put(rt);
 out_unlock:
@@ -596,6 +604,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
         int room;
         struct icmp_bxm icmp_param;
         struct rtable *rt = skb_rtable(skb_in);
+        bool apply_ratelimit = false;
         struct ipcm_cookie ipc;
         struct flowi4 fl4;
         __be32 saddr;
@@ -677,7 +686,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
                 }
         }
 
-        /* Needed by both icmp_global_allow and icmp_xmit_lock */
+        /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
         local_bh_disable();
 
         /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
@@ -685,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
          * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
          */
         if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
-            !icmpv4_global_allow(net, type, code))
+            !icmpv4_global_allow(net, type, code, &apply_ratelimit))
                 goto out_bh_enable;
 
         sk = icmp_xmit_lock(net);
@@ -744,7 +753,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
                 goto out_unlock;
 
         /* peer icmp_ratelimit */
-        if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+        if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
                 goto ende;
 
         /* RFC says return as much as we can without exceeding 576 bytes. */
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
         return false;
 }
 
-static bool icmpv6_global_allow(struct net *net, int type)
+static bool icmpv6_global_allow(struct net *net, int type,
+                                bool *apply_ratelimit)
 {
         if (icmpv6_mask_allow(net, type))
                 return true;
 
-        if (icmp_global_allow())
+        if (icmp_global_allow()) {
+                *apply_ratelimit = true;
                 return true;
-
+        }
         __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
         return false;
 }
@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
  * Check the ICMP output rate limit
  */
 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
-                               struct flowi6 *fl6)
+                               struct flowi6 *fl6, bool apply_ratelimit)
 {
         struct net *net = sock_net(sk);
         struct dst_entry *dst;
         bool res = false;
 
-        if (icmpv6_mask_allow(net, type))
+        if (!apply_ratelimit)
                 return true;
 
         /*
@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
         if (!res)
                 __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
                                   ICMP6_MIB_RATELIMITHOST);
+        else
+                icmp_global_consume();
         dst_release(dst);
         return res;
 }
@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
         struct net *net;
         struct ipv6_pinfo *np;
         const struct in6_addr *saddr = NULL;
+        bool apply_ratelimit = false;
         struct dst_entry *dst;
         struct icmp6hdr tmp_hdr;
         struct flowi6 fl6;
@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
                 return;
         }
 
-        /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
+        /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
         local_bh_disable();
 
         /* Check global sysctl_icmp_msgs_per_sec ratelimit */
-        if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
+        if (!(skb->dev->flags & IFF_LOOPBACK) &&
+            !icmpv6_global_allow(net, type, &apply_ratelimit))
                 goto out_bh_enable;
 
         mip6_addr_swap(skb, parm);
@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 
         np = inet6_sk(sk);
 
-        if (!icmpv6_xrlim_allow(sk, type, &fl6))
+        if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
                 goto out;
 
         tmp_hdr.icmp6_type = type;
@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
         struct ipv6_pinfo *np;
         const struct in6_addr *saddr = NULL;
         struct icmp6hdr *icmph = icmp6_hdr(skb);
+        bool apply_ratelimit = false;
         struct icmp6hdr tmp_hdr;
         struct flowi6 fl6;
         struct icmpv6_msg msg;
@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
                 goto out;
 
         /* Check the ratelimit */
-        if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
-            !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+        if ((!(skb->dev->flags & IFF_LOOPBACK) &&
+             !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
+            !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
                 goto out_dst_release;
 
         idev = __in6_dev_get(skb->dev);