Commit 789ed80a authored by Jakub Kicinski

Merge branch 'icmp-avoid-possible-side-channels-attacks'

Eric Dumazet says:

====================
icmp: avoid possible side-channels attacks

Keyu Man reminded us that Linux ICMP rate limiting still allows
side-channel attacks.

Quoting the fine document [1]:

4.4 Private Source Port Scan Method
...
 We can then use the same global ICMP rate limit as a side
 channel to infer if such an ICMP message has been triggered. At
 first glance, this method can work but at a low speed of one port
 per second, due to the per-IP rate limit on ICMP messages.
 Surprisingly, after we analyze the source code of the ICMP rate
 limit implementation, we find that the global rate limit is checked
 prior to the per-IP rate limit. This means that even if the per-IP
 rate limit may eventually determine that no ICMP reply should be
 sent, a packet is still subjected to the global rate limit check and one
 token is deducted. Ironically, such a decision is consciously made
 by Linux developers to avoid invoking the expensive check of the
 per-IP rate limit [22], involving a search process to locate the per-IP
 data structure.
 This effectively means that the per-IP rate limit can be disregarded
 for the purpose of our side channel based scan, as it only
 determines if the final ICMP reply is generated but has nothing to
 do with the global rate limit counter decrement. As a result, we can
 continue to use roughly the same scan method as efficient as before,
 achieving 1,000 ports per second
...

This series:

1) Changes the order of the two rate limiters to fix the issue.

2-3) Makes the 'host-wide' rate limiter a per-netns one.

[1]
Link: https://dl.acm.org/doi/pdf/10.1145/3372297.3417280
====================

Link: https://patch.msgid.link/20240829144641.3880376-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents b26b6449 f17bf505
...@@ -794,9 +794,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ...@@ -794,9 +794,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0);
} }
bool icmp_global_allow(void); bool icmp_global_allow(struct net *net);
extern int sysctl_icmp_msgs_per_sec; void icmp_global_consume(struct net *net);
extern int sysctl_icmp_msgs_burst;
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
int ip_misc_proc_init(void); int ip_misc_proc_init(void);
......
...@@ -122,7 +122,10 @@ struct netns_ipv4 { ...@@ -122,7 +122,10 @@ struct netns_ipv4 {
u8 sysctl_icmp_errors_use_inbound_ifaddr; u8 sysctl_icmp_errors_use_inbound_ifaddr;
int sysctl_icmp_ratelimit; int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask; int sysctl_icmp_ratemask;
int sysctl_icmp_msgs_per_sec;
int sysctl_icmp_msgs_burst;
atomic_t icmp_global_credit;
u32 icmp_global_stamp;
u32 ip_rt_min_pmtu; u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires; int ip_rt_mtu_expires;
int ip_rt_min_advmss; int ip_rt_min_advmss;
......
...@@ -220,61 +220,56 @@ static inline void icmp_xmit_unlock(struct sock *sk) ...@@ -220,61 +220,56 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock(&sk->sk_lock.slock); spin_unlock(&sk->sk_lock.slock);
} }
int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
int sysctl_icmp_msgs_burst __read_mostly = 50;
static struct {
spinlock_t lock;
u32 credit;
u32 stamp;
} icmp_global = {
.lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
};
/** /**
* icmp_global_allow - Are we allowed to send one more ICMP message ? * icmp_global_allow - Are we allowed to send one more ICMP message ?
* @net: network namespace
* *
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet. * Returns false if we reached the limit and can not send another packet.
* Note: called with BH disabled * Works in tandem with icmp_global_consume().
*/ */
bool icmp_global_allow(void) bool icmp_global_allow(struct net *net)
{ {
u32 credit, delta, incr = 0, now = (u32)jiffies; u32 delta, now, oldstamp;
bool rc = false; int incr, new, old;
/* Check if token bucket is empty and cannot be refilled /* Note: many cpus could find this condition true.
* without taking the spinlock. The READ_ONCE() are paired * Then later icmp_global_consume() could consume more credits,
* with the following WRITE_ONCE() in this same function. * this is an acceptable race.
*/ */
if (!READ_ONCE(icmp_global.credit)) { if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); return true;
if (delta < HZ / 50)
return false;
}
spin_lock(&icmp_global.lock); now = jiffies;
delta = min_t(u32, now - icmp_global.stamp, HZ); oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
if (delta >= HZ / 50) { delta = min_t(u32, now - oldstamp, HZ);
incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (delta < HZ / 50)
if (incr) return false;
WRITE_ONCE(icmp_global.stamp, now);
} incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
credit = min_t(u32, icmp_global.credit + incr, if (!incr)
READ_ONCE(sysctl_icmp_msgs_burst)); return false;
if (credit) {
/* We want to use a credit of one in average, but need to randomize if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
* it for security reasons. old = atomic_read(&net->ipv4.icmp_global_credit);
*/ do {
credit = max_t(int, credit - get_random_u32_below(3), 0); new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
rc = true; } while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
} }
WRITE_ONCE(icmp_global.credit, credit); return true;
spin_unlock(&icmp_global.lock);
return rc;
} }
EXPORT_SYMBOL(icmp_global_allow); EXPORT_SYMBOL(icmp_global_allow);
/**
 * icmp_global_consume - charge the per-netns ICMP credit counter
 * @net: network namespace whose counter is charged
 *
 * Subtracts the value returned by get_random_u32_below(3) from
 * net->ipv4.icmp_global_credit.  No lower bound is enforced here, so
 * the counter may end up negative.
 */
void icmp_global_consume(struct net *net)
{
	const int cost = get_random_u32_below(3);

	/* A zero draw costs nothing: skip the atomic operation entirely. */
	if (cost == 0)
		return;

	atomic_sub(cost, &net->ipv4.icmp_global_credit);
}
EXPORT_SYMBOL(icmp_global_consume);
static bool icmpv4_mask_allow(struct net *net, int type, int code) static bool icmpv4_mask_allow(struct net *net, int type, int code)
{ {
if (type > NR_ICMP_TYPES) if (type > NR_ICMP_TYPES)
...@@ -291,14 +286,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) ...@@ -291,14 +286,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return false; return false;
} }
static bool icmpv4_global_allow(struct net *net, int type, int code) static bool icmpv4_global_allow(struct net *net, int type, int code,
bool *apply_ratelimit)
{ {
if (icmpv4_mask_allow(net, type, code)) if (icmpv4_mask_allow(net, type, code))
return true; return true;
if (icmp_global_allow()) if (icmp_global_allow(net)) {
*apply_ratelimit = true;
return true; return true;
}
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false; return false;
} }
...@@ -308,15 +305,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code) ...@@ -308,15 +305,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
*/ */
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
struct flowi4 *fl4, int type, int code) struct flowi4 *fl4, int type, int code,
bool apply_ratelimit)
{ {
struct dst_entry *dst = &rt->dst; struct dst_entry *dst = &rt->dst;
struct inet_peer *peer; struct inet_peer *peer;
bool rc = true; bool rc = true;
int vif; int vif;
if (icmpv4_mask_allow(net, type, code)) if (!apply_ratelimit)
goto out; return true;
/* No rate limit on loopback */ /* No rate limit on loopback */
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
...@@ -331,6 +329,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, ...@@ -331,6 +329,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out: out:
if (!rc) if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
else
icmp_global_consume(net);
return rc; return rc;
} }
...@@ -402,6 +402,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -402,6 +402,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc; struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb); struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev); struct net *net = dev_net(rt->dst.dev);
bool apply_ratelimit = false;
struct flowi4 fl4; struct flowi4 fl4;
struct sock *sk; struct sock *sk;
struct inet_sock *inet; struct inet_sock *inet;
...@@ -413,11 +414,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -413,11 +414,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return; return;
/* Needed by both icmp_global_allow and icmp_xmit_lock */ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable(); local_bh_disable();
/* global icmp_msgs_per_sec */ /* is global icmp_msgs_per_sec exhausted ? */
if (!icmpv4_global_allow(net, type, code)) if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable; goto out_bh_enable;
sk = icmp_xmit_lock(net); sk = icmp_xmit_lock(net);
...@@ -450,7 +451,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -450,7 +451,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
rt = ip_route_output_key(net, &fl4); rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) if (IS_ERR(rt))
goto out_unlock; goto out_unlock;
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code)) if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt); icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt); ip_rt_put(rt);
out_unlock: out_unlock:
...@@ -596,6 +597,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, ...@@ -596,6 +597,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
int room; int room;
struct icmp_bxm icmp_param; struct icmp_bxm icmp_param;
struct rtable *rt = skb_rtable(skb_in); struct rtable *rt = skb_rtable(skb_in);
bool apply_ratelimit = false;
struct ipcm_cookie ipc; struct ipcm_cookie ipc;
struct flowi4 fl4; struct flowi4 fl4;
__be32 saddr; __be32 saddr;
...@@ -677,7 +679,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, ...@@ -677,7 +679,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
} }
} }
/* Needed by both icmp_global_allow and icmp_xmit_lock */ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable(); local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
...@@ -685,7 +687,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, ...@@ -685,7 +687,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow) * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/ */
if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) && if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
!icmpv4_global_allow(net, type, code)) !icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable; goto out_bh_enable;
sk = icmp_xmit_lock(net); sk = icmp_xmit_lock(net);
...@@ -744,7 +746,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, ...@@ -744,7 +746,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
goto out_unlock; goto out_unlock;
/* peer icmp_ratelimit */ /* peer icmp_ratelimit */
if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code)) if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende; goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */ /* RFC says return as much as we can without exceeding 576 bytes. */
...@@ -1487,6 +1489,8 @@ static int __net_init icmp_sk_init(struct net *net) ...@@ -1487,6 +1489,8 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_ratelimit = 1 * HZ; net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
net->ipv4.sysctl_icmp_ratemask = 0x1818; net->ipv4.sysctl_icmp_ratemask = 0x1818;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
net->ipv4.sysctl_icmp_msgs_per_sec = 1000;
net->ipv4.sysctl_icmp_msgs_burst = 50;
return 0; return 0;
} }
......
...@@ -600,22 +600,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -600,22 +600,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0444, .mode = 0444,
.proc_handler = proc_tcp_available_ulp, .proc_handler = proc_tcp_available_ulp,
}, },
{
.procname = "icmp_msgs_per_sec",
.data = &sysctl_icmp_msgs_per_sec,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "icmp_msgs_burst",
.data = &sysctl_icmp_msgs_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{ {
.procname = "udp_mem", .procname = "udp_mem",
.data = &sysctl_udp_mem, .data = &sysctl_udp_mem,
...@@ -701,6 +685,22 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -701,6 +685,22 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "icmp_msgs_per_sec",
.data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "icmp_msgs_burst",
.data = &init_net.ipv4.sysctl_icmp_msgs_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{ {
.procname = "ping_group_range", .procname = "ping_group_range",
.data = &init_net.ipv4.ping_group_range.range, .data = &init_net.ipv4.ping_group_range.range,
......
...@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type) ...@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
return false; return false;
} }
static bool icmpv6_global_allow(struct net *net, int type) static bool icmpv6_global_allow(struct net *net, int type,
bool *apply_ratelimit)
{ {
if (icmpv6_mask_allow(net, type)) if (icmpv6_mask_allow(net, type))
return true; return true;
if (icmp_global_allow()) if (icmp_global_allow(net)) {
*apply_ratelimit = true;
return true; return true;
}
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false; return false;
} }
...@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type) ...@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
* Check the ICMP output rate limit * Check the ICMP output rate limit
*/ */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct flowi6 *fl6) struct flowi6 *fl6, bool apply_ratelimit)
{ {
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct dst_entry *dst; struct dst_entry *dst;
bool res = false; bool res = false;
if (icmpv6_mask_allow(net, type)) if (!apply_ratelimit)
return true; return true;
/* /*
...@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, ...@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (!res) if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst), __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST); ICMP6_MIB_RATELIMITHOST);
else
icmp_global_consume(net);
dst_release(dst); dst_release(dst);
return res; return res;
} }
...@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, ...@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
struct net *net; struct net *net;
struct ipv6_pinfo *np; struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL; const struct in6_addr *saddr = NULL;
bool apply_ratelimit = false;
struct dst_entry *dst; struct dst_entry *dst;
struct icmp6hdr tmp_hdr; struct icmp6hdr tmp_hdr;
struct flowi6 fl6; struct flowi6 fl6;
...@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, ...@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
return; return;
} }
/* Needed by both icmp_global_allow and icmpv6_xmit_lock */ /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
local_bh_disable(); local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */ /* Check global sysctl_icmp_msgs_per_sec ratelimit */
if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) if (!(skb->dev->flags & IFF_LOOPBACK) &&
!icmpv6_global_allow(net, type, &apply_ratelimit))
goto out_bh_enable; goto out_bh_enable;
mip6_addr_swap(skb, parm); mip6_addr_swap(skb, parm);
...@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, ...@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk); np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6)) if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
goto out; goto out;
tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_type = type;
...@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) ...@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
struct ipv6_pinfo *np; struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL; const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb); struct icmp6hdr *icmph = icmp6_hdr(skb);
bool apply_ratelimit = false;
struct icmp6hdr tmp_hdr; struct icmp6hdr tmp_hdr;
struct flowi6 fl6; struct flowi6 fl6;
struct icmpv6_msg msg; struct icmpv6_msg msg;
...@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) ...@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out; goto out;
/* Check the ratelimit */ /* Check the ratelimit */
if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) || if ((!(skb->dev->flags & IFF_LOOPBACK) &&
!icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6)) !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
!icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
goto out_dst_release; goto out_dst_release;
idev = __in6_dev_get(skb->dev); idev = __in6_dev_get(skb->dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment