Commit 789ed80a authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'icmp-avoid-possible-side-channels-attacks'

Eric Dumazet says:

====================
icmp: avoid possible side-channels attacks

Keyu Man reminded us that linux ICMP rate limiting was still allowing
side-channels attacks.

Quoting the fine document [1]:

4.4 Private Source Port Scan Method
...
 We can then use the same global ICMP rate limit as a side
 channel to infer if such an ICMP message has been triggered. At
 first glance, this method can work but at a low speed of one port
 per second, due to the per-IP rate limit on ICMP messages.
 Surprisingly, after we analyze the source code of the ICMP rate
 limit implementation, we find that the global rate limit is checked
 prior to the per-IP rate limit. This means that even if the per-IP
 rate limit may eventually determine that no ICMP reply should be
 sent, a packet is still subjected to the global rate limit check and one
 token is deducted. Ironically, such a decision is consciously made
 by Linux developers to avoid invoking the expensive check of the
 per-IP rate limit [ 22], involving a search process to locate the per-IP
 data structure.
 This effectively means that the per-IP rate limit can be disre-
 garded for the purpose of our side channel based scan, as it only
 determines if the final ICMP reply is generated but has nothing to
 do with the global rate limit counter decrement. As a result, we can
 continue to use roughly the same scan method as efficient as before,
 achieving 1,000 ports per second
...

This series :

1) Changes the order of the two rate limiters to fix the issue.

2-3) Make the 'host-wide' rate limiter a per-netns one.

[1]
Link: https://dl.acm.org/doi/pdf/10.1145/3372297.3417280
====================

Link: https://patch.msgid.link/20240829144641.3880376-1-edumazet@google.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents b26b6449 f17bf505
......@@ -794,9 +794,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0);
}
bool icmp_global_allow(void);
extern int sysctl_icmp_msgs_per_sec;
extern int sysctl_icmp_msgs_burst;
bool icmp_global_allow(struct net *net);
void icmp_global_consume(struct net *net);
#ifdef CONFIG_PROC_FS
int ip_misc_proc_init(void);
......
......@@ -122,7 +122,10 @@ struct netns_ipv4 {
u8 sysctl_icmp_errors_use_inbound_ifaddr;
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
int sysctl_icmp_msgs_per_sec;
int sysctl_icmp_msgs_burst;
atomic_t icmp_global_credit;
u32 icmp_global_stamp;
u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires;
int ip_rt_min_advmss;
......
......@@ -220,61 +220,56 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock(&sk->sk_lock.slock);
}
int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
int sysctl_icmp_msgs_burst __read_mostly = 50;
static struct {
spinlock_t lock;
u32 credit;
u32 stamp;
} icmp_global = {
.lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
};
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
* @net: network namespace
*
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
* Note: called with BH disabled
* Works in tandem with icmp_global_consume().
*/
bool icmp_global_allow(void)
bool icmp_global_allow(struct net *net)
{
u32 credit, delta, incr = 0, now = (u32)jiffies;
bool rc = false;
u32 delta, now, oldstamp;
int incr, new, old;
/* Check if token bucket is empty and cannot be refilled
* without taking the spinlock. The READ_ONCE() are paired
* with the following WRITE_ONCE() in this same function.
/* Note: many cpus could find this condition true.
* Then later icmp_global_consume() could consume more credits,
* this is an acceptable race.
*/
if (!READ_ONCE(icmp_global.credit)) {
delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
if (delta < HZ / 50)
return false;
}
if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
return true;
spin_lock(&icmp_global.lock);
delta = min_t(u32, now - icmp_global.stamp, HZ);
if (delta >= HZ / 50) {
incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
if (incr)
WRITE_ONCE(icmp_global.stamp, now);
}
credit = min_t(u32, icmp_global.credit + incr,
READ_ONCE(sysctl_icmp_msgs_burst));
if (credit) {
/* We want to use a credit of one in average, but need to randomize
* it for security reasons.
*/
credit = max_t(int, credit - get_random_u32_below(3), 0);
rc = true;
now = jiffies;
oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
delta = min_t(u32, now - oldstamp, HZ);
if (delta < HZ / 50)
return false;
incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
if (!incr)
return false;
if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
old = atomic_read(&net->ipv4.icmp_global_credit);
do {
new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
} while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
}
WRITE_ONCE(icmp_global.credit, credit);
spin_unlock(&icmp_global.lock);
return rc;
return true;
}
EXPORT_SYMBOL(icmp_global_allow);
void icmp_global_consume(struct net *net)
{
int credits = get_random_u32_below(3);
/* Note: this might make icmp_global.credit negative. */
if (credits)
atomic_sub(credits, &net->ipv4.icmp_global_credit);
}
EXPORT_SYMBOL(icmp_global_consume);
static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
if (type > NR_ICMP_TYPES)
......@@ -291,14 +286,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return false;
}
static bool icmpv4_global_allow(struct net *net, int type, int code)
static bool icmpv4_global_allow(struct net *net, int type, int code,
bool *apply_ratelimit)
{
if (icmpv4_mask_allow(net, type, code))
return true;
if (icmp_global_allow())
if (icmp_global_allow(net)) {
*apply_ratelimit = true;
return true;
}
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
......@@ -308,15 +305,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
*/
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
struct flowi4 *fl4, int type, int code)
struct flowi4 *fl4, int type, int code,
bool apply_ratelimit)
{
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
bool rc = true;
int vif;
if (icmpv4_mask_allow(net, type, code))
goto out;
if (!apply_ratelimit)
return true;
/* No rate limit on loopback */
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
......@@ -331,6 +329,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
else
icmp_global_consume(net);
return rc;
}
......@@ -402,6 +402,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
bool apply_ratelimit = false;
struct flowi4 fl4;
struct sock *sk;
struct inet_sock *inet;
......@@ -413,11 +414,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
/* Needed by both icmp_global_allow and icmp_xmit_lock */
/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* global icmp_msgs_per_sec */
if (!icmpv4_global_allow(net, type, code))
/* is global icmp_msgs_per_sec exhausted ? */
if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
......@@ -450,7 +451,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
......@@ -596,6 +597,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
int room;
struct icmp_bxm icmp_param;
struct rtable *rt = skb_rtable(skb_in);
bool apply_ratelimit = false;
struct ipcm_cookie ipc;
struct flowi4 fl4;
__be32 saddr;
......@@ -677,7 +679,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
}
}
/* Needed by both icmp_global_allow and icmp_xmit_lock */
/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
......@@ -685,7 +687,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/
if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
!icmpv4_global_allow(net, type, code))
!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
......@@ -744,7 +746,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
goto out_unlock;
/* peer icmp_ratelimit */
if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */
......@@ -1487,6 +1489,8 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
net->ipv4.sysctl_icmp_ratemask = 0x1818;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
net->ipv4.sysctl_icmp_msgs_per_sec = 1000;
net->ipv4.sysctl_icmp_msgs_burst = 50;
return 0;
}
......
......@@ -600,22 +600,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0444,
.proc_handler = proc_tcp_available_ulp,
},
{
.procname = "icmp_msgs_per_sec",
.data = &sysctl_icmp_msgs_per_sec,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "icmp_msgs_burst",
.data = &sysctl_icmp_msgs_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "udp_mem",
.data = &sysctl_udp_mem,
......@@ -701,6 +685,22 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "icmp_msgs_per_sec",
.data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "icmp_msgs_burst",
.data = &init_net.ipv4.sysctl_icmp_msgs_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "ping_group_range",
.data = &init_net.ipv4.ping_group_range.range,
......
......@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
return false;
}
static bool icmpv6_global_allow(struct net *net, int type)
static bool icmpv6_global_allow(struct net *net, int type,
bool *apply_ratelimit)
{
if (icmpv6_mask_allow(net, type))
return true;
if (icmp_global_allow())
if (icmp_global_allow(net)) {
*apply_ratelimit = true;
return true;
}
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
......@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
* Check the ICMP output rate limit
*/
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct flowi6 *fl6)
struct flowi6 *fl6, bool apply_ratelimit)
{
struct net *net = sock_net(sk);
struct dst_entry *dst;
bool res = false;
if (icmpv6_mask_allow(net, type))
if (!apply_ratelimit)
return true;
/*
......@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
else
icmp_global_consume(net);
dst_release(dst);
return res;
}
......@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
bool apply_ratelimit = false;
struct dst_entry *dst;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
......@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
return;
}
/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
if (!(skb->dev->flags & IFF_LOOPBACK) &&
!icmpv6_global_allow(net, type, &apply_ratelimit))
goto out_bh_enable;
mip6_addr_swap(skb, parm);
......@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6))
if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
goto out;
tmp_hdr.icmp6_type = type;
......@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb);
bool apply_ratelimit = false;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
......@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out;
/* Check the ratelimit */
if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
!icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
if ((!(skb->dev->flags & IFF_LOOPBACK) &&
!icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
!icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
goto out_dst_release;
idev = __in6_dev_get(skb->dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment