Commit 7a95752d authored by Willem de Bruijn, committed by Stefan Bader

packet: fix bitfield update race

BugLink: http://bugs.launchpad.net/bugs/1768474

[ Upstream commit a6361f0c ]

Updates to the bitfields in struct packet_sock are not atomic.
Serialize these read-modify-write cycles.

Move po->running into a separate variable. Its writes are protected by
po->bind_lock (except for one startup case at packet_create). Also
replace a textual precondition warning with lockdep annotation.

All others are set only in packet_setsockopt. Serialize these
updates by holding the socket lock. Analogous to other field updates,
also hold the lock when testing whether a ring is active (pg_vec).

Fixes: 8dc41944 ("[PACKET]: Add optional checksum computation for recvmsg")
Reported-by: DaeRyong Jeong <threeearcat@gmail.com>
Reported-by: Byoungyoung Lee <byoungyoung@purdue.edu>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
parent e6856cea
...@@ -332,11 +332,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) ...@@ -332,11 +332,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
skb_set_queue_mapping(skb, queue_index); skb_set_queue_mapping(skb, queue_index);
} }
/* register_prot_hook must be invoked with the po->bind_lock held, /* __register_prot_hook must be invoked through register_prot_hook
* or from a context in which asynchronous accesses to the packet * or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()). * socket is not possible (packet_create()).
*/ */
static void register_prot_hook(struct sock *sk) static void __register_prot_hook(struct sock *sk)
{ {
struct packet_sock *po = pkt_sk(sk); struct packet_sock *po = pkt_sk(sk);
...@@ -351,8 +351,13 @@ static void register_prot_hook(struct sock *sk) ...@@ -351,8 +351,13 @@ static void register_prot_hook(struct sock *sk)
} }
} }
/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock static void register_prot_hook(struct sock *sk)
* held. If the sync parameter is true, we will temporarily drop {
lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
__register_prot_hook(sk);
}
/* If the sync parameter is true, we will temporarily drop
* the po->bind_lock and do a synchronize_net to make sure no * the po->bind_lock and do a synchronize_net to make sure no
* asynchronous packet processing paths still refer to the elements * asynchronous packet processing paths still refer to the elements
* of po->prot_hook. If the sync parameter is false, it is the * of po->prot_hook. If the sync parameter is false, it is the
...@@ -362,6 +367,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) ...@@ -362,6 +367,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
{ {
struct packet_sock *po = pkt_sk(sk); struct packet_sock *po = pkt_sk(sk);
lockdep_assert_held_once(&po->bind_lock);
po->running = 0; po->running = 0;
if (po->fanout) if (po->fanout)
...@@ -3134,7 +3141,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, ...@@ -3134,7 +3141,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
if (proto) { if (proto) {
po->prot_hook.type = proto; po->prot_hook.type = proto;
register_prot_hook(sk); __register_prot_hook(sk);
} }
mutex_lock(&net->packet.sklist_lock); mutex_lock(&net->packet.sklist_lock);
...@@ -3653,12 +3660,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ...@@ -3653,12 +3660,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val)) if (optlen != sizeof(val))
return -EINVAL; return -EINVAL;
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val))) if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT; return -EFAULT;
po->tp_loss = !!val;
return 0; lock_sock(sk);
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
po->tp_loss = !!val;
ret = 0;
}
release_sock(sk);
return ret;
} }
case PACKET_AUXDATA: case PACKET_AUXDATA:
{ {
...@@ -3669,7 +3682,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ...@@ -3669,7 +3682,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val))) if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT; return -EFAULT;
lock_sock(sk);
po->auxdata = !!val; po->auxdata = !!val;
release_sock(sk);
return 0; return 0;
} }
case PACKET_ORIGDEV: case PACKET_ORIGDEV:
...@@ -3681,7 +3696,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ...@@ -3681,7 +3696,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val))) if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT; return -EFAULT;
lock_sock(sk);
po->origdev = !!val; po->origdev = !!val;
release_sock(sk);
return 0; return 0;
} }
case PACKET_VNET_HDR: case PACKET_VNET_HDR:
...@@ -3690,15 +3707,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ...@@ -3690,15 +3707,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (sock->type != SOCK_RAW) if (sock->type != SOCK_RAW)
return -EINVAL; return -EINVAL;
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
return -EBUSY;
if (optlen < sizeof(val)) if (optlen < sizeof(val))
return -EINVAL; return -EINVAL;
if (copy_from_user(&val, optval, sizeof(val))) if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT; return -EFAULT;
po->has_vnet_hdr = !!val; lock_sock(sk);
return 0; if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
po->has_vnet_hdr = !!val;
ret = 0;
}
release_sock(sk);
return ret;
} }
case PACKET_TIMESTAMP: case PACKET_TIMESTAMP:
{ {
...@@ -3736,11 +3758,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ...@@ -3736,11 +3758,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val)) if (optlen != sizeof(val))
return -EINVAL; return -EINVAL;
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val))) if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT; return -EFAULT;
po->tp_tx_has_off = !!val;
lock_sock(sk);
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
po->tp_tx_has_off = !!val;
ret = 0;
}
release_sock(sk);
return 0; return 0;
} }
case PACKET_QDISC_BYPASS: case PACKET_QDISC_BYPASS:
......
...@@ -109,10 +109,12 @@ struct packet_sock { ...@@ -109,10 +109,12 @@ struct packet_sock {
int copy_thresh; int copy_thresh;
spinlock_t bind_lock; spinlock_t bind_lock;
struct mutex pg_vec_lock; struct mutex pg_vec_lock;
unsigned int running:1, /* prot_hook is attached*/ unsigned int running; /* bind_lock must be held */
auxdata:1, unsigned int auxdata:1, /* writer must hold sock lock */
origdev:1, origdev:1,
has_vnet_hdr:1; has_vnet_hdr:1,
tp_loss:1,
tp_tx_has_off:1;
int pressure; int pressure;
int ifindex; /* bound device */ int ifindex; /* bound device */
__be16 num; __be16 num;
...@@ -122,8 +124,6 @@ struct packet_sock { ...@@ -122,8 +124,6 @@ struct packet_sock {
enum tpacket_versions tp_version; enum tpacket_versions tp_version;
unsigned int tp_hdrlen; unsigned int tp_hdrlen;
unsigned int tp_reserve; unsigned int tp_reserve;
unsigned int tp_loss:1;
unsigned int tp_tx_has_off:1;
unsigned int tp_tstamp; unsigned int tp_tstamp;
struct net_device __rcu *cached_dev; struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb); int (*xmit)(struct sk_buff *skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment