Commit c9170f13 authored by David S. Miller's avatar David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec

Steffen Klassert says:

====================
pull request (net): ipsec 2021-03-31

1) Fix ipv4 pmtu checks for xfrm anf vti interfaces.
   From Eyal Birger.

2) There are situations where the socket passed to
   xfrm_output_resume() is not the same as the one
   attached to the skb. Use the socket passed to
   xfrm_output_resume() to avoid lookup failures
   when xfrm is used with VRFs.
   From Evan Nimmo.

3) Make the xfrm_state_hash_generation sequence counter per
   network namespace because but its write serialization
   lock is also per network namespace. Write protection
   is insufficient otherwise.
   From Ahmed S. Darwish.

4) Fixup sctp featue flags when used with esp offload.
   From Xin Long.

5) xfrm BEET mode doesn't support fragments for inner packets.
   This is a limitation of the protocol, so no fix possible.
   Warn at least to notify the user about that situation.
   From Xin Long.

6) Fix NULL pointer dereference on policy lookup when
   namespaces are uses in combination with esp offload.

7) Fix incorrect transformation on esp offload when
   packets get segmented at layer 3.

8) Fix some user triggered usages of WARN_ONCE in
   the xfrm compat layer.
   From Dmitry Safonov.

Please pull or let me know if there are problems.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents bdc2ab5c ef19e111
......@@ -72,7 +72,9 @@ struct netns_xfrm {
#if IS_ENABLED(CONFIG_IPV6)
struct dst_ops xfrm6_dst_ops;
#endif
spinlock_t xfrm_state_lock;
spinlock_t xfrm_state_lock;
seqcount_spinlock_t xfrm_state_hash_generation;
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
};
......
......@@ -1097,7 +1097,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
return __xfrm_policy_check(sk, ndir, skb, family);
return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
(skb_dst(skb)->flags & DST_NOPOLICY) ||
(skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
__xfrm_policy_check(sk, ndir, skb, family);
}
......@@ -1557,7 +1557,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
int xfrm_trans_queue(struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
struct sk_buff *));
int xfrm_output_resume(struct sk_buff *skb, int err);
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
#if IS_ENABLED(CONFIG_NET_PKTGEN)
......
......@@ -141,7 +141,7 @@ static void ah_output_done(struct crypto_async_request *base, int err)
}
kfree(AH_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
xfrm_output_resume(skb->sk, skb, err);
}
static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
......
......@@ -279,7 +279,7 @@ static void esp_output_done(struct crypto_async_request *base, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
xfrm_output_resume(skb, err);
xfrm_output_resume(skb->sk, skb, err);
}
}
......
......@@ -217,10 +217,12 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) &&
!(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev)
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK |
NETIF_F_SCTP_CRC);
else if (!(features & NETIF_F_HW_ESP_TX_CSUM) &&
!(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM))
esp_features = features & ~NETIF_F_CSUM_MASK;
esp_features = features & ~(NETIF_F_CSUM_MASK |
NETIF_F_SCTP_CRC);
xo->flags |= XFRM_GSO_SEGMENT;
......@@ -312,8 +314,17 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
ip_hdr(skb)->tot_len = htons(skb->len);
ip_send_check(ip_hdr(skb));
if (hw_offload)
if (hw_offload) {
if (!skb_ext_add(skb, SKB_EXT_SEC_PATH))
return -ENOMEM;
xo = xfrm_offload(skb);
if (!xo)
return -EINVAL;
xo->flags |= XFRM_XMIT;
return 0;
}
err = esp_output_tail(x, skb, &esp);
if (err)
......
......@@ -218,7 +218,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
}
if (dst->flags & DST_XFRM_QUEUE)
goto queued;
goto xmit;
if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
dev->stats.tx_carrier_errors++;
......@@ -238,6 +238,8 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
if (skb->len > mtu) {
skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
} else {
......@@ -251,7 +253,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
queued:
xmit:
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
......
......@@ -316,7 +316,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
}
kfree(AH_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
xfrm_output_resume(skb->sk, skb, err);
}
static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
......
......@@ -314,7 +314,7 @@ static void esp_output_done(struct crypto_async_request *base, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
xfrm_output_resume(skb, err);
xfrm_output_resume(skb->sk, skb, err);
}
}
......
......@@ -254,9 +254,11 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
skb->encap_hdr_csum = 1;
if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK |
NETIF_F_SCTP_CRC);
else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
esp_features = features & ~NETIF_F_CSUM_MASK;
esp_features = features & ~(NETIF_F_CSUM_MASK |
NETIF_F_SCTP_CRC);
xo->flags |= XFRM_GSO_SEGMENT;
......@@ -346,8 +348,17 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
ipv6_hdr(skb)->payload_len = htons(len);
if (hw_offload)
if (hw_offload) {
if (!skb_ext_add(skb, SKB_EXT_SEC_PATH))
return -ENOMEM;
xo = xfrm_offload(skb);
if (!xo)
return -EINVAL;
xo->flags |= XFRM_XMIT;
return 0;
}
err = esp6_output_tail(x, skb, &esp);
if (err)
......
......@@ -494,7 +494,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
}
if (dst->flags & DST_XFRM_QUEUE)
goto queued;
goto xmit;
x = dst->xfrm;
if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr))
......@@ -523,6 +523,8 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
}
......@@ -531,7 +533,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
goto tx_err_dst_release;
}
queued:
xmit:
skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
......
......@@ -216,7 +216,7 @@ static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
case XFRM_MSG_GETSADINFO:
case XFRM_MSG_GETSPDINFO:
default:
WARN_ONCE(1, "unsupported nlmsg_type %d", nlh_src->nlmsg_type);
pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type);
return ERR_PTR(-EOPNOTSUPP);
}
......@@ -277,7 +277,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID);
WARN_ONCE(1, "unsupported nla_type %d", src->nla_type);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
}
......@@ -315,8 +315,10 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src
struct sk_buff *new = NULL;
int err;
if (WARN_ON_ONCE(type >= ARRAY_SIZE(xfrm_msg_min)))
if (type >= ARRAY_SIZE(xfrm_msg_min)) {
pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type);
return -EOPNOTSUPP;
}
if (skb_shinfo(skb)->frag_list == NULL) {
new = alloc_skb(skb->len + skb_tailroom(skb), GFP_ATOMIC);
......@@ -378,6 +380,10 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src,
struct nlmsghdr *nlmsg = dst;
struct nlattr *nla;
/* xfrm_user_rcv_msg_compat() relies on fact that 32-bit messages
* have the same len or shorted than 64-bit ones.
* 32-bit translation that is bigger than 64-bit original is unexpected.
*/
if (WARN_ON_ONCE(copy_len > payload))
copy_len = payload;
......
......@@ -134,8 +134,6 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
return skb;
}
xo->flags |= XFRM_XMIT;
if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) {
struct sk_buff *segs;
......
......@@ -306,6 +306,8 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
}
......@@ -314,6 +316,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
return -EMSGSIZE;
}
xmit:
xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = tdev;
......
......@@ -503,22 +503,22 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
return err;
}
int xfrm_output_resume(struct sk_buff *skb, int err)
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
struct net *net = xs_net(skb_dst(skb)->xfrm);
while (likely((err = xfrm_output_one(skb, err)) == 0)) {
nf_reset_ct(skb);
err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
err = skb_dst(skb)->ops->local_out(net, sk, skb);
if (unlikely(err != 1))
goto out;
if (!skb_dst(skb)->xfrm)
return dst_output(net, skb->sk, skb);
return dst_output(net, sk, skb);
err = nf_hook(skb_dst(skb)->ops->family,
NF_INET_POST_ROUTING, net, skb->sk, skb,
NF_INET_POST_ROUTING, net, sk, skb,
NULL, skb_dst(skb)->dev, xfrm_output2);
if (unlikely(err != 1))
goto out;
......@@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(xfrm_output_resume);
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return xfrm_output_resume(skb, 1);
return xfrm_output_resume(sk, skb, 1);
}
static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
......@@ -660,6 +660,12 @@ static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
if (x->outer_mode.encap == XFRM_MODE_BEET &&
ip_is_fragment(ip_hdr(skb))) {
net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
return -EAFNOSUPPORT;
}
err = xfrm4_tunnel_check_size(skb);
if (err)
return err;
......@@ -705,8 +711,15 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
unsigned int ptr = 0;
int err;
if (x->outer_mode.encap == XFRM_MODE_BEET &&
ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) {
net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n");
return -EAFNOSUPPORT;
}
err = xfrm6_tunnel_check_size(skb);
if (err)
return err;
......
......@@ -44,7 +44,6 @@ static void xfrm_state_gc_task(struct work_struct *work);
*/
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
static struct kmem_cache *xfrm_state_cache __ro_after_init;
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
......@@ -140,7 +139,7 @@ static void xfrm_hash_resize(struct work_struct *work)
}
spin_lock_bh(&net->xfrm.xfrm_state_lock);
write_seqcount_begin(&xfrm_state_hash_generation);
write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
......@@ -156,7 +155,7 @@ static void xfrm_hash_resize(struct work_struct *work)
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
net->xfrm.state_hmask = nhashmask;
write_seqcount_end(&xfrm_state_hash_generation);
write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
osize = (ohashmask + 1) * sizeof(struct hlist_head);
......@@ -1063,7 +1062,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
to_put = NULL;
sequence = read_seqcount_begin(&xfrm_state_hash_generation);
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
......@@ -1176,7 +1175,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
if (to_put)
xfrm_state_put(to_put);
if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
*err = -EAGAIN;
if (x) {
xfrm_state_put(x);
......@@ -2666,6 +2665,8 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_num = 0;
INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
spin_lock_init(&net->xfrm.xfrm_state_lock);
seqcount_spinlock_init(&net->xfrm.xfrm_state_hash_generation,
&net->xfrm.xfrm_state_lock);
return 0;
out_byspi:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment