Commit 61e7113e authored by Steffen Klassert

Merge 'xfrm: Add compat layer'

Dmitry Safonov says:

====================
Changes since v2:
- added struct xfrm_translator as API to register xfrm_compat.ko with
  xfrm_state.ko. This allows compilation of translator as a loadable
  module
- fixed indentation and collected reviewed-by (Johannes Berg)
- moved boilerplate from commit messages into cover-letter (Steffen
  Klassert)
- found on KASAN build and fixed non-initialised stack variable usage
  in the translator

The resulting v2/v3 diff can be found here:
https://gist.github.com/0x7f454c46/8f68311dfa1f240959fdbe7c77ed2259

Patches as a .git branch:
https://github.com/0x7f454c46/linux/tree/xfrm-compat-v3

Changes since v1:
- reworked the patch set to use a translator
- separated the compat layer into xfrm_compat.c,
  compiled under XFRM_USER_COMPAT config
- 32-bit messages now being sent in frag_list (like wext-core does)
- instead of __packed add compat_u64 members in compat structures
- selftest reworked to kselftest lib API
- added netlink dump testing to the selftest

XFRM is disabled for compat (32-bit) users because of the UABI difference.
The difference is in the padding of structures and, as a result, the size
of netlink messages differs.

The possibility for compat applications to manage xfrm tunnels was
disabled by the commit 19d7df69 ("xfrm: Refuse to insert 32 bit
userspace socket policies on 64 bit systems") and the commit 74005991
("xfrm: Do not parse 32bits compiled xfrm netlink msg on 64bits host").

This is my second attempt to resolve the xfrm/compat problem by adding
64=>32 and 32=>64 bit translators that, invisibly to the user,
provide translation between a compat user and the kernel.
The previous attempt was to interpret the message ABI according to the
syscall in xfrm_user, which resulted in over-complicated code [1].

Florian Westphal provided the idea of translator and some draft patches
in the discussion. In these patches, his idea is reused and some of his
initial code is also present.

There were a couple of attempts to solve xfrm compat problem:
https://lkml.org/lkml/2017/1/20/733
https://patchwork.ozlabs.org/patch/44600/
http://netdev.vger.kernel.narkive.com/2Gesykj6/patch-net-next-xfrm-correctly-parse-netlink-msg-from-32bits-ip-command-on-64bits-host

All the discussions end in the conclusion that xfrm should have a full
compatible layer to correctly work with 32-bit applications on 64-bit
kernels:
https://lkml.org/lkml/2017/1/23/413
https://patchwork.ozlabs.org/patch/433279/

In a recent lkml discussion, Linus said that it's worth fixing this
problem rather than giving people an excuse to stay on a 32-bit kernel:
https://lkml.org/lkml/2018/2/13/752

There is also a selftest for ipsec tunnels.
It doesn't depend on any library, and the compat version can easily be
built with: make CFLAGS=-m32 net/ipsec

[1]: https://lkml.kernel.org/r/20180726023144.31066-1-dima@arista.com
====================
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
parents 02a20d4f bc2652b7
......@@ -12145,6 +12145,7 @@ F: net/ipv6/ipcomp6.c
F: net/ipv6/xfrm*
F: net/key/
F: net/xfrm/
F: tools/testing/selftests/net/ipsec.c
NETWORKING [IPv4/IPv6]
M: "David S. Miller" <davem@davemloft.net>
......
......@@ -2000,6 +2000,39 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
return 0;
}
extern const int xfrm_msg_min[XFRM_NR_MSGTYPES];
extern const struct nla_policy xfrma_policy[XFRMA_MAX+1];
/*
 * Callbacks supplied by the compat translator and handed to the xfrm core
 * via xfrm_register_translator(). They convert xfrm messages between the
 * 32-bit (compat) layout and the native 64-bit layout.
 */
struct xfrm_translator {
/* Allocate frag_list and put compat translation there */
int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src);
/* Allocate nlmsg with 64-bit translation of received 32-bit message */
struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh,
int maxtype, const struct nla_policy *policy,
struct netlink_ext_ack *extack);
/* Translate 32-bit user_policy from sockptr */
int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen);
/* Module providing the callbacks; xfrm_get_translator() takes a reference on it */
struct module *owner;
};
#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
/* Translator registry; only available when compat support is configured. */
extern int xfrm_register_translator(struct xfrm_translator *xtr);
extern int xfrm_unregister_translator(struct xfrm_translator *xtr);
extern struct xfrm_translator *xfrm_get_translator(void);
extern void xfrm_put_translator(struct xfrm_translator *xtr);
#else
/* Without compat support there is never a translator to hand out. */
static inline struct xfrm_translator *xfrm_get_translator(void)
{
return NULL;
}
/* Counterpart of the stub above: nothing was acquired, nothing to release. */
static inline void xfrm_put_translator(struct xfrm_translator *xtr)
{
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
static inline bool xfrm6_local_dontfrag(const struct sock *sk)
{
......
......@@ -2186,13 +2186,35 @@ EXPORT_SYMBOL(__nlmsg_put);
* It would be better to create kernel thread.
*/
/*
 * Append the NLMSG_DONE message that terminates a dump to @skb.
 *
 * The message payload is nlk->dump_done_errno. When @extack carries a
 * message and the socket has NETLINK_F_EXT_ACK set, the text is added as
 * an NLMSGERR_ATTR_MSG attribute and NLM_F_ACK_TLVS is flagged.
 *
 * Returns 0 on success, -ENOBUFS if nlmsg_put_answer() could not add the
 * message header.
 */
static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct netlink_ext_ack *extack)
{
	struct nlmsghdr *done;

	done = nlmsg_put_answer(skb, cb, NLMSG_DONE,
				sizeof(nlk->dump_done_errno),
				NLM_F_MULTI | cb->answer_flags);
	if (WARN_ON(!done))
		return -ENOBUFS;

	nl_dump_check_consistent(cb, done);
	memcpy(nlmsg_data(done), &nlk->dump_done_errno,
	       sizeof(nlk->dump_done_errno));

	/* No extack text, or the socket did not ask for it: we are done. */
	if (!extack->_msg || !(nlk->flags & NETLINK_F_EXT_ACK))
		return 0;

	done->nlmsg_flags |= NLM_F_ACK_TLVS;
	/* Finalize the message length only if the attribute was added. */
	if (nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg) == 0)
		nlmsg_end(skb, done);

	return 0;
}
static int netlink_dump(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
struct module *module;
int err = -ENOBUFS;
int alloc_min_size;
......@@ -2258,22 +2280,19 @@ static int netlink_dump(struct sock *sk)
return 0;
}
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
sizeof(nlk->dump_done_errno),
NLM_F_MULTI | cb->answer_flags);
if (WARN_ON(!nlh))
if (netlink_dump_done(nlk, skb, cb, &extack))
goto errout_skb;
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
sizeof(nlk->dump_done_errno));
if (extack._msg && nlk->flags & NETLINK_F_EXT_ACK) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack._msg))
nlmsg_end(skb, nlh);
#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
/* frag_list skb's data is used for compat tasks
* and the regular skb's data for normal (non-compat) tasks.
* See netlink_recvmsg().
*/
if (unlikely(skb_shinfo(skb)->frag_list)) {
if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack))
goto errout_skb;
}
#endif
if (sk_filter(sk, skb))
kfree_skb(skb);
......
......@@ -28,6 +28,17 @@ config XFRM_USER
If unsure, say Y.
config XFRM_USER_COMPAT
tristate "Compatible ABI support"
depends on XFRM_USER && COMPAT_FOR_U64_ALIGNMENT && \
HAVE_EFFICIENT_UNALIGNED_ACCESS
select WANT_COMPAT_NETLINK_MESSAGES
help
Transformation(XFRM) user configuration interface like IPsec
used by compatible Linux applications.
If unsure, say N.
config XFRM_INTERFACE
tristate "Transformation virtual interface"
depends on XFRM && IPV6
......
......@@ -9,6 +9,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
This diff is collapsed.
......@@ -2264,6 +2264,66 @@ static bool km_is_alive(const struct km_event *c)
return is_alive;
}
#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
static DEFINE_SPINLOCK(xfrm_translator_lock);
static struct xfrm_translator __rcu *xfrm_translator;
/*
 * Look up the registered translator and take a reference on its module.
 *
 * The lookup runs inside an RCU read-side critical section. Returns NULL
 * when no translator is registered or when try_module_get() fails on the
 * owner. A non-NULL result is released with xfrm_put_translator().
 */
struct xfrm_translator *xfrm_get_translator(void)
{
	struct xfrm_translator *found;

	rcu_read_lock();
	found = rcu_dereference(xfrm_translator);
	/* A translator whose module cannot be pinned is treated as absent. */
	if (likely(found) && !try_module_get(found->owner))
		found = NULL;
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL_GPL(xfrm_get_translator);
/*
 * Drop the module reference held on @xtr's owner.
 * @xtr is dereferenced unconditionally, so it must not be NULL.
 */
void xfrm_put_translator(struct xfrm_translator *xtr)
{
	struct module *owner = xtr->owner;

	module_put(owner);
}
EXPORT_SYMBOL_GPL(xfrm_put_translator);
/*
 * Install @xtr as the translator.
 *
 * Only one translator can be registered at a time; the slot is updated
 * under xfrm_translator_lock. Returns 0 on success or -EEXIST if a
 * translator is already registered.
 */
int xfrm_register_translator(struct xfrm_translator *xtr)
{
	int ret = -EEXIST;

	spin_lock_bh(&xfrm_translator_lock);
	if (likely(xfrm_translator == NULL)) {
		rcu_assign_pointer(xfrm_translator, xtr);
		ret = 0;
	}
	spin_unlock_bh(&xfrm_translator_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(xfrm_register_translator);
/*
 * Remove @xtr from the translator slot.
 *
 * Returns -EINVAL if a different translator is currently registered, and
 * 0 otherwise (including when nothing is registered). synchronize_rcu()
 * is called before returning in every case.
 */
int xfrm_unregister_translator(struct xfrm_translator *xtr)
{
	int ret = 0;

	spin_lock_bh(&xfrm_translator_lock);
	if (likely(xfrm_translator != NULL)) {
		if (rcu_access_pointer(xfrm_translator) == xtr)
			RCU_INIT_POINTER(xfrm_translator, NULL);
		else
			ret = -EINVAL;
	}
	spin_unlock_bh(&xfrm_translator_lock);

	/* Wait for RCU readers before returning to the caller. */
	synchronize_rcu();

	return ret;
}
EXPORT_SYMBOL_GPL(xfrm_unregister_translator);
#endif
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
{
int err;
......@@ -2271,9 +2331,6 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
if (in_compat_syscall())
return -EOPNOTSUPP;
if (sockptr_is_null(optval) && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
......@@ -2288,6 +2345,20 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
if (IS_ERR(data))
return PTR_ERR(data);
if (in_compat_syscall()) {
struct xfrm_translator *xtr = xfrm_get_translator();
if (!xtr)
return -EOPNOTSUPP;
err = xtr->xlate_user_policy_sockptr(&data, optlen);
xfrm_put_translator(xtr);
if (err) {
kfree(data);
return err;
}
}
err = -EINVAL;
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
......
......@@ -975,6 +975,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
struct xfrm_dump_info *sp = ptr;
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
struct xfrm_translator *xtr;
struct xfrm_usersa_info *p;
struct nlmsghdr *nlh;
int err;
......@@ -992,6 +993,18 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
return err;
}
nlmsg_end(skb, nlh);
xtr = xfrm_get_translator();
if (xtr) {
err = xtr->alloc_compat(skb, nlh);
xfrm_put_translator(xtr);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
}
return 0;
}
......@@ -1006,7 +1019,6 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
return 0;
}
static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
......@@ -1083,12 +1095,24 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
u32 pid, unsigned int group)
{
struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
struct xfrm_translator *xtr;
if (!nlsk) {
kfree_skb(skb);
return -EPIPE;
}
xtr = xfrm_get_translator();
if (xtr) {
int err = xtr->alloc_compat(skb, nlmsg_hdr(skb));
xfrm_put_translator(xtr);
if (err) {
kfree_skb(skb);
return err;
}
}
return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
}
......@@ -1308,6 +1332,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
struct xfrm_userspi_info *p;
struct xfrm_translator *xtr;
struct sk_buff *resp_skb;
xfrm_address_t *daddr;
int family;
......@@ -1358,6 +1383,17 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
xtr = xfrm_get_translator();
if (xtr) {
err = xtr->alloc_compat(skb, nlmsg_hdr(skb));
xfrm_put_translator(xtr);
if (err) {
kfree_skb(resp_skb);
goto out;
}
}
err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
out:
......@@ -1764,6 +1800,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
struct xfrm_userpolicy_info *p;
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
struct xfrm_translator *xtr;
struct nlmsghdr *nlh;
int err;
......@@ -1788,6 +1825,18 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
return err;
}
nlmsg_end(skb, nlh);
xtr = xfrm_get_translator();
if (xtr) {
err = xtr->alloc_compat(skb, nlh);
xfrm_put_translator(xtr);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
}
return 0;
}
......@@ -2533,7 +2582,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
#define XMSGSIZE(type) sizeof(struct type)
static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info),
[XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
[XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
......@@ -2556,10 +2605,11 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
};
EXPORT_SYMBOL_GPL(xfrm_msg_min);
#undef XMSGSIZE
static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
......@@ -2591,6 +2641,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
[XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) },
......@@ -2640,11 +2691,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
const struct xfrm_link *link;
struct nlmsghdr *nlh64 = NULL;
int type, err;
if (in_compat_syscall())
return -EOPNOTSUPP;
type = nlh->nlmsg_type;
if (type > XFRM_MSG_MAX)
return -EINVAL;
......@@ -2656,32 +2705,55 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (in_compat_syscall()) {
struct xfrm_translator *xtr = xfrm_get_translator();
if (!xtr)
return -EOPNOTSUPP;
nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max,
link->nla_pol, extack);
xfrm_put_translator(xtr);
if (IS_ERR(nlh64))
return PTR_ERR(nlh64);
if (nlh64)
nlh = nlh64;
}
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
(nlh->nlmsg_flags & NLM_F_DUMP)) {
if (link->dump == NULL)
return -EINVAL;
struct netlink_dump_control c = {
.start = link->start,
.dump = link->dump,
.done = link->done,
};
{
struct netlink_dump_control c = {
.start = link->start,
.dump = link->dump,
.done = link->done,
};
return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c);
if (link->dump == NULL) {
err = -EINVAL;
goto err;
}
err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c);
goto err;
}
err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs,
link->nla_max ? : XFRMA_MAX,
link->nla_pol ? : xfrma_policy, extack);
if (err < 0)
return err;
goto err;
if (link->doit == NULL)
return -EINVAL;
if (link->doit == NULL) {
err = -EINVAL;
goto err;
}
return link->doit(skb, nlh, attrs);
err = link->doit(skb, nlh, attrs);
err:
kvfree(nlh64);
return err;
}
static void xfrm_netlink_rcv(struct sk_buff *skb)
......
# SPDX-License-Identifier: GPL-2.0-only
ipsec
msg_zerocopy
socket
psock_fanout
......
......@@ -29,6 +29,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment