Commit 6badad1c authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Missing netlink attribute sanity check for NFTA_OSF_DREG,
   from Florian Westphal.

2) Use bitmap infrastructure in ipset to fix KASAN slab-out-of-bounds
   reads, from Jozsef Kadlecsik.

3) Missing initial CLOSED state in new sctp connection through
   ctnetlink events, from Jiri Wiesner.

4) Missing check for NFT_CHAIN_HW_OFFLOAD in nf_tables offload
   indirect block infrastructure, from wenxu.

5) Add __nft_chain_type_get() to sanity check family and chain type.

6) Autoload modules from the nf_tables abort path to fix races
   reported by syzbot.

7) Remove unnecessary skb->csum update on inet_proto_csum_replace16(),
   from Praveen Chaudhary.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 722943a5 189c9b1e
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
/* Calculate the bytes required to store the inclusive range of a-b */
static inline int
bitmap_bytes(u32 a, u32 b)
{
return 4 * ((((b - a + 8) / 8) + 3) / 4);
}
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
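The KASAN slab-out-of-bounds reads fixed by the ipset changes come down to sizing: the bitmap_bytes() helper removed above rounds the member area up to 4 bytes, while the kernel bitmap helpers (set_bit(), test_bit(), bitmap_zero()) always operate on whole unsigned longs. A minimal stand-alone sketch (plain userspace C, with BITS_PER_LONG and BITS_TO_LONGS redefined locally for illustration) contrasts the two size calculations:

/* Stand-alone userspace sketch, not kernel code: compare the removed
 * bitmap_bytes() rounding with the BITS_TO_LONGS() sizing used below.
 */
#include <stdio.h>

#define BITS_PER_LONG           (8 * sizeof(unsigned long))
#define BITS_TO_LONGS(nr)       (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* The helper being removed above. */
static int bitmap_bytes(unsigned int a, unsigned int b)
{
        return 4 * ((((b - a + 8) / 8) + 3) / 4);
}

int main(void)
{
        unsigned int elements;

        for (elements = 1; elements <= 128; elements *= 2)
                printf("%3u elements: old %2d bytes, new %2zu bytes\n",
                       elements, bitmap_bytes(0, elements - 1),
                       BITS_TO_LONGS(elements) * sizeof(unsigned long));
        return 0;
}

On a 64-bit build the old formula yields 4 bytes for any set of up to 32 elements, while the bitmap helpers touch at least one 8-byte word; that mismatch is the kind of out-of-bounds access the ipset hunks below avoid by switching to bitmap_zalloc() and BITS_TO_LONGS() sizing.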
@@ -31,7 +31,7 @@ struct nfnetlink_subsystem {
const struct nfnl_callback *cb; /* callback for individual types */
struct module *owner;
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb, bool autoload);
void (*cleanup)(struct net *net);
bool (*valid_genid)(struct net *net, u32 genid);
};
@@ -7,6 +7,7 @@
struct netns_nftables {
struct list_head tables;
struct list_head commit_list;
struct list_head module_list;
struct mutex commit_mutex;
unsigned int base_seq;
u8 gencursor;
@@ -438,6 +438,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
/**
* inet_proto_csum_replace16 - update layer 4 header checksum field
* @sum: Layer 4 header checksum field
* @skb: sk_buff for the packet
* @from: old IPv6 address
* @to: new IPv6 address
* @pseudohdr: True if layer 4 header checksum includes pseudoheader
*
* Update layer 4 header as per the update in IPv6 src/dst address.
*
* There is no need to update skb->csum in this function, because update in two
* fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other
* for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to
* update skb->csum, because update in 3 fields a.) IPv4 src/dst address,
* b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as
* L4 Header checksum for skb->csum calculation.
*/
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr)
@@ -449,9 +466,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
*sum = csum_fold(csum_partial(diff, sizeof(diff),
~csum_unfold(*sum)));
if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
skb->csum = ~csum_partial(diff, sizeof(diff),
~skb->csum);
} else if (pseudohdr)
*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
csum_unfold(*sum)));
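The cancellation argument in the kernel-doc added above can be checked with a toy one's-complement sum: with CHECKSUM_COMPLETE, skb->csum covers both the rewritten address words and the L4 checksum field, and the fix-up written into the checksum field is the inverse of the address delta, so the two changes cancel. A stand-alone userspace sketch with a made-up four-word "packet" (word 0 stands in for an address word, word 3 for the L4 checksum; this is not the kernel helper itself):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* 16-bit one's-complement sum of n 16-bit words. */
static uint16_t csum16(const uint16_t *p, size_t n)
{
        uint32_t sum = 0;

        while (n--) {
                sum += *p++;
                sum = (sum & 0xffff) + (sum >> 16);     /* end-around carry */
        }
        return (uint16_t)sum;
}

int main(void)
{
        uint16_t pkt[4] = { 0x1111, 0x2222, 0x3333, 0x4444 };
        uint16_t before = csum16(pkt, 4);
        uint16_t old_addr = pkt[0], new_addr = 0xabcd;
        uint32_t fix;

        pkt[0] = new_addr;                      /* rewrite the "address" */
        /* Patch the "L4 checksum" field by the inverse delta, which is
         * what the replace helpers do to *sum.
         */
        fix = (uint32_t)pkt[3] + old_addr + (uint16_t)~new_addr;
        fix = (fix & 0xffff) + (fix >> 16);
        fix = (fix & 0xffff) + (fix >> 16);
        pkt[3] = (uint16_t)fix;

        printf("sum before 0x%04x, after 0x%04x\n", before, csum16(pkt, 4));
        return 0;
}

Both sums come out identical (0xaaaa with these values), which is why the CHECKSUM_COMPLETE update removed above was never needed; in the IPv4 case the IP header checksum is rewritten as well, the deltas no longer cancel, and inet_proto_csum_replace4() still has to adjust skb->csum.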
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
memset(map->members, 0, map->memsize);
bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
void *members; /* the set members */
unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
map->members = ip_set_alloc(map->memsize);
map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
void *members; /* the set members */
unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
map->members = ip_set_alloc(map->memsize);
map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
void *members; /* the set members */
unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
map->members = ip_set_alloc(map->memsize);
map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
map->memsize = bitmap_bytes(0, map->elements);
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
@@ -114,7 +114,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
@@ -130,7 +130,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* REPLY */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
@@ -316,7 +316,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
}
ct->proto.sctp.state = new_state;
ct->proto.sctp.state = SCTP_CONNTRACK_NONE;
}
return true;
@@ -552,48 +552,71 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nft_chain_type *
__nft_chain_type_get(u8 family, enum nft_chain_types type)
{
if (family >= NFPROTO_NUMPROTO ||
type >= NFT_CHAIN_T_MAX)
return NULL;
return chain_type[family][type];
}
static const struct nft_chain_type *
__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
{
const struct nft_chain_type *type;
int i;
for (i = 0; i < NFT_CHAIN_T_MAX; i++) {
if (chain_type[family][i] != NULL &&
!nla_strcmp(nla, chain_type[family][i]->name))
return chain_type[family][i];
type = __nft_chain_type_get(family, i);
if (!type)
continue;
if (!nla_strcmp(nla, type->name))
return type;
}
return NULL;
}
/*
* Loading a module requires dropping mutex that guards the transaction.
* A different client might race to start a new transaction meanwhile. Zap the
* list of pending transaction and then restore it once the mutex is grabbed
* again. Users of this function return EAGAIN which implicitly triggers the
* transaction abort path to clean up the list of pending transactions.
*/
struct nft_module_request {
struct list_head list;
char module[MODULE_NAME_LEN];
bool done;
};
#ifdef CONFIG_MODULES
static void nft_request_module(struct net *net, const char *fmt, ...)
static int nft_request_module(struct net *net, const char *fmt, ...)
{
char module_name[MODULE_NAME_LEN];
LIST_HEAD(commit_list);
struct nft_module_request *req;
va_list args;
int ret;
list_splice_init(&net->nft.commit_list, &commit_list);
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return;
return 0;
mutex_unlock(&net->nft.commit_mutex);
request_module("%s", module_name);
mutex_lock(&net->nft.commit_mutex);
list_for_each_entry(req, &net->nft.module_list, list) {
if (!strcmp(req->module, module_name)) {
if (req->done)
return 0;
WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
list_splice(&commit_list, &net->nft.commit_list);
/* A request to load this module already exists. */
return -EAGAIN;
}
}
req = kmalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
req->done = false;
strlcpy(req->module, module_name, MODULE_NAME_LEN);
list_add_tail(&req->list, &net->nft.module_list);
return -EAGAIN;
}
#endif
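The rework above changes the contract of nft_request_module(): instead of dropping the commit mutex and calling request_module() in the middle of a transaction (the race the deleted comment describes), it only records the module name on net->nft.module_list and returns -EAGAIN; the abort path then loads the queued modules and the batch is replayed. A stand-alone userspace mock of that bookkeeping, with invented names (mock_request_module(), mock_autoload()), only to show the caller-visible behaviour:

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_REQ 8

struct mock_request {
        char module[64];
        int done;
};

static struct mock_request reqs[MAX_REQ];
static int nreqs;

/* Queue a request and report -EAGAIN until the "abort path" has loaded
 * the module; repeated calls for the same name do not queue twice.
 */
static int mock_request_module(const char *name)
{
        int i;

        for (i = 0; i < nreqs; i++)
                if (!strcmp(reqs[i].module, name))
                        return reqs[i].done ? 0 : -EAGAIN;

        if (nreqs == MAX_REQ)
                return -ENOMEM;         /* keep the sketch simple */
        snprintf(reqs[nreqs].module, sizeof(reqs[nreqs].module), "%s", name);
        reqs[nreqs].done = 0;
        nreqs++;
        return -EAGAIN;
}

/* Stands in for nf_tables_module_autoload(): "load" every pending
 * module and mark it done so the replayed batch can succeed.
 */
static void mock_autoload(void)
{
        int i;

        for (i = 0; i < nreqs; i++)
                reqs[i].done = 1;
}

int main(void)
{
        printf("%d\n", mock_request_module("nft-chain-2-nat")); /* -EAGAIN, queued */
        printf("%d\n", mock_request_module("nft-chain-2-nat")); /* -EAGAIN, still pending */
        mock_autoload();
        printf("%d\n", mock_request_module("nft-chain-2-nat")); /* 0, "module" present */
        return 0;
}

The real code does the same deduplication with list_for_each_entry() over nft_module_request entries and defers the actual request_module() calls to nf_tables_module_autoload(), shown further down in this diff.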
@@ -617,10 +640,9 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (autoload) {
nft_request_module(net, "nft-chain-%u-%.*s", family,
nla_len(nla), (const char *)nla_data(nla));
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
if (nft_request_module(net, "nft-chain-%u-%.*s", family,
nla_len(nla),
(const char *)nla_data(nla)) == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
@@ -1162,11 +1184,8 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
void nft_register_chain_type(const struct nft_chain_type *ctype)
{
if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
return;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) {
if (WARN_ON(__nft_chain_type_get(ctype->family, ctype->type))) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return;
}
@@ -1768,7 +1787,10 @@ static int nft_chain_parse_hook(struct net *net,
hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
type = chain_type[family][NFT_CHAIN_T_DEFAULT];
type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT);
if (!type)
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_TYPE]) {
type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
family, autoload);
@@ -2328,9 +2350,8 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
static int nft_expr_type_request_module(struct net *net, u8 family,
struct nlattr *nla)
{
nft_request_module(net, "nft-expr-%u-%.*s", family,
nla_len(nla), (char *)nla_data(nla));
if (__nft_expr_type_get(family, nla))
if (nft_request_module(net, "nft-expr-%u-%.*s", family,
nla_len(nla), (char *)nla_data(nla)) == -EAGAIN)
return -EAGAIN;
return 0;
@@ -2356,9 +2377,9 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
if (nft_expr_type_request_module(net, family, nla) == -EAGAIN)
return ERR_PTR(-EAGAIN);
nft_request_module(net, "nft-expr-%.*s",
nla_len(nla), (char *)nla_data(nla));
if (__nft_expr_type_get(family, nla))
if (nft_request_module(net, "nft-expr-%.*s",
nla_len(nla),
(char *)nla_data(nla)) == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
@@ -2449,9 +2470,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
err = PTR_ERR(ops);
#ifdef CONFIG_MODULES
if (err == -EAGAIN)
nft_expr_type_request_module(ctx->net,
ctx->family,
tb[NFTA_EXPR_NAME]);
if (nft_expr_type_request_module(ctx->net,
ctx->family,
tb[NFTA_EXPR_NAME]) != -EAGAIN)
err = -ENOENT;
#endif
goto err1;
}
@@ -3288,8 +3310,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (list_empty(&nf_tables_set_types)) {
nft_request_module(ctx->net, "nft-set");
if (!list_empty(&nf_tables_set_types))
if (nft_request_module(ctx->net, "nft-set") == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
@@ -5415,8 +5436,7 @@ nft_obj_type_get(struct net *net, u32 objtype)
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
nft_request_module(net, "nft-obj-%u", objtype);
if (__nft_obj_type_get(objtype))
if (nft_request_module(net, "nft-obj-%u", objtype) == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
@@ -5989,8 +6009,7 @@ nft_flowtable_type_get(struct net *net, u8 family)
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
nft_request_module(net, "nf-flowtable-%u", family);
if (__nft_flowtable_type_get(family))
if (nft_request_module(net, "nf-flowtable-%u", family) == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
@@ -6992,6 +7011,18 @@ static void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nft_module_request *req, *next;
WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
list_for_each_entry_safe(req, next, &net->nft.module_list, list) {
WARN_ON_ONCE(!req->done);
list_del(&req->list);
kfree(req);
}
}
static void nf_tables_commit_release(struct net *net)
{
struct nft_trans *trans;
@@ -7004,6 +7035,7 @@ static void nf_tables_commit_release(struct net *net)
* to prevent expensive synchronize_rcu() in commit phase.
*/
if (list_empty(&net->nft.commit_list)) {
nf_tables_module_autoload_cleanup(net);
mutex_unlock(&net->nft.commit_mutex);
return;
}
@@ -7018,6 +7050,7 @@ static void nf_tables_commit_release(struct net *net)
list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
spin_unlock(&nf_tables_destroy_list_lock);
nf_tables_module_autoload_cleanup(net);
mutex_unlock(&net->nft.commit_mutex);
schedule_work(&trans_destroy_work);
@@ -7209,6 +7242,26 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
static void nf_tables_module_autoload(struct net *net)
{
struct nft_module_request *req, *next;
LIST_HEAD(module_list);
list_splice_init(&net->nft.module_list, &module_list);
mutex_unlock(&net->nft.commit_mutex);
list_for_each_entry_safe(req, next, &module_list, list) {
if (req->done) {
list_del(&req->list);
kfree(req);
} else {
request_module("%s", req->module);
req->done = true;
}
}
mutex_lock(&net->nft.commit_mutex);
list_splice(&module_list, &net->nft.module_list);
}
static void nf_tables_abort_release(struct nft_trans *trans)
{
switch (trans->msg_type) {
@@ -7238,7 +7291,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
static int __nf_tables_abort(struct net *net)
static int __nf_tables_abort(struct net *net, bool autoload)
{
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
@@ -7360,6 +7413,11 @@ static int __nf_tables_abort(struct net *net)
nf_tables_abort_release(trans);
}
if (autoload)
nf_tables_module_autoload(net);
else
nf_tables_module_autoload_cleanup(net);
return 0;
}
@@ -7368,9 +7426,9 @@ static void nf_tables_cleanup(struct net *net)
nft_validate_state_update(net, NFT_VALIDATE_SKIP);
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb)
static int nf_tables_abort(struct net *net, struct sk_buff *skb, bool autoload)
{
int ret = __nf_tables_abort(net);
int ret = __nf_tables_abort(net, autoload);
mutex_unlock(&net->nft.commit_mutex);
@@ -7965,6 +8023,7 @@ static int __net_init nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.tables);
INIT_LIST_HEAD(&net->nft.commit_list);
INIT_LIST_HEAD(&net->nft.module_list);
mutex_init(&net->nft.commit_mutex);
net->nft.base_seq = 1;
net->nft.validate_state = NFT_VALIDATE_SKIP;
@@ -7976,7 +8035,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
{
mutex_lock(&net->nft.commit_mutex);
if (!list_empty(&net->nft.commit_list))
__nf_tables_abort(net);
__nf_tables_abort(net, false);
__nft_release_tables(net);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
@@ -564,7 +564,7 @@ static void nft_indr_block_cb(struct net_device *dev,
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain) {
if (chain && chain->flags & NFT_CHAIN_HW_OFFLOAD) {
struct nft_base_chain *basechain;
basechain = nft_base_chain(chain);
@@ -476,7 +476,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
}
done:
if (status & NFNL_BATCH_REPLAY) {
ss->abort(net, oskb);
ss->abort(net, oskb, true);
nfnl_err_reset(&err_list);
kfree_skb(skb);
module_put(ss->owner);
@@ -487,11 +487,11 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
status |= NFNL_BATCH_REPLAY;
goto done;
} else if (err) {
ss->abort(net, oskb);
ss->abort(net, oskb, false);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
}
} else {
ss->abort(net, oskb);
ss->abort(net, oskb, false);
}
if (ss->cleanup)
ss->cleanup(net);
@@ -61,6 +61,9 @@ static int nft_osf_init(const struct nft_ctx *ctx,
int err;
u8 ttl;
if (!tb[NFTA_OSF_DREG])
return -EINVAL;
if (tb[NFTA_OSF_TTL]) {
ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
if (ttl > 2)