Commit 90be7a5c authored by David S. Miller

Merge tag 'nf-24-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

netfilter pull request 24-04-11

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

Patches #1 and #2 add the missing rcu read-side lock when iterating over
the expression and object type lists, which could race with module removal.

Patch #3 prevents promisc packets from visiting the bridge/input hook,
	 amending a recent fix that addresses a conntrack confirmation race
	 in br_netfilter and nf_conntrack_bridge.

Patch #4 adds an iterator type decorator and uses it to fetch the
	 current pipapo set backend datastructure view when netlink dumps
	 the set elements.

Patch #5 fixes removal of duplicate elements in the pipapo set backend.

Patch #6 makes the flowtable validate the pppoe header before accessing it.

Patch #7 fixes the flowtable datapath for pppoe packets; otherwise the
         lookup fails and pppoe packets follow the classic path.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 2ae9a897 6db5dc7b
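
As context for patches #1 and #2: the fix follows the common RCU pattern for looking up a type on a list whose entries can disappear on module unload. The list walk runs under rcu_read_lock(), and a reference on the owning module is taken before the read side is left. Below is a minimal sketch of that pattern, not the patched nf_tables code itself; struct my_type, my_type_list and my_type_get() are made-up names.

#include <linux/list.h>
#include <linux/module.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/string.h>

struct my_type {
	struct list_head	list;
	const char		*name;
	struct module		*owner;
};

static LIST_HEAD(my_type_list);	/* entries added/removed with RCU list helpers */

static const struct my_type *my_type_get(const char *name)
{
	const struct my_type *type;

	rcu_read_lock();
	list_for_each_entry_rcu(type, &my_type_list, list) {
		if (!strcmp(type->name, name) && try_module_get(type->owner)) {
			/* module pinned: safe to leave the read-side section */
			rcu_read_unlock();
			return type;
		}
	}
	rcu_read_unlock();

	return NULL;	/* caller may try request_module() and look up again */
}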
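For patch #3: the bridge records, per skb, whether the frame is only being passed up because the bridge device is promiscuous, and the NF_BR_LOCAL_IN conntrack hooks then skip confirmation for such packets. A reduced sketch of that shape follows, with a made-up control block standing in for the real br_input_skb_cb; error paths and the rest of the hooks are omitted.

#include <linux/netfilter.h>
#include <linux/skbuff.h>

/* made-up, reduced stand-in for the bridge input control block */
struct input_cb {
	u8 promisc:1;
};
#define INPUT_CB(skb)	((struct input_cb *)(skb)->cb)

/* bridge input path: remember why this frame is delivered locally */
static void mark_promisc(struct sk_buff *skb, bool promisc)
{
	INPUT_CB(skb)->promisc = promisc;
	/* ...NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, ...) runs next... */
}

/* NF_BR_LOCAL_IN hook: promisc-only traffic must not confirm conntrack */
static unsigned int local_in_hook(struct sk_buff *skb)
{
	if (INPUT_CB(skb)->promisc) {
		nf_reset_ct(skb);	/* let go of the unconfirmed conntrack reference */
		return NF_ACCEPT;	/* skip confirmation entirely */
	}

	/* packets really addressed to the host continue to confirmation */
	return NF_ACCEPT;
}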
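For patches #6 and #7: before the fix the flowtable read the PPPoE header without checking that it is present in the skb's linear area, and it located it via skb_mac_header(), which points at the Ethernet header rather than the PPPoE session header that follows it. A small sketch of the safe pattern, assuming skb->data already points at the PPPoE session header on this path; the function name and the reduced error handling here are illustrative, not the kernel's.

#include <linux/if_ether.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/skbuff.h>

/* Sketch: fetch the encapsulated protocol behind a PPPoE session header,
 * but only after pskb_may_pull() guarantees the header bytes are linear.
 */
static bool pppoe_inner_proto(struct sk_buff *skb, __be16 *inner_proto)
{
	const struct pppoe_hdr *phdr;
	__be16 ppp_proto;

	if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
		return false;	/* truncated header: caller falls back to the classic path */

	phdr = (const struct pppoe_hdr *)skb_network_header(skb);
	ppp_proto = *(const __be16 *)(phdr + 1);	/* PPP protocol follows the 6-byte header */

	switch (ppp_proto) {
	case htons(PPP_IP):
		*inner_proto = htons(ETH_P_IP);
		return true;
	case htons(PPP_IPV6):
		*inner_proto = htons(ETH_P_IPV6);
		return true;
	}

	return false;
}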
@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
 int nf_flow_table_offload_init(void);
 void nf_flow_table_offload_exit(void);
 
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
 {
 	__be16 proto;
@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
 	return 0;
 }
 
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+	if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+		return false;
+
+	*inner_proto = __nf_flow_pppoe_proto(skb);
+
+	return true;
+}
+
 #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
 #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
 #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count)	\
...
@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
 	return (void *)priv;
 }
 
+/**
+ * enum nft_iter_type - nftables set iterator type
+ *
+ * @NFT_ITER_READ: read-only iteration over set elements
+ * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ */
+enum nft_iter_type {
+	NFT_ITER_UNSPEC,
+	NFT_ITER_READ,
+	NFT_ITER_UPDATE,
+};
+
 struct nft_set;
 
 struct nft_set_iter {
 	u8			genmask;
+	enum nft_iter_type	type:8;
 	unsigned int		count;
 	unsigned int		skip;
 	int			err;
...
@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
 	return netif_receive_skb(skb);
 }
 
-static int br_pass_frame_up(struct sk_buff *skb)
+static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
 	struct net_bridge *br = netdev_priv(brdev);
@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
 	br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
 			   BR_MCAST_DIR_TX);
 
+	BR_INPUT_SKB_CB(skb)->promisc = promisc;
+
 	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
 		       dev_net(indev), NULL, skb, indev, NULL,
 		       br_netif_receive_skb);
@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 	struct net_bridge_mcast *brmctx;
 	struct net_bridge_vlan *vlan;
 	struct net_bridge *br;
+	bool promisc;
 	u16 vid = 0;
 	u8 state;
@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 	if (p->flags & BR_LEARNING)
 		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
 
-	local_rcv = !!(br->dev->flags & IFF_PROMISC);
+	promisc = !!(br->dev->flags & IFF_PROMISC);
+	local_rcv = promisc;
+
 	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
 		/* by definition the broadcast is also a multicast address */
 		if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 		unsigned long now = jiffies;
 
 		if (test_bit(BR_FDB_LOCAL, &dst->flags))
-			return br_pass_frame_up(skb);
+			return br_pass_frame_up(skb, false);
 
 		if (now != dst->used)
 			dst->used = now;
@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 	}
 
 	if (local_rcv)
-		return br_pass_frame_up(skb);
+		return br_pass_frame_up(skb, promisc);
 
 out:
 	return 0;
@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 			goto forward;
 		}
 
+		BR_INPUT_SKB_CB(skb)->promisc = false;
+
 		/* The else clause should be hit when nf_hook():
 		 *   - returns < 0 (drop/error)
 		 *   - returns = 0 (stolen/nf_queue)
...
@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
 				   struct sk_buff *skb,
 				   const struct nf_hook_state *state)
 {
+	bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
 	struct nf_conntrack *nfct = skb_nfct(skb);
 	const struct nf_ct_hook *ct_hook;
 	struct nf_conn *ct;
 	int ret;
 
+	if (promisc) {
+		nf_reset_ct(skb);
+		return NF_ACCEPT;
+	}
+
 	if (!nfct || skb->pkt_type == PACKET_HOST)
 		return NF_ACCEPT;
...
@@ -589,6 +589,7 @@ struct br_input_skb_cb {
 #endif
 	u8 proxyarp_replied:1;
 	u8 src_port_isolated:1;
+	u8 promisc:1;
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	u8 vlan_filtered:1;
 #endif
...
@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
 static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	enum ip_conntrack_info ctinfo;
+	bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+	struct nf_conntrack *nfct = skb_nfct(skb);
 	struct nf_conn *ct;
 
-	if (skb->pkt_type == PACKET_HOST)
+	if (promisc) {
+		nf_reset_ct(skb);
+		return NF_ACCEPT;
+	}
+
+	if (!nfct || skb->pkt_type == PACKET_HOST)
 		return NF_ACCEPT;
 
 	/* nf_conntrack_confirm() cannot handle concurrent clones,
 	 * this happens for broad/multicast frames with e.g. macvlan on top
 	 * of the bridge device.
 	 */
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+	ct = container_of(nfct, struct nf_conn, ct_general);
+	if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
 		return NF_ACCEPT;
 
 	/* let inet prerouting call conntrack again */
...
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
 		proto = veth->h_vlan_encapsulated_proto;
 		break;
 	case htons(ETH_P_PPP_SES):
-		proto = nf_flow_pppoe_proto(skb);
+		if (!nf_flow_pppoe_proto(skb, &proto))
+			return NF_ACCEPT;
 		break;
 	default:
 		proto = skb->protocol;
...
@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
 		tuple->encap[i].proto = skb->protocol;
 		break;
 	case htons(ETH_P_PPP_SES):
-		phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+		phdr = (struct pppoe_hdr *)skb_network_header(skb);
 		tuple->encap[i].id = ntohs(phdr->sid);
 		tuple->encap[i].proto = skb->protocol;
 		break;
@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 				       u32 *offset)
 {
 	struct vlan_ethhdr *veth;
+	__be16 inner_proto;
 
 	switch (skb->protocol) {
 	case htons(ETH_P_8021Q):
@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
 		}
 		break;
 	case htons(ETH_P_PPP_SES):
-		if (nf_flow_pppoe_proto(skb) == proto) {
+		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+		    inner_proto == proto) {
 			*offset += PPPOE_SES_HLEN;
 			return true;
 		}
@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
 		skb_reset_network_header(skb);
 		break;
 	case htons(ETH_P_PPP_SES):
-		skb->protocol = nf_flow_pppoe_proto(skb);
+		skb->protocol = __nf_flow_pppoe_proto(skb);
 		skb_pull(skb, PPPOE_SES_HLEN);
 		skb_reset_network_header(skb);
 		break;
...
@@ -626,6 +626,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	struct nft_set_iter iter = {
 		.genmask	= nft_genmask_next(ctx->net),
+		.type		= NFT_ITER_UPDATE,
 		.fn		= nft_mapelem_deactivate,
 	};
@@ -3060,7 +3061,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
 {
 	const struct nft_expr_type *type, *candidate = NULL;
 
-	list_for_each_entry(type, &nf_tables_expressions, list) {
+	list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
 		if (!nla_strcmp(nla, type->name)) {
 			if (!type->family && !candidate)
 				candidate = type;
@@ -3092,9 +3093,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
+	rcu_read_lock();
 	type = __nft_expr_type_get(family, nla);
-	if (type != NULL && try_module_get(type->owner))
+	if (type != NULL && try_module_get(type->owner)) {
+		rcu_read_unlock();
 		return type;
+	}
+	rcu_read_unlock();
 
 	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -5441,6 +5446,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	iter.genmask	= nft_genmask_next(ctx->net);
+	iter.type	= NFT_ITER_UPDATE;
 	iter.skip	= 0;
 	iter.count	= 0;
 	iter.err	= 0;
@@ -5514,6 +5520,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	struct nft_set_iter iter = {
 		.genmask	= nft_genmask_next(ctx->net),
+		.type		= NFT_ITER_UPDATE,
 		.fn		= nft_mapelem_activate,
 	};
@@ -5888,6 +5895,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	args.skb		= skb;
 	args.reset		= dump_ctx->reset;
 	args.iter.genmask	= nft_genmask_cur(net);
+	args.iter.type		= NFT_ITER_READ;
 	args.iter.skip		= cb->args[0];
 	args.iter.count		= 0;
 	args.iter.err		= 0;
@@ -7372,6 +7380,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
 {
 	struct nft_set_iter iter = {
 		.genmask	= genmask,
+		.type		= NFT_ITER_UPDATE,
 		.fn		= nft_setelem_flush,
 	};
@@ -7607,7 +7616,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
 {
 	const struct nft_object_type *type;
 
-	list_for_each_entry(type, &nf_tables_objects, list) {
+	list_for_each_entry_rcu(type, &nf_tables_objects, list) {
 		if (type->family != NFPROTO_UNSPEC &&
 		    type->family != family)
 			continue;
@@ -7623,9 +7632,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family)
 {
 	const struct nft_object_type *type;
 
+	rcu_read_lock();
 	type = __nft_obj_type_get(objtype, family);
-	if (type != NULL && try_module_get(type->owner))
+	if (type != NULL && try_module_get(type->owner)) {
+		rcu_read_unlock();
 		return type;
+	}
+	rcu_read_unlock();
 
 	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -10871,6 +10884,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 			continue;
 
 		iter.genmask	= nft_genmask_next(ctx->net);
+		iter.type	= NFT_ITER_UPDATE;
 		iter.skip	= 0;
 		iter.count	= 0;
 		iter.err	= 0;
...
@@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 		rules_fx = rules_f0;
 
 		nft_pipapo_for_each_field(f, i, m) {
+			bool last = i == m->field_count - 1;
+
 			if (!pipapo_match_field(f, start, rules_fx,
 						match_start, match_end))
 				break;
@@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 			match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
 			match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
-		}
 
-		if (i == m->field_count) {
-			priv->dirty = true;
+			if (last && f->mt[rulemap[i].to].e == e) {
+				priv->dirty = true;
 
-			pipapo_drop(m, rulemap);
-
-			return;
+				pipapo_drop(m, rulemap);
+				return;
+			}
 		}
 
 		first_rule += rules_f0;
 	}
+
+	WARN_ON_ONCE(1); /* elem_priv not found */
 }
 
 /**
@@ -2115,13 +2119,14 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
 			    struct nft_set_iter *iter)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
-	struct net *net = read_pnet(&set->net);
 	const struct nft_pipapo_match *m;
 	const struct nft_pipapo_field *f;
 	unsigned int i, r;
 
+	WARN_ON_ONCE(iter->type == NFT_ITER_UNSPEC);
+
 	rcu_read_lock();
-	if (iter->genmask == nft_genmask_cur(net))
+	if (iter->type == NFT_ITER_READ)
 		m = rcu_dereference(priv->match);
 	else
 		m = priv->clone;
...