Commit 90be7a5c authored by David S. Miller's avatar David S. Miller

Merge tag 'nf-24-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

netfilter pull request 24-04-11

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

Patches #1 and #2 add missing rcu read side lock when iterating over
expression and object type list which could race with module removal.

Patch #3 prevents promisc packet from visiting the bridge/input hook
	 to amend a recent fix to address conntrack confirmation race
	 in br_netfilter and nf_conntrack_bridge.

Patch #4 adds and uses iterate decorator type to fetch the current
	 pipapo set backend datastructure view when netlink dumps the
	 set elements.

Patch #5 fixes removal of duplicate elements in the pipapo set backend.

Patch #6 flowtable validates pppoe header before accessing it.

Patch #7 fixes flowtable datapath for pppoe packets, otherwise lookup
         fails and pppoe packets follow classic path.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 2ae9a897 6db5dc7b
......@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
......@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
return 0;
}
static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
{
if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
return false;
*inner_proto = __nf_flow_pppoe_proto(skb);
return true;
}
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
......
......@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
return (void *)priv;
}
/**
* enum nft_iter_type - nftables set iterator type
*
* @NFT_ITER_READ: read-only iteration over set elements
* @NFT_ITER_UPDATE: iteration under mutex to update set element state
*/
enum nft_iter_type {
NFT_ITER_UNSPEC,
NFT_ITER_READ,
NFT_ITER_UPDATE,
};
struct nft_set;
struct nft_set_iter {
u8 genmask;
enum nft_iter_type type:8;
unsigned int count;
unsigned int skip;
int err;
......
......@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
return netif_receive_skb(skb);
}
static int br_pass_frame_up(struct sk_buff *skb)
static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
struct net_bridge *br = netdev_priv(brdev);
......@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
BR_INPUT_SKB_CB(skb)->promisc = promisc;
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
br_netif_receive_skb);
......@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_mcast *brmctx;
struct net_bridge_vlan *vlan;
struct net_bridge *br;
bool promisc;
u16 vid = 0;
u8 state;
......@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
local_rcv = !!(br->dev->flags & IFF_PROMISC);
promisc = !!(br->dev->flags & IFF_PROMISC);
local_rcv = promisc;
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
/* by definition the broadcast is also a multicast address */
if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
......@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
unsigned long now = jiffies;
if (test_bit(BR_FDB_LOCAL, &dst->flags))
return br_pass_frame_up(skb);
return br_pass_frame_up(skb, false);
if (now != dst->used)
dst->used = now;
......@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
}
if (local_rcv)
return br_pass_frame_up(skb);
return br_pass_frame_up(skb, promisc);
out:
return 0;
......@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
goto forward;
}
BR_INPUT_SKB_CB(skb)->promisc = false;
/* The else clause should be hit when nf_hook():
* - returns < 0 (drop/error)
* - returns = 0 (stolen/nf_queue)
......
......@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
const struct nf_ct_hook *ct_hook;
struct nf_conn *ct;
int ret;
if (promisc) {
nf_reset_ct(skb);
return NF_ACCEPT;
}
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
......
......@@ -589,6 +589,7 @@ struct br_input_skb_cb {
#endif
u8 proxyarp_replied:1;
u8 src_port_isolated:1;
u8 promisc:1;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_filtered:1;
#endif
......
......@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
enum ip_conntrack_info ctinfo;
bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
struct nf_conn *ct;
if (skb->pkt_type == PACKET_HOST)
if (promisc) {
nf_reset_ct(skb);
return NF_ACCEPT;
}
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
/* nf_conntrack_confirm() cannot handle concurrent clones,
* this happens for broad/multicast frames with e.g. macvlan on top
* of the bridge device.
*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
ct = container_of(nfct, struct nf_conn, ct_general);
if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
return NF_ACCEPT;
/* let inet prerouting call conntrack again */
......
......@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
proto = nf_flow_pppoe_proto(skb);
if (!nf_flow_pppoe_proto(skb, &proto))
return NF_ACCEPT;
break;
default:
proto = skb->protocol;
......
......@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
phdr = (struct pppoe_hdr *)skb_mac_header(skb);
phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;
......@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
__be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
......@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
}
break;
case htons(ETH_P_PPP_SES):
if (nf_flow_pppoe_proto(skb) == proto) {
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
......@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
skb->protocol = nf_flow_pppoe_proto(skb);
skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;
......
......@@ -626,6 +626,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
.type = NFT_ITER_UPDATE,
.fn = nft_mapelem_deactivate,
};
......@@ -3060,7 +3061,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
{
const struct nft_expr_type *type, *candidate = NULL;
list_for_each_entry(type, &nf_tables_expressions, list) {
list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
if (!nla_strcmp(nla, type->name)) {
if (!type->family && !candidate)
candidate = type;
......@@ -3092,9 +3093,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
if (nla == NULL)
return ERR_PTR(-EINVAL);
rcu_read_lock();
type = __nft_expr_type_get(family, nla);
if (type != NULL && try_module_get(type->owner))
if (type != NULL && try_module_get(type->owner)) {
rcu_read_unlock();
return type;
}
rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
......@@ -5441,6 +5446,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
iter.genmask = nft_genmask_next(ctx->net);
iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
......@@ -5514,6 +5520,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
.type = NFT_ITER_UPDATE,
.fn = nft_mapelem_activate,
};
......@@ -5888,6 +5895,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.skb = skb;
args.reset = dump_ctx->reset;
args.iter.genmask = nft_genmask_cur(net);
args.iter.type = NFT_ITER_READ;
args.iter.skip = cb->args[0];
args.iter.count = 0;
args.iter.err = 0;
......@@ -7372,6 +7380,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
{
struct nft_set_iter iter = {
.genmask = genmask,
.type = NFT_ITER_UPDATE,
.fn = nft_setelem_flush,
};
......@@ -7607,7 +7616,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
list_for_each_entry_rcu(type, &nf_tables_objects, list) {
if (type->family != NFPROTO_UNSPEC &&
type->family != family)
continue;
......@@ -7623,9 +7632,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
rcu_read_lock();
type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
if (type != NULL && try_module_get(type->owner)) {
rcu_read_unlock();
return type;
}
rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
......@@ -10871,6 +10884,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
continue;
iter.genmask = nft_genmask_next(ctx->net);
iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
......
......@@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
rules_fx = rules_f0;
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
if (!pipapo_match_field(f, start, rules_fx,
match_start, match_end))
break;
......@@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
if (i == m->field_count) {
priv->dirty = true;
pipapo_drop(m, rulemap);
return;
if (last && f->mt[rulemap[i].to].e == e) {
priv->dirty = true;
pipapo_drop(m, rulemap);
return;
}
}
first_rule += rules_f0;
}
WARN_ON_ONCE(1); /* elem_priv not found */
}
/**
......@@ -2115,13 +2119,14 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
WARN_ON_ONCE(iter->type == NFT_ITER_UNSPEC);
rcu_read_lock();
if (iter->genmask == nft_genmask_cur(net))
if (iter->type == NFT_ITER_READ)
m = rcu_dereference(priv->match);
else
m = priv->clone;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment