Commit 2c59f06c authored by David S. Miller

Merge branch 'net-Kernel-side-filtering-for-route-dumps'

David Ahern says:

====================
net: Kernel side filtering for route dumps

Implement kernel side filtering of route dumps by protocol (e.g., which
routing daemon installed the route), route type (e.g., unicast), table
id and nexthop device.

iproute2 has been doing this filtering in userspace for years; pushing
the filters to the kernel side reduces the amount of data the kernel
sends and reduces wasted cycles on both sides processing unwanted data.
These initial options provide a huge improvement for efficiently
examining routes on large scale systems.

v2
- better handling of requests for a specific table. Rather than walking
  the hash of all tables, look up the specific table and dump it
- refactor mr_rtm_dumproute moving the loop over the table into a
  helper that can be invoked directly
- add hook to return NLM_F_DUMP_FILTERED in DONE message to ensure
  it is returned even when the dump returns nothing
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e8567951 e4e92fb1
......@@ -7,6 +7,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_notifier.h>
#include <net/ip_fib.h>
/**
* struct vif_device - interface representor for multicast routing
......@@ -283,6 +284,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mr_mfc *c, struct rtmsg *rtm);
/* Dump the entries of a single multicast routing table @mrt into @skb.
 * @fill encodes one entry per call; @lock is the spinlock taken while the
 * table's unresolved queue is walked; @filter->dev, when set, limits the
 * dump to entries using that device. cb->args[1] carries the entry index
 * to resume from. Returns 0 once the table has been dumped, or the negative
 * value returned by @fill.
 */
int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
struct netlink_callback *cb,
int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock, struct fib_dump_filter *filter);
int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct mr_table *(*iter)(struct net *net,
struct mr_table *mrt),
......@@ -290,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock);
spinlock_t *lock, struct fib_dump_filter *filter);
int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
int (*rules_dump)(struct net *net,
......@@ -340,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock)
spinlock_t *lock, struct fib_dump_filter *filter)
{
return -EINVAL;
}
......
......@@ -180,6 +180,7 @@ struct netlink_callback {
u16 family;
u16 min_dump_alloc;
bool strict_check;
u16 answer_flags;
unsigned int prev_seq, seq;
long args[6];
};
......
......@@ -174,6 +174,7 @@ struct rt6_rtnl_dump_arg {
struct sk_buff *skb;
struct netlink_callback *cb;
struct net *net;
struct fib_dump_filter filter;
};
int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
......
......@@ -222,6 +222,16 @@ struct fib_table {
unsigned long __data[0];
};
/* Kernel-side filter for route dumps, filled in from the dump request.
 * A zero/NULL member means "do not filter on this attribute".
 */
struct fib_dump_filter {
/* table to dump; taken from rtm_table and overridden by RTA_TABLE */
u32 table_id;
/* filter_set is an optimization: true when at least one filter member
 * is set, so dump loops can skip the per-route checks otherwise
 */
bool filter_set;
/* match on the route's protocol (from rtm_protocol) */
unsigned char protocol;
/* match on the route type (from rtm_type) */
unsigned char rt_type;
/* RTM_F_* flags copied from rtm_flags */
unsigned int flags;
/* match routes with a nexthop on this device (resolved from RTA_OIF) */
struct net_device *dev;
};
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
struct fib_result *res, int fib_flags);
int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
......@@ -229,7 +239,7 @@ int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
int fib_table_delete(struct net *, struct fib_table *, struct fib_config *,
struct netlink_ext_ack *extack);
int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
struct netlink_callback *cb);
struct netlink_callback *cb, struct fib_dump_filter *filter);
int fib_table_flush(struct net *net, struct fib_table *table);
struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
void fib_table_flush_external(struct fib_table *table);
......@@ -453,6 +463,7 @@ static inline void fib_proc_exit(struct net *net)
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
/* Validate a strict-checking FIB dump request and translate it into @filter.
 * Header fields and the RTA_TABLE / RTA_OIF attributes populate @filter; any
 * other attribute is rejected. When a filter ends up set, cb->answer_flags
 * is set to NLM_F_DUMP_FILTERED. The definition asserts that RTNL is held.
 * Returns 0 on success, -EINVAL for an invalid header or unsupported
 * attribute, -ENODEV when the RTA_OIF ifindex does not resolve in @net, or
 * the attribute parser's error.
 */
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb);
#endif /* _NET_FIB_H */
......@@ -802,10 +802,16 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
struct nlattr *tb[RTA_MAX + 1];
struct rtmsg *rtm;
int err, i;
ASSERT_RTNL();
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
......@@ -814,8 +820,7 @@ int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
rtm = nlmsg_data(nlh);
if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
rtm->rtm_type) {
rtm->rtm_scope) {
NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
return -EINVAL;
}
......@@ -824,9 +829,42 @@ int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request");
return -EINVAL;
filter->flags = rtm->rtm_flags;
filter->protocol = rtm->rtm_protocol;
filter->rt_type = rtm->rtm_type;
filter->table_id = rtm->rtm_table;
err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv4_policy, extack);
if (err < 0)
return err;
for (i = 0; i <= RTA_MAX; ++i) {
int ifindex;
if (!tb[i])
continue;
switch (i) {
case RTA_TABLE:
filter->table_id = nla_get_u32(tb[i]);
break;
case RTA_OIF:
ifindex = nla_get_u32(tb[i]);
filter->dev = __dev_get_by_index(net, ifindex);
if (!filter->dev)
return -ENODEV;
break;
default:
NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
return -EINVAL;
}
}
if (filter->flags || filter->protocol || filter->rt_type ||
filter->table_id || filter->dev) {
filter->filter_set = 1;
cb->answer_flags = NLM_F_DUMP_FILTERED;
}
return 0;
......@@ -837,6 +875,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct fib_dump_filter filter = {};
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
......@@ -844,15 +883,30 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
int dumped = 0, err;
if (cb->strict_check) {
err = ip_valid_fib_dump_req(nlh, cb->extack);
err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
return err;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
}
if (nlmsg_len(nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED)
/* fib entries are never clones and ipv4 does not use prefix flag */
if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED))
return skb->len;
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
return -ENOENT;
}
err = fib_table_dump(tb, skb, cb, &filter);
return skb->len ? : err;
}
s_h = cb->args[0];
s_e = cb->args[1];
......@@ -867,7 +921,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
err = fib_table_dump(tb, skb, cb);
err = fib_table_dump(tb, skb, cb, &filter);
if (err < 0) {
if (likely(skb->len))
goto out;
......
......@@ -2003,12 +2003,17 @@ void fib_free_table(struct fib_table *tb)
}
static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *skb, struct netlink_callback *cb,
struct fib_dump_filter *filter)
{
unsigned int flags = NLM_F_MULTI;
__be32 xkey = htonl(l->key);
struct fib_alias *fa;
int i, s_i;
if (filter->filter_set)
flags |= NLM_F_DUMP_FILTERED;
s_i = cb->args[4];
i = 0;
......@@ -2016,25 +2021,35 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
int err;
if (i < s_i) {
i++;
continue;
}
if (i < s_i)
goto next;
if (tb->tb_id != fa->tb_id) {
i++;
continue;
if (tb->tb_id != fa->tb_id)
goto next;
if (filter->filter_set) {
if (filter->rt_type && fa->fa_type != filter->rt_type)
goto next;
if ((filter->protocol &&
fa->fa_info->fib_protocol != filter->protocol))
goto next;
if (filter->dev &&
!fib_info_nh_uses_dev(fa->fa_info, filter->dev))
goto next;
}
err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
tb->tb_id, fa->fa_type,
xkey, KEYLENGTH - fa->fa_slen,
fa->fa_tos, fa->fa_info, NLM_F_MULTI);
fa->fa_tos, fa->fa_info, flags);
if (err < 0) {
cb->args[4] = i;
return err;
}
next:
i++;
}
......@@ -2044,7 +2059,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
/* rcu_read_lock needs to be hold by caller from readside */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
struct netlink_callback *cb, struct fib_dump_filter *filter)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
......@@ -2057,7 +2072,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
int err;
err = fn_trie_dump_leaf(l, tb, skb, cb);
err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
if (err < 0) {
cb->args[3] = key;
cb->args[2] = count;
......
......@@ -2527,15 +2527,31 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
if (cb->strict_check) {
int err = ip_valid_fib_dump_req(cb->nlh, cb->extack);
struct fib_dump_filter filter = {};
int err;
if (cb->strict_check) {
err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
&filter, cb);
if (err < 0)
return err;
}
if (filter.table_id) {
struct mr_table *mrt;
mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
return -ENOENT;
}
err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
&mfc_unres_lock, &filter);
return skb->len ? : err;
}
return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
_ipmr_fill_mroute, &mfc_unres_lock);
_ipmr_fill_mroute, &mfc_unres_lock, &filter);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
......
......@@ -268,6 +268,83 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
}
EXPORT_SYMBOL(mr_fill_mroute);
/* Return true when some vif index in [minvif, maxvif) of cache entry @c
 * exists in @mrt, has a ttls[] value below 255, and is backed by @dev.
 */
static bool mr_mfc_uses_dev(const struct mr_table *mrt,
			    const struct mr_mfc *c,
			    const struct net_device *dev)
{
	int vifi = c->mfc_un.res.minvif;

	for (; vifi < c->mfc_un.res.maxvif; vifi++) {
		/* slot not populated in this table */
		if (!VIF_EXISTS(mrt, vifi))
			continue;

		/* ttls[] value of 255 excludes this vif from the match */
		if (c->mfc_un.res.ttls[vifi] >= 255)
			continue;

		if (mrt->vif_table[vifi].dev == dev)
			return true;
	}

	return false;
}
/**
 * mr_table_dump - dump the MFC entries of one multicast routing table
 * @mrt: table whose resolved cache list and unresolved queue are walked
 * @skb: netlink buffer the entries are appended to
 * @cb: dump state; cb->args[1] is the index of the first entry to emit
 * @fill: per-family callback that encodes a single entry into @skb
 * @lock: spinlock held (with BHs disabled) while the unresolved queue is walked
 * @filter: dump filter; @filter->filter_set adds NLM_F_DUMP_FILTERED to the
 *	    message flags and @filter->dev restricts the entries emitted
 *
 * Entries are numbered with one running index that spans the resolved list
 * followed by the unresolved queue. The index saved in cb->args[1] therefore
 * identifies the same entry when the dump is resumed, whichever list the
 * previous pass stopped in. The index is reset to 0 only after the whole
 * table has been dumped.
 *
 * NOTE(review): the resolved list is walked with list_for_each_entry_rcu();
 * presumably callers are expected to hold rcu_read_lock() - confirm for
 * callers that invoke this helper directly.
 *
 * Return: 0 when the table was dumped completely, otherwise the negative
 * value returned by @fill (cb->args[1] then holds the index to resume from).
 */
int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
		  struct netlink_callback *cb,
		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags),
		  spinlock_t *lock, struct fib_dump_filter *filter)
{
	unsigned int e = 0, s_e = cb->args[1];
	unsigned int flags = NLM_F_MULTI;
	struct mr_mfc *mfc;
	int err;

	if (filter->filter_set)
		flags |= NLM_F_DUMP_FILTERED;

	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
		if (e < s_e)
			goto next_entry;
		if (filter->dev &&
		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
			goto next_entry;

		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
		if (err < 0)
			goto out;
next_entry:
		e++;
	}

	/* Do not reset e/s_e here: the resume index must keep counting across
	 * both lists, otherwise a dump interrupted in the unresolved queue
	 * would resume at the wrong entry.
	 */
	spin_lock_bh(lock);
	list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
		if (e < s_e)
			goto next_entry2;
		/* NOTE(review): confirm mfc_un.res is meaningful for entries
		 * on the unresolved queue before relying on this device match.
		 */
		if (filter->dev &&
		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
			goto next_entry2;

		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
		if (err < 0) {
			spin_unlock_bh(lock);
			goto out;
		}
next_entry2:
		e++;
	}
	spin_unlock_bh(lock);

	/* table fully dumped: the next table starts from its first entry */
	err = 0;
	e = 0;

out:
	cb->args[1] = e;
	return err;
}
EXPORT_SYMBOL(mr_table_dump);
int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct mr_table *(*iter)(struct net *net,
struct mr_table *mrt),
......@@ -275,53 +352,35 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock)
spinlock_t *lock, struct fib_dump_filter *filter)
{
unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
unsigned int t = 0, s_t = cb->args[0];
struct net *net = sock_net(skb->sk);
struct mr_table *mrt;
struct mr_mfc *mfc;
int err;
/* multicast does not track protocol or have route type other
* than RTN_MULTICAST
*/
if (filter->filter_set) {
if (filter->protocol || filter->flags ||
(filter->rt_type && filter->rt_type != RTN_MULTICAST))
return skb->len;
}
rcu_read_lock();
for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
if (t < s_t)
goto next_table;
list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
if (e < s_e)
goto next_entry;
if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, mfc,
RTM_NEWROUTE, NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
}
e = 0;
s_e = 0;
spin_lock_bh(lock);
list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
if (e < s_e)
goto next_entry2;
if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, mfc,
RTM_NEWROUTE, NLM_F_MULTI) < 0) {
spin_unlock_bh(lock);
goto done;
}
next_entry2:
e++;
}
spin_unlock_bh(lock);
e = 0;
s_e = 0;
err = mr_table_dump(mrt, skb, cb, fill, lock, filter);
if (err < 0)
break;
next_table:
t++;
}
done:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = t;
return skb->len;
......
......@@ -569,23 +569,29 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct rt6_rtnl_dump_arg arg = {};
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct rt6_rtnl_dump_arg arg;
struct fib6_walker *w;
struct fib6_table *tb;
struct hlist_head *head;
int res = 0;
if (cb->strict_check) {
int err = ip_valid_fib_dump_req(nlh, cb->extack);
int err;
err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
if (err < 0)
return err;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED);
}
s_h = cb->args[0];
s_e = cb->args[1];
/* fib entries are never clones */
if (arg.filter.flags & RTM_F_CLONED)
return skb->len;
w = (void *)cb->args[2];
if (!w) {
......@@ -611,6 +617,20 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
arg.net = net;
w->args = &arg;
if (arg.filter.table_id) {
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
return -ENOENT;
}
res = fib6_dump_table(tb, skb, cb);
goto out;
}
s_h = cb->args[0];
s_e = cb->args[1];
rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
......@@ -620,16 +640,16 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
res = fib6_dump_table(tb, skb, cb);
if (res != 0)
goto out;
goto out_unlock;
next:
e++;
}
}
out:
out_unlock:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
out:
res = res < 0 ? res : skb->len;
if (res <= 0)
fib6_dump_end(cb);
......
......@@ -2458,14 +2458,29 @@ static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct fib_dump_filter filter = {};
int err;
if (cb->strict_check) {
int err = ip_valid_fib_dump_req(nlh, cb->extack);
err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
&filter, cb);
if (err < 0)
return err;
}
if (filter.table_id) {
struct mr_table *mrt;
mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
return -ENOENT;
}
err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
&mfc_unres_lock, &filter);
return skb->len ? : err;
}
return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
_ip6mr_fill_mroute, &mfc_unres_lock);
_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
}
......@@ -4767,28 +4767,52 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
return -EMSGSIZE;
}
/* Return true when @dev is the nexthop device of @f6i itself or of any
 * entry on its sibling list.
 */
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
			       const struct net_device *dev)
{
	struct fib6_info *sib, *tmp;

	if (f6i->fib6_nh.nh_dev == dev)
		return true;

	/* no siblings recorded: nothing further to check */
	if (!f6i->fib6_nsiblings)
		return false;

	list_for_each_entry_safe(sib, tmp, &f6i->fib6_siblings,
				 fib6_siblings) {
		if (sib->fib6_nh.nh_dev == dev)
			return true;
	}

	return false;
}
int rt6_dump_route(struct fib6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
struct fib_dump_filter *filter = &arg->filter;
unsigned int flags = NLM_F_MULTI;
struct net *net = arg->net;
if (rt == net->ipv6.fib6_null_entry)
return 0;
if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
/* user wants prefix routes only */
if (rtm->rtm_flags & RTM_F_PREFIX &&
!(rt->fib6_flags & RTF_PREFIX_RT)) {
/* success since this is not a prefix route */
if ((filter->flags & RTM_F_PREFIX) &&
!(rt->fib6_flags & RTF_PREFIX_RT)) {
/* success since this is not a prefix route */
return 1;
}
if (filter->filter_set) {
if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
(filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
(filter->protocol && rt->fib6_protocol != filter->protocol)) {
return 1;
}
flags |= NLM_F_DUMP_FILTERED;
}
return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
arg->cb->nlh->nlmsg_seq, flags);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
......
......@@ -2032,16 +2032,21 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
}
#if IS_ENABLED(CONFIG_INET)
static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb)
{
return ip_valid_fib_dump_req(nlh, extack);
return ip_valid_fib_dump_req(net, nlh, filter, cb);
}
#else
static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
struct nlattr *tb[RTA_MAX + 1];
struct rtmsg *rtm;
int err, i;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
......@@ -2050,36 +2055,90 @@ static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh,
rtm = nlmsg_data(nlh);
if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
rtm->rtm_type || rtm->rtm_flags) {
rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type ||
rtm->rtm_flags) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request");
return -EINVAL;
}
if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in FIB dump request");
return -EINVAL;
if (rtm->rtm_protocol) {
filter->protocol = rtm->rtm_protocol;
filter->filter_set = 1;
cb->answer_flags = NLM_F_DUMP_FILTERED;
}
err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_mpls_policy, extack);
if (err < 0)
return err;
for (i = 0; i <= RTA_MAX; ++i) {
int ifindex;
if (i == RTA_OIF) {
ifindex = nla_get_u32(tb[i]);
filter->dev = __dev_get_by_index(net, ifindex);
if (!filter->dev)
return -ENODEV;
filter->filter_set = 1;
} else if (tb[i]) {
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
return -EINVAL;
}
}
return 0;
}
#endif
/* Return true when @dev is the device of one of @rt's nexthops.
 * Nexthop devices are read with rtnl_dereference().
 */
static bool mpls_rt_uses_dev(struct mpls_route *rt,
			     const struct net_device *dev)
{
	if (rt->rt_nhn != 1) {
		/* walk every nexthop of the route */
		for_nexthops(rt) {
			if (rtnl_dereference(nh->nh_dev) == dev)
				return true;
		} endfor_nexthops(rt);

		return false;
	}

	/* single nexthop: check it directly */
	return rtnl_dereference(rt->rt_nh->nh_dev) == dev;
}
static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct mpls_route __rcu **platform_label;
struct fib_dump_filter filter = {};
unsigned int flags = NLM_F_MULTI;
size_t platform_labels;
unsigned int index;
ASSERT_RTNL();
if (cb->strict_check) {
int err = mpls_valid_fib_dump_req(nlh, cb->extack);
int err;
err = mpls_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
return err;
/* for MPLS, there is only 1 table with fixed type and flags.
* If either are set in the filter then return nothing.
*/
if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) ||
(filter.rt_type && filter.rt_type != RTN_UNICAST) ||
filter.flags)
return skb->len;
}
index = cb->args[0];
......@@ -2088,15 +2147,24 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
platform_label = rtnl_dereference(net->mpls.platform_label);
platform_labels = net->mpls.platform_labels;
if (filter.filter_set)
flags |= NLM_F_DUMP_FILTERED;
for (; index < platform_labels; index++) {
struct mpls_route *rt;
rt = rtnl_dereference(platform_label[index]);
if (!rt)
continue;
if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) ||
(filter.protocol && rt->rt_protocol != filter.protocol))
continue;
if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
index, rt, NLM_F_MULTI) < 0)
index, rt, flags) < 0)
break;
}
cb->args[0] = index;
......
......@@ -2257,7 +2257,8 @@ static int netlink_dump(struct sock *sk)
}
nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
sizeof(nlk->dump_done_errno), NLM_F_MULTI);
sizeof(nlk->dump_done_errno),
NLM_F_MULTI | cb->answer_flags);
if (WARN_ON(!nlh))
goto errout_skb;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment