Commit 9226976f authored by David S. Miller's avatar David S. Miller

Merge branch 'RTM_GETROUTE--return-fib-result'

Roopa Prabhu says:

====================
net: extend RTM_GETROUTE to return fib result

This series adds a new RTM_F_FIB_MATCH flag to return matched fib result
with RTM_GETROUTE. This is useful for applications and protocols in
userspace wanting to query the selected route.

examples (with patched iproute2):
ipv4:
----
$ip route show
default via 192.168.0.2 dev eth0
10.0.14.0/24
        nexthop via 172.16.0.3  dev dummy0 weight 1
        nexthop via 172.16.1.3  dev dummy1 weight 1

$ip route get 10.0.14.2
10.0.14.2 via 172.16.1.3 dev dummy1  src 172.16.1.1
    cache

$ip route get fibmatch 10.0.14.2
10.0.14.0/24
        nexthop via 172.16.0.3  dev dummy0 weight 1
        nexthop via 172.16.1.3  dev dummy1 weight 1

ipv6:
----
$ip -6 route show
2001:db9:100::/120  metric 1024
        nexthop via 2001:db8:2::2  dev dummy0 weight 1
        nexthop via 2001:db8:12::2  dev dummy1 weight 1

$ip -6 route get 2001:db9:100::1
2001:db9:100::1 from :: via 2001:db8:12::2 dev dummy1  src 2001:db8:12::1  metric 1024  pref medium

$ip -6 route get fibmatch 2001:db9:100::1
2001:db9:100::/120  metric 1024
        nexthop via 2001:db8:12::2  dev dummy1 weight 1
        nexthop via 2001:db8:2::2  dev dummy0 weight 1

v2:
        - pick up new forward port of patch-01 from david
        - inet6_rtm_getroute: use container_of for rt6_info to
          dst conversion
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 5dafc87f 18c3a61c
......@@ -136,6 +136,7 @@ struct fib_rule;
struct fib_table;
struct fib_result {
__be32 prefix;
unsigned char prefixlen;
unsigned char nh_sel;
unsigned char type;
......
......@@ -113,13 +113,16 @@ struct in_device;
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
const struct sk_buff *skb);
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
const struct sk_buff *skb);
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp,
struct fib_result *res,
const struct sk_buff *skb);
static inline struct rtable *__ip_route_output_key(struct net *net,
struct flowi4 *flp)
{
return __ip_route_output_key_hash(net, flp, NULL);
return ip_route_output_key_hash(net, flp, NULL);
}
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
......@@ -175,6 +178,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin);
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin,
struct fib_result *res);
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin)
......
......@@ -278,6 +278,7 @@ enum rt_scope_t {
#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
#define RTM_F_PREFIX 0x800 /* Prefix addresses */
#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
/* Reserved table identifiers */
......
......@@ -1452,6 +1452,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
if (!(fib_flags & FIB_LOOKUP_NOREF))
atomic_inc(&fi->fib_clntref);
res->prefix = htonl(n->key);
res->prefixlen = KEYLENGTH - fa->fa_slen;
res->nh_sel = nhsel;
res->type = fa->fa_type;
......
......@@ -489,7 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key_hash(net, fl4, skb_in);
rt = ip_route_output_key_hash(net, fl4, skb_in);
if (IS_ERR(rt))
return rt;
......
......@@ -114,6 +114,8 @@
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include "fib_lookup.h"
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
......@@ -1852,9 +1854,9 @@ static int ip_mkroute_input(struct sk_buff *skb,
*/
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
u8 tos, struct net_device *dev,
struct fib_result *res)
{
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct ip_tunnel_info *tun_info;
struct flowi4 fl4;
......@@ -1884,8 +1886,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
res.fi = NULL;
res.table = NULL;
res->fi = NULL;
res->table = NULL;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
......@@ -1921,17 +1923,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
err = fib_lookup(net, &fl4, &res, 0);
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
err = -EHOSTUNREACH;
goto no_route;
}
if (res.type == RTN_BROADCAST)
if (res->type == RTN_BROADCAST)
goto brd_input;
if (res.type == RTN_LOCAL) {
if (res->type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
0, dev, in_dev, &itag);
if (err < 0)
......@@ -1943,10 +1945,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
if (res.type != RTN_UNICAST)
if (res->type != RTN_UNICAST)
goto martian_destination;
err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos);
err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
out: return err;
brd_input:
......@@ -1960,14 +1962,14 @@ out: return err;
goto martian_source;
}
flags |= RTCF_BROADCAST;
res.type = RTN_BROADCAST;
res->type = RTN_BROADCAST;
RT_CACHE_STAT_INC(in_brd);
local_input:
do_cache = false;
if (res.fi) {
if (res->fi) {
if (!itag) {
rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
......@@ -1978,7 +1980,7 @@ out: return err;
}
rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
flags | RTCF_LOCAL, res.type,
flags | RTCF_LOCAL, res->type,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
if (!rth)
goto e_nobufs;
......@@ -1988,18 +1990,18 @@ out: return err;
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
if (res.table)
rth->rt_table_id = res.table->tb_id;
if (res->table)
rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
if (res->type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
if (do_cache) {
struct fib_nh *nh = &FIB_RES_NH(res);
struct fib_nh *nh = &FIB_RES_NH(*res);
rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
......@@ -2019,9 +2021,9 @@ out: return err;
no_route:
RT_CACHE_STAT_INC(in_no_route);
res.type = RTN_UNREACHABLE;
res.fi = NULL;
res.table = NULL;
res->type = RTN_UNREACHABLE;
res->fi = NULL;
res->table = NULL;
goto local_input;
/*
......@@ -2051,11 +2053,22 @@ out: return err;
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
{
int res;
struct fib_result res;
int err;
tos &= IPTOS_RT_MASK;
rcu_read_lock();
err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock held */
int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev, struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
hardware multicast filters :-( As result the host on multicasting
......@@ -2070,6 +2083,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(daddr)) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
int err = -EINVAL;
if (in_dev)
our = ip_check_mc_rcu(in_dev, daddr, saddr,
......@@ -2085,7 +2099,6 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
ip_hdr(skb)->protocol);
}
res = -EINVAL;
if (our
#ifdef CONFIG_IP_MROUTE
||
......@@ -2093,17 +2106,14 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
res = ip_route_input_mc(skb, daddr, saddr,
err = ip_route_input_mc(skb, daddr, saddr,
tos, dev, our);
}
rcu_read_unlock();
return res;
return err;
}
res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
rcu_read_unlock();
return res;
return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}
EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
......@@ -2246,29 +2256,40 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
* Major route resolver routine.
*/
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
int orig_oif;
int err = -ENETUNREACH;
res.tclassid = 0;
res.fi = NULL;
res.table = NULL;
orig_oif = fl4->flowi4_oif;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
rcu_read_unlock();
return rth;
}
EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
struct fib_result *res,
const struct sk_buff *skb)
{
struct net_device *dev_out = NULL;
int orig_oif = fl4->flowi4_oif;
unsigned int flags = 0;
struct rtable *rth;
int err = -ENETUNREACH;
if (fl4->saddr) {
rth = ERR_PTR(-EINVAL);
if (ipv4_is_multicast(fl4->saddr) ||
......@@ -2354,15 +2375,15 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
dev_out = net->loopback_dev;
fl4->flowi4_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
res->type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
}
err = fib_lookup(net, fl4, &res, 0);
err = fib_lookup(net, fl4, res, 0);
if (err) {
res.fi = NULL;
res.table = NULL;
res->fi = NULL;
res->table = NULL;
if (fl4->flowi4_oif &&
(ipv4_is_multicast(fl4->daddr) ||
!netif_index_is_l3_master(net, fl4->flowi4_oif))) {
......@@ -2387,43 +2408,41 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
if (fl4->saddr == 0)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
res.type = RTN_UNICAST;
res->type = RTN_UNICAST;
goto make_route;
}
rth = ERR_PTR(err);
goto out;
}
if (res.type == RTN_LOCAL) {
if (res->type == RTN_LOCAL) {
if (!fl4->saddr) {
if (res.fi->fib_prefsrc)
fl4->saddr = res.fi->fib_prefsrc;
if (res->fi->fib_prefsrc)
fl4->saddr = res->fi->fib_prefsrc;
else
fl4->saddr = fl4->daddr;
}
/* L3 master device is the loopback for that domain */
dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
}
fib_select_path(net, &res, fl4, skb);
fib_select_path(net, res, fl4, skb);
dev_out = FIB_RES_DEV(res);
dev_out = FIB_RES_DEV(*res);
fl4->flowi4_oif = dev_out->ifindex;
make_route:
rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
out:
rcu_read_unlock();
return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
......@@ -2517,18 +2536,18 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
u32 seq, int event)
u32 seq, struct rtable *rt)
{
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
unsigned long expires = 0;
u32 error;
u32 metrics[RTAX_MAX];
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0);
nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
if (!nlh)
return -EMSGSIZE;
......@@ -2636,6 +2655,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
struct fib_result res = {};
struct rtable *rt = NULL;
struct flowi4 fl4;
__be32 dst = 0;
......@@ -2692,10 +2712,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
rcu_read_lock();
if (iif) {
struct net_device *dev;
dev = __dev_get_by_index(net, iif);
dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
err = -ENODEV;
goto errout_free;
......@@ -2704,14 +2726,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->protocol = htons(ETH_P_IP);
skb->dev = dev;
skb->mark = mark;
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
dev, &res);
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
err = -rt->dst.error;
} else {
rt = ip_route_output_key(net, &fl4);
rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
err = 0;
if (IS_ERR(rt))
err = PTR_ERR(rt);
......@@ -2720,24 +2742,32 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err)
goto errout_free;
skb_dst_set(skb, &rt->dst);
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = rt->rt_table_id;
err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
RTM_NEWROUTE);
if (rtm->rtm_flags & RTM_F_FIB_MATCH)
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
rt->rt_type, res.prefix, res.prefixlen,
fl4.flowi4_tos, res.fi, 0);
else
err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
rt);
if (err < 0)
goto errout_free;
rcu_read_unlock();
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
return err;
errout_free:
rcu_read_unlock();
kfree_skb(skb);
goto errout;
}
......
......@@ -3607,11 +3607,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
int err, iif = 0, oif = 0;
struct dst_entry *dst;
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6;
int err, iif = 0, oif = 0;
bool fibmatch;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
extack);
......@@ -3622,6 +3624,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
memset(&fl6, 0, sizeof(fl6));
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
......@@ -3667,12 +3670,23 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
flags);
if (!fibmatch)
dst = ip6_route_input_lookup(net, dev, &fl6, flags);
} else {
fl6.flowi6_oif = oif;
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
if (!fibmatch)
dst = ip6_route_output(net, NULL, &fl6);
}
if (fibmatch)
dst = ip6_route_lookup(net, &fl6, 0);
rt = container_of(dst, struct rt6_info, dst);
if (rt->dst.error) {
err = rt->dst.error;
ip6_rt_put(rt);
goto errout;
}
if (rt == net->ipv6.ip6_null_entry) {
......@@ -3689,10 +3703,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
skb_dst_set(skb, &rt->dst);
err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0);
if (fibmatch)
err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0);
else
err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment