Commit 21e27f2d authored by David S. Miller

Merge branch 'IP-cleanup-LSRR-option-processing'

Paolo Abeni says:

====================
IP: cleanup LSRR option processing

The __ip_options_echo() function expects a valid dst entry in skb->dst;
as a result we sometimes need to preserve the dst entry for the whole IP
RX path.

The current usage of skb->dst looks more like a relic from the ancient past
than a real functional constraint. This patchset tries to remove such usage,
and then drops some hacks currently in place in the IP code to keep
skb->dst around.

__ip_options_echo() uses skb->dst for two different purposes: retrieving
the netns associated with the skb, and modifying the ingress packet LSRR
address list.

The first patch removes the code modifying the ingress packet, and the second
one provides an explicit netns argument to __ip_options_echo(). The following
patches clean up the current code that keeps skb->dst around for
__ip_options_echo()'s sake.

Updating the __ip_options_echo() function has been previously discussed here:

http://marc.info/?l=linux-netdev&m=150064533516348&w=2
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a54df682 3bdefdf9
...@@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb); ...@@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb);
void ip_options_build(struct sk_buff *skb, struct ip_options *opt, void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag); __be32 daddr, struct rtable *rt, int is_frag);
int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, int __ip_options_echo(struct net *net, struct ip_options *dopt,
const struct ip_options *sopt); struct sk_buff *skb, const struct ip_options *sopt);
static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
struct sk_buff *skb)
{ {
return __ip_options_echo(dopt, skb, &IPCB(skb)->opt); return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt);
} }
void ip_options_fragment(struct sk_buff *skb); void ip_options_fragment(struct sk_buff *skb);
......
...@@ -1885,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk); ...@@ -1885,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
/* /*
* Save and compile IPv4 options, return a pointer to it * Save and compile IPv4 options, return a pointer to it
*/ */
static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
struct sk_buff *skb)
{ {
const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct ip_options_rcu *dopt = NULL; struct ip_options_rcu *dopt = NULL;
...@@ -1894,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) ...@@ -1894,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
int opt_size = sizeof(*dopt) + opt->optlen; int opt_size = sizeof(*dopt) + opt->optlen;
dopt = kmalloc(opt_size, GFP_ATOMIC); dopt = kmalloc(opt_size, GFP_ATOMIC);
if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
kfree(dopt); kfree(dopt);
dopt = NULL; dopt = NULL;
} }
......
...@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
int type = icmp_param->data.icmph.type; int type = icmp_param->data.icmph.type;
int code = icmp_param->data.icmph.code; int code = icmp_param->data.icmph.code;
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return; return;
/* Needed by both icmp_global_allow and icmp_xmit_lock */ /* Needed by both icmp_global_allow and icmp_xmit_lock */
...@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ...@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
iph->tos; iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark); mark = IP4_REPLY_MARK(net, skb_in->mark);
if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
goto out_unlock; goto out_unlock;
......
...@@ -86,8 +86,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, ...@@ -86,8 +86,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
* NOTE: dopt cannot point to skb. * NOTE: dopt cannot point to skb.
*/ */
int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, int __ip_options_echo(struct net *net, struct ip_options *dopt,
const struct ip_options *sopt) struct sk_buff *skb, const struct ip_options *sopt)
{ {
unsigned char *sptr, *dptr; unsigned char *sptr, *dptr;
int soffset, doffset; int soffset, doffset;
...@@ -140,7 +140,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, ...@@ -140,7 +140,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
__be32 addr; __be32 addr;
memcpy(&addr, dptr+soffset-1, 4); memcpy(&addr, dptr+soffset-1, 4);
if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { if (inet_addr_type(net, addr) != RTN_UNICAST) {
dopt->ts_needtime = 1; dopt->ts_needtime = 1;
soffset += 8; soffset += 8;
} }
...@@ -174,9 +174,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, ...@@ -174,9 +174,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
doffset -= 4; doffset -= 4;
} }
if (doffset > 3) { if (doffset > 3) {
__be32 daddr = fib_compute_spec_dst(skb);
memcpy(&start[doffset-1], &daddr, 4);
dopt->faddr = faddr; dopt->faddr = faddr;
dptr[0] = start[0]; dptr[0] = start[0];
dptr[1] = doffset+3; dptr[1] = doffset+3;
......
...@@ -1525,7 +1525,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, ...@@ -1525,7 +1525,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
int err; int err;
int oif; int oif;
if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
return; return;
ipc.addr = daddr; ipc.addr = daddr;
......
...@@ -80,7 +80,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) ...@@ -80,7 +80,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
} }
static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
struct sk_buff *skb)
{ {
unsigned char optbuf[sizeof(struct ip_options) + 40]; unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf; struct ip_options *opt = (struct ip_options *)optbuf;
...@@ -88,7 +89,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) ...@@ -88,7 +89,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
if (IPCB(skb)->opt.optlen == 0) if (IPCB(skb)->opt.optlen == 0)
return; return;
if (ip_options_echo(opt, skb)) { if (ip_options_echo(net, opt, skb)) {
msg->msg_flags |= MSG_CTRUNC; msg->msg_flags |= MSG_CTRUNC;
return; return;
} }
...@@ -204,7 +205,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, ...@@ -204,7 +205,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
} }
if (flags & IP_CMSG_RETOPTS) { if (flags & IP_CMSG_RETOPTS) {
ip_cmsg_recv_retopts(msg, skb); ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
flags &= ~IP_CMSG_RETOPTS; flags &= ~IP_CMSG_RETOPTS;
if (!flags) if (!flags)
...@@ -1227,14 +1228,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) ...@@ -1227,14 +1228,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
pktinfo->ipi_ifindex = 0; pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0; pktinfo->ipi_spec_dst.s_addr = 0;
} }
/* We need to keep the dst for __ip_options_echo() skb_dst_drop(skb);
* We could restrict the test to opt.ts_needtime || opt.srr,
* but the following is good enough as IP options are not often used.
*/
if (unlikely(IPCB(skb)->opt.optlen))
skb_dst_force(skb);
else
skb_dst_drop(skb);
} }
int ip_setsockopt(struct sock *sk, int level, int ip_setsockopt(struct sock *sk, int level,
......
...@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ...@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope /* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8) * the ACK carries the same options again (see RFC1122 4.2.3.8)
*/ */
ireq->opt = tcp_v4_save_options(skb); ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
if (security_inet_conn_request(sk, skb, req)) { if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req); reqsk_free(req);
......
...@@ -1267,7 +1267,7 @@ static void tcp_v4_init_req(struct request_sock *req, ...@@ -1267,7 +1267,7 @@ static void tcp_v4_init_req(struct request_sock *req,
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
ireq->opt = tcp_v4_save_options(skb); ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
} }
static struct dst_entry *tcp_v4_route_req(const struct sock *sk, static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
......
...@@ -1176,7 +1176,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb) ...@@ -1176,7 +1176,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb); scratch->is_linear = !skb_is_nonlinear(skb);
#endif #endif
if (likely(!skb->_skb_refdst)) /* all head states execept sp (dst, sk, nf) are always cleared by
* udp_rcv() and we need to preserve secpath, if present, to eventually
* process IP_CMSG_PASSSEC at recvmsg() time
*/
if (likely(!skb_sec_path(skb)))
scratch->_tsize_state |= UDP_SKB_IS_STATELESS; scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
} }
...@@ -1782,13 +1786,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ...@@ -1782,13 +1786,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb); sk_mark_napi_id_once(sk, skb);
} }
/* At recvmsg() time we may access skb->dst or skb->sp depending on
* the IP options and the cmsg flags, elsewhere can we clear all
* pending head states while they are hot in the cache
*/
if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
skb_release_head_state(skb);
rc = __udp_enqueue_schedule_skb(sk, skb); rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) { if (rc < 0) {
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment