Commit faa1befc authored by Alexey Kuznetsov, committed by David S. Miller

[IPv4]: More output path work.

- Fix a bug noted by Maxim Giryaev (gem@asplinux.ru)
  when corking on a non-SG interface.
- Convert ICMP and ip_send_reply to ip_append_foo.
- Rename generic_getfrag to the more namespace-friendly
  ip_generic_getfrag.
- Kill n_iov member from ip_reply_arg.
parent b97e69ae
...@@ -116,7 +116,7 @@ extern int ip_append_data(struct sock *sk, ...@@ -116,7 +116,7 @@ extern int ip_append_data(struct sock *sk,
struct ipcm_cookie *ipc, struct ipcm_cookie *ipc,
struct rtable *rt, struct rtable *rt,
unsigned int flags); unsigned int flags);
extern int generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb);
extern ssize_t ip_append_page(struct sock *sk, struct page *page, extern ssize_t ip_append_page(struct sock *sk, struct page *page,
int offset, size_t size, int flags); int offset, size_t size, int flags);
extern int ip_push_pending_frames(struct sock *sk); extern int ip_push_pending_frames(struct sock *sk);
...@@ -141,8 +141,7 @@ static inline void ip_tr_mc_map(u32 addr, char *buf) ...@@ -141,8 +141,7 @@ static inline void ip_tr_mc_map(u32 addr, char *buf)
} }
struct ip_reply_arg { struct ip_reply_arg {
struct iovec iov[2]; struct iovec iov[1];
int n_iov; /* redundant */
u32 csum; u32 csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */ int csumoffset; /* u16 offset of csum in iov[0].iov_base */
/* -1 if not needed */ /* -1 if not needed */
......
...@@ -101,7 +101,6 @@ struct icmp_bxm { ...@@ -101,7 +101,6 @@ struct icmp_bxm {
int offset; int offset;
int data_len; int data_len;
unsigned int csum;
struct { struct {
struct icmphdr icmph; struct icmphdr icmph;
__u32 times[3]; __u32 times[3];
...@@ -356,39 +355,45 @@ static void icmp_out_count(int type) ...@@ -356,39 +355,45 @@ static void icmp_out_count(int type)
* Checksum each fragment, and on the first include the headers and final * Checksum each fragment, and on the first include the headers and final
* checksum. * checksum.
*/ */
static int icmp_glue_bits(const void *p, char *to, unsigned int offset, int
unsigned int fraglen, struct sk_buff *skb) icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{ {
struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
struct icmphdr *icmph;
unsigned int csum; unsigned int csum;
skb->ip_summed = CHECKSUM_NONE; csum = skb_copy_and_csum_bits(icmp_param->skb,
icmp_param->offset + offset,
to, len, 0);
if (offset) { skb->csum = csum_block_add(skb->csum, csum, odd);
icmp_param->csum = return 0;
skb_copy_and_csum_bits(icmp_param->skb, }
icmp_param->offset +
(offset - icmp_param->head_len),
to, fraglen, icmp_param->csum);
goto out;
}
/* static void
* First fragment includes header. Note that we've done icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt)
* the other fragments first, so that we get the checksum {
* for the whole packet here. struct sk_buff *skb;
*/
ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&icmp_socket->sk->write_queue)) != NULL) {
struct icmphdr *icmph = skb->h.icmph;
unsigned int csum = 0;
struct sk_buff *skb1;
skb_queue_walk(&icmp_socket->sk->write_queue, skb1) {
csum = csum_add(csum, skb1->csum);
}
csum = csum_partial_copy_nocheck((void *)&icmp_param->data, csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
to, icmp_param->head_len, (char*)icmph, icmp_param->head_len,
icmp_param->csum); csum);
csum = skb_copy_and_csum_bits(icmp_param->skb, icmp_param->offset,
to + icmp_param->head_len,
fraglen - icmp_param->head_len, csum);
icmph = (struct icmphdr *)to;
icmph->checksum = csum_fold(csum); icmph->checksum = csum_fold(csum);
out: skb->ip_summed = CHECKSUM_NONE;
return 0; ip_push_pending_frames(icmp_socket->sk);
}
} }
/* /*
...@@ -408,7 +413,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -408,7 +413,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
goto out; goto out;
icmp_param->data.icmph.checksum = 0; icmp_param->data.icmph.checksum = 0;
icmp_param->csum = 0;
icmp_out_count(icmp_param->data.icmph.type); icmp_out_count(icmp_param->data.icmph.type);
inet->tos = skb->nh.iph->tos; inet->tos = skb->nh.iph->tos;
...@@ -429,11 +433,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -429,11 +433,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
goto out_unlock; goto out_unlock;
} }
if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
icmp_param->data.icmph.code)) { icmp_param->data.icmph.code))
ip_build_xmit(sk, icmp_glue_bits, icmp_param, icmp_push_reply(icmp_param, &ipc, rt);
icmp_param->data_len+icmp_param->head_len,
&ipc, rt, MSG_DONTWAIT);
}
ip_rt_put(rt); ip_rt_put(rt);
out_unlock: out_unlock:
icmp_xmit_unlock_bh(); icmp_xmit_unlock_bh();
...@@ -565,7 +566,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) ...@@ -565,7 +566,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
icmp_param.data.icmph.code = code; icmp_param.data.icmph.code = code;
icmp_param.data.icmph.un.gateway = info; icmp_param.data.icmph.un.gateway = info;
icmp_param.data.icmph.checksum = 0; icmp_param.data.icmph.checksum = 0;
icmp_param.csum = 0;
icmp_param.skb = skb_in; icmp_param.skb = skb_in;
icmp_param.offset = skb_in->nh.raw - skb_in->data; icmp_param.offset = skb_in->nh.raw - skb_in->data;
icmp_out_count(icmp_param.data.icmph.type); icmp_out_count(icmp_param.data.icmph.type);
...@@ -599,9 +599,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) ...@@ -599,9 +599,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
icmp_param.data_len = room; icmp_param.data_len = room;
icmp_param.head_len = sizeof(struct icmphdr); icmp_param.head_len = sizeof(struct icmphdr);
ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, icmp_push_reply(&icmp_param, &ipc, rt);
icmp_param.data_len + sizeof(struct icmphdr),
&ipc, rt, MSG_DONTWAIT);
ende: ende:
ip_rt_put(rt); ip_rt_put(rt);
out_unlock: out_unlock:
......
...@@ -1003,7 +1003,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) ...@@ -1003,7 +1003,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
} }
int int
generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{ {
struct iovec *iov = from; struct iovec *iov = from;
...@@ -1095,7 +1095,7 @@ int ip_append_data(struct sock *sk, ...@@ -1095,7 +1095,7 @@ int ip_append_data(struct sock *sk,
opt = ipc->opt; opt = ipc->opt;
if (opt) { if (opt) {
if (inet->cork.opt == NULL) if (inet->cork.opt == NULL)
inet->cork.opt = kmalloc(sizeof(struct ip_options)+40, GFP_KERNEL); inet->cork.opt = kmalloc(sizeof(struct ip_options)+40, sk->allocation);
memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
inet->cork.flags |= IPCORK_OPT; inet->cork.flags |= IPCORK_OPT;
inet->cork.addr = ipc->addr; inet->cork.addr = ipc->addr;
...@@ -1129,7 +1129,6 @@ int ip_append_data(struct sock *sk, ...@@ -1129,7 +1129,6 @@ int ip_append_data(struct sock *sk,
return -EMSGSIZE; return -EMSGSIZE;
} }
#if 0 /* Not now */
/* /*
* transhdrlen > 0 means that this is the first fragment and we wish * transhdrlen > 0 means that this is the first fragment and we wish
* it won't be fragmented in the future. * it won't be fragmented in the future.
...@@ -1139,7 +1138,6 @@ int ip_append_data(struct sock *sk, ...@@ -1139,7 +1138,6 @@ int ip_append_data(struct sock *sk,
rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
!exthdrlen) !exthdrlen)
csummode = CHECKSUM_HW; csummode = CHECKSUM_HW;
#endif
inet->cork.length += length; inet->cork.length += length;
...@@ -1151,6 +1149,7 @@ int ip_append_data(struct sock *sk, ...@@ -1151,6 +1149,7 @@ int ip_append_data(struct sock *sk,
char *data; char *data;
unsigned int datalen; unsigned int datalen;
unsigned int fraglen; unsigned int fraglen;
unsigned int alloclen;
BUG_TRAP(copy == 0); BUG_TRAP(copy == 0);
alloc_new_skb: alloc_new_skb:
...@@ -1159,11 +1158,18 @@ int ip_append_data(struct sock *sk, ...@@ -1159,11 +1158,18 @@ int ip_append_data(struct sock *sk,
datalen = length; datalen = length;
fraglen = datalen + fragheaderlen; fraglen = datalen + fragheaderlen;
if ((flags & MSG_MORE) &&
!(rt->u.dst.dev->features&NETIF_F_SG))
alloclen = maxfraglen;
else
alloclen = datalen + fragheaderlen;
if (!(flags & MSG_DONTWAIT) || transhdrlen) { if (!(flags & MSG_DONTWAIT) || transhdrlen) {
skb = sock_alloc_send_skb(sk, fraglen + hh_len + 15, skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
(flags & MSG_DONTWAIT), &err); (flags & MSG_DONTWAIT), &err);
} else { } else {
skb = sock_wmalloc(sk, fraglen + hh_len + 15, 1, skb = sock_wmalloc(sk,
alloclen + hh_len + 15, 1,
sk->allocation); sk->allocation);
if (unlikely(skb == NULL)) if (unlikely(skb == NULL))
err = -ENOBUFS; err = -ENOBUFS;
...@@ -1206,20 +1212,15 @@ int ip_append_data(struct sock *sk, ...@@ -1206,20 +1212,15 @@ int ip_append_data(struct sock *sk,
continue; continue;
} }
if (copy > length)
copy = length;
if (!(rt->u.dst.dev->features&NETIF_F_SG)) { if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
int off; unsigned int off;
if (!((skb->len - fragheaderlen) & 7))
goto alloc_new_skb;
/*
* Align the start address of the next IP fragment
* on 8 byte boundary.
*/
copy = 8 - ((skb->len - fragheaderlen) & 7);
off = skb->len; off = skb->len;
if (copy > length) if (getfrag(from, skb_put(skb, copy),
copy = length; offset, copy, off, skb) < 0) {
if (getfrag(from, skb_put(skb, copy), offset, copy, off, skb) < 0) {
__skb_trim(skb, off); __skb_trim(skb, off);
err = -EFAULT; err = -EFAULT;
goto error; goto error;
...@@ -1231,9 +1232,6 @@ int ip_append_data(struct sock *sk, ...@@ -1231,9 +1232,6 @@ int ip_append_data(struct sock *sk,
int off = inet->sndmsg_off; int off = inet->sndmsg_off;
unsigned int left; unsigned int left;
if (copy > length)
copy = length;
if (page && (left = PAGE_SIZE - off) > 0) { if (page && (left = PAGE_SIZE - off) > 0) {
if (copy >= left) if (copy >= left)
copy = left; copy = left;
...@@ -1518,38 +1516,13 @@ void ip_flush_pending_frames(struct sock *sk) ...@@ -1518,38 +1516,13 @@ void ip_flush_pending_frames(struct sock *sk)
/* /*
* Fetch data from kernel space and fill in checksum if needed. * Fetch data from kernel space and fill in checksum if needed.
*/ */
static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset, static int ip_reply_glue_bits(void *dptr, char *to, int offset,
unsigned int fraglen, struct sk_buff *skb) int len, int odd, struct sk_buff *skb)
{ {
struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr; unsigned int csum;
u16 *pktp = (u16 *)to;
struct iovec *iov;
int len;
int hdrflag = 1;
iov = &dp->iov[0];
if (offset >= iov->iov_len) {
offset -= iov->iov_len;
iov++;
hdrflag = 0;
}
len = iov->iov_len - offset;
if (fraglen > len) { /* overlapping. */
dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
dp->csum);
offset = 0;
fraglen -= len;
to += len;
iov++;
}
dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
dp->csum);
if (hdrflag && dp->csumoffset)
*(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
skb->ip_summed = CHECKSUM_NONE;
csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
skb->csum = csum_block_add(skb->csum, csum, odd);
return 0; return 0;
} }
...@@ -1606,7 +1579,15 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar ...@@ -1606,7 +1579,15 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
inet->tos = skb->nh.iph->tos; inet->tos = skb->nh.iph->tos;
sk->priority = skb->priority; sk->priority = skb->priority;
sk->protocol = skb->nh.iph->protocol; sk->protocol = skb->nh.iph->protocol;
ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT); ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
&ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->write_queue)) != NULL) {
if (arg->csumoffset >= 0)
*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(sk);
}
bh_unlock_sock(sk); bh_unlock_sock(sk);
ip_rt_put(rt); ip_rt_put(rt);
......
...@@ -1188,7 +1188,6 @@ static void tcp_v4_send_reset(struct sk_buff *skb) ...@@ -1188,7 +1188,6 @@ static void tcp_v4_send_reset(struct sk_buff *skb)
arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
skb->nh.iph->saddr, /*XXX*/ skb->nh.iph->saddr, /*XXX*/
sizeof(struct tcphdr), IPPROTO_TCP, 0); sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.n_iov = 1;
arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.csumoffset = offsetof(struct tcphdr, check) / 2;
inet_sk(tcp_socket->sk)->ttl = sysctl_ip_default_ttl; inet_sk(tcp_socket->sk)->ttl = sysctl_ip_default_ttl;
...@@ -1217,7 +1216,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ...@@ -1217,7 +1216,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th); arg.iov[0].iov_len = sizeof(rep.th);
arg.n_iov = 1;
if (ts) { if (ts) {
rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | (TCPOPT_TIMESTAMP << 8) |
......
...@@ -484,7 +484,7 @@ static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned i ...@@ -484,7 +484,7 @@ static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned i
if (skb->ip_summed == CHECKSUM_HW) { if (skb->ip_summed == CHECKSUM_HW) {
skb->csum = offsetof(struct udphdr, check); skb->csum = offsetof(struct udphdr, check);
ufh->uh.check = ~csum_tcpudp_magic(ufh->saddr, ufh->daddr, ufh->uh.check = ~csum_tcpudp_magic(ufh->saddr, ufh->daddr,
ntohs(ufh->uh.len), IPPROTO_UDP, ufh->wcheck); ntohs(ufh->uh.len), IPPROTO_UDP, 0);
memcpy(to, ufh, sizeof(struct udphdr)); memcpy(to, ufh, sizeof(struct udphdr));
return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset, return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
fraglen-sizeof(struct udphdr)); fraglen-sizeof(struct udphdr));
...@@ -730,7 +730,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -730,7 +730,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
do_append_data: do_append_data:
up->len += ulen; up->len += ulen;
err = ip_append_data(sk, generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), &ipc, rt, msg->msg_flags); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), &ipc, rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err) if (err)
udp_flush_pending_frames(sk); udp_flush_pending_frames(sk);
else if (!corkreq) else if (!corkreq)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment