Commit db54275d authored by David S. Miller's avatar David S. Miller

Merge branch 'sctp-gso'

Marcelo Ricardo Leitner says:

====================
sctp: Add GSO support

This patchset adds sctp GSO support.

Performance tests indicates that increases throughput by 10% if using
bigger chunk sizes, specially if bigger than MTU. For small chunks, it
doesn't help much if not using heavy firewall rules.

For small chunks it will probably be of more use once we get something
like MSG_MORE as David Laight had suggested.

overall changes:
v1->v2:
Added support for receiving GSO frames on SCTP stack, as requested by
Dave Miller.

v2->v3:
Consider sctphdr size in skb_gso_transport_seglen()
rebased due to 5c7cdf33 ("gso: Remove arbitrary checks for
unsupported GSO")
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents f2edc4e1 942b3235
......@@ -169,10 +169,9 @@ static void loopback_setup(struct net_device *dev)
dev->flags = IFF_LOOPBACK;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
netif_keep_dst(dev);
dev->hw_features = NETIF_F_ALL_TSO | NETIF_F_UFO;
dev->hw_features = NETIF_F_GSO_SOFTWARE;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
| NETIF_F_ALL_TSO
| NETIF_F_UFO
| NETIF_F_GSO_SOFTWARE
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
| NETIF_F_SCTP_CRC
......
......@@ -53,8 +53,9 @@ enum {
* headers in software.
*/
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
NETIF_F_GSO_SCTP_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
......@@ -128,6 +129,7 @@ enum {
#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID)
#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL)
#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
......@@ -166,7 +168,8 @@ enum {
NETIF_F_FSO)
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO)
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \
NETIF_F_GSO_SCTP)
/*
* If one device supports one of these features, then enable them
......
......@@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
......
......@@ -301,6 +301,11 @@ struct sk_buff;
#endif
extern int sysctl_max_skb_frags;
/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to
* segment using its current segmentation instead.
*/
#define GSO_BY_FRAGS 0xFFFF
typedef struct skb_frag_struct skb_frag_t;
struct skb_frag_struct {
......@@ -482,6 +487,8 @@ enum {
SKB_GSO_PARTIAL = 1 << 13,
SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
SKB_GSO_SCTP = 1 << 15,
};
#if BITS_PER_LONG > 32
......@@ -2987,6 +2994,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
void skb_scrub_packet(struct sk_buff *skb, bool xnet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, int write_len);
......
......@@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net);
int sctp_remaddr_proc_init(struct net *net);
void sctp_remaddr_proc_exit(struct net *net);
/*
* sctp/offload.c
*/
int sctp_offload_init(void);
/*
* Module global variables
......
......@@ -566,6 +566,9 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */
struct sk_buff *skb;
/* In case of GSO packets, this will store the head one */
struct sk_buff *head_skb;
/* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that
* case, we point at the "current" one, whatever that means
......@@ -696,6 +699,8 @@ struct sctp_packet {
size_t overhead;
/* This is the total size of all chunks INCLUDING padding. */
size_t size;
/* This is the maximum size this packet may have */
size_t max_size;
/* The packet is destined for this transport address.
* The function we finally use to pass down to the next lower
......
......@@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
......
......@@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
......@@ -3116,9 +3117,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int hsize;
int size;
len = head_skb->len - offset;
if (len > mss)
len = mss;
if (unlikely(mss == GSO_BY_FRAGS)) {
len = list_skb->len;
} else {
len = head_skb->len - offset;
if (len > mss)
len = mss;
}
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
......@@ -3438,6 +3443,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
......@@ -4378,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
} else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
......@@ -4387,6 +4395,37 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
/**
* skb_gso_validate_mtu - Return in case such skb fits a given MTU
*
* @skb: GSO skb
*
* skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
* once split.
*/
bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
const struct sk_buff *iter;
unsigned int hlen;
hlen = skb_gso_network_seglen(skb);
if (shinfo->gso_size != GSO_BY_FRAGS)
return hlen <= mtu;
/* Undo this so we can re-use header sizes */
hlen -= GSO_BY_FRAGS;
skb_walk_frags(skb, iter) {
if (hlen + skb_headlen(iter) > mtu)
return false;
}
return true;
}
EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
if (skb_cow(skb, skb_headroom(skb)) < 0) {
......
......@@ -54,7 +54,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
......
......@@ -225,7 +225,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
skb_gso_network_seglen(skb) <= mtu)
skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
......
......@@ -368,7 +368,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
......
......@@ -91,7 +91,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
......
......@@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o
output.o input.o debug.o ssnmap.o auth.o \
offload.o
sctp_probe-y := probe.o
......
......@@ -112,7 +112,6 @@ int sctp_rcv(struct sk_buff *skb)
struct sctp_ep_common *rcvr;
struct sctp_transport *transport = NULL;
struct sctp_chunk *chunk;
struct sctphdr *sh;
union sctp_addr src;
union sctp_addr dest;
int family;
......@@ -124,28 +123,29 @@ int sctp_rcv(struct sk_buff *skb)
__SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
if (skb_linearize(skb))
/* If packet is too small to contain a single chunk, let's not
* waste time on it anymore.
*/
if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) +
skb_transport_offset(skb))
goto discard_it;
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
goto discard_it;
/* Pull up the IP and SCTP headers. */
/* Pull up the IP header. */
__skb_pull(skb, skb_transport_offset(skb));
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
else if (!sctp_checksum_disable &&
!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
skb_pull(skb, sizeof(struct sctphdr));
/* Make sure we at least have chunk headers worth of data left. */
if (skb->len < sizeof(struct sctp_chunkhdr))
goto discard_it;
__skb_pull(skb, sizeof(struct sctphdr));
family = ipver2af(ip_hdr(skb)->version);
af = sctp_get_af_specific(family);
......@@ -230,7 +230,7 @@ int sctp_rcv(struct sk_buff *skb)
chunk->rcvr = rcvr;
/* Remember the SCTP header. */
chunk->sctp_hdr = sh;
chunk->sctp_hdr = sctp_hdr(skb);
/* Set the source and destination addresses of the incoming chunk. */
sctp_init_addrs(chunk, &src, &dest);
......@@ -660,19 +660,23 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
*/
static int sctp_rcv_ootb(struct sk_buff *skb)
{
sctp_chunkhdr_t *ch;
__u8 *ch_end;
ch = (sctp_chunkhdr_t *) skb->data;
sctp_chunkhdr_t *ch, _ch;
int ch_end, offset = 0;
/* Scan through all the chunks in the packet. */
do {
/* Make sure we have at least the header there */
if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
break;
ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
/* Break out if chunk length is less then minimal. */
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
if (ch_end > skb_tail_pointer(skb))
ch_end = offset + WORD_ROUND(ntohs(ch->length));
if (ch_end > skb->len)
break;
/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
......@@ -697,8 +701,8 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data)
goto discard;
ch = (sctp_chunkhdr_t *) ch_end;
} while (ch_end < skb_tail_pointer(skb));
offset = ch_end;
} while (ch_end < skb->len);
return 0;
......@@ -1173,6 +1177,17 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{
sctp_chunkhdr_t *ch;
/* We do not allow GSO frames here as we need to linearize and
* then cannot guarantee frame boundaries. This shouldn't be an
* issue as packets hitting this are mostly INIT or INIT-ACK and
* those cannot be on GSO-style anyway.
*/
if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
return NULL;
if (skb_linearize(skb))
return NULL;
ch = (sctp_chunkhdr_t *) skb->data;
/* The code below will attempt to walk the chunk and extract
......
......@@ -130,13 +130,25 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
* at this time.
*/
if ((chunk = queue->in_progress)) {
chunk = queue->in_progress;
if (chunk) {
/* There is a packet that we have been working on.
* Any post processing work to do before we move on?
*/
if (chunk->singleton ||
chunk->end_of_packet ||
chunk->pdiscard) {
if (chunk->head_skb == chunk->skb) {
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
goto new_skb;
}
if (chunk->skb->next) {
chunk->skb = chunk->skb->next;
goto new_skb;
}
if (chunk->head_skb)
chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
......@@ -152,34 +164,64 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (!chunk) {
struct list_head *entry;
next_chunk:
/* Is the queue empty? */
if (list_empty(&queue->in_chunk_list))
entry = sctp_list_dequeue(&queue->in_chunk_list);
if (!entry)
return NULL;
entry = queue->in_chunk_list.next;
chunk = queue->in_progress =
list_entry(entry, struct sctp_chunk, list);
list_del_init(entry);
chunk = list_entry(entry, struct sctp_chunk, list);
/* This is the first chunk in the packet. */
chunk->singleton = 1;
ch = (sctp_chunkhdr_t *) chunk->skb->data;
chunk->data_accepted = 0;
/* Linearize if it's not GSO */
if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
skb_is_nonlinear(chunk->skb)) {
if (skb_linearize(chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
goto next_chunk;
}
/* Update sctp_hdr as it probably changed */
chunk->sctp_hdr = sctp_hdr(chunk->skb);
}
if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
/* GSO-marked skbs but without frags, handle
* them normally
*/
if (skb_shinfo(chunk->skb)->frag_list)
chunk->head_skb = chunk->skb;
/* skbs with "cover letter" */
if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
if (WARN_ON(!chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
goto next_chunk;
}
}
if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
queue->in_progress = chunk;
new_skb:
/* This is the first chunk in the packet. */
ch = (sctp_chunkhdr_t *) chunk->skb->data;
chunk->singleton = 1;
chunk->data_accepted = 0;
chunk->pdiscard = 0;
chunk->auth = 0;
chunk->has_asconf = 0;
chunk->end_of_packet = 0;
chunk->ecn_ce_done = 0;
}
chunk->chunk_hdr = ch;
chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
/* In the unlikely case of an IP reassembly, the skb could be
* non-linear. If so, update chunk_end so that it doesn't go past
* the skb->tail.
*/
if (unlikely(skb_is_nonlinear(chunk->skb))) {
if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
chunk->chunk_end = skb_tail_pointer(chunk->skb);
}
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
......
/*
* sctp_offload - GRO/GSO Offloading for SCTP
*
* Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/socket.h>
#include <linux/sctp.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/time.h>
#include <net/net_namespace.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
#include <net/sctp/checksum.h>
#include <net/protocol.h>
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}
static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
__skb_pull(skb, sizeof(*sh));
if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
struct skb_shared_info *pinfo = skb_shinfo(skb);
struct sk_buff *frag_iter;
pinfo->gso_segs = 0;
if (skb->len != skb->data_len) {
/* Means we have chunks in here too */
pinfo->gso_segs++;
}
skb_walk_frags(skb, frag_iter)
pinfo->gso_segs++;
segs = NULL;
goto out;
}
segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
if (IS_ERR(segs))
goto out;
/* All that is left is update SCTP CRC if necessary */
if (!(features & NETIF_F_SCTP_CRC)) {
for (skb = segs; skb; skb = skb->next) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
sh = sctp_hdr(skb);
sh->checksum = sctp_gso_make_checksum(skb);
}
}
}
out:
return segs;
}
static const struct net_offload sctp_offload = {
.callbacks = {
.gso_segment = sctp_gso_segment,
},
};
int __init sctp_offload_init(void)
{
return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
}
This diff is collapsed.
......@@ -1516,6 +1516,9 @@ static __init int sctp_init(void)
if (status)
goto err_v6_add_protocol;
if (sctp_offload_init() < 0)
pr_crit("%s: Cannot add SCTP protocol offload\n", __func__);
out:
return status;
err_v6_add_protocol:
......
......@@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk)
return -ESOCKTNOSUPPORT;
}
sk->sk_gso_type = SKB_GSO_SCTP;
/* Initialize default send parameters. These parameters can be
* modified with the SCTP_DEFAULT_SEND_PARAM socket option.
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment