Commit b1937227 authored by Eric Dumazet, committed by David S. Miller

net: reorganize sk_buff for faster __copy_skb_header()

With the proliferation of bit fields in sk_buff, __copy_skb_header() became
quite expensive, showing up as the most expensive function in a GSO
workload.

__copy_skb_header() performance is also critical for non-GSO TCP
operations, as it is used from skb_clone().

This patch carefully moves all the fields that were not copied into a
separate zone: cloned, nohdr, fclone, peeked, head_frag, xmit_more.

Then I moved all other fields and all other copied fields into a section
delimited by headers_start[0]/headers_end[0], so that we
can use a single memcpy() call, inlined by the compiler using long
word loads/stores.

I also tried to make all copies in the natural order of sk_buff,
to help hardware prefetching.

I made sure sk_buff size did not change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 842abe08
...@@ -527,27 +527,41 @@ struct sk_buff { ...@@ -527,27 +527,41 @@ struct sk_buff {
char cb[48] __aligned(8); char cb[48] __aligned(8);
unsigned long _skb_refdst; unsigned long _skb_refdst;
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM
struct sec_path *sp; struct sec_path *sp;
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif #endif
unsigned int len, unsigned int len,
data_len; data_len;
__u16 mac_len, __u16 mac_len,
hdr_len; hdr_len;
union {
__wsum csum; /* Following fields are _not_ copied in __copy_skb_header()
struct { * Note that queue_mapping is here mostly to fill a hole.
__u16 csum_start; */
__u16 csum_offset;
};
};
__u32 priority;
kmemcheck_bitfield_begin(flags1); kmemcheck_bitfield_begin(flags1);
__u8 ignore_df:1, __u16 queue_mapping;
cloned:1, __u8 cloned:1,
ip_summed:2,
nohdr:1, nohdr:1,
nfctinfo:3; fclone:2,
peeked:1,
head_frag:1,
xmit_more:1;
/* one bit hole */
kmemcheck_bitfield_end(flags1);
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
__u32 headers_start[0];
/* if you move pkt_type around you also must adapt those constants */ /* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD #ifdef __BIG_ENDIAN_BITFIELD
...@@ -558,58 +572,53 @@ struct sk_buff { ...@@ -558,58 +572,53 @@ struct sk_buff {
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
__u8 __pkt_type_offset[0]; __u8 __pkt_type_offset[0];
__u8 pkt_type:3, __u8 pkt_type:3;
fclone:2,
ipvs_property:1,
peeked:1,
nf_trace:1;
kmemcheck_bitfield_end(flags1);
__be16 protocol;
void (*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
int skb_iif;
__u32 hash;
__be16 vlan_proto;
__u16 vlan_tci;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16 tc_verd; /* traffic control verdict */
#endif
#endif
__u16 queue_mapping;
kmemcheck_bitfield_begin(flags2);
__u8 xmit_more:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
__u8 pfmemalloc:1; __u8 pfmemalloc:1;
__u8 ignore_df:1;
__u8 nfctinfo:3;
__u8 nf_trace:1;
__u8 ip_summed:2;
__u8 ooo_okay:1; __u8 ooo_okay:1;
__u8 l4_hash:1; __u8 l4_hash:1;
__u8 sw_hash:1; __u8 sw_hash:1;
__u8 wifi_acked_valid:1; __u8 wifi_acked_valid:1;
__u8 wifi_acked:1; __u8 wifi_acked:1;
__u8 no_fcs:1; __u8 no_fcs:1;
__u8 head_frag:1;
/* Indicates the inner headers are valid in the skbuff. */ /* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1; __u8 encapsulation:1;
__u8 encap_hdr_csum:1; __u8 encap_hdr_csum:1;
__u8 csum_valid:1; __u8 csum_valid:1;
__u8 csum_complete_sw:1; __u8 csum_complete_sw:1;
/* 1/3 bit hole (depending on ndisc_nodetype presence) */ __u8 csum_level:2;
kmemcheck_bitfield_end(flags2); __u8 csum_bad:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
__u8 ipvs_property:1;
/* 5 or 7 bit hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16 tc_verd; /* traffic control verdict */
#endif
#endif
union {
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
};
};
__u32 priority;
int skb_iif;
__u32 hash;
__be16 vlan_proto;
__u16 vlan_tci;
#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
union { union {
unsigned int napi_id; unsigned int napi_id;
...@@ -625,19 +634,18 @@ struct sk_buff { ...@@ -625,19 +634,18 @@ struct sk_buff {
__u32 reserved_tailroom; __u32 reserved_tailroom;
}; };
kmemcheck_bitfield_begin(flags3);
__u8 csum_level:2;
__u8 csum_bad:1;
/* 13 bit hole */
kmemcheck_bitfield_end(flags3);
__be16 inner_protocol; __be16 inner_protocol;
__u16 inner_transport_header; __u16 inner_transport_header;
__u16 inner_network_header; __u16 inner_network_header;
__u16 inner_mac_header; __u16 inner_mac_header;
__be16 protocol;
__u16 transport_header; __u16 transport_header;
__u16 network_header; __u16 network_header;
__u16 mac_header; __u16 mac_header;
__u32 headers_end[0];
/* These elements must be at the end, see alloc_skb() for details. */ /* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail; sk_buff_data_t tail;
sk_buff_data_t end; sk_buff_data_t end;
...@@ -3040,11 +3048,13 @@ static inline void nf_reset_trace(struct sk_buff *skb) ...@@ -3040,11 +3048,13 @@ static inline void nf_reset_trace(struct sk_buff *skb)
} }
/* Note: This doesn't put any conntrack and bridge info in dst. */ /* Note: This doesn't put any conntrack and bridge info in dst. */
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
bool copy)
{ {
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
dst->nfct = src->nfct; dst->nfct = src->nfct;
nf_conntrack_get(src->nfct); nf_conntrack_get(src->nfct);
if (copy)
dst->nfctinfo = src->nfctinfo; dst->nfctinfo = src->nfctinfo;
#endif #endif
#ifdef CONFIG_BRIDGE_NETFILTER #ifdef CONFIG_BRIDGE_NETFILTER
...@@ -3052,6 +3062,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) ...@@ -3052,6 +3062,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
nf_bridge_get(src->nf_bridge); nf_bridge_get(src->nf_bridge);
#endif #endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
if (copy)
dst->nf_trace = src->nf_trace; dst->nf_trace = src->nf_trace;
#endif #endif
} }
...@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) ...@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#ifdef CONFIG_BRIDGE_NETFILTER #ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(dst->nf_bridge); nf_bridge_put(dst->nf_bridge);
#endif #endif
__nf_copy(dst, src); __nf_copy(dst, src, true);
} }
#ifdef CONFIG_NETWORK_SECMARK #ifdef CONFIG_NETWORK_SECMARK
......
...@@ -261,7 +261,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, ...@@ -261,7 +261,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_t *fclone_ref = (atomic_t *) (child + 1); atomic_t *fclone_ref = (atomic_t *) (child + 1);
kmemcheck_annotate_bitfield(child, flags1); kmemcheck_annotate_bitfield(child, flags1);
kmemcheck_annotate_bitfield(child, flags2);
skb->fclone = SKB_FCLONE_ORIG; skb->fclone = SKB_FCLONE_ORIG;
atomic_set(fclone_ref, 1); atomic_set(fclone_ref, 1);
...@@ -675,57 +674,61 @@ void consume_skb(struct sk_buff *skb) ...@@ -675,57 +674,61 @@ void consume_skb(struct sk_buff *skb)
} }
EXPORT_SYMBOL(consume_skb); EXPORT_SYMBOL(consume_skb);
/* Make sure a field is enclosed inside headers_start/headers_end section */
#define CHECK_SKB_FIELD(field) \
BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
offsetof(struct sk_buff, headers_start)); \
BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
offsetof(struct sk_buff, headers_end)); \
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{ {
new->tstamp = old->tstamp; new->tstamp = old->tstamp;
/* We do not copy old->sk */
new->dev = old->dev; new->dev = old->dev;
new->transport_header = old->transport_header; memcpy(new->cb, old->cb, sizeof(old->cb));
new->network_header = old->network_header;
new->mac_header = old->mac_header;
new->inner_protocol = old->inner_protocol;
new->inner_transport_header = old->inner_transport_header;
new->inner_network_header = old->inner_network_header;
new->inner_mac_header = old->inner_mac_header;
skb_dst_copy(new, old); skb_dst_copy(new, old);
skb_copy_hash(new, old);
new->ooo_okay = old->ooo_okay;
new->no_fcs = old->no_fcs;
new->encapsulation = old->encapsulation;
new->encap_hdr_csum = old->encap_hdr_csum;
new->csum_valid = old->csum_valid;
new->csum_complete_sw = old->csum_complete_sw;
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp); new->sp = secpath_get(old->sp);
#endif #endif
memcpy(new->cb, old->cb, sizeof(old->cb)); __nf_copy(new, old, false);
new->csum = old->csum;
new->ignore_df = old->ignore_df; /* Note : this field could be in headers_start/headers_end section
new->pkt_type = old->pkt_type; * It is not yet because we do not want to have a 16 bit hole
new->ip_summed = old->ip_summed; */
skb_copy_queue_mapping(new, old); new->queue_mapping = old->queue_mapping;
new->priority = old->priority;
#if IS_ENABLED(CONFIG_IP_VS) memcpy(&new->headers_start, &old->headers_start,
new->ipvs_property = old->ipvs_property; offsetof(struct sk_buff, headers_end) -
offsetof(struct sk_buff, headers_start));
CHECK_SKB_FIELD(protocol);
CHECK_SKB_FIELD(csum);
CHECK_SKB_FIELD(hash);
CHECK_SKB_FIELD(priority);
CHECK_SKB_FIELD(skb_iif);
CHECK_SKB_FIELD(vlan_proto);
CHECK_SKB_FIELD(vlan_tci);
CHECK_SKB_FIELD(transport_header);
CHECK_SKB_FIELD(network_header);
CHECK_SKB_FIELD(mac_header);
CHECK_SKB_FIELD(inner_protocol);
CHECK_SKB_FIELD(inner_transport_header);
CHECK_SKB_FIELD(inner_network_header);
CHECK_SKB_FIELD(inner_mac_header);
CHECK_SKB_FIELD(mark);
#ifdef CONFIG_NETWORK_SECMARK
CHECK_SKB_FIELD(secmark);
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
CHECK_SKB_FIELD(napi_id);
#endif #endif
new->pfmemalloc = old->pfmemalloc;
new->protocol = old->protocol;
new->mark = old->mark;
new->skb_iif = old->skb_iif;
__nf_copy(new, old);
#ifdef CONFIG_NET_SCHED #ifdef CONFIG_NET_SCHED
new->tc_index = old->tc_index; CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT #ifdef CONFIG_NET_CLS_ACT
new->tc_verd = old->tc_verd; CHECK_SKB_FIELD(tc_verd);
#endif #endif
#endif #endif
new->vlan_proto = old->vlan_proto;
new->vlan_tci = old->vlan_tci;
skb_copy_secmark(new, old);
#ifdef CONFIG_NET_RX_BUSY_POLL
new->napi_id = old->napi_id;
#endif
} }
/* /*
...@@ -876,7 +879,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) ...@@ -876,7 +879,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
return NULL; return NULL;
kmemcheck_annotate_bitfield(n, flags1); kmemcheck_annotate_bitfield(n, flags1);
kmemcheck_annotate_bitfield(n, flags2);
n->fclone = SKB_FCLONE_UNAVAILABLE; n->fclone = SKB_FCLONE_UNAVAILABLE;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment