Commit bffa72cf authored by Eric Dumazet, committed by David S. Miller

net: sk_buff rbnode reorg

skb->rbnode shares space with skb->next, skb->prev and skb->tstamp

Current uses (TCP receive ofo queue and netem) need to save/restore
tstamp, while skb->dev is either NULL (TCP) or a constant for a given
queue (netem).

Since we plan to use an RB tree for the TCP retransmit queue to speed up
SACK processing with large BDP (bandwidth-delay product), this patch
exchanges skb->dev and skb->tstamp.

This saves some overhead in both TCP and netem.
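
To make the layout concrete, here is a minimal userspace sketch of the post-patch overlay; the types are stand-ins, not the kernel's real definitions, and the fields are assumed pointer-sized as on 64-bit builds:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct rb_node {
	uintptr_t parent_color;
	struct rb_node *left, *right;
};

struct fake_skb {
	union {
		struct {
			struct fake_skb *next;
			struct fake_skb *prev;
			union {
				void *dev;              /* moved in by this patch */
				unsigned long dev_scratch;
			};
		};
		struct rb_node rbnode;  /* overwrites next/prev/dev when used */
	};
	uint64_t tstamp;                /* moved out: no longer in the union */
};

int main(void)
{
	struct fake_skb skb = { .tstamp = 12345 };

	/* Queueing on an rbtree scribbles over next/prev/dev ... */
	skb.rbnode = (struct rb_node){ .parent_color = 1 };

	/* ... but tstamp survives, so TCP's ofo queue and netem no longer
	 * need a save/restore dance for it.
	 */
	assert(skb.tstamp == 12345);
	printf("tstamp survived rbtree use: %llu\n",
	       (unsigned long long)skb.tstamp);
	return 0;
}

Because tstamp now lives outside the shared words, TCP's out-of-order queue and netem can drop their stash/restore logic entirely, which is exactly what the hunks below remove.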

v2: removes the swtstamp field from struct tcp_skb_cb
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a38b2fa3
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -661,8 +661,12 @@ struct sk_buff {
 			struct sk_buff		*prev;
 
 			union {
-				ktime_t		tstamp;
-				u64		skb_mstamp;
+				struct net_device	*dev;
+				/* Some protocols might use this space to store information,
+				 * while device pointer would be NULL.
+				 * UDP receive path is one user.
+				 */
+				unsigned long		dev_scratch;
 			};
 		};
 		struct rb_node		rbnode; /* used in netem & tcp stack */
@@ -670,12 +674,8 @@ struct sk_buff {
 	struct sock		*sk;
 
 	union {
-		struct net_device	*dev;
-		/* Some protocols might use this space to store information,
-		 * while device pointer would be NULL.
-		 * UDP receive path is one user.
-		 */
-		unsigned long		dev_scratch;
+		ktime_t		tstamp;
+		u64		skb_mstamp;
 	};
 	/*
 	 * This is the control buffer. It is free to use for every
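
The swap relies on a size invariant: struct rb_node is three pointer-sized words, the same footprint as next + prev + dev, so moving dev into the shared area and tstamp out leaves the struct layout intact. A minimal compile-time check of that assumption (userspace stand-in mirroring include/linux/rbtree.h):

#include <stddef.h>

/* Stand-in for the kernel's rb_node: one parent/color word plus two
 * child pointers.
 */
struct rb_node {
	unsigned long  __rb_parent_color;
	struct rb_node *rb_right;
	struct rb_node *rb_left;
};

/* next, prev and dev are three pointers; rbnode must not be wider,
 * or rbtree queueing would spill into the fields that follow.
 */
_Static_assert(sizeof(struct rb_node) <= 3 * sizeof(void *),
	       "rb_node must fit in the next/prev/dev words");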
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -797,12 +797,6 @@ struct tcp_skb_cb {
 			u16	tcp_gso_segs;
 			u16	tcp_gso_size;
 		};
-		/* Used to stash the receive timestamp while this skb is in the
-		 * out of order queue, as skb->tstamp is overwritten by the
-		 * rbnode.
-		 */
-		ktime_t		swtstamp;
 	};
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13]) */
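
One reason the v2 note matters: struct tcp_skb_cb must fit inside skb->cb[], the 48-byte per-skb control buffer, so dropping the 8-byte swtstamp stash gives scarce space back. A hedged compile-time sketch of that constraint (fake_tcp_skb_cb is a hypothetical stand-in, not the real struct):

#include <stddef.h>

#define SKB_CB_BYTES 48	/* size of skb->cb[] in mainline of this era */

struct fake_tcp_skb_cb {
	unsigned int seq;		/* starting sequence number */
	unsigned int end_seq;		/* SEQ + FIN + SYN + datalen */
	unsigned char tcp_flags;
	unsigned char sacked;
	/* ... real fields elided; removing an 8-byte ktime_t stash
	 * loosens this budget by 8 bytes ...
	 */
};

_Static_assert(sizeof(struct fake_tcp_skb_cb) <= SKB_CB_BYTES,
	       "tcp_skb_cb must fit in skb->cb[]");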
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
-enum tcp_queue {
-	OOO_QUEUE,
-	RCV_QUEUE,
-};
-
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
@@ -4286,7 +4281,6 @@ enum tcp_queue {
  * Returns true if caller should free @from instead of queueing it
  */
 static bool tcp_try_coalesce(struct sock *sk,
-			     enum tcp_queue dest,
 			     struct sk_buff *to,
 			     struct sk_buff *from,
 			     bool *fragstolen)
@@ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk,
 
 	if (TCP_SKB_CB(from)->has_rxtstamp) {
 		TCP_SKB_CB(to)->has_rxtstamp = true;
-		if (dest == OOO_QUEUE)
-			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
-		else
-			to->tstamp = from->tstamp;
+		to->tstamp = from->tstamp;
 	}
 
 	return true;
@@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
-		/* Replace tstamp which was stomped by rbnode */
-		if (TCP_SKB_CB(skb)->has_rxtstamp)
-			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
-		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
-						 tail, skb, &fragstolen);
+		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
 		if (!eaten)
@@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Stash tstamp to avoid being stomped on by rbnode */
-	if (TCP_SKB_CB(skb)->has_rxtstamp)
-		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
@@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
 			     skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
@@ -4502,7 +4485,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 			__kfree_skb(skb1);
 			goto merge_right;
 		}
-	} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+	} else if (tcp_try_coalesce(sk, skb1,
 				    skb, &fragstolen)) {
 		goto coalesce_done;
 	}
@@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
-		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
+		 tcp_try_coalesce(sk, tail,
 				  skb, fragstolen)) ? 1 : 0;
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
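
With the timestamp out of the rbnode words, tcp_try_coalesce no longer needs to know which queue it serves, so the dest parameter and the enum disappear. A small userspace sketch of the simplified propagation (fake_skb and fake_cb are hypothetical stand-ins, not the kernel types):

#include <stdbool.h>
#include <stdint.h>

struct fake_cb  { bool has_rxtstamp; };
struct fake_skb { uint64_t tstamp; struct fake_cb cb; };

/* One path now serves both the receive queue and the out-of-order rbtree:
 * skb->tstamp is valid wherever the skb sits.
 */
static void coalesce_rxtstamp(struct fake_skb *to, const struct fake_skb *from)
{
	if (from->cb.has_rxtstamp) {
		to->cb.has_rxtstamp = true;
		to->tstamp = from->tstamp;	/* one copy, no queue check */
	}
}

int main(void)
{
	struct fake_skb to = { 0 }, from = { .tstamp = 777, .cb = { true } };

	coalesce_rxtstamp(&to, &from);
	return to.tstamp == 777 ? 0 : 1;
}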
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -146,7 +146,6 @@ struct netem_sched_data {
  */
 struct netem_skb_cb {
 	psched_time_t	time_to_send;
-	ktime_t		tstamp_save;
 };
@@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		}
 
 		cb->time_to_send = now + delay;
-		cb->tstamp_save = skb->tstamp;
 		++q->counter;
 		tfifo_enqueue(skb, sch);
 	} else {
@@ -629,7 +627,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
 			skb->prev = NULL;
-			skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+			/* skb->dev shares skb->rbnode area,
+			 * we need to restore its value.
+			 */
+			skb->dev = qdisc_dev(sch);
 
 #ifdef CONFIG_NET_CLS_ACT
 			/*
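
netem pays the reorg in the opposite direction: dev is now the field clobbered by rbnode, but for a qdisc every packet egresses through one device, so dequeue can rewrite it from qdisc_dev(sch) instead of saving tstamp per packet at enqueue. A rough userspace sketch of that shape (all types are stand-ins):

#include <assert.h>
#include <stddef.h>

struct net_device { int ifindex; };

struct fake_skb {
	union {
		struct {
			struct fake_skb   *next;
			struct fake_skb   *prev;
			struct net_device *dev;	/* clobbered while queued */
		};
		struct { void *p[3]; } rbnode;	/* tfifo keeps skbs here */
	};
	unsigned long long tstamp;		/* untouched by the rbtree */
};

/* Mirrors the dequeue fixup: clear list pointers, restore the constant dev. */
static void dequeue_fixup(struct fake_skb *skb, struct net_device *dev)
{
	skb->next = NULL;
	skb->prev = NULL;
	skb->dev = dev;		/* the kernel uses qdisc_dev(sch) here */
}

int main(void)
{
	struct net_device eth0 = { .ifindex = 2 };
	struct fake_skb skb = { .tstamp = 42 };

	skb.rbnode.p[0] = &skb;			/* simulate rbtree linkage */
	dequeue_fixup(&skb, &eth0);
	assert(skb.dev == &eth0 && skb.tstamp == 42);
	return 0;
}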