Commit 71a1d9ee authored by David S. Miller

Merge branch 'tun_rfs'

Zhi Yong Wu says:

====================
tun: add RFS support

Since Tom Herbert's hash-related patchset was modified and merged, his
patchset adding support for RFS on tun flows also needed to be adjusted
accordingly. I have updated the patches, and before I start running
performance tests I would like to settle on a correct code base, so I am
posting them now. Any constructive comments are welcome, thanks.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 84a6a0ac 9bc88939
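
For readers new to RFS (Receive Flow Steering): the kernel keeps a global table, rps_sock_flow_table, that maps a flow hash to the CPU where that flow's consumer last ran, and the receive path consults it to steer packets of the flow back to that CPU. The sketch below is a minimal userspace model of that idea, with invented names; it is for illustration only and is not kernel code.

#include <stdint.h>
#include <stdio.h>

#define FLOW_TABLE_SIZE 256	/* power of two, like the real sock flow table */

/* Model of the RPS sock flow table: flow hash -> desired CPU. */
static unsigned int flow_ents[FLOW_TABLE_SIZE];

/* Record "flow `hash` is consumed on `cpu`" (cf. rps_record_sock_flow). */
static void record_flow(uint32_t hash, unsigned int cpu)
{
	if (hash)
		flow_ents[hash & (FLOW_TABLE_SIZE - 1)] = cpu;
}

/* Receive-side lookup: which CPU should process this flow's packets? */
static unsigned int steer(uint32_t hash)
{
	return flow_ents[hash & (FLOW_TABLE_SIZE - 1)];
}

int main(void)
{
	record_flow(0xdeadbeef, 3);	/* the flow's consumer runs on CPU 3 */
	printf("steer flow to CPU %u\n", steer(0xdeadbeef));
	return 0;
}
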
drivers/net/tun.c

@@ -152,6 +152,7 @@ struct tun_flow_entry {
 	struct tun_struct *tun;
 	u32 rxhash;
+	u32 rps_rxhash;
 	int queue_index;
 	unsigned long updated;
 };
@@ -220,6 +221,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
 			  rxhash, queue_index);
 		e->updated = jiffies;
 		e->rxhash = rxhash;
+		e->rps_rxhash = 0;
 		e->queue_index = queue_index;
 		e->tun = tun;
 		hlist_add_head_rcu(&e->hash_link, head);
@@ -232,6 +234,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
 {
 	tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
 		  e->rxhash, e->queue_index);
+	sock_rps_reset_flow_hash(e->rps_rxhash);
 	hlist_del_rcu(&e->hash_link);
 	kfree_rcu(e, rcu);
 	--tun->flow_count;
@@ -325,6 +328,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
 		/* TODO: keep queueing to old queue until it's empty? */
 		e->queue_index = queue_index;
 		e->updated = jiffies;
+		sock_rps_record_flow_hash(e->rps_rxhash);
 	} else {
 		spin_lock_bh(&tun->lock);
 		if (!tun_flow_find(head, rxhash) &&
@@ -341,6 +345,18 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
 	rcu_read_unlock();
 }
 
+/**
+ * Save the hash received in the stack receive path and update the
+ * flow_hash table accordingly.
+ */
+static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
+{
+	if (unlikely(e->rps_rxhash != hash)) {
+		sock_rps_reset_flow_hash(e->rps_rxhash);
+		e->rps_rxhash = hash;
+	}
+}
+
 /* We try to identify a flow through its rxhash first. The reason that
  * we do not check rxq no. is because some cards(e.g 82599), chooses
  * the rxq based on the txq where the last packet of the flow comes. As
@@ -361,9 +377,10 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
 	txq = skb_get_hash(skb);
 	if (txq) {
 		e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
-		if (e)
+		if (e) {
+			tun_flow_save_rps_rxhash(e, txq);
 			txq = e->queue_index;
-		else
+		} else
 			/* use multiply and shift instead of expensive divide */
 			txq = ((u64)txq * numqueues) >> 32;
 	} else if (likely(skb_rx_queue_recorded(skb))) {
@@ -728,6 +745,22 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (txq >= tun->numqueues)
 		goto drop;
 
+	if (tun->numqueues == 1) {
+		/* Select queue was not called for the skbuff, so we extract the
+		 * RPS hash and save it into the flow_table here.
+		 */
+		__u32 rxhash;
+
+		rxhash = skb_get_hash(skb);
+		if (rxhash) {
+			struct tun_flow_entry *e;
+			e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
+					  rxhash);
+			if (e)
+				tun_flow_save_rps_rxhash(e, rxhash);
+		}
+	}
+
 	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
 
 	BUG_ON(!tfile);
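
Both transmit-path call sites above funnel through tun_flow_save_rps_rxhash(), whose reset-before-save ordering ensures the steering table never keeps a hint for a hash the flow entry no longer carries. Here is a minimal, self-contained userspace model of that behavior (invented names; a single slot stands in for the real hash-indexed table):

#include <stdint.h>
#include <stdio.h>

/* One slot standing in for the hash-indexed RPS sock flow table. */
static uint32_t recorded_hash;

/* Model of sock_rps_reset_flow_hash(): drop the hint for a stale hash. */
static void reset_flow_hash(uint32_t hash)
{
	if (hash && hash == recorded_hash)
		recorded_hash = 0;
}

struct flow_entry {
	uint32_t rps_rxhash;
};

/* Model of tun_flow_save_rps_rxhash(): reset the old hint before saving. */
static void save_rps_rxhash(struct flow_entry *e, uint32_t hash)
{
	if (e->rps_rxhash != hash) {
		reset_flow_hash(e->rps_rxhash);
		e->rps_rxhash = hash;
	}
}

int main(void)
{
	struct flow_entry e = { 0 };

	save_rps_rxhash(&e, 0x1234);	/* first sighting: nothing to reset */
	recorded_hash = 0x1234;		/* hint later recorded by the RX path */
	save_rps_rxhash(&e, 0x5678);	/* hash changed: stale hint is dropped */
	printf("recorded hash is now %#x\n", recorded_hash);	/* prints 0 */
	return 0;
}
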
include/net/sock.h

@@ -820,30 +820,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
-static inline void sock_rps_record_flow(const struct sock *sk)
+static inline void sock_rps_record_flow_hash(__u32 hash)
 {
 #ifdef CONFIG_RPS
 	struct rps_sock_flow_table *sock_flow_table;
 
 	rcu_read_lock();
 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
-	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rps_record_sock_flow(sock_flow_table, hash);
 	rcu_read_unlock();
 #endif
 }
 
-static inline void sock_rps_reset_flow(const struct sock *sk)
+static inline void sock_rps_reset_flow_hash(__u32 hash)
 {
 #ifdef CONFIG_RPS
 	struct rps_sock_flow_table *sock_flow_table;
 
 	rcu_read_lock();
 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
-	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rps_reset_sock_flow(sock_flow_table, hash);
 	rcu_read_unlock();
 #endif
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+	sock_rps_record_flow_hash(sk->sk_rxhash);
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+	sock_rps_reset_flow_hash(sk->sk_rxhash);
+}
+
 static inline void sock_rps_save_rxhash(struct sock *sk,
 					const struct sk_buff *skb)
 {
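
The sock.h change is a pure refactor: the socket-based helpers keep their behavior, while the new hash-based variants let code that tracks flows without a socket, such as the tun flow entries above, feed the same RFS table. The following is a hedged sketch of a hypothetical non-socket caller; only the two helpers introduced in this series are real, everything else is invented for illustration.

#include <net/sock.h>

/* Hypothetical driver-side flow state (not part of this patch set). */
struct my_flow {
	u32 rxhash;	/* flow hash, e.g. obtained from skb_get_hash() */
};

/* Call on the CPU where the flow's consumer runs, so future receive
 * processing for this hash is steered to this CPU. */
static void my_flow_seen(struct my_flow *f)
{
	sock_rps_record_flow_hash(f->rxhash);
}

/* Call on teardown so the steering table does not keep a stale hint,
 * mirroring what tun_flow_delete() does above. */
static void my_flow_gone(struct my_flow *f)
{
	sock_rps_reset_flow_hash(f->rxhash);
}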