Commit 71f9b61c authored by David S. Miller

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
40GbE Intel Wired LAN Driver Updates 2018-09-25

This series contains updates to i40e and xsk.

Mariusz fixes an issue where the VF link state was not being updated
properly when the PF is down or up.  Also cleaned up the promiscuous
configuration during a VF reset.

Patryk simplifies the code a bit to use the variables for PF and HW that
are declared, rather than using the VSI pointers.  Cleaned up the
message length parameter to several virtchnl functions, since it was not
being used (or needed).

Harshitha fixes two potential race conditions when trying to change VF
settings by creating a helper function to validate that the VF is
enabled and that the VSI is set up.

Sergey corrects a double "link down" message by putting in a check for
whether or not the link is up or going down.

Björn addresses an AF_XDP zero-copy issue where buffers passed
from userspace to the kernel were leaked when the hardware descriptor
ring was torn down.  A zero-copy capable driver picks buffers off the
fill ring and places them on the hardware receive ring to be completed at
a later point when DMA is complete.  Similarly, on the transmit side, the
driver picks buffers off the transmit ring and places them on the
transmit hardware ring.

In the typical flow, the receive buffer will be placed onto a receive
ring (completed to the user), and the transmit buffer will be placed on
the completion ring to notify the user that the transfer is done.

However, if the driver needs to tear down the hardware rings for some
reason (interface goes down, reconfiguration and such), the userspace
buffers cannot be leaked. They have to be reused or completed back to
userspace.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7a153655 3ab52af5
......@@ -5,6 +5,7 @@
#include "i40e.h"
#include "i40e_diag.h"
#include "i40e_txrx_common.h"
/* ethtool statistics helpers */
......@@ -1710,6 +1711,13 @@ static int i40e_set_ringparam(struct net_device *netdev,
(new_rx_count == vsi->rx_rings[0]->count))
return 0;
/* If there is an AF_XDP UMEM attached to any of the Rx rings,
* disallow changing the number of descriptors -- regardless
* if the netdev is running or not.
*/
if (i40e_xsk_any_rx_ring_enabled(vsi))
return -EBUSY;
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
timeout--;
if (!timeout)
......
......@@ -1532,8 +1532,8 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
return 0;
}
if (test_bit(__I40E_VSI_DOWN, vsi->back->state) ||
test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state))
if (test_bit(__I40E_DOWN, pf->state) ||
test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
return -EADDRNOTAVAIL;
if (ether_addr_equal(hw->mac.addr, addr->sa_data))
......@@ -1557,8 +1557,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
if (vsi->type == I40E_VSI_MAIN) {
i40e_status ret;
ret = i40e_aq_mac_address_write(&vsi->back->hw,
I40E_AQC_WRITE_TYPE_LAA_WOL,
ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL,
addr->sa_data, NULL);
if (ret)
netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n",
......@@ -1569,7 +1568,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
/* schedule our worker thread which will take care of
* applying the new filter changes
*/
i40e_service_event_schedule(vsi->back);
i40e_service_event_schedule(pf);
return 0;
}
......@@ -6432,7 +6431,10 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
char *req_fec = "";
char *an = "";
if (isup)
new_speed = pf->hw.phy.link_info.link_speed;
else
new_speed = I40E_LINK_SPEED_UNKNOWN;
if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
return;
......@@ -8509,14 +8511,9 @@ static void i40e_link_event(struct i40e_pf *pf)
i40e_status status;
bool new_link, old_link;
/* save off old link status information */
pf->hw.phy.link_info_old = pf->hw.phy.link_info;
/* set this to force the get_link_status call to refresh state */
pf->hw.phy.get_link_info = true;
old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
status = i40e_get_link_status(&pf->hw, &new_link);
/* On success, disable temp link polling */
......
......@@ -636,13 +636,18 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
unsigned long bi_size;
u16 i;
if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
i40e_xsk_clean_tx_ring(tx_ring);
} else {
/* ring already cleared, nothing to do */
if (!tx_ring->tx_bi)
return;
/* Free all the Tx ring sk_buffs */
for (i = 0; i < tx_ring->count; i++)
i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
i40e_unmap_and_free_tx_resource(tx_ring,
&tx_ring->tx_bi[i]);
}
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_bi, 0, bi_size);
......@@ -1350,8 +1355,10 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_ring->skb = NULL;
}
if (rx_ring->xsk_umem)
if (rx_ring->xsk_umem) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
}
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
......
......@@ -87,4 +87,8 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
}
}
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
#endif /* I40E_TXRX_COMMON_ */
......@@ -140,6 +140,7 @@ static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid)
{
struct xdp_umem_fq_reuse *reuseq;
bool if_running;
int err;
......@@ -156,6 +157,12 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
return -EBUSY;
}
reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
if (!reuseq)
return -ENOMEM;
xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
err = i40e_xsk_umem_dma_map(vsi, umem);
if (err)
return err;
......@@ -353,16 +360,46 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
}
/**
* i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
* i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer
* @rx_ring: Rx ring
* @count: The number of buffers to allocate
* @bi: Rx buffer to populate
*
* This function allocates a number of Rx buffers and places them on
* the Rx ring.
* This function allocates an Rx buffer. The buffer can come from fill
* queue, or via the reuse queue.
*
* Returns true for a successful allocation, false otherwise
**/
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
				      struct i40e_rx_buffer *bi)
{
	struct xdp_umem *umem = rx_ring->xsk_umem;
	/* Offset applied to both the DMA and the CPU address of the buffer */
	u64 hr = umem->headroom + XDP_PACKET_HEADROOM;
	u64 handle;

	/* Peek only; the entry is consumed further down once bi is filled */
	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
		rx_ring->rx_stats.alloc_page_failed++;
		return false;
	}

	handle &= umem->chunk_mask;

	bi->dma = xdp_umem_get_dma(umem, handle) + hr;
	bi->addr = xdp_umem_get_data(umem, handle) + hr;

	/* The stored handle is advanced by the UMEM headroom only */
	bi->handle = handle + umem->headroom;

	xsk_umem_discard_addr_rq(umem);
	return true;
}
static __always_inline bool
__i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
bool alloc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *bi))
{
u16 ntu = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
......@@ -372,7 +409,7 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
rx_desc = I40E_RX_DESC(rx_ring, ntu);
bi = &rx_ring->rx_bi[ntu];
do {
if (!i40e_alloc_buffer_zc(rx_ring, bi)) {
if (!alloc(rx_ring, bi)) {
ok = false;
goto no_buffers;
}
......@@ -404,6 +441,38 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
return ok;
}
/**
 * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
 * @rx_ring: Rx ring
 * @count: The number of buffers to allocate
 *
 * Fills the Rx ring through the common allocation loop, using
 * i40e_alloc_buffer_slow_zc() as the per-buffer allocator, i.e. buffers
 * are taken from the reuse queue or the fill ring.
 *
 * Returns true for a successful allocation, false otherwise
 **/
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
{
	bool ok;

	ok = __i40e_alloc_rx_buffers_zc(rx_ring, count,
					i40e_alloc_buffer_slow_zc);
	return ok;
}
/**
 * i40e_alloc_rx_buffers_fast_zc - Allocates a number of Rx buffers
 * @rx_ring: Rx ring
 * @count: The number of buffers to allocate
 *
 * Fills the Rx ring through the common allocation loop, using
 * i40e_alloc_buffer_zc() as the per-buffer allocator, i.e. buffers are
 * taken from the fill ring or the internal recycle mechanism.
 *
 * Returns true for a successful allocation, false otherwise
 **/
static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
{
	bool ok;

	ok = __i40e_alloc_rx_buffers_zc(rx_ring, count,
					i40e_alloc_buffer_zc);
	return ok;
}
/**
* i40e_get_rx_buffer_zc - Return the current Rx buffer
* @rx_ring: Rx ring
......@@ -571,7 +640,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
failure = failure ||
!i40e_alloc_rx_buffers_zc(rx_ring,
!i40e_alloc_rx_buffers_fast_zc(rx_ring,
cleaned_count);
cleaned_count = 0;
}
......@@ -830,3 +899,69 @@ int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
return 0;
}
/**
 * i40e_xsk_clean_rx_ring - Return Rx ring buffers to the UMEM reuse queue
 * @rx_ring: Rx ring
 *
 * Every ring slot that still holds a buffer has its handle pushed onto
 * the UMEM reuse queue and is then marked empty.
 **/
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
{
	struct i40e_rx_buffer *buf;
	u16 idx;

	for (idx = 0; idx < rx_ring->count; idx++) {
		buf = &rx_ring->rx_bi[idx];

		/* A slot without an address holds no buffer */
		if (buf->addr) {
			xsk_umem_fq_reuse(rx_ring->xsk_umem, buf->handle);
			buf->addr = NULL;
		}
	}
}
/**
 * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
 * @tx_ring: XDP Tx ring
 *
 * Walks the entries between next_to_clean and next_to_use. Entries that
 * carry an xdp frame are released with i40e_clean_xdp_tx_buffer(); the
 * remaining entries are counted and completed back to the UMEM with a
 * single xsk_umem_complete_tx() call.
 **/
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
{
	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
	struct xdp_umem *umem = tx_ring->xsk_umem;
	struct i40e_tx_buffer *tx_bi;
	u32 xsk_frames = 0;

	while (ntc != ntu) {
		tx_bi = &tx_ring->tx_bi[ntc];

		if (tx_bi->xdpf)
			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		/* Advance the local index, wrapping at the end of the ring */
		ntc++;
		if (ntc >= tx_ring->count)
			ntc = 0;
	}

	/* Complete all counted frames to the UMEM in one go */
	if (xsk_frames)
		xsk_umem_complete_tx(umem, xsk_frames);
}
/**
 * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have AF_XDP UMEM attached
 * @vsi: vsi
 *
 * Scans the per-queue UMEM table of the VSI, if there is one.
 *
 * Returns true if any of the Rx rings has an AF_XDP UMEM attached
 **/
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
{
	int qid = 0;

	/* No UMEM table at all means nothing can be attached */
	if (!vsi->xsk_umems)
		return false;

	while (qid < vsi->num_queue_pairs) {
		if (vsi->xsk_umems[qid])
			return true;
		qid++;
	}

	return false;
}
......@@ -21,6 +21,12 @@ struct xdp_umem_page {
dma_addr_t dma;
};
/* Queue of fill-queue handles kept for reuse; hangs off xdp_umem->fq_reuse. */
struct xdp_umem_fq_reuse {
/* Capacity of handles[], as set by xsk_reuseq_prepare() */
u32 nentries;
/* Number of handles currently stored */
u32 length;
/* Stored handles; the helpers push at and pop from index length (LIFO) */
u64 handles[];
};
struct xdp_umem {
struct xsk_queue *fq;
struct xsk_queue *cq;
......@@ -37,6 +43,7 @@ struct xdp_umem {
struct page **pgs;
u32 npgs;
struct net_device *dev;
struct xdp_umem_fq_reuse *fq_reuse;
u16 queue_id;
bool zc;
spinlock_t xsk_list_lock;
......@@ -75,6 +82,10 @@ void xsk_umem_discard_addr(struct xdp_umem *umem);
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
struct xdp_umem_fq_reuse *newq);
void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
......@@ -85,6 +96,35 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}
/* Reuse-queue aware version of FILL queue helpers */
/* Peek the next address: the newest reuse-queue entry if the reuse queue
 * is non-empty, otherwise whatever xsk_umem_peek_addr() provides. Nothing
 * is consumed here.
 */
static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
{
	struct xdp_umem_fq_reuse *reuseq = umem->fq_reuse;

	if (reuseq->length) {
		*addr = reuseq->handles[reuseq->length - 1];
		return addr;
	}

	return xsk_umem_peek_addr(umem, addr);
}
/* Consume the entry last returned by xsk_umem_peek_addr_rq(): pop it from
 * the reuse queue if that is non-empty, otherwise defer to
 * xsk_umem_discard_addr().
 */
static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
{
	struct xdp_umem_fq_reuse *reuseq = umem->fq_reuse;

	if (reuseq->length) {
		reuseq->length--;
		return;
	}

	xsk_umem_discard_addr(umem);
}
/* Push @addr onto the UMEM's reuse queue.
 *
 * NOTE(review): there is no check against rq->nentries here; callers
 * presumably guarantee that the queue has room -- confirm.
 */
static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
{
	struct xdp_umem_fq_reuse *reuseq = umem->fq_reuse;
	u32 slot = reuseq->length;

	reuseq->handles[slot] = addr;
	reuseq->length = slot + 1;
}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
......@@ -128,6 +168,21 @@ static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
{
}
/* Stub variant: allocates nothing and always returns NULL.
 * NOTE(review): appears to belong to the !CONFIG_XDP_SOCKETS branch -- confirm.
 */
static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
{
return NULL;
}
/* Stub variant: ignores both arguments and always returns NULL.
 * NOTE(review): appears to belong to the !CONFIG_XDP_SOCKETS branch -- confirm.
 */
static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
struct xdp_umem *umem,
struct xdp_umem_fq_reuse *newq)
{
return NULL;
}
/* Stub variant: does nothing.
 * NOTE(review): appears to belong to the !CONFIG_XDP_SOCKETS branch -- confirm.
 */
static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
{
}
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return NULL;
......@@ -137,6 +192,20 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return 0;
}
/* Stub for builds without CONFIG_XDP_SOCKETS: never yields an address. */
static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
{
return NULL;
}
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
{
}
/* Stub for builds without CONFIG_XDP_SOCKETS: the address is dropped. */
static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
{
}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
......@@ -165,6 +165,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
xsk_reuseq_destroy(umem);
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
......
......@@ -3,7 +3,9 @@
* Copyright(c) 2018 Intel Corporation.
*/
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/overflow.h>
#include "xsk_queue.h"
......@@ -62,3 +64,56 @@ void xskq_destroy(struct xsk_queue *q)
page_frag_free(q->ring);
kfree(q);
}
/* Allocate a reuse queue with room for at least @nentries handles; the
 * capacity is rounded up to a power of two. Only the header is zeroed,
 * the handle array is left uninitialized. Returns NULL if the rounding
 * overflows u32 or the allocation fails.
 */
struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
{
	u32 rounded = (u32)roundup_pow_of_two(nentries);
	struct xdp_umem_fq_reuse *rq;

	/* A rounded value below the request means the rounding overflowed */
	if (rounded < nentries)
		return NULL;

	rq = kvmalloc(struct_size(rq, handles, rounded), GFP_KERNEL);
	if (!rq)
		return NULL;

	memset(rq, 0, offsetof(typeof(*rq), handles));
	rq->nentries = rounded;

	return rq;
}
EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
/* Install @newq as the UMEM's reuse queue, carrying over any handles
 * stored in the current one.
 *
 * Returns the queue the caller should dispose of: the previous queue
 * (NULL if there was none), or @newq itself when it is too small to hold
 * the handles currently queued, in which case the UMEM is left unchanged.
 */
struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
					  struct xdp_umem_fq_reuse *newq)
{
	struct xdp_umem_fq_reuse *prev = umem->fq_reuse;

	if (prev) {
		/* Refuse a queue that cannot hold what is already stored */
		if (prev->length > newq->nentries)
			return newq;

		memcpy(newq->handles, prev->handles,
		       array_size(prev->length, sizeof(u64)));
		newq->length = prev->length;
	}

	umem->fq_reuse = newq;
	return prev;
}
EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
/* Release a reuse queue through kvfree(). */
void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
{
kvfree(rq);
}
EXPORT_SYMBOL_GPL(xsk_reuseq_free);
/* Free the UMEM's reuse queue and clear the pointer to it. */
void xsk_reuseq_destroy(struct xdp_umem *umem)
{
xsk_reuseq_free(umem->fq_reuse);
umem->fq_reuse = NULL;
}
......@@ -258,4 +258,7 @@ void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
/* Executed by the core when the entire UMEM gets freed */
void xsk_reuseq_destroy(struct xdp_umem *umem);
#endif /* _LINUX_XSK_QUEUE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment