Commit 0492b71c authored by David S. Miller's avatar David S. Miller

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
40GbE Intel Wired LAN Driver Updates 2017-04-08

This series contains updates to i40e and i40evf only.

Mitch fixes an issue where the client driver (i40iw) was attempting to
load on x710 devices (which do not support iWARP), so only register with
the client if iWARP is supported.

Jake fixes up error messages to better clarify to the user when adding a
invalid flow type.  Updates the driver to look up the MAC address from
eth_get_platform_mac_address() first before checking what the firmware
provides.  Cleans up code so we are not repeating a duplicate loop, by
checking both transmit and receive queues in a single loop.  Also cleans
up flags never used, so remove the definitions.

Alex does cleanup so that we are always updating pf->flags when a change
is made to the private flags.  Adds support for 3K buffers to the receive
path so that we can provide the additional padding needed in the event
of NET_IP_ALIGN being non-zero or a cache line being greater than 64.
Adds support for build_skb() to i40e/i40evf.

Maciej adjusts the scope of the rtnl lock held during reset because it
was stopping other PFs from running their reset procedures.

Alan reduces code complexity in i40e_detect_recover_hung_queue().
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 417d978f f8b45b74
......@@ -389,13 +389,9 @@ struct i40e_pf {
#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3)
#define I40E_FLAG_RSS_ENABLED BIT_ULL(6)
#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7)
#define I40E_FLAG_NEED_LINK_UPDATE BIT_ULL(9)
#define I40E_FLAG_IWARP_ENABLED BIT_ULL(10)
#define I40E_FLAG_CLEAN_ADMINQ BIT_ULL(14)
#define I40E_FLAG_FILTER_SYNC BIT_ULL(15)
#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16)
#define I40E_FLAG_PROCESS_MDD_EVENT BIT_ULL(17)
#define I40E_FLAG_PROCESS_VFLR_EVENT BIT_ULL(18)
#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19)
#define I40E_FLAG_DCB_ENABLED BIT_ULL(20)
#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21)
......@@ -617,7 +613,6 @@ struct i40e_vsi {
u32 tx_busy;
u64 tx_linearize;
u64 tx_force_wb;
u64 tx_lost_interrupt;
u32 rx_buf_failed;
u32 rx_page_failed;
......@@ -703,9 +698,6 @@ struct i40e_q_vector {
u8 num_ringpairs; /* total number of ring pairs in vector */
#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */
unsigned long hung_detected; /* Set/Reset for hung_detection logic */
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
......@@ -837,7 +829,7 @@ void i40e_down(struct i40e_vsi *vsi);
extern const char i40e_driver_name[];
extern const char i40e_driver_version_str[];
void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags);
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags);
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired);
int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
......
......@@ -89,7 +89,6 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
I40E_VSI_STAT("tx_linearize", tx_linearize),
I40E_VSI_STAT("tx_force_wb", tx_force_wb),
I40E_VSI_STAT("tx_lost_interrupt", tx_lost_interrupt),
I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed),
I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
};
......@@ -1852,7 +1851,7 @@ static void i40e_diag_test(struct net_device *netdev,
* link then the following link test would have
* to be moved to before the reset
*/
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
if (i40e_link_test(netdev, &data[I40E_ETH_TEST_LINK]))
eth_test->flags |= ETH_TEST_FL_FAILED;
......@@ -1868,7 +1867,7 @@ static void i40e_diag_test(struct net_device *netdev,
eth_test->flags |= ETH_TEST_FL_FAILED;
clear_bit(__I40E_TESTING, &pf->state);
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
if (if_running)
i40e_open(netdev);
......@@ -4099,7 +4098,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
*/
if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
return 0;
}
......
This diff is collapsed.
......@@ -533,14 +533,15 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
break;
default:
/* We cannot support masking based on protocol */
goto unsupported_flow;
dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n",
input->ip4_proto);
return -EINVAL;
}
break;
default:
unsupported_flow:
dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n",
input->flow_type);
ret = -EINVAL;
return -EINVAL;
}
/* The buffer allocated here will be normally be freed by
......@@ -710,19 +711,15 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
* @in_sw: is tx_pending being checked in SW or HW
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
u32 head, tail;
if (!in_sw)
head = i40e_get_head(ring);
else
head = ring->next_to_clean;
head = i40e_get_head(ring);
tail = readl(ring->tail);
if (head != tail)
......@@ -845,7 +842,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
unsigned int j = i40e_get_tx_pending(tx_ring, false);
unsigned int j = i40e_get_tx_pending(tx_ring);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
......@@ -1141,14 +1138,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_bi->dma,
rx_bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
rx_bi->page = NULL;
......@@ -1249,6 +1247,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
/**
* i40e_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
......@@ -1270,7 +1279,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
}
/* alloc new page for storage */
page = dev_alloc_page();
page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
......@@ -1278,7 +1287,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
......@@ -1286,14 +1295,14 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, 0);
__free_pages(page, i40e_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = i40e_rx_offset(rx_ring);
/* initialize pagecnt_bias to 1 representing we fully own page */
bi->pagecnt_bias = 1;
......@@ -1346,7 +1355,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
......@@ -1648,9 +1657,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
**/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
{
#if (PAGE_SIZE >= 8192)
unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
......@@ -1663,7 +1669,9 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
if (rx_buffer->page_offset > last_offset)
#define I40E_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
if (rx_buffer->page_offset > I40E_LAST_OFFSET)
return false;
#endif
......@@ -1697,9 +1705,9 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
......@@ -1758,7 +1766,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
......@@ -1807,6 +1815,51 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
return skb;
}
/**
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
* @size: size of buffer to add to skb
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
/* build an skb around the page buffer */
skb = build_skb(va - I40E_SKB_PAD, truesize);
if (unlikely(!skb))
return NULL;
/* update pointers within the skb to store the data */
skb_reserve(skb, I40E_SKB_PAD);
__skb_put(skb, size);
/* buffer is used by skb, update page_offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
return skb;
}
/**
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
......@@ -1824,7 +1877,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
......@@ -1930,6 +1984,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
if (skb)
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = i40e_build_skb(rx_ring, rx_buffer, size);
else
skb = i40e_construct_skb(rx_ring, rx_buffer, size);
......@@ -2125,8 +2181,6 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
return 0;
}
/* Clear hung_detected bit */
clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
......@@ -2262,8 +2316,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
/* Due to lack of space, no more new filters can be programmed */
if (th->syn && (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED))
return;
if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
(!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
/* HW ATR eviction will take care of removing filters on FIN
* and RST packets.
*/
......@@ -2325,8 +2378,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
(!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
......
......@@ -119,6 +119,7 @@ enum i40e_dyn_idx_t {
#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */
#define I40E_RXBUFFER_2048 2048
#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */
#define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */
/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
......@@ -134,6 +135,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
static inline int i40e_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int i40e_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (I40E_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = I40E_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return i40e_compute_pad(rx_buf_len);
}
#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/**
* i40e_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
......@@ -275,7 +328,6 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
u64 tx_lost_interrupt;
};
struct i40e_rx_queue_stats {
......@@ -341,7 +393,8 @@ struct i40e_ring {
u8 packet_stride;
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -369,6 +422,21 @@ struct i40e_ring {
*/
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}
static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......@@ -390,6 +458,17 @@ struct i40e_ring_container {
#define i40e_for_each_ring(pos, head) \
for (pos = (head).ring; pos != NULL; pos = pos->next)
static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
{
#if (PAGE_SIZE < 8192)
if (ring->rx_buf_len > (PAGE_SIZE / 2))
return 1;
#endif
return 0;
}
#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring))
bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
......@@ -400,7 +479,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
u32 i40e_get_tx_pending(struct i40e_ring *ring);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
......
......@@ -509,14 +509,15 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_bi->dma,
rx_bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
rx_bi->page = NULL;
......@@ -617,6 +618,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
/**
* i40e_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
......@@ -638,7 +650,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
}
/* alloc new page for storage */
page = dev_alloc_page();
page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
......@@ -646,7 +658,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
......@@ -654,14 +666,14 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, 0);
__free_pages(page, i40e_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = i40e_rx_offset(rx_ring);
/* initialize pagecnt_bias to 1 representing we fully own page */
bi->pagecnt_bias = 1;
......@@ -714,7 +726,7 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
......@@ -1006,9 +1018,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
**/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
{
#if (PAGE_SIZE >= 8192)
unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
......@@ -1021,7 +1030,9 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
if (rx_buffer->page_offset > last_offset)
#define I40E_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
if (rx_buffer->page_offset > I40E_LAST_OFFSET)
return false;
#endif
......@@ -1055,9 +1066,9 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
......@@ -1116,7 +1127,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
......@@ -1165,6 +1176,51 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
return skb;
}
/**
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
* @size: size of buffer to add to skb
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
/* build an skb around the page buffer */
skb = build_skb(va - I40E_SKB_PAD, truesize);
if (unlikely(!skb))
return NULL;
/* update pointers within the skb to store the data */
skb_reserve(skb, I40E_SKB_PAD);
__skb_put(skb, size);
/* buffer is used by skb, update page_offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
return skb;
}
/**
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
......@@ -1182,7 +1238,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
......@@ -1283,6 +1340,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
if (skb)
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = i40e_build_skb(rx_ring, rx_buffer, size);
else
skb = i40e_construct_skb(rx_ring, rx_buffer, size);
......
......@@ -106,6 +106,7 @@ enum i40e_dyn_idx_t {
#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */
#define I40E_RXBUFFER_2048 2048
#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */
#define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */
/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
......@@ -121,6 +122,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
static inline int i40e_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int i40e_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (I40E_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = I40E_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return i40e_compute_pad(rx_buf_len);
}
#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/**
* i40e_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
......@@ -327,7 +380,8 @@ struct i40e_ring {
u8 packet_stride;
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -355,6 +409,21 @@ struct i40e_ring {
*/
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}
static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......@@ -376,6 +445,17 @@ struct i40e_ring_container {
#define i40e_for_each_ring(pos, head) \
for (pos = (head).ring; pos != NULL; pos = pos->next)
static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
{
#if (PAGE_SIZE < 8192)
if (ring->rx_buf_len > (PAGE_SIZE / 2))
return 1;
#endif
return 0;
}
#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring))
bool i40evf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
void i40evf_clean_tx_ring(struct i40e_ring *tx_ring);
......
......@@ -205,7 +205,6 @@ struct i40evf_adapter {
#define I40EVF_FLAG_IN_NETPOLL BIT(4)
#define I40EVF_FLAG_IMIR_ENABLED BIT(5)
#define I40EVF_FLAG_MQ_CAPABLE BIT(6)
#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7)
#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8)
#define I40EVF_FLAG_RESET_PENDING BIT(9)
#define I40EVF_FLAG_RESET_NEEDED BIT(10)
......
......@@ -694,11 +694,18 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
/* Legacy Rx will always default to a 2048 buffer size. */
#if (PAGE_SIZE < 8192)
if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX)) {
/* For jumbo frames on systems with 4K pages we have to use
* an order 1 page, so we might as well increase the size
* of our Rx buffer to make better use of the available space
*/
rx_buf_len = I40E_RXBUFFER_3072;
/* We use a 1536 buffer size for configurations with
* standard Ethernet mtu. On x86 this gives us enough room
* for shared info and 192 bytes of padding.
*/
if (netdev->mtu <= ETH_DATA_LEN)
if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
(netdev->mtu <= ETH_DATA_LEN))
rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
}
#endif
......@@ -706,6 +713,11 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
for (i = 0; i < adapter->num_active_queues; i++) {
adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
adapter->rx_rings[i].rx_buf_len = rx_buf_len;
if (adapter->flags & I40EVF_FLAG_LEGACY_RX)
clear_ring_build_skb_enabled(&adapter->rx_rings[i]);
else
set_ring_build_skb_enabled(&adapter->rx_rings[i]);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment