Commit e3b205b1 authored by David S. Miller

Merge branch 'sfc-Add-XDP-support'

Charles McLachlan says:

====================
sfc: Add XDP support

Supply the XDP callbacks in netdevice ops that enable lower level processing
of XDP frames.

Changes in v4:
- Handle the failure to send some frames in efx_xdp_tx_buffers() properly.

Changes in v3:
- Fix a BUG_ON when trying to allocate piobufs to xdp queues.
- Add a missed trace_xdp_exception.

Changes in v2:
- Use of xdp_return_frame_rx_napi() in tx.c
- Addition of xdp_rxq_info_valid and xdp_rxq_info_failed to track when
  xdp_rxq_info failures occur.
- Renaming of rc to err and more use of unlikely().
- Cut some duplicated code and fix an array overrun.
- Actually increment n_rx_xdp_tx when packets are transmitted.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d170eb69 cd846bef
...@@ -946,8 +946,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) ...@@ -946,8 +946,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx)
/* Extra channels, even those with TXQs (PTP), do not require /* Extra channels, even those with TXQs (PTP), do not require
* PIO resources. * PIO resources.
*/ */
if (!channel->type->want_pio) if (!channel->type->want_pio ||
channel->channel >= efx->xdp_channel_offset)
continue; continue;
efx_for_each_channel_tx_queue(tx_queue, channel) { efx_for_each_channel_tx_queue(tx_queue, channel) {
/* We assign the PIO buffers to queues in /* We assign the PIO buffers to queues in
* reverse order to allow for the following * reverse order to allow for the following
...@@ -1296,8 +1298,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) ...@@ -1296,8 +1298,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
int rc; int rc;
channel_vis = max(efx->n_channels, channel_vis = max(efx->n_channels,
(efx->n_tx_channels + efx->n_extra_tx_channels) * ((efx->n_tx_channels + efx->n_extra_tx_channels) *
EFX_TXQ_TYPES); EFX_TXQ_TYPES) +
efx->n_xdp_channels * efx->xdp_tx_per_channel);
#ifdef EFX_USE_PIO #ifdef EFX_USE_PIO
/* Try to allocate PIO buffers if wanted and if the full /* Try to allocate PIO buffers if wanted and if the full
...@@ -2434,11 +2437,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) ...@@ -2434,11 +2437,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
/* TSOv2 is a limited resource that can only be configured on a limited /* TSOv2 is a limited resource that can only be configured on a limited
* number of queues. TSO without checksum offload is not really a thing, * number of queues. TSO without checksum offload is not really a thing,
* so we only enable it for those queues. * so we only enable it for those queues.
* TSOv2 cannot be used with Hardware timestamping. * TSOv2 cannot be used with Hardware timestamping, and is never needed
* for XDP tx.
*/ */
if (csum_offload && (nic_data->datapath_caps2 & if (csum_offload && (nic_data->datapath_caps2 &
(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) && (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
!tx_queue->timestamping) { !tx_queue->timestamping && !tx_queue->xdp_tx) {
tso_v2 = true; tso_v2 = true;
netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
channel->channel); channel->channel);
......
This diff is collapsed.
...@@ -322,4 +322,7 @@ static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem) ...@@ -322,4 +322,7 @@ static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem)
return true; return true;
} }
int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
bool flush);
#endif /* EFX_EFX_H */ #endif /* EFX_EFX_H */
...@@ -83,6 +83,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { ...@@ -83,6 +83,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
}; };
#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
...@@ -399,6 +403,19 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) ...@@ -399,6 +403,19 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
} }
} }
} }
if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
unsigned short xdp;
for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
n_stats++;
if (strings) {
snprintf(strings, ETH_GSTRING_LEN,
"tx-xdp-cpu-%hu.tx_packets", xdp);
strings += ETH_GSTRING_LEN;
}
}
}
return n_stats; return n_stats;
} }
...@@ -509,6 +526,14 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, ...@@ -509,6 +526,14 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
data++; data++;
} }
} }
if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
int xdp;
for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
data++;
}
}
efx_ptp_update_stats(efx, data); efx_ptp_update_stats(efx, data);
} }
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/i2c.h> #include <linux/i2c.h>
#include <linux/mtd/mtd.h> #include <linux/mtd/mtd.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
#include <net/xdp.h>
#include "enum.h" #include "enum.h"
#include "bitfield.h" #include "bitfield.h"
...@@ -136,7 +137,8 @@ struct efx_special_buffer { ...@@ -136,7 +137,8 @@ struct efx_special_buffer {
* struct efx_tx_buffer - buffer state for a TX descriptor * struct efx_tx_buffer - buffer state for a TX descriptor
* @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
* freed when descriptor completes * freed when descriptor completes
* @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor. * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data
* member is the associated buffer to drop a page reference on.
* @dma_addr: DMA address of the fragment. * @dma_addr: DMA address of the fragment.
* @flags: Flags for allocation and DMA mapping type * @flags: Flags for allocation and DMA mapping type
* @len: Length of this fragment. * @len: Length of this fragment.
...@@ -146,7 +148,10 @@ struct efx_special_buffer { ...@@ -146,7 +148,10 @@ struct efx_special_buffer {
* Only valid if @unmap_len != 0. * Only valid if @unmap_len != 0.
*/ */
struct efx_tx_buffer { struct efx_tx_buffer {
const struct sk_buff *skb; union {
const struct sk_buff *skb;
struct xdp_frame *xdpf;
};
union { union {
efx_qword_t option; efx_qword_t option;
dma_addr_t dma_addr; dma_addr_t dma_addr;
...@@ -160,6 +165,7 @@ struct efx_tx_buffer { ...@@ -160,6 +165,7 @@ struct efx_tx_buffer {
#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ #define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */
#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ #define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */
#define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */ #define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */
#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */
/** /**
* struct efx_tx_queue - An Efx TX queue * struct efx_tx_queue - An Efx TX queue
...@@ -189,6 +195,7 @@ struct efx_tx_buffer { ...@@ -189,6 +195,7 @@ struct efx_tx_buffer {
* @piobuf_offset: Buffer offset to be specified in PIO descriptors * @piobuf_offset: Buffer offset to be specified in PIO descriptors
* @initialised: Has hardware queue been initialised? * @initialised: Has hardware queue been initialised?
* @timestamping: Is timestamping enabled for this channel? * @timestamping: Is timestamping enabled for this channel?
* @xdp_tx: Is this an XDP tx queue?
* @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and * @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and
* may also map tx data, depending on the nature of the TSO implementation. * may also map tx data, depending on the nature of the TSO implementation.
* @read_count: Current read pointer. * @read_count: Current read pointer.
...@@ -250,6 +257,7 @@ struct efx_tx_queue { ...@@ -250,6 +257,7 @@ struct efx_tx_queue {
unsigned int piobuf_offset; unsigned int piobuf_offset;
bool initialised; bool initialised;
bool timestamping; bool timestamping;
bool xdp_tx;
/* Function pointers used in the fast path. */ /* Function pointers used in the fast path. */
int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
...@@ -363,6 +371,8 @@ struct efx_rx_page_state { ...@@ -363,6 +371,8 @@ struct efx_rx_page_state {
* refill was triggered. * refill was triggered.
* @recycle_count: RX buffer recycle counter. * @recycle_count: RX buffer recycle counter.
* @slow_fill: Timer used to defer efx_nic_generate_fill_event(). * @slow_fill: Timer used to defer efx_nic_generate_fill_event().
* @xdp_rxq_info: XDP specific RX queue information.
* @xdp_rxq_info_valid: Is xdp_rxq_info valid data?
*/ */
struct efx_rx_queue { struct efx_rx_queue {
struct efx_nic *efx; struct efx_nic *efx;
...@@ -394,6 +404,8 @@ struct efx_rx_queue { ...@@ -394,6 +404,8 @@ struct efx_rx_queue {
unsigned int slow_fill_count; unsigned int slow_fill_count;
/* Statistics to supplement MAC stats */ /* Statistics to supplement MAC stats */
unsigned long rx_packets; unsigned long rx_packets;
struct xdp_rxq_info xdp_rxq_info;
bool xdp_rxq_info_valid;
}; };
enum efx_sync_events_state { enum efx_sync_events_state {
...@@ -441,6 +453,10 @@ enum efx_sync_events_state { ...@@ -441,6 +453,10 @@ enum efx_sync_events_state {
* lack of descriptors * lack of descriptors
* @n_rx_merge_events: Number of RX merged completion events * @n_rx_merge_events: Number of RX merged completion events
* @n_rx_merge_packets: Number of RX packets completed by merged events * @n_rx_merge_packets: Number of RX packets completed by merged events
* @n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP
* @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors
* @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP
* @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP
* @rx_pkt_n_frags: Number of fragments in next packet to be delivered by * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
* __efx_rx_packet(), or zero if there is none * __efx_rx_packet(), or zero if there is none
* @rx_pkt_index: Ring index of first buffer for next packet to be delivered * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
...@@ -494,6 +510,10 @@ struct efx_channel { ...@@ -494,6 +510,10 @@ struct efx_channel {
unsigned int n_rx_nodesc_trunc; unsigned int n_rx_nodesc_trunc;
unsigned int n_rx_merge_events; unsigned int n_rx_merge_events;
unsigned int n_rx_merge_packets; unsigned int n_rx_merge_packets;
unsigned int n_rx_xdp_drops;
unsigned int n_rx_xdp_bad_drops;
unsigned int n_rx_xdp_tx;
unsigned int n_rx_xdp_redirect;
unsigned int rx_pkt_n_frags; unsigned int rx_pkt_n_frags;
unsigned int rx_pkt_index; unsigned int rx_pkt_index;
...@@ -818,6 +838,8 @@ struct efx_async_filter_insertion { ...@@ -818,6 +838,8 @@ struct efx_async_filter_insertion {
* @msi_context: Context for each MSI * @msi_context: Context for each MSI
* @extra_channel_types: Types of extra (non-traffic) channels that * @extra_channel_types: Types of extra (non-traffic) channels that
* should be allocated for this NIC * should be allocated for this NIC
* @xdp_tx_queue_count: Number of entries in %xdp_tx_queues.
* @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit.
* @rxq_entries: Size of receive queues requested by user. * @rxq_entries: Size of receive queues requested by user.
* @txq_entries: Size of transmit queues requested by user. * @txq_entries: Size of transmit queues requested by user.
* @txq_stop_thresh: TX queue fill level at or above which we stop it. * @txq_stop_thresh: TX queue fill level at or above which we stop it.
...@@ -830,6 +852,9 @@ struct efx_async_filter_insertion { ...@@ -830,6 +852,9 @@ struct efx_async_filter_insertion {
* @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_rx_channels: Number of channels used for RX (= number of RX queues)
* @n_tx_channels: Number of channels used for TX * @n_tx_channels: Number of channels used for TX
* @n_extra_tx_channels: Number of extra channels with TX queues * @n_extra_tx_channels: Number of extra channels with TX queues
* @n_xdp_channels: Number of channels used for XDP TX
* @xdp_channel_offset: Offset of zeroth channel used for XDP TX.
* @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel.
* @rx_ip_align: RX DMA address offset to have IP header aligned in * @rx_ip_align: RX DMA address offset to have IP header aligned in
* in accordance with NET_IP_ALIGN * in accordance with NET_IP_ALIGN
* @rx_dma_len: Current maximum RX DMA length * @rx_dma_len: Current maximum RX DMA length
...@@ -894,6 +919,7 @@ struct efx_async_filter_insertion { ...@@ -894,6 +919,7 @@ struct efx_async_filter_insertion {
* @loopback_mode: Loopback status * @loopback_mode: Loopback status
* @loopback_modes: Supported loopback mode bitmask * @loopback_modes: Supported loopback mode bitmask
* @loopback_selftest: Offline self-test private state * @loopback_selftest: Offline self-test private state
* @xdp_prog: Current XDP programme for this interface
* @filter_sem: Filter table rw_semaphore, protects existence of @filter_state * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
* @filter_state: Architecture-dependent filter table state * @filter_state: Architecture-dependent filter table state
* @rps_mutex: Protects RPS state of all channels * @rps_mutex: Protects RPS state of all channels
...@@ -919,6 +945,8 @@ struct efx_async_filter_insertion { ...@@ -919,6 +945,8 @@ struct efx_async_filter_insertion {
* @ptp_data: PTP state data * @ptp_data: PTP state data
* @ptp_warned: has this NIC seen and warned about unexpected PTP events? * @ptp_warned: has this NIC seen and warned about unexpected PTP events?
* @vpd_sn: Serial number read from VPD * @vpd_sn: Serial number read from VPD
* @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
* xdp_rxq_info structures?
* @monitor_work: Hardware monitor workitem * @monitor_work: Hardware monitor workitem
* @biu_lock: BIU (bus interface unit) lock * @biu_lock: BIU (bus interface unit) lock
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This * @last_irq_cpu: Last CPU to handle a possible test interrupt. This
...@@ -966,6 +994,9 @@ struct efx_nic { ...@@ -966,6 +994,9 @@ struct efx_nic {
const struct efx_channel_type * const struct efx_channel_type *
extra_channel_type[EFX_MAX_EXTRA_CHANNELS]; extra_channel_type[EFX_MAX_EXTRA_CHANNELS];
unsigned int xdp_tx_queue_count;
struct efx_tx_queue **xdp_tx_queues;
unsigned rxq_entries; unsigned rxq_entries;
unsigned txq_entries; unsigned txq_entries;
unsigned int txq_stop_thresh; unsigned int txq_stop_thresh;
...@@ -984,6 +1015,9 @@ struct efx_nic { ...@@ -984,6 +1015,9 @@ struct efx_nic {
unsigned tx_channel_offset; unsigned tx_channel_offset;
unsigned n_tx_channels; unsigned n_tx_channels;
unsigned n_extra_tx_channels; unsigned n_extra_tx_channels;
unsigned int n_xdp_channels;
unsigned int xdp_channel_offset;
unsigned int xdp_tx_per_channel;
unsigned int rx_ip_align; unsigned int rx_ip_align;
unsigned int rx_dma_len; unsigned int rx_dma_len;
unsigned int rx_buffer_order; unsigned int rx_buffer_order;
...@@ -1053,6 +1087,10 @@ struct efx_nic { ...@@ -1053,6 +1087,10 @@ struct efx_nic {
u64 loopback_modes; u64 loopback_modes;
void *loopback_selftest; void *loopback_selftest;
/* We access loopback_selftest immediately before running XDP,
* so we want them next to each other.
*/
struct bpf_prog __rcu *xdp_prog;
struct rw_semaphore filter_sem; struct rw_semaphore filter_sem;
void *filter_state; void *filter_state;
...@@ -1082,6 +1120,7 @@ struct efx_nic { ...@@ -1082,6 +1120,7 @@ struct efx_nic {
bool ptp_warned; bool ptp_warned;
char *vpd_sn; char *vpd_sn;
bool xdp_rxq_info_failed;
/* The following fields may be written more often */ /* The following fields may be written more often */
...@@ -1473,10 +1512,24 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) ...@@ -1473,10 +1512,24 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type]; return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type];
} }
/* Return the @index'th XDP TX channel of @efx.
 *
 * XDP channels are stored in efx->channel[] starting at
 * efx->xdp_channel_offset.  An @index at or beyond efx->n_xdp_channels
 * only triggers the paranoid one-shot warning; the array is still indexed.
 */
static inline struct efx_channel *
efx_get_xdp_channel(struct efx_nic *efx, unsigned int index)
{
	unsigned int slot = efx->xdp_channel_offset + index;

	EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels);
	return efx->channel[slot];
}
static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel)
{
return channel->channel - channel->efx->xdp_channel_offset <
channel->efx->n_xdp_channels;
}
static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
{ {
return channel->type && channel->type->want_txqs && return efx_channel_is_xdp_tx(channel) ||
channel->type->want_txqs(channel); (channel->type && channel->type->want_txqs &&
channel->type->want_txqs(channel));
} }
static inline struct efx_tx_queue * static inline struct efx_tx_queue *
...@@ -1500,7 +1553,8 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue) ...@@ -1500,7 +1553,8 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue)
else \ else \
for (_tx_queue = (_channel)->tx_queue; \ for (_tx_queue = (_channel)->tx_queue; \
_tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES && \ _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES && \
efx_tx_queue_used(_tx_queue); \ (efx_tx_queue_used(_tx_queue) || \
efx_channel_is_xdp_tx(_channel)); \
_tx_queue++) _tx_queue++)
/* Iterate over all possible TX queues belonging to a channel */ /* Iterate over all possible TX queues belonging to a channel */
......
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <linux/iommu.h> #include <linux/iommu.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <net/xdp.h>
#include <linux/bpf_trace.h>
#include "net_driver.h" #include "net_driver.h"
#include "efx.h" #include "efx.h"
#include "filter.h" #include "filter.h"
...@@ -27,6 +29,9 @@ ...@@ -27,6 +29,9 @@
/* Preferred number of descriptors to fill at once */ /* Preferred number of descriptors to fill at once */
#define EFX_RX_PREFERRED_BATCH 8U #define EFX_RX_PREFERRED_BATCH 8U
/* Maximum rx prefix used by any architecture. */
#define EFX_MAX_RX_PREFIX_SIZE 16
/* Number of RX buffers to recycle pages for. When creating the RX page recycle /* Number of RX buffers to recycle pages for. When creating the RX page recycle
* ring, this number is divided by the number of buffers per page to calculate * ring, this number is divided by the number of buffers per page to calculate
* the number of pages to store in the RX page recycle ring. * the number of pages to store in the RX page recycle ring.
...@@ -95,7 +100,7 @@ void efx_rx_config_page_split(struct efx_nic *efx) ...@@ -95,7 +100,7 @@ void efx_rx_config_page_split(struct efx_nic *efx)
EFX_RX_BUF_ALIGNMENT); EFX_RX_BUF_ALIGNMENT);
efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
efx->rx_page_buf_step); (efx->rx_page_buf_step + XDP_PACKET_HEADROOM));
efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
efx->rx_bufs_per_page; efx->rx_bufs_per_page;
efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
...@@ -185,6 +190,9 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) ...@@ -185,6 +190,9 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
page_offset = sizeof(struct efx_rx_page_state); page_offset = sizeof(struct efx_rx_page_state);
do { do {
page_offset += XDP_PACKET_HEADROOM;
dma_addr += XDP_PACKET_HEADROOM;
index = rx_queue->added_count & rx_queue->ptr_mask; index = rx_queue->added_count & rx_queue->ptr_mask;
rx_buf = efx_rx_buffer(rx_queue, index); rx_buf = efx_rx_buffer(rx_queue, index);
rx_buf->dma_addr = dma_addr + efx->rx_ip_align; rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
...@@ -635,6 +643,123 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, ...@@ -635,6 +643,123 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
netif_receive_skb(skb); netif_receive_skb(skb);
} }
/**
 * efx_do_xdp - perform XDP processing on a received packet
 * @efx: NIC the packet was received on
 * @channel: channel the packet was received on
 * @rx_buf: first RX buffer of the packet
 * @ehp: in/out pointer to the start of the packet data; moved on XDP_PASS
 *	if the programme changed the start of the packet
 *
 * Runs the attached XDP programme (if any) on the packet and acts on its
 * verdict.  Packets made of more than one fragment are dropped, because the
 * programme is only ever given a single buffer.
 *
 * Return: true if the packet should still be delivered (no programme is
 * attached, or the verdict was XDP_PASS); false if the packet was consumed
 * here (dropped, transmitted or redirected).
 */
static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
		       struct efx_rx_buffer *rx_buf, u8 **ehp)
{
	u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
	struct efx_rx_queue *rx_queue;
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	struct xdp_buff xdp;
	u32 xdp_act;
	s16 offset;
	int err;

	rcu_read_lock();

	xdp_prog = rcu_dereference(efx->xdp_prog);
	if (!xdp_prog) {
		rcu_read_unlock();
		return true;
	}

	rx_queue = efx_channel_get_rx_queue(channel);

	if (unlikely(channel->rx_pkt_n_frags > 1)) {
		/* We can't do XDP on fragmented packets - drop. */
		rcu_read_unlock();
		efx_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  "XDP is not possible with multiple receive fragments (%d)\n",
				  channel->rx_pkt_n_frags);
		channel->n_rx_xdp_bad_drops++;
		return false;
	}

	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
				rx_buf->len, DMA_FROM_DEVICE);

	/* Save the rx prefix: it sits immediately before the packet data, in
	 * the headroom the programme is allowed to overwrite.
	 */
	EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
	memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
	       efx->rx_prefix_size);

	xdp.data = *ehp;
	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;

	/* No support yet for XDP metadata */
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + rx_buf->len;
	xdp.rxq = &rx_queue->xdp_rxq_info;

	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);

	/* The RCU read-side section is deliberately kept open across the
	 * verdict handling: xdp_prog is still handed to xdp_do_redirect()
	 * and trace_xdp_exception() below.
	 */

	/* How far the programme moved the start of the packet. */
	offset = (u8 *)xdp.data - *ehp;

	switch (xdp_act) {
	case XDP_PASS:
		/* Fix up rx prefix.
		 * NOTE(review): only a moved packet start is reflected in
		 * rx_buf->len; a changed xdp.data_end is not applied here.
		 */
		if (offset) {
			*ehp += offset;
			rx_buf->page_offset += offset;
			rx_buf->len -= offset;
			memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
			       efx->rx_prefix_size);
		}
		break;

	case XDP_TX:
		/* Buffer ownership passes to tx on success. */
		xdpf = convert_to_xdp_frame(&xdp);
		/* The conversion can fail and return NULL; the tx path
		 * dereferences the frame, so never hand it a NULL one.
		 */
		if (likely(xdpf))
			err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
		else
			err = -ENOSPC;
		if (unlikely(err != 1)) {
			efx_free_rx_buffers(rx_queue, rx_buf, 1);
			if (net_ratelimit())
				netif_err(efx, rx_err, efx->net_dev,
					  "XDP TX failed (%d)\n", err);
			channel->n_rx_xdp_bad_drops++;
			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		} else {
			channel->n_rx_xdp_tx++;
		}
		break;

	case XDP_REDIRECT:
		err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
		if (unlikely(err)) {
			efx_free_rx_buffers(rx_queue, rx_buf, 1);
			if (net_ratelimit())
				netif_err(efx, rx_err, efx->net_dev,
					  "XDP redirect failed (%d)\n", err);
			channel->n_rx_xdp_bad_drops++;
			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		} else {
			channel->n_rx_xdp_redirect++;
		}
		break;

	default:
		/* Unknown verdict: warn, drop and count it as an error. */
		bpf_warn_invalid_xdp_action(xdp_act);
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
		channel->n_rx_xdp_bad_drops++;
		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		break;

	case XDP_ABORTED:
		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		/* Fall through */
	case XDP_DROP:
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
		channel->n_rx_xdp_drops++;
		break;
	}

	rcu_read_unlock();

	return xdp_act == XDP_PASS;
}
/* Handle a received packet. Second half: Touches packet payload. */ /* Handle a received packet. Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel) void __efx_rx_packet(struct efx_channel *channel)
{ {
...@@ -663,6 +788,9 @@ void __efx_rx_packet(struct efx_channel *channel) ...@@ -663,6 +788,9 @@ void __efx_rx_packet(struct efx_channel *channel)
goto out; goto out;
} }
if (!efx_do_xdp(efx, channel, rx_buf, &eh))
goto out;
if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
...@@ -731,6 +859,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) ...@@ -731,6 +859,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
{ {
struct efx_nic *efx = rx_queue->efx; struct efx_nic *efx = rx_queue->efx;
unsigned int max_fill, trigger, max_trigger; unsigned int max_fill, trigger, max_trigger;
int rc = 0;
netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
"initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
...@@ -764,6 +893,19 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) ...@@ -764,6 +893,19 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
rx_queue->fast_fill_trigger = trigger; rx_queue->fast_fill_trigger = trigger;
rx_queue->refill_enabled = true; rx_queue->refill_enabled = true;
/* Initialise XDP queue information */
rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
rx_queue->core_index);
if (rc) {
netif_err(efx, rx_err, efx->net_dev,
"Failure to initialise XDP queue information rc=%d\n",
rc);
efx->xdp_rxq_info_failed = true;
} else {
rx_queue->xdp_rxq_info_valid = true;
}
/* Set up RX descriptor ring */ /* Set up RX descriptor ring */
efx_nic_init_rx(rx_queue); efx_nic_init_rx(rx_queue);
} }
...@@ -805,6 +947,11 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) ...@@ -805,6 +947,11 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
} }
kfree(rx_queue->page_ring); kfree(rx_queue->page_ring);
rx_queue->page_ring = NULL; rx_queue->page_ring = NULL;
if (rx_queue->xdp_rxq_info_valid)
xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
rx_queue->xdp_rxq_info_valid = false;
} }
void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
......
...@@ -95,6 +95,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, ...@@ -95,6 +95,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
"TX queue %d transmission id %x complete\n", "TX queue %d transmission id %x complete\n",
tx_queue->queue, tx_queue->read_count); tx_queue->queue, tx_queue->read_count);
} else if (buffer->flags & EFX_TX_BUF_XDP) {
xdp_return_frame_rx_napi(buffer->xdpf);
} }
buffer->len = 0; buffer->len = 0;
...@@ -597,6 +599,94 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) ...@@ -597,6 +599,94 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
/* Hand the first @n frames of @xdpfs back via xdp_return_frame_rx_napi().
 * Does nothing when @n is zero or negative.
 */
static void efx_xdp_return_frames(int n, struct xdp_frame **xdpfs)
{
	int idx = 0;

	while (idx < n) {
		xdp_return_frame_rx_napi(xdpfs[idx]);
		idx++;
	}
}
/* Queue up to @n XDP frames from @xdpfs for transmission.
 *
 * The TX queue is picked from efx->xdp_tx_queues[] using the current CPU
 * number.  Each frame is DMA-mapped and given a single descriptor; queueing
 * stops early when the ring has no room left or a DMA mapping fails.
 * When @flush is set and at least one frame was queued, the descriptors are
 * pushed to the hardware before returning.
 *
 * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC
 * (for XDP redirect).
 *
 * Returns the number of frames queued; any frames beyond that count are
 * returned through efx_xdp_return_frames().  Returns 0 when @n is zero,
 * -EINVAL when there is no XDP TX queue for this CPU or @xdpfs is NULL
 * while @n is non-zero, and -EIO when no frame at all could be queued
 * (in which case the frames are not released here).
 */
int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
		       bool flush)
{
	int cpu = raw_smp_processor_id();
	struct efx_tx_queue *tx_queue;
	struct efx_tx_buffer *tx_buffer;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	unsigned int len;
	int sent = 0;
	int space;

	if (!efx->xdp_tx_queue_count ||
	    unlikely(cpu >= efx->xdp_tx_queue_count))
		return -EINVAL;

	tx_queue = efx->xdp_tx_queues[cpu];
	if (unlikely(!tx_queue) || unlikely(n && !xdpfs))
		return -EINVAL;

	if (n == 0)
		return 0;

	/* Check for available space. We should never need multiple
	 * descriptors per frame.
	 */
	space = efx->txq_entries +
		tx_queue->read_count - tx_queue->insert_count;

	while (sent < n && sent < space) {
		xdpf = xdpfs[sent];

		/* We'll want a descriptor for this tx. */
		prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue));

		/* Map for DMA. */
		len = xdpf->len;
		dma_addr = dma_map_single(&efx->pci_dev->dev,
					  xdpf->data, len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&efx->pci_dev->dev, dma_addr))
			break;

		/* Create descriptor and set up for unmapping DMA. */
		tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
		tx_buffer->flags = EFX_TX_BUF_XDP |
				   EFX_TX_BUF_MAP_SINGLE;
		tx_buffer->xdpf = xdpf;
		tx_buffer->unmap_len = len;
		tx_buffer->dma_offset = 0;

		tx_queue->tx_packets++;
		sent++;
	}

	/* Nothing was queued, so there is nothing to push or hand back. */
	if (sent == 0)
		return -EIO;

	/* Pass mapped frames to hardware. */
	if (flush)
		efx_nic_push_buffers(tx_queue);

	/* Release whatever did not make it onto the ring. */
	efx_xdp_return_frames(n - sent, xdpfs + sent);

	return sent;
}
/* Remove packets from the TX queue /* Remove packets from the TX queue
* *
* This removes packets from the TX queue, up to and including the * This removes packets from the TX queue, up to and including the
...@@ -857,6 +947,8 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) ...@@ -857,6 +947,8 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->completed_timestamp_major = 0; tx_queue->completed_timestamp_major = 0;
tx_queue->completed_timestamp_minor = 0; tx_queue->completed_timestamp_minor = 0;
tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
/* Set up default function pointers. These may get replaced by /* Set up default function pointers. These may get replaced by
* efx_nic_init_tx() based off NIC/queue capabilities. * efx_nic_init_tx() based off NIC/queue capabilities.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment