Commit 27c90dcb authored by David S. Miller's avatar David S. Miller

Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next

Ben Hutchings says:

====================
1. Some cleanup from Fengguang Wu and his kbuild robot.
2. Support for ethtool register dump on EF10.
3. Change soft-TSO to take advantage of firmware assistance on EF10.
4. Support for PIO TX buffers and descriptors on EF10, enabled on
architectures that support write-combining mappings.
5. Accelerated RFS support for TCP/IPv6 and UDP/IPv6 on EF10.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents f6982299 c47b2d9d
This diff is collapsed.
......@@ -315,6 +315,7 @@
#define ESF_DZ_TX_PIO_TYPE_WIDTH 1
#define ESF_DZ_TX_PIO_OPT_LBN 60
#define ESF_DZ_TX_PIO_OPT_WIDTH 3
#define ESE_DZ_TX_OPTION_DESC_PIO 1
#define ESF_DZ_TX_PIO_CONT_LBN 59
#define ESF_DZ_TX_PIO_CONT_WIDTH 1
#define ESF_DZ_TX_PIO_BYTE_CNT_LBN 32
......
......@@ -30,6 +30,7 @@ efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc);
extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
extern unsigned int efx_piobuf_size;
/* RX */
extern void efx_rx_config_page_split(struct efx_nic *efx);
......
......@@ -70,6 +70,7 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
......@@ -1035,8 +1036,8 @@ static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
return 0;
}
int efx_ethtool_get_ts_info(struct net_device *net_dev,
struct ethtool_ts_info *ts_info)
static int efx_ethtool_get_ts_info(struct net_device *net_dev,
struct ethtool_ts_info *ts_info)
{
struct efx_nic *efx = netdev_priv(net_dev);
......
......@@ -66,6 +66,11 @@
#define EFX_USE_QWORD_IO 1
#endif
/* PIO is a win only if write-combining is possible */
#ifdef ARCH_HAS_IOREMAP_WC
#define EFX_USE_PIO 1
#endif
#ifdef EFX_USE_QWORD_IO
static inline void _efx_writeq(struct efx_nic *efx, __le64 value,
unsigned int reg)
......
......@@ -182,6 +182,9 @@ struct efx_tx_buffer {
* @tsoh_page: Array of pages of TSO header buffers
* @txd: The hardware descriptor ring
* @ptr_mask: The size of the ring minus 1.
* @piobuf: PIO buffer region for this TX queue (shared with its partner).
* Size of the region is efx_piobuf_size.
* @piobuf_offset: Buffer offset to be specified in PIO descriptors
* @initialised: Has hardware queue been initialised?
* @read_count: Current read pointer.
* This is the number of buffers that have been removed from both rings.
......@@ -209,6 +212,7 @@ struct efx_tx_buffer {
* blocks
* @tso_packets: Number of packets via the TSO xmit path
* @pushes: Number of times the TX push feature has been used
* @pio_packets: Number of times the TX PIO feature has been used
* @empty_read_count: If the completion path has seen the queue as empty
* and the transmission path has not yet checked this, the value of
* @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
......@@ -223,6 +227,8 @@ struct efx_tx_queue {
struct efx_buffer *tsoh_page;
struct efx_special_buffer txd;
unsigned int ptr_mask;
void __iomem *piobuf;
unsigned int piobuf_offset;
bool initialised;
/* Members used mainly on the completion path */
......@@ -238,6 +244,7 @@ struct efx_tx_queue {
unsigned int tso_long_headers;
unsigned int tso_packets;
unsigned int pushes;
unsigned int pio_packets;
/* Members shared between paths and sometimes updated */
unsigned int empty_read_count ____cacheline_aligned_in_smp;
......
......@@ -19,6 +19,7 @@
#include "bitfield.h"
#include "efx.h"
#include "nic.h"
#include "ef10_regs.h"
#include "farch_regs.h"
#include "io.h"
#include "workarounds.h"
......@@ -166,26 +167,30 @@ void efx_nic_fini_interrupt(struct efx_nic *efx)
/* Register dump */
#define REGISTER_REVISION_A 1
#define REGISTER_REVISION_B 2
#define REGISTER_REVISION_C 3
#define REGISTER_REVISION_Z 3 /* latest revision */
#define REGISTER_REVISION_FA 1
#define REGISTER_REVISION_FB 2
#define REGISTER_REVISION_FC 3
#define REGISTER_REVISION_FZ 3 /* last Falcon arch revision */
#define REGISTER_REVISION_ED 4
#define REGISTER_REVISION_EZ 4 /* latest EF10 revision */
struct efx_nic_reg {
u32 offset:24;
u32 min_revision:2, max_revision:2;
u32 min_revision:3, max_revision:3;
};
#define REGISTER(name, min_rev, max_rev) { \
FR_ ## min_rev ## max_rev ## _ ## name, \
REGISTER_REVISION_ ## min_rev, REGISTER_REVISION_ ## max_rev \
#define REGISTER(name, arch, min_rev, max_rev) { \
arch ## R_ ## min_rev ## max_rev ## _ ## name, \
REGISTER_REVISION_ ## arch ## min_rev, \
REGISTER_REVISION_ ## arch ## max_rev \
}
#define REGISTER_AA(name) REGISTER(name, A, A)
#define REGISTER_AB(name) REGISTER(name, A, B)
#define REGISTER_AZ(name) REGISTER(name, A, Z)
#define REGISTER_BB(name) REGISTER(name, B, B)
#define REGISTER_BZ(name) REGISTER(name, B, Z)
#define REGISTER_CZ(name) REGISTER(name, C, Z)
#define REGISTER_AA(name) REGISTER(name, F, A, A)
#define REGISTER_AB(name) REGISTER(name, F, A, B)
#define REGISTER_AZ(name) REGISTER(name, F, A, Z)
#define REGISTER_BB(name) REGISTER(name, F, B, B)
#define REGISTER_BZ(name) REGISTER(name, F, B, Z)
#define REGISTER_CZ(name) REGISTER(name, F, C, Z)
#define REGISTER_DZ(name) REGISTER(name, E, D, Z)
static const struct efx_nic_reg efx_nic_regs[] = {
REGISTER_AZ(ADR_REGION),
......@@ -292,37 +297,42 @@ static const struct efx_nic_reg efx_nic_regs[] = {
REGISTER_AB(XX_TXDRV_CTL),
/* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */
/* XX_CORE_STAT is partly RC */
REGISTER_DZ(BIU_HW_REV_ID),
REGISTER_DZ(MC_DB_LWRD),
REGISTER_DZ(MC_DB_HWRD),
};
struct efx_nic_reg_table {
u32 offset:24;
u32 min_revision:2, max_revision:2;
u32 min_revision:3, max_revision:3;
u32 step:6, rows:21;
};
#define REGISTER_TABLE_DIMENSIONS(_, offset, min_rev, max_rev, step, rows) { \
#define REGISTER_TABLE_DIMENSIONS(_, offset, arch, min_rev, max_rev, step, rows) { \
offset, \
REGISTER_REVISION_ ## min_rev, REGISTER_REVISION_ ## max_rev, \
REGISTER_REVISION_ ## arch ## min_rev, \
REGISTER_REVISION_ ## arch ## max_rev, \
step, rows \
}
#define REGISTER_TABLE(name, min_rev, max_rev) \
#define REGISTER_TABLE(name, arch, min_rev, max_rev) \
REGISTER_TABLE_DIMENSIONS( \
name, FR_ ## min_rev ## max_rev ## _ ## name, \
min_rev, max_rev, \
FR_ ## min_rev ## max_rev ## _ ## name ## _STEP, \
FR_ ## min_rev ## max_rev ## _ ## name ## _ROWS)
#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, A, A)
#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, A, Z)
#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, B, B)
#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, B, Z)
name, arch ## R_ ## min_rev ## max_rev ## _ ## name, \
arch, min_rev, max_rev, \
arch ## R_ ## min_rev ## max_rev ## _ ## name ## _STEP, \
arch ## R_ ## min_rev ## max_rev ## _ ## name ## _ROWS)
#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, F, A, A)
#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, F, A, Z)
#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, F, B, B)
#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, F, B, Z)
#define REGISTER_TABLE_BB_CZ(name) \
REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, B, B, \
REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, B, B, \
FR_BZ_ ## name ## _STEP, \
FR_BB_ ## name ## _ROWS), \
REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, C, Z, \
REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, C, Z, \
FR_BZ_ ## name ## _STEP, \
FR_CZ_ ## name ## _ROWS)
#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, C, Z)
#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z)
#define REGISTER_TABLE_DZ(name) REGISTER_TABLE(name, E, D, Z)
static const struct efx_nic_reg_table efx_nic_reg_tables[] = {
/* DRIVER is not used */
......@@ -340,9 +350,9 @@ static const struct efx_nic_reg_table efx_nic_reg_tables[] = {
* 1K entries allows for some expansion of queue count and
* size before we need to change the version. */
REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL_KER, FR_AA_BUF_FULL_TBL_KER,
A, A, 8, 1024),
F, A, A, 8, 1024),
REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL, FR_BZ_BUF_FULL_TBL,
B, Z, 8, 1024),
F, B, Z, 8, 1024),
REGISTER_TABLE_CZ(RX_MAC_FILTER_TBL0),
REGISTER_TABLE_BB_CZ(TIMER_TBL),
REGISTER_TABLE_BB_CZ(TX_PACE_TBL),
......@@ -353,6 +363,7 @@ static const struct efx_nic_reg_table efx_nic_reg_tables[] = {
/* MSIX_PBA_TABLE is not mapped */
/* SRM_DBG is not mapped (and is redundant with BUF_FLL_TBL) */
REGISTER_TABLE_BZ(RX_FILTER_TBL0),
REGISTER_TABLE_DZ(BIU_MC_SFT_STATUS),
};
size_t efx_nic_get_regs_len(struct efx_nic *efx)
......
......@@ -71,6 +71,26 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index;
}
/* Report whether the NIC considers this TX queue empty, given the
* write_count used for the last doorbell push. May return false
* negative.
*/
static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);
if (empty_read_count == 0)
return false;
return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
}
static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue)
{
return __efx_nic_tx_is_empty(tx_queue, tx_queue->write_count);
}
/* Decide whether to push a TX descriptor to the NIC vs merely writing
* the doorbell. This can reduce latency when we are adding a single
* descriptor to an empty queue, but is otherwise pointless. Further,
......@@ -80,14 +100,10 @@ efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
unsigned empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);
if (empty_read_count == 0)
return false;
bool was_empty = __efx_nic_tx_is_empty(tx_queue, write_count);
tx_queue->empty_read_count = 0;
return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0
&& tx_queue->write_count - write_count == 1;
return was_empty && tx_queue->write_count - write_count == 1;
}
/* Returns a pointer to the specified descriptor in the RX descriptor queue */
......@@ -389,6 +405,12 @@ enum {
EF10_STAT_COUNT
};
/* Maximum number of TX PIO buffers we may allocate to a function.
* This matches the total number of buffers on each SFC9100-family
* controller.
*/
#define EF10_TX_PIOBUF_COUNT 16
/**
* struct efx_ef10_nic_data - EF10 architecture NIC state
* @mcdi_buf: DMA buffer for MCDI
......@@ -397,6 +419,13 @@ enum {
* @n_allocated_vis: Number of VIs allocated to this function
* @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
* @must_restore_filters: Flag: filters have yet to be restored after MC reboot
* @n_piobufs: Number of PIO buffers allocated to this function
* @wc_membase: Base address of write-combining mapping of the memory BAR
* @pio_write_base: Base address for writing PIO buffers
* @pio_write_vi_base: Relative VI number for @pio_write_base
* @piobuf_handle: Handle of each PIO buffer allocated
* @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
* reboot
* @rx_rss_context: Firmware handle for our RSS context
* @stats: Hardware statistics
* @workaround_35388: Flag: firmware supports workaround for bug 35388
......@@ -412,6 +441,11 @@ struct efx_ef10_nic_data {
unsigned int n_allocated_vis;
bool must_realloc_vis;
bool must_restore_filters;
unsigned int n_piobufs;
void __iomem *wc_membase, *pio_write_base;
unsigned int pio_write_vi_base;
unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
bool must_restore_piobufs;
u32 rx_rss_context;
u64 stats[EF10_STAT_COUNT];
bool workaround_35388;
......
......@@ -12,6 +12,7 @@
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/prefetch.h>
......@@ -818,44 +819,70 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_channel *channel;
struct efx_filter_spec spec;
const struct iphdr *ip;
const __be16 *ports;
__be16 ether_type;
int nhoff;
int rc;
nhoff = skb_network_offset(skb);
/* The core RPS/RFS code has already parsed and validated
* VLAN, IP and transport headers. We assume they are in the
* header area.
*/
if (skb->protocol == htons(ETH_P_8021Q)) {
EFX_BUG_ON_PARANOID(skb_headlen(skb) <
nhoff + sizeof(struct vlan_hdr));
if (((const struct vlan_hdr *)skb->data + nhoff)->
h_vlan_encapsulated_proto != htons(ETH_P_IP))
return -EPROTONOSUPPORT;
const struct vlan_hdr *vh =
(const struct vlan_hdr *)skb->data;
/* This is IP over 802.1q VLAN. We can't filter on the
* IP 5-tuple and the vlan together, so just strip the
* vlan header and filter on the IP part.
/* We can't filter on the IP 5-tuple and the vlan
* together, so just strip the vlan header and filter
* on the IP part.
*/
nhoff += sizeof(struct vlan_hdr);
} else if (skb->protocol != htons(ETH_P_IP)) {
return -EPROTONOSUPPORT;
EFX_BUG_ON_PARANOID(skb_headlen(skb) < sizeof(*vh));
ether_type = vh->h_vlan_encapsulated_proto;
nhoff = sizeof(struct vlan_hdr);
} else {
ether_type = skb->protocol;
nhoff = 0;
}
/* RFS must validate the IP header length before calling us */
EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip));
ip = (const struct iphdr *)(skb->data + nhoff);
if (ip_is_fragment(ip))
if (ether_type != htons(ETH_P_IP) && ether_type != htons(ETH_P_IPV6))
return -EPROTONOSUPPORT;
EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4);
ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl);
efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
rc = efx_filter_set_ipv4_full(&spec, ip->protocol,
ip->daddr, ports[1], ip->saddr, ports[0]);
if (rc)
return rc;
spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
spec.ether_type = ether_type;
if (ether_type == htons(ETH_P_IP)) {
const struct iphdr *ip =
(const struct iphdr *)(skb->data + nhoff);
EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip));
if (ip_is_fragment(ip))
return -EPROTONOSUPPORT;
spec.ip_proto = ip->protocol;
spec.rem_host[0] = ip->saddr;
spec.loc_host[0] = ip->daddr;
EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4);
ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl);
} else {
const struct ipv6hdr *ip6 =
(const struct ipv6hdr *)(skb->data + nhoff);
EFX_BUG_ON_PARANOID(skb_headlen(skb) <
nhoff + sizeof(*ip6) + 4);
spec.ip_proto = ip6->nexthdr;
memcpy(spec.rem_host, &ip6->saddr, sizeof(ip6->saddr));
memcpy(spec.loc_host, &ip6->daddr, sizeof(ip6->daddr));
ports = (const __be16 *)(ip6 + 1);
}
spec.rem_port = ports[0];
spec.loc_port = ports[1];
rc = efx->type->filter_rfs_insert(efx, &spec);
if (rc < 0)
......@@ -866,11 +893,18 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
channel = efx_get_channel(efx, skb_get_rx_queue(skb));
++channel->rfs_filters_added;
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
(ip->protocol == IPPROTO_TCP) ? "TCP" : "UDP",
&ip->saddr, ntohs(ports[0]), &ip->daddr, ntohs(ports[1]),
rxq_index, flow_id, rc);
if (ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
(spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
spec.rem_host, ntohs(ports[0]), spec.loc_host,
ntohs(ports[1]), rxq_index, flow_id, rc);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
(spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
spec.rem_host, ntohs(ports[0]), spec.loc_host,
ntohs(ports[1]), rxq_index, flow_id, rc);
return rc;
}
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment