Commit e85fbf53 authored by David S. Miller

Merge branch 'gve-improvements'

Jeroen de Borst says:

====================
gve improvements

This patchset consists of unrelated changes:

A bug fix for an issue that disabled jumbo-frame support, a few code
improvements and minor functional changes, and 3 new features:
  Supporting tx|rx-coalesce-usec for DQO (see the usage sketch below)
  Suspend/resume/shutdown
  Optional metadata descriptors
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 75df1a24 6081ac20
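
As a rough illustration only (not part of the patchset): the new DQO coalescing support is driven through the standard ethtool coalesce interface, so it can be exercised from userspace with "ethtool -C <ifname> tx-usecs 50 rx-usecs 20" or, equivalently, with the legacy SIOCETHTOOL ioctl as sketched below. The interface name and the 50/20 microsecond values are placeholders; on GQI queue formats the driver returns EOPNOTSUPP, and values above GVE_MAX_ITR_INTERVAL_DQO are rejected with EINVAL.

/* Illustrative userspace sketch, not part of this patchset: read and set the
 * gve DQO coalescing values through the legacy ethtool ioctl.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	const char *ifname = argc > 1 ? argv[1] : "eth0";
	struct ethtool_coalesce ec = { .cmd = ETHTOOL_GCOALESCE };
	struct ifreq ifr;
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&ec;

	/* Read current settings; gve returns EOPNOTSUPP on GQI queue formats. */
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GCOALESCE");
		return 1;
	}
	printf("current: tx-usecs=%u rx-usecs=%u\n",
	       ec.tx_coalesce_usecs, ec.rx_coalesce_usecs);

	/* Update only the usec fields; values above GVE_MAX_ITR_INTERVAL_DQO
	 * are rejected by the driver's set_coalesce handler.
	 */
	ec.cmd = ETHTOOL_SCOALESCE;
	ec.tx_coalesce_usecs = 50;
	ec.rx_coalesce_usecs = 20;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_SCOALESCE");
		return 1;
	}
	close(fd);
	return 0;
}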
@@ -229,6 +229,7 @@ struct gve_rx_ring {
 /* A TX desc ring entry */
 union gve_tx_desc {
 	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
+	struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */
 	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
 };
@@ -441,13 +442,13 @@ struct gve_tx_ring {
  * associated with that irq.
  */
 struct gve_notify_block {
-	__be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
+	__be32 *irq_db_index; /* pointer to idx into Bar2 */
 	char name[IFNAMSIZ + 16]; /* name registered with the kernel */
 	struct napi_struct napi; /* kernel napi struct for this block */
 	struct gve_priv *priv;
 	struct gve_tx_ring *tx; /* tx rings on this block */
 	struct gve_rx_ring *rx; /* rx rings on this block */
-} ____cacheline_aligned;
+};

 /* Tracks allowed and current queue settings */
 struct gve_queue_config {
@@ -466,6 +467,10 @@ struct gve_options_dqo_rda {
 	u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
 };

+struct gve_irq_db {
+	__be32 index;
+} ____cacheline_aligned;
+
 struct gve_ptype {
 	u8 l3_type;  /* `gve_l3_type` in gve_adminq.h */
 	u8 l4_type;  /* `gve_l4_type` in gve_adminq.h */
@@ -492,7 +497,8 @@ struct gve_priv {
 	struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
 	struct gve_queue_page_list *qpls; /* array of num qpls */
 	struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
-	dma_addr_t ntfy_block_bus;
+	struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
+	dma_addr_t irq_db_indices_bus;
 	struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
 	char mgmt_msix_name[IFNAMSIZ + 16];
 	u32 mgmt_msix_idx;
@@ -551,6 +557,8 @@ struct gve_priv {
 	u32 page_alloc_fail; /* count of page alloc fails */
 	u32 dma_mapping_error; /* count of dma mapping errors */
 	u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */
+	u32 suspend_cnt; /* count of times suspended */
+	u32 resume_cnt; /* count of times resumed */
 	struct workqueue_struct *gve_wq;
 	struct work_struct service_task;
 	struct work_struct stats_report_task;
@@ -567,6 +575,7 @@ struct gve_priv {
 	/* Gvnic device link speed from hypervisor. */
 	u64 link_speed;
+	bool up_before_suspend; /* True if dev was up before suspend */

 	struct gve_options_dqo_rda options_dqo_rda;
 	struct gve_ptype_lut *ptype_lut_dqo;
@@ -575,6 +584,10 @@ struct gve_priv {
 	int data_buffer_size_dqo;

 	enum gve_queue_format queue_format;
+
+	/* Interrupt coalescing settings */
+	u32 tx_coalesce_usecs;
+	u32 rx_coalesce_usecs;
 };

 enum gve_service_task_flags_bit {
@@ -733,7 +746,7 @@ static inline void gve_clear_report_stats(struct gve_priv *priv)
 static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
					       struct gve_notify_block *block)
 {
-	return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
+	return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)];
 }

 /* Returns the index into ntfy_blocks of the given tx ring's block
......
@@ -462,7 +462,7 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
 		.num_counters = cpu_to_be32(num_counters),
 		.irq_db_addr = cpu_to_be64(db_array_bus_addr),
 		.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
-		.irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
+		.irq_db_stride = cpu_to_be32(sizeof(*priv->irq_db_indices)),
 		.ntfy_blk_msix_base_idx =
 					cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
 		.queue_format = priv->queue_format,
@@ -738,10 +738,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	 * is not set to GqiRda, choose the queue format in a priority order:
 	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
 	 */
-	if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
-		dev_info(&priv->pdev->dev,
-			 "Driver is running with GQI RDA queue format.\n");
-	} else if (dev_op_dqo_rda) {
+	if (dev_op_dqo_rda) {
 		priv->queue_format = GVE_DQO_RDA_FORMAT;
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with DQO RDA queue format.\n");
@@ -753,6 +750,9 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 			 "Driver is running with GQI RDA queue format.\n");
 		supported_features_mask =
 			be32_to_cpu(dev_op_gqi_rda->supported_features_mask);
+	} else if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
+		dev_info(&priv->pdev->dev,
+			 "Driver is running with GQI RDA queue format.\n");
 	} else {
 		priv->queue_format = GVE_GQI_QPL_FORMAT;
 		if (dev_op_gqi_qpl)
......
@@ -33,6 +33,14 @@ struct gve_tx_pkt_desc {
 	__be64	seg_addr;  /* Base address (see note) of this segment */
 } __packed;

+struct gve_tx_mtd_desc {
+	u8	type_flags;	/* type is lower 4 bits, subtype upper */
+	u8	path_state;	/* state is lower 4 bits, hash type upper */
+	__be16	reserved0;
+	__be32	path_hash;
+	__be64	reserved1;
+} __packed;
+
 struct gve_tx_seg_desc {
 	u8	type_flags;	/* type is lower 4 bits, flags upper	*/
 	u8	l3_offset;	/* TSO: 2 byte units to start of IPH	*/
@@ -46,6 +54,7 @@ struct gve_tx_seg_desc {
 #define	GVE_TXD_STD		(0x0 << 4) /* Std with Host Address	*/
 #define	GVE_TXD_TSO		(0x1 << 4) /* TSO with Host Address	*/
 #define	GVE_TXD_SEG		(0x2 << 4) /* Seg with Host Address	*/
+#define	GVE_TXD_MTD		(0x3 << 4) /* Metadata			*/

 /* GVE Transmit Descriptor Flags for Std Pkts */
 #define	GVE_TXF_L4CSUM	BIT(0)	/* Need csum offload */
@@ -54,6 +63,17 @@ struct gve_tx_seg_desc {
 /* GVE Transmit Descriptor Flags for TSO Segs */
 #define	GVE_TXSF_IPV6	BIT(1)	/* IPv6 TSO */

+/* GVE Transmit Descriptor Options for MTD Segs */
+#define GVE_MTD_SUBTYPE_PATH		0
+
+#define GVE_MTD_PATH_STATE_DEFAULT	0
+#define GVE_MTD_PATH_STATE_TIMEOUT	1
+#define GVE_MTD_PATH_STATE_CONGESTION	2
+#define GVE_MTD_PATH_STATE_RETRANSMIT	3
+
+#define GVE_MTD_PATH_HASH_NONE		(0x0 << 4)
+#define GVE_MTD_PATH_HASH_L4		(0x1 << 4)
+
 /* GVE Receive Packet Descriptor */
 /* The start of an ethernet packet comes 2 bytes into the rx buffer.
  * gVNIC adds this padding so that both the DMA and the L3/4 protocol header
......
@@ -18,6 +18,7 @@
 #define GVE_TX_IRQ_RATELIMIT_US_DQO 50
 #define GVE_RX_IRQ_RATELIMIT_US_DQO 20
+#define GVE_MAX_ITR_INTERVAL_DQO (GVE_ITR_INTERVAL_DQO_MASK * 2)

 /* Timeout in seconds to wait for a reinjection completion after receiving
  * its corresponding miss completion.
@@ -54,17 +55,17 @@ gve_tx_put_doorbell_dqo(const struct gve_priv *priv,
 }

 /* Builds register value to write to DQO IRQ doorbell to enable with specified
- * ratelimit.
+ * ITR interval.
  */
-static inline u32 gve_set_itr_ratelimit_dqo(u32 ratelimit_us)
+static inline u32 gve_setup_itr_interval_dqo(u32 interval_us)
 {
 	u32 result = GVE_ITR_ENABLE_BIT_DQO;

 	/* Interval has 2us granularity. */
-	ratelimit_us >>= 1;
-	ratelimit_us &= GVE_ITR_INTERVAL_DQO_MASK;
-	result |= (ratelimit_us << GVE_ITR_INTERVAL_DQO_SHIFT);
+	interval_us >>= 1;
+	interval_us &= GVE_ITR_INTERVAL_DQO_MASK;
+	result |= (interval_us << GVE_ITR_INTERVAL_DQO_SHIFT);

 	return result;
 }
@@ -73,9 +74,20 @@ static inline void
 gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
			   const struct gve_notify_block *block, u32 val)
 {
-	u32 index = be32_to_cpu(block->irq_db_index);
+	u32 index = be32_to_cpu(*block->irq_db_index);

 	iowrite32(val, &priv->db_bar2[index]);
 }

+/* Sets interrupt throttling interval and enables interrupt
+ * by writing to IRQ doorbell.
+ */
+static inline void
+gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv,
+			       struct gve_notify_block *block,
+			       u32 usecs)
+{
+	gve_write_irq_doorbell_dqo(priv, block,
+				   gve_setup_itr_interval_dqo(usecs));
+}
+
 #endif /* _GVE_DQO_H_ */
@@ -8,6 +8,7 @@
 #include <linux/rtnetlink.h>
 #include "gve.h"
 #include "gve_adminq.h"
+#include "gve_dqo.h"

 static void gve_get_drvinfo(struct net_device *netdev,
			    struct ethtool_drvinfo *info)
@@ -42,7 +43,7 @@ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
 };

 static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
-	"rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]",
+	"rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]",
 	"rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]",
 	"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
 	"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
@@ -50,7 +51,7 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
 };

 static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
-	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]",
+	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
 	"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
 	"tx_dma_mapping_error[%u]",
 };
@@ -139,10 +140,11 @@ static void
 gve_get_ethtool_stats(struct net_device *netdev,
		      struct ethtool_stats *stats, u64 *data)
 {
-	u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail,
-		tmp_rx_desc_err_dropped_pkt, tmp_tx_pkts, tmp_tx_bytes;
+	u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,
+		tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt,
+		tmp_tx_pkts, tmp_tx_bytes;
 	u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts,
-		rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes;
+		rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped;
 	int stats_idx, base_stats_idx, max_stats_idx;
 	struct stats *report_stats;
 	int *rx_qid_to_stats_idx;
@@ -191,7 +193,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
			rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt;
		}
	}
-	for (tx_pkts = 0, tx_bytes = 0, ring = 0;
+	for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
	     ring < priv->tx_cfg.num_queues; ring++) {
		if (priv->tx) {
			do {
@@ -203,6 +205,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
						       start));
			tx_pkts += tmp_tx_pkts;
			tx_bytes += tmp_tx_bytes;
+			tx_dropped += priv->tx[ring].dropped_pkt;
		}
	}
@@ -214,9 +217,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
	/* total rx dropped packets */
	data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail +
		    rx_desc_err_dropped_pkt;
-	/* Skip tx_dropped */
-	i++;
+	data[i++] = tx_dropped;
	data[i++] = priv->tx_timeo_cnt;
	data[i++] = rx_skb_alloc_fail;
	data[i++] = rx_buf_alloc_fail;
@@ -255,6 +256,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
			data[i++] = rx->fill_cnt;
			data[i++] = rx->cnt;
+			data[i++] = rx->fill_cnt - rx->cnt;
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
@@ -318,12 +320,14 @@ gve_get_ethtool_stats(struct net_device *netdev,
			if (gve_is_gqi(priv)) {
				data[i++] = tx->req;
				data[i++] = tx->done;
+				data[i++] = tx->req - tx->done;
			} else {
				/* DQO doesn't currently support
				 * posted/completed descriptor counts;
				 */
				data[i++] = 0;
				data[i++] = 0;
+				data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head;
			}
			do {
				start =
@@ -537,7 +541,65 @@ static int gve_get_link_ksettings(struct net_device *netdev,
 	return err;
 }

+static int gve_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec,
+			    struct kernel_ethtool_coalesce *kernel_ec,
+			    struct netlink_ext_ack *extack)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	if (gve_is_gqi(priv))
+		return -EOPNOTSUPP;
+	ec->tx_coalesce_usecs = priv->tx_coalesce_usecs;
+	ec->rx_coalesce_usecs = priv->rx_coalesce_usecs;
+
+	return 0;
+}
+
+static int gve_set_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec,
+			    struct kernel_ethtool_coalesce *kernel_ec,
+			    struct netlink_ext_ack *extack)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	u32 tx_usecs_orig = priv->tx_coalesce_usecs;
+	u32 rx_usecs_orig = priv->rx_coalesce_usecs;
+	int idx;
+
+	if (gve_is_gqi(priv))
+		return -EOPNOTSUPP;
+
+	if (ec->tx_coalesce_usecs > GVE_MAX_ITR_INTERVAL_DQO ||
+	    ec->rx_coalesce_usecs > GVE_MAX_ITR_INTERVAL_DQO)
+		return -EINVAL;
+	priv->tx_coalesce_usecs = ec->tx_coalesce_usecs;
+	priv->rx_coalesce_usecs = ec->rx_coalesce_usecs;
+
+	if (tx_usecs_orig != priv->tx_coalesce_usecs) {
+		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+			int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
+			struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+			gve_set_itr_coalesce_usecs_dqo(priv, block,
+						       priv->tx_coalesce_usecs);
+		}
+	}
+
+	if (rx_usecs_orig != priv->rx_coalesce_usecs) {
+		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+			int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+			struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
+		}
+	}
+
+	return 0;
+}
+
 const struct ethtool_ops gve_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
 	.get_drvinfo = gve_get_drvinfo,
 	.get_strings = gve_get_strings,
 	.get_sset_count = gve_get_sset_count,
@@ -547,6 +609,8 @@ const struct ethtool_ops gve_ethtool_ops = {
 	.set_channels = gve_set_channels,
 	.get_channels = gve_get_channels,
 	.get_link = ethtool_op_get_link,
+	.get_coalesce = gve_get_coalesce,
+	.set_coalesce = gve_set_coalesce,
 	.get_ringparam = gve_get_ringparam,
 	.reset = gve_user_reset,
 	.get_tunable = gve_get_tunable,
......
@@ -334,15 +334,23 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
 		goto abort_with_msix_enabled;
 	}
-	priv->ntfy_blocks =
+	priv->irq_db_indices =
 		dma_alloc_coherent(&priv->pdev->dev,
 				   priv->num_ntfy_blks *
-				   sizeof(*priv->ntfy_blocks),
-				   &priv->ntfy_block_bus, GFP_KERNEL);
-	if (!priv->ntfy_blocks) {
+				   sizeof(*priv->irq_db_indices),
+				   &priv->irq_db_indices_bus, GFP_KERNEL);
+	if (!priv->irq_db_indices) {
 		err = -ENOMEM;
 		goto abort_with_mgmt_vector;
 	}
+
+	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
+				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
+	if (!priv->ntfy_blocks) {
+		err = -ENOMEM;
+		goto abort_with_irq_db_indices;
+	}
+
 	/* Setup the other blocks - the first n-1 vectors */
 	for (i = 0; i < priv->num_ntfy_blks; i++) {
 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
@@ -361,6 +369,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 		}
 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 				      get_cpu_mask(i % active_cpus));
+		block->irq_db_index = &priv->irq_db_indices[i].index;
 	}
 	return 0;
 abort_with_some_ntfy_blocks:
@@ -372,10 +381,13 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 				      NULL);
 		free_irq(priv->msix_vectors[msix_idx].vector, block);
 	}
-	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
-			  sizeof(*priv->ntfy_blocks),
-			  priv->ntfy_blocks, priv->ntfy_block_bus);
+	kvfree(priv->ntfy_blocks);
 	priv->ntfy_blocks = NULL;
+abort_with_irq_db_indices:
+	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+			  sizeof(*priv->irq_db_indices),
+			  priv->irq_db_indices, priv->irq_db_indices_bus);
+	priv->irq_db_indices = NULL;
 abort_with_mgmt_vector:
 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 abort_with_msix_enabled:
@@ -403,10 +415,12 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
 		free_irq(priv->msix_vectors[msix_idx].vector, block);
 	}
 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
-	dma_free_coherent(&priv->pdev->dev,
-			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
-			  priv->ntfy_blocks, priv->ntfy_block_bus);
+	kvfree(priv->ntfy_blocks);
 	priv->ntfy_blocks = NULL;
+	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+			  sizeof(*priv->irq_db_indices),
+			  priv->irq_db_indices, priv->irq_db_indices_bus);
+	priv->irq_db_indices = NULL;
 	pci_disable_msix(priv->pdev);
 	kvfree(priv->msix_vectors);
 	priv->msix_vectors = NULL;
@@ -428,7 +442,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 	err = gve_adminq_configure_device_resources(priv,
 						    priv->counter_array_bus,
 						    priv->num_event_counters,
-						    priv->ntfy_block_bus,
+						    priv->irq_db_indices_bus,
 						    priv->num_ntfy_blks);
 	if (unlikely(err)) {
 		dev_err(&priv->pdev->dev,
@@ -817,8 +831,7 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 	put_page(page);
 }

-static void gve_free_queue_page_list(struct gve_priv *priv,
-				     int id)
+static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
 {
 	struct gve_queue_page_list *qpl = &priv->qpls[id];
 	int i;
@@ -1100,9 +1113,8 @@ static void gve_turnup(struct gve_priv *priv)
 		if (gve_is_gqi(priv)) {
 			iowrite32be(0, gve_irq_doorbell(priv, block));
 		} else {
-			u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO);
-
-			gve_write_irq_doorbell_dqo(priv, block, val);
+			gve_set_itr_coalesce_usecs_dqo(priv, block,
+						       priv->tx_coalesce_usecs);
 		}
 	}
 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
@@ -1113,9 +1125,8 @@ static void gve_turnup(struct gve_priv *priv)
 		if (gve_is_gqi(priv)) {
 			iowrite32be(0, gve_irq_doorbell(priv, block));
 		} else {
-			u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO);
-
-			gve_write_irq_doorbell_dqo(priv, block, val);
+			gve_set_itr_coalesce_usecs_dqo(priv, block,
+						       priv->rx_coalesce_usecs);
 		}
 	}
@@ -1412,6 +1423,11 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

+	if (!gve_is_gqi(priv)) {
+		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
+		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
+	}
+
 setup_device:
 	err = gve_setup_device_resources(priv);
 	if (!err)
@@ -1663,6 +1679,58 @@ static void gve_remove(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 }

+static void gve_shutdown(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct gve_priv *priv = netdev_priv(netdev);
+	bool was_up = netif_carrier_ok(priv->dev);
+
+	rtnl_lock();
+	if (was_up && gve_close(priv->dev)) {
+		/* If the dev was up, attempt to close, if close fails, reset */
+		gve_reset_and_teardown(priv, was_up);
+	} else {
+		/* If the dev wasn't up or close worked, finish tearing down */
+		gve_teardown_priv_resources(priv);
+	}
+	rtnl_unlock();
+}
+
+#ifdef CONFIG_PM
+static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct gve_priv *priv = netdev_priv(netdev);
+	bool was_up = netif_carrier_ok(priv->dev);
+
+	priv->suspend_cnt++;
+	rtnl_lock();
+	if (was_up && gve_close(priv->dev)) {
+		/* If the dev was up, attempt to close, if close fails, reset */
+		gve_reset_and_teardown(priv, was_up);
+	} else {
+		/* If the dev wasn't up or close worked, finish tearing down */
+		gve_teardown_priv_resources(priv);
+	}
+	priv->up_before_suspend = was_up;
+	rtnl_unlock();
+	return 0;
+}
+
+static int gve_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct gve_priv *priv = netdev_priv(netdev);
+	int err;
+
+	priv->resume_cnt++;
+	rtnl_lock();
+	err = gve_reset_recovery(priv, priv->up_before_suspend);
+	rtnl_unlock();
+	return err;
+}
+#endif /* CONFIG_PM */
+
 static const struct pci_device_id gve_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
 	{ }
@@ -1673,6 +1741,11 @@ static struct pci_driver gvnic_driver = {
 	.id_table = gve_id_table,
 	.probe = gve_probe,
 	.remove = gve_remove,
+	.shutdown = gve_shutdown,
+#ifdef CONFIG_PM
+	.suspend = gve_suspend,
+	.resume = gve_resume,
+#endif
 };

 module_pci_driver(gvnic_driver);
......
@@ -639,8 +639,6 @@ bool gve_rx_work_pending(struct gve_rx_ring *rx)
 	desc = rx->desc.desc_ring + next_idx;
 	flags_seq = desc->flags_seq;
-	/* Make sure we have synchronized the seq no with the device */
-	smp_rmb();

 	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
 }
......
@@ -296,11 +296,14 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
 	return bytes;
 }

-/* The most descriptors we could need is MAX_SKB_FRAGS + 3 : 1 for each skb frag,
- * +1 for the skb linear portion, +1 for when tcp hdr needs to be in separate descriptor,
- * and +1 if the payload wraps to the beginning of the FIFO.
+/* The most descriptors we could need is MAX_SKB_FRAGS + 4 :
+ * 1 for each skb frag
+ * 1 for the skb linear portion
+ * 1 for when tcp hdr needs to be in separate descriptor
+ * 1 if the payload wraps to the beginning of the FIFO
+ * 1 for metadata descriptor
  */
-#define MAX_TX_DESC_NEEDED	(MAX_SKB_FRAGS + 3)
+#define MAX_TX_DESC_NEEDED	(MAX_SKB_FRAGS + 4)
 static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
 {
 	if (info->skb) {
@@ -395,6 +398,19 @@ static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
 	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
 }

+static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc,
+				 struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(mtd_desc->mtd) != sizeof(mtd_desc->pkt));
+
+	mtd_desc->mtd.type_flags = GVE_TXD_MTD | GVE_MTD_SUBTYPE_PATH;
+	mtd_desc->mtd.path_state = GVE_MTD_PATH_STATE_DEFAULT |
+				   GVE_MTD_PATH_HASH_L4;
+	mtd_desc->mtd.path_hash = cpu_to_be32(skb->hash);
+	mtd_desc->mtd.reserved0 = 0;
+	mtd_desc->mtd.reserved1 = 0;
+}
+
 static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
 				 struct sk_buff *skb, bool is_gso,
 				 u16 len, u64 addr)
@@ -426,6 +442,7 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
 	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
 	union gve_tx_desc *pkt_desc, *seg_desc;
 	struct gve_tx_buffer_state *info;
+	int mtd_desc_nr = !!skb->l4_hash;
 	bool is_gso = skb_is_gso(skb);
 	u32 idx = tx->req & tx->mask;
 	int payload_iov = 2;
@@ -457,7 +474,7 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
 					   &info->iov[payload_iov]);

 	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
-			     1 + payload_nfrags, hlen,
+			     1 + mtd_desc_nr + payload_nfrags, hlen,
 			     info->iov[hdr_nfrags - 1].iov_offset);

 	skb_copy_bits(skb, 0,
@@ -468,8 +485,13 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
 		      info->iov[hdr_nfrags - 1].iov_len);
 	copy_offset = hlen;

+	if (mtd_desc_nr) {
+		next_idx = (tx->req + 1) & tx->mask;
+		gve_tx_fill_mtd_desc(&tx->desc[next_idx], skb);
+	}
+
 	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
-		next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
+		next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
 		seg_desc = &tx->desc[next_idx];

 		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
@@ -485,16 +507,17 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
 		copy_offset += info->iov[i].iov_len;
 	}

-	return 1 + payload_nfrags;
+	return 1 + mtd_desc_nr + payload_nfrags;
 }

 static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
 				  struct sk_buff *skb)
 {
 	const struct skb_shared_info *shinfo = skb_shinfo(skb);
-	int hlen, payload_nfrags, l4_hdr_offset;
-	union gve_tx_desc *pkt_desc, *seg_desc;
+	int hlen, num_descriptors, l4_hdr_offset;
+	union gve_tx_desc *pkt_desc, *mtd_desc, *seg_desc;
 	struct gve_tx_buffer_state *info;
+	int mtd_desc_nr = !!skb->l4_hash;
 	bool is_gso = skb_is_gso(skb);
 	u32 idx = tx->req & tx->mask;
 	u64 addr;
@@ -523,23 +546,30 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
 	dma_unmap_len_set(info, len, len);
 	dma_unmap_addr_set(info, dma, addr);

-	payload_nfrags = shinfo->nr_frags;
+	num_descriptors = 1 + shinfo->nr_frags;
+	if (hlen < len)
+		num_descriptors++;
+	if (mtd_desc_nr)
+		num_descriptors++;
+
+	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+			     num_descriptors, hlen, addr);
+
+	if (mtd_desc_nr) {
+		idx = (idx + 1) & tx->mask;
+		mtd_desc = &tx->desc[idx];
+		gve_tx_fill_mtd_desc(mtd_desc, skb);
+	}
+
 	if (hlen < len) {
 		/* For gso the rest of the linear portion of the skb needs to
 		 * be in its own descriptor.
 		 */
-		payload_nfrags++;
-		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
-				     1 + payload_nfrags, hlen, addr);
 		len -= hlen;
 		addr += hlen;
-		idx = (tx->req + 1) & tx->mask;
+		idx = (idx + 1) & tx->mask;
 		seg_desc = &tx->desc[idx];
 		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
-	} else {
-		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
-				     1 + payload_nfrags, hlen, addr);
 	}

 	for (i = 0; i < shinfo->nr_frags; i++) {
@@ -560,11 +590,14 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
 		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
 	}

-	return 1 + payload_nfrags;
+	return num_descriptors;

 unmap_drop:
-	i += (payload_nfrags == shinfo->nr_frags ? 1 : 2);
+	i += num_descriptors - shinfo->nr_frags;
 	while (i--) {
+		/* Skip metadata descriptor, if set */
+		if (i == 1 && mtd_desc_nr == 1)
+			continue;
 		idx--;
 		gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
 	}
......
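
One more note on the gve_tx.c accounting above: with the optional metadata descriptor, a single skb can now consume up to MAX_SKB_FRAGS + 4 descriptors, which is where the new MAX_TX_DESC_NEEDED value comes from. The hypothetical helper below (not in the driver, illustration only) just restates that worst-case count for a given skb.

/* Hypothetical helper, not part of this patchset: worst-case GQI TX
 * descriptor count for one skb after this series.
 */
#include <linux/skbuff.h>

static int gve_tx_worst_case_descs(const struct sk_buff *skb)
{
	int descs = 1;				/* packet descriptor (carries the header) */

	if (skb->l4_hash)
		descs++;			/* optional metadata descriptor */
	descs += skb_shinfo(skb)->nr_frags;	/* one segment descriptor per frag */
	descs += 2;				/* separate TSO header desc + FIFO wrap, worst case */
	return descs;
}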