Commit 8da3803d authored by David S. Miller

Merge branch 'mlx5-hyperv'

Haiyang Zhang says:

====================
Add software backchannel and mlx5e HV VHCA stats

This patch set adds a software paravirtual backchannel to pci_hyperv,
which is required by the mlx5e driver's HV VHCA stats agent.

The stats agent is responsible for running periodic rx/tx packets/bytes
stats updates.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fed07ef3 cef35af3
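In outline: pci-hyperv gains read/write/invalidate operations on 128-byte software config blocks, the new pci-hyperv-intf shim exports them as hyperv_read_cfg_blk()/hyperv_write_cfg_blk()/hyperv_reg_block_invalidate(), and mlx5 layers an agent protocol (lib/hv_vhca.c) and a stats agent (en/hv_vhca_stats.c) on top. As a hedged sketch, a hypothetical consumer of the low-level interface would look roughly like this (the example_* names are illustrative, not part of this set):

#include <linux/hyperv.h>
#include <linux/pci.h>

/* Hypothetical VF-side exchange: read the PF-defined block 1, then write
 * an acknowledgement into block 2. Block numbering is driver-defined. */
static int example_exchange(struct pci_dev *pdev)
{
        u8 buf[HV_CONFIG_BLOCK_SIZE_MAX];
        unsigned int bytes_returned;
        int ret;

        ret = hyperv_read_cfg_blk(pdev, buf, sizeof(buf), 1, &bytes_returned);
        if (ret)
                return ret;

        return hyperv_write_cfg_blk(pdev, buf, sizeof(buf), 2);
}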
@@ -7469,6 +7469,7 @@ F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
F: drivers/pci/controller/pci-hyperv.c
F: drivers/pci/controller/pci-hyperv-intf.c
F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
......
@@ -36,6 +36,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
#
# Core extra
@@ -45,6 +46,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
#
# Ipoib netdev
......
@@ -54,6 +54,7 @@
#include "mlx5_core.h"
#include "en_stats.h"
#include "en/fs.h"
#include "lib/hv_vhca.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -782,6 +783,15 @@ struct mlx5e_modify_sq_param {
int rl_index;
};
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
struct mlx5e_hv_vhca_stats_agent {
struct mlx5_hv_vhca_agent *agent;
struct delayed_work work;
u16 delay;
void *buf;
};
#endif
struct mlx5e_xsk {
/* UMEMs are stored separately from channels, because we don't want to
* lose them when channels are recreated. The kernel also stores UMEMs,
@@ -853,6 +863,9 @@ struct mlx5e_priv {
struct devlink_health_reporter *tx_reporter;
struct devlink_health_reporter *rx_reporter;
struct mlx5e_xsk xsk;
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
struct mlx5e_hv_vhca_stats_agent stats_agent;
#endif
};
struct mlx5e_profile {
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies
#include "en.h"
#include "en/hv_vhca_stats.h"
#include "lib/hv_vhca.h"
#include "lib/hv.h"
struct mlx5e_hv_vhca_per_ring_stats {
u64 rx_packets;
u64 rx_bytes;
u64 tx_packets;
u64 tx_bytes;
};
static void
mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
struct mlx5e_hv_vhca_per_ring_stats *data)
{
struct mlx5e_channel_stats *stats;
int tc;
stats = &priv->channel_stats[ch];
data->rx_packets = stats->rq.packets;
data->rx_bytes = stats->rq.bytes;
for (tc = 0; tc < priv->max_opened_tc; tc++) {
data->tx_packets += stats->sq[tc].packets;
data->tx_bytes += stats->sq[tc].bytes;
}
}
static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data,
int buf_len)
{
int ch, i = 0;
for (ch = 0; ch < priv->max_nch; ch++) {
u64 *buf = data + i;
/* buf_len is in bytes, so compare in bytes rather than u64 units */
if (WARN_ON_ONCE((char *)buf +
sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
(char *)data + buf_len))
return;
mlx5e_hv_vhca_fill_ring_stats(priv, ch,
(struct mlx5e_hv_vhca_per_ring_stats *)buf);
i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64);
}
}
static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
{
return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
priv->max_nch);
}
static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
{
struct mlx5e_hv_vhca_stats_agent *sagent;
struct mlx5_hv_vhca_agent *agent;
struct delayed_work *dwork;
struct mlx5e_priv *priv;
int buf_len, rc;
void *buf;
dwork = to_delayed_work(work);
sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
priv = container_of(sagent, struct mlx5e_priv, stats_agent);
buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
agent = sagent->agent;
buf = sagent->buf;
memset(buf, 0, buf_len);
mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
if (rc) {
mlx5_core_err(priv->mdev,
"%s: Failed to write stats, err = %d\n",
__func__, rc);
return;
}
if (sagent->delay)
queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
}
enum {
MLX5_HV_VHCA_STATS_VERSION = 1,
MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
};
static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
struct mlx5_hv_vhca_control_block *block)
{
struct mlx5e_hv_vhca_stats_agent *sagent;
struct mlx5e_priv *priv;
priv = mlx5_hv_vhca_agent_priv(agent);
sagent = &priv->stats_agent;
block->version = MLX5_HV_VHCA_STATS_VERSION;
block->rings = priv->max_nch;
if (!block->command) {
cancel_delayed_work_sync(&priv->stats_agent.work);
return;
}
sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
msecs_to_jiffies(block->command * 100);
queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
}
static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
{
struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
cancel_delayed_work_sync(&priv->stats_agent.work);
}
int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
return -ENOMEM;
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
mlx5e_hv_vhca_stats_control, NULL,
mlx5e_hv_vhca_stats_cleanup,
priv);
if (IS_ERR_OR_NULL(agent)) {
if (IS_ERR(agent))
netdev_warn(priv->netdev,
"Failed to create hv vhca stats agent, err = %ld\n",
PTR_ERR(agent));
/* buf came from kvzalloc(), so release it with kvfree() */
kvfree(priv->stats_agent.buf);
/* return a proper -errno rather than the truthy IS_ERR_OR_NULL() */
return agent ? PTR_ERR(agent) : -ENOMEM;
}
priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
return 0;
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
{
if (IS_ERR_OR_NULL(priv->stats_agent.agent))
return;
mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
kvfree(priv->stats_agent.buf);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
#ifndef __MLX5_EN_STATS_VHCA_H__
#define __MLX5_EN_STATS_VHCA_H__
#include "en.h"
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
#else
static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
return 0;
}
static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
{
}
#endif
#endif /* __MLX5_EN_STATS_VHCA_H__ */
@@ -62,6 +62,7 @@
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
#include "en/hv_vhca_stats.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
@@ -5109,6 +5110,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_init(priv);
mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5141,6 +5143,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
mlx5e_hv_vhca_stats_destroy(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_cleanup(priv);
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies
#include <linux/hyperv.h>
#include "mlx5_core.h"
#include "lib/hv.h"
static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
int offset, bool read)
{
int rc = -EOPNOTSUPP;
int bytes_returned;
int block_id;
if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
return -EINVAL;
block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
rc = read ?
hyperv_read_cfg_blk(dev->pdev, buf,
HV_CONFIG_BLOCK_SIZE_MAX, block_id,
&bytes_returned) :
hyperv_write_cfg_blk(dev->pdev, buf,
HV_CONFIG_BLOCK_SIZE_MAX, block_id);
/* Make sure len bytes were read successfully */
if (!rc && read && len != bytes_returned)
rc = -EIO;
if (rc) {
mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
read ? "read" : "write", rc, len,
offset);
return rc;
}
return 0;
}
int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
int offset)
{
return mlx5_hv_config_common(dev, buf, len, offset, true);
}
int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
int offset)
{
return mlx5_hv_config_common(dev, buf, len, offset, false);
}
int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
void (*block_invalidate)(void *context,
u64 block_mask))
{
return hyperv_reg_block_invalidate(dev->pdev, context,
block_invalidate);
}
void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
{
hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
#ifndef __LIB_HV_H__
#define __LIB_HV_H__
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
#include <linux/hyperv.h>
#include <linux/mlx5/driver.h>
int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
int offset);
int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
int offset);
int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
void (*block_invalidate)(void *context,
u64 block_mask));
void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
#endif
#endif /* __LIB_HV_H__ */
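Note that mlx5_hv_config_common() rejects any offset or len that is not a multiple of HV_CONFIG_BLOCK_SIZE_MAX, and the read path insists that exactly len bytes came back, so callers effectively transfer whole 128-byte blocks. Accordingly, struct mlx5_hv_vhca_control_block (defined in lib/hv_vhca.h below) packs to exactly 4 + 4 + 2 + 2 + 2 + 2 + 28 * 4 = 128 bytes. A minimal sketch of a whole-block read with a compile-time guard a caller could add (not part of this patch):

#include <linux/build_bug.h>

static int example_read_control(struct mlx5_core_dev *dev, void *out)
{
        /* The control block must fill exactly one config block */
        BUILD_BUG_ON(sizeof(struct mlx5_hv_vhca_control_block) !=
                     HV_CONFIG_BLOCK_SIZE_MAX);

        /* Whole-block read at offset 0, as the control agent does */
        return mlx5_hv_read_config(dev, out, HV_CONFIG_BLOCK_SIZE_MAX, 0);
}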
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies
#include <linux/hyperv.h>
#include "mlx5_core.h"
#include "lib/hv.h"
#include "lib/hv_vhca.h"
struct mlx5_hv_vhca {
struct mlx5_core_dev *dev;
struct workqueue_struct *work_queue;
struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
struct mutex agents_lock; /* Protect agents array */
};
struct mlx5_hv_vhca_work {
struct work_struct invalidate_work;
struct mlx5_hv_vhca *hv_vhca;
u64 block_mask;
};
struct mlx5_hv_vhca_data_block {
u16 sequence;
u16 offset;
u8 reserved[4];
u64 data[15];
};
struct mlx5_hv_vhca_agent {
enum mlx5_hv_vhca_agent_type type;
struct mlx5_hv_vhca *hv_vhca;
void *priv;
u16 seq;
void (*control)(struct mlx5_hv_vhca_agent *agent,
struct mlx5_hv_vhca_control_block *block);
void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
u64 block_mask);
void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
};
struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
{
struct mlx5_hv_vhca *hv_vhca = NULL;
hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
if (!hv_vhca)
return ERR_PTR(-ENOMEM);
hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
if (!hv_vhca->work_queue) {
kfree(hv_vhca);
return ERR_PTR(-ENOMEM);
}
hv_vhca->dev = dev;
mutex_init(&hv_vhca->agents_lock);
return hv_vhca;
}
void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
{
if (IS_ERR_OR_NULL(hv_vhca))
return;
destroy_workqueue(hv_vhca->work_queue);
kfree(hv_vhca);
}
static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
{
struct mlx5_hv_vhca_work *hwork;
struct mlx5_hv_vhca *hv_vhca;
int i;
hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
hv_vhca = hwork->hv_vhca;
mutex_lock(&hv_vhca->agents_lock);
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
if (!agent || !agent->invalidate)
continue;
if (!(BIT(agent->type) & hwork->block_mask))
continue;
agent->invalidate(agent, hwork->block_mask);
}
mutex_unlock(&hv_vhca->agents_lock);
kfree(hwork);
}
void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
{
struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
struct mlx5_hv_vhca_work *work;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
work->hv_vhca = hv_vhca;
work->block_mask = block_mask;
queue_work(hv_vhca->work_queue, &work->invalidate_work);
}
#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
struct mlx5_hv_vhca_control_block *block)
{
int i;
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
if (!agent || !agent->control)
continue;
if (!(AGENT_MASK(agent->type) & block->control))
continue;
agent->control(agent, block);
}
}
static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
u32 *capabilities)
{
int i;
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
if (agent)
*capabilities |= AGENT_MASK(agent->type);
}
}
static void
mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
u64 block_mask)
{
struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
struct mlx5_core_dev *dev = hv_vhca->dev;
struct mlx5_hv_vhca_control_block *block;
u32 capabilities = 0;
int err;
block = kzalloc(sizeof(*block), GFP_KERNEL);
if (!block)
return;
err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
if (err)
goto free_block;
mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
/* In case no capabilities, send empty block in return */
if (!capabilities) {
memset(block, 0, sizeof(*block));
goto write;
}
if (block->capabilities != capabilities)
block->capabilities = capabilities;
if (block->control & ~capabilities)
goto free_block;
mlx5_hv_vhca_agents_control(hv_vhca, block);
block->command_ack = block->command;
write:
mlx5_hv_write_config(dev, block, sizeof(*block), 0);
free_block:
kfree(block);
}
static struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
{
return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
NULL,
mlx5_hv_vhca_control_agent_invalidate,
NULL, NULL);
}
static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
{
mlx5_hv_vhca_agent_destroy(agent);
}
int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
{
struct mlx5_hv_vhca_agent *agent;
int err;
/* NULL means the interface is compiled out: nothing to init */
if (IS_ERR_OR_NULL(hv_vhca))
return hv_vhca ? PTR_ERR(hv_vhca) : 0;
err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
mlx5_hv_vhca_invalidate);
if (err)
return err;
agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
if (IS_ERR_OR_NULL(agent)) {
mlx5_hv_unregister_invalidate(hv_vhca->dev);
return agent ? PTR_ERR(agent) : -ENOMEM;
}
hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
return 0;
}
void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
{
struct mlx5_hv_vhca_agent *agent;
int i;
if (IS_ERR_OR_NULL(hv_vhca))
return;
agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
if (agent)
mlx5_hv_vhca_control_agent_destroy(agent);
mutex_lock(&hv_vhca->agents_lock);
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
WARN_ON(hv_vhca->agents[i]);
mutex_unlock(&hv_vhca->agents_lock);
mlx5_hv_unregister_invalidate(hv_vhca->dev);
}
static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
{
mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
}
struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
enum mlx5_hv_vhca_agent_type type,
void (*control)(struct mlx5_hv_vhca_agent*,
struct mlx5_hv_vhca_control_block *block),
void (*invalidate)(struct mlx5_hv_vhca_agent*,
u64 block_mask),
void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
void *priv)
{
struct mlx5_hv_vhca_agent *agent;
if (IS_ERR_OR_NULL(hv_vhca))
return ERR_PTR(-ENOMEM);
if (type >= MLX5_HV_VHCA_AGENT_MAX)
return ERR_PTR(-EINVAL);
mutex_lock(&hv_vhca->agents_lock);
if (hv_vhca->agents[type]) {
mutex_unlock(&hv_vhca->agents_lock);
return ERR_PTR(-EINVAL);
}
mutex_unlock(&hv_vhca->agents_lock);
agent = kzalloc(sizeof(*agent), GFP_KERNEL);
if (!agent)
return ERR_PTR(-ENOMEM);
agent->type = type;
agent->hv_vhca = hv_vhca;
agent->priv = priv;
agent->control = control;
agent->invalidate = invalidate;
agent->cleanup = cleanup;
mutex_lock(&hv_vhca->agents_lock);
hv_vhca->agents[type] = agent;
mutex_unlock(&hv_vhca->agents_lock);
mlx5_hv_vhca_agents_update(hv_vhca);
return agent;
}
void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
{
struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
mutex_lock(&hv_vhca->agents_lock);
if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
mutex_unlock(&hv_vhca->agents_lock);
return;
}
hv_vhca->agents[agent->type] = NULL;
mutex_unlock(&hv_vhca->agents_lock);
if (agent->cleanup)
agent->cleanup(agent);
kfree(agent);
mlx5_hv_vhca_agents_update(hv_vhca);
}
static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
struct mlx5_hv_vhca_data_block *data_block,
void *src, int len, int *offset)
{
int bytes = min_t(int, (int)sizeof(data_block->data), len);
data_block->sequence = agent->seq;
data_block->offset = (*offset)++;
memcpy(data_block->data, src, bytes);
return bytes;
}
static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
{
agent->seq++;
}
int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
void *buf, int len)
{
int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
int block_offset = 0;
int total = 0;
int err;
while (len) {
struct mlx5_hv_vhca_data_block data_block = {0};
int bytes;
bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
buf + total,
len, &block_offset);
if (!bytes)
return -ENOMEM;
err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
sizeof(data_block), offset);
if (err)
return err;
total += bytes;
len -= bytes;
}
mlx5_hv_vhca_agent_seq_update(agent);
return 0;
}
void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
{
return agent->priv;
}
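mlx5_hv_vhca_agent_write() above fragments the caller's buffer into struct mlx5_hv_vhca_data_block units: each carries a u16 sequence, a u16 running offset, and 15 * 8 = 120 payload bytes, and every unit lands in the agent's fixed slot at agent->type * HV_CONFIG_BLOCK_SIZE_MAX. As an illustrative back-of-the-envelope (not part of this patch), the stats agent's 32-bytes-per-channel buffer therefore needs:

/* e.g. 8 channels -> 256 bytes -> DIV_ROUND_UP(256, 120) = 3 block
 * writes per stats update */
static int example_blocks_per_update(int max_nch)
{
        const int payload =
                sizeof(((struct mlx5_hv_vhca_data_block *)NULL)->data);

        return DIV_ROUND_UP(max_nch * 32 /* per-ring stats bytes */,
                            payload);
}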
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
#ifndef __LIB_HV_VHCA_H__
#define __LIB_HV_VHCA_H__
#include "en.h"
#include "lib/hv.h"
struct mlx5_hv_vhca_agent;
struct mlx5_hv_vhca;
struct mlx5_hv_vhca_control_block;
enum mlx5_hv_vhca_agent_type {
MLX5_HV_VHCA_AGENT_CONTROL = 0,
MLX5_HV_VHCA_AGENT_STATS = 1,
MLX5_HV_VHCA_AGENT_MAX = 32,
};
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
struct mlx5_hv_vhca_control_block {
u32 capabilities;
u32 control;
u16 command;
u16 command_ack;
u16 version;
u16 rings;
u32 reserved1[28];
};
struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
enum mlx5_hv_vhca_agent_type type,
void (*control)(struct mlx5_hv_vhca_agent*,
struct mlx5_hv_vhca_control_block *block),
void (*invalidate)(struct mlx5_hv_vhca_agent*,
u64 block_mask),
void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
void *context);
void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
void *buf, int len);
void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
#else
static inline struct mlx5_hv_vhca *
mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
{
return NULL;
}
static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
{
}
static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
{
return 0;
}
static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
{
}
static inline void mlx5_hv_vhca_invalidate(void *context,
u64 block_mask)
{
}
static inline struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
enum mlx5_hv_vhca_agent_type type,
void (*control)(struct mlx5_hv_vhca_agent*,
struct mlx5_hv_vhca_control_block *block),
void (*invalidate)(struct mlx5_hv_vhca_agent*,
u64 block_mask),
void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
void *context)
{
return NULL;
}
static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
{
}
static inline int
mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
void *buf, int len)
{
return 0;
}
#endif
#endif /* __LIB_HV_VHCA_H__ */
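A minimal sketch of registering an agent against this API (example_* names are illustrative; the real consumer is the stats agent in en/hv_vhca_stats.c above):

static void example_control(struct mlx5_hv_vhca_agent *agent,
                            struct mlx5_hv_vhca_control_block *block)
{
        /* ctx is whatever was passed as the last create() argument */
        void *ctx = mlx5_hv_vhca_agent_priv(agent);

        pr_debug("agent %p got command %u\n", ctx, block->command);
}

static int example_register(struct mlx5_core_dev *mdev, void *ctx)
{
        struct mlx5_hv_vhca_agent *agent;
        u64 sample = 0;

        agent = mlx5_hv_vhca_agent_create(mdev->hv_vhca,
                                          MLX5_HV_VHCA_AGENT_STATS,
                                          example_control, NULL, NULL, ctx);
        /* NULL comes back when CONFIG_PCI_HYPERV_INTERFACE is off */
        if (IS_ERR_OR_NULL(agent))
                return agent ? PTR_ERR(agent) : -EOPNOTSUPP;

        /* A one-u64 payload; chunking into data blocks is internal */
        return mlx5_hv_vhca_agent_write(agent, &sample, sizeof(sample));
}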
@@ -69,6 +69,7 @@
#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
#include "lib/hv_vhca.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -870,6 +871,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
}
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
return 0;
@@ -900,6 +902,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1067,6 +1070,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fw_tracer;
}
mlx5_hv_vhca_init(dev->hv_vhca);
err = mlx5_fpga_device_start(dev);
if (err) {
mlx5_core_err(dev, "fpga device start failed %d\n", err);
@@ -1122,6 +1127,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
mlx5_eq_table_destroy(dev);
@@ -1142,6 +1148,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
......
@@ -182,6 +182,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
select PCI_HYPERV_INTERFACE
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
......
@@ -281,5 +281,12 @@ config VMD
To compile this driver as a module, choose M here: the
module will be called vmd.
config PCI_HYPERV_INTERFACE
tristate "Hyper-V PCI Interface"
depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
help
The Hyper-V PCI Interface is a helper driver that allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.
source "drivers/pci/controller/dwc/Kconfig"
endmenu
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) Microsoft Corporation.
*
* Author:
* Haiyang Zhang <haiyangz@microsoft.com>
*
* This small module is a helper driver that allows other drivers to
* have a common interface with the Hyper-V PCI frontend driver.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/hyperv.h>
struct hyperv_pci_block_ops hvpci_block_ops;
EXPORT_SYMBOL_GPL(hvpci_block_ops);
int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
unsigned int block_id, unsigned int *bytes_returned)
{
if (!hvpci_block_ops.read_block)
return -EOPNOTSUPP;
return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
bytes_returned);
}
EXPORT_SYMBOL_GPL(hyperv_read_cfg_blk);
int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
unsigned int block_id)
{
if (!hvpci_block_ops.write_block)
return -EOPNOTSUPP;
return hvpci_block_ops.write_block(dev, buf, len, block_id);
}
EXPORT_SYMBOL_GPL(hyperv_write_cfg_blk);
int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
void (*block_invalidate)(void *context,
u64 block_mask))
{
if (!hvpci_block_ops.reg_blk_invalidate)
return -EOPNOTSUPP;
return hvpci_block_ops.reg_blk_invalidate(dev, context,
block_invalidate);
}
EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate);
static void __exit exit_hv_pci_intf(void)
{
}
static int __init init_hv_pci_intf(void)
{
return 0;
}
module_init(init_hv_pci_intf);
module_exit(exit_hv_pci_intf);
MODULE_DESCRIPTION("Hyper-V PCI Interface");
MODULE_LICENSE("GPL v2");
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
struct tran_int_desc int_desc;
} __packed;
/*
* Note: the VM must pass a valid block id, wslot and bytes_requested.
*/
struct pci_read_block {
struct pci_message message_type;
u32 block_id;
union win_slot_encoding wslot;
u32 bytes_requested;
} __packed;
struct pci_read_block_response {
struct vmpacket_descriptor hdr;
u32 status;
u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
} __packed;
/*
* Note: the VM must pass a valid block id, wslot and byte_count.
*/
struct pci_write_block {
struct pci_message message_type;
u32 block_id;
union win_slot_encoding wslot;
u32 byte_count;
u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
} __packed;
struct pci_dev_inval_block {
struct pci_incoming_message incoming;
union win_slot_encoding wslot;
u64 block_mask;
} __packed;
struct pci_dev_incoming {
struct pci_incoming_message incoming;
union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
struct hv_pcibus_device *hbus;
struct work_struct wrk;
void (*block_invalidate)(void *context, u64 block_mask);
void *invalidate_context;
/*
* What would be observed if one wrote 0xFFFFFFFF to a BAR and then
* read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,253 @@ static struct pci_ops hv_pcifront_ops = {
.write = hv_pcifront_write_config,
};
/*
* Paravirtual backchannel
*
* Hyper-V SR-IOV provides a backchannel mechanism in software for
* communication between a VF driver and a PF driver. These
* "configuration blocks" are similar in concept to PCI configuration space,
* but instead of doing reads and writes in 32-bit chunks through a very slow
* path, packets of up to 128 bytes can be sent or received asynchronously.
*
* Nearly every SR-IOV device contains just such a communications channel in
* hardware, so using this one in software is usually optional. Using the
* software channel, however, allows driver implementers to leverage software
* tools that fuzz the communications channel looking for vulnerabilities.
*
* The usage model for these packets puts the responsibility for reading or
* writing on the VF driver. The VF driver sends a read or a write packet,
* indicating which "block" is being referred to by number.
*
* If the PF driver wishes to initiate communication, it can "invalidate" one or
* more of the first 64 blocks. This invalidation is delivered via a callback
* supplied to this driver by the VF driver.
*
* No protocol is implied, except that supplied by the PF and VF drivers.
*/
struct hv_read_config_compl {
struct hv_pci_compl comp_pkt;
void *buf;
unsigned int len;
unsigned int bytes_returned;
};
/**
* hv_pci_read_config_compl() - Invoked when a response packet
* for a read config block operation arrives.
* @context: Identifies the read config operation
* @resp: The response packet itself
* @resp_packet_size: Size in bytes of the response packet
*/
static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
int resp_packet_size)
{
struct hv_read_config_compl *comp = context;
struct pci_read_block_response *read_resp =
(struct pci_read_block_response *)resp;
unsigned int data_len, hdr_len;
hdr_len = offsetof(struct pci_read_block_response, bytes);
if (resp_packet_size < hdr_len) {
comp->comp_pkt.completion_status = -1;
goto out;
}
data_len = resp_packet_size - hdr_len;
if (data_len > 0 && read_resp->status == 0) {
comp->bytes_returned = min(comp->len, data_len);
memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
} else {
comp->bytes_returned = 0;
}
comp->comp_pkt.completion_status = read_resp->status;
out:
complete(&comp->comp_pkt.host_event);
}
/**
* hv_read_config_block() - Sends a read config block request to
* the back-end driver running in the Hyper-V parent partition.
* @pdev: The PCI driver's representation for this device.
* @buf: Buffer into which the config block will be copied.
* @len: Size in bytes of buf.
* @block_id: Identifies the config block which has been requested.
* @bytes_returned: Size which came back from the back-end driver.
*
* Return: 0 on success, -errno on failure
*/
int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
unsigned int block_id, unsigned int *bytes_returned)
{
struct hv_pcibus_device *hbus =
container_of(pdev->bus->sysdata, struct hv_pcibus_device,
sysdata);
struct {
struct pci_packet pkt;
char buf[sizeof(struct pci_read_block)];
} pkt;
struct hv_read_config_compl comp_pkt;
struct pci_read_block *read_blk;
int ret;
if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
return -EINVAL;
init_completion(&comp_pkt.comp_pkt.host_event);
comp_pkt.buf = buf;
comp_pkt.len = len;
memset(&pkt, 0, sizeof(pkt));
pkt.pkt.completion_func = hv_pci_read_config_compl;
pkt.pkt.compl_ctxt = &comp_pkt;
read_blk = (struct pci_read_block *)&pkt.pkt.message;
read_blk->message_type.type = PCI_READ_BLOCK;
read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
read_blk->block_id = block_id;
read_blk->bytes_requested = len;
ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
sizeof(*read_blk), (unsigned long)&pkt.pkt,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
return ret;
ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
if (ret)
return ret;
if (comp_pkt.comp_pkt.completion_status != 0 ||
comp_pkt.bytes_returned == 0) {
dev_err(&hbus->hdev->device,
"Read Config Block failed: 0x%x, bytes_returned=%d\n",
comp_pkt.comp_pkt.completion_status,
comp_pkt.bytes_returned);
return -EIO;
}
*bytes_returned = comp_pkt.bytes_returned;
return 0;
}
/**
* hv_pci_write_config_compl() - Invoked when a response packet for a write
* config block operation arrives.
* @context: Identifies the write config operation
* @resp: The response packet itself
* @resp_packet_size: Size in bytes of the response packet
*/
static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
int resp_packet_size)
{
struct hv_pci_compl *comp_pkt = context;
comp_pkt->completion_status = resp->status;
complete(&comp_pkt->host_event);
}
/**
* hv_write_config_block() - Sends a write config block request to the
* back-end driver running in the Hyper-V parent partition.
* @pdev: The PCI driver's representation for this device.
* @buf: Buffer from which the config block will be copied.
* @len: Size in bytes of buf.
* @block_id: Identifies the config block which is being written.
*
* Return: 0 on success, -errno on failure
*/
int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
unsigned int block_id)
{
struct hv_pcibus_device *hbus =
container_of(pdev->bus->sysdata, struct hv_pcibus_device,
sysdata);
struct {
struct pci_packet pkt;
char buf[sizeof(struct pci_write_block)];
u32 reserved;
} pkt;
struct hv_pci_compl comp_pkt;
struct pci_write_block *write_blk;
u32 pkt_size;
int ret;
if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
return -EINVAL;
init_completion(&comp_pkt.host_event);
memset(&pkt, 0, sizeof(pkt));
pkt.pkt.completion_func = hv_pci_write_config_compl;
pkt.pkt.compl_ctxt = &comp_pkt;
write_blk = (struct pci_write_block *)&pkt.pkt.message;
write_blk->message_type.type = PCI_WRITE_BLOCK;
write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
write_blk->block_id = block_id;
write_blk->byte_count = len;
memcpy(write_blk->bytes, buf, len);
pkt_size = offsetof(struct pci_write_block, bytes) + len;
/*
* This quirk is required on some hosts shipped around 2018, because
* these hosts don't check the pkt_size correctly (new hosts have been
* fixed since early 2019). The quirk is also safe on very old hosts
* and new hosts, because, on them, what really matters is the length
* specified in write_blk->byte_count.
*/
pkt_size += sizeof(pkt.reserved);
ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
(unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
return ret;
ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
if (ret)
return ret;
if (comp_pkt.completion_status != 0) {
dev_err(&hbus->hdev->device,
"Write Config Block failed: 0x%x\n",
comp_pkt.completion_status);
return -EIO;
}
return 0;
}
/**
* hv_register_block_invalidate() - Invoked when a config block invalidation
* arrives from the back-end driver.
* @pdev: The PCI driver's representation for this device.
* @context: Identifies the device.
* @block_invalidate: Identifies all of the blocks being invalidated.
*
* Return: 0 on success, -errno on failure
*/
int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
void (*block_invalidate)(void *context,
u64 block_mask))
{
struct hv_pcibus_device *hbus =
container_of(pdev->bus->sysdata, struct hv_pcibus_device,
sysdata);
struct hv_pci_dev *hpdev;
hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
if (!hpdev)
return -ENODEV;
hpdev->block_invalidate = block_invalidate;
hpdev->invalidate_context = context;
put_pcichild(hpdev);
return 0;
}
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
struct tran_int_desc *int_desc)
@@ -1968,6 +2251,7 @@ static void hv_pci_onchannelcallback(void *context)
struct pci_response *response;
struct pci_incoming_message *new_message;
struct pci_bus_relations *bus_rel;
struct pci_dev_inval_block *inval;
struct pci_dev_incoming *dev_message;
struct hv_pci_dev *hpdev;
@@ -2045,6 +2329,21 @@ static void hv_pci_onchannelcallback(void *context)
}
break;
case PCI_INVALIDATE_BLOCK:
inval = (struct pci_dev_inval_block *)buffer;
hpdev = get_pcichild_wslot(hbus,
inval->wslot.slot);
if (hpdev) {
if (hpdev->block_invalidate) {
hpdev->block_invalidate(
hpdev->invalidate_context,
inval->block_mask);
}
put_pcichild(hpdev);
}
break;
default:
dev_warn(&hbus->hdev->device,
"Unimplemented protocol message %x\n",
@@ -2743,10 +3042,19 @@ static struct hv_driver hv_pci_drv = {
static void __exit exit_hv_pci_drv(void)
{
vmbus_driver_unregister(&hv_pci_drv);
hvpci_block_ops.read_block = NULL;
hvpci_block_ops.write_block = NULL;
hvpci_block_ops.reg_blk_invalidate = NULL;
}
static int __init init_hv_pci_drv(void)
{
/* Initialize PCI block r/w interface */
hvpci_block_ops.read_block = hv_read_config_block;
hvpci_block_ops.write_block = hv_write_config_block;
hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
return vmbus_driver_register(&hv_pci_drv);
}
......
@@ -1578,4 +1578,33 @@ hv_pkt_iter_next(struct vmbus_channel *channel,
for (pkt = hv_pkt_iter_first(channel); pkt; \
pkt = hv_pkt_iter_next(channel, pkt))
/*
* Interface for passing data between SR-IOV PF and VF drivers. The VF driver
* sends requests to read and write blocks. Each block must be 128 bytes or
* smaller. Optionally, the VF driver can register a callback function which
* will be invoked when the host says that one or more of the first 64 block
* IDs is "invalid" which means that the VF driver should reread them.
*/
#define HV_CONFIG_BLOCK_SIZE_MAX 128
int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
unsigned int block_id, unsigned int *bytes_returned);
int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
unsigned int block_id);
int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
void (*block_invalidate)(void *context,
u64 block_mask));
struct hyperv_pci_block_ops {
int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
unsigned int block_id, unsigned int *bytes_returned);
int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
unsigned int block_id);
int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
void (*block_invalidate)(void *context,
u64 block_mask));
};
extern struct hyperv_pci_block_ops hvpci_block_ops;
#endif /* _HYPERV_H */
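One caveat worth spelling out: the callback registered through hyperv_reg_block_invalidate() is invoked from hv_pci_onchannelcallback(), i.e. VMBus channel-callback context, so it must not block. The mlx5 code above defers to a workqueue, and a hypothetical consumer should do the same (a hedged sketch; example_* names are illustrative, and INIT_WORK(&edev->reread_work, example_reread) is assumed done at probe time):

#include <linux/bits.h>
#include <linux/hyperv.h>
#include <linux/pci.h>
#include <linux/workqueue.h>

struct example_dev {                    /* hypothetical driver state */
        struct pci_dev *pdev;
        struct work_struct reread_work;
        u64 pending_mask;               /* update races ignored for brevity */
};

static void example_reread(struct work_struct *work)
{
        struct example_dev *edev =
                container_of(work, struct example_dev, reread_work);
        u8 buf[HV_CONFIG_BLOCK_SIZE_MAX];
        unsigned int bytes_returned, blk;

        for (blk = 0; blk < 64; blk++)
                if (edev->pending_mask & BIT_ULL(blk))
                        hyperv_read_cfg_blk(edev->pdev, buf, sizeof(buf),
                                            blk, &bytes_returned);
        edev->pending_mask = 0;
}

/* Channel-callback context: just record the mask and defer the reread */
static void example_blk_invalidate(void *context, u64 block_mask)
{
        struct example_dev *edev = context;

        edev->pending_mask |= block_mask;
        schedule_work(&edev->reread_work);
}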
@@ -659,6 +659,7 @@ struct mlx5_clock {
struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
struct mlx5_hv_vhca;
struct mlx5_core_dev {
struct device *device;
@@ -706,6 +707,7 @@ struct mlx5_core_dev {
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
u32 vsc_addr;
struct mlx5_hv_vhca *hv_vhca;
};
struct mlx5_db {
......