Commit ab0faf5f authored by David S. Miller

Merge tag 'mlx5-fixes-2020-09-30' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

From: Saeed Mahameed <saeedm@nvidia.com>

====================
This series introduces some fixes to the mlx5 driver.

v1->v2:
 - Patch #1: Don't return while the mutex is held. (Dave)

v2->v3:
 - Drop patch #1, will consider a better approach (Jakub)
 - Use cpu_relax() instead of cond_resched() (Jakub)
 - Use while (i--) to reverse the cleanup loop (Jakub)
 - Drop the old Mellanox email sign-off and change the committer email
   (Jakub)

Please pull and let me know if there is any problem.

For -stable v4.15
 ('net/mlx5e: Fix VLAN cleanup flow')
 ('net/mlx5e: Fix VLAN create flow')

For -stable v4.16
 ('net/mlx5: Fix request_irqs error flow')

For -stable v5.4
 ('net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU')
 ('net/mlx5: Avoid possible free of command entry while timeout comp handler')

For -stable v5.7
 ('net/mlx5e: Fix return status when setting unsupported FEC mode')

For -stable v5.8
 ('net/mlx5e: Fix race condition on nhe->n pointer in neigh update')
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9d8c05ad 1253935a
......@@ -69,12 +69,10 @@ enum {
MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
};
static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out,
void *uout, int uout_size,
mlx5_cmd_cbk_t cbk,
void *context, int page_queue)
static struct mlx5_cmd_work_ent *
cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out, void *uout, int uout_size,
mlx5_cmd_cbk_t cbk, void *context, int page_queue)
{
gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
struct mlx5_cmd_work_ent *ent;
......@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
if (!ent)
return ERR_PTR(-ENOMEM);
ent->idx = -EINVAL;
ent->in = in;
ent->out = out;
ent->uout = uout;
......@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
ent->context = context;
ent->cmd = cmd;
ent->page_queue = page_queue;
refcount_set(&ent->refcnt, 1);
return ent;
}
static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
{
kfree(ent);
}
static u8 alloc_token(struct mlx5_cmd *cmd)
{
u8 token;
......@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
return token;
}
static int alloc_ent(struct mlx5_cmd *cmd)
static int cmd_alloc_index(struct mlx5_cmd *cmd)
{
unsigned long flags;
int ret;
......@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
}
static void free_ent(struct mlx5_cmd *cmd, int idx)
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
unsigned long flags;
......@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
{
refcount_inc(&ent->refcnt);
}
static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
{
if (!refcount_dec_and_test(&ent->refcnt))
return;
if (ent->idx >= 0)
cmd_free_index(ent->cmd, ent->idx);
cmd_free_ent(ent);
}
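
For illustration only, and not part of the patch: a minimal userspace sketch of the get/put ownership pattern that cmd_ent_get()/cmd_ent_put() implement above, with C11 atomics standing in for the kernel's refcount_t. Every name in it (demo_ent, demo_ent_alloc, ...) is invented for the example.

/* Illustration only: cmd_ent_get()/cmd_ent_put() modelled with C11
 * atomics instead of the kernel's refcount_t. Not kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_ent {
        atomic_int refcnt;      /* one reference for the creator */
        int idx;                /* command slot, -1 until allocated */
};

static struct demo_ent *demo_ent_alloc(void)
{
        struct demo_ent *ent = calloc(1, sizeof(*ent));

        if (!ent)
                return NULL;
        atomic_init(&ent->refcnt, 1);
        ent->idx = -1;
        return ent;
}

static void demo_ent_get(struct demo_ent *ent)
{
        atomic_fetch_add(&ent->refcnt, 1);
}

static void demo_ent_put(struct demo_ent *ent)
{
        /* only the last reference releases the slot and frees the entry */
        if (atomic_fetch_sub(&ent->refcnt, 1) != 1)
                return;
        if (ent->idx >= 0)
                printf("free command slot %d\n", ent->idx);
        free(ent);
}

int main(void)
{
        struct demo_ent *ent = demo_ent_alloc();

        if (!ent)
                return 1;
        demo_ent_get(ent);      /* e.g. a completion handler takes a ref */
        demo_ent_put(ent);      /* that handler is done */
        demo_ent_put(ent);      /* creator is done: freed here */
        return 0;
}

The idea is that every party that may still touch the entry (the issuer, the timeout work, the firmware completion) holds its own reference, so the command index and the memory are released exactly once, by whoever drops the last one.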
static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
{
return cmd->cmd_buf + (idx << cmd->log_stride);
......@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
ent->ret = -ETIMEDOUT;
}
static void free_cmd(struct mlx5_cmd_work_ent *ent)
{
kfree(ent);
}
static int verify_signature(struct mlx5_cmd_work_ent *ent)
{
struct mlx5_cmd_mailbox *next = ent->out->next;
......@@ -837,11 +853,22 @@ static void cb_timeout_handler(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
cmd);
mlx5_cmd_eq_recover(dev);
/* Maybe it got handled by the EQ recovery flow? */
if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
goto out; /* phew, already handled */
}
ent->ret = -ETIMEDOUT;
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
msg_to_opcode(ent->in));
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
out:
cmd_ent_put(ent); /* matches the cmd_ent_get() taken when scheduling the delayed work */
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
......@@ -856,6 +883,32 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
return cmd->allowed_opcode == opcode;
}
static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
{
unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
int idx;
retry:
idx = cmd_alloc_index(cmd);
if (idx < 0 && time_before(jiffies, alloc_end)) {
/* Index allocation can fail on heavy load of commands. This is a temporary
* situation as the current command already holds the semaphore, meaning that
* another command completion is being handled and it is expected to release
* the entry index soon.
*/
cpu_relax();
goto retry;
}
return idx;
}
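
Purely as an aside, not driver code: the same bounded "retry until a deadline, hinting the CPU between attempts" shape as cmd_alloc_index_retry(), in portable C. CLOCK_MONOTONIC stands in for jiffies, sched_yield() for cpu_relax(), and try_alloc() is a made-up stub that always fails.

/* Illustration only: bounded busy-wait retry in portable C. */
#include <sched.h>
#include <time.h>

static int try_alloc(void)
{
        return -1;                      /* pretend allocation keeps failing */
}

static long long now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

static int alloc_with_retry(int budget_ms)
{
        long long deadline = now_ms() + budget_ms;
        int idx;

        /* always try at least once, then keep trying until the deadline */
        while ((idx = try_alloc()) < 0 && now_ms() < deadline)
                sched_yield();
        return idx;
}

int main(void)
{
        return alloc_with_retry(10) < 0;        /* exits non-zero after ~10 ms */
}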
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
{
return pci_channel_offline(dev->pdev) ||
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
}
static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
......@@ -873,14 +926,14 @@ static void cmd_work_handler(struct work_struct *work)
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
alloc_ret = alloc_ent(cmd);
alloc_ret = cmd_alloc_index_retry(cmd);
if (alloc_ret < 0) {
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
if (ent->callback) {
ent->callback(-EAGAIN, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
free_cmd(ent);
cmd_ent_put(ent);
} else {
ent->ret = -EAGAIN;
complete(&ent->done);
......@@ -916,15 +969,12 @@ static void cmd_work_handler(struct work_struct *work)
ent->ts1 = ktime_get_ns();
cmd_mode = cmd->mode;
if (ent->callback)
schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
cmd_ent_get(ent);
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
/* Skip sending command to fw if internal error */
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
cmd->state != MLX5_CMDIF_STATE_UP ||
!opcode_allowed(&dev->cmd, ent->op)) {
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
u8 status = 0;
u32 drv_synd;
......@@ -933,13 +983,10 @@ static void cmd_work_handler(struct work_struct *work)
MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
/* no doorbell, no need to keep the entry */
free_ent(cmd, ent->idx);
if (ent->callback)
free_cmd(ent);
return;
}
cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
......@@ -983,6 +1030,35 @@ static const char *deliv_status_to_str(u8 status)
}
}
enum {
MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000,
};
static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
mlx5_cmd_eq_recover(dev);
/* Re-wait on the ent->done after executing the recovery flow. If the
* recovery flow (or any other recovery flow running simultaneously)
* has recovered an EQE, it should cause the entry to be completed by
* the command interface.
*/
if (wait_for_completion_timeout(&ent->done, timeout)) {
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
return;
}
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}
static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
......@@ -994,12 +1070,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
ent->ret = -ECANCELED;
goto out_err;
}
if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
if (cmd->mode == CMD_MODE_POLLING || ent->polling)
wait_for_completion(&ent->done);
} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}
else if (!wait_for_completion_timeout(&ent->done, timeout))
wait_func_handle_exec_timeout(dev, ent);
out_err:
err = ent->ret;
......@@ -1039,11 +1113,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;
ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
page_queue);
ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
callback, context, page_queue);
if (IS_ERR(ent))
return PTR_ERR(ent);
/* put for this ent is when consumed, depending on the use case
* 1) (!callback) blocking flow: by caller after wait_func completes
* 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
*/
ent->token = token;
ent->polling = force_polling;
......@@ -1062,12 +1141,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
}
if (callback)
goto out;
goto out; /* mlx5_cmd_comp_handler() will put(ent) */
err = wait_func(dev, ent);
if (err == -ETIMEDOUT)
goto out;
if (err == -ECANCELED)
if (err == -ETIMEDOUT || err == -ECANCELED)
goto out_free;
ds = ent->ts2 - ent->ts1;
......@@ -1085,7 +1162,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
*status = ent->status;
out_free:
free_cmd(ent);
cmd_ent_put(ent);
out:
return err;
}
......@@ -1516,14 +1593,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
if (!forced) {
mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
ent->idx);
free_ent(cmd, ent->idx);
free_cmd(ent);
cmd_ent_put(ent);
}
continue;
}
if (ent->callback)
cancel_delayed_work(&ent->cb_timeout_work);
if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work))
cmd_ent_put(ent); /* timeout work was canceled */
if (!forced || /* Real FW completion */
pci_channel_offline(dev->pdev) || /* FW is inaccessible */
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
cmd_ent_put(ent);
if (ent->page_queue)
sem = &cmd->pages_sem;
else
......@@ -1545,10 +1627,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
ent->ret, deliv_status_to_str(ent->status), ent->status);
}
/* only real completion will free the entry slot */
if (!forced)
free_ent(cmd, ent->idx);
if (ent->callback) {
ds = ent->ts2 - ent->ts1;
if (ent->op < MLX5_CMD_OP_MAX) {
......@@ -1576,10 +1654,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
free_msg(dev, ent->in);
err = err ? err : ent->status;
if (!forced)
free_cmd(ent);
/* final consumer is done, release ent */
cmd_ent_put(ent);
callback(err, context);
} else {
/* release wait_func() so mlx5_cmd_invoke()
* can make the final ent_put()
*/
complete(&ent->done);
}
up(sem);
......@@ -1589,8 +1670,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
{
struct mlx5_cmd *cmd = &dev->cmd;
unsigned long bitmask;
unsigned long flags;
u64 vector;
int i;
/* wait for pending handlers to complete */
mlx5_eq_synchronize_cmd_irq(dev);
......@@ -1599,11 +1683,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
if (!vector)
goto no_trig;
bitmask = vector;
/* we must increment the allocated entries refcount before triggering the completions
* to guarantee pending commands will not get freed in the meantime.
* For that reason, it also has to be done inside the alloc_lock.
*/
for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
cmd_ent_get(cmd->ent_arr[i]);
vector |= MLX5_TRIGGERED_CMD_COMP;
spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
mlx5_cmd_comp_handler(dev, vector, true);
for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
cmd_ent_put(cmd->ent_arr[i]);
return;
no_trig:
......@@ -1711,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
u8 token;
opcode = MLX5_GET(mbox_in, in, opcode);
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
!opcode_allowed(&dev->cmd, opcode)) {
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
MLX5_SET(mbox_out, out, status, status);
MLX5_SET(mbox_out, out, syndrome, drv_synd);
......
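
Again illustrative only, not part of the patch: a compact single-threaded model of the rule the series applies to the timeout work, namely take a reference only if the work was actually queued, and drop that reference exactly once, either in the timeout handler itself or by whoever successfully cancels the work. A plain bool stands in for the workqueue state, and all names below are invented.

/* Illustration only: "one reference per queued timeout work", with a bool
 * standing in for schedule_delayed_work()/cancel_delayed_work(). Not kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_ent {
        atomic_int refcnt;
        bool work_queued;
};

static void toy_get(struct toy_ent *ent)
{
        atomic_fetch_add(&ent->refcnt, 1);
}

static void toy_put(struct toy_ent *ent)
{
        if (atomic_fetch_sub(&ent->refcnt, 1) == 1) {
                printf("entry freed\n");
                free(ent);
        }
}

/* mirrors: if (schedule_delayed_work(...)) cmd_ent_get(ent); */
static void toy_arm_timeout(struct toy_ent *ent)
{
        if (!ent->work_queued) {        /* work was not already pending */
                ent->work_queued = true;
                toy_get(ent);           /* the queued work now owns a ref */
        }
}

/* mirrors: if (cancel_delayed_work(...)) cmd_ent_put(ent); */
static void toy_cancel_timeout(struct toy_ent *ent)
{
        if (ent->work_queued) {         /* dequeued before it ever ran */
                ent->work_queued = false;
                toy_put(ent);           /* drop the ref the work held */
        }
        /* if it already ran, the handler dropped that ref itself */
}

int main(void)
{
        struct toy_ent *ent = calloc(1, sizeof(*ent));

        if (!ent)
                return 1;
        atomic_init(&ent->refcnt, 1);   /* creator's reference */
        toy_arm_timeout(ent);           /* refcnt == 2 */
        toy_cancel_timeout(ent);        /* refcnt == 1 */
        toy_put(ent);                   /* creator done: freed here */
        return 0;
}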
......@@ -91,7 +91,12 @@ struct page_pool;
#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
* WQEs. This page will absorb write overflow by the hardware when
* receiving packets larger than MTU. These oversize packets are
* dropped by the driver at a later stage.
*/
#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8))
#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS))
#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
#define MLX5E_MAX_RQ_NUM_MTTS \
......@@ -617,6 +622,7 @@ struct mlx5e_rq {
u32 rqn;
struct mlx5_core_dev *mdev;
struct mlx5_core_mkey umr_mkey;
struct mlx5e_dma_info wqe_overflow;
/* XDP read-mostly */
struct xdp_rxq_info xdp_rxq;
......
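
A small worked example, not taken from the driver, of what the extra overflow page costs in MTT entries: because ALIGN() rounds up to a multiple of 8, the "+ 1" only grows the per-WQE MTT count when the page count was already a multiple of 8. The value 8 for pages-per-WQE below is an assumption chosen for the example, not the driver's actual configuration.

/* Illustration only: effect of the "+ 1" overflow page on the per-WQE
 * MTT count. PAGES_PER_WQE == 8 is an assumed example value.
 */
#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) / (a) * (a))
#define PAGES_PER_WQE   8               /* assumption for the example */

int main(void)
{
        printf("old: %d MTTs per WQE\n", ALIGN_UP(PAGES_PER_WQE, 8));     /* 8  */
        printf("new: %d MTTs per WQE\n", ALIGN_UP(PAGES_PER_WQE + 1, 8)); /* 16 */
        return 0;
}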
......@@ -569,6 +569,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
return -EOPNOTSUPP;
if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
return -EOPNOTSUPP;
MLX5_SET(pplm_reg, in, local_port, 1);
err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
if (err)
......
......@@ -110,11 +110,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
rtnl_unlock();
}
struct neigh_update_work {
struct work_struct work;
struct neighbour *n;
struct mlx5e_neigh_hash_entry *nhe;
};
static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
{
neigh_release(update_work->n);
mlx5e_rep_neigh_entry_release(update_work->nhe);
kfree(update_work);
}
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
struct mlx5e_neigh_hash_entry *nhe =
container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
struct neighbour *n = nhe->n;
struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
work);
struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
struct neighbour *n = update_work->n;
struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv;
......@@ -146,30 +160,42 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
mlx5e_rep_neigh_entry_release(nhe);
rtnl_unlock();
neigh_release(n);
mlx5e_release_neigh_update_work(update_work);
}
static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry *nhe,
static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
struct neighbour *n)
{
/* Take a reference to ensure the neighbour and mlx5 encap
* entry won't be destructed until we drop the reference in
* delayed work.
*/
neigh_hold(n);
struct neigh_update_work *update_work;
struct mlx5e_neigh_hash_entry *nhe;
struct mlx5e_neigh m_neigh = {};
/* This assignment is valid as long as the neigh reference
* is taken
*/
nhe->n = n;
update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
if (WARN_ON(!update_work))
return NULL;
if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
mlx5e_rep_neigh_entry_release(nhe);
neigh_release(n);
m_neigh.dev = n->dev;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
/* Obtain reference to nhe as last step in order not to release it in
* atomic context.
*/
rcu_read_lock();
nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
rcu_read_unlock();
if (!nhe) {
kfree(update_work);
return NULL;
}
INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
neigh_hold(n);
update_work->n = n;
update_work->nhe = nhe;
return update_work;
}
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
......@@ -181,7 +207,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
struct mlx5e_neigh m_neigh = {};
struct neigh_update_work *update_work;
struct neigh_parms *p;
struct neighbour *n;
bool found = false;
......@@ -196,17 +222,11 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
#endif
return NOTIFY_DONE;
m_neigh.dev = n->dev;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
rcu_read_lock();
nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
rcu_read_unlock();
if (!nhe)
update_work = mlx5e_alloc_neigh_update_work(priv, n);
if (!update_work)
return NOTIFY_DONE;
mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
queue_work(priv->wq, &update_work->work);
break;
case NETEVENT_DELAY_PROBE_TIME_UPDATE:
......@@ -352,7 +372,6 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
(*nhe)->priv = priv;
memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);
......
......@@ -246,8 +246,10 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
ip6_offset /= 4;
if (ip6_offset < 8)
if (ip6_offset < 4)
tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
else if (ip6_offset < 8)
tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
else
return -EOPNOTSUPP;
break;
......
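
For illustration, not the driver code: how a 32-bit-word offset into an IPv6 address rewrite splits between the source and destination addresses, which is the off-by-four the hunk above corrects, with words 0..3 belonging to the source and 4..7 to the destination. The struct and helper below are invented for the example.

/* Illustration only: mapping a 32-bit-word offset of an IPv6 header
 * rewrite onto the source (words 0..3) or destination (words 4..7)
 * address. The struct and helper are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_tuple {
        uint32_t src_v6[4];     /* words 0..3 */
        uint32_t dst_v6[4];     /* words 4..7 */
};

static int set_ip6_word(struct demo_tuple *t, int word, uint32_t val)
{
        if (word < 4)
                t->src_v6[word] = val;          /* source address words */
        else if (word < 8)
                t->dst_v6[word - 4] = val;      /* destination address words */
        else
                return -1;                      /* out of range */
        return 0;
}

int main(void)
{
        struct demo_tuple t = { { 0 }, { 0 } };

        set_ip6_word(&t, 2, 0x1);       /* lands in src_v6[2] */
        set_ip6_word(&t, 5, 0x2);       /* lands in dst_v6[1] */
        printf("src[2]=%x dst[1]=%x\n",
               (unsigned int)t.src_v6[2], (unsigned int)t.dst_v6[1]);
        return 0;
}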
......@@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
break;
}
if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
return 0;
*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(*rule_p)) {
......@@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
if (priv->fs.vlan.cvlan_filter_disabled &&
!(priv->netdev->flags & IFF_PROMISC))
if (priv->fs.vlan.cvlan_filter_disabled)
mlx5e_add_any_vid_rules(priv);
}
......@@ -415,8 +417,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
if (priv->fs.vlan.cvlan_filter_disabled &&
!(priv->netdev->flags & IFF_PROMISC))
WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
/* must be called after DESTROY bit is set and
* set_rx_mode is called and flushed
*/
if (priv->fs.vlan.cvlan_filter_disabled)
mlx5e_del_any_vid_rules(priv);
}
......
......@@ -246,12 +246,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
u64 npages, u8 page_shift,
struct mlx5_core_mkey *umr_mkey)
struct mlx5_core_mkey *umr_mkey,
dma_addr_t filler_addr)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_mtt *mtt;
int inlen;
void *mkc;
u32 *in;
int err;
int i;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
......@@ -271,6 +276,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, translations_octword_size,
MLX5_MTT_OCTW(npages));
MLX5_SET(mkc, mkc, log_page_size, page_shift);
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
MLX5_MTT_OCTW(npages));
/* Initialize the mkey with all MTTs pointing to a default
* page (filler_addr). When the channels are activated, UMR
* WQEs will redirect the RX WQEs to the actual memory from
* the RQ's pool, while the gaps (wqe_overflow) remain mapped
* to the default page.
*/
mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for (i = 0 ; i < npages ; i++)
mtt[i].ptag = cpu_to_be64(filler_addr);
err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
......@@ -282,7 +299,8 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
{
u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
rq->wqe_overflow.addr);
}
static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
......@@ -350,6 +368,28 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_rq_cqe_err(rq);
}
static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
{
rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
if (!rq->wqe_overflow.page)
return -ENOMEM;
rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
PAGE_SIZE, rq->buff.map_dir);
if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
__free_page(rq->wqe_overflow.page);
return -ENOMEM;
}
return 0;
}
static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
{
dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
rq->buff.map_dir);
__free_page(rq->wqe_overflow.page);
}
static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
......@@ -396,7 +436,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
if (err < 0)
goto err_rq_wq_destroy;
goto err_rq_xdp_prog;
rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
......@@ -406,6 +446,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
&rq->wq_ctrl);
if (err)
goto err_rq_xdp;
err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
if (err)
goto err_rq_wq_destroy;
......@@ -424,18 +468,18 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
goto err_rq_wq_destroy;
goto err_rq_drop_page;
rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
err = mlx5e_rq_alloc_mpwqe_info(rq, c);
if (err)
goto err_free;
goto err_rq_mkey;
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
&rq->wq_ctrl);
if (err)
goto err_rq_wq_destroy;
goto err_rq_xdp;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
......@@ -450,19 +494,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->wqe.frags) {
err = -ENOMEM;
goto err_free;
goto err_rq_wq_destroy;
}
err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
if (err)
goto err_free;
goto err_rq_frags;
rq->mkey_be = c->mkey_be;
}
err = mlx5e_rq_set_handlers(rq, params, xsk);
if (err)
goto err_free;
goto err_free_by_rq_type;
if (xsk) {
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
......@@ -486,13 +530,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (IS_ERR(rq->page_pool)) {
err = PTR_ERR(rq->page_pool);
rq->page_pool = NULL;
goto err_free;
goto err_free_by_rq_type;
}
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
}
if (err)
goto err_free;
goto err_free_by_rq_type;
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
......@@ -542,23 +586,27 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
return 0;
err_free:
err_free_by_rq_type:
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
err_rq_mkey:
mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
err_rq_drop_page:
mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
kvfree(rq->wqe.frags);
mlx5e_free_di_list(rq);
err_rq_frags:
kvfree(rq->wqe.frags);
}
err_rq_wq_destroy:
mlx5_wq_destroy(&rq->wq_ctrl);
err_rq_xdp:
xdp_rxq_info_unreg(&rq->xdp_rxq);
err_rq_xdp_prog:
if (params->xdp_prog)
bpf_prog_put(params->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
}
......@@ -580,6 +628,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
kvfree(rq->wqe.frags);
......@@ -4177,6 +4226,21 @@ int mlx5e_get_vf_stats(struct net_device *dev,
}
#endif
static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
struct sk_buff *skb)
{
switch (skb->inner_protocol) {
case htons(ETH_P_IP):
case htons(ETH_P_IPV6):
case htons(ETH_P_TEB):
return true;
case htons(ETH_P_MPLS_UC):
case htons(ETH_P_MPLS_MC):
return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
}
return false;
}
static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
struct sk_buff *skb,
netdev_features_t features)
......@@ -4199,7 +4263,9 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
switch (proto) {
case IPPROTO_GRE:
if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
return features;
break;
case IPPROTO_IPIP:
case IPPROTO_IPV6:
if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
......
......@@ -135,12 +135,6 @@ struct mlx5e_neigh_hash_entry {
/* encap list sharing the same neigh */
struct list_head encap_list;
/* valid only when the neigh reference is taken during
* neigh_update_work workqueue callback.
*/
struct neighbour *n;
struct work_struct neigh_update_work;
/* neigh hash entry can be deleted only when the refcount is zero.
* refcount is needed to avoid neigh hash entry removal by TC, while
* it's used by the neigh notification call.
......
......@@ -189,6 +189,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
return count_eqe;
}
static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags)
__acquires(&eq->lock)
{
if (in_irq())
spin_lock(&eq->lock);
else
spin_lock_irqsave(&eq->lock, *flags);
}
static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags)
__releases(&eq->lock)
{
if (in_irq())
spin_unlock(&eq->lock);
else
spin_unlock_irqrestore(&eq->lock, *flags);
}
enum async_eq_nb_action {
ASYNC_EQ_IRQ_HANDLER = 0,
ASYNC_EQ_RECOVER = 1,
};
static int mlx5_eq_async_int(struct notifier_block *nb,
unsigned long action, void *data)
{
......@@ -198,11 +221,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
struct mlx5_eq_table *eqt;
struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
unsigned long flags;
int num_eqes = 0;
dev = eq->dev;
eqt = dev->priv.eq_table;
mlx5_eq_async_int_lock(eq_async, &flags);
eqe = next_eqe_sw(eq);
if (!eqe)
goto out;
......@@ -223,8 +249,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
out:
eq_update_ci(eq, 1);
mlx5_eq_async_int_unlock(eq_async, &flags);
return 0;
return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0;
}
void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
{
struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
int eqes;
eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
if (eqes)
mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
}
static void init_eq_buf(struct mlx5_eq *eq)
......@@ -569,6 +606,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
int err;
eq->irq_nb.notifier_call = mlx5_eq_async_int;
spin_lock_init(&eq->lock);
err = create_async_eq(dev, &eq->core, param);
if (err) {
......@@ -656,8 +694,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->pages_eq, "pages");
cleanup_async_eq(dev, &table->async_eq, "async");
mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
mlx5_cmd_use_polling(dev);
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}
......
......@@ -37,6 +37,7 @@ struct mlx5_eq {
struct mlx5_eq_async {
struct mlx5_eq core;
struct notifier_block irq_nb;
spinlock_t lock; /* To avoid races between the irq EQ handler and resiliency flows */
};
struct mlx5_eq_comp {
......@@ -81,6 +82,7 @@ void mlx5_cq_tasklet_cb(unsigned long data);
struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
......
......@@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
u32 npages;
u32 i = 0;
if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
if (!mlx5_cmd_is_down(dev))
return mlx5_cmd_exec(dev, in, in_size, out, out_size);
/* No hard feelings, we want our pages back! */
......
......@@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
return 0;
err_request_irq:
for (; i >= 0; i--) {
while (i--) {
struct mlx5_irq *irq = mlx5_irq_get(dev, i);
int irqn = pci_irq_vector(dev->pdev, i);
......
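
Illustrative only: the generic error-unwind shape behind the "while (i--) to reverse the cleanup loop" changelog item. When setup fails at index i, entry i itself was never acquired, so a decrement-first loop releases exactly entries i-1 down to 0, whereas "for (; i >= 0; i--)" would also touch the entry that was never set up. acquire()/release() below are made-up stand-ins, not the driver's IRQ helpers.

/* Illustration only: decrement-first unwind after a partial setup.
 * acquire()/release() are made-up stand-ins for the real resources.
 */
#include <stdio.h>

#define N 8

static int acquire(int i)
{
        return i == 5 ? -1 : 0;         /* pretend index 5 fails */
}

static void release(int i)
{
        printf("release %d\n", i);
}

int main(void)
{
        int i;

        for (i = 0; i < N; i++)
                if (acquire(i) < 0)
                        goto err;
        return 0;
err:
        /* i is the index that failed and was never acquired; while (i--)
         * releases i-1 .. 0 and stops, whereas "for (; i >= 0; i--)"
         * would also release the entry that was never set up.
         */
        while (i--)
                release(i);
        return 1;
}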
......@@ -767,6 +767,8 @@ struct mlx5_cmd_work_ent {
u64 ts2;
u16 op;
bool polling;
/* Track the max comp handlers */
refcount_t refcnt;
};
struct mlx5_pas {
......@@ -933,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
void *out, int out_size);
void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
......