Commit 5d6b0cb3 authored by Denis Drozdov, committed by Saeed Mahameed

RDMA/netdev: Fix netlink support in IPoIB

IPoIB netlink support was broken by the commit referenced below: the
rdma_netdev integration relies on a netdev allocation flow that is
controlled by the ipoib driver, while netdev's rtnl_newlink
implementation assumes that the netdev is allocated by netlink.
This mismatch leads to a crash in __ipoib_device_add when it tries to
reuse the netlink-allocated device.

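This patch fixes the kernel oops for both mlx4 and mlx5
devices that is triggered when an IPoIB child interface is created
through netlink (the example command is missing from this copy of the
commit message).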

Fixes: cd565b4b ("IB/IPoIB: Support acceleration options callbacks")
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent f6a8a19b
...@@ -2643,13 +2643,27 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, ...@@ -2643,13 +2643,27 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
if (!netdev) if (!netdev)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
rc = params.initialize_rdma_netdev(device, port_num, netdev,
params.param);
if (rc) {
free_netdev(netdev);
return ERR_PTR(rc);
}
return netdev; return netdev;
} }
EXPORT_SYMBOL(rdma_alloc_netdev); EXPORT_SYMBOL(rdma_alloc_netdev);
/*
 * rdma_init_netdev - run the device's rdma_netdev initializer on an
 * already-allocated net_device.
 *
 * @device:   IB device that may provide rdma_netdev_get_params
 * @port_num: port number forwarded to the device callbacks
 * @type:     rdma netdev type forwarded to rdma_netdev_get_params
 * @netdev:   caller-provided net_device to initialize
 *
 * @name, @name_assign_type and @setup are accepted but not used by this
 * function.
 *
 * Returns -EOPNOTSUPP when the device has no rdma_netdev_get_params
 * callback, the error from rdma_netdev_get_params if that call fails, and
 * otherwise the return value of the initialize_rdma_netdev callback.
 */
int rdma_init_netdev(struct ib_device *device, u8 port_num,
		     enum rdma_netdev_t type, const char *name,
		     unsigned char name_assign_type,
		     void (*setup)(struct net_device *),
		     struct net_device *netdev)
{
	struct rdma_netdev_alloc_params params;
	int err;

	if (!device->rdma_netdev_get_params)
		return -EOPNOTSUPP;

	err = device->rdma_netdev_get_params(device, port_num, type, &params);
	if (!err)
		err = params.initialize_rdma_netdev(device, port_num,
						    netdev, params.param);

	return err;
}
EXPORT_SYMBOL(rdma_init_netdev);
...@@ -499,8 +499,10 @@ void ipoib_reap_ah(struct work_struct *work); ...@@ -499,8 +499,10 @@ void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid); struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev);
struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
const char *format); const char *format);
int ipoib_intf_init(struct ib_device *hca, u8 port, const char *format,
struct net_device *dev);
void ipoib_ib_tx_timer_func(struct timer_list *t); void ipoib_ib_tx_timer_func(struct timer_list *t);
void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work);
...@@ -531,6 +533,8 @@ int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); ...@@ -531,6 +533,8 @@ int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req); struct ipoib_tx_buf *tx_req);
struct rtnl_link_ops *ipoib_get_link_ops(void);
static inline void ipoib_build_sge(struct ipoib_dev_priv *priv, static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req) struct ipoib_tx_buf *tx_req)
{ {
......
...@@ -2115,77 +2115,58 @@ static const struct net_device_ops ipoib_netdev_default_pf = { ...@@ -2115,77 +2115,58 @@ static const struct net_device_ops ipoib_netdev_default_pf = {
.ndo_stop = ipoib_ib_dev_stop_default, .ndo_stop = ipoib_ib_dev_stop_default,
}; };
static struct net_device static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
*ipoib_create_netdev_default(struct ib_device *hca, const char *name)
const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *))
{
struct net_device *dev;
struct rdma_netdev *rn;
dev = alloc_netdev((int)sizeof(struct rdma_netdev),
name,
name_assign_type, setup);
if (!dev)
return NULL;
rn = netdev_priv(dev);
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
dev->netdev_ops = &ipoib_netdev_default_pf;
return dev;
}
static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
const char *name)
{ {
struct net_device *dev; struct net_device *dev;
dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name, dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
NET_NAME_UNKNOWN, ipoib_setup_common); NET_NAME_UNKNOWN, ipoib_setup_common);
if (!IS_ERR(dev)) if (!IS_ERR(dev) || PTR_ERR(dev) != -EOPNOTSUPP)
return dev; return dev;
if (PTR_ERR(dev) != -EOPNOTSUPP)
return NULL;
return ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN, dev = alloc_netdev(sizeof(struct rdma_netdev), name, NET_NAME_UNKNOWN,
ipoib_setup_common); ipoib_setup_common);
if (!dev)
return ERR_PTR(-ENOMEM);
return dev;
} }
struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
const char *name) struct net_device *dev)
{ {
struct net_device *dev; struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_dev_priv *priv; struct ipoib_dev_priv *priv;
struct rdma_netdev *rn; int rc;
priv = kzalloc(sizeof(*priv), GFP_KERNEL); priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) if (!priv)
return NULL; return -ENOMEM;
priv->ca = hca; priv->ca = hca;
priv->port = port; priv->port = port;
dev = ipoib_get_netdev(hca, port, name); rc = rdma_init_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
if (!dev) NET_NAME_UNKNOWN, ipoib_setup_common, dev);
goto free_priv; if (rc) {
if (rc != -EOPNOTSUPP)
goto out;
dev->netdev_ops = &ipoib_netdev_default_pf;
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
rn->hca = hca;
}
priv->rn_ops = dev->netdev_ops; priv->rn_ops = dev->netdev_ops;
/* fixme : should be after the query_cap */ if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf; dev->netdev_ops = &ipoib_netdev_ops_vf;
else else
dev->netdev_ops = &ipoib_netdev_ops_pf; dev->netdev_ops = &ipoib_netdev_ops_pf;
rn = netdev_priv(dev);
rn->clnt_priv = priv; rn->clnt_priv = priv;
/* /*
* Only the child register_netdev flows can handle priv_destructor * Only the child register_netdev flows can handle priv_destructor
* being set, so we force it to NULL here and handle manually until it * being set, so we force it to NULL here and handle manually until it
...@@ -2196,10 +2177,35 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, ...@@ -2196,10 +2177,35 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
ipoib_build_priv(dev); ipoib_build_priv(dev);
return priv; return 0;
free_priv:
out:
kfree(priv); kfree(priv);
return NULL; return rc;
}
/*
 * Allocate a net_device with ipoib_alloc_netdev() and initialize it with
 * ipoib_intf_init().
 *
 * Returns the net_device on success. On failure returns an ERR_PTR: either
 * the one produced by ipoib_alloc_netdev(), or the ipoib_intf_init() error
 * code after the freshly allocated net_device has been released with
 * free_netdev().
 *
 * On success the caller must make sure ipoib_intf_free is called, or that
 * register_netdevice succeeded and priv_destructor is set to
 * ipoib_intf_free.
 */
struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
				    const char *name)
{
	struct net_device *ndev;
	int err;

	ndev = ipoib_alloc_netdev(hca, port, name);
	if (IS_ERR(ndev))
		return ndev;

	err = ipoib_intf_init(hca, port, name, ndev);
	if (!err)
		return ndev;

	/* Initialization failed: drop the netdev allocated above. */
	free_netdev(ndev);
	return ERR_PTR(err);
}
void ipoib_intf_free(struct net_device *dev) void ipoib_intf_free(struct net_device *dev)
...@@ -2382,16 +2388,19 @@ int ipoib_add_pkey_attr(struct net_device *dev) ...@@ -2382,16 +2388,19 @@ int ipoib_add_pkey_attr(struct net_device *dev)
static struct net_device *ipoib_add_port(const char *format, static struct net_device *ipoib_add_port(const char *format,
struct ib_device *hca, u8 port) struct ib_device *hca, u8 port)
{ {
struct rtnl_link_ops *ops = ipoib_get_link_ops();
struct rdma_netdev_alloc_params params;
struct ipoib_dev_priv *priv; struct ipoib_dev_priv *priv;
struct net_device *ndev; struct net_device *ndev;
int result; int result;
priv = ipoib_intf_alloc(hca, port, format); ndev = ipoib_intf_alloc(hca, port, format);
if (!priv) { if (IS_ERR(ndev)) {
pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); pr_warn("%s, %d: ipoib_intf_alloc failed %ld\n", hca->name, port,
return ERR_PTR(-ENOMEM); PTR_ERR(ndev));
return ndev;
} }
ndev = priv->dev; priv = ipoib_priv(ndev);
INIT_IB_EVENT_HANDLER(&priv->event_handler, INIT_IB_EVENT_HANDLER(&priv->event_handler,
priv->ca, ipoib_event); priv->ca, ipoib_event);
...@@ -2412,6 +2421,14 @@ static struct net_device *ipoib_add_port(const char *format, ...@@ -2412,6 +2421,14 @@ static struct net_device *ipoib_add_port(const char *format,
return ERR_PTR(result); return ERR_PTR(result);
} }
if (hca->rdma_netdev_get_params) {
int rc = hca->rdma_netdev_get_params(hca, port,
RDMA_NETDEV_IPOIB,
&params);
if (!rc && ops->priv_size < params.sizeof_priv)
ops->priv_size = params.sizeof_priv;
}
/* /*
* We cannot set priv_destructor before register_netdev because we * We cannot set priv_destructor before register_netdev because we
* need priv to be always valid during the error flow to execute * need priv to be always valid during the error flow to execute
......
...@@ -122,12 +122,26 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, ...@@ -122,12 +122,26 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
} else } else
child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev);
if (err) {
ipoib_warn(ppriv, "failed to initialize pkey device\n");
return err;
}
err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
child_pkey, IPOIB_RTNL_CHILD); child_pkey, IPOIB_RTNL_CHILD);
if (err)
return err;
if (!err && data) if (data) {
err = ipoib_changelink(dev, tb, data, extack); err = ipoib_changelink(dev, tb, data, extack);
return err; if (err) {
unregister_netdevice(dev);
return err;
}
}
return 0;
} }
static size_t ipoib_get_size(const struct net_device *dev) static size_t ipoib_get_size(const struct net_device *dev)
...@@ -149,6 +163,11 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { ...@@ -149,6 +163,11 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.fill_info = ipoib_fill_info, .fill_info = ipoib_fill_info,
}; };
/*
 * Accessor for the file-local (static) ipoib_link_ops table.
 *
 * Returns a non-const pointer to ipoib_link_ops, so a caller can both read
 * and modify the fields of the table through it.
 */
struct rtnl_link_ops *ipoib_get_link_ops(void)
{
return &ipoib_link_ops;
}
int __init ipoib_netlink_init(void) int __init ipoib_netlink_init(void)
{ {
return rtnl_link_register(&ipoib_link_ops); return rtnl_link_register(&ipoib_link_ops);
......
...@@ -85,7 +85,7 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv, ...@@ -85,7 +85,7 @@ static bool is_child_unique(struct ipoib_dev_priv *ppriv,
/* /*
* NOTE: If this function fails then the priv->dev will remain valid, however * NOTE: If this function fails then the priv->dev will remain valid, however
* priv can have been freed and must not be touched by caller in the error * priv will have been freed and must not be touched by caller in the error
* case. * case.
* *
* If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
...@@ -100,6 +100,12 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, ...@@ -100,6 +100,12 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
ASSERT_RTNL(); ASSERT_RTNL();
/*
* We do not need to touch priv if register_netdevice fails, so just
* always use this flow.
*/
ndev->priv_destructor = ipoib_intf_free;
/* /*
* Racing with unregister of the parent must be prevented by the * Racing with unregister of the parent must be prevented by the
* caller. * caller.
...@@ -120,9 +126,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, ...@@ -120,9 +126,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto out_early; goto out_early;
} }
/* We do not need to touch priv if register_netdevice fails */
ndev->priv_destructor = ipoib_intf_free;
result = register_netdevice(ndev); result = register_netdevice(ndev);
if (result) { if (result) {
ipoib_warn(priv, "failed to initialize; error %i", result); ipoib_warn(priv, "failed to initialize; error %i", result);
...@@ -182,12 +185,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ...@@ -182,12 +185,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
snprintf(intf_name, sizeof(intf_name), "%s.%04x", snprintf(intf_name, sizeof(intf_name), "%s.%04x",
ppriv->dev->name, pkey); ppriv->dev->name, pkey);
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); ndev = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) { if (IS_ERR(ndev)) {
result = -ENOMEM; result = PTR_ERR(ndev);
goto out; goto out;
} }
ndev = priv->dev; priv = ipoib_priv(ndev);
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
......
...@@ -4198,4 +4198,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, ...@@ -4198,4 +4198,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name, enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type, unsigned char name_assign_type,
void (*setup)(struct net_device *)); void (*setup)(struct net_device *));
int rdma_init_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev);
#endif /* IB_VERBS_H */ #endif /* IB_VERBS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment