Commit aae425ef authored by Jan Sokolowski's avatar Jan Sokolowski Committed by David S. Miller

i40e: Fix DMA mappings leak

During reallocation of RX buffers, new DMA mappings are created for
those buffers.

steps for reproduction:
while :
do
for ((i=0; i<=8160; i=i+32))
do
ethtool -G enp130s0f0 rx $i tx $i
sleep 0.5
ethtool -g enp130s0f0
done
done

This resulted in crash:
i40e 0000:01:00.1: Unable to allocate memory for the Rx descriptor ring, size=65536
Driver BUG
WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:141 xdp_rxq_info_unreg+0x43/0x50
Call Trace:
i40e_free_rx_resources+0x70/0x80 [i40e]
i40e_set_ringparam+0x27c/0x800 [i40e]
ethnl_set_rings+0x1b2/0x290
genl_family_rcv_msg_doit.isra.15+0x10f/0x150
genl_family_rcv_msg+0xb3/0x160
? rings_fill_reply+0x1a0/0x1a0
genl_rcv_msg+0x47/0x90
? genl_family_rcv_msg+0x160/0x160
netlink_rcv_skb+0x4c/0x120
genl_rcv+0x24/0x40
netlink_unicast+0x196/0x230
netlink_sendmsg+0x204/0x3d0
sock_sendmsg+0x4c/0x50
__sys_sendto+0xee/0x160
? handle_mm_fault+0xbe/0x1e0
? syscall_trace_enter+0x1d3/0x2c0
__x64_sys_sendto+0x24/0x30
do_syscall_64+0x5b/0x1a0
entry_SYSCALL_64_after_hwframe+0x65/0xca
RIP: 0033:0x7f5eac8b035b
Missing register, driver bug
WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:119 xdp_rxq_info_unreg_mem_model+0x69/0x140
Call Trace:
xdp_rxq_info_unreg+0x1e/0x50
i40e_free_rx_resources+0x70/0x80 [i40e]
i40e_set_ringparam+0x27c/0x800 [i40e]
ethnl_set_rings+0x1b2/0x290
genl_family_rcv_msg_doit.isra.15+0x10f/0x150
genl_family_rcv_msg+0xb3/0x160
? rings_fill_reply+0x1a0/0x1a0
genl_rcv_msg+0x47/0x90
? genl_family_rcv_msg+0x160/0x160
netlink_rcv_skb+0x4c/0x120
genl_rcv+0x24/0x40
netlink_unicast+0x196/0x230
netlink_sendmsg+0x204/0x3d0
sock_sendmsg+0x4c/0x50
__sys_sendto+0xee/0x160
? handle_mm_fault+0xbe/0x1e0
? syscall_trace_enter+0x1d3/0x2c0
__x64_sys_sendto+0x24/0x30
do_syscall_64+0x5b/0x1a0
entry_SYSCALL_64_after_hwframe+0x65/0xca
RIP: 0033:0x7f5eac8b035b

This was caused because of new buffers with different RX ring count should
substitute older ones, but those buffers were freed in
i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi,
thus kfree on rx_bi caused leak of already mapped DMA.

Fix this by reallocating ZC with rx_bi_zc struct when BPF program loads. Additionally
reallocate back to rx_bi when BPF program unloads.

If BPF program is loaded/unloaded and XSK pools are created, reallocate
RX queues accordingly in XSP_SETUP_XSK_POOL handler.

Fixes: be1222b5 ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings")
Signed-off-by: default avatarJan Sokolowski <jan.sokolowski@intel.com>
Signed-off-by: default avatarMateusz Palczewski <mateusz.palczewski@intel.com>
Signed-off-by: default avatarJacob Keller <jacob.e.keller@intel.com>
Tested-by: Chandan <chandanx.rout@intel.com> (A Contingent Worker at Intel)
Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0d4636f7
...@@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct net_device *netdev, ...@@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
*/ */
rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS; rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
err = i40e_setup_rx_descriptors(&rx_rings[i]); err = i40e_setup_rx_descriptors(&rx_rings[i]);
if (err)
goto rx_unwind;
err = i40e_alloc_rx_bi(&rx_rings[i]);
if (err) if (err)
goto rx_unwind; goto rx_unwind;
......
...@@ -3566,12 +3566,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ...@@ -3566,12 +3566,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (ring->vsi->type == I40E_VSI_MAIN) if (ring->vsi->type == I40E_VSI_MAIN)
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
kfree(ring->rx_bi);
ring->xsk_pool = i40e_xsk_pool(ring); ring->xsk_pool = i40e_xsk_pool(ring);
if (ring->xsk_pool) { if (ring->xsk_pool) {
ret = i40e_alloc_rx_bi_zc(ring);
if (ret)
return ret;
ring->rx_buf_len = ring->rx_buf_len =
xsk_pool_get_rx_frame_size(ring->xsk_pool); xsk_pool_get_rx_frame_size(ring->xsk_pool);
/* For AF_XDP ZC, we disallow packets to span on /* For AF_XDP ZC, we disallow packets to span on
...@@ -3589,9 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ...@@ -3589,9 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->queue_index); ring->queue_index);
} else { } else {
ret = i40e_alloc_rx_bi(ring);
if (ret)
return ret;
ring->rx_buf_len = vsi->rx_buf_len; ring->rx_buf_len = vsi->rx_buf_len;
if (ring->vsi->type == I40E_VSI_MAIN) { if (ring->vsi->type == I40E_VSI_MAIN) {
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
...@@ -13296,6 +13289,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, ...@@ -13296,6 +13289,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
i40e_reset_and_rebuild(pf, true, true); i40e_reset_and_rebuild(pf, true, true);
} }
if (!i40e_enabled_xdp_vsi(vsi) && prog) {
if (i40e_realloc_rx_bi_zc(vsi, true))
return -ENOMEM;
} else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
if (i40e_realloc_rx_bi_zc(vsi, false))
return -ENOMEM;
}
for (i = 0; i < vsi->num_queue_pairs; i++) for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
...@@ -13528,6 +13529,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) ...@@ -13528,6 +13529,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
i40e_queue_pair_disable_irq(vsi, queue_pair); i40e_queue_pair_disable_irq(vsi, queue_pair);
err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_clean_rings(vsi, queue_pair);
i40e_queue_pair_reset_stats(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair);
......
...@@ -1457,14 +1457,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) ...@@ -1457,14 +1457,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return -ENOMEM; return -ENOMEM;
} }
int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
{
unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
return rx_ring->rx_bi ? 0 : -ENOMEM;
}
static void i40e_clear_rx_bi(struct i40e_ring *rx_ring) static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
{ {
memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count); memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
...@@ -1593,6 +1585,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) ...@@ -1593,6 +1585,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
rx_ring->rx_bi =
kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
if (!rx_ring->rx_bi)
return -ENOMEM;
return 0; return 0;
} }
......
...@@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); ...@@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb); bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags); u32 flags);
int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
/** /**
* i40e_get_head - Retrieve head from head writeback * i40e_get_head - Retrieve head from head writeback
......
...@@ -10,14 +10,6 @@ ...@@ -10,14 +10,6 @@
#include "i40e_txrx_common.h" #include "i40e_txrx_common.h"
#include "i40e_xsk.h" #include "i40e_xsk.h"
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
{
unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
}
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring) void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
{ {
memset(rx_ring->rx_bi_zc, 0, memset(rx_ring->rx_bi_zc, 0,
...@@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx) ...@@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
return &rx_ring->rx_bi_zc[idx]; return &rx_ring->rx_bi_zc[idx];
} }
/**
* i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer
* @rx_ring: Current rx ring
* @pool_present: is pool for XSK present
*
* Try allocating memory and return ENOMEM, if failed to allocate.
* If allocation was successful, substitute buffer with allocated one.
* Returns 0 on success, negative on failure
*/
static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
{
size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
sizeof(*rx_ring->rx_bi);
void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
if (!sw_ring)
return -ENOMEM;
if (pool_present) {
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
rx_ring->rx_bi_zc = sw_ring;
} else {
kfree(rx_ring->rx_bi_zc);
rx_ring->rx_bi_zc = NULL;
rx_ring->rx_bi = sw_ring;
}
return 0;
}
/**
* i40e_realloc_rx_bi_zc - reallocate rx SW rings
* @vsi: Current VSI
* @zc: is zero copy set
*
* Reallocate buffer for rx_rings that might be used by XSK.
* XDP requires more memory, than rx_buf provides.
* Returns 0 on success, negative on failure
*/
int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
{
struct i40e_ring *rx_ring;
unsigned long q;
for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
rx_ring = vsi->rx_rings[q];
if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
return -ENOMEM;
}
return 0;
}
/** /**
* i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
* certain ring/qid * certain ring/qid
...@@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi, ...@@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
if (err) if (err)
return err; return err;
err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
if (err)
return err;
err = i40e_queue_pair_enable(vsi, qid); err = i40e_queue_pair_enable(vsi, qid);
if (err) if (err)
return err; return err;
...@@ -113,6 +161,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid) ...@@ -113,6 +161,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR); xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
if (if_running) { if (if_running) {
err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
if (err)
return err;
err = i40e_queue_pair_enable(vsi, qid); err = i40e_queue_pair_enable(vsi, qid);
if (err) if (err)
return err; return err;
......
...@@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); ...@@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring); int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring); void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
#endif /* _I40E_XSK_H_ */ #endif /* _I40E_XSK_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment