Commit 3c97f5de authored by Rajkumar Manoharan, committed by Kalle Valo

ath10k: implement NAPI support

Add NAPI support for rx and tx completion. NAPI poll is scheduled
from the interrupt handler. The design is as below (a generic sketch
of this flow follows the list):

 - on interrupt
     - schedule napi and mask interrupts
 - on poll
     - process all pipes (no actual Tx/Rx)
     - process Rx within budget
     - if quota exceeds budget, reschedule napi poll by returning budget
     - process Tx completions and update budget if necessary
     - process Tx fetch indications (pull-push)
     - push any other pending Tx (if possible)
     - before resched or napi completion, replenish htt rx ring buffer
     - if work done < budget, complete napi poll and unmask interrupts
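
This is the standard NAPI irq/poll split. Below is a minimal,
driver-agnostic sketch of the flow above; struct sketch_dev and the
helpers mask_irqs()/unmask_irqs()/process_rx()/process_tx_compl()/
replenish_rx() are hypothetical stand-ins for the ath10k routines in
the diff below, not the driver's actual API:

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct sketch_dev {                     /* hypothetical device context */
	struct napi_struct napi;
};

/* Hypothetical hardware helpers standing in for ath10k routines. */
static void mask_irqs(struct sketch_dev *dev) { }
static void unmask_irqs(struct sketch_dev *dev) { }
static int process_rx(struct sketch_dev *dev, int budget) { return 0; }
static void process_tx_compl(struct sketch_dev *dev) { }
static void replenish_rx(struct sketch_dev *dev) { }

/* on interrupt: mask the source and defer all work to NAPI context */
static irqreturn_t sketch_isr(int irq, void *arg)
{
	struct sketch_dev *dev = arg;

	mask_irqs(dev);
	napi_schedule(&dev->napi);
	return IRQ_HANDLED;
}

/* on poll: bounded rx work, tx completions, ring refill, then decide */
static int sketch_poll(struct napi_struct *napi, int budget)
{
	struct sketch_dev *dev = container_of(napi, struct sketch_dev, napi);
	int done;

	done = process_rx(dev, budget); /* process Rx within budget */
	process_tx_compl(dev);          /* tx completions, fetch inds, push */
	replenish_rx(dev);              /* refill rx ring before completing */

	if (done < budget) {            /* quota not exhausted */
		napi_complete(napi);    /* stop polling ... */
		unmask_irqs(dev);       /* ... and re-arm the interrupt */
	}
	/* returning done == budget keeps the napi poll scheduled */
	return done;
}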

This change also gets rid of two tasklets (intr_tq and txrx_compl_task).

Measured peak throughput with NAPI on an IPQ4019 platform in a
controlled environment. No noticeable reduction in throughput was seen,
and CPU usage improved; approx. 15% lower CPU usage was observed in the
UDP uplink case.

DL: AP DUT Tx
UL: AP DUT Rx

IPQ4019 (avg. CPU usage %)

            TOT               +NAPI
            ===============   ===============
TCP DL      644 Mbps (42%)    645 Mbps (36%)
TCP UL      673 Mbps (30%)    675 Mbps (26%)
UDP DL      682 Mbps (49%)    680 Mbps (49%)
UDP UL      720 Mbps (28%)    717 Mbps (11%)
Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
parent c39265f7
drivers/net/wireless/ath/ath10k/ahb.c
@@ -462,13 +462,13 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar)
 static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg)
 {
 	struct ath10k *ar = arg;
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
 	if (!ath10k_pci_irq_pending(ar))
 		return IRQ_NONE;
 
 	ath10k_pci_disable_and_clear_legacy_irq(ar);
-	tasklet_schedule(&ar_pci->intr_tq);
+	ath10k_pci_irq_msi_fw_mask(ar);
+	napi_schedule(&ar->napi);
 
 	return IRQ_HANDLED;
 }
@@ -717,6 +717,9 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar)
 	synchronize_irq(ar_ahb->irq);
 
 	ath10k_pci_flush(ar);
+
+	napi_synchronize(&ar->napi);
+	napi_disable(&ar->napi);
 }
@@ -748,6 +751,7 @@ static int ath10k_ahb_hif_power_up(struct ath10k *ar)
 		ath10k_err(ar, "could not wake up target CPU: %d\n", ret);
 		goto err_ce_deinit;
 	}
+	napi_enable(&ar->napi);
 
 	return 0;
@@ -831,7 +835,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev)
 		goto err_resource_deinit;
 	}
 
-	ath10k_pci_init_irq_tasklets(ar);
+	ath10k_pci_init_napi(ar);
 
 	ret = ath10k_ahb_request_irq_legacy(ar);
 	if (ret)
drivers/net/wireless/ath/ath10k/core.c
@@ -2322,6 +2322,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
 	INIT_WORK(&ar->register_work, ath10k_core_register_work);
 	INIT_WORK(&ar->restart_work, ath10k_core_restart);
 
+	init_dummy_netdev(&ar->napi_dev);
+
 	ret = ath10k_debug_create(ar);
 	if (ret)
 		goto err_free_aux_wq;
drivers/net/wireless/ath/ath10k/core.h
@@ -65,6 +65,10 @@
 #define ATH10K_KEEPALIVE_MAX_IDLE 3895
 #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900
 
+/* NAPI poll budget */
+#define ATH10K_NAPI_BUDGET 64
+#define ATH10K_NAPI_QUOTA_LIMIT 60
+
 struct ath10k;
 
 enum ath10k_bus {
@@ -954,6 +958,10 @@ struct ath10k {
 	struct ath10k_thermal thermal;
 	struct ath10k_wow wow;
 
+	/* NAPI */
+	struct net_device napi_dev;
+	struct napi_struct napi;
+
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
 };
drivers/net/wireless/ath/ath10k/htt.h
@@ -1665,7 +1665,6 @@ struct ath10k_htt {
 	/* This is used to group tx/rx completions separately and process them
 	 * in batches to reduce cache stalls */
-	struct tasklet_struct txrx_compl_task;
 	struct sk_buff_head rx_compl_q;
 	struct sk_buff_head rx_in_ord_compl_q;
 	struct sk_buff_head tx_fetch_ind_q;
@@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt,
 		  struct sk_buff *msdu);
 void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar,
 					     struct sk_buff *skb);
+int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget);
 
 #endif
drivers/net/wireless/ath/ath10k/htt_rx.c: diff collapsed (not shown)
drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -390,8 +390,6 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt)
 {
 	int size;
 
-	tasklet_kill(&htt->txrx_compl_task);
-
 	idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar);
 	idr_destroy(&htt->pending_tx);
drivers/net/wireless/ath/ath10k/pci.c
@@ -1506,12 +1506,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
 	ath10k_ce_per_engine_service(ar, pipe);
 }
 
-void ath10k_pci_kill_tasklet(struct ath10k *ar)
+static void ath10k_pci_rx_retry_sync(struct ath10k *ar)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
-	tasklet_kill(&ar_pci->intr_tq);
-
 	del_timer_sync(&ar_pci->rx_post_retry);
 }
@@ -1570,7 +1568,7 @@ void ath10k_pci_hif_get_default_pipe(struct ath10k *ar,
 				   ul_pipe, dl_pipe);
 }
 
-static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
+void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
 {
 	u32 val;
@@ -1753,7 +1751,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar)
 
 void ath10k_pci_flush(struct ath10k *ar)
 {
-	ath10k_pci_kill_tasklet(ar);
+	ath10k_pci_rx_retry_sync(ar);
 	ath10k_pci_buffer_cleanup(ar);
 }
@@ -1780,6 +1778,8 @@ static void ath10k_pci_hif_stop(struct ath10k *ar)
 	ath10k_pci_irq_disable(ar);
 	ath10k_pci_irq_sync(ar);
 	ath10k_pci_flush(ar);
+	napi_synchronize(&ar->napi);
+	napi_disable(&ar->napi);
 
 	spin_lock_irqsave(&ar_pci->ps_lock, flags);
 	WARN_ON(ar_pci->ps_wake_refcount > 0);
@@ -2533,6 +2533,7 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar)
 		ath10k_err(ar, "could not wake up target CPU: %d\n", ret);
 		goto err_ce;
 	}
+	napi_enable(&ar->napi);
 
 	return 0;
@@ -2772,35 +2773,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg)
 		return IRQ_NONE;
 	}
 
-	if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) {
-		if (!ath10k_pci_irq_pending(ar))
-			return IRQ_NONE;
-
-		ath10k_pci_disable_and_clear_legacy_irq(ar);
-	}
-
-	tasklet_schedule(&ar_pci->intr_tq);
+	if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) &&
+	    !ath10k_pci_irq_pending(ar))
+		return IRQ_NONE;
+
+	ath10k_pci_disable_and_clear_legacy_irq(ar);
+	ath10k_pci_irq_msi_fw_mask(ar);
+	napi_schedule(&ar->napi);
 
 	return IRQ_HANDLED;
 }
 
-static void ath10k_pci_tasklet(unsigned long data)
+static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget)
 {
-	struct ath10k *ar = (struct ath10k *)data;
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k *ar = container_of(ctx, struct ath10k, napi);
+	int done = 0;
 
 	if (ath10k_pci_has_fw_crashed(ar)) {
-		ath10k_pci_irq_disable(ar);
 		ath10k_pci_fw_crashed_clear(ar);
 		ath10k_pci_fw_crashed_dump(ar);
-		return;
+		napi_complete(ctx);
+		return done;
 	}
 
 	ath10k_ce_per_engine_service_any(ar);
 
-	/* Re-enable legacy irq that was disabled in the irq handler */
-	if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY)
+	done = ath10k_htt_txrx_compl_task(ar, budget);
+
+	if (done < budget) {
+		napi_complete(ctx);
+		/* In case of MSI, it is possible that interrupts are received
+		 * while NAPI poll is in progress. So pending interrupts that
+		 * are received after processing all copy engine pipes by NAPI
+		 * poll will not be handled again. This is causing failure to
+		 * complete boot sequence in x86 platform. So before enabling
+		 * interrupts safer to check for pending interrupts for
+		 * immediate servicing.
+		 */
+		if (CE_INTERRUPT_SUMMARY(ar)) {
+			napi_reschedule(ctx);
+			goto out;
+		}
 		ath10k_pci_enable_legacy_irq(ar);
+		ath10k_pci_irq_msi_fw_unmask(ar);
+	}
+
+out:
+	return done;
 }
 
 static int ath10k_pci_request_irq_msi(struct ath10k *ar)
@@ -2858,11 +2877,10 @@ static void ath10k_pci_free_irq(struct ath10k *ar)
 	free_irq(ar_pci->pdev->irq, ar);
 }
 
-void ath10k_pci_init_irq_tasklets(struct ath10k *ar)
+void ath10k_pci_init_napi(struct ath10k *ar)
 {
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
-	tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar);
+	netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll,
+		       ATH10K_NAPI_BUDGET);
 }
 
 static int ath10k_pci_init_irq(struct ath10k *ar)
@@ -2870,7 +2888,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar)
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	int ret;
 
-	ath10k_pci_init_irq_tasklets(ar);
+	ath10k_pci_init_napi(ar);
 
 	if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO)
 		ath10k_info(ar, "limiting irq mode to: %d\n",
@@ -3131,7 +3149,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar)
 
 void ath10k_pci_release_resource(struct ath10k *ar)
 {
-	ath10k_pci_kill_tasklet(ar);
+	ath10k_pci_rx_retry_sync(ar);
+	netif_napi_del(&ar->napi);
 	ath10k_pci_ce_deinit(ar);
 	ath10k_pci_free_pipes(ar);
 }
@@ -3298,7 +3317,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
 
 err_free_irq:
 	ath10k_pci_free_irq(ar);
-	ath10k_pci_kill_tasklet(ar);
+	ath10k_pci_rx_retry_sync(ar);
 
 err_deinit_irq:
 	ath10k_pci_deinit_irq(ar);
drivers/net/wireless/ath/ath10k/pci.h
@@ -177,8 +177,6 @@ struct ath10k_pci {
 	/* Operating interrupt mode */
 	enum ath10k_pci_irq_mode oper_irq_mode;
 
-	struct tasklet_struct intr_tq;
-
 	struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX];
 
 	/* Copy Engine used for Diagnostic Accesses */
@@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k *ar);
 void ath10k_pci_free_pipes(struct ath10k *ar);
 void ath10k_pci_rx_replenish_retry(unsigned long ptr);
 void ath10k_pci_ce_deinit(struct ath10k *ar);
-void ath10k_pci_init_irq_tasklets(struct ath10k *ar);
-void ath10k_pci_kill_tasklet(struct ath10k *ar);
+void ath10k_pci_init_napi(struct ath10k *ar);
 int ath10k_pci_init_pipes(struct ath10k *ar);
 int ath10k_pci_init_config(struct ath10k *ar);
 void ath10k_pci_rx_post(struct ath10k *ar);
@@ -303,6 +300,7 @@ void ath10k_pci_flush(struct ath10k *ar);
 void ath10k_pci_enable_legacy_irq(struct ath10k *ar);
 bool ath10k_pci_irq_pending(struct ath10k *ar);
 void ath10k_pci_disable_and_clear_legacy_irq(struct ath10k *ar);
+void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar);
 int ath10k_pci_wait_for_target_init(struct ath10k *ar);
 int ath10k_pci_setup_resource(struct ath10k *ar);
 void ath10k_pci_release_resource(struct ath10k *ar);