Commit 67654b26 authored by Wen Gong's avatar Wen Gong Committed by Kalle Valo

ath10k: add workqueue for RX path of sdio

The RX path has two parts: one reads data from SDIO, the other
indicates the packets to the upper stack. Currently a single
thread does both, so RX is fully sequential and throughput is
low; running the two parts in parallel increases throughput.

This patch moves the indication to a workqueue, which results in
a significant performance improvement on the RX path.

UDP RX throughput is 200 Mbps without this patch and reaches
400 Mbps with it.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWPZ-1
Signed-off-by: default avatarWen Gong <wgong@codeaurora.org>
Signed-off-by: default avatarKalle Valo <kvalo@codeaurora.org>
parent 22477652
...@@ -124,6 +124,7 @@ struct ath10k_skb_cb { ...@@ -124,6 +124,7 @@ struct ath10k_skb_cb {
struct ath10k_skb_rxcb { struct ath10k_skb_rxcb {
dma_addr_t paddr; dma_addr_t paddr;
struct hlist_node hlist; struct hlist_node hlist;
u8 eid;
}; };
static inline struct ath10k_skb_cb *ATH10K_SKB_CB(struct sk_buff *skb) static inline struct ath10k_skb_cb *ATH10K_SKB_CB(struct sk_buff *skb)
......
...@@ -2235,7 +2235,10 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt, ...@@ -2235,7 +2235,10 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt,
hdr = (struct ieee80211_hdr *)skb->data; hdr = (struct ieee80211_hdr *)skb->data;
qos = ieee80211_is_data_qos(hdr->frame_control); qos = ieee80211_is_data_qos(hdr->frame_control);
rx_status = IEEE80211_SKB_RXCB(skb); rx_status = IEEE80211_SKB_RXCB(skb);
memset(rx_status, 0, sizeof(*rx_status));
rx_status->chains |= BIT(0); rx_status->chains |= BIT(0);
if (rx->ppdu.combined_rssi == 0) { if (rx->ppdu.combined_rssi == 0) {
/* SDIO firmware does not provide signal */ /* SDIO firmware does not provide signal */
......
...@@ -419,6 +419,7 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar, ...@@ -419,6 +419,7 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
struct ath10k_htc *htc = &ar->htc; struct ath10k_htc *htc = &ar->htc;
struct ath10k_sdio_rx_data *pkt; struct ath10k_sdio_rx_data *pkt;
struct ath10k_htc_ep *ep; struct ath10k_htc_ep *ep;
struct ath10k_skb_rxcb *cb;
enum ath10k_htc_ep_id id; enum ath10k_htc_ep_id id;
int ret, i, *n_lookahead_local; int ret, i, *n_lookahead_local;
u32 *lookaheads_local; u32 *lookaheads_local;
...@@ -464,10 +465,16 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar, ...@@ -464,10 +465,16 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
if (ret) if (ret)
goto out; goto out;
if (!pkt->trailer_only) if (!pkt->trailer_only) {
ep->ep_ops.ep_rx_complete(ar_sdio->ar, pkt->skb); cb = ATH10K_SKB_RXCB(pkt->skb);
else cb->eid = id;
skb_queue_tail(&ar_sdio->rx_head, pkt->skb);
queue_work(ar->workqueue_aux,
&ar_sdio->async_work_rx);
} else {
kfree_skb(pkt->skb); kfree_skb(pkt->skb);
}
/* The RX complete handler now owns the skb...*/ /* The RX complete handler now owns the skb...*/
pkt->skb = NULL; pkt->skb = NULL;
...@@ -1317,6 +1324,28 @@ static void __ath10k_sdio_write_async(struct ath10k *ar, ...@@ -1317,6 +1324,28 @@ static void __ath10k_sdio_write_async(struct ath10k *ar,
ath10k_sdio_free_bus_req(ar, req); ath10k_sdio_free_bus_req(ar, req);
} }
/* To improve throughput use workqueue to deliver packets to HTC layer,
* this way SDIO bus is utilised much better.
*/
static void ath10k_rx_indication_async_work(struct work_struct *work)
{
struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
async_work_rx);
struct ath10k *ar = ar_sdio->ar;
struct ath10k_htc_ep *ep;
struct ath10k_skb_rxcb *cb;
struct sk_buff *skb;
while (true) {
skb = skb_dequeue(&ar_sdio->rx_head);
if (!skb)
break;
cb = ATH10K_SKB_RXCB(skb);
ep = &ar->htc.endpoint[cb->eid];
ep->ep_ops.ep_rx_complete(ar, skb);
}
}
static void ath10k_sdio_write_async_work(struct work_struct *work) static void ath10k_sdio_write_async_work(struct work_struct *work)
{ {
struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio, struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
...@@ -2087,6 +2116,9 @@ static int ath10k_sdio_probe(struct sdio_func *func, ...@@ -2087,6 +2116,9 @@ static int ath10k_sdio_probe(struct sdio_func *func,
for (i = 0; i < ATH10K_SDIO_BUS_REQUEST_MAX_NUM; i++) for (i = 0; i < ATH10K_SDIO_BUS_REQUEST_MAX_NUM; i++)
ath10k_sdio_free_bus_req(ar, &ar_sdio->bus_req[i]); ath10k_sdio_free_bus_req(ar, &ar_sdio->bus_req[i]);
skb_queue_head_init(&ar_sdio->rx_head);
INIT_WORK(&ar_sdio->async_work_rx, ath10k_rx_indication_async_work);
dev_id_base = FIELD_GET(QCA_MANUFACTURER_ID_BASE, id->device); dev_id_base = FIELD_GET(QCA_MANUFACTURER_ID_BASE, id->device);
switch (dev_id_base) { switch (dev_id_base) {
case QCA_MANUFACTURER_ID_AR6005_BASE: case QCA_MANUFACTURER_ID_AR6005_BASE:
......
...@@ -187,6 +187,9 @@ struct ath10k_sdio { ...@@ -187,6 +187,9 @@ struct ath10k_sdio {
struct ath10k_sdio_bus_request bus_req[ATH10K_SDIO_BUS_REQUEST_MAX_NUM]; struct ath10k_sdio_bus_request bus_req[ATH10K_SDIO_BUS_REQUEST_MAX_NUM];
/* free list of bus requests */ /* free list of bus requests */
struct list_head bus_req_freeq; struct list_head bus_req_freeq;
struct sk_buff_head rx_head;
/* protects access to bus_req_freeq */ /* protects access to bus_req_freeq */
spinlock_t lock; spinlock_t lock;
...@@ -213,6 +216,8 @@ struct ath10k_sdio { ...@@ -213,6 +216,8 @@ struct ath10k_sdio {
struct list_head wr_asyncq; struct list_head wr_asyncq;
/* protects access to wr_asyncq */ /* protects access to wr_asyncq */
spinlock_t wr_async_lock; spinlock_t wr_async_lock;
struct work_struct async_work_rx;
}; };
static inline struct ath10k_sdio *ath10k_sdio_priv(struct ath10k *ar) static inline struct ath10k_sdio *ath10k_sdio_priv(struct ath10k *ar)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment