Commit 1f8bb151 authored by Michal Kazior, committed by Kalle Valo

ath10k: use msdu headroom to store txfrag

Instead of allocating an sk_buff for a mere 16-byte
tx fragment list buffer, use the headroom of the
original msdu sk_buff.

This decreases CPU cache pressure and improves
performance.

Measured improvement on AP135 is 560mbps ->
590mbps of UDP TX bridging traffic.
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
parent 2f3773bc
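Editorial note: the core idea is that mac80211 already reserves extra headroom in front of each msdu (see the extra_tx_headroom hunk below), so the 16-byte HTT fragment list plus up to 4 bytes of alignment padding can be skb_push()'ed onto the msdu itself, filled in place, and skb_pull()'ed back off when the frame completes. A minimal, illustrative sketch of that pattern follows; struct example_tx_frag and example_tx_prepare() are hypothetical names made up for this note (not the driver's code), and 32-bit DMA addresses are assumed.

/*
 * Illustrative sketch only: borrow sk_buff headroom for a two-entry DMA
 * fragment list instead of allocating a separate sk_buff. Assumes the
 * caller reserved enough headroom via hw->extra_tx_headroom and that DMA
 * addresses fit in 32 bits.
 */
#include <linux/dma-mapping.h>
#include <linux/skbuff.h>

struct example_tx_frag {		/* simplified stand-in for a HW frag descriptor */
	__le32 paddr;
	__le32 len;
} __packed;

static int example_tx_prepare(struct device *dev, struct sk_buff *msdu)
{
	struct example_tx_frag *frags;
	unsigned int frag_len = sizeof(*frags) * 2;	/* one entry + zero terminator */
	unsigned int pad_len = (unsigned long)msdu->data -
			       round_down((unsigned long)msdu->data, 4);
	dma_addr_t paddr;

	if (skb_headroom(msdu) < frag_len + pad_len)
		return -ENOMEM;	/* caller did not reserve enough headroom */

	/* prepend frag list + alignment pad; msdu->data now starts 4-byte aligned */
	skb_push(msdu, frag_len + pad_len);

	paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, paddr)) {
		skb_pull(msdu, frag_len + pad_len);
		return -EIO;
	}

	/* hand the buffer back to the CPU before writing the descriptors */
	dma_sync_single_for_cpu(dev, paddr, msdu->len, DMA_TO_DEVICE);

	frags = (struct example_tx_frag *)msdu->data;
	frags[0].paddr = __cpu_to_le32(paddr + frag_len + pad_len);
	frags[0].len   = __cpu_to_le32(msdu->len - frag_len - pad_len);
	frags[1].paddr = __cpu_to_le32(0);	/* zero entry terminates the list */
	frags[1].len   = __cpu_to_le32(0);

	/* flush the CPU writes so the device sees a coherent frag list */
	dma_sync_single_for_device(dev, paddr, msdu->len, DMA_TO_DEVICE);

	return 0;
}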
@@ -54,7 +54,9 @@ struct ath10k_skb_cb {
 		u8 vdev_id;
 		u8 tid;
 		bool is_offchan;
-		struct sk_buff *txfrag;
+
+		u8 frag_len;
+		u8 pad_len;
 	} __packed htt;
 } __packed;
@@ -307,7 +307,8 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 	struct device *dev = htt->ar->dev;
 	struct sk_buff *txdesc = NULL;
 	struct htt_cmd *cmd;
-	u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
+	struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu);
+	u8 vdev_id = skb_cb->htt.vdev_id;
 	int len = 0;
 	int msdu_id = -1;
 	int res;
@@ -350,6 +351,9 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 	memcpy(cmd->mgmt_tx.hdr, msdu->data,
 	       min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN));

+	skb_cb->htt.frag_len = 0;
+	skb_cb->htt.pad_len = 0;
+
 	res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
 	if (res)
 		goto err_unmap_msdu;
@@ -377,13 +381,12 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 	struct htt_cmd *cmd;
 	struct htt_data_tx_desc_frag *tx_frags;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data;
+	struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu);
 	struct sk_buff *txdesc = NULL;
-	struct sk_buff *txfrag = NULL;
 	bool use_frags;
 	u8 vdev_id = ATH10K_SKB_CB(msdu)->htt.vdev_id;
 	u8 tid;
-	int prefetch_len, desc_len, frag_len;
-	dma_addr_t frags_paddr;
+	int prefetch_len, desc_len;
 	int msdu_id = -1;
 	int res;
 	u8 flags0;
@@ -407,7 +410,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 	prefetch_len = roundup(prefetch_len, 4);

 	desc_len = sizeof(cmd->hdr) + sizeof(cmd->data_tx) + prefetch_len;
-	frag_len = sizeof(*tx_frags) * 2;

 	txdesc = ath10k_htc_alloc_skb(desc_len);
 	if (!txdesc) {
@@ -421,41 +423,44 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 	use_frags = htt->target_version_major < 3 ||
 		    !ieee80211_is_mgmt(hdr->frame_control);

-	if (use_frags) {
-		txfrag = dev_alloc_skb(frag_len);
-		if (!txfrag) {
-			res = -ENOMEM;
-			goto err_free_txdesc;
-		}
-	}
-
 	if (!IS_ALIGNED((unsigned long)txdesc->data, 4)) {
 		ath10k_warn("htt alignment check failed. dropping packet.\n");
 		res = -EIO;
-		goto err_free_txfrag;
+		goto err_free_txdesc;
+	}
+
+	if (use_frags) {
+		skb_cb->htt.frag_len = sizeof(*tx_frags) * 2;
+		skb_cb->htt.pad_len = (unsigned long)msdu->data -
+				      round_down((unsigned long)msdu->data, 4);
+		skb_push(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
+	} else {
+		skb_cb->htt.frag_len = 0;
+		skb_cb->htt.pad_len = 0;
 	}

 	res = ath10k_skb_map(dev, msdu);
 	if (res)
-		goto err_free_txfrag;
+		goto err_pull_txfrag;

 	if (use_frags) {
+		dma_sync_single_for_cpu(dev, skb_cb->paddr, msdu->len,
+					DMA_TO_DEVICE);
+
 		/* tx fragment list must be terminated with zero-entry */
-		skb_put(txfrag, frag_len);
-		tx_frags = (struct htt_data_tx_desc_frag *)txfrag->data;
-		tx_frags[0].paddr = __cpu_to_le32(ATH10K_SKB_CB(msdu)->paddr);
-		tx_frags[0].len = __cpu_to_le32(msdu->len);
+		tx_frags = (struct htt_data_tx_desc_frag *)msdu->data;
+		tx_frags[0].paddr = __cpu_to_le32(skb_cb->paddr +
+						  skb_cb->htt.frag_len +
+						  skb_cb->htt.pad_len);
+		tx_frags[0].len = __cpu_to_le32(msdu->len -
+						skb_cb->htt.frag_len -
+						skb_cb->htt.pad_len);
 		tx_frags[1].paddr = __cpu_to_le32(0);
 		tx_frags[1].len = __cpu_to_le32(0);

-		res = ath10k_skb_map(dev, txfrag);
-		if (res)
-			goto err_unmap_msdu;
-
-		ath10k_dbg(ATH10K_DBG_HTT, "txfrag 0x%llx\n",
-			   (unsigned long long) ATH10K_SKB_CB(txfrag)->paddr);
-		ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "txfrag: ",
-				txfrag->data, frag_len);
+		dma_sync_single_for_device(dev, skb_cb->paddr, msdu->len,
+					   DMA_TO_DEVICE);
 	}

 	ath10k_dbg(ATH10K_DBG_HTT, "msdu 0x%llx\n",
@@ -488,35 +493,28 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 	flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD;
 	flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD;

-	if (use_frags)
-		frags_paddr = ATH10K_SKB_CB(txfrag)->paddr;
-	else
-		frags_paddr = ATH10K_SKB_CB(msdu)->paddr;
-
 	cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM;
 	cmd->data_tx.flags0 = flags0;
 	cmd->data_tx.flags1 = __cpu_to_le16(flags1);
-	cmd->data_tx.len = __cpu_to_le16(msdu->len);
+	cmd->data_tx.len = __cpu_to_le16(msdu->len -
+					 skb_cb->htt.frag_len -
+					 skb_cb->htt.pad_len);
 	cmd->data_tx.id = __cpu_to_le16(msdu_id);
-	cmd->data_tx.frags_paddr = __cpu_to_le32(frags_paddr);
+	cmd->data_tx.frags_paddr = __cpu_to_le32(skb_cb->paddr);
 	cmd->data_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID);

-	memcpy(cmd->data_tx.prefetch, msdu->data, prefetch_len);
+	memcpy(cmd->data_tx.prefetch, hdr, prefetch_len);

 	res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
 	if (res)
-		goto err_restore;
+		goto err_unmap_msdu;

 	return 0;

-err_restore:
-	if (use_frags)
-		ath10k_skb_unmap(dev, txfrag);
-
 err_unmap_msdu:
 	ath10k_skb_unmap(dev, msdu);
-err_free_txfrag:
-	if (use_frags)
-		dev_kfree_skb_any(txfrag);
+err_pull_txfrag:
+	skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
 err_free_txdesc:
 	dev_kfree_skb_any(txdesc);
 err_free_msdu_id:
@@ -3342,6 +3342,10 @@ int ath10k_mac_register(struct ath10k *ar)
 		    IEEE80211_HW_WANT_MONITOR_VIF |
 		    IEEE80211_HW_AP_LINK_PS;

+	/* MSDU can have HTT TX fragment pushed in front. The additional 4
+	 * bytes is used for padding/alignment if necessary. */
+	ar->hw->extra_tx_headroom += sizeof(struct htt_data_tx_desc_frag)*2 + 4;
+
 	if (ar->ht_cap_info & WMI_HT_CAP_DYNAMIC_SMPS)
 		ar->hw->flags |= IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS;
@@ -49,7 +49,8 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 {
 	struct device *dev = htt->ar->dev;
 	struct ieee80211_tx_info *info;
-	struct sk_buff *msdu, *txfrag;
+	struct ath10k_skb_cb *skb_cb;
+	struct sk_buff *msdu;
 	int ret;

 	ath10k_dbg(ATH10K_DBG_HTT, "htt tx completion msdu_id %u discard %d no_ack %d\n",
@@ -62,20 +63,15 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 	}

 	msdu = htt->pending_tx[tx_done->msdu_id];
-	txfrag = ATH10K_SKB_CB(msdu)->htt.txfrag;
-
-	if (txfrag) {
-		ret = ath10k_skb_unmap(dev, txfrag);
-		if (ret)
-			ath10k_warn("txfrag unmap failed (%d)\n", ret);
-
-		dev_kfree_skb_any(txfrag);
-	}
+	skb_cb = ATH10K_SKB_CB(msdu);

 	ret = ath10k_skb_unmap(dev, msdu);
 	if (ret)
 		ath10k_warn("data skb unmap failed (%d)\n", ret);

+	if (skb_cb->htt.frag_len)
+		skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
+
 	ath10k_report_offchan_tx(htt->ar, msdu);

 	info = IEEE80211_SKB_CB(msdu);
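Editorial note: on the completion side (the txrx.c hunk above), the same bookkeeping is undone: the msdu is unmapped and the pushed bytes are skb_pull()'ed so mac80211 gets the frame back unchanged. A matching illustrative sketch, again with hypothetical names and the same assumptions as the earlier example:

/* Illustrative counterpart to example_tx_prepare() above (hypothetical names). */
static void example_tx_complete(struct device *dev, struct sk_buff *msdu,
				dma_addr_t paddr, unsigned int frag_len,
				unsigned int pad_len)
{
	dma_unmap_single(dev, paddr, msdu->len, DMA_TO_DEVICE);

	/* give the borrowed headroom back before returning the skb to the stack */
	if (frag_len)
		skb_pull(msdu, frag_len + pad_len);
}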