Commit c9c5962b authored by Johannes Berg

mac80211: enable collecting station statistics per-CPU

If the driver advertises the new HW flag USES_RSS, make the
station statistics on the fast-rx path per-CPU. This will
enable calling the RX in parallel, only hitting locking or
shared cachelines when the fast-RX path isn't available.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
parent 49ddf8e6
...@@ -1980,6 +1980,9 @@ struct ieee80211_txq { ...@@ -1980,6 +1980,9 @@ struct ieee80211_txq {
* order and does not need to manage its own reorder buffer or BA session * order and does not need to manage its own reorder buffer or BA session
* timeout. * timeout.
* *
* @IEEE80211_HW_USES_RSS: The device uses RSS and thus requires parallel RX,
* which implies using per-CPU station statistics.
*
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/ */
enum ieee80211_hw_flags { enum ieee80211_hw_flags {
...@@ -2017,6 +2020,7 @@ enum ieee80211_hw_flags { ...@@ -2017,6 +2020,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_BEACON_TX_STATUS, IEEE80211_HW_BEACON_TX_STATUS,
IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
IEEE80211_HW_SUPPORTS_REORDERING_BUFFER, IEEE80211_HW_SUPPORTS_REORDERING_BUFFER,
IEEE80211_HW_USES_RSS,
/* keep last, obviously */ /* keep last, obviously */
NUM_IEEE80211_HW_FLAGS NUM_IEEE80211_HW_FLAGS
......
...@@ -127,6 +127,7 @@ static const char *hw_flag_names[] = { ...@@ -127,6 +127,7 @@ static const char *hw_flag_names[] = {
FLAG(BEACON_TX_STATUS), FLAG(BEACON_TX_STATUS),
FLAG(NEEDS_UNIQUE_STA_ADDR), FLAG(NEEDS_UNIQUE_STA_ADDR),
FLAG(SUPPORTS_REORDERING_BUFFER), FLAG(SUPPORTS_REORDERING_BUFFER),
FLAG(USES_RSS),
#undef FLAG #undef FLAG
}; };
......
...@@ -3528,6 +3528,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta) ...@@ -3528,6 +3528,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
ether_addr_copy(fastrx.rfc1042_hdr, rfc1042_header); ether_addr_copy(fastrx.rfc1042_hdr, rfc1042_header);
ether_addr_copy(fastrx.vif_addr, sdata->vif.addr); ether_addr_copy(fastrx.vif_addr, sdata->vif.addr);
fastrx.uses_rss = ieee80211_hw_check(&local->hw, USES_RSS);
/* fast-rx doesn't do reordering */ /* fast-rx doesn't do reordering */
if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) && if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) &&
!ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) !ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER))
...@@ -3678,6 +3680,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, ...@@ -3678,6 +3680,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 da[ETH_ALEN]; u8 da[ETH_ALEN];
u8 sa[ETH_ALEN]; u8 sa[ETH_ALEN];
} addrs __aligned(2); } addrs __aligned(2);
struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
if (fast_rx->uses_rss)
stats = this_cpu_ptr(sta->pcpu_rx_stats);
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
* to a common data structure; drivers can implement that per queue * to a common data structure; drivers can implement that per queue
...@@ -3759,29 +3765,32 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, ...@@ -3759,29 +3765,32 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
} }
/* statistics part of ieee80211_rx_h_sta_process() */ /* statistics part of ieee80211_rx_h_sta_process() */
sta->rx_stats.last_rx = jiffies; stats->last_rx = jiffies;
sta->rx_stats.last_rate = sta_stats_encode_rate(status); stats->last_rate = sta_stats_encode_rate(status);
sta->rx_stats.fragments++; stats->fragments++;
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
sta->rx_stats.last_signal = status->signal; stats->last_signal = status->signal;
ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal); if (!fast_rx->uses_rss)
ewma_signal_add(&sta->rx_stats_avg.signal,
-status->signal);
} }
if (status->chains) { if (status->chains) {
int i; int i;
sta->rx_stats.chains = status->chains; stats->chains = status->chains;
for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
int signal = status->chain_signal[i]; int signal = status->chain_signal[i];
if (!(status->chains & BIT(i))) if (!(status->chains & BIT(i)))
continue; continue;
sta->rx_stats.chain_signal_last[i] = signal; stats->chain_signal_last[i] = signal;
ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], if (!fast_rx->uses_rss)
-signal); ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
-signal);
} }
} }
/* end of statistics */ /* end of statistics */
...@@ -3806,10 +3815,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, ...@@ -3806,10 +3815,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
* for non-QoS-data frames. Here we know it's a data * for non-QoS-data frames. Here we know it's a data
* frame, so count MSDUs. * frame, so count MSDUs.
*/ */
u64_stats_update_begin(&sta->rx_stats.syncp); u64_stats_update_begin(&stats->syncp);
sta->rx_stats.msdu[rx->seqno_idx]++; stats->msdu[rx->seqno_idx]++;
sta->rx_stats.bytes += orig_len; stats->bytes += orig_len;
u64_stats_update_end(&sta->rx_stats.syncp); u64_stats_update_end(&stats->syncp);
if (fast_rx->internal_forward) { if (fast_rx->internal_forward) {
struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); struct sta_info *dsta = sta_info_get(rx->sdata, skb->data);
...@@ -3840,7 +3849,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, ...@@ -3840,7 +3849,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
return true; return true;
drop: drop:
dev_kfree_skb(skb); dev_kfree_skb(skb);
sta->rx_stats.dropped++; stats->dropped++;
return true; return true;
} }
......
...@@ -254,6 +254,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) ...@@ -254,6 +254,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
#ifdef CONFIG_MAC80211_MESH #ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh); kfree(sta->mesh);
#endif #endif
free_percpu(sta->pcpu_rx_stats);
kfree(sta); kfree(sta);
} }
...@@ -311,6 +312,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, ...@@ -311,6 +312,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (!sta) if (!sta)
return NULL; return NULL;
if (ieee80211_hw_check(hw, USES_RSS)) {
sta->pcpu_rx_stats =
alloc_percpu(struct ieee80211_sta_rx_stats);
if (!sta->pcpu_rx_stats)
goto free;
}
spin_lock_init(&sta->lock); spin_lock_init(&sta->lock);
spin_lock_init(&sta->ps_lock); spin_lock_init(&sta->ps_lock);
INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
...@@ -1932,6 +1940,28 @@ u8 sta_info_tx_streams(struct sta_info *sta) ...@@ -1932,6 +1940,28 @@ u8 sta_info_tx_streams(struct sta_info *sta)
>> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
} }
/*
 * Return the RX statistics instance that recorded the most recent frame
 * for @sta.
 *
 * Without the USES_RSS hardware flag this is always &sta->rx_stats.  With
 * it, every possible CPU's per-CPU instance is compared (by last_rx
 * timestamp) against the current best candidate, starting from
 * &sta->rx_stats, and the newest one wins.
 */
static struct ieee80211_sta_rx_stats *
sta_get_last_rx_stats(struct sta_info *sta)
{
	struct ieee80211_sta_rx_stats *latest = &sta->rx_stats;
	int cpu;

	if (ieee80211_hw_check(&sta->local->hw, USES_RSS)) {
		for_each_possible_cpu(cpu) {
			struct ieee80211_sta_rx_stats *candidate =
				per_cpu_ptr(sta->pcpu_rx_stats, cpu);

			/* keep the current pick unless this CPU is newer */
			if (!time_after(candidate->last_rx, latest->last_rx))
				continue;

			latest = candidate;
		}
	}

	return latest;
}
static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
struct rate_info *rinfo) struct rate_info *rinfo)
{ {
...@@ -1967,7 +1997,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, ...@@ -1967,7 +1997,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{ {
u16 rate = ACCESS_ONCE(sta->rx_stats.last_rate); u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID) if (rate == STA_STATS_RATE_INVALID)
rinfo->flags = 0; rinfo->flags = 0;
...@@ -2010,13 +2040,29 @@ static void sta_set_tidstats(struct sta_info *sta, ...@@ -2010,13 +2040,29 @@ static void sta_set_tidstats(struct sta_info *sta,
} }
} }
/*
 * Read the 64-bit byte counter of @rxstats consistently.
 *
 * The value is sampled inside a u64_stats fetch section on
 * @rxstats->syncp and the read is repeated for as long as
 * u64_stats_fetch_retry() reports that it must be retried.
 *
 * Returns the sampled value of @rxstats->bytes.
 */
static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats)
{
	unsigned int seq;
	u64 bytes;

	for (;;) {
		seq = u64_stats_fetch_begin(&rxstats->syncp);
		bytes = rxstats->bytes;

		if (!u64_stats_fetch_retry(&rxstats->syncp, seq))
			break;
	}

	return bytes;
}
void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
{ {
struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local; struct ieee80211_local *local = sdata->local;
struct rate_control_ref *ref = NULL; struct rate_control_ref *ref = NULL;
u32 thr = 0; u32 thr = 0;
int i, ac; int i, ac, cpu;
struct ieee80211_sta_rx_stats *last_rxstats;
last_rxstats = sta_get_last_rx_stats(sta);
if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
ref = local->rate_ctrl; ref = local->rate_ctrl;
...@@ -2064,17 +2110,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ...@@ -2064,17 +2110,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
BIT(NL80211_STA_INFO_RX_BYTES)))) { BIT(NL80211_STA_INFO_RX_BYTES)))) {
unsigned int start; sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
if (sta->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
sinfo->rx_bytes += sta_get_stats_bytes(cpurxs);
}
}
do {
start = u64_stats_fetch_begin(&sta->rx_stats.syncp);
sinfo->rx_bytes = sta->rx_stats.bytes;
} while (u64_stats_fetch_retry(&sta->rx_stats.syncp, start));
sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
} }
if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
sinfo->rx_packets = sta->rx_stats.packets; sinfo->rx_packets = sta->rx_stats.packets;
if (sta->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
sinfo->rx_packets += cpurxs->packets;
}
}
sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
} }
...@@ -2089,6 +2148,14 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ...@@ -2089,6 +2148,14 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
} }
sinfo->rx_dropped_misc = sta->rx_stats.dropped; sinfo->rx_dropped_misc = sta->rx_stats.dropped;
/* With per-CPU RX statistics, a drop is counted in the instance of the
 * CPU that handled the frame.  Fold every CPU's drop counter into
 * rx_dropped_misc; adding it to rx_packets would inflate the packet
 * count and under-report drops.
 */
if (sta->pcpu_rx_stats) {
	for_each_possible_cpu(cpu) {
		struct ieee80211_sta_rx_stats *cpurxs;

		cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
		sinfo->rx_dropped_misc += cpurxs->dropped;
	}
}
if (sdata->vif.type == NL80211_IFTYPE_STATION && if (sdata->vif.type == NL80211_IFTYPE_STATION &&
!(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
...@@ -2100,27 +2167,34 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ...@@ -2100,27 +2167,34 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
sinfo->signal = (s8)sta->rx_stats.last_signal; sinfo->signal = (s8)last_rxstats->last_signal;
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
} }
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { if (!sta->pcpu_rx_stats &&
!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
sinfo->signal_avg = sinfo->signal_avg =
-ewma_signal_read(&sta->rx_stats_avg.signal); -ewma_signal_read(&sta->rx_stats_avg.signal);
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
} }
} }
if (sta->rx_stats.chains && /* for the average - if pcpu_rx_stats isn't set - rxstats must point to
* the sta->rx_stats struct, so the check here is fine with and without
* pcpu statistics
*/
if (last_rxstats->chains &&
!(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL);
BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); if (!sta->pcpu_rx_stats)
sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
sinfo->chains = last_rxstats->chains;
sinfo->chains = sta->rx_stats.chains;
for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
sinfo->chain_signal[i] = sinfo->chain_signal[i] =
sta->rx_stats.chain_signal_last[i]; last_rxstats->chain_signal_last[i];
sinfo->chain_signal_avg[i] = sinfo->chain_signal_avg[i] =
-ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]); -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]);
} }
...@@ -2213,7 +2287,9 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ...@@ -2213,7 +2287,9 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
unsigned long ieee80211_sta_last_active(struct sta_info *sta) unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{ {
if (time_after(sta->rx_stats.last_rx, sta->status_stats.last_ack)) struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
return sta->rx_stats.last_rx;
if (time_after(stats->last_rx, sta->status_stats.last_ack))
return stats->last_rx;
return sta->status_stats.last_ack; return sta->status_stats.last_ack;
} }
...@@ -297,6 +297,7 @@ struct ieee80211_fast_tx { ...@@ -297,6 +297,7 @@ struct ieee80211_fast_tx {
* @key: bool indicating encryption is expected (key is set) * @key: bool indicating encryption is expected (key is set)
* @sta_notify: notify the MLME code (once) * @sta_notify: notify the MLME code (once)
* @internal_forward: forward frames internally on AP/VLAN type interfaces * @internal_forward: forward frames internally on AP/VLAN type interfaces
* @uses_rss: copy of USES_RSS hw flag
* @da_offs: offset of the DA in the header (for header conversion) * @da_offs: offset of the DA in the header (for header conversion)
* @sa_offs: offset of the SA in the header (for header conversion) * @sa_offs: offset of the SA in the header (for header conversion)
* @rcu_head: RCU head for freeing this structure * @rcu_head: RCU head for freeing this structure
...@@ -311,7 +312,8 @@ struct ieee80211_fast_rx { ...@@ -311,7 +312,8 @@ struct ieee80211_fast_rx {
u8 icv_len; u8 icv_len;
u8 key:1, u8 key:1,
sta_notify:1, sta_notify:1,
internal_forward:1; internal_forward:1,
uses_rss:1;
u8 da_offs, sa_offs; u8 da_offs, sa_offs;
struct rcu_head rcu_head; struct rcu_head rcu_head;
...@@ -367,6 +369,21 @@ struct mesh_sta { ...@@ -367,6 +369,21 @@ struct mesh_sta {
DECLARE_EWMA(signal, 1024, 8) DECLARE_EWMA(signal, 1024, 8)
/**
 * struct ieee80211_sta_rx_stats - RX statistics for one station
 *
 * One instance is embedded in struct sta_info as rx_stats; when the
 * per-CPU pointer pcpu_rx_stats is allocated, one further instance exists
 * per CPU and the fast-RX path updates the instance of the current CPU.
 *
 * @packets: RX packet counter; reported as rx_packets in station_info
 *	(summed over the embedded and all per-CPU instances)
 * @last_rx: jiffies value stored when a frame was last accounted here;
 *	also used to decide which instance holds the most recent data
 * @num_duplicates: NOTE(review): presumably the number of duplicate frames
 *	seen - it is not updated anywhere in the visible code, confirm
 * @fragments: incremented once per frame accounted by the fast-RX path
 * @dropped: incremented when the fast-RX path drops a frame; reported as
 *	rx_dropped_misc in station_info
 * @last_signal: status->signal of the last frame that did not carry
 *	RX_FLAG_NO_SIGNAL_VAL
 * @chains: status->chains of the last frame that reported any chains
 * @chain_signal_last: per-chain signal of the last frame, written only for
 *	chains whose bit is set in that frame's chains bitmap
 * @last_rate: rate of the last frame as encoded by sta_stats_encode_rate();
 *	readers compare it against STA_STATS_RATE_INVALID
 * @syncp: u64_stats synchronisation used around updates of @msdu and
 *	@bytes and around reads of @bytes
 * @bytes: byte counter, increased by the original frame length
 * @msdu: MSDU counters, indexed by rx->seqno_idx in the fast-RX path
 */
struct ieee80211_sta_rx_stats {
unsigned long packets;
unsigned long last_rx;
unsigned long num_duplicates;
unsigned long fragments;
unsigned long dropped;
int last_signal;
u8 chains;
s8 chain_signal_last[IEEE80211_MAX_CHAINS];
u16 last_rate;
struct u64_stats_sync syncp;
u64 bytes;
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
/** /**
* struct sta_info - STA information * struct sta_info - STA information
* *
...@@ -428,6 +445,8 @@ DECLARE_EWMA(signal, 1024, 8) ...@@ -428,6 +445,8 @@ DECLARE_EWMA(signal, 1024, 8)
* the BSS one. * the BSS one.
* @tx_stats: TX statistics * @tx_stats: TX statistics
* @rx_stats: RX statistics * @rx_stats: RX statistics
* @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
* this (by advertising the USES_RSS hw flag)
* @status_stats: TX status statistics * @status_stats: TX status statistics
*/ */
struct sta_info { struct sta_info {
...@@ -448,6 +467,7 @@ struct sta_info { ...@@ -448,6 +467,7 @@ struct sta_info {
struct ieee80211_fast_tx __rcu *fast_tx; struct ieee80211_fast_tx __rcu *fast_tx;
struct ieee80211_fast_rx __rcu *fast_rx; struct ieee80211_fast_rx __rcu *fast_rx;
struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats;
#ifdef CONFIG_MAC80211_MESH #ifdef CONFIG_MAC80211_MESH
struct mesh_sta *mesh; struct mesh_sta *mesh;
...@@ -477,21 +497,7 @@ struct sta_info { ...@@ -477,21 +497,7 @@ struct sta_info {
long last_connected; long last_connected;
/* Updated from RX path only, no locking requirements */ /* Updated from RX path only, no locking requirements */
struct { struct ieee80211_sta_rx_stats rx_stats;
unsigned long packets;
unsigned long last_rx;
unsigned long num_duplicates;
unsigned long fragments;
unsigned long dropped;
int last_signal;
u8 chains;
s8 chain_signal_last[IEEE80211_MAX_CHAINS];
u16 last_rate;
struct u64_stats_sync syncp;
u64 bytes;
u64 msdu[IEEE80211_NUM_TIDS + 1];
} rx_stats;
struct { struct {
struct ewma_signal signal; struct ewma_signal signal;
struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment