Commit ccb445ae authored by Paolo Abeni

Merge branch 'net-smc-introduce-ringbufs-usage-statistics'

Wen Gu says:

====================
net/smc: introduce ringbufs usage statistics

Currently, we have histograms that show the sizes of ringbufs that were
ever used by SMC connections. However, they are always incremental, and
since SMC allows the reuse of ringbufs, we cannot know the actual amount
of ringbufs being allocated or actively used.

So this patch set introduces statistics for the amount of ringbufs that
are actually allocated by a link group and actively used by connections
of a certain net namespace, so that we can react based on this memory
usage information, e.g. active fallback to TCP.

With appropriate adaptations of smc-tools, we can obtain this ringbuf
usage information:

$ smcr -d linkgroup
LG-ID    : 00000500
LG-Role  : SERV
LG-Type  : ASYML
VLAN     : 0
PNET-ID  :
Version  : 1
Conns    : 0
Sndbuf   : 12910592 B    <-
RMB      : 12910592 B    <-

or

$ smcr -d stats
[...]
RX Stats
  Data transmitted (Bytes)      869225943 (869.2M)
  Total requests                 18494479
  Buffer usage  (Bytes)          12910592 (12.31M)  <-
  [...]

TX Stats
  Data transmitted (Bytes)    12760884405 (12.76G)
  Total requests                 36988338
  Buffer usage  (Bytes)          12910592 (12.31M)  <-
  [...]
[...]

Change log:
v2->v3
- use new helper nla_put_uint() instead of nla_put_u64_64bit().

v1->v2
https://lore.kernel.org/r/20240807075939.57882-1-guwen@linux.alibaba.com/
- remove inline keyword in .c files.
- use local variable in macros to avoid potential side effects.

v1
https://lore.kernel.org/r/20240805090551.80786-1-guwen@linux.alibaba.com/
====================

Link: https://patch.msgid.link/20240814130827.73321-1-guwen@linux.alibaba.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents dca9d62a e0d10354
...@@ -127,6 +127,8 @@ enum { ...@@ -127,6 +127,8 @@ enum {
SMC_NLA_LGR_R_NET_COOKIE, /* u64 */ SMC_NLA_LGR_R_NET_COOKIE, /* u64 */
SMC_NLA_LGR_R_PAD, /* flag */ SMC_NLA_LGR_R_PAD, /* flag */
SMC_NLA_LGR_R_BUF_TYPE, /* u8 */ SMC_NLA_LGR_R_BUF_TYPE, /* u8 */
SMC_NLA_LGR_R_SNDBUF_ALLOC, /* uint */
SMC_NLA_LGR_R_RMB_ALLOC, /* uint */
__SMC_NLA_LGR_R_MAX, __SMC_NLA_LGR_R_MAX,
SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
}; };
...@@ -162,6 +164,8 @@ enum { ...@@ -162,6 +164,8 @@ enum {
SMC_NLA_LGR_D_V2_COMMON, /* nest */ SMC_NLA_LGR_D_V2_COMMON, /* nest */
SMC_NLA_LGR_D_EXT_GID, /* u64 */ SMC_NLA_LGR_D_EXT_GID, /* u64 */
SMC_NLA_LGR_D_PEER_EXT_GID, /* u64 */ SMC_NLA_LGR_D_PEER_EXT_GID, /* u64 */
SMC_NLA_LGR_D_SNDBUF_ALLOC, /* uint */
SMC_NLA_LGR_D_DMB_ALLOC, /* uint */
__SMC_NLA_LGR_D_MAX, __SMC_NLA_LGR_D_MAX,
SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
}; };
...@@ -249,6 +253,8 @@ enum { ...@@ -249,6 +253,8 @@ enum {
SMC_NLA_STATS_T_TX_BYTES, /* u64 */ SMC_NLA_STATS_T_TX_BYTES, /* u64 */
SMC_NLA_STATS_T_RX_CNT, /* u64 */ SMC_NLA_STATS_T_RX_CNT, /* u64 */
SMC_NLA_STATS_T_TX_CNT, /* u64 */ SMC_NLA_STATS_T_TX_CNT, /* u64 */
SMC_NLA_STATS_T_RX_RMB_USAGE, /* uint */
SMC_NLA_STATS_T_TX_RMB_USAGE, /* uint */
__SMC_NLA_STATS_T_MAX, __SMC_NLA_STATS_T_MAX,
SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1 SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1
}; };
......
...@@ -221,6 +221,35 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) ...@@ -221,6 +221,35 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock); write_unlock_bh(&lgr->conns_lock);
} }
/* Link @buf_desc into @buf_list and account its size in the link group's
 * allocation counters.
 *
 * @lgr:      link group whose alloc_sndbufs / alloc_rmbs counter is updated
 * @is_rmb:   true to account the buffer as an RMB, false as a send buffer
 * @buf_list: list head the descriptor is added to
 * @buf_desc: buffer descriptor being added; its ->len is the accounted size
 *
 * NOTE(review): the counters are updated without any locking of their own;
 * the caller presumably holds the lock protecting @buf_list - confirm for
 * every call site.
 */
static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
				 bool is_rmb,
				 struct list_head *buf_list,
				 struct smc_buf_desc *buf_desc)
{
	list_add(&buf_desc->list, buf_list);

	if (!is_rmb) {
		lgr->alloc_sndbufs += buf_desc->len;
		return;
	}

	lgr->alloc_rmbs += buf_desc->len;
	/* For SMC-D, an RMB is accounted with an additional
	 * sizeof(struct smcd_cdc_msg) on top of ->len (presumably the CDC
	 * message area that is part of the buffer - TODO confirm).
	 */
	if (lgr->is_smcd)
		lgr->alloc_rmbs += sizeof(struct smcd_cdc_msg);
}
/* Unlink @buf_desc from the list it is on and subtract its size from the
 * link group's allocation counters.
 *
 * @lgr:      link group whose alloc_sndbufs / alloc_rmbs counter is updated
 * @is_rmb:   true if the buffer was accounted as an RMB, false if it was
 *            accounted as a send buffer
 * @buf_desc: buffer descriptor being removed; its ->len is the size that is
 *            subtracted
 *
 * NOTE(review): the counters are updated without any locking of their own;
 * the caller presumably holds the lock protecting the buffer list - confirm
 * for every call site.
 */
static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
				 bool is_rmb,
				 struct smc_buf_desc *buf_desc)
{
	list_del(&buf_desc->list);

	if (!is_rmb) {
		lgr->alloc_sndbufs -= buf_desc->len;
		return;
	}

	lgr->alloc_rmbs -= buf_desc->len;
	/* For SMC-D, an RMB is accounted with an additional
	 * sizeof(struct smcd_cdc_msg) on top of ->len, so subtract that
	 * as well.
	 */
	if (lgr->is_smcd)
		lgr->alloc_rmbs -= sizeof(struct smcd_cdc_msg);
}
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
...@@ -363,6 +392,10 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, ...@@ -363,6 +392,10 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0; smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr; goto errattr;
if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
goto errattr;
if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
goto errattr;
if (lgr->smc_version > SMC_V1) { if (lgr->smc_version > SMC_V1) {
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON); v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
if (!v2_attrs) if (!v2_attrs)
...@@ -541,6 +574,10 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, ...@@ -541,6 +574,10 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
goto errattr; goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd))) if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
goto errattr; goto errattr;
if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
goto errattr;
if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
goto errattr;
memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN); memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
smc_pnet[SMC_MAX_PNETID_LEN] = 0; smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
...@@ -1138,7 +1175,7 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, ...@@ -1138,7 +1175,7 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
lock = is_rmb ? &lgr->rmbs_lock : lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock; &lgr->sndbufs_lock;
down_write(lock); down_write(lock);
list_del(&buf_desc->list); smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
up_write(lock); up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc); smc_buf_free(lgr, is_rmb, buf_desc);
...@@ -1166,22 +1203,30 @@ static void smcd_buf_detach(struct smc_connection *conn) ...@@ -1166,22 +1203,30 @@ static void smcd_buf_detach(struct smc_connection *conn)
static void smc_buf_unuse(struct smc_connection *conn, static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr) struct smc_link_group *lgr)
{ {
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
bool is_smcd = lgr->is_smcd;
int bufsize;
if (conn->sndbuf_desc) { if (conn->sndbuf_desc) {
if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) { bufsize = conn->sndbuf_desc->len;
if (!is_smcd && conn->sndbuf_desc->is_vm) {
smcr_buf_unuse(conn->sndbuf_desc, false, lgr); smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
} else { } else {
memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len); memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->sndbuf_desc->used, 0); WRITE_ONCE(conn->sndbuf_desc->used, 0);
} }
SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
} }
if (conn->rmb_desc) { if (conn->rmb_desc) {
if (!lgr->is_smcd) { bufsize = conn->rmb_desc->len;
if (!is_smcd) {
smcr_buf_unuse(conn->rmb_desc, true, lgr); smcr_buf_unuse(conn->rmb_desc, true, lgr);
} else { } else {
memzero_explicit(conn->rmb_desc->cpu_addr, bufsize += sizeof(struct smcd_cdc_msg);
conn->rmb_desc->len + sizeof(struct smcd_cdc_msg)); memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->rmb_desc->used, 0); WRITE_ONCE(conn->rmb_desc->used, 0);
} }
SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
} }
} }
...@@ -1377,7 +1422,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) ...@@ -1377,7 +1422,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
buf_list = &lgr->sndbufs[i]; buf_list = &lgr->sndbufs[i];
list_for_each_entry_safe(buf_desc, bf_desc, buf_list, list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) { list) {
list_del(&buf_desc->list); smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
smc_buf_free(lgr, is_rmb, buf_desc); smc_buf_free(lgr, is_rmb, buf_desc);
} }
} }
...@@ -2390,7 +2435,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) ...@@ -2390,7 +2435,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list); buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) { if (buf_desc) {
buf_desc->is_dma_need_sync = 0; buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb); SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
break; /* found reusable slot */ break; /* found reusable slot */
} }
...@@ -2411,10 +2456,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) ...@@ -2411,10 +2456,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
} }
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb); SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
buf_desc->used = 1; buf_desc->used = 1;
down_write(lock); down_write(lock);
list_add(&buf_desc->list, buf_list); smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
up_write(lock); up_write(lock);
break; /* found */ break; /* found */
} }
...@@ -2496,7 +2541,8 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) ...@@ -2496,7 +2541,8 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
rc = __smc_buf_create(smc, is_smcd, true); rc = __smc_buf_create(smc, is_smcd, true);
if (rc && smc->conn.sndbuf_desc) { if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock); down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list); smc_lgr_buf_list_del(smc->conn.lgr, false,
smc->conn.sndbuf_desc);
up_write(&smc->conn.lgr->sndbufs_lock); up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL; smc->conn.sndbuf_desc = NULL;
......
...@@ -281,6 +281,8 @@ struct smc_link_group { ...@@ -281,6 +281,8 @@ struct smc_link_group {
struct rw_semaphore sndbufs_lock; /* protects tx buffers */ struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
struct rw_semaphore rmbs_lock; /* protects rx buffers */ struct rw_semaphore rmbs_lock; /* protects rx buffers */
u64 alloc_sndbufs; /* stats of tx buffers */
u64 alloc_rmbs; /* stats of rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */ struct delayed_work free_work; /* delayed freeing of an lgr */
......
...@@ -218,6 +218,12 @@ static int smc_nl_fill_stats_tech_data(struct sk_buff *skb, ...@@ -218,6 +218,12 @@ static int smc_nl_fill_stats_tech_data(struct sk_buff *skb,
smc_tech->tx_bytes, smc_tech->tx_bytes,
SMC_NLA_STATS_PAD)) SMC_NLA_STATS_PAD))
goto errattr; goto errattr;
if (nla_put_uint(skb, SMC_NLA_STATS_T_RX_RMB_USAGE,
smc_tech->rx_rmbuse))
goto errattr;
if (nla_put_uint(skb, SMC_NLA_STATS_T_TX_RMB_USAGE,
smc_tech->tx_rmbuse))
goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT, if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT,
smc_tech->rx_cnt, smc_tech->rx_cnt,
SMC_NLA_STATS_PAD)) SMC_NLA_STATS_PAD))
......
...@@ -79,6 +79,8 @@ struct smc_stats_tech { ...@@ -79,6 +79,8 @@ struct smc_stats_tech {
u64 tx_bytes; u64 tx_bytes;
u64 rx_cnt; u64 rx_cnt;
u64 tx_cnt; u64 tx_cnt;
u64 rx_rmbuse;
u64 tx_rmbuse;
}; };
struct smc_stats { struct smc_stats {
...@@ -135,38 +137,46 @@ do { \ ...@@ -135,38 +137,46 @@ do { \
} \ } \
while (0) while (0)
#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len) \ #define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _is_add, _len) \
do { \ do { \
typeof(_smc_stats) stats = (_smc_stats); \
typeof(_is_add) is_a = (_is_add); \
typeof(_len) _l = (_len); \ typeof(_len) _l = (_len); \
typeof(_tech) t = (_tech); \ typeof(_tech) t = (_tech); \
int _pos; \ int _pos; \
int m = SMC_BUF_MAX - 1; \ int m = SMC_BUF_MAX - 1; \
if (_l <= 0) \ if (_l <= 0) \
break; \ break; \
_pos = fls((_l - 1) >> 13); \ if (is_a) { \
_pos = (_pos <= m) ? _pos : m; \ _pos = fls((_l - 1) >> 13); \
this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \ _pos = (_pos <= m) ? _pos : m; \
this_cpu_inc((*stats).smc[t].k ## _rmbsize.buf[_pos]); \
this_cpu_add((*stats).smc[t].k ## _rmbuse, _l); \
} else { \
this_cpu_sub((*stats).smc[t].k ## _rmbuse, _l); \
} \
} \ } \
while (0) while (0)
#define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \ #define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \
this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt) this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt)
#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _len) \ #define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _is_add, _len) \
do { \ do { \
struct net *_net = sock_net(&(_smc)->sk); \ struct net *_net = sock_net(&(_smc)->sk); \
struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \ struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
typeof(_is_add) is_add = (_is_add); \
typeof(_is_smcd) is_d = (_is_smcd); \ typeof(_is_smcd) is_d = (_is_smcd); \
typeof(_is_rx) is_r = (_is_rx); \ typeof(_is_rx) is_r = (_is_rx); \
typeof(_len) l = (_len); \ typeof(_len) l = (_len); \
if ((is_d) && (is_r)) \ if ((is_d) && (is_r)) \
SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, l); \ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, is_add, l); \
if ((is_d) && !(is_r)) \ if ((is_d) && !(is_r)) \
SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, l); \ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, is_add, l); \
if (!(is_d) && (is_r)) \ if (!(is_d) && (is_r)) \
SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, l); \ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, is_add, l); \
if (!(is_d) && !(is_r)) \ if (!(is_d) && !(is_r)) \
SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, l); \ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, is_add, l); \
} \ } \
while (0) while (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment