Commit 9db298a4 authored by Shakeel Butt's avatar Shakeel Butt Committed by Andrew Morton

memcg: increase the valid index range for memcg stats

Patch series "Kernel stack usage histogram", v6.

Provide histogram of stack sizes for the exited threads:
Example outputs:
Intel:
$ grep kstack /proc/vmstat
kstack_1k 3
kstack_2k 188
kstack_4k 11391
kstack_8k 243
kstack_16k 0

ARM with 64K page_size:
$ grep kstack /proc/vmstat
kstack_1k 1
kstack_2k 340
kstack_4k 25212
kstack_8k 1659
kstack_16k 0
kstack_32k 0
kstack_64k 0


This patch (of 3):

At the moment the valid index for the indirection tables for memcg stats
and events is < S8_MAX.  These indirection tables are used in performance
critical codepaths.  With the latest addition to the vm_events, the
NR_VM_EVENT_ITEMS has gone over S8_MAX.  One way to resolve is to increase
the entry size of the indirection table from int8_t to int16_t but this
will increase the potential number of cachelines needed to access the
indirection table.

This patch took a different approach and make the valid index < U8_MAX. 
In this way the size of the indirection tables will remain same and we
only need to invalid index check from less than 0 to equal to U8_MAX.  In
this approach we have also removed a subtraction from the performance
critical codepaths.

[pasha.tatashin@soleen.com: v6]
Link: https://lkml.kernel.org/r/20240730150158.832783-1-pasha.tatashin@soleen.com
Link: https://lkml.kernel.org/r/20240724203322.2765486-1-pasha.tatashin@soleen.com
Link: https://lkml.kernel.org/r/20240724203322.2765486-2-pasha.tatashin@soleen.comSigned-off-by: default avatarShakeel Butt <shakeel.butt@linux.dev>
Co-developed-by: default avatarPasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: default avatarPasha Tatashin <pasha.tatashin@soleen.com>
Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Li Zhijian <lizhijian@fujitsu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent c495b976
......@@ -320,24 +320,27 @@ static const unsigned int memcg_stat_items[] = {
#define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
#define MEMCG_VMSTAT_SIZE (NR_MEMCG_NODE_STAT_ITEMS + \
ARRAY_SIZE(memcg_stat_items))
static int8_t mem_cgroup_stats_index[MEMCG_NR_STAT] __read_mostly;
#define BAD_STAT_IDX(index) ((u32)(index) >= U8_MAX)
static u8 mem_cgroup_stats_index[MEMCG_NR_STAT] __read_mostly;
static void init_memcg_stats(void)
{
int8_t i, j = 0;
u8 i, j = 0;
BUILD_BUG_ON(MEMCG_NR_STAT >= S8_MAX);
BUILD_BUG_ON(MEMCG_NR_STAT >= U8_MAX);
for (i = 0; i < NR_MEMCG_NODE_STAT_ITEMS; ++i)
mem_cgroup_stats_index[memcg_node_stat_items[i]] = ++j;
memset(mem_cgroup_stats_index, U8_MAX, sizeof(mem_cgroup_stats_index));
for (i = 0; i < ARRAY_SIZE(memcg_stat_items); ++i)
mem_cgroup_stats_index[memcg_stat_items[i]] = ++j;
for (i = 0; i < NR_MEMCG_NODE_STAT_ITEMS; ++i, ++j)
mem_cgroup_stats_index[memcg_node_stat_items[i]] = j;
for (i = 0; i < ARRAY_SIZE(memcg_stat_items); ++i, ++j)
mem_cgroup_stats_index[memcg_stat_items[i]] = j;
}
static inline int memcg_stats_index(int idx)
{
return mem_cgroup_stats_index[idx] - 1;
return mem_cgroup_stats_index[idx];
}
struct lruvec_stats_percpu {
......@@ -369,7 +372,7 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
return node_page_state(lruvec_pgdat(lruvec), idx);
i = memcg_stats_index(idx);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return 0;
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
......@@ -392,7 +395,7 @@ unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return node_page_state(lruvec_pgdat(lruvec), idx);
i = memcg_stats_index(idx);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return 0;
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
......@@ -435,21 +438,24 @@ static const unsigned int memcg_vm_event_stat[] = {
};
#define NR_MEMCG_EVENTS ARRAY_SIZE(memcg_vm_event_stat)
static int8_t mem_cgroup_events_index[NR_VM_EVENT_ITEMS] __read_mostly;
static u8 mem_cgroup_events_index[NR_VM_EVENT_ITEMS] __read_mostly;
static void init_memcg_events(void)
{
int8_t i;
u8 i;
BUILD_BUG_ON(NR_VM_EVENT_ITEMS >= U8_MAX);
BUILD_BUG_ON(NR_VM_EVENT_ITEMS >= S8_MAX);
memset(mem_cgroup_events_index, U8_MAX,
sizeof(mem_cgroup_events_index));
for (i = 0; i < NR_MEMCG_EVENTS; ++i)
mem_cgroup_events_index[memcg_vm_event_stat[i]] = i + 1;
mem_cgroup_events_index[memcg_vm_event_stat[i]] = i;
}
static inline int memcg_events_index(enum vm_event_item idx)
{
return mem_cgroup_events_index[idx] - 1;
return mem_cgroup_events_index[idx];
}
struct memcg_vmstats_percpu {
......@@ -621,7 +627,7 @@ unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
long x;
int i = memcg_stats_index(idx);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return 0;
x = READ_ONCE(memcg->vmstats->state[i]);
......@@ -662,7 +668,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
if (mem_cgroup_disabled())
return;
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return;
__this_cpu_add(memcg->vmstats_percpu->state[i], val);
......@@ -675,7 +681,7 @@ unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
long x;
int i = memcg_stats_index(idx);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return 0;
x = READ_ONCE(memcg->vmstats->state_local[i]);
......@@ -694,7 +700,7 @@ static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
struct mem_cgroup *memcg;
int i = memcg_stats_index(idx);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return;
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
......@@ -810,7 +816,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
if (mem_cgroup_disabled())
return;
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, idx))
return;
memcg_stats_lock();
......@@ -823,7 +829,7 @@ unsigned long memcg_events(struct mem_cgroup *memcg, int event)
{
int i = memcg_events_index(event);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, event))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, event))
return 0;
return READ_ONCE(memcg->vmstats->events[i]);
......@@ -833,7 +839,7 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
{
int i = memcg_events_index(event);
if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, event))
if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, event))
return 0;
return READ_ONCE(memcg->vmstats->events_local[i]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment