Commit 991e7673 authored by Shakeel Butt, committed by Linus Torvalds

mm: memcontrol: account kernel stack per node

Currently the kernel stack is accounted per-zone, but there is no need
for that.  In addition, because the stat is per-zone, memcg has to keep
a separate MEMCG_KERNEL_STACK_KB counter.  Make the stat per-node and
remove MEMCG_KERNEL_STACK_KB, since memcg_stat_item is an extension of
node_stat_item.  Also localize the kernel stack stat updates to
account_kernel_stack().
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Link: http://lkml.kernel.org/r/20200630161539.1759185-1-shakeelb@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent fbc1ac9d
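
For orientation before the diff: this is roughly the shape of account_kernel_stack() after the patch, reconstructed from the account_kernel_stack() hunk below (a sketch with indentation restored, not the verbatim kernel source). Both the vmap-backed and the slab-backed stack paths now feed the node-level NR_KERNEL_STACK_KB counter, and the memcg side is charged through the lruvec helpers instead of a separate MEMCG_KERNEL_STACK_KB stat.

/*
 * Sketch of account_kernel_stack() as changed by this patch
 * (reconstructed from the hunk below; not the verbatim source).
 */
static void account_kernel_stack(struct task_struct *tsk, int account)
{
        void *stack = task_stack_page(tsk);
        struct vm_struct *vm = task_stack_vm_area(tsk);

        /* All stack pages are in the same node. */
        if (vm)
                /* vmap-backed stack: node stat + memcg charge via the page's lruvec */
                mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
                                      account * (THREAD_SIZE / 1024));
        else
                /* slab-backed stack: resolve the lruvec from the backing page */
                mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
                                      account * (THREAD_SIZE / 1024));
}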
@@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev,
 nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
 nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
 nid, K(i.sharedram),
-nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
 #ifdef CONFIG_SHADOW_CALL_STACK
-nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
 nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
 nid, 0UL,
...
@@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 show_val_kb(m, "SReclaimable: ", sreclaimable);
 show_val_kb(m, "SUnreclaim: ", sunreclaim);
 seq_printf(m, "KernelStack: %8lu kB\n",
-global_zone_page_state(NR_KERNEL_STACK_KB));
+global_node_page_state(NR_KERNEL_STACK_KB));
 #ifdef CONFIG_SHADOW_CALL_STACK
 seq_printf(m, "ShadowCallStack:%8lu kB\n",
-global_zone_page_state(NR_KERNEL_SCS_KB));
+global_node_page_state(NR_KERNEL_SCS_KB));
 #endif
 show_val_kb(m, "PageTables: ",
 global_zone_page_state(NR_PAGETABLE));
...
@@ -32,8 +32,6 @@ struct kmem_cache;
 enum memcg_stat_item {
 MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 MEMCG_SOCK,
-/* XXX: why are these zone and not node counters? */
-MEMCG_KERNEL_STACK_KB,
 MEMCG_NR_STAT,
 };
@@ -729,8 +727,19 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 int val);
 void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
 void mod_memcg_obj_state(void *p, int idx, int val);
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+int val)
+{
+unsigned long flags;
+local_irq_save(flags);
+__mod_lruvec_slab_state(p, idx, val);
+local_irq_restore(flags);
+}
 static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
 enum node_stat_item idx, int val)
 {
@@ -1151,6 +1160,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
 __mod_node_page_state(page_pgdat(page), idx, val);
 }
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+int val)
+{
+struct page *page = virt_to_head_page(p);
+mod_node_page_state(page_pgdat(page), idx, val);
+}
 static inline void mod_memcg_obj_state(void *p, int idx, int val)
 {
 }
...
@@ -155,10 +155,6 @@ enum zone_stat_item {
 NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
 NR_MLOCK, /* mlock()ed pages found and moved off LRU */
 NR_PAGETABLE, /* used for pagetables */
-NR_KERNEL_STACK_KB, /* measured in KiB */
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-NR_KERNEL_SCS_KB, /* measured in KiB */
-#endif
 /* Second 128 byte cacheline */
 NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -203,6 +199,10 @@ enum node_stat_item {
 NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
 NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
 NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
+NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+NR_KERNEL_SCS_KB, /* measured in KiB */
+#endif
 NR_VM_NODE_STAT_ITEMS
 };
...
@@ -276,13 +276,8 @@ static inline void free_thread_stack(struct task_struct *tsk)
 if (vm) {
 int i;
-for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-mod_memcg_page_state(vm->pages[i],
-MEMCG_KERNEL_STACK_KB,
--(int)(PAGE_SIZE / 1024));
+for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
 memcg_kmem_uncharge_page(vm->pages[i], 0);
-}
 for (i = 0; i < NR_CACHED_STACKS; i++) {
 if (this_cpu_cmpxchg(cached_stacks[i],
@@ -382,31 +377,14 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 void *stack = task_stack_page(tsk);
 struct vm_struct *vm = task_stack_vm_area(tsk);
-BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-if (vm) {
-int i;
-BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-mod_zone_page_state(page_zone(vm->pages[i]),
-NR_KERNEL_STACK_KB,
-PAGE_SIZE / 1024 * account);
-}
-} else {
-/*
- * All stack pages are in the same zone and belong to the
- * same memcg.
- */
-struct page *first_page = virt_to_page(stack);
-mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-THREAD_SIZE / 1024 * account);
-mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
-account * (THREAD_SIZE / 1024));
-}
+/* All stack pages are in the same node. */
+if (vm)
+mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
+account * (THREAD_SIZE / 1024));
+else
+mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
+account * (THREAD_SIZE / 1024));
 }
 static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@ -415,24 +393,23 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
 struct vm_struct *vm = task_stack_vm_area(tsk);
 int ret;
+BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
 if (vm) {
 int i;
+BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
 for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
 /*
 * If memcg_kmem_charge_page() fails, page->mem_cgroup
-* pointer is NULL, and both memcg_kmem_uncharge_page()
-* and mod_memcg_page_state() in free_thread_stack()
-* will ignore this page. So it's safe.
+* pointer is NULL, and memcg_kmem_uncharge_page() in
+* free_thread_stack() will ignore this page.
 */
 ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
 0);
 if (ret)
 return ret;
-mod_memcg_page_state(vm->pages[i],
-MEMCG_KERNEL_STACK_KB,
-PAGE_SIZE / 1024);
 }
 }
 #endif
...
@@ -17,7 +17,7 @@ static void __scs_account(void *s, int account)
 {
 struct page *scs_page = virt_to_page(s);
-mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
+mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
 account * (SCS_SIZE / SZ_1K));
 }
...
@@ -1485,7 +1485,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
 (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
 PAGE_SIZE);
 seq_buf_printf(&s, "kernel_stack %llu\n",
-(u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) *
+(u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
 1024);
 seq_buf_printf(&s, "slab %llu\n",
 (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
...
@@ -5396,6 +5396,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 " anon_thp: %lukB"
 #endif
 " writeback_tmp:%lukB"
+" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+" shadow_call_stack:%lukB"
+#endif
 " all_unreclaimable? %s"
 "\n",
 pgdat->node_id,
@@ -5417,6 +5421,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
 K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
 "yes" : "no");
 }
@@ -5448,10 +5456,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 " present:%lukB"
 " managed:%lukB"
 " mlocked:%lukB"
-" kernel_stack:%lukB"
-#ifdef CONFIG_SHADOW_CALL_STACK
-" shadow_call_stack:%lukB"
-#endif
 " pagetables:%lukB"
 " bounce:%lukB"
 " free_pcp:%lukB"
@@ -5473,10 +5477,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 K(zone->present_pages),
 K(zone_managed_pages(zone)),
 K(zone_page_state(zone, NR_MLOCK)),
-zone_page_state(zone, NR_KERNEL_STACK_KB),
-#ifdef CONFIG_SHADOW_CALL_STACK
-zone_page_state(zone, NR_KERNEL_SCS_KB),
-#endif
 K(zone_page_state(zone, NR_PAGETABLE)),
 K(zone_page_state(zone, NR_BOUNCE)),
 K(free_pcp),
...
@@ -1140,10 +1140,6 @@ const char * const vmstat_text[] = {
 "nr_zone_write_pending",
 "nr_mlock",
 "nr_page_table_pages",
-"nr_kernel_stack",
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-"nr_shadow_call_stack",
-#endif
 "nr_bounce",
 #if IS_ENABLED(CONFIG_ZSMALLOC)
 "nr_zspages",
@@ -1194,6 +1190,10 @@ const char * const vmstat_text[] = {
 "nr_kernel_misc_reclaimable",
 "nr_foll_pin_acquired",
 "nr_foll_pin_released",
+"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+"nr_shadow_call_stack",
+#endif
 /* enum writeback_stat_item counters */
 "nr_dirty_threshold",