Commit 00f3ca2c, authored by Johannes Weiner, committed by Linus Torvalds

mm: memcontrol: per-lruvec stats infrastructure

lruvecs are at the intersection of the NUMA node and memcg, which is the
scope for most paging activity.

Introduce a convenient accounting infrastructure that maintains
statistics per node, per memcg, and the lruvec itself.

Then convert over accounting sites for statistics that are already
tracked in both nodes and memcgs and can be easily switched.

[hannes@cmpxchg.org: fix crash in the new cgroup stat keeping code]
  Link: http://lkml.kernel.org/r/20170531171450.GA10481@cmpxchg.org
[hannes@cmpxchg.org: don't track uncharged pages at all]
  Link: http://lkml.kernel.org/r/20170605175254.GA8547@cmpxchg.org
[hannes@cmpxchg.org: add missing free_percpu()]
  Link: http://lkml.kernel.org/r/20170605175354.GB8547@cmpxchg.org
[linux@roeck-us.net: hexagon: fix build error caused by include file order]
  Link: http://lkml.kernel.org/r/20170617153721.GA4382@roeck-us.net
Link: http://lkml.kernel.org/r/20170530181724.27197-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ed52be7b
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
/* /*
* Page table definitions for Qualcomm Hexagon processor. * Page table definitions for Qualcomm Hexagon processor.
*/ */
#include <linux/swap.h>
#include <asm/page.h> #include <asm/page.h>
#define __ARCH_USE_5LEVEL_HACK #define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/kbuild.h> #include <linux/kbuild.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
* be instantiated for it, differently from a native build. * be instantiated for it, differently from a native build.
*/ */
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/sched.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/hexagon_vm.h> #include <asm/hexagon_vm.h>
......
...@@ -26,7 +26,8 @@ ...@@ -26,7 +26,8 @@
#include <linux/page_counter.h> #include <linux/page_counter.h>
#include <linux/vmpressure.h> #include <linux/vmpressure.h>
#include <linux/eventfd.h> #include <linux/eventfd.h>
#include <linux/mmzone.h> #include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/page-flags.h> #include <linux/page-flags.h>
...@@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter { ...@@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter {
unsigned int generation; unsigned int generation;
}; };
/*
 * Statistics kept for a single lruvec: one counter slot per node stat
 * item (the array is sized by NR_VM_NODE_STAT_ITEMS).
 */
struct lruvec_stat {
/* Signed counters; indexed by enum node_stat_item. */
long count[NR_VM_NODE_STAT_ITEMS];
};
/* /*
* per-zone information in memory controller. * per-zone information in memory controller.
*/ */
struct mem_cgroup_per_node { struct mem_cgroup_per_node {
struct lruvec lruvec; struct lruvec lruvec;
struct lruvec_stat __percpu *lruvec_stat;
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
...@@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, ...@@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
return val; return val;
} }
static inline void mod_memcg_state(struct mem_cgroup *memcg, static inline void __mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val) enum memcg_stat_item idx, int val)
{ {
if (!mem_cgroup_disabled()) if (!mem_cgroup_disabled())
this_cpu_add(memcg->stat->count[idx], val); __this_cpu_add(memcg->stat->count[idx], val);
}
static inline void inc_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
{
mod_memcg_state(memcg, idx, 1);
} }
static inline void dec_memcg_state(struct mem_cgroup *memcg, static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx) enum memcg_stat_item idx, int val)
{ {
mod_memcg_state(memcg, idx, -1); if (!mem_cgroup_disabled())
this_cpu_add(memcg->stat->count[idx], val);
} }
/** /**
...@@ -532,6 +533,13 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg, ...@@ -532,6 +533,13 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg,
* *
* Kernel pages are an exception to this, since they'll never move. * Kernel pages are an exception to this, since they'll never move.
*/ */
/*
 * __mod_memcg_page_state - adjust a memcg counter on behalf of a page
 * @page: page whose page->mem_cgroup receives the update
 * @idx: memcg stat item to adjust
 * @val: signed delta to apply
 *
 * Pages that have no memcg attached are silently ignored; otherwise the
 * update is forwarded to __mod_memcg_state().
 */
static inline void __mod_memcg_page_state(struct page *page,
					  enum memcg_stat_item idx, int val)
{
	if (!page->mem_cgroup)
		return;

	__mod_memcg_state(page->mem_cgroup, idx, val);
}
static inline void mod_memcg_page_state(struct page *page, static inline void mod_memcg_page_state(struct page *page,
enum memcg_stat_item idx, int val) enum memcg_stat_item idx, int val)
{ {
...@@ -539,16 +547,76 @@ static inline void mod_memcg_page_state(struct page *page, ...@@ -539,16 +547,76 @@ static inline void mod_memcg_page_state(struct page *page,
mod_memcg_state(page->mem_cgroup, idx, val); mod_memcg_state(page->mem_cgroup, idx, val);
} }
static inline void inc_memcg_page_state(struct page *page, static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum memcg_stat_item idx) enum node_stat_item idx)
{ {
mod_memcg_page_state(page, idx, 1); struct mem_cgroup_per_node *pn;
long val = 0;
int cpu;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
for_each_possible_cpu(cpu)
val += per_cpu(pn->lruvec_stat->count[idx], cpu);
if (val < 0)
val = 0;
return val;
} }
static inline void dec_memcg_page_state(struct page *page, static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum memcg_stat_item idx) enum node_stat_item idx, int val)
{ {
mod_memcg_page_state(page, idx, -1); struct mem_cgroup_per_node *pn;
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
if (mem_cgroup_disabled())
return;
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
__mod_memcg_state(pn->memcg, idx, val);
__this_cpu_add(pn->lruvec_stat->count[idx], val);
}
/*
 * mod_lruvec_state - apply a delta to a stat item at every accounting level
 * @lruvec: the lruvec the event belongs to
 * @idx: node stat item to adjust
 * @val: signed delta to apply
 *
 * The node counter is always updated.  Unless the memory controller is
 * disabled, the same delta is also applied to the owning memcg and to the
 * lruvec's own per-cpu counter.
 */
static inline void mod_lruvec_state(struct lruvec *lruvec,
				    enum node_stat_item idx, int val)
{
	struct mem_cgroup_per_node *pn;

	/* Node-wide accounting happens regardless of memcg state. */
	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);

	if (!mem_cgroup_disabled()) {
		/* The lruvec is embedded in its per-node memcg structure. */
		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
		mod_memcg_state(pn->memcg, idx, val);
		this_cpu_add(pn->lruvec_stat->count[idx], val);
	}
}
/*
 * __mod_lruvec_page_state - account a page-based delta at every level
 * @page: the page the event belongs to
 * @idx: node stat item to adjust
 * @val: signed delta to apply
 *
 * The page's node counter is always updated.  If the memory controller is
 * enabled and the page has a memcg attached, the delta is also applied to
 * that memcg and to the per-cpu counter of the memcg's per-node structure
 * for the page's node.
 *
 * Built on the double-underscore helpers (__mod_node_page_state(),
 * __mod_memcg_state(), __this_cpu_add()).
 */
static inline void __mod_lruvec_page_state(struct page *page,
					   enum node_stat_item idx, int val)
{
	struct mem_cgroup_per_node *pn;

	__mod_node_page_state(page_pgdat(page), idx, val);

	/* Uncharged pages are only tracked at the node level. */
	if (!mem_cgroup_disabled() && page->mem_cgroup) {
		__mod_memcg_state(page->mem_cgroup, idx, val);
		pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
		__this_cpu_add(pn->lruvec_stat->count[idx], val);
	}
}
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
struct mem_cgroup_per_node *pn;
mod_node_page_state(page_pgdat(page), idx, val);
if (mem_cgroup_disabled() || !page->mem_cgroup)
return;
mod_memcg_state(page->mem_cgroup, idx, val);
pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
this_cpu_add(pn->lruvec_stat->count[idx], val);
} }
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
...@@ -777,19 +845,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, ...@@ -777,19 +845,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
return 0; return 0;
} }
static inline void mod_memcg_state(struct mem_cgroup *memcg, static inline void __mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, enum memcg_stat_item idx,
int nr) int nr)
{ {
} }
static inline void inc_memcg_state(struct mem_cgroup *memcg, static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx) enum memcg_stat_item idx,
int nr)
{ {
} }
static inline void dec_memcg_state(struct mem_cgroup *memcg, static inline void __mod_memcg_page_state(struct page *page,
enum memcg_stat_item idx) enum memcg_stat_item idx,
int nr)
{ {
} }
...@@ -799,14 +869,34 @@ static inline void mod_memcg_page_state(struct page *page, ...@@ -799,14 +869,34 @@ static inline void mod_memcg_page_state(struct page *page,
{ {
} }
static inline void inc_memcg_page_state(struct page *page, static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum memcg_stat_item idx) enum node_stat_item idx)
{ {
return node_page_state(lruvec_pgdat(lruvec), idx);
} }
static inline void dec_memcg_page_state(struct page *page, static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum memcg_stat_item idx) enum node_stat_item idx, int val)
{ {
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}
/*
 * mod_lruvec_state - node-only variant
 * @lruvec: the lruvec the event belongs to
 * @idx: node stat item to adjust
 * @val: signed delta to apply
 *
 * This variant keeps no memcg or lruvec counters; the delta goes straight
 * to the node that @lruvec belongs to.
 */
static inline void mod_lruvec_state(struct lruvec *lruvec,
				    enum node_stat_item idx, int val)
{
	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}
/*
 * __mod_lruvec_page_state - node-only variant
 * @page: the page the event belongs to
 * @idx: node stat item to adjust
 * @val: signed delta to apply
 *
 * This variant keeps no memcg or lruvec counters; the delta goes straight
 * to the page's node via __mod_node_page_state().
 */
static inline void __mod_lruvec_page_state(struct page *page,
					   enum node_stat_item idx, int val)
{
	__mod_node_page_state(page_pgdat(page), idx, val);
}
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
mod_node_page_state(page_pgdat(page), idx, val);
} }
static inline static inline
...@@ -838,6 +928,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) ...@@ -838,6 +928,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
} }
#endif /* CONFIG_MEMCG */ #endif /* CONFIG_MEMCG */
/*
 * __inc_memcg_state - bump a memcg counter by one
 * @memcg: memory cgroup to account to
 * @idx: memcg stat item to increment
 *
 * Shorthand for __mod_memcg_state(@memcg, @idx, 1).
 */
static inline void __inc_memcg_state(struct mem_cgroup *memcg,
				     enum memcg_stat_item idx)
{
	__mod_memcg_state(memcg, idx, 1);
}
/*
 * __dec_memcg_state - drop a memcg counter by one
 * @memcg: memory cgroup to account to
 * @idx: memcg stat item to decrement
 *
 * Shorthand for __mod_memcg_state(@memcg, @idx, -1).
 */
static inline void __dec_memcg_state(struct mem_cgroup *memcg,
				     enum memcg_stat_item idx)
{
	__mod_memcg_state(memcg, idx, -1);
}
/*
 * __inc_memcg_page_state - bump a page's memcg counter by one
 * @page: page whose memcg is accounted
 * @idx: memcg stat item to increment
 *
 * Shorthand for __mod_memcg_page_state(@page, @idx, 1).
 */
static inline void __inc_memcg_page_state(struct page *page,
					  enum memcg_stat_item idx)
{
	__mod_memcg_page_state(page, idx, 1);
}
/*
 * __dec_memcg_page_state - drop a page's memcg counter by one
 * @page: page whose memcg is accounted
 * @idx: memcg stat item to decrement
 *
 * Shorthand for __mod_memcg_page_state(@page, @idx, -1).
 */
static inline void __dec_memcg_page_state(struct page *page,
					  enum memcg_stat_item idx)
{
	__mod_memcg_page_state(page, idx, -1);
}
/*
 * __inc_lruvec_state - bump a lruvec stat item by one
 * @lruvec: the lruvec the event belongs to
 * @idx: node stat item to increment
 *
 * Shorthand for __mod_lruvec_state(@lruvec, @idx, 1).
 */
static inline void __inc_lruvec_state(struct lruvec *lruvec,
				      enum node_stat_item idx)
{
	__mod_lruvec_state(lruvec, idx, 1);
}
/*
 * __dec_lruvec_state - drop a lruvec stat item by one
 * @lruvec: the lruvec the event belongs to
 * @idx: node stat item to decrement
 *
 * Shorthand for __mod_lruvec_state(@lruvec, @idx, -1).
 */
static inline void __dec_lruvec_state(struct lruvec *lruvec,
				      enum node_stat_item idx)
{
	__mod_lruvec_state(lruvec, idx, -1);
}
/*
 * __inc_lruvec_page_state - bump a page's lruvec stat item by one
 * @page: the page the event belongs to
 * @idx: node stat item to increment
 *
 * Shorthand for __mod_lruvec_page_state(@page, @idx, 1).
 */
static inline void __inc_lruvec_page_state(struct page *page,
					   enum node_stat_item idx)
{
	__mod_lruvec_page_state(page, idx, 1);
}
/*
 * __dec_lruvec_page_state - drop a page's lruvec stat item by one
 * @page: the page the event belongs to
 * @idx: node stat item to decrement
 *
 * Shorthand for __mod_lruvec_page_state(@page, @idx, -1).
 */
static inline void __dec_lruvec_page_state(struct page *page,
					   enum node_stat_item idx)
{
	__mod_lruvec_page_state(page, idx, -1);
}
/*
 * inc_memcg_state - bump a memcg counter by one
 * @memcg: memory cgroup to account to
 * @idx: memcg stat item to increment
 *
 * Shorthand for mod_memcg_state(@memcg, @idx, 1).
 */
static inline void inc_memcg_state(struct mem_cgroup *memcg,
				   enum memcg_stat_item idx)
{
	mod_memcg_state(memcg, idx, 1);
}
/*
 * dec_memcg_state - drop a memcg counter by one
 * @memcg: memory cgroup to account to
 * @idx: memcg stat item to decrement
 *
 * Shorthand for mod_memcg_state(@memcg, @idx, -1).
 */
static inline void dec_memcg_state(struct mem_cgroup *memcg,
				   enum memcg_stat_item idx)
{
	mod_memcg_state(memcg, idx, -1);
}
/*
 * inc_memcg_page_state - bump a page's memcg counter by one
 * @page: page whose memcg is accounted
 * @idx: memcg stat item to increment
 *
 * Shorthand for mod_memcg_page_state(@page, @idx, 1).
 */
static inline void inc_memcg_page_state(struct page *page,
					enum memcg_stat_item idx)
{
	mod_memcg_page_state(page, idx, 1);
}
/*
 * dec_memcg_page_state - drop a page's memcg counter by one
 * @page: page whose memcg is accounted
 * @idx: memcg stat item to decrement
 *
 * Shorthand for mod_memcg_page_state(@page, @idx, -1).
 */
static inline void dec_memcg_page_state(struct page *page,
					enum memcg_stat_item idx)
{
	mod_memcg_page_state(page, idx, -1);
}
/*
 * inc_lruvec_state - bump a lruvec stat item by one
 * @lruvec: the lruvec the event belongs to
 * @idx: node stat item to increment
 *
 * Shorthand for mod_lruvec_state(@lruvec, @idx, 1).
 */
static inline void inc_lruvec_state(struct lruvec *lruvec,
				    enum node_stat_item idx)
{
	mod_lruvec_state(lruvec, idx, 1);
}
/*
 * dec_lruvec_state - drop a lruvec stat item by one
 * @lruvec: the lruvec the event belongs to
 * @idx: node stat item to decrement
 *
 * Shorthand for mod_lruvec_state(@lruvec, @idx, -1).
 */
static inline void dec_lruvec_state(struct lruvec *lruvec,
				    enum node_stat_item idx)
{
	mod_lruvec_state(lruvec, idx, -1);
}
/*
 * inc_lruvec_page_state - bump a page's lruvec stat item by one
 * @page: the page the event belongs to
 * @idx: node stat item to increment
 *
 * Shorthand for mod_lruvec_page_state(@page, @idx, 1).
 */
static inline void inc_lruvec_page_state(struct page *page,
					 enum node_stat_item idx)
{
	mod_lruvec_page_state(page, idx, 1);
}
/*
 * dec_lruvec_page_state - drop a page's lruvec stat item by one
 * @page: the page the event belongs to
 * @idx: node stat item to decrement
 *
 * Shorthand for mod_lruvec_page_state(@page, @idx, -1).
 */
static inline void dec_lruvec_page_state(struct page *page,
					 enum node_stat_item idx)
{
	mod_lruvec_page_state(page, idx, -1);
}
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/mm.h>
#include <linux/mmzone.h> #include <linux/mmzone.h>
#include <linux/vm_event_item.h> #include <linux/vm_event_item.h>
#include <linux/atomic.h> #include <linux/atomic.h>
......
...@@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) ...@@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn) if (!pn)
return 1; return 1;
pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
if (!pn->lruvec_stat) {
kfree(pn);
return 1;
}
lruvec_init(&pn->lruvec); lruvec_init(&pn->lruvec);
pn->usage_in_excess = 0; pn->usage_in_excess = 0;
pn->on_tree = false; pn->on_tree = false;
...@@ -4133,7 +4139,10 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) ...@@ -4133,7 +4139,10 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{ {
kfree(memcg->nodeinfo[node]); struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
free_percpu(pn->lruvec_stat);
kfree(pn);
} }
static void __mem_cgroup_free(struct mem_cgroup *memcg) static void __mem_cgroup_free(struct mem_cgroup *memcg)
......
...@@ -2433,8 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) ...@@ -2433,8 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
inode_attach_wb(inode, page); inode_attach_wb(inode, page);
wb = inode_to_wb(inode); wb = inode_to_wb(inode);
inc_memcg_page_state(page, NR_FILE_DIRTY); __inc_lruvec_page_state(page, NR_FILE_DIRTY);
__inc_node_page_state(page, NR_FILE_DIRTY);
__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
__inc_node_page_state(page, NR_DIRTIED); __inc_node_page_state(page, NR_DIRTIED);
__inc_wb_stat(wb, WB_RECLAIMABLE); __inc_wb_stat(wb, WB_RECLAIMABLE);
...@@ -2455,8 +2454,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, ...@@ -2455,8 +2454,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
struct bdi_writeback *wb) struct bdi_writeback *wb)
{ {
if (mapping_cap_account_dirty(mapping)) { if (mapping_cap_account_dirty(mapping)) {
dec_memcg_page_state(page, NR_FILE_DIRTY); dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_node_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE); dec_wb_stat(wb, WB_RECLAIMABLE);
task_io_account_cancelled_write(PAGE_SIZE); task_io_account_cancelled_write(PAGE_SIZE);
...@@ -2712,8 +2710,7 @@ int clear_page_dirty_for_io(struct page *page) ...@@ -2712,8 +2710,7 @@ int clear_page_dirty_for_io(struct page *page)
*/ */
wb = unlocked_inode_to_wb_begin(inode, &locked); wb = unlocked_inode_to_wb_begin(inode, &locked);
if (TestClearPageDirty(page)) { if (TestClearPageDirty(page)) {
dec_memcg_page_state(page, NR_FILE_DIRTY); dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_node_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE); dec_wb_stat(wb, WB_RECLAIMABLE);
ret = 1; ret = 1;
...@@ -2759,8 +2756,7 @@ int test_clear_page_writeback(struct page *page) ...@@ -2759,8 +2756,7 @@ int test_clear_page_writeback(struct page *page)
ret = TestClearPageWriteback(page); ret = TestClearPageWriteback(page);
} }
if (ret) { if (ret) {
dec_memcg_page_state(page, NR_WRITEBACK); dec_lruvec_page_state(page, NR_WRITEBACK);
dec_node_page_state(page, NR_WRITEBACK);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
inc_node_page_state(page, NR_WRITTEN); inc_node_page_state(page, NR_WRITTEN);
} }
...@@ -2814,8 +2810,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) ...@@ -2814,8 +2810,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
ret = TestSetPageWriteback(page); ret = TestSetPageWriteback(page);
} }
if (!ret) { if (!ret) {
inc_memcg_page_state(page, NR_WRITEBACK); inc_lruvec_page_state(page, NR_WRITEBACK);
inc_node_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
} }
unlock_page_memcg(page); unlock_page_memcg(page);
......
...@@ -1145,8 +1145,7 @@ void page_add_file_rmap(struct page *page, bool compound) ...@@ -1145,8 +1145,7 @@ void page_add_file_rmap(struct page *page, bool compound)
if (!atomic_inc_and_test(&page->_mapcount)) if (!atomic_inc_and_test(&page->_mapcount))
goto out; goto out;
} }
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
mod_memcg_page_state(page, NR_FILE_MAPPED, nr);
out: out:
unlock_page_memcg(page); unlock_page_memcg(page);
} }
...@@ -1181,12 +1180,11 @@ static void page_remove_file_rmap(struct page *page, bool compound) ...@@ -1181,12 +1180,11 @@ static void page_remove_file_rmap(struct page *page, bool compound)
} }
/* /*
* We use the irq-unsafe __{inc|mod}_zone_page_state because * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
* these counters are not modified in interrupt context, and * these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled. * pte lock(a spinlock) is held, which implies preemption disabled.
*/ */
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
mod_memcg_page_state(page, NR_FILE_MAPPED, -nr);
if (unlikely(PageMlocked(page))) if (unlikely(PageMlocked(page)))
clear_page_mlock(page); clear_page_mlock(page);
......
...@@ -288,12 +288,10 @@ bool workingset_refault(void *shadow) ...@@ -288,12 +288,10 @@ bool workingset_refault(void *shadow)
*/ */
refault_distance = (refault - eviction) & EVICTION_MASK; refault_distance = (refault - eviction) & EVICTION_MASK;
inc_node_state(pgdat, WORKINGSET_REFAULT); inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
inc_memcg_state(memcg, WORKINGSET_REFAULT);
if (refault_distance <= active_file) { if (refault_distance <= active_file) {
inc_node_state(pgdat, WORKINGSET_ACTIVATE); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
inc_memcg_state(memcg, WORKINGSET_ACTIVATE);
rcu_read_unlock(); rcu_read_unlock();
return true; return true;
} }
...@@ -474,8 +472,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, ...@@ -474,8 +472,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
} }
if (WARN_ON_ONCE(node->exceptional)) if (WARN_ON_ONCE(node->exceptional))
goto out_invalid; goto out_invalid;
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
inc_memcg_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
__radix_tree_delete_node(&mapping->page_tree, node, __radix_tree_delete_node(&mapping->page_tree, node,
workingset_update_node, mapping); workingset_update_node, mapping);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment