Commit 04f94e3f authored by Dan Schatzberg, committed by Linus Torvalds

mm: charge active memcg when no mm is set

set_active_memcg() worked for kernel allocations but was silently ignored
for user pages.

This patch establishes a precedence order for who gets charged:

1. If there is a memcg associated with the page already, that memcg is
   charged. This happens during swapin.

2. If an explicit mm is passed, mm->memcg is charged. This happens
   during page faults, which can be triggered in remote VMs (e.g. gup).

3. Otherwise consult the current process context. If there is an
   active_memcg, use that. Otherwise, current->mm->memcg.

Previously, if a NULL mm was passed to mem_cgroup_charge() (case 3),
it would always charge the root cgroup. Now it looks up the
active_memcg first, falling back to current->mm->memcg and finally
the root cgroup if neither is set.
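
To make the effect concrete, here is a minimal sketch of the usage
pattern this enables; it is not part of the patch, and the helpers
issue_io_for() and do_backing_io() are hypothetical stand-ins:

#include <linux/memcontrol.h>
#include <linux/sched/mm.h>	/* set_active_memcg() */

/*
 * Hypothetical worker-side helper: do_backing_io() stands in for any
 * path that charges user pages with no mm at hand (e.g. a shmem or
 * page cache fill driven by a kernel thread).
 */
static int issue_io_for(struct mem_cgroup *memcg)
{
	struct mem_cgroup *old_memcg;
	int ret;

	old_memcg = set_active_memcg(memcg);
	/*
	 * With this patch, any mem_cgroup_charge(page, NULL, gfp)
	 * reached from here charges @memcg instead of the root cgroup.
	 */
	ret = do_backing_io();
	set_active_memcg(old_memcg);

	return ret;
}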

Link: https://lkml.kernel.org/r/20210610173944.1203706-3-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Chris Down <chris@chrisdown.name>
Acked-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 87579e9b
mm/filemap.c
@@ -872,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
 	page->index = offset;
 
 	if (!huge) {
-		error = mem_cgroup_charge(page, current->mm, gfp);
+		error = mem_cgroup_charge(page, NULL, gfp);
 		if (error)
 			goto error;
 		charged = true;
mm/memcontrol.c
@@ -897,13 +897,24 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 }
 EXPORT_SYMBOL(mem_cgroup_from_task);
 
+static __always_inline struct mem_cgroup *active_memcg(void)
+{
+	if (in_interrupt())
+		return this_cpu_read(int_active_memcg);
+	else
+		return current->active_memcg;
+}
+
 /**
  * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
  * @mm: mm from which memcg should be extracted. It can be NULL.
  *
- * Obtain a reference on mm->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
- * returned.
+ * Obtain a reference on mm->memcg and returns it if successful. If mm
+ * is NULL, then the memcg is chosen as follows:
+ * 1) The active memcg, if set.
+ * 2) current->mm->memcg, if available
+ * 3) root memcg
+ * If mem_cgroup is disabled, NULL is returned.
  */
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
@@ -921,8 +932,17 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	 * counting is disabled on the root level in the
 	 * cgroup core. See CSS_NO_REF.
 	 */
-	if (unlikely(!mm))
-		return root_mem_cgroup;
+	if (unlikely(!mm)) {
+		memcg = active_memcg();
+		if (unlikely(memcg)) {
+			/* remote memcg must hold a ref */
+			css_get(&memcg->css);
+			return memcg;
+		}
+		mm = current->mm;
+		if (unlikely(!mm))
+			return root_mem_cgroup;
+	}
 
 	rcu_read_lock();
 	do {
@@ -935,14 +955,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_mm);
 
-static __always_inline struct mem_cgroup *active_memcg(void)
-{
-	if (in_interrupt())
-		return this_cpu_read(int_active_memcg);
-	else
-		return current->active_memcg;
-}
-
 static __always_inline bool memcg_kmem_bypass(void)
 {
 	/* Allow remote memcg charging from any context. */
@@ -6711,7 +6723,8 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg,
  * @gfp_mask: reclaim mode
  *
  * Try to charge @page to the memcg that @mm belongs to, reclaiming
- * pages according to @gfp_mask if necessary.
+ * pages according to @gfp_mask if necessary. If @mm is NULL, try to
+ * charge to the active memcg.
  *
  * Do not use this for pages allocated for swapin.
  *
mm/shmem.c
@@ -1695,7 +1695,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
+	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
 	struct swap_info_struct *si;
 	struct page *page = NULL;
 	swp_entry_t swap;
@@ -1828,7 +1828,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	}
 
 	sbinfo = SHMEM_SB(inode->i_sb);
-	charge_mm = vma ? vma->vm_mm : current->mm;
+	charge_mm = vma ? vma->vm_mm : NULL;
 
 	page = pagecache_get_page(mapping, index,
 			FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
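
For reference alongside the active_memcg() helper moved above: the
setter it pairs with lives in include/linux/sched/mm.h and is not part
of this diff. A rough sketch of that era's implementation (paraphrased,
so treat details as approximate):

/*
 * set_active_memcg() writes the slot that active_memcg() reads:
 * a per-CPU variable in interrupt context, current->active_memcg
 * otherwise. It returns the previous value so callers can restore it.
 */
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
	struct mem_cgroup *old;

	if (in_interrupt()) {
		old = this_cpu_read(int_active_memcg);
		this_cpu_write(int_active_memcg, memcg);
	} else {
		old = current->active_memcg;
		current->active_memcg = memcg;
	}

	return old;
}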