Commit 7b15c27e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'core-mm-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull tlb gather updates from Ingo Molnar:
 "These fix MM (soft-)dirty bit management in the procfs code & clean
  up the TLB gather API"

* tag 'core-mm-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ldt: Use tlb_gather_mmu_fullmm() when freeing LDT page-tables
  tlb: arch: Remove empty __tlb_remove_tlb_entry() stubs
  tlb: mmu_gather: Remove start/end arguments from tlb_gather_mmu()
  tlb: mmu_gather: Introduce tlb_gather_mmu_fullmm()
  tlb: mmu_gather: Remove unused start/end arguments from tlb_finish_mmu()
  mm: proc: Invalidate TLB after clearing soft-dirty page state
parents 9eef0233 8cf55f24
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
* unmapping a portion of the virtual address space, these hooks are called according to * unmapping a portion of the virtual address space, these hooks are called according to
* the following template: * the following template:
* *
* tlb <- tlb_gather_mmu(mm, start, end); // start unmap for address space MM * tlb <- tlb_gather_mmu(mm); // start unmap for address space MM
* { * {
* for each vma that needs a shootdown do { * for each vma that needs a shootdown do {
* tlb_start_vma(tlb, vma); * tlb_start_vma(tlb, vma);
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
* tlb_end_vma(tlb, vma); * tlb_end_vma(tlb, vma);
* } * }
* } * }
* tlb_finish_mmu(tlb, start, end); // finish unmap for address space MM * tlb_finish_mmu(tlb); // finish unmap for address space MM
*/ */
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
......
...@@ -24,7 +24,6 @@ void flush_tlb_pending(void); ...@@ -24,7 +24,6 @@ void flush_tlb_pending(void);
#define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending() #define tlb_flush(tlb) flush_tlb_pending()
/* /*
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#define tlb_flush tlb_flush #define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb);
......
...@@ -398,9 +398,15 @@ static void free_ldt_pgtables(struct mm_struct *mm) ...@@ -398,9 +398,15 @@ static void free_ldt_pgtables(struct mm_struct *mm)
if (!boot_cpu_has(X86_FEATURE_PTI)) if (!boot_cpu_has(X86_FEATURE_PTI))
return; return;
tlb_gather_mmu(&tlb, mm, start, end); /*
* Although free_pgd_range() is intended for freeing user
* page-tables, it also works out for kernel mappings on x86.
* We use tlb_gather_mmu_fullmm() to avoid confusing the
* range-tracking logic in __tlb_adjust_range().
*/
tlb_gather_mmu_fullmm(&tlb, mm);
free_pgd_range(&tlb, start, end, start, end); free_pgd_range(&tlb, start, end, start, end);
tlb_finish_mmu(&tlb, start, end); tlb_finish_mmu(&tlb);
#endif #endif
} }
......
...@@ -708,7 +708,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) ...@@ -708,7 +708,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
return -ENOMEM; return -ENOMEM;
lru_add_drain(); lru_add_drain();
tlb_gather_mmu(&tlb, mm, old_start, old_end); tlb_gather_mmu(&tlb, mm);
if (new_end > old_start) { if (new_end > old_start) {
/* /*
* when the old and new regions overlap clear from new_end. * when the old and new regions overlap clear from new_end.
...@@ -725,7 +725,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) ...@@ -725,7 +725,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
free_pgd_range(&tlb, old_start, old_end, new_end, free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
} }
tlb_finish_mmu(&tlb, old_start, old_end); tlb_finish_mmu(&tlb);
/* /*
* Shrink the vma to just the new range. Always succeeds. * Shrink the vma to just the new range. Always succeeds.
......
...@@ -1210,7 +1210,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, ...@@ -1210,7 +1210,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
struct mm_struct *mm; struct mm_struct *mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
enum clear_refs_types type; enum clear_refs_types type;
struct mmu_gather tlb;
int itype; int itype;
int rv; int rv;
...@@ -1249,7 +1248,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, ...@@ -1249,7 +1248,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
goto out_unlock; goto out_unlock;
} }
tlb_gather_mmu(&tlb, mm, 0, -1);
if (type == CLEAR_REFS_SOFT_DIRTY) { if (type == CLEAR_REFS_SOFT_DIRTY) {
for (vma = mm->mmap; vma; vma = vma->vm_next) { for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!(vma->vm_flags & VM_SOFTDIRTY)) if (!(vma->vm_flags & VM_SOFTDIRTY))
...@@ -1258,15 +1256,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, ...@@ -1258,15 +1256,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
vma_set_page_prot(vma); vma_set_page_prot(vma);
} }
inc_tlb_flush_pending(mm);
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
0, NULL, mm, 0, -1UL); 0, NULL, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range); mmu_notifier_invalidate_range_start(&range);
} }
walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops, walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
&cp); &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, 0, -1); flush_tlb_mm(mm);
dec_tlb_flush_pending(mm);
}
out_unlock: out_unlock:
mmap_write_unlock(mm); mmap_write_unlock(mm);
out_mm: out_mm:
......
...@@ -46,7 +46,9 @@ ...@@ -46,7 +46,9 @@
* *
* The mmu_gather API consists of: * The mmu_gather API consists of:
* *
* - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
*
* start and finish a mmu_gather
* *
* Finish in particular will issue a (final) TLB invalidate and free * Finish in particular will issue a (final) TLB invalidate and free
* all (remaining) queued pages. * all (remaining) queued pages.
...@@ -91,7 +93,7 @@ ...@@ -91,7 +93,7 @@
* *
* - mmu_gather::fullmm * - mmu_gather::fullmm
* *
* A flag set by tlb_gather_mmu() to indicate we're going to free * A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free
* the entire mm; this allows a number of optimizations. * the entire mm; this allows a number of optimizations.
* *
* - We can ignore tlb_{start,end}_vma(); because we don't * - We can ignore tlb_{start,end}_vma(); because we don't
......
...@@ -588,10 +588,9 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) ...@@ -588,10 +588,9 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
} }
struct mmu_gather; struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
unsigned long start, unsigned long end); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_finish_mmu(struct mmu_gather *tlb, extern void tlb_finish_mmu(struct mmu_gather *tlb);
unsigned long start, unsigned long end);
static inline void init_tlb_flush_pending(struct mm_struct *mm) static inline void init_tlb_flush_pending(struct mm_struct *mm)
{ {
......
...@@ -4008,25 +4008,11 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, ...@@ -4008,25 +4008,11 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page) unsigned long end, struct page *ref_page)
{ {
struct mm_struct *mm;
struct mmu_gather tlb; struct mmu_gather tlb;
unsigned long tlb_start = start;
unsigned long tlb_end = end;
/* tlb_gather_mmu(&tlb, vma->vm_mm);
* If shared PMDs were possibly used within this vma range, adjust
* start/end for worst case tlb flushing.
* Note that we can not be sure if PMDs are shared until we try to
* unmap pages. However, we want to make sure TLB flushing covers
* the largest possible range.
*/
adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
mm = vma->vm_mm;
tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
__unmap_hugepage_range(&tlb, vma, start, end, ref_page); __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
tlb_finish_mmu(&tlb, tlb_start, tlb_end); tlb_finish_mmu(&tlb);
} }
/* /*
......
...@@ -506,9 +506,9 @@ static long madvise_cold(struct vm_area_struct *vma, ...@@ -506,9 +506,9 @@ static long madvise_cold(struct vm_area_struct *vma,
return -EINVAL; return -EINVAL;
lru_add_drain(); lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr); tlb_gather_mmu(&tlb, mm);
madvise_cold_page_range(&tlb, vma, start_addr, end_addr); madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr); tlb_finish_mmu(&tlb);
return 0; return 0;
} }
...@@ -558,9 +558,9 @@ static long madvise_pageout(struct vm_area_struct *vma, ...@@ -558,9 +558,9 @@ static long madvise_pageout(struct vm_area_struct *vma,
return 0; return 0;
lru_add_drain(); lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr); tlb_gather_mmu(&tlb, mm);
madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr); tlb_finish_mmu(&tlb);
return 0; return 0;
} }
...@@ -723,7 +723,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, ...@@ -723,7 +723,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
range.start, range.end); range.start, range.end);
lru_add_drain(); lru_add_drain();
tlb_gather_mmu(&tlb, mm, range.start, range.end); tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm); update_hiwater_rss(mm);
mmu_notifier_invalidate_range_start(&range); mmu_notifier_invalidate_range_start(&range);
...@@ -732,7 +732,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, ...@@ -732,7 +732,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
&madvise_free_walk_ops, &tlb); &madvise_free_walk_ops, &tlb);
tlb_end_vma(&tlb, vma); tlb_end_vma(&tlb, vma);
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, range.start, range.end); tlb_finish_mmu(&tlb);
return 0; return 0;
} }
......
...@@ -1534,13 +1534,13 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, ...@@ -1534,13 +1534,13 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
lru_add_drain(); lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
start, start + size); start, start + size);
tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end); tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm); update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range); mmu_notifier_invalidate_range_start(&range);
for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next) for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
unmap_single_vma(&tlb, vma, start, range.end, NULL); unmap_single_vma(&tlb, vma, start, range.end, NULL);
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, start, range.end); tlb_finish_mmu(&tlb);
} }
/** /**
...@@ -1561,12 +1561,12 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr ...@@ -1561,12 +1561,12 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
lru_add_drain(); lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
address, address + size); address, address + size);
tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end); tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm); update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range); mmu_notifier_invalidate_range_start(&range);
unmap_single_vma(&tlb, vma, address, range.end, details); unmap_single_vma(&tlb, vma, address, range.end, details);
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, address, range.end); tlb_finish_mmu(&tlb);
} }
/** /**
......
...@@ -2671,12 +2671,12 @@ static void unmap_region(struct mm_struct *mm, ...@@ -2671,12 +2671,12 @@ static void unmap_region(struct mm_struct *mm,
struct mmu_gather tlb; struct mmu_gather tlb;
lru_add_drain(); lru_add_drain();
tlb_gather_mmu(&tlb, mm, start, end); tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm); update_hiwater_rss(mm);
unmap_vmas(&tlb, vma, start, end); unmap_vmas(&tlb, vma, start, end);
free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
next ? next->vm_start : USER_PGTABLES_CEILING); next ? next->vm_start : USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, start, end); tlb_finish_mmu(&tlb);
} }
/* /*
...@@ -3214,12 +3214,12 @@ void exit_mmap(struct mm_struct *mm) ...@@ -3214,12 +3214,12 @@ void exit_mmap(struct mm_struct *mm)
lru_add_drain(); lru_add_drain();
flush_cache_mm(mm); flush_cache_mm(mm);
tlb_gather_mmu(&tlb, mm, 0, -1); tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */ /* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */ /* Use -1 here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, vma, 0, -1); unmap_vmas(&tlb, vma, 0, -1);
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1); tlb_finish_mmu(&tlb);
/* /*
* Walk the list again, actually closing and freeing it, * Walk the list again, actually closing and freeing it,
......
...@@ -253,21 +253,17 @@ void tlb_flush_mmu(struct mmu_gather *tlb) ...@@ -253,21 +253,17 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
* tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
* @tlb: the mmu_gather structure to initialize * @tlb: the mmu_gather structure to initialize
* @mm: the mm_struct of the target address space * @mm: the mm_struct of the target address space
* @start: start of the region that will be removed from the page-table * @fullmm: @mm is without users and we're going to destroy the full address
* @end: end of the region that will be removed from the page-table * space (exit/execve)
* *
* Called to initialize an (on-stack) mmu_gather structure for page-table * Called to initialize an (on-stack) mmu_gather structure for page-table
* tear-down from @mm. The @start and @end are set to 0 and -1 * tear-down from @mm.
* respectively when @mm is without users and we're going to destroy
* the full address space (exit/execve).
*/ */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
unsigned long start, unsigned long end) bool fullmm)
{ {
tlb->mm = mm; tlb->mm = mm;
tlb->fullmm = fullmm;
/* Is it from 0 to ~0? */
tlb->fullmm = !(start | (end+1));
#ifndef CONFIG_MMU_GATHER_NO_GATHER #ifndef CONFIG_MMU_GATHER_NO_GATHER
tlb->need_flush_all = 0; tlb->need_flush_all = 0;
...@@ -287,17 +283,24 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, ...@@ -287,17 +283,24 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
inc_tlb_flush_pending(tlb->mm); inc_tlb_flush_pending(tlb->mm);
} }
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
__tlb_gather_mmu(tlb, mm, false);
}
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
__tlb_gather_mmu(tlb, mm, true);
}
/** /**
* tlb_finish_mmu - finish an mmu_gather structure * tlb_finish_mmu - finish an mmu_gather structure
* @tlb: the mmu_gather structure to finish * @tlb: the mmu_gather structure to finish
* @start: start of the region that will be removed from the page-table
* @end: end of the region that will be removed from the page-table
* *
* Called at the end of the shootdown operation to free up any resources that * Called at the end of the shootdown operation to free up any resources that
* were required. * were required.
*/ */
void tlb_finish_mmu(struct mmu_gather *tlb, void tlb_finish_mmu(struct mmu_gather *tlb)
unsigned long start, unsigned long end)
{ {
/* /*
* If there are parallel threads are doing PTE changes on same range * If there are parallel threads are doing PTE changes on same range
......
...@@ -546,15 +546,15 @@ bool __oom_reap_task_mm(struct mm_struct *mm) ...@@ -546,15 +546,15 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
vma, mm, vma->vm_start, vma, mm, vma->vm_start,
vma->vm_end); vma->vm_end);
tlb_gather_mmu(&tlb, mm, range.start, range.end); tlb_gather_mmu(&tlb, mm);
if (mmu_notifier_invalidate_range_start_nonblock(&range)) { if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
tlb_finish_mmu(&tlb, range.start, range.end); tlb_finish_mmu(&tlb);
ret = false; ret = false;
continue; continue;
} }
unmap_page_range(&tlb, vma, range.start, range.end, NULL); unmap_page_range(&tlb, vma, range.start, range.end, NULL);
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, range.start, range.end); tlb_finish_mmu(&tlb);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment