Commit 9a95f3cf authored by Paul Cassella, committed by Linus Torvalds

mm: describe mmap_sem rules for __lock_page_or_retry() and callers

Add a comment describing the circumstances in which
__lock_page_or_retry() will or will not release the mmap_sem when
returning 0.

Add comments to lock_page_or_retry()'s callers (filemap_fault(),
do_swap_page()) noting the impact on VM_FAULT_RETRY returns.

Add comments further up the call tree, in particular replacing the false
"We return with mmap_sem still held" comments.
Signed-off-by: Paul Cassella <cassella@cray.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4ffeaf35
...@@ -1218,7 +1218,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, ...@@ -1218,7 +1218,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
/* /*
* If for any reason at all we couldn't handle the fault, * If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo * make sure we exit gracefully rather than endlessly redo
* the fault: * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
* we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
*/ */
fault = handle_mm_fault(mm, vma, address, flags); fault = handle_mm_fault(mm, vma, address, flags);
......
...@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page) ...@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
/* /*
* lock_page_or_retry - Lock the page, unless this would block and the * lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry. * caller indicated that it can handle a retry.
*
* Return value and mmap_sem implications depend on flags; see
* __lock_page_or_retry().
*/ */
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags) unsigned int flags)
......
...@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page) ...@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
} }
EXPORT_SYMBOL_GPL(__lock_page_killable); EXPORT_SYMBOL_GPL(__lock_page_killable);
/*
* Return values:
* 1 - page is locked; mmap_sem is still held.
* 0 - page is not locked.
* mmap_sem has been released (up_read()), unless flags had both
* FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
* which case mmap_sem is still held.
*
* If neither ALLOW_RETRY nor KILLABLE is set, will always return 1
* with the page locked and the mmap_sem unperturbed.
*/
int __lock_page_or_retry(struct page *page, struct mm_struct *mm, int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags) unsigned int flags)
{ {
...@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, ...@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
* The goto's are kind of ugly, but this streamlines the normal case of having * The goto's are kind of ugly, but this streamlines the normal case of having
* it in the page cache, and handles the special cases reasonably without * it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code. * having a lot of duplicated code.
*
* vma->vm_mm->mmap_sem must be held on entry.
*
* If our return value has VM_FAULT_RETRY set, it's because
* lock_page_or_retry() returned 0.
* The mmap_sem has usually been released in this case.
* See __lock_page_or_retry() for the exception.
*
* If our return value does not have VM_FAULT_RETRY set, the mmap_sem
* has not been released.
*
* We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
*/ */
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
......
...@@ -258,6 +258,11 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, ...@@ -258,6 +258,11 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
return ret; return ret;
} }
/*
* mmap_sem must be held on entry. If @nonblocking != NULL and
* *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
* If it is, *@nonblocking will be set to 0 and -EBUSY returned.
*/
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *nonblocking) unsigned long address, unsigned int *flags, int *nonblocking)
{ {
...@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) ...@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* with a put_page() call when it is finished with. vmas will only * with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held. * remain valid while mmap_sem is held.
* *
* Must be called with mmap_sem held for read or write. * Must be called with mmap_sem held. It may be released. See below.
* *
* __get_user_pages walks a process's page tables and takes a reference to * __get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given * each struct page that each user address corresponds to at a given
...@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) ...@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* *
* If @nonblocking != NULL, __get_user_pages will not wait for disk IO * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
* or mmap_sem contention, and if waiting is needed to pin all pages, * or mmap_sem contention, and if waiting is needed to pin all pages,
* *@nonblocking will be set to 0. * *@nonblocking will be set to 0. Further, if @gup_flags does not
* include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
* this case.
*
* A caller using such a combination of @nonblocking and @gup_flags
* must therefore hold the mmap_sem for reading only, and recognize
* when it's been released. Otherwise, it must be held for either
* reading or writing and will not be released.
* *
* In most cases, get_user_pages or get_user_pages_fast should be used * In most cases, get_user_pages or get_user_pages_fast should be used
* instead of __get_user_pages. __get_user_pages should be used only if * instead of __get_user_pages. __get_user_pages should be used only if
...@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages); ...@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
* such architectures, gup() will not be enough to make a subsequent access * such architectures, gup() will not be enough to make a subsequent access
* succeed. * succeed.
* *
* This should be called with the mm_sem held for read. * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
*/ */
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags) unsigned long address, unsigned int fault_flags)
......
...@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range); ...@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
/* /*
* We enter with non-exclusive mmap_sem (to exclude vma changes, * We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with pte unmapped and unlocked.
*
* We return with the mmap_sem locked or unlocked in the same cases
* as does filemap_fault().
*/ */
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned long address, pte_t *page_table, pmd_t *pmd,
...@@ -2688,6 +2691,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2688,6 +2691,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
/*
* The mmap_sem must have been held on entry, and may have been
* released depending on flags and vma->vm_ops->fault() return value.
* See filemap_fault() and __lock_page_or_retry().
*/
static int __do_fault(struct vm_area_struct *vma, unsigned long address, static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page) pgoff_t pgoff, unsigned int flags, struct page **page)
{ {
...@@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret; return ret;
} }
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults).
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte) unsigned int flags, pte_t orig_pte)
...@@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* *
* We enter with non-exclusive mmap_sem (to exclude vma changes, * We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with pte unmapped and unlocked.
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/ */
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned long address, pte_t *page_table, pmd_t *pmd,
...@@ -3172,7 +3188,10 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3172,7 +3188,10 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
* *
* We enter with non-exclusive mmap_sem (to exclude vma changes, * We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with pte unmapped and unlocked.
*
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/ */
static int handle_pte_fault(struct mm_struct *mm, static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, struct vm_area_struct *vma, unsigned long address,
...@@ -3232,6 +3251,9 @@ static int handle_pte_fault(struct mm_struct *mm, ...@@ -3232,6 +3251,9 @@ static int handle_pte_fault(struct mm_struct *mm,
/* /*
* By the time we get here, we already hold the mm semaphore * By the time we get here, we already hold the mm semaphore
*
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/ */
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags) unsigned long address, unsigned int flags)
...@@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags); return handle_pte_fault(mm, vma, address, pte, pmd, flags);
} }
/*
* By the time we get here, we already hold the mm semaphore
*
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags) unsigned long address, unsigned int flags)
{ {
......
...@@ -210,12 +210,19 @@ unsigned int munlock_vma_page(struct page *page) ...@@ -210,12 +210,19 @@ unsigned int munlock_vma_page(struct page *page)
* @vma: target vma * @vma: target vma
* @start: start address * @start: start address
* @end: end address * @end: end address
* @nonblocking: if non-NULL, the mmap_sem may be released, in which case *@nonblocking is set to 0
* *
* This takes care of making the pages present too. * This takes care of making the pages present too.
* *
* return 0 on success, negative error code on error. * return 0 on success, negative error code on error.
* *
* vma->vm_mm->mmap_sem must be held for at least read. * vma->vm_mm->mmap_sem must be held.
*
* If @nonblocking is NULL, it may be held for read or write and will
* be unperturbed.
*
* If @nonblocking is non-NULL, it must be held for read only and may be
* released. If it's released, *@nonblocking will be set to 0.
*/ */
long __mlock_vma_pages_range(struct vm_area_struct *vma, long __mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *nonblocking) unsigned long start, unsigned long end, int *nonblocking)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment