Commit bf491692 authored by Rik van Riel, committed by Andrew Morton

hugetlbfs: extend hugetlb_vma_lock to private VMAs

Extend the locking scheme used to protect shared hugetlb mappings from
truncate vs page fault races, in order to protect private hugetlb mappings
(with resv_map) against MADV_DONTNEED.

Add a read-write semaphore to the resv_map data structure, and use that
from the hugetlb_vma_(un)lock_* functions, in preparation for closing the
race between MADV_DONTNEED and page faults.

Link: https://lkml.kernel.org/r/20231006040020.3677377-3-riel@surriel.com
Fixes: 04ada095 ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing")
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 92fe9dcb
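
The shape of the change is the same in every lock helper: a three-way dispatch on the VMA type. Shared VMAs keep using the refcounted hugetlb_vma_lock hung off vm_private_data, private VMAs with a reservation map now use the rw_sema embedded in their resv_map, and VMAs with neither have no reservation state to protect, so the helpers fall through. A condensed, commented view of that dispatch, assembled from the hunks below (kernel context assumed, not a standalone program):

/* Condensed from the diff below for orientation. */
void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
        if (__vma_shareable_lock(vma)) {
                /* shared mapping: lock lives in the per-VMA vma_lock object */
                struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

                down_read(&vma_lock->rw_sema);
        } else if (__vma_private_lock(vma)) {
                /* private mapping: lock now lives in the reservation map */
                struct resv_map *resv_map = vma_resv_map(vma);

                down_read(&resv_map->rw_sema);
        }
        /* neither case: no reservation state, nothing to serialize against */
}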
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -60,6 +60,7 @@ struct resv_map {
         long adds_in_progress;
         struct list_head region_cache;
         long region_cache_count;
+        struct rw_semaphore rw_sema;
 #ifdef CONFIG_CGROUP_HUGETLB
         /*
          * On private mappings, the counter to uncharge reservations is stored
@@ -1233,6 +1234,11 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
         return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
 }
 
+static inline bool __vma_private_lock(struct vm_area_struct *vma)
+{
+        return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
+}
+
 /*
  * Safe version of huge_pte_offset() to check the locks.  See comments
  * above huge_pte_offset().
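
The two predicates split hugetlb VMAs cleanly: both require vm_private_data to be set, and they test opposite values of VM_MAYSHARE, so at most one can be true for a given VMA. An illustrative helper (hypothetical, not part of the patch) that makes the partition explicit:

/*
 * Illustration only -- this helper does not exist in the patch.
 *
 *   VM_MAYSHARE set,   vm_private_data set  -> __vma_shareable_lock()
 *   VM_MAYSHARE clear, vm_private_data set  -> __vma_private_lock()
 *   vm_private_data NULL                    -> neither; lock ops are no-ops
 */
static inline bool __vma_has_hugetlb_lock(struct vm_area_struct *vma)
{
        return __vma_shareable_lock(vma) || __vma_private_lock(vma);
}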
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
                 unsigned long start, unsigned long end);
+static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
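
The forward declaration is needed because vma_resv_map() is defined further down in mm/hugetlb.c but is now called from the lock helpers near the top of the file. For orientation, the existing helper (unchanged by this patch) behaves roughly as sketched here: shared mappings find the resv_map through the hugetlbfs inode, private mappings decode it from vm_private_data. This is a sketch from the surrounding kernel code, not part of this diff; treat the exact masking as an approximation.

/* Rough sketch of the pre-existing helper, not part of this diff. */
static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
{
        if (vma->vm_flags & VM_MAYSHARE) {
                /* shared: reservation map hangs off the hugetlbfs inode */
                struct inode *inode = vma->vm_file->f_mapping->host;

                return inode_resv_map(inode);
        }
        /* private: vm_private_data holds the pointer plus HPAGE_RESV_* flags */
        return (struct resv_map *)(get_vma_private_data(vma) &
                                   ~HPAGE_RESV_MASK);
}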
@@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_area_struct *vma)
                 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
                 down_read(&vma_lock->rw_sema);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
+
+                down_read(&resv_map->rw_sema);
         }
 }
 
@@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
                 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
                 up_read(&vma_lock->rw_sema);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
+
+                up_read(&resv_map->rw_sema);
         }
 }
 
@@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_area_struct *vma)
                 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
                 down_write(&vma_lock->rw_sema);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
+
+                down_write(&resv_map->rw_sema);
         }
 }
 
@@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
                 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
                 up_write(&vma_lock->rw_sema);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
+
+                up_write(&resv_map->rw_sema);
         }
 }
 
 int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
 {
-        struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+        if (__vma_shareable_lock(vma)) {
+                struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-        if (!__vma_shareable_lock(vma))
-                return 1;
+                return down_write_trylock(&vma_lock->rw_sema);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
 
-        return down_write_trylock(&vma_lock->rw_sema);
+                return down_write_trylock(&resv_map->rw_sema);
+        }
+
+        return 1;
 }
 
 void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
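
Note that the rewritten trylock keeps the historical contract: a VMA with no lock object at all still reports success (returns 1), since there is no reservation state a racing truncate or MADV_DONTNEED could corrupt. A hypothetical caller pattern showing why "nothing to lock" must count as acquired:

/* Hypothetical caller, for illustration only. */
static bool do_locked_fast_path(struct vm_area_struct *vma)
{
        if (!hugetlb_vma_trylock_write(vma))
                return false;   /* contended: fall back to a slow path */

        /* ... modify the mapping with readers (page faults) excluded ... */

        hugetlb_vma_unlock_write(vma);
        return true;
}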
@@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
                 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
                 lockdep_assert_held(&vma_lock->rw_sema);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
+
+                lockdep_assert_held(&resv_map->rw_sema);
         }
 }
 
@@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
                 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
                 __hugetlb_vma_unlock_write_put(vma_lock);
+        } else if (__vma_private_lock(vma)) {
+                struct resv_map *resv_map = vma_resv_map(vma);
+
+                /* no free for anon vmas, but still need to unlock */
+                up_write(&resv_map->rw_sema);
         }
 }
 
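
For private VMAs there is deliberately no free step in this unlock-and-free path: the semaphore is embedded in the resv_map, whose lifetime is governed by its kref (dropped via resv_map_release() when the mapping goes away), so all the helper has to do here is release the write lock it holds.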
@@ -1068,6 +1100,7 @@ struct resv_map *resv_map_alloc(void)
         kref_init(&resv_map->refs);
         spin_lock_init(&resv_map->lock);
         INIT_LIST_HEAD(&resv_map->regions);
+        init_rwsem(&resv_map->rw_sema);
         resv_map->adds_in_progress = 0;
 
         /*
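
This patch only wires the lock up; as the commit message says, it is "in preparation for closing the race between MADV_DONTNEED and page faults", which the next patch in the series does by taking the lock around the unmap. A hedged sketch of that intended usage follows; the function name and call site are assumptions based on the commit message, not taken from this diff:

/* Hypothetical consumer sketch -- the follow-up patch, not this one. */
static void hugetlb_madv_dontneed(struct vm_area_struct *vma,
                                  unsigned long start, unsigned long end)
{
        hugetlb_vma_lock_write(vma);    /* excludes page faults (readers) */
        /* ... zap the page range; concurrent faults cannot observe
         * half-torn-down reservation state ... */
        hugetlb_vma_unlock_write(vma);  /* faulting may resume */
}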