Commit a232591b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "11 fixes.

  The presence of 'thp: reduce indentation level in change_huge_pmd()'
  is unfortunate. But the patchset had been decently reviewed and tested
  before we decided it was needed in -stable and I felt it best not to
  churn things at the last minute"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Martin Kepplinger's email
  zsmalloc: expand class bit
  zram: do not use copy_page with non-page aligned address
  zram: fix operator precedence to get offset
  hugetlbfs: fix offset overflow in hugetlbfs mmap
  thp: fix MADV_DONTNEED vs clear soft dirty race
  thp: fix MADV_DONTNEED vs. MADV_FREE race
  mm: drop unused pmdp_huge_get_and_clear_notify()
  thp: fix MADV_DONTNEED vs. numa balancing race
  thp: reduce indentation level in change_huge_pmd()
  z3fold: fix page locking in z3fold_alloc()
parents d8a6e3ae 5714320d
......@@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
Mark Brown <broonie@sirena.org.uk>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Matthieu CASTET <castet.matthieu@free.fr>
Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
......
......@@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
copy_page(mem, cmem);
memcpy(mem, cmem, PAGE_SIZE);
} else {
struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
......@@ -717,7 +717,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
src = kmap_atomic(page);
copy_page(cmem, src);
memcpy(cmem, src, PAGE_SIZE);
kunmap_atomic(src);
} else {
memcpy(cmem, src, clen);
......@@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
}
index = sector >> SECTORS_PER_PAGE_SHIFT;
offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
bv.bv_page = page;
bv.bv_len = PAGE_SIZE;
......
......@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
/*
* Offset passed to mmap (before page shift) could have been
* negative when represented as a (l)off_t.
*/
if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
return -EINVAL;
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
/* check for overflow */
if (len < vma_len)
return -EINVAL;
inode_lock(inode);
file_accessed(file);
ret = -ENOMEM;
len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
......@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = 0;
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
inode->i_size = len;
i_size_write(inode, len);
out:
inode_unlock(inode);
......
......@@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
pmd_t pmd = *pmdp;
/* See comment in change_huge_pmd() */
pmdp_invalidate(vma, addr, pmdp);
if (pmd_dirty(*pmdp))
pmd = pmd_mkdirty(pmd);
if (pmd_young(*pmdp))
pmd = pmd_mkyoung(pmd);
pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
......
......@@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
___pud; \
})
#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \
({ \
unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
pmd_t ___pmd; \
\
___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \
mmu_notifier_invalidate_range(__mm, ___haddr, \
___haddr + HPAGE_PMD_SIZE); \
\
___pmd; \
})
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
......@@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
......
......@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
deactivate_page(page);
if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
tlb->fullmm);
pmdp_invalidate(vma, addr, pmd);
orig_pmd = pmd_mkold(orig_pmd);
orig_pmd = pmd_mkclean(orig_pmd);
......@@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
int ret = 0;
pmd_t entry;
bool preserve_write;
int ret;
ptl = __pmd_trans_huge_lock(pmd, vma);
if (ptl) {
pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
if (!ptl)
return 0;
/*
* Avoid trapping faults against the zero page. The read-only
* data is likely to be read-cached on the local CPU and
* local/remote hits to the zero page are not interesting.
*/
if (prot_numa && is_huge_zero_pmd(*pmd)) {
spin_unlock(ptl);
return ret;
}
preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
if (!prot_numa || !pmd_protnone(*pmd)) {
entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
if (preserve_write)
entry = pmd_mk_savedwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
pmd_write(entry));
}
spin_unlock(ptl);
}
/*
* Avoid trapping faults against the zero page. The read-only
* data is likely to be read-cached on the local CPU and
* local/remote hits to the zero page are not interesting.
*/
if (prot_numa && is_huge_zero_pmd(*pmd))
goto unlock;
if (prot_numa && pmd_protnone(*pmd))
goto unlock;
/*
* In case prot_numa, we are under down_read(mmap_sem). It's critical
* to not clear pmd intermittently to avoid race with MADV_DONTNEED
* which is also under down_read(mmap_sem):
*
* CPU0: CPU1:
* change_huge_pmd(prot_numa=1)
* pmdp_huge_get_and_clear_notify()
* madvise_dontneed()
* zap_pmd_range()
* pmd_trans_huge(*pmd) == 0 (without ptl)
* // skip the pmd
* set_pmd_at();
* // pmd is re-established
*
* The race makes MADV_DONTNEED miss the huge pmd and don't clear it
* which may break userspace.
*
* pmdp_invalidate() is required to make sure we don't miss
* dirty/young flags set by hardware.
*/
entry = *pmd;
pmdp_invalidate(vma, addr, pmd);
/*
* Recover dirty/young flags. It relies on pmdp_invalidate to not
* corrupt them.
*/
if (pmd_dirty(*pmd))
entry = pmd_mkdirty(entry);
if (pmd_young(*pmd))
entry = pmd_mkyoung(entry);
entry = pmd_modify(entry, newprot);
if (preserve_write)
entry = pmd_mk_savedwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
unlock:
spin_unlock(ptl);
return ret;
}
......
......@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
spin_lock(&zhdr->page_lock);
}
/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
return spin_trylock(&zhdr->page_lock);
}
/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
......@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_lock(&pool->lock);
zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
if (!zhdr) {
if (!zhdr || !z3fold_page_trylock(zhdr)) {
spin_unlock(&pool->lock);
continue;
}
......@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_unlock(&pool->lock);
page = virt_to_page(zhdr);
z3fold_page_lock(zhdr);
if (zhdr->first_chunks == 0) {
if (zhdr->middle_chunks != 0 &&
chunks >= zhdr->start_middle)
......
......@@ -276,7 +276,7 @@ struct zs_pool {
struct zspage {
struct {
unsigned int fullness:FULLNESS_BITS;
unsigned int class:CLASS_BITS;
unsigned int class:CLASS_BITS + 1;
unsigned int isolated:ISOLATED_BITS;
unsigned int magic:MAGIC_VAL_BITS;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment