Commit 6da8e963, authored by Peter Xu, committed by Andrew Morton

mm: new follow_pfnmap API

Introduce a pair of APIs to follow pfn mappings and get entry information.
They are very similar to what follow_pte() did before, but differ in that
they recognize huge pfn mappings.

Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Niklas Schnelle <schnelle@linux.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 0515e022
...@@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address, ...@@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write); void *buf, int len, int write);
/*
 * Argument and result block for follow_pfnmap_start() /
 * follow_pfnmap_end().  The caller fills in the input fields, calls
 * start(), reads the output fields, and then calls end().
 */
struct follow_pfnmap_args {
/**
 * Inputs:
 * @vma: Pointer to @vm_area_struct struct
 * @address: the virtual address to walk
 */
struct vm_area_struct *vma;
unsigned long address;
/**
 * Internals:
 *
 * The caller shouldn't touch any of these.
 *
 * @lock: page-table spinlock taken by a successful start(); end()
 *        releases it.
 * @ptep: the mapped PTE pointer when the lookup resolved at PTE level,
 *        NULL when it resolved at a PUD or PMD leaf; end() unmaps it
 *        when non-NULL.
 */
spinlock_t *lock;
pte_t *ptep;
/**
 * Outputs:
 *
 * @pfn: the PFN of the address
 * @pgprot: the pgprot_t of the mapping
 * @writable: whether the mapping is writable
 * @special: whether the mapping is a special mapping (real PFN maps)
 */
unsigned long pfn;
pgprot_t pgprot;
bool writable;
bool special;
};
/*
 * Look up the pfn mapping at args->address inside args->vma.  Returns zero
 * on success and a negative value otherwise; a successful call must be
 * paired with follow_pfnmap_end().
 */
int follow_pfnmap_start(struct follow_pfnmap_args *args);
/* Release what a successful follow_pfnmap_start() acquired. */
void follow_pfnmap_end(struct follow_pfnmap_args *args);
extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize); extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
......
...@@ -6172,6 +6172,156 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address, ...@@ -6172,6 +6172,156 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
} }
EXPORT_SYMBOL_GPL(follow_pte); EXPORT_SYMBOL_GPL(follow_pte);
/*
 * Publish the result of a page-table walk into @args.
 *
 * @pfn_base is the PFN taken from the leaf entry and @addr_mask is the
 * address mask of the level that entry lives at (the callers pass PUD_MASK,
 * PMD_MASK or PAGE_MASK).  The part of args->address below that mask is the
 * byte offset into the leaf; converted to pages, it is added to @pfn_base
 * so that args->pfn names the exact page backing args->address.
 *
 * @lock and @ptep are stashed so that follow_pfnmap_end() can undo them.
 */
static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
				     spinlock_t *lock, pte_t *ptep,
				     pgprot_t pgprot, unsigned long pfn_base,
				     unsigned long addr_mask, bool writable,
				     bool special)
{
	unsigned long offset_in_leaf = args->address & ~addr_mask;

	/* Outputs the caller is allowed to read. */
	args->pfn = pfn_base + (offset_in_leaf >> PAGE_SHIFT);
	args->pgprot = pgprot;
	args->writable = writable;
	args->special = special;

	/* Internal state consumed by follow_pfnmap_end(). */
	args->ptep = ptep;
	args->lock = lock;
}
/*
 * Lockdep-only sanity check for the pfnmap walkers: the caller must hold
 * either the mm's mmap_lock or, for a file-backed VMA, the mapping's
 * i_mmap_rwsem.
 *
 * A VMA is not guaranteed to have a backing file, so vma->vm_file must be
 * checked before it is dereferenced; without a file (or without a mapping)
 * only mmap_lock can satisfy the assertion.
 *
 * Compiles to nothing when CONFIG_LOCKDEP is disabled.
 */
static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
{
#ifdef CONFIG_LOCKDEP
	struct address_space *mapping =
		vma->vm_file ? vma->vm_file->f_mapping : NULL;

	if (mapping)
		lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) ||
			       lockdep_is_held(&vma->vm_mm->mmap_lock));
	else
		lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
#endif
}
/**
 * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
 * @args: Pointer to struct @follow_pfnmap_args
 *
 * The caller needs to set up args->vma and args->address to point to the
 * virtual address that is the target of the lookup.  On a successful
 * return, the results are stored in the output fields of @args.
 *
 * After the caller has finished using the fields, it must call
 * follow_pfnmap_end() to properly release the locks and resources of the
 * lookup.  On failure nothing is held and @args is left unmodified, so
 * follow_pfnmap_end() must not be called.
 *
 * Between the start() and end() calls the results in @args are valid
 * because the proper locks are held.  After end() is called, the fields
 * in @follow_pfnmap_args must no longer be relied upon.  Further use of
 * such information after end() may require proper synchronization by the
 * caller against page table updates, otherwise it can create a security
 * bug.
 *
 * If the PTE maps a refcounted page, callers are responsible for
 * protecting against invalidation with MMU notifiers; otherwise access to
 * the PFN at a later point in time can trigger use-after-free.
 *
 * Only IO mappings and raw PFN mappings are allowed.  The mmap semaphore
 * should be taken for read, and the mmap semaphore cannot be released
 * before end() is invoked.
 *
 * This function must not be used to modify PTE content.
 *
 * Return: zero on success, negative otherwise.
 */
int follow_pfnmap_start(struct follow_pfnmap_args *args)
{
	struct vm_area_struct *vma = args->vma;
	unsigned long address = args->address;
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *lock;
	pgd_t *pgdp;
	p4d_t *p4dp, p4d;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;

	pfnmap_lockdep_assert(vma);

	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
		goto out;

	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		goto out;
retry:
	pgdp = pgd_offset(mm, address);
	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
		goto out;

	p4dp = p4d_offset(pgdp, address);
	p4d = READ_ONCE(*p4dp);
	if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
		goto out;

	pudp = pud_offset(p4dp, address);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		goto out;
	if (pud_leaf(pud)) {
		lock = pud_lock(mm, pudp);
		/*
		 * The first read was lockless, so the entry may have changed
		 * before the lock was taken.  Re-read it under the lock; the
		 * value read here is the one reported to the caller.
		 */
		pud = READ_ONCE(*pudp);
		if (unlikely(!pud_leaf(pud))) {
			spin_unlock(lock);
			goto retry;
		}
		/* Huge PUD mapping: no PTE is mapped, hence ptep == NULL. */
		pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
				  pud_pfn(pud), PUD_MASK, pud_write(pud),
				  pud_special(pud));
		return 0;
	}

	pmdp = pmd_offset(pudp, address);
	pmd = pmdp_get_lockless(pmdp);
	if (pmd_leaf(pmd)) {
		lock = pmd_lock(mm, pmdp);
		/* Same as for the PUD: revalidate the entry under the lock. */
		pmd = pmdp_get_lockless(pmdp);
		if (unlikely(!pmd_leaf(pmd))) {
			spin_unlock(lock);
			goto retry;
		}
		/* Huge PMD mapping: no PTE is mapped, hence ptep == NULL. */
		pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
				  pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
				  pmd_special(pmd));
		return 0;
	}

	ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
	if (!ptep)
		goto out;
	pte = ptep_get(ptep);
	if (!pte_present(pte))
		goto unlock;
	pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
			  pte_pfn(pte), PAGE_MASK, pte_write(pte),
			  pte_special(pte));
	return 0;
unlock:
	pte_unmap_unlock(ptep, lock);
out:
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(follow_pfnmap_start);
/**
* follow_pfnmap_end(): End a follow_pfnmap_start() process
* @args: Pointer to struct @follow_pfnmap_args
*
* Must be used in pair of follow_pfnmap_start(). See the start() function
* above for more information.
*/
void follow_pfnmap_end(struct follow_pfnmap_args *args)
{
if (args->lock)
spin_unlock(args->lock);
if (args->ptep)
pte_unmap(args->ptep);
}
EXPORT_SYMBOL_GPL(follow_pfnmap_end);
#ifdef CONFIG_HAVE_IOREMAP_PROT #ifdef CONFIG_HAVE_IOREMAP_PROT
/** /**
* generic_access_phys - generic implementation for iomem mmap access * generic_access_phys - generic implementation for iomem mmap access
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment