Commit 9f186f9e authored by Miaohe Lin, committed by akpm

mm/swapfile: unuse_pte can map random data if swap read fails

Patch series "A few fixup patches for mm", v4.

This series contains a few patches to avoid mapping random data if a swap
read fails and to fix lost swap bits in unuse_pte().  It also frees hwpoison
and swapin error entries in madvise_free_pte_range(), and so on.  More
details can be found in the respective changelogs.


This patch (of 5):

There is a bug in unuse_pte(): when the swap page happens to be unreadable,
a page filled with random data is mapped into the user address space.  In
case of error, a special swap entry indicating that the swap read failed is
set in the page table.  So the swapcache page can be freed and the user
won't end up with a permanently mounted swap device just because one sector
is bad.  And if the page is accessed later, the user process will be killed
so that corrupted data is never consumed.  On the other hand, if the page
is never accessed, the user won't even notice it.
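As background for the encoding used here: a swp_entry_t packs a small "type"
and a large "offset" into one word, and special entry kinds such as
SWP_SWAPIN_ERROR are simply reserved type values.  The following is a minimal
userspace sketch of that scheme, not the kernel's actual layout (which is
per-architecture); the bit widths and constants are illustrative only, and a
64-bit unsigned long is assumed.

#include <assert.h>
#include <stdio.h>

#define SWP_OFFSET_BITS   50                    /* illustrative width */
#define MAX_SWAPFILES     27                    /* illustrative value */
#define SWP_SWAPIN_ERROR  (MAX_SWAPFILES + 4)   /* past the other special types */

typedef struct { unsigned long val; } swp_entry_t;

static swp_entry_t swp_entry(unsigned long type, unsigned long offset)
{
	/* type in the high bits, offset in the low bits */
	return (swp_entry_t){ (type << SWP_OFFSET_BITS) | offset };
}

static unsigned long swp_type(swp_entry_t entry)
{
	return entry.val >> SWP_OFFSET_BITS;
}

static unsigned long swp_offset(swp_entry_t entry)
{
	return entry.val & ((1UL << SWP_OFFSET_BITS) - 1);
}

int main(void)
{
	unsigned long pfn = 0x1234;

	/* like make_swapin_error_entry(): tag the page's pfn with the type */
	swp_entry_t entry = swp_entry(SWP_SWAPIN_ERROR, pfn);

	/* like is_swapin_error_entry(): recognized purely by its type field */
	assert(swp_type(entry) == SWP_SWAPIN_ERROR);
	assert(swp_offset(entry) == pfn);
	printf("type=%lu offset=0x%lx\n", swp_type(entry), swp_offset(entry));
	return 0;
}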

Link: https://lkml.kernel.org/r/20220519125030.21486-1-linmiaohe@huawei.com
Link: https://lkml.kernel.org/r/20220519125030.21486-2-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Howells <dhowells@redhat.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent f079a020
include/linux/swap.h:

@@ -55,6 +55,10 @@ static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
+#define SWP_SWAPIN_ERROR_NUM 1
+#define SWP_SWAPIN_ERROR	(MAX_SWAPFILES + SWP_HWPOISON_NUM + \
+				 SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
+				 SWP_PTE_MARKER_NUM)
 /*
  * PTE markers are used to persist information onto PTEs that are mapped with
  * file-backed memories.  As its name "PTE" hints, it should only be applied to
@@ -120,7 +124,8 @@ static inline int current_is_kswapd(void)
 #define MAX_SWAPFILES \
 	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
-	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
+	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
+	SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
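Note the cost of the reservation above: special entry types occupy the
values just past MAX_SWAPFILES, so adding SWP_SWAPIN_ERROR_NUM (one more
reserved type, placed after the hwpoison, migration, device and PTE-marker
types) shrinks the number of usable swap devices by one.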
include/linux/swapops.h:

@@ -108,6 +108,16 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
 	return xa_mk_value(entry.val);
 }
 
+static inline swp_entry_t make_swapin_error_entry(struct page *page)
+{
+	return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
+}
+
+static inline int is_swapin_error_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_SWAPIN_ERROR;
+}
+
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
 {
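These two helpers mirror the existing hwpoison-entry helpers:
make_swapin_error_entry() records the pfn of the page whose read failed in
the offset bits, and is_swapin_error_entry() is the predicate that the zap
and fault paths below test.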
mm/memory.c:

@@ -1487,7 +1487,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			/* Only drop the uffd-wp marker if explicitly requested */
 			if (!zap_drop_file_uffd_wp(details))
 				continue;
-		} else if (is_hwpoison_entry(entry)) {
+		} else if (is_hwpoison_entry(entry) ||
+			   is_swapin_error_entry(entry)) {
 			if (!should_zap_cows(details))
 				continue;
 		} else {
@@ -3727,6 +3728,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
+		} else if (is_swapin_error_entry(entry)) {
+			ret = VM_FAULT_SIGBUS;
 		} else if (is_pte_marker_entry(entry)) {
 			ret = handle_pte_marker(vmf);
 		} else {
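The do_swap_page() change is what makes the failure visible: a fault on a
PTE carrying a swapin error entry now returns VM_FAULT_SIGBUS, so the
touching process is killed instead of reading random data.  The hypothetical
userspace snippet below only illustrates what such a process would observe;
actually provoking a swap read failure requires a failing swap device (for
example a device-mapper error target) and is not shown here.

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void on_sigbus(int sig)
{
	/* With this fix, touching a page whose swapin failed lands here. */
	(void)write(STDOUT_FILENO, "SIGBUS: swapin had failed for this page\n", 40);
	_exit(1);
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_sigbus };
	sigaction(SIGBUS, &sa, NULL);

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* If this page had been swapped out and its swapin had failed,
	 * this access would raise SIGBUS; normally it just succeeds. */
	p[0] = 1;
	printf("no swapin error observed: %d\n", p[0]);
	return 0;
}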
mm/swapfile.c:

@@ -1788,6 +1788,17 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		goto out;
 	}
 
+	if (unlikely(!PageUptodate(page))) {
+		pte_t pteval;
+
+		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
+		pteval = swp_entry_to_pte(make_swapin_error_entry(page));
+		set_pte_at(vma->vm_mm, addr, pte, pteval);
+		swap_free(entry);
+		ret = 0;
+		goto out;
+	}
+
 	/* See do_swap_page() */
 	BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
 	BUG_ON(PageAnon(page) && PageAnonExclusive(page));
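This is the core fix: when the page brought in for swapoff is not uptodate
(i.e. the swap read failed), unuse_pte() no longer maps the garbage page.
Instead it drops the MM_SWAPENTS accounting, installs the swapin error entry
in the page table, and frees the swap slot; returning 0 lets swapoff carry
on past the bad sector rather than leaving the swap device permanently in
use.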