Commit f0ab773f authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "13 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  rbtree: include rcu.h
  scripts/faddr2line: fix error when addr2line output contains discriminator
  ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
  mm, oom: fix concurrent munlock and oom reaper unmap, v3
  mm: migrate: fix double call of radix_tree_replace_slot()
  proc/kcore: don't bounds check against address 0
  mm: don't show nr_indirectly_reclaimable in /proc/vmstat
  mm: sections are not offlined during memory hotremove
  z3fold: fix reclaim lock-ups
  init: fix false positives in W+X checking
  lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
  KASAN: prohibit KASAN+STRUCTLEAK combination
  MAINTAINERS: update Shuah's email address
parents 4bc87198 2075b16e
......@@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
......@@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
F: include/uapi/linux/sunrpc/
KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-kselftest@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
......@@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c
USB OVER IP DRIVER
M: Valentina Manea <valentina.manea.m@gmail.com>
M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
......
......@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
config GCC_PLUGIN_STRUCTLEAK
bool "Force initialization of variables containing userspace addresses"
depends on GCC_PLUGINS
# Currently STRUCTLEAK inserts initialization out of live scope of
# variables from KASAN point of view. This leads to KASAN false
# positive reports. Prohibit this combination for now.
depends on !KASAN_EXTRA
help
This plugin zero-initializes any structures containing a
__user attribute. This can prevent some classes of information
......
......@@ -4250,10 +4250,11 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry, bool preserve)
{
int error;
int error, had_lock;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
struct ocfs2_lock_holder oh;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
......@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}
had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
&oh);
if (had_lock < 0) {
error = had_lock;
mlog_errno(error);
goto out;
}
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
......@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
if (error)
mlog_errno(error);
}
out:
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
if (error)
mlog_errno(error);
}
ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
out:
if (new_orphan_inode) {
/*
* We need to open_unlock the inode no matter whether we
......
......@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
struct page *p;
if (!pfn_valid(pfn))
return 1;
p = pfn_to_page(pfn);
if (!memmap_valid_within(pfn, p, page_zone(p)))
return 1;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
/* Sanity check: Can happen in 32bit arch...maybe */
if (ent->addr < (unsigned long) __va(0))
if (!virt_addr_valid(ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
/* cut when vmalloc() area is higher than direct-map area */
if (VMALLOC_START > (unsigned long)__va(0)) {
if (ent->addr > VMALLOC_START)
goto free_out;
/*
* We've already checked virt_addr_valid so we know this address
* is a valid pointer, therefore we can check against it to determine
* if we need to trim
*/
if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
......
......@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
return 0;
}
void __oom_reap_task_mm(struct mm_struct *mm);
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
......
......@@ -26,6 +26,7 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/rcupdate.h>
/*
* Please note - only struct rb_augment_callbacks and the prototypes for
......
......@@ -35,6 +35,7 @@
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/rcupdate.h>
struct latch_tree_node {
struct rb_node node[2];
......
......@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
static void mark_readonly(void)
{
if (rodata_enabled) {
/*
* load_module() results in W+X mappings, which are cleaned up
* with call_rcu_sched(). Let's make sure that queued work is
* flushed so that we don't hit false positives looking for
* insecure pages which are W+X.
*/
rcu_barrier_sched();
mark_rodata_ro();
rodata_test();
} else
......
......@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
* Note that module_alloc() on most architectures creates W+X page
* mappings which won't be cleaned up until do_free_init() runs. Any
* code such as mark_rodata_ro() which depends on those mappings to
* be cleaned up needs to sync with the queued work, i.e. call
* rcu_barrier_sched().
*/
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
......
......@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
test_find_first_bit(bitmap, BITMAP_LEN);
/*
* test_find_first_bit() may take some time, so
* traverse only part of bitmap to avoid soft lockup.
*/
test_find_first_bit(bitmap, BITMAP_LEN / 10);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
......
......@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
int i;
int index = page_index(page);
for (i = 0; i < HPAGE_PMD_NR; i++) {
for (i = 1; i < HPAGE_PMD_NR; i++) {
pslot = radix_tree_lookup_slot(&mapping->i_pages,
index + i);
radix_tree_replace_slot(&mapping->i_pages, pslot,
newpage + i);
}
} else {
radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
}
/*
......
......@@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
/* mm's last user has gone, and it's about to be pulled down */
mmu_notifier_release(mm);
if (unlikely(mm_is_oom_victim(mm))) {
/*
* Manually reap the mm to free as much memory as possible.
* Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
* this mm from further consideration. Taking mm->mmap_sem for
* write after setting MMF_OOM_SKIP will guarantee that the oom
* reaper will not run on this mm again after mmap_sem is
* dropped.
*
* Nothing can be holding mm->mmap_sem here and the above call
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
*
* This needs to be done before calling munlock_vma_pages_all(),
* which clears VM_LOCKED, otherwise the oom reaper cannot
* reliably test it.
*/
mutex_lock(&oom_lock);
__oom_reap_task_mm(mm);
mutex_unlock(&oom_lock);
set_bit(MMF_OOM_SKIP, &mm->flags);
down_write(&mm->mmap_sem);
up_write(&mm->mmap_sem);
}
if (mm->locked_vm) {
vma = mm->mmap;
while (vma) {
......@@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, vma, 0, -1);
if (unlikely(mm_is_oom_victim(mm))) {
/*
* Wait for oom_reap_task() to stop working on this
* mm. Because MMF_OOM_SKIP is already set before
* calling down_read(), oom_reap_task() will not run
* on this "mm" post up_write().
*
* mm_is_oom_victim() cannot be set from under us
* either because victim->mm is already set to NULL
* under task_lock before calling mmput and oom_mm is
* set not NULL by the OOM killer only if victim->mm
* is found not NULL while holding the task_lock.
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
down_write(&mm->mmap_sem);
up_write(&mm->mmap_sem);
}
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1);
......
......@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
return false;
}
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
......@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
/*
 * __oom_reap_task_mm - unmap the reapable memory of @mm
 * @mm: address space to reap
 *
 * Flags @mm as MMF_UNSTABLE and then walks the mm->mmap VMA list. Every VMA
 * that passes can_madv_dontneed_vma() and is either anonymous or not
 * VM_SHARED has its whole [vm_start, vm_end) range unmapped, bracketed by
 * the mmu notifier invalidate_range start/end calls.
 *
 * NOTE(review): the locking the caller must hold is not visible from this
 * function - confirm against the call sites.
 */
void __oom_reap_task_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	/*
	 * Tell all users of get_user/copy_from_user etc... that the content
	 * is no longer stable. No barriers really needed because unmapping
	 * should imply barriers already and the reader would hit a page fault
	 * if it stumbled over a reaped memory.
	 */
	set_bit(MMF_UNSTABLE, &mm->flags);

	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
		if (!can_madv_dontneed_vma(vma))
			continue;

		/*
		 * Only anonymous pages have a good chance to be dropped
		 * without additional steps which we cannot afford as we
		 * are OOM already.
		 *
		 * We do not even care about fs backed pages because all
		 * which are reclaimable have already been reclaimed and
		 * we do not want to block exit_mmap by keeping mm ref
		 * count elevated without a good reason.
		 */
		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
			const unsigned long start = vma->vm_start;
			const unsigned long end = vma->vm_end;
			/* One gather per VMA, scoped to exactly this range. */
			struct mmu_gather tlb;

			tlb_gather_mmu(&tlb, mm, start, end);
			mmu_notifier_invalidate_range_start(mm, start, end);
			unmap_page_range(&tlb, vma, start, end, NULL);
			mmu_notifier_invalidate_range_end(mm, start, end);
			tlb_finish_mmu(&tlb, start, end);
		}
	}
}
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;
/*
* We have to make sure to not race with the victim exit path
* and cause premature new oom victim selection:
* __oom_reap_task_mm exit_mm
* oom_reap_task_mm exit_mm
* mmget_not_zero
* mmput
* atomic_dec_and_test
......@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
trace_start_task_reaping(tsk->pid);
/*
* Tell all users of get_user/copy_from_user etc... that the content
* is no longer stable. No barriers really needed because unmapping
* should imply barriers already and the reader would hit a page fault
* if it stumbled over a reaped memory.
*/
set_bit(MMF_UNSTABLE, &mm->flags);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (!can_madv_dontneed_vma(vma))
continue;
__oom_reap_task_mm(mm);
/*
* Only anonymous pages have a good chance to be dropped
* without additional steps which we cannot afford as we
* are OOM already.
*
* We do not even care about fs backed pages because all
* which are reclaimable have already been reclaimed and
* we do not want to block exit_mmap by keeping mm ref
* count elevated without a good reason.
*/
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
const unsigned long start = vma->vm_start;
const unsigned long end = vma->vm_end;
tlb_gather_mmu(&tlb, mm, start, end);
mmu_notifier_invalidate_range_start(mm, start, end);
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
......@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES ||
test_bit(MMF_OOM_SKIP, &mm->flags))
goto done;
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
......
......@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
unsigned long section_nr = pfn_to_section_nr(start_pfn);
unsigned long section_nr = pfn_to_section_nr(pfn);
struct mem_section *ms;
/*
......
......@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
"nr_written",
"nr_indirectly_reclaimable",
"", /* nr_indirectly_reclaimable */
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
......@@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
unsigned long *l = arg;
unsigned long off = l - (unsigned long *)m->private;
/* Skip hidden vmstat items. */
if (*vmstat_text[off] == '\0')
return 0;
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');
......
......@@ -144,7 +144,8 @@ enum z3fold_page_flags {
PAGE_HEADLESS = 0,
MIDDLE_CHUNK_MAPPED,
NEEDS_COMPACTING,
PAGE_STALE
PAGE_STALE,
UNDER_RECLAIM
};
/*****************
......@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
clear_bit(NEEDS_COMPACTING, &page->private);
clear_bit(PAGE_STALE, &page->private);
clear_bit(UNDER_RECLAIM, &page->private);
spin_lock_init(&zhdr->page_lock);
kref_init(&zhdr->refcount);
......@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
atomic64_dec(&pool->pages_nr);
return;
}
if (test_bit(UNDER_RECLAIM, &page->private)) {
z3fold_page_unlock(zhdr);
return;
}
if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
z3fold_page_unlock(zhdr);
return;
......@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
kref_get(&zhdr->refcount);
list_del_init(&zhdr->buddy);
zhdr->cpu = -1;
set_bit(UNDER_RECLAIM, &page->private);
break;
}
list_del_init(&page->lru);
......@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
spin_lock(&pool->lock);
if (test_bit(PAGE_HEADLESS, &page->private)) {
if (ret == 0) {
spin_unlock(&pool->lock);
free_z3fold_page(page);
return 0;
}
} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
atomic64_dec(&pool->pages_nr);
spin_lock(&pool->lock);
list_add(&page->lru, &pool->lru);
spin_unlock(&pool->lock);
} else {
z3fold_page_lock(zhdr);
clear_bit(UNDER_RECLAIM, &page->private);
if (kref_put(&zhdr->refcount,
release_z3fold_page_locked)) {
atomic64_dec(&pool->pages_nr);
return 0;
}
/*
* if we are here, the page is still not completely
* free. Take the global pool lock then to be able
* to add it back to the lru list
*/
spin_lock(&pool->lock);
list_add(&page->lru, &pool->lru);
spin_unlock(&pool->lock);
return 0;
z3fold_page_unlock(zhdr);
}
/*
* Add to the beginning of LRU.
* Pool lock has to be kept here to ensure the page has
* not already been released
*/
list_add(&page->lru, &pool->lru);
/* We started off locked so we need to lock the pool back */
spin_lock(&pool->lock);
}
spin_unlock(&pool->lock);
return -EAGAIN;
......
......@@ -170,7 +170,10 @@ __faddr2line() {
echo "$file_lines" | while read -r line
do
echo $line
eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
n1=$[$n-5]
n2=$[$n+5]
f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
done
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment