Commit 7b3064f0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "Various fixes and followups"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, compaction: make sure we isolate a valid PFN
  include/linux/generic-radix-tree.h: fix kerneldoc comment
  kernel/signal.c: trace_signal_deliver when signal_group_exit
  drivers/iommu/intel-iommu.c: fix variable 'iommu' set but not used
  spdxcheck.py: fix directory structures
  kasan: initialize tag to 0xff in __kasan_kmalloc
  z3fold: fix sheduling while atomic
  scripts/gdb: fix invocation when CONFIG_COMMON_CLK is not set
  mm/gup: continue VM_FAULT_RETRY processing even for pre-faults
  ocfs2: fix error path kobject memory leak
  memcg: make it work on sparse non-0-node systems
  mm, memcg: consider subtrees in memory.events
  prctl_set_mm: downgrade mmap_sem to read lock
  prctl_set_mm: refactor checks from validate_prctl_map
  kernel/fork.c: make max_threads symbol static
  arch/arm/boot/compressed/decompress.c: fix build error due to lz4 changes
  arch/parisc/configs/c8000_defconfig: remove obsoleted CONFIG_DEBUG_SLAB_LEAK
  mm/vmalloc.c: fix typo in comment
  lib/sort.c: fix kernel-doc notation warnings
  mm: fix Documentation/vm/hmm.rst Sphinx warnings
parents 3ab4436f e577c8b6
......@@ -177,6 +177,15 @@ cgroup v2 currently supports the following mount options.
ignored on non-init namespace mounts. Please refer to the
Delegation section for details.
memory_localevents
Only populate memory.events with data for the current cgroup,
and not any subtrees. This is legacy behaviour, the default
behaviour without this option is to include subtree counts.
This option is system wide and can only be set on mount or
modified through remount from the init namespace. The mount
option is ignored on non-init namespace mounts.
Organizing Processes and Threads
--------------------------------
......
......@@ -288,15 +288,17 @@ For instance if the device flags for device entries are:
WRITE (1 << 62)
Now let say that device driver wants to fault with at least read a range then
it does set:
range->default_flags = (1 << 63)
it does set::
range->default_flags = (1 << 63);
range->pfn_flags_mask = 0;
and calls hmm_range_fault() as described above. This will fill fault all page
in the range with at least read permission.
Now let say driver wants to do the same except for one page in the range for
which its want to have write. Now driver set:
which its want to have write. Now driver set::
range->default_flags = (1 << 63);
range->pfn_flags_mask = (1 << 62);
range->pfns[index_of_write] = (1 << 62);
......
......@@ -32,6 +32,7 @@
extern char * strstr(const char * s1, const char *s2);
extern size_t strlen(const char *s);
extern int memcmp(const void *cs, const void *ct, size_t count);
extern char * strchrnul(const char *, int);
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
......
......@@ -225,7 +225,6 @@ CONFIG_UNUSED_SYMBOLS=y
CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_SLAB=y
CONFIG_DEBUG_SLAB_LEAK=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_PANIC_ON_OOPS=y
......
......@@ -3034,7 +3034,8 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
{
struct pci_dev *pdev = NULL;
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
/* To avoid a -Wunused-but-set-variable warning. */
struct intel_iommu *iommu __maybe_unused;
struct device *dev;
int i;
int ret = 0;
......
......@@ -193,6 +193,7 @@ int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb)
ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck,
NULL, "filecheck");
if (ret) {
kobject_put(&entry->fs_kobj);
kfree(fcheck);
return ret;
}
......
......@@ -89,6 +89,11 @@ enum {
* Enable cpuset controller in v1 cgroup to use v2 behavior.
*/
CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
/*
* Enable legacy local memory.events.
*/
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
};
/* cftype->flags */
......
......@@ -2,7 +2,7 @@
#define _LINUX_GENERIC_RADIX_TREE_H
/**
* DOC: Generic radix trees/sparse arrays:
* DOC: Generic radix trees/sparse arrays
*
* Very simple and minimalistic, supporting arbitrary size entries up to
* PAGE_SIZE.
......
......@@ -54,6 +54,7 @@ struct list_lru {
#ifdef CONFIG_MEMCG_KMEM
struct list_head list;
int shrinker_id;
bool memcg_aware;
#endif
};
......
......@@ -737,8 +737,14 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
do {
atomic_long_inc(&memcg->memory_events[event]);
cgroup_file_notify(&memcg->events_file);
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
break;
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
}
static inline void memcg_memory_event_mm(struct mm_struct *mm,
......
......@@ -1810,11 +1810,13 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
enum cgroup2_param {
Opt_nsdelegate,
Opt_memory_localevents,
nr__cgroup2_params
};
static const struct fs_parameter_spec cgroup2_param_specs[] = {
fsparam_flag ("nsdelegate", Opt_nsdelegate),
fsparam_flag("nsdelegate", Opt_nsdelegate),
fsparam_flag("memory_localevents", Opt_memory_localevents),
{}
};
......@@ -1837,6 +1839,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
case Opt_nsdelegate:
ctx->flags |= CGRP_ROOT_NS_DELEGATE;
return 0;
case Opt_memory_localevents:
ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
return 0;
}
return -EINVAL;
}
......@@ -1848,6 +1853,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
else
cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
else
cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
}
}
......@@ -1855,6 +1865,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
{
if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
seq_puts(seq, ",nsdelegate");
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
seq_puts(seq, ",memory_localevents");
return 0;
}
......@@ -6325,7 +6337,7 @@ static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
......
......@@ -123,7 +123,7 @@
unsigned long total_forks; /* Handle normal Linux uptimes. */
int nr_threads; /* The idle threads do not count.. */
int max_threads; /* tunable limit on nr_threads */
static int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
......
......@@ -2485,6 +2485,8 @@ bool get_signal(struct ksignal *ksig)
if (signal_group_exit(signal)) {
ksig->info.si_signo = signr = SIGKILL;
sigdelset(&current->pending.signal, SIGKILL);
trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
&sighand->action[SIGKILL - 1]);
recalc_sigpending();
goto fatal;
}
......
......@@ -1882,13 +1882,14 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
}
/*
* Check arithmetic relations of passed addresses.
*
* WARNING: we don't require any capability here so be very careful
* in what is allowed for modification from userspace.
*/
static int validate_prctl_map(struct prctl_mm_map *prctl_map)
static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map)
{
unsigned long mmap_max_addr = TASK_SIZE;
struct mm_struct *mm = current->mm;
int error = -EINVAL, i;
static const unsigned char offsets[] = {
......@@ -1949,24 +1950,6 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map)
prctl_map->start_data))
goto out;
/*
* Someone is trying to cheat the auxv vector.
*/
if (prctl_map->auxv_size) {
if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
goto out;
}
/*
* Finally, make sure the caller has the rights to
* change /proc/pid/exe link: only local sys admin should
* be allowed to.
*/
if (prctl_map->exe_fd != (u32)-1) {
if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
goto out;
}
error = 0;
out:
return error;
......@@ -1993,11 +1976,18 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
return -EFAULT;
error = validate_prctl_map(&prctl_map);
error = validate_prctl_map_addr(&prctl_map);
if (error)
return error;
if (prctl_map.auxv_size) {
/*
* Someone is trying to cheat the auxv vector.
*/
if (!prctl_map.auxv ||
prctl_map.auxv_size > sizeof(mm->saved_auxv))
return -EINVAL;
memset(user_auxv, 0, sizeof(user_auxv));
if (copy_from_user(user_auxv,
(const void __user *)prctl_map.auxv,
......@@ -2010,6 +2000,14 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
}
if (prctl_map.exe_fd != (u32)-1) {
/*
* Make sure the caller has the rights to
* change /proc/pid/exe link: only local sys admin should
* be allowed to.
*/
if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EINVAL;
error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
if (error)
return error;
......@@ -2097,7 +2095,11 @@ static int prctl_set_mm(int opt, unsigned long addr,
unsigned long arg4, unsigned long arg5)
{
struct mm_struct *mm = current->mm;
struct prctl_mm_map prctl_map;
struct prctl_mm_map prctl_map = {
.auxv = NULL,
.auxv_size = 0,
.exe_fd = -1,
};
struct vm_area_struct *vma;
int error;
......@@ -2125,9 +2127,15 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = -EINVAL;
down_write(&mm->mmap_sem);
/*
* arg_lock protects concurent updates of arg boundaries, we need
* mmap_sem for a) concurrent sys_brk, b) finding VMA for addr
* validation.
*/
down_read(&mm->mmap_sem);
vma = find_vma(mm, addr);
spin_lock(&mm->arg_lock);
prctl_map.start_code = mm->start_code;
prctl_map.end_code = mm->end_code;
prctl_map.start_data = mm->start_data;
......@@ -2139,9 +2147,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
prctl_map.arg_end = mm->arg_end;
prctl_map.env_start = mm->env_start;
prctl_map.env_end = mm->env_end;
prctl_map.auxv = NULL;
prctl_map.auxv_size = 0;
prctl_map.exe_fd = -1;
switch (opt) {
case PR_SET_MM_START_CODE:
......@@ -2181,7 +2186,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
goto out;
}
error = validate_prctl_map(&prctl_map);
error = validate_prctl_map_addr(&prctl_map);
if (error)
goto out;
......@@ -2218,7 +2223,8 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = 0;
out:
up_write(&mm->mmap_sem);
spin_unlock(&mm->arg_lock);
up_read(&mm->mmap_sem);
return error;
}
......
......@@ -43,8 +43,9 @@ static bool is_aligned(const void *base, size_t size, unsigned char align)
/**
* swap_words_32 - swap two elements in 32-bit chunks
* @a, @b: pointers to the elements
* @size: element size (must be a multiple of 4)
* @a: pointer to the first element to swap
* @b: pointer to the second element to swap
* @n: element size (must be a multiple of 4)
*
* Exchange the two objects in memory. This exploits base+index addressing,
* which basically all CPUs have, to minimize loop overhead computations.
......@@ -65,8 +66,9 @@ static void swap_words_32(void *a, void *b, size_t n)
/**
* swap_words_64 - swap two elements in 64-bit chunks
* @a, @b: pointers to the elements
* @size: element size (must be a multiple of 8)
* @a: pointer to the first element to swap
* @b: pointer to the second element to swap
* @n: element size (must be a multiple of 8)
*
* Exchange the two objects in memory. This exploits base+index
* addressing, which basically all CPUs have, to minimize loop overhead
......@@ -100,8 +102,9 @@ static void swap_words_64(void *a, void *b, size_t n)
/**
* swap_bytes - swap two elements a byte at a time
* @a, @b: pointers to the elements
* @size: element size
* @a: pointer to the first element to swap
* @b: pointer to the second element to swap
* @n: element size
*
* This is the fallback if alignment doesn't allow using larger chunks.
*/
......
......@@ -1399,7 +1399,7 @@ fast_isolate_freepages(struct compact_control *cc)
page = pfn_to_page(highest);
cc->free_pfn = highest;
} else {
if (cc->direct_compaction) {
if (cc->direct_compaction && pfn_valid(min_pfn)) {
page = pfn_to_page(min_pfn);
cc->free_pfn = min_pfn;
}
......
......@@ -1042,10 +1042,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
BUG_ON(ret >= nr_pages);
}
if (!pages)
/* If it's a prefault don't insist harder */
return ret;
if (ret > 0) {
nr_pages -= ret;
pages_done += ret;
......@@ -1061,7 +1057,11 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
pages_done = ret;
break;
}
/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
/*
* VM_FAULT_RETRY triggered, so seek to the faulting offset.
* For the prefault case (!pages) we only update counts.
*/
if (likely(pages))
pages += ret;
start += ret << PAGE_SHIFT;
......@@ -1085,6 +1085,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
pages_done++;
if (!nr_pages)
break;
if (likely(pages))
pages++;
start += PAGE_SIZE;
}
......
......@@ -464,7 +464,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
{
unsigned long redzone_start;
unsigned long redzone_end;
u8 tag;
u8 tag = 0xff;
if (gfpflags_allow_blocking(flags))
quarantine_reduce();
......
......@@ -38,11 +38,7 @@ static int lru_shrinker_id(struct list_lru *lru)
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
/*
* This needs node 0 to be always present, even
* in the systems supporting sparse numa ids.
*/
return !!lru->node[0].memcg_lrus;
return lru->memcg_aware;
}
static inline struct list_lru_one *
......@@ -452,6 +448,8 @@ static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
int i;
lru->memcg_aware = memcg_aware;
if (!memcg_aware)
return 0;
......
......@@ -718,12 +718,12 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
if (!mm->arg_end)
goto out_mm; /* Shh! No looking before we're done */
down_read(&mm->mmap_sem);
spin_lock(&mm->arg_lock);
arg_start = mm->arg_start;
arg_end = mm->arg_end;
env_start = mm->env_start;
env_end = mm->env_end;
up_read(&mm->mmap_sem);
spin_unlock(&mm->arg_lock);
len = arg_end - arg_start;
......
......@@ -815,7 +815,7 @@ find_vmap_lowest_match(unsigned long size,
}
/*
* OK. We roll back and find the fist right sub-tree,
* OK. We roll back and find the first right sub-tree,
* that will satisfy the search criteria. It can happen
* only once due to "vstart" restriction.
*/
......
......@@ -190,10 +190,11 @@ static int size_to_chunks(size_t size)
static void compact_page_work(struct work_struct *w);
static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool)
static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
gfp_t gfp)
{
struct z3fold_buddy_slots *slots = kmem_cache_alloc(pool->c_handle,
GFP_KERNEL);
gfp);
if (slots) {
memset(slots->slot, 0, sizeof(slots->slot));
......@@ -295,10 +296,10 @@ static void z3fold_unregister_migration(struct z3fold_pool *pool)
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page,
struct z3fold_pool *pool)
struct z3fold_pool *pool, gfp_t gfp)
{
struct z3fold_header *zhdr = page_address(page);
struct z3fold_buddy_slots *slots = alloc_slots(pool);
struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
if (!slots)
return NULL;
......@@ -912,7 +913,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
if (!page)
return -ENOMEM;
zhdr = init_z3fold_page(page, pool);
zhdr = init_z3fold_page(page, pool, gfp);
if (!zhdr) {
__free_page(page);
return -ENOMEM;
......
......@@ -40,7 +40,8 @@
import gdb
/* linux/clk-provider.h */
LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
if IS_BUILTIN(CONFIG_COMMON_CLK):
LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
/* linux/fs.h */
LX_VALUE(SB_RDONLY)
......
......@@ -32,7 +32,8 @@ class SPDXdata(object):
def read_spdxdata(repo):
# The subdirectories of LICENSES in the kernel source
license_dirs = [ "preferred", "deprecated", "exceptions", "dual" ]
# Note: exceptions needs to be parsed as last directory.
license_dirs = [ "preferred", "dual", "deprecated", "exceptions" ]
lictree = repo.head.commit.tree['LICENSES']
spdx = SPDXdata()
......@@ -58,13 +59,13 @@ def read_spdxdata(repo):
elif l.startswith('SPDX-Licenses:'):
for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
if not lic in spdx.licenses:
raise SPDXException(None, 'Exception %s missing license %s' %(ex, lic))
raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
spdx.exceptions[exception].append(lic)
elif l.startswith("License-Text:"):
if exception:
if not len(spdx.exceptions[exception]):
raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %excid)
raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
spdx.exception_files += 1
else:
spdx.license_files += 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment