Commit e87f327e authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:

 1) Fix TLB context wrap races, from Pavel Tatashin.

 2) Cure some gcc-7 build issues.

 3) Handle invalid setup_hugepagesz command line values properly, from
    Liam R Howlett.

 4) Copy TSB using the correct address shift for the huge TSB, from Mike
    Kravetz.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: delete old wrap code
  sparc64: new context wrap
  sparc64: add per-cpu mm of secondary contexts
  sparc64: redefine first version
  sparc64: combine activate_mm and switch_mm
  sparc64: reset mm cpumask after wrap
  sparc/mm/hugepages: Fix setup_hugepagesz for invalid values.
  sparc: Machine description indices can vary
  sparc64: mm: fix copy_tsb to correctly copy huge page TSBs
  arch/sparc: support NR_CPUS = 4096
  sparc64: Add __multi3 for gcc 7.x and later.
  sparc64: Fix build warnings with gcc 7.
  arch/sparc: increase CONFIG_NODES_SHIFT on SPARC64 to 5
parents abb2ea7d b3aefc2f
......@@ -192,9 +192,9 @@ config NR_CPUS
int "Maximum number of CPUs"
depends on SMP
range 2 32 if SPARC32
range 2 1024 if SPARC64
range 2 4096 if SPARC64
default 32 if SPARC32
default 64 if SPARC64
default 4096 if SPARC64
source kernel/Kconfig.hz
......@@ -295,9 +295,13 @@ config NUMA
depends on SPARC64 && SMP
config NODES_SHIFT
int
default "4"
int "Maximum NUMA Nodes (as a power of 2)"
range 4 5 if SPARC64
default "5"
depends on NEED_MULTIPLE_NODES
help
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
......
......@@ -52,7 +52,7 @@
#define CTX_NR_MASK TAG_CONTEXT_BITS
#define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK)
#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT)
#define CTX_VALID(__ctx) \
(!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
#define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
......
......@@ -19,13 +19,8 @@ extern spinlock_t ctx_alloc_lock;
extern unsigned long tlb_context_cache;
extern unsigned long mmu_context_bmap[];
DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
void get_new_mmu_context(struct mm_struct *mm);
#ifdef CONFIG_SMP
void smp_new_mmu_context_version(void);
#else
#define smp_new_mmu_context_version() do { } while (0)
#endif
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
......@@ -76,8 +71,9 @@ void __flush_tlb_mm(unsigned long, unsigned long);
static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
{
unsigned long ctx_valid, flags;
int cpu;
int cpu = smp_processor_id();
per_cpu(per_cpu_secondary_mm, cpu) = mm;
if (unlikely(mm == &init_mm))
return;
......@@ -123,7 +119,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
* for the first time, we must flush that context out of the
* local TLB.
*/
cpu = smp_processor_id();
if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
cpumask_set_cpu(cpu, mm_cpumask(mm));
__flush_tlb_mm(CTX_HWBITS(mm->context),
......@@ -133,26 +128,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
}
#define deactivate_mm(tsk,mm) do { } while (0)
/* Activate a new MM instance for the current task. */
static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
{
unsigned long flags;
int cpu;
spin_lock_irqsave(&mm->context.lock, flags);
if (!CTX_VALID(mm->context))
get_new_mmu_context(mm);
cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
cpumask_set_cpu(cpu, mm_cpumask(mm));
load_secondary_context(mm);
__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
tsb_context_switch(mm);
spin_unlock_irqrestore(&mm->context.lock, flags);
}
#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL)
#endif /* !(__ASSEMBLY__) */
#endif /* !(__SPARC64_MMU_CONTEXT_H) */
......@@ -20,7 +20,6 @@
#define PIL_SMP_CALL_FUNC 1
#define PIL_SMP_RECEIVE_SIGNAL 2
#define PIL_SMP_CAPTURE 3
#define PIL_SMP_CTX_NEW_VERSION 4
#define PIL_DEVICE_IRQ 5
#define PIL_SMP_CALL_FUNC_SNGL 6
#define PIL_DEFERRED_PCR_WORK 7
......
......@@ -327,6 +327,7 @@ struct vio_dev {
int compat_len;
u64 dev_no;
u64 id;
unsigned long channel_id;
......
......@@ -909,7 +909,7 @@ static int register_services(struct ds_info *dp)
pbuf.req.handle = cp->handle;
pbuf.req.major = 1;
pbuf.req.minor = 0;
strcpy(pbuf.req.svc_id, cp->service_id);
strcpy(pbuf.id_buf, cp->service_id);
err = __ds_send(lp, &pbuf, msg_len);
if (err > 0)
......
......@@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
{
#ifdef CONFIG_SMP
unsigned long page;
void *mondo, *p;
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE);
/* Make sure mondo block is 64byte aligned */
p = kzalloc(127, GFP_KERNEL);
if (!p) {
prom_printf("SUN4V: Error, cannot allocate mondo block.\n");
prom_halt();
}
mondo = (void *)(((unsigned long)p + 63) & ~0x3f);
tb->cpu_mondo_block_pa = __pa(mondo);
page = get_zeroed_page(GFP_KERNEL);
if (!page) {
prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
prom_printf("SUN4V: Error, cannot allocate cpu list page.\n");
prom_halt();
}
tb->cpu_mondo_block_pa = __pa(page);
tb->cpu_list_pa = __pa(page + 64);
tb->cpu_list_pa = __pa(page);
#endif
}
......
......@@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
/* smp_64.c */
void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs);
void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs);
void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
......
......@@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
preempt_enable();
}
void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
struct mm_struct *mm;
unsigned long flags;
clear_softint(1 << irq);
/* See if we need to allocate a new TLB context because
* the version of the one we are using is now out of date.
*/
mm = current->active_mm;
if (unlikely(!mm || (mm == &init_mm)))
return;
spin_lock_irqsave(&mm->context.lock, flags);
if (unlikely(!CTX_VALID(mm->context)))
get_new_mmu_context(mm);
spin_unlock_irqrestore(&mm->context.lock, flags);
load_secondary_context(mm);
__flush_tlb_mm(CTX_HWBITS(mm->context),
SECONDARY_CONTEXT);
}
void smp_new_mmu_context_version(void)
{
smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}
#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(unsigned long flags)
{
......
......@@ -455,13 +455,16 @@ __tsb_context_switch:
.type copy_tsb,#function
copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
* %o2=new_tsb_base, %o3=new_tsb_size
* %o4=page_size_shift
*/
sethi %uhi(TSB_PASS_BITS), %g7
srlx %o3, 4, %o3
add %o0, %o1, %g1 /* end of old tsb */
add %o0, %o1, %o1 /* end of old tsb */
sllx %g7, 32, %g7
sub %o3, 1, %o3 /* %o3 == new tsb hash mask */
mov %o4, %g1 /* page_size_shift */
661: prefetcha [%o0] ASI_N, #one_read
.section .tsb_phys_patch, "ax"
.word 661b
......@@ -486,9 +489,9 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
/* This can definitely be computed faster... */
srlx %o0, 4, %o5 /* Build index */
and %o5, 511, %o5 /* Mask index */
sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
sllx %o5, %g1, %o5 /* Put into vaddr position */
or %o4, %o5, %o4 /* Full VADDR. */
srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */
srlx %o4, %g1, %o4 /* Shift down to create index */
and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */
sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */
TSB_STORE(%o2 + %o4, %g2) /* Store TAG */
......@@ -496,7 +499,7 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
TSB_STORE(%o2 + %o4, %g3) /* Store TTE */
80: add %o0, 16, %o0
cmp %o0, %g1
cmp %o0, %o1
bne,pt %xcc, 90b
nop
......
......@@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40)
tl0_irq1: TRAP_IRQ(smp_call_function_client, 1)
tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2)
tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3)
tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4)
tl0_irq4: BTRAP(0x44)
#else
tl0_irq1: BTRAP(0x41)
tl0_irq2: BTRAP(0x42)
......
......@@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
if (!id) {
dev_set_name(&vdev->dev, "%s", bus_id_name);
vdev->dev_no = ~(u64)0;
vdev->id = ~(u64)0;
} else if (!cfg_handle) {
dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id);
vdev->dev_no = *id;
vdev->id = ~(u64)0;
} else {
dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name,
*cfg_handle, *id);
vdev->dev_no = *cfg_handle;
vdev->id = *id;
}
vdev->dev.parent = parent;
......@@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node)
(void) vio_create_one(hp, node, &root_vdev->dev);
}
struct vio_md_node_query {
const char *type;
u64 dev_no;
u64 id;
};
static int vio_md_node_match(struct device *dev, void *arg)
{
struct vio_md_node_query *query = (struct vio_md_node_query *) arg;
struct vio_dev *vdev = to_vio_dev(dev);
if (vdev->mp == (u64) arg)
return 1;
if (vdev->dev_no != query->dev_no)
return 0;
if (vdev->id != query->id)
return 0;
if (strcmp(vdev->type, query->type))
return 0;
return 1;
}
static void vio_remove(struct mdesc_handle *hp, u64 node)
{
const char *type;
const u64 *id, *cfg_handle;
u64 a;
struct vio_md_node_query query;
struct device *dev;
dev = device_find_child(&root_vdev->dev, (void *) node,
type = mdesc_get_property(hp, node, "device-type", NULL);
if (!type) {
type = mdesc_get_property(hp, node, "name", NULL);
if (!type)
type = mdesc_node_name(hp, node);
}
query.type = type;
id = mdesc_get_property(hp, node, "id", NULL);
cfg_handle = NULL;
mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
u64 target;
target = mdesc_arc_target(hp, a);
cfg_handle = mdesc_get_property(hp, target,
"cfg-handle", NULL);
if (cfg_handle)
break;
}
if (!id) {
query.dev_no = ~(u64)0;
query.id = ~(u64)0;
} else if (!cfg_handle) {
query.dev_no = *id;
query.id = ~(u64)0;
} else {
query.dev_no = *cfg_handle;
query.id = *id;
}
dev = device_find_child(&root_vdev->dev, &query,
vio_md_node_match);
if (dev) {
printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev));
device_unregister(dev);
put_device(dev);
} else {
if (!id)
printk(KERN_ERR "VIO: Removed unknown %s node.\n",
type);
else if (!cfg_handle)
printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n",
type, *id);
else
printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n",
type, *cfg_handle, *id);
}
}
......
......@@ -15,6 +15,7 @@ lib-$(CONFIG_SPARC32) += copy_user.o locks.o
lib-$(CONFIG_SPARC64) += atomic_64.o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
......
#include <linux/linkage.h>
#include <asm/export.h>
.text
.align 4
ENTRY(__multi3) /* %o0 = u, %o1 = v */
mov %o1, %g1
srl %o3, 0, %g4
mulx %g4, %g1, %o1
srlx %g1, 0x20, %g3
mulx %g3, %g4, %g5
sllx %g5, 0x20, %o5
srl %g1, 0, %g4
sub %o1, %o5, %o5
srlx %o5, 0x20, %o5
addcc %g5, %o5, %g5
srlx %o3, 0x20, %o5
mulx %g4, %o5, %g4
mulx %g3, %o5, %o5
sethi %hi(0x80000000), %g3
addcc %g5, %g4, %g5
srlx %g5, 0x20, %g5
add %g3, %g3, %g3
movcc %xcc, %g0, %g3
addcc %o5, %g5, %o5
sllx %g4, 0x20, %g4
add %o1, %g4, %o1
add %o5, %g3, %g2
mulx %g1, %o2, %g1
add %g1, %g2, %g1
mulx %o0, %o3, %o0
retl
add %g1, %o0, %o0
ENDPROC(__multi3)
EXPORT_SYMBOL(__multi3)
......@@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char *string)
}
if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
pr_warn("hugepagesz=%llu not supported by MMU.\n",
hugetlb_bad_size();
pr_err("hugepagesz=%llu not supported by MMU.\n",
hugepage_size);
goto out;
}
......@@ -706,10 +707,58 @@ EXPORT_SYMBOL(__flush_dcache_range);
/* get_new_mmu_context() uses "cache + 1". */
DEFINE_SPINLOCK(ctx_alloc_lock);
unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
unsigned long tlb_context_cache = CTX_FIRST_VERSION;
#define MAX_CTX_NR (1UL << CTX_NR_BITS)
#define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR)
DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
static void mmu_context_wrap(void)
{
unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
unsigned long new_ver, new_ctx, old_ctx;
struct mm_struct *mm;
int cpu;
bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
/* Reserve kernel context */
set_bit(0, mmu_context_bmap);
new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
if (unlikely(new_ver == 0))
new_ver = CTX_FIRST_VERSION;
tlb_context_cache = new_ver;
/*
* Make sure that any new mm that are added into per_cpu_secondary_mm,
* are going to go through get_new_mmu_context() path.
*/
mb();
/*
* Updated versions to current on those CPUs that had valid secondary
* contexts
*/
for_each_online_cpu(cpu) {
/*
* If a new mm is stored after we took this mm from the array,
* it will go into get_new_mmu_context() path, because we
* already bumped the version in tlb_context_cache.
*/
mm = per_cpu(per_cpu_secondary_mm, cpu);
if (unlikely(!mm || mm == &init_mm))
continue;
old_ctx = mm->context.sparc64_ctx_val;
if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
mm->context.sparc64_ctx_val = new_ctx;
}
}
}
/* Caller does TLB context flushing on local CPU if necessary.
* The caller also ensures that CTX_VALID(mm->context) is false.
......@@ -725,48 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm)
{
unsigned long ctx, new_ctx;
unsigned long orig_pgsz_bits;
int new_version;
spin_lock(&ctx_alloc_lock);
retry:
/* wrap might have happened, test again if our context became valid */
if (unlikely(CTX_VALID(mm->context)))
goto out;
orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
new_version = 0;
if (new_ctx >= (1 << CTX_NR_BITS)) {
new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
if (new_ctx >= ctx) {
int i;
new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
CTX_FIRST_VERSION;
if (new_ctx == 1)
new_ctx = CTX_FIRST_VERSION;
/* Don't call memset, for 16 entries that's just
* plain silly...
*/
mmu_context_bmap[0] = 3;
mmu_context_bmap[1] = 0;
mmu_context_bmap[2] = 0;
mmu_context_bmap[3] = 0;
for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
mmu_context_bmap[i + 0] = 0;
mmu_context_bmap[i + 1] = 0;
mmu_context_bmap[i + 2] = 0;
mmu_context_bmap[i + 3] = 0;
}
new_version = 1;
goto out;
mmu_context_wrap();
goto retry;
}
}
if (mm->context.sparc64_ctx_val)
cpumask_clear(mm_cpumask(mm));
mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
out:
tlb_context_cache = new_ctx;
mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
out:
spin_unlock(&ctx_alloc_lock);
if (unlikely(new_version))
smp_new_mmu_context_version();
}
static int numa_enabled = 1;
......
......@@ -496,7 +496,8 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
extern void copy_tsb(unsigned long old_tsb_base,
unsigned long old_tsb_size,
unsigned long new_tsb_base,
unsigned long new_tsb_size);
unsigned long new_tsb_size,
unsigned long page_size_shift);
unsigned long old_tsb_base = (unsigned long) old_tsb;
unsigned long new_tsb_base = (unsigned long) new_tsb;
......@@ -504,7 +505,9 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
old_tsb_base = __pa(old_tsb_base);
new_tsb_base = __pa(new_tsb_base);
}
copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
tsb_index == MM_TSB_BASE ?
PAGE_SHIFT : REAL_HPAGE_SHIFT);
}
mm->context.tsb_block[tsb_index].tsb = new_tsb;
......
......@@ -971,11 +971,6 @@ xcall_capture:
wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
retry
.globl xcall_new_mmu_context_version
xcall_new_mmu_context_version:
wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
retry
#ifdef CONFIG_KGDB
.globl xcall_kgdb_capture
xcall_kgdb_capture:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment