Commit d26c4bbf authored by Palmer Dabbelt's avatar Palmer Dabbelt

RISC-V: SMP cleanup and new features

This patch series now has evolved to contain several related changes.

1. Updated the assorted cleanup series by Palmer.
The original cleanup patch series can be found here.
http://lists.infradead.org/pipermail/linux-riscv/2018-August/001232.html

2. Implemented decoupling linux logical CPU ids from hart id.
Some of the work has been inspired from ARM64.
Tested on QEMU & HighFive Unleashed board with/without SMP enabled.

3. Included Anup's cleanup and IPI stat patch.

All the patch series have been combined to avoid conflicts as a lot of
common code is changed different patch sets. Atish has mostly addressed
review comments and fixed checkpatch errors from Palmer's and Anup's
series.
Signed-off-by: default avatarPalmer Dabbelt <palmer@sifive.com>
parents a6de21ba 8b20d2db
......@@ -88,7 +88,7 @@ static inline void wait_for_interrupt(void)
}
struct device_node;
extern int riscv_of_processor_hart(struct device_node *node);
int riscv_of_processor_hartid(struct device_node *node);
extern void riscv_fill_hwcap(void);
......
......@@ -14,16 +14,24 @@
#ifndef _ASM_RISCV_SMP_H
#define _ASM_RISCV_SMP_H
/* This both needs asm-offsets.h and is used when generating it. */
#ifndef GENERATING_ASM_OFFSETS
#include <asm/asm-offsets.h>
#endif
#include <linux/cpumask.h>
#include <linux/irqreturn.h>
#include <linux/thread_info.h>
#define INVALID_HARTID ULONG_MAX
/*
* Mapping between linux logical cpu index and hartid.
*/
extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
#define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu]
struct seq_file;
#ifdef CONFIG_SMP
/* print IPI stats */
void show_ipi_stats(struct seq_file *p, int prec);
/* SMP initialization hook for setup_arch */
void __init setup_smp(void);
......@@ -33,14 +41,31 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
/* Hook for the generic smp_call_function_single() routine. */
void arch_send_call_function_single_ipi(int cpu);
int riscv_hartid_to_cpuid(int hartid);
void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
/*
* This is particularly ugly: it appears we can't actually get the definition
* of task_struct here, but we need access to the CPU this task is running on.
* Instead of using C we're using asm-offsets.h to get the current processor
* ID.
* Obtains the hart ID of the currently executing task. This relies on
* THREAD_INFO_IN_TASK, but we define that unconditionally.
*/
#define raw_smp_processor_id() (*((int*)((char*)get_current() + TASK_TI_CPU)))
#define raw_smp_processor_id() (current_thread_info()->cpu)
#endif /* CONFIG_SMP */
#else
static inline void show_ipi_stats(struct seq_file *p, int prec)
{
}
static inline int riscv_hartid_to_cpuid(int hartid)
{
return 0;
}
static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
struct cpumask *out)
{
cpumask_set_cpu(cpuid_to_hartid_map(0), out);
}
#endif /* CONFIG_SMP */
#endif /* _ASM_RISCV_SMP_H */
......@@ -16,6 +16,7 @@
#define _ASM_RISCV_TLBFLUSH_H
#include <linux/mm_types.h>
#include <asm/smp.h>
/*
* Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
......@@ -49,13 +50,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
#include <asm/sbi.h>
static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
unsigned long size)
{
struct cpumask hmask;
cpumask_clear(&hmask);
riscv_cpuid_to_hartid_mask(cmask, &hmask);
sbi_remote_sfence_vma(hmask.bits, start, size);
}
#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
#define flush_tlb_range(vma, start, end) \
sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \
start, (end) - (start))
remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
#define flush_tlb_mm(mm) \
sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1)
remote_sfence_vma(mm_cpumask(mm), 0, -1)
#endif /* CONFIG_SMP */
......
......@@ -14,9 +14,13 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
#include <asm/smp.h>
/* Return -1 if not a valid hart */
int riscv_of_processor_hart(struct device_node *node)
/*
* Returns the hart ID of the given device tree node, or -1 if the device tree
* node isn't a RISC-V hart.
*/
int riscv_of_processor_hartid(struct device_node *node)
{
const char *isa, *status;
u32 hart;
......@@ -58,6 +62,64 @@ int riscv_of_processor_hart(struct device_node *node)
#ifdef CONFIG_PROC_FS
static void print_isa(struct seq_file *f, const char *orig_isa)
{
static const char *ext = "mafdc";
const char *isa = orig_isa;
const char *e;
/*
* Linux doesn't support rv32e or rv128i, and we only support booting
* kernels on harts with the same ISA that the kernel is compiled for.
*/
#if defined(CONFIG_32BIT)
if (strncmp(isa, "rv32i", 5) != 0)
return;
#elif defined(CONFIG_64BIT)
if (strncmp(isa, "rv64i", 5) != 0)
return;
#endif
/* Print the base ISA, as we already know it's legal. */
seq_puts(f, "isa\t\t: ");
seq_write(f, isa, 5);
isa += 5;
/*
* Check the rest of the ISA string for valid extensions, printing those
* we find. RISC-V ISA strings define an order, so we only print the
* extension bits when they're in order.
*/
for (e = ext; *e != '\0'; ++e) {
if (isa[0] == e[0]) {
seq_write(f, isa, 1);
isa++;
}
}
seq_puts(f, "\n");
/*
* If we were given an unsupported ISA in the device tree then print
* a bit of info describing what went wrong.
*/
if (isa[0] != '\0')
pr_info("unsupported ISA \"%s\" in device tree", orig_isa);
}
static void print_mmu(struct seq_file *f, const char *mmu_type)
{
#if defined(CONFIG_32BIT)
if (strcmp(mmu_type, "riscv,sv32") != 0)
return;
#elif defined(CONFIG_64BIT)
if (strcmp(mmu_type, "riscv,sv39") != 0 &&
strcmp(mmu_type, "riscv,sv48") != 0)
return;
#endif
seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
*pos = cpumask_next(*pos - 1, cpu_online_mask);
......@@ -78,21 +140,20 @@ static void c_stop(struct seq_file *m, void *v)
static int c_show(struct seq_file *m, void *v)
{
unsigned long hart_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(hart_id, NULL);
unsigned long cpu_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(cpuid_to_hartid_map(cpu_id),
NULL);
const char *compat, *isa, *mmu;
seq_printf(m, "hart\t: %lu\n", hart_id);
if (!of_property_read_string(node, "riscv,isa", &isa)
&& isa[0] == 'r'
&& isa[1] == 'v')
seq_printf(m, "isa\t: %s\n", isa);
if (!of_property_read_string(node, "mmu-type", &mmu)
&& !strncmp(mmu, "riscv,", 6))
seq_printf(m, "mmu\t: %s\n", mmu+6);
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
if (!of_property_read_string(node, "riscv,isa", &isa))
print_isa(m, isa);
if (!of_property_read_string(node, "mmu-type", &mmu))
print_mmu(m, mmu);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
seq_printf(m, "uarch\t: %s\n", compat);
seq_printf(m, "uarch\t\t: %s\n", compat);
seq_puts(m, "\n");
return 0;
......
......@@ -168,7 +168,6 @@ ENTRY(handle_exception)
/* Handle interrupts */
move a0, sp /* pt_regs */
move a1, s4 /* scause */
tail do_IRQ
1:
/* Exceptions run with interrupts enabled */
......
......@@ -47,6 +47,8 @@ ENTRY(_start)
/* Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
la a2, boot_cpu_hartid
REG_S a0, (a2)
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
......@@ -55,7 +57,7 @@ ENTRY(_start)
/* Restore C environment */
la tp, init_task
sw s0, TASK_TI_CPU(tp)
sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union
li a0, ASM_THREAD_SIZE
......
......@@ -8,6 +8,8 @@
#include <linux/interrupt.h>
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
#include <linux/seq_file.h>
#include <asm/smp.h>
/*
* Possible interrupt causes:
......@@ -24,12 +26,18 @@
*/
#define INTERRUPT_CAUSE_FLAG (1UL << (__riscv_xlen - 1))
asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause)
int arch_show_interrupts(struct seq_file *p, int prec)
{
show_ipi_stats(p, prec);
return 0;
}
asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
switch (cause & ~INTERRUPT_CAUSE_FLAG) {
switch (regs->scause & ~INTERRUPT_CAUSE_FLAG) {
case INTERRUPT_CAUSE_TIMER:
riscv_timer_interrupt();
break;
......
......@@ -81,6 +81,16 @@ EXPORT_SYMBOL(empty_zero_page);
/* The lucky hart to first increment this variable will boot the other cores */
atomic_t hart_lottery;
unsigned long boot_cpu_hartid;
unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
[0 ... NR_CPUS-1] = INVALID_HARTID
};
void __init smp_setup_processor_id(void)
{
cpuid_to_hartid_map(0) = boot_cpu_hartid;
}
#ifdef CONFIG_BLK_DEV_INITRD
static void __init setup_initrd(void)
......
......@@ -22,23 +22,44 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
/* A collection of single bit ipi messages. */
static struct {
unsigned long bits ____cacheline_aligned;
} ipi_data[NR_CPUS] __cacheline_aligned;
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_MAX
};
/* A collection of single bit ipi messages. */
static struct {
unsigned long stats[IPI_MAX] ____cacheline_aligned;
unsigned long bits ____cacheline_aligned;
} ipi_data[NR_CPUS] __cacheline_aligned;
int riscv_hartid_to_cpuid(int hartid)
{
int i = -1;
for (i = 0; i < NR_CPUS; i++)
if (cpuid_to_hartid_map(i) == hartid)
return i;
pr_err("Couldn't find cpu id for hartid [%d]\n", hartid);
BUG();
return i;
}
void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
{
int cpu;
for_each_cpu(cpu, in)
cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
}
/* Unsupported */
int setup_profiling_timer(unsigned int multiplier)
{
......@@ -48,6 +69,7 @@ int setup_profiling_timer(unsigned int multiplier)
void riscv_software_interrupt(void)
{
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
unsigned long *stats = ipi_data[smp_processor_id()].stats;
/* Clear pending IPI */
csr_clear(sip, SIE_SSIE);
......@@ -62,11 +84,15 @@ void riscv_software_interrupt(void)
if (ops == 0)
return;
if (ops & (1 << IPI_RESCHEDULE))
if (ops & (1 << IPI_RESCHEDULE)) {
stats[IPI_RESCHEDULE]++;
scheduler_ipi();
}
if (ops & (1 << IPI_CALL_FUNC))
if (ops & (1 << IPI_CALL_FUNC)) {
stats[IPI_CALL_FUNC]++;
generic_smp_call_function_interrupt();
}
BUG_ON((ops >> IPI_MAX) != 0);
......@@ -78,14 +104,36 @@ void riscv_software_interrupt(void)
static void
send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
{
int i;
int cpuid, hartid;
struct cpumask hartid_mask;
cpumask_clear(&hartid_mask);
mb();
for_each_cpu(i, to_whom)
set_bit(operation, &ipi_data[i].bits);
for_each_cpu(cpuid, to_whom) {
set_bit(operation, &ipi_data[cpuid].bits);
hartid = cpuid_to_hartid_map(cpuid);
cpumask_set_cpu(hartid, &hartid_mask);
}
mb();
sbi_send_ipi(cpumask_bits(to_whom));
sbi_send_ipi(cpumask_bits(&hartid_mask));
}
static const char * const ipi_names[] = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
};
void show_ipi_stats(struct seq_file *p, int prec)
{
unsigned int cpu, i;
for (i = 0; i < IPI_MAX; i++) {
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]);
seq_printf(p, " %s\n", ipi_names[i]);
}
}
void arch_send_call_function_ipi_mask(struct cpumask *mask)
......@@ -127,7 +175,7 @@ void smp_send_reschedule(int cpu)
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
cpumask_t others, *mask;
cpumask_t others, hmask, *mask;
preempt_disable();
......@@ -145,9 +193,11 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
if (mm != current->active_mm || !local)
sbi_remote_fence_i(others.bits);
else {
if (mm != current->active_mm || !local) {
cpumask_clear(&hmask);
riscv_cpuid_to_hartid_mask(&others, &hmask);
sbi_remote_fence_i(hmask.bits);
} else {
/*
* It's assumed that at least one strongly ordered operation is
* performed on this hart between setting a hart's cpumask bit
......
......@@ -30,6 +30,7 @@
#include <linux/irq.h>
#include <linux/of.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
......@@ -50,25 +51,33 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
void __init setup_smp(void)
{
struct device_node *dn = NULL;
int hart, im_okay_therefore_i_am = 0;
int hart;
bool found_boot_cpu = false;
int cpuid = 1;
while ((dn = of_find_node_by_type(dn, "cpu"))) {
hart = riscv_of_processor_hart(dn);
if (hart >= 0) {
set_cpu_possible(hart, true);
set_cpu_present(hart, true);
if (hart == smp_processor_id()) {
BUG_ON(im_okay_therefore_i_am);
im_okay_therefore_i_am = 1;
}
hart = riscv_of_processor_hartid(dn);
if (hart < 0)
continue;
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = 1;
continue;
}
cpuid_to_hartid_map(cpuid) = hart;
set_cpu_possible(cpuid, true);
set_cpu_present(cpuid, true);
cpuid++;
}
BUG_ON(!im_okay_therefore_i_am);
BUG_ON(!found_boot_cpu);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int hartid = cpuid_to_hartid_map(cpu);
tidle->thread_info.cpu = cpu;
/*
......@@ -79,8 +88,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
* the spinning harts that they can continue the boot process.
*/
smp_mb();
__cpu_up_stack_pointer[cpu] = task_stack_page(tidle) + THREAD_SIZE;
__cpu_up_task_pointer[cpu] = tidle;
WRITE_ONCE(__cpu_up_stack_pointer[hartid],
task_stack_page(tidle) + THREAD_SIZE);
WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
while (!cpu_online(cpu))
cpu_relax();
......@@ -100,14 +110,22 @@ asmlinkage void __init smp_callin(void)
struct mm_struct *mm = &init_mm;
/* All kernel threads share the same mm context. */
atomic_inc(&mm->mm_count);
mmgrab(mm);
current->active_mm = mm;
trap_init();
notify_cpu_starting(smp_processor_id());
set_cpu_online(smp_processor_id(), 1);
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
* a local TLB flush right now just in case.
*/
local_flush_tlb_all();
local_irq_enable();
/*
* Disable preemption before enabling interrupts, so we don't try to
* schedule a CPU that hasn't actually started yet.
*/
preempt_disable();
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
......@@ -8,6 +8,7 @@
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <asm/smp.h>
#include <asm/sbi.h>
/*
......@@ -84,13 +85,16 @@ void riscv_timer_interrupt(void)
static int __init riscv_timer_init_dt(struct device_node *n)
{
int cpu_id = riscv_of_processor_hart(n), error;
int cpuid, hartid, error;
struct clocksource *cs;
if (cpu_id != smp_processor_id())
hartid = riscv_of_processor_hartid(n);
cpuid = riscv_hartid_to_cpuid(hartid);
if (cpuid != smp_processor_id())
return 0;
cs = per_cpu_ptr(&riscv_clocksource, cpu_id);
cs = per_cpu_ptr(&riscv_clocksource, cpuid);
clocksource_register_hz(cs, riscv_timebase);
error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING,
......@@ -98,7 +102,7 @@ static int __init riscv_timer_init_dt(struct device_node *n)
riscv_timer_starting_cpu, riscv_timer_dying_cpu);
if (error)
pr_err("RISCV timer register failed [%d] for cpu = [%d]\n",
error, cpu_id);
error, cpuid);
return error;
}
......
......@@ -15,6 +15,7 @@
#include <linux/of_irq.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <asm/smp.h>
/*
* This driver implements a version of the RISC-V PLIC with the actual layout
......@@ -176,7 +177,7 @@ static int plic_find_hart_id(struct device_node *node)
{
for (; node; node = node->parent) {
if (of_device_is_compatible(node, "riscv"))
return riscv_of_processor_hart(node);
return riscv_of_processor_hartid(node);
}
return -1;
......@@ -218,7 +219,7 @@ static int __init plic_init(struct device_node *node,
struct of_phandle_args parent;
struct plic_handler *handler;
irq_hw_number_t hwirq;
int cpu;
int cpu, hartid;
if (of_irq_parse_one(node, i, &parent)) {
pr_err("failed to parse parent for context %d.\n", i);
......@@ -229,12 +230,13 @@ static int __init plic_init(struct device_node *node,
if (parent.args[0] == -1)
continue;
cpu = plic_find_hart_id(parent.np);
if (cpu < 0) {
hartid = plic_find_hart_id(parent.np);
if (hartid < 0) {
pr_warn("failed to parse hart ID for context %d.\n", i);
continue;
}
cpu = riscv_hartid_to_cpuid(hartid);
handler = per_cpu_ptr(&plic_handlers, cpu);
handler->present = true;
handler->ctxid = i;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment