Commit 4f0234f4 authored by David S. Miller

[SPARC64]: Initial LDOM cpu hotplug support.

Only adding cpus is supported at the moment, removal
will come next.

When new cpus are configured, the machine description is
updated.  When we get the configure request we pass in a
cpu mask of to-be-added cpus to the mdesc CPU node parser
so it only fetches information for those cpus.  That code
also proceeds to update the SMT/multi-core scheduling bitmaps.

cpu_up() does all the work and we return the status back
over the DS channel.

CPUs via dr-cpu need to be booted straight out of the
hypervisor, and this requires:

1) A new trampoline mechanism.  CPUs are booted straight
   out of the hypervisor with MMU disabled and running in
   physical addresses with no mappings installed in the TLB.

   The new hvtramp.S code sets up the critical cpu state,
   installs the locked TLB mappings for the kernel, and
   turns the MMU on.  It then proceeds to follow the logic
   of the existing trampoline.S SMP cpu bringup code.

2) All calls into OBP have to be disallowed when domaining
   is enabled.  Since cpus boot straight into the kernel from
   the hypervisor, OBP has no state about that cpu and therefore
   cannot handle being invoked on that cpu.

   Luckily it's only a handful of interfaces which can be called
   after the OBP device tree is obtained.  For example, rebooting,
   halting, powering-off, and setting options node variables.

CPU removal support will require some infrastructure changes
here.  Namely we'll have to process the requests via a true
kernel thread instead of in a workqueue.  workqueues run on
a per-cpu thread, but when unconfiguring we might need to
force the thread to execute on another cpu if the current cpu
is the one being removed.  Removal of a cpu also causes the kernel
to destroy that cpu's workqueue running thread.

Another issue on removal is that we may have interrupts still
pointing to the cpu-to-be-removed.  So new code will be needed
to walk the active INO list and retarget those interrupts as needed.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b3e13fbe
......@@ -108,6 +108,15 @@ config SECCOMP
source kernel/Kconfig.hz
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
select HOTPLUG
---help---
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
source "init/Kconfig"
config SYSVIPC_COMPAT
......@@ -307,6 +316,7 @@ config SUN_IO
config SUN_LDOMS
bool "Sun Logical Domains support"
select HOTPLUG_CPU
help
Say Y here if you want to support virtual devices via
Logical Domains.
......
......@@ -12,7 +12,8 @@ obj-y := process.o setup.o cpu.o idprom.o \
irq.o ptrace.o time.o sys_sparc.o signal.o \
unaligned.o central.o pci.o starfire.o semaphore.o \
power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o \
hvtramp.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \
......
This diff is collapsed.
/* hvtramp.S: Hypervisor start-cpu trampoline code.
*
* Copyright (C) 2007 David S. Miller <davem@davemloft.net>
*/
#include <asm/thread_info.h>
#include <asm/hypervisor.h>
#include <asm/scratchpad.h>
#include <asm/spitfire.h>
#include <asm/hvtramp.h>
#include <asm/pstate.h>
#include <asm/ptrace.h>
#include <asm/asi.h>
.text
.align 8
.globl hv_cpu_startup, hv_cpu_startup_end
/* This code executes directly out of the hypervisor
* with physical addressing (va==pa). %o0 contains
* our client argument which for Linux points to
* a descriptor data structure which defines the
* MMU entries we need to load up.
*
* After we set things up we enable the MMU and call
* into the kernel.
*
* First set up basic privileged cpu state.
*/
/* hv_cpu_startup: entry point for a cpu started directly by the hypervisor.
 *
 * In:  %o0 = pointer to the hvtramp descriptor (HVTRAMP_DESCR_* layout).
 * Register use below:
 *   %l0 = descriptor pointer
 *   %l1 = index of the mapping being installed
 *   %l2 = number of mappings (HVTRAMP_DESCR_NUM_MAPPINGS)
 *   %l3 = pointer to the current mapping entry
 *   %l6 = thread register value (HVTRAMP_DESCR_THREAD_REG)
 * Any non-zero hypervisor status sends the cpu to the spin loop at 80.
 * Control never returns to the caller.
 */
hv_cpu_startup:
/* Global level 0, interrupt level 15, and a clean register-window
 * state (6 saveable/clean windows, none restorable) at trap level 0.
 */
wrpr %g0, 0, %gl
wrpr %g0, 15, %pil
wrpr %g0, 0, %canrestore
wrpr %g0, 0, %otherwin
wrpr %g0, 6, %cansave
wrpr %g0, 6, %cleanwin
wrpr %g0, 0, %cwp
wrpr %g0, 0, %wstate
wrpr %g0, 0, %tl
/* Point the trap base address at sparc64_ttable_tl0. */
sethi %hi(sparc64_ttable_tl0), %g1
wrpr %g1, %tba
/* Keep the descriptor pointer in a local; %o0 is reused for hypervisor
 * call arguments and return status below.
 */
mov %o0, %l0
/* Store the descriptor's cpu number in the SCRATCHPAD_CPUID scratchpad
 * register.
 */
lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
mov SCRATCHPAD_CPUID, %g2
stxa %g1, [%g2] ASI_SCRATCHPAD
/* Store the fault-info virtual address in the scratchpad register at
 * offset 0.
 */
ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
stxa %g2, [%g0] ASI_SCRATCHPAD
/* Set up the mapping loop: index, count, and pointer to maps[0]. */
mov 0, %l1
lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
add %l0, HVTRAMP_DESCR_MAPS, %l3
/* HV_FAST_MMU_MAP_PERM_ADDR(%o0 = vaddr, %o1 = 0, %o2 = tte,
 * %o3 = HV_MMU_IMMU | HV_MMU_DMMU) for the current mapping.
 * NOTE(review): the body runs once before the count is tested, so
 * num_mappings == 0 would still install maps[0] -- confirm callers
 * always supply at least one mapping.
 */
1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
clr %o1
ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
ta HV_FAST_TRAP
/* Non-zero status in %o0: give up and spin at 80. */
brnz,pn %o0, 80f
nop
/* Next mapping.  The branch is annulled, so the delay-slot add that
 * advances %l3 only executes when the loop is taken again.
 */
add %l1, 1, %l1
cmp %l1, %l2
blt,a,pt %xcc, 1b
add %l3, HVTRAMP_MAPPING_SIZE, %l3
/* HV_FAST_MMU_FAULT_AREA_CONF(%o0 = fault-info physical address). */
ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
ta HV_FAST_TRAP
brnz,pn %o0, 80f
nop
/* Privileged mode with the FPU enabled; interrupts stay off for now. */
wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
/* Fetch the thread register value while the descriptor is still being
 * addressed physically.
 */
ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
/* HV_FAST_MMU_ENABLE(%o0 = 1, %o1 = address of the local label 1 below).
 * Presumably the hypervisor resumes execution at %o1 with the MMU on;
 * if the trap returns here instead, fall into the spin loop -- confirm
 * against the hypervisor API.
 */
mov 1, %o0
set 1f, %o1
mov HV_FAST_MMU_ENABLE, %o5
ta HV_FAST_TRAP
ba,pt %xcc, 80f
nop
1:
/* Clear %fprs and make ASI_P the default ASI. */
wr %g0, 0, %fprs
wr %g0, ASI_P, %asi
/* Zero the primary and secondary MMU context registers. */
mov PRIMARY_CONTEXT, %g7
stxa %g0, [%g7] ASI_MMU
membar #Sync
mov SECONDARY_CONTEXT, %g7
stxa %g0, [%g7] ASI_MMU
membar #Sync
/* %g6 = thread register from the descriptor (later handed to
 * init_cur_cpu_trap), %g4 = value loaded from its TI_TASK slot.
 */
mov %l6, %g6
ldx [%g6 + TI_TASK], %g4
/* %sp = %g6 + (1 << THREAD_SHIFT) - (STACKFRAME_SZ + STACK_BIAS);
 * %fp = 0.
 */
mov 1, %g5
sllx %g5, THREAD_SHIFT, %g5
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
add %g6, %g5, %sp
mov 0, %fp
call init_irqwork_curcpu
nop
call hard_smp_processor_id
nop
/* sun4v_init_mondo_queues(0, cpuid, 0, 1); the last argument is set in
 * the call's delay slot.  Presumably this means "do not allocate, do
 * load" -- confirm against the function's definition.
 */
mov %o0, %o1
mov 0, %o0
mov 0, %o2
call sun4v_init_mondo_queues
mov 1, %o3
/* init_cur_cpu_trap(%g6); the argument is set in the delay slot. */
call init_cur_cpu_trap
mov %g6, %o0
/* Same pstate as before, now with interrupts enabled (PSTATE_IE). */
wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
call smp_callin
nop
/* cpu_idle(0); cpu_panic is only reached if cpu_idle returns. */
call cpu_idle
mov 0, %o0
call cpu_panic
nop
/* Failure / fall-through sink: spin here forever. */
80: ba,pt %xcc, 80b
nop
.align 8
/* End marker exported alongside hv_cpu_startup (see .globl above). */
hv_cpu_startup_end:
......@@ -434,6 +434,22 @@ static void __init report_platform_properties(void)
if (v)
printk("PLATFORM: max-cpus [%lu]\n", *v);
#ifdef CONFIG_SMP
{
int max_cpu, i;
if (v) {
max_cpu = *v;
if (max_cpu > NR_CPUS)
max_cpu = NR_CPUS;
} else {
max_cpu = NR_CPUS;
}
for (i = 0; i < max_cpu; i++)
cpu_set(i, cpu_possible_map);
}
#endif
mdesc_release(hp);
}
......@@ -451,9 +467,9 @@ static int inline find_in_proplist(const char *list, const char *match, int len)
return 0;
}
static void __init fill_in_one_cache(cpuinfo_sparc *c,
struct mdesc_handle *hp,
u64 mp)
static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
struct mdesc_handle *hp,
u64 mp)
{
const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
......@@ -496,7 +512,8 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c,
}
}
static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
int core_id)
{
u64 a;
......@@ -529,7 +546,7 @@ static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
}
}
static void __init set_core_ids(struct mdesc_handle *hp)
static void __devinit set_core_ids(struct mdesc_handle *hp)
{
int idx;
u64 mp;
......@@ -554,7 +571,8 @@ static void __init set_core_ids(struct mdesc_handle *hp)
}
}
static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
int proc_id)
{
u64 a;
......@@ -573,8 +591,8 @@ static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
}
}
static void __init __set_proc_ids(struct mdesc_handle *hp,
const char *exec_unit_name)
static void __devinit __set_proc_ids(struct mdesc_handle *hp,
const char *exec_unit_name)
{
int idx;
u64 mp;
......@@ -595,13 +613,14 @@ static void __init __set_proc_ids(struct mdesc_handle *hp,
}
}
static void __init set_proc_ids(struct mdesc_handle *hp)
static void __devinit set_proc_ids(struct mdesc_handle *hp)
{
__set_proc_ids(hp, "exec_unit");
__set_proc_ids(hp, "exec-unit");
}
static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def)
static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
unsigned char def)
{
u64 val;
......@@ -619,8 +638,8 @@ static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned
*mask = ((1U << def) * 64U) - 1U;
}
static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp,
struct trap_per_cpu *tb)
static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
struct trap_per_cpu *tb)
{
const u64 *val;
......@@ -637,7 +656,7 @@ static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp,
get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
}
static void __init mdesc_fill_in_cpu_data(void)
void __devinit mdesc_fill_in_cpu_data(cpumask_t mask)
{
struct mdesc_handle *hp = mdesc_grab();
u64 mp;
......@@ -658,6 +677,8 @@ static void __init mdesc_fill_in_cpu_data(void)
#ifdef CONFIG_SMP
if (cpuid >= NR_CPUS)
continue;
if (!cpu_isset(cpuid, mask))
continue;
#else
/* On uniprocessor we only want the values for the
* real physical cpu the kernel booted onto, however
......@@ -696,7 +717,6 @@ static void __init mdesc_fill_in_cpu_data(void)
#ifdef CONFIG_SMP
cpu_set(cpuid, cpu_present_map);
cpu_set(cpuid, phys_cpu_present_map);
#endif
c->core_id = 0;
......@@ -719,6 +739,7 @@ void __init sun4v_mdesc_init(void)
{
struct mdesc_handle *hp;
unsigned long len, real_len, status;
cpumask_t mask;
(void) sun4v_mach_desc(0UL, 0UL, &len);
......@@ -742,5 +763,7 @@ void __init sun4v_mdesc_init(void)
cur_mdesc = hp;
report_platform_properties();
mdesc_fill_in_cpu_data();
cpus_setall(mask);
mdesc_fill_in_cpu_data(mask);
}
......@@ -1808,7 +1808,7 @@ static void __init of_fill_in_cpu_data(void)
#ifdef CONFIG_SMP
cpu_set(cpuid, cpu_present_map);
cpu_set(cpuid, phys_cpu_present_map);
cpu_set(cpuid, cpu_possible_map);
#endif
}
......
......@@ -41,6 +41,7 @@
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/ldc.h>
extern void calibrate_delay(void);
......@@ -49,12 +50,18 @@ int sparc64_multi_core __read_mostly;
/* Please don't make this stuff initdata!!! --DaveM */
unsigned char boot_cpu_id;
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
EXPORT_SYMBOL(cpu_possible_map);
EXPORT_SYMBOL(cpu_online_map);
EXPORT_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
static cpumask_t smp_commenced_mask;
static cpumask_t cpu_callout_map;
......@@ -84,9 +91,10 @@ extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
void __init smp_callin(void)
void __devinit smp_callin(void)
{
int cpuid = hard_smp_processor_id();
struct trap_per_cpu *tb = &trap_block[cpuid];;
__local_per_cpu_offset = __per_cpu_offset(cpuid);
......@@ -117,6 +125,11 @@ void __init smp_callin(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
if (tb->hdesc) {
kfree(tb->hdesc);
tb->hdesc = NULL;
}
while (!cpu_isset(cpuid, smp_commenced_mask))
rmb();
......@@ -296,14 +309,20 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
/* Alloc the mondo queues, cpu will load them. */
sun4v_init_mondo_queues(0, cpu, 1, 0);
prom_startcpu_cpuid(cpu, entry, cookie);
#ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled)
ldom_startcpu_cpuid(cpu,
(unsigned long) cpu_new_thread);
else
#endif
prom_startcpu_cpuid(cpu, entry, cookie);
} else {
struct device_node *dp = of_find_node_by_cpuid(cpu);
prom_startcpu(dp->node, entry, cookie);
}
for (timeout = 0; timeout < 5000000; timeout++) {
for (timeout = 0; timeout < 50000; timeout++) {
if (callin_flag)
break;
udelay(100);
......@@ -1163,22 +1182,8 @@ int setup_profiling_timer(unsigned int multiplier)
return -EINVAL;
}
/* Constrain the number of cpus to max_cpus. */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int i;
if (num_possible_cpus() > max_cpus) {
for_each_possible_cpu(i) {
if (i != boot_cpu_id) {
cpu_clear(i, phys_cpu_present_map);
cpu_clear(i, cpu_present_map);
if (num_possible_cpus() <= max_cpus)
break;
}
}
}
cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
}
......@@ -1242,6 +1247,20 @@ int __cpuinit __cpu_up(unsigned int cpu)
return ret;
}
#ifdef CONFIG_HOTPLUG_CPU
/* Stub built under CONFIG_HOTPLUG_CPU: cpu removal is not implemented
 * yet, so log which cpu the request ran on and refuse with -ENODEV.
 */
int __cpu_disable(void)
{
	int this_cpu = smp_processor_id();

	printk(KERN_ERR "SMP: __cpu_disable() on cpu %d\n", this_cpu);
	return -ENODEV;
}
/* Stub built under CONFIG_HOTPLUG_CPU: only logs the cpu number it was
 * called with; no teardown work is done here.
 */
void __cpu_die(unsigned int cpu)
{
printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu);
}
#endif
void __init smp_cpus_done(unsigned int max_cpus)
{
unsigned long bogosum = 0;
......
......@@ -124,10 +124,6 @@ EXPORT_SYMBOL(__write_lock);
EXPORT_SYMBOL(__write_unlock);
EXPORT_SYMBOL(__write_trylock);
/* CPU online map and active count. */
EXPORT_SYMBOL(cpu_online_map);
EXPORT_SYMBOL(phys_cpu_present_map);
EXPORT_SYMBOL(smp_call_function);
#endif /* CONFIG_SMP */
......
......@@ -96,6 +96,10 @@ void prom_cmdline(void)
*/
void prom_halt(void)
{
#ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled)
ldom_power_off();
#endif
again:
p1275_cmd("exit", P1275_INOUT(0, 0));
goto again; /* PROM is out to get me -DaveM */
......@@ -103,6 +107,10 @@ void prom_halt(void)
void prom_halt_power_off(void)
{
#ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled)
ldom_power_off();
#endif
p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
/* if nothing else helps, we just halt */
......
......@@ -16,6 +16,7 @@
#include <asm/system.h>
#include <asm/spitfire.h>
#include <asm/pstate.h>
#include <asm/ldc.h>
struct {
long prom_callback; /* 0x00 */
......
......@@ -80,7 +80,8 @@ struct trap_per_cpu {
unsigned int dev_mondo_qmask;
unsigned int resum_qmask;
unsigned int nonresum_qmask;
unsigned int __pad2[3];
unsigned int __pad2[1];
void *hdesc;
} __attribute__((aligned(64)));
extern struct trap_per_cpu trap_block[NR_CPUS];
extern void init_cur_cpu_trap(struct thread_info *);
......
/* NOTE(review): the guard is spelled HVTRAP although this header is
 * included as <asm/hvtramp.h> -- harmless, but confirm it is intended.
 */
#ifndef _SPARC64_HVTRAP_H
#define _SPARC64_HVTRAP_H
#ifndef __ASSEMBLY__
#include <linux/types.h>
/* One MMU mapping for hv_cpu_startup to install: it passes vaddr and tte
 * to HV_FAST_MMU_MAP_PERM_ADDR for both the I-MMU and the D-MMU.
 */
struct hvtramp_mapping {
__u64 vaddr;
__u64 tte;
};
/* Descriptor handed to hv_cpu_startup in %o0.  The trampoline reads it
 * through the HVTRAMP_DESCR_* byte offsets defined below.
 */
struct hvtramp_descr {
/* Stored into the SCRATCHPAD_CPUID scratchpad register. */
__u32 cpu;
/* Number of entries of maps[] the trampoline installs. */
__u32 num_mappings;
/* Stored into the scratchpad register at offset 0. */
__u64 fault_info_va;
/* Passed to HV_FAST_MMU_FAULT_AREA_CONF. */
__u64 fault_info_pa;
/* Becomes %g6 on the new cpu and is passed to init_cur_cpu_trap(). */
__u64 thread_reg;
struct hvtramp_mapping maps[2];
};
/* Trampoline entry point; the parameter name indicates the descriptor is
 * given by physical address.
 */
extern void hv_cpu_startup(unsigned long hvdescr_pa);
#endif
/* Byte offsets of the struct hvtramp_descr members, for use from
 * assembly.  They are maintained by hand and must match the struct
 * layout above.
 */
#define HVTRAMP_DESCR_CPU 0x00
#define HVTRAMP_DESCR_NUM_MAPPINGS 0x04
#define HVTRAMP_DESCR_FAULT_INFO_VA 0x08
#define HVTRAMP_DESCR_FAULT_INFO_PA 0x10
#define HVTRAMP_DESCR_THREAD_REG 0x18
#define HVTRAMP_DESCR_MAPS 0x20
/* Byte offsets within, and total size of, struct hvtramp_mapping. */
#define HVTRAMP_MAPPING_VADDR 0x00
#define HVTRAMP_MAPPING_TTE 0x08
#define HVTRAMP_MAPPING_SIZE 0x10
#endif /* _SPARC64_HVTRAP_H */
......@@ -98,7 +98,7 @@
#define HV_FAST_MACH_EXIT 0x00
#ifndef __ASSEMBLY__
extern void sun4v_mach_exit(unsigned long exit_core);
extern void sun4v_mach_exit(unsigned long exit_code);
#endif
/* Domain services. */
......
......@@ -6,6 +6,8 @@
extern int ldom_domaining_enabled;
extern void ldom_set_var(const char *var, const char *value);
extern void ldom_reboot(const char *boot_command);
extern void ldom_power_off(void);
extern void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg);
/* The event handler will be invoked when link state changes
* or data becomes available on the receive side.
......
......@@ -2,6 +2,7 @@
#define _SPARC64_MDESC_H
#include <linux/types.h>
#include <linux/cpumask.h>
#include <asm/prom.h>
struct mdesc_handle;
......@@ -60,6 +61,8 @@ extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc);
extern void mdesc_update(void);
extern void mdesc_fill_in_cpu_data(cpumask_t mask);
extern void sun4v_mdesc_init(void);
#endif
......@@ -29,9 +29,6 @@
#include <asm/bitops.h>
#include <asm/atomic.h>
extern cpumask_t phys_cpu_present_map;
#define cpu_possible_map phys_cpu_present_map
extern cpumask_t cpu_sibling_map[NR_CPUS];
extern cpumask_t cpu_core_map[NR_CPUS];
extern int sparc64_multi_core;
......@@ -46,6 +43,11 @@ extern int hard_smp_processor_id(void);
extern void smp_fill_in_sib_core_maps(void);
extern unsigned char boot_cpu_id;
#ifdef CONFIG_HOTPLUG_CPU
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
#endif
#endif /* !(__ASSEMBLY__) */
#else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment