Commit 15cddddb authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: Add support for hotplug cpus

From: Joel Schopp <jschopp@austin.ibm.com>

Add support for hotplug cpus
parent eabf4910
@@ -248,6 +248,14 @@ source "fs/Kconfig.binfmt"
source "drivers/pci/Kconfig"
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP && HOTPLUG && EXPERIMENTAL
---help---
Say Y here to be able to turn CPUs off and on.
Say N if you are unsure.
source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
...
@@ -26,6 +26,7 @@
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -150,12 +151,18 @@ int default_idle(void)
}
schedule();
if (cpu_is_offline(smp_processor_id()) &&
system_state == SYSTEM_RUNNING)
cpu_die();
}
return 0;
}
#ifdef CONFIG_PPC_PSERIES
DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
int dedicated_idle(void)
{
long oldval;
@@ -236,6 +243,9 @@ int dedicated_idle(void)
HMT_medium();
lpaca->xLpPaca.xIdle = 0;
schedule();
if (cpu_is_offline(smp_processor_id()) &&
system_state == SYSTEM_RUNNING)
cpu_die();
}
return 0;
}
@@ -245,6 +255,10 @@ int shared_idle(void)
struct paca_struct *lpaca = get_paca();
while (1) {
if (cpu_is_offline(smp_processor_id()) &&
system_state == SYSTEM_RUNNING)
cpu_die();
/* Indicate to the HV that we are idle. Now would be
* a good time to find other work to dispatch. */
lpaca->xLpPaca.xIdle = 1;
...
@@ -683,6 +683,7 @@ static struct proc_dir_entry * root_irq_dir;
static struct proc_dir_entry * irq_dir [NR_IRQS];
static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
/* Protected by irq descriptor spinlock */
#ifdef CONFIG_IRQ_ALL_CPUS
cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
#else /* CONFIG_IRQ_ALL_CPUS */
@@ -702,16 +703,17 @@ static int irq_affinity_read_proc (char *page, char **start, off_t off,
static int irq_affinity_write_proc (struct file *file, const char *buffer,
unsigned long count, void *data)
{
int irq = (long)data, full_count = count, err;
int irq = (long)data;
int ret;
cpumask_t new_value, tmp;
cpumask_t allcpus = CPU_MASK_ALL;
if (!irq_desc[irq].handler->set_affinity)
return -EIO;
err = cpumask_parse(buffer, count, new_value);
if (err)
return err;
ret = cpumask_parse(buffer, count, new_value);
if (ret != 0)
return ret;
/*
* We check for CPU_MASK_ALL in xics to send irqs to all cpus.
@@ -721,19 +723,30 @@ static int irq_affinity_write_proc (struct file *file, const char *buffer,
*/
cpus_and(new_value, new_value, allcpus);
/*
* Grab lock here so cpu_online_map can't change, and also
* protect irq_affinity[].
*/
spin_lock(&irq_desc[irq].lock);
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
cpus_and(tmp, new_value, cpu_online_map);
if (cpus_empty(tmp))
return -EINVAL;
if (cpus_empty(tmp)) {
ret = -EINVAL;
goto out;
}
irq_affinity[irq] = new_value;
irq_desc[irq].handler->set_affinity(irq, new_value);
ret = count;
return full_count;
out:
spin_unlock(&irq_desc[irq].lock);
return ret;
}
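The handler above is what a userspace write to /proc/irq/<N>/smp_affinity lands in. A small usage sketch, assuming a hypothetical IRQ 19 and mask 0x3 (CPUs 0 and 1); a mask containing no online cpus is rejected with -EINVAL as shown above:

#include <stdio.h>

int main(void)
{
	/* Hypothetical IRQ number; the hex mask selects target cpus. */
	FILE *f = fopen("/proc/irq/19/smp_affinity", "w");

	if (!f)
		return 1;
	fputs("3\n", f);
	return fclose(f) != 0;
}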
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
@@ -946,5 +959,4 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
}
#endif
@@ -494,6 +494,25 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/* This version can't take the spinlock. */
void rtas_stop_self(void)
{
struct rtas_args *rtas_args = &(get_paca()->xRtas);
rtas_args->token = rtas_token("stop-self");
BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE);
rtas_args->nargs = 0;
rtas_args->nret = 1;
rtas_args->rets = &(rtas_args->args[0]);
printk("%u %u Ready to die...\n",
smp_processor_id(), hard_smp_processor_id());
enter_rtas((void *)__pa(rtas_args));
panic("Alas, I survived.\n");
}
#endif /* CONFIG_HOTPLUG_CPU */
EXPORT_SYMBOL(rtas_firmware_flash_list);
EXPORT_SYMBOL(rtas_token);
...
@@ -25,6 +25,7 @@
#include <linux/version.h>
#include <linux/tty.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/processor.h>
@@ -338,8 +339,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
return 0;
}
if (!cpu_online(cpu_id))
/* We only show online cpus: disable preempt (overzealous, I
* know) to prevent cpu going down. */
preempt_disable();
if (!cpu_online(cpu_id)) {
preempt_enable();
return 0;
}
#ifdef CONFIG_SMP
pvr = per_cpu(pvr, cpu_id);
@@ -372,7 +378,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
ppc_proc_freq % 1000000);
seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min);
preempt_enable();
return 0;
}
...
@@ -230,10 +230,237 @@ static void __devinit smp_openpic_setup_cpu(int cpu)
do_openpic_setup_cpu();
}
#ifdef CONFIG_HOTPLUG_CPU
/* Get state of physical CPU.
* Return codes:
* 0 - The processor is in the RTAS stopped state
* 1 - stop-self is in progress
* 2 - The processor is not in the RTAS stopped state
* -1 - Hardware Error
* -2 - Hardware Busy, Try again later.
*/
static int query_cpu_stopped(unsigned int pcpu)
{
long cpu_status;
int status, qcss_tok;
qcss_tok = rtas_token("query-cpu-stopped-state");
BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE);
status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
if (status != 0) {
printk(KERN_ERR
"RTAS query-cpu-stopped-state failed: %i\n", status);
return status;
}
return cpu_status;
}
int __cpu_disable(void)
{
/* FIXME: go put this in a header somewhere */
extern void xics_migrate_irqs_away(void);
systemcfg->processorCount--;
/* fix boot_cpuid here */
if (smp_processor_id() == boot_cpuid)
boot_cpuid = any_online_cpu(cpu_online_map);
/* FIXME: abstract this to not be platform specific later on */
xics_migrate_irqs_away();
return 0;
}
void __cpu_die(unsigned int cpu)
{
int tries;
int cpu_status;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
for (tries = 0; tries < 5; tries++) {
cpu_status = query_cpu_stopped(pcpu);
if (cpu_status == 0)
break;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ);
}
if (cpu_status != 0) {
printk("Querying DEAD? cpu %i (%i) shows %i\n",
cpu, pcpu, cpu_status);
}
/* Isolation and deallocation are definitely done by
* drslot_chrp_cpu. If they were not they would be
* done here. Change isolate state to Isolate and
* change allocation-state to Unusable.
*/
paca[cpu].xProcStart = 0;
/* So we can recognize if it fails to come up next time. */
cpu_callin_map[cpu] = 0;
}
/* Kill this cpu */
void cpu_die(void)
{
local_irq_disable();
rtas_stop_self();
/* Should never get here... */
BUG();
for(;;);
}
/* Search all cpu device nodes for an offline logical cpu. If a
* device node has a "ibm,my-drc-index" property (meaning this is an
* LPAR), paranoid-check whether we own the cpu. For each "thread"
* of a cpu, if it is offline and has the same hw index as before,
* grab that in preference.
*/
static unsigned int find_physical_cpu_to_start(unsigned int old_hwindex)
{
struct device_node *np = NULL;
unsigned int best = -1U;
while ((np = of_find_node_by_type(np, "cpu"))) {
int nr_threads, len;
u32 *index = (u32 *)get_property(np, "ibm,my-drc-index", NULL);
u32 *tid = (u32 *)
get_property(np, "ibm,ppc-interrupt-server#s", &len);
if (!tid)
tid = (u32 *)get_property(np, "reg", &len);
if (!tid)
continue;
/* If there is a drc-index, make sure that we own
* the cpu.
*/
if (index) {
int state;
int rc = rtas_get_sensor(9003, *index, &state);
if (rc != 0 || state != 1)
continue;
}
nr_threads = len / sizeof(u32);
while (nr_threads--) {
if (0 == query_cpu_stopped(tid[nr_threads])) {
best = tid[nr_threads];
if (best == old_hwindex)
goto out;
}
}
}
out:
of_node_put(np);
return best;
}
/**
* smp_startup_cpu() - start the given cpu
*
* At boot time, there is nothing to do. At run-time, call RTAS with
* the appropriate start location, if the cpu is in the RTAS stopped
* state.
*
* Returns:
* 0 - failure
* 1 - success
*/
static inline int __devinit smp_startup_cpu(unsigned int lcpu)
{
int status;
extern void (*pseries_secondary_smp_init)(unsigned int cpu);
unsigned long start_here = __pa(pseries_secondary_smp_init);
unsigned int pcpu;
/* At boot time the cpus are already spinning in hold
* loops, so nothing to do. */
if (system_state == SYSTEM_BOOTING)
return 1;
pcpu = find_physical_cpu_to_start(get_hard_smp_processor_id(lcpu));
if (pcpu == -1U) {
printk(KERN_INFO "No more cpus available, failing\n");
return 0;
}
/* Fixup atomic count: it exited inside IRQ handler. */
((struct task_struct *)paca[lcpu].xCurrent)->thread_info->preempt_count
= 0;
/* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */
paca[lcpu].xStab_data.next_round_robin = 0;
/* At boot this is done in prom.c. */
paca[lcpu].xHwProcNum = pcpu;
status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
pcpu, start_here, lcpu);
if (status != 0) {
printk(KERN_ERR "start-cpu failed: %i\n", status);
return 0;
}
return 1;
}
static inline void look_for_more_cpus(void)
{
int num_addr_cell, num_size_cell, len, i, maxcpus;
struct device_node *np;
unsigned int *ireg;
/* Find the property which will tell us about how many CPUs
* we're allowed to have. */
if ((np = find_path_device("/rtas")) == NULL) {
printk(KERN_ERR "Could not find /rtas in device tree!");
return;
}
num_addr_cell = prom_n_addr_cells(np);
num_size_cell = prom_n_size_cells(np);
ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len);
if (ireg == NULL) {
/* FIXME: make sure not marked as lrdr_capable() */
return;
}
maxcpus = ireg[num_addr_cell + num_size_cell];
/* DRENG need to account for threads here too */
if (maxcpus > NR_CPUS) {
printk(KERN_WARNING
"Partition configured for %d cpus, "
"operating system maximum is %d.\n", maxcpus, NR_CPUS);
maxcpus = NR_CPUS;
} else
printk(KERN_INFO "Partition configured for %d cpus.\n",
maxcpus);
/* Make those cpus (which might appear later) possible too. */
for (i = 0; i < maxcpus; i++)
cpu_set(i, cpu_possible_map);
}
#else /* ... CONFIG_HOTPLUG_CPU */
static inline int __devinit smp_startup_cpu(unsigned int lcpu)
{
return 1;
}
static inline void look_for_more_cpus(void)
{
}
#endif /* CONFIG_HOTPLUG_CPU */
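Taken together, the hooks above slot into the generic offline path roughly as follows. This is a simplified sketch under stated assumptions, not the literal kernel/cpu.c code: in reality __cpu_disable() is run on the dying cpu itself (via stop_machine) rather than called directly:

/* Assumed, simplified ordering of an offline request for one cpu. */
extern int __cpu_disable(void);		/* dying cpu: migrate irqs, fix boot_cpuid */
extern void __cpu_die(unsigned int cpu);	/* survivor: poll query-cpu-stopped-state */

static int sketch_cpu_offline(unsigned int cpu)
{
	int err = __cpu_disable();	/* really executed *on* the dying cpu */

	if (err)
		return err;
	/* The dying cpu's idle loop notices cpu_is_offline() and calls
	 * cpu_die() -> rtas_stop_self(), which never returns. */
	__cpu_die(cpu);			/* wait for RTAS to report it stopped */
	return 0;
}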
static void smp_pSeries_kick_cpu(int nr)
{
BUG_ON(nr < 0 || nr >= NR_CPUS);
if (!smp_startup_cpu(nr))
return;
/* The processor is currently spinning, waiting
* for the xProcStart field to become non-zero
* After we set xProcStart, the processor will
@@ -241,7 +468,7 @@ static void smp_pSeries_kick_cpu(int nr)
*/
paca[nr].xProcStart = 1;
}
#endif
#endif /* CONFIG_PPC_PSERIES */
static void __init smp_space_timers(unsigned int max_cpus)
{
@@ -462,12 +689,9 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
int wait)
{
struct call_data_struct data;
int ret = -1, cpus = num_online_cpus()-1;
int ret = -1, cpus;
unsigned long timeout;
if (!cpus)
return 0;
data.func = func;
data.info = info;
atomic_set(&data.started, 0);
@@ -476,6 +700,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
atomic_set(&data.finished, 0);
spin_lock(&call_lock);
/* Must grab online cpu count with preempt disabled, otherwise
* it can change. */
cpus = num_online_cpus() - 1;
if (!cpus) {
ret = 0;
goto out;
}
call_data = &data;
wmb();
/* Send a message to all other CPUs and wait for them to respond */
@@ -565,8 +797,31 @@ static void __devinit smp_store_cpu_info(int id)
per_cpu(pvr, id) = _get_PVR();
}
static void __init smp_create_idle(unsigned int cpu)
{
struct pt_regs regs;
struct task_struct *p;
/* create a process for the processor */
/* only regs.msr is actually used, and 0 is OK for it */
memset(&regs, 0, sizeof(struct pt_regs));
p = copy_process(CLONE_VM | CLONE_IDLETASK,
0, &regs, 0, NULL, NULL);
if (IS_ERR(p))
panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
wake_up_forked_process(p);
init_idle(p, cpu);
unhash_process(p);
paca[cpu].xCurrent = (u64)p;
current_set[cpu] = p->thread_info;
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
/*
* setup_cpu may need to be called on the boot cpu. We haven't
* spun any cpus up but let's be paranoid.
@@ -593,6 +848,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
* number of msecs off until someone does a settimeofday()
*/
do_gtod.tb_orig_stamp = tb_last_stamp;
look_for_more_cpus();
#endif
max_cpus = smp_ops->probe();
@@ -601,20 +858,31 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
__save_cpu_setup();
smp_space_timers(max_cpus);
for_each_cpu(cpu)
if (cpu != boot_cpuid)
smp_create_idle(cpu);
}
void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
/* FIXME: what about cpu_possible()? */
BUG_ON(smp_processor_id() != boot_cpuid);
/* cpu_possible is set up in prom.c */
cpu_set(boot_cpuid, cpu_online_map);
paca[boot_cpuid].xCurrent = (u64)current;
current_set[boot_cpuid] = current->thread_info;
}
int __devinit __cpu_up(unsigned int cpu)
{
struct pt_regs regs;
struct task_struct *p;
int c;
/* At boot, don't bother with non-present cpus -JSCHOPP */
if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu))
return -ENOENT;
paca[cpu].prof_counter = 1;
paca[cpu].prof_multiplier = 1;
paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock;
@@ -632,19 +900,9 @@ int __devinit __cpu_up(unsigned int cpu)
paca[cpu].xStab_data.real = virt_to_abs(tmp);
}
/* create a process for the processor */
/* only regs.msr is actually used, and 0 is OK for it */
memset(&regs, 0, sizeof(struct pt_regs));
p = copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL);
if (IS_ERR(p))
panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
wake_up_forked_process(p);
init_idle(p, cpu);
unhash_process(p);
paca[cpu].xCurrent = (u64)p;
current_set[cpu] = p->thread_info;
/* The information for processor bringup must be written out
* to main store before we release the processor. */
mb();
/* The information for processor bringup must
* be written out to main store before we release
...
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/radix-tree.h>
#include <linux/cpu.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/pgtable.h>
@@ -372,6 +373,9 @@ irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
int cpu = smp_processor_id();
ops->qirr_info(cpu, 0xff);
WARN_ON(cpu_is_offline(cpu));
while (xics_ipi_message[cpu].value) {
if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
&xics_ipi_message[cpu].value)) {
@@ -514,6 +518,9 @@ void xics_init_IRQ(void)
if (systemcfg->platform == PLATFORM_PSERIES) {
#ifdef CONFIG_SMP
for_each_cpu(i) {
/* FIXME: Do this dynamically! --RR */
if (!cpu_present_at_boot(i))
continue;
xics_per_cpu[i] = __ioremap((ulong)inodes[get_hard_smp_processor_id(i)].addr,
(ulong)inodes[get_hard_smp_processor_id(i)].size,
_PAGE_NO_CACHE);
@@ -575,9 +582,7 @@ void xics_request_IPIs(void)
static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
{
irq_desc_t *desc = irq_desc + virq;
unsigned int irq;
unsigned long flags;
long status;
unsigned long xics_status[2];
unsigned long newmask;
@@ -589,14 +594,12 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
if (irq == XICS_IPI)
return;
spin_lock_irqsave(&desc->lock, flags);
status = rtas_call(ibm_get_xive, 1, 3, (void *)&xics_status, irq);
if (status) {
printk(KERN_ERR "xics_set_affinity: irq=%d ibm,get-xive "
"returns %ld\n", irq, status);
goto out;
return;
}
/* For the moment only implement delivery to all cpus or one cpu */
@@ -605,7 +608,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
} else {
cpus_and(tmp, cpu_online_map, cpumask);
if (cpus_empty(tmp))
goto out;
return;
newmask = get_hard_smp_processor_id(first_cpu(cpumask));
}
@@ -615,9 +618,86 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
if (status) {
printk(KERN_ERR "xics_set_affinity irq=%d ibm,set-xive "
"returns %ld\n", irq, status);
goto out;
return;
}
}
#ifdef CONFIG_HOTPLUG_CPU
/* Interrupts are disabled. */
void xics_migrate_irqs_away(void)
{
int set_indicator = rtas_token("set-indicator");
const unsigned long giqs = 9005UL; /* Global Interrupt Queue Server */
unsigned long status = 0;
unsigned int irq, cpu = smp_processor_id();
unsigned long xics_status[2];
unsigned long flags;
BUG_ON(set_indicator == RTAS_UNKNOWN_SERVICE);
/* Reject any interrupt that was queued to us... */
ops->cppr_info(cpu, 0);
iosync();
/* Refuse any new interrupts... */
rtas_call(set_indicator, 3, 1, &status, giqs,
hard_smp_processor_id(), 0UL);
WARN_ON(status != 0);
/* Allow IPIs again... */
ops->cppr_info(cpu, DEFAULT_PRIORITY);
iosync();
printk(KERN_WARNING "HOTPLUG: Migrating IRQs away\n");
for_each_irq(irq) {
irq_desc_t *desc = get_irq_desc(irq);
/* We need to get IPIs still. */
if (irq_offset_down(irq) == XICS_IPI)
continue;
/* We only need to migrate enabled IRQS */
if (desc == NULL || desc->handler == NULL
|| desc->action == NULL
|| desc->handler->set_affinity == NULL)
continue;
spin_lock_irqsave(&desc->lock, flags);
status = rtas_call(ibm_get_xive, 1, 3, (void *)&xics_status,
irq);
if (status) {
printk(KERN_ERR "migrate_irqs_away: irq=%d "
"ibm,get-xive returns %ld\n",
irq, status);
goto unlock;
}
/*
* We only support delivery to all cpus or to one cpu.
* The irq has to be migrated only in the single cpu
* case.
*/
if (xics_status[0] != get_hard_smp_processor_id(cpu))
goto unlock;
printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n",
irq, cpu);
/* Reset affinity to all cpus */
xics_status[0] = default_distrib_server;
status = rtas_call(ibm_set_xive, 3, 1, NULL,
irq, xics_status[0], xics_status[1]);
if (status)
printk(KERN_ERR "migrate_irqs_away irq=%d "
"ibm,set-xive returns %ld\n",
irq, status);
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
}
out:
spin_unlock_irqrestore(&desc->lock, flags);
}
#endif
@@ -219,6 +219,8 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
extern spinlock_t rtas_data_buf_lock;
extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
extern void rtas_stop_self(void);
/* RMO buffer reserved for user-space RTAS use */
extern unsigned long rtas_rmo_buf;
...
@@ -70,6 +70,9 @@ extern cpumask_t cpu_available_map;
void smp_init_iSeries(void);
void smp_init_pSeries(void);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void cpu_die(void) __attribute__((noreturn));
#endif /* !(CONFIG_SMP) */
#define get_hard_smp_processor_id(CPU) (paca[(CPU)].xHwProcNum)
...