Commit ede13d81 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6

* 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6: (37 commits)
  [CELL] spufs: rework list management and associated locking
  [CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
  [CELL] oprofile: enable SPU switch notification to detect currently active SPU tasks
  [CELL] spu_base: locking cleanup
  [CELL] cell: indexing of SPUs based on firmware vicinity properties
  [CELL] spufs: integration of SPE affinity with the scheduler
  [CELL] cell: add placement computation for scheduling of affinity contexts
  [CELL] spufs: extension of spu_create to support affinity definition
  [CELL] cell: add hardcoded spu vicinity information for QS20
  [CELL] cell: add vicinity information on spus
  [CELL] cell: add per BE structure with info about its SPUs
  [CELL] spufs: use find_first_bit() instead of sched_find_first_bit()
  [CELL] spufs: remove unused file argument from spufs_run_spu()
  [CELL] spufs: change decrementer restore timing
  [CELL] spufs: dont halt decrementer at restore step 47
  [CELL] spufs: limit saving MFC_CNTL bits
  [CELL] spufs: fix read and write for decr_status file
  [CELL] spufs: fix decr_status meanings
  [CELL] spufs: remove needless context save/restore code
  [CELL] spufs: fix array size of channel index
  ...
parents 20082208 486acd48
......@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
# Instrumentation Support
#
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_OPROFILE=m
CONFIG_OPROFILE_CELL=y
# CONFIG_KPROBES is not set
#
......
......@@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs)
cpus_in_sr = CPU_MASK_NONE;
}
#endif
#ifdef CONFIG_SPU_BASE
#include <asm/spu.h>
#include <asm/spu_priv1.h>
/*
 * Per-SPU record used on the crash path: the registered spu pointer plus
 * the register values captured by crash_kexec_stop_spus() just before it
 * clears the master run control bit.
 */
struct crash_spu_info {
struct spu *spu; /* NULL until set by crash_register_spus() */
u32 saved_spu_runcntl_RW; /* problem-state spu_runcntl_RW at crash time */
u32 saved_spu_status_R; /* problem-state spu_status_R at crash time */
u32 saved_spu_npc_RW; /* problem-state spu_npc_RW at crash time */
u64 saved_mfc_sr1_RW; /* MFC SR1 value before it was modified */
u64 saved_mfc_dar; /* value returned by spu_mfc_dar_get() */
u64 saved_mfc_dsisr; /* value returned by spu_mfc_dsisr_get() */
};
#define CRASH_NUM_SPUS 16 /* Enough for current hardware */
/* Indexed by spu->number; entries are filled in by crash_register_spus(). */
static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
static void crash_kexec_stop_spus(void)
{
struct spu *spu;
int i;
u64 tmp;
for (i = 0; i < CRASH_NUM_SPUS; i++) {
if (!crash_spu_info[i].spu)
continue;
spu = crash_spu_info[i].spu;
crash_spu_info[i].saved_spu_runcntl_RW =
in_be32(&spu->problem->spu_runcntl_RW);
crash_spu_info[i].saved_spu_status_R =
in_be32(&spu->problem->spu_status_R);
crash_spu_info[i].saved_spu_npc_RW =
in_be32(&spu->problem->spu_npc_RW);
crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
tmp = spu_mfc_sr1_get(spu);
crash_spu_info[i].saved_mfc_sr1_RW = tmp;
tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
spu_mfc_sr1_set(spu, tmp);
__delay(200);
}
}
/*
 * Remember every SPU on @list (linked through full_list) so that
 * crash_kexec_stop_spus() can find it later.  An SPU whose number does
 * not fit in crash_spu_info[] triggers a warning and is not recorded.
 */
void crash_register_spus(struct list_head *list)
{
	struct spu *cur;

	list_for_each_entry(cur, list, full_list) {
		if (!WARN_ON(cur->number >= CRASH_NUM_SPUS))
			crash_spu_info[cur->number].spu = cur;
	}
}
#else
/* No-op stand-in used when CONFIG_SPU_BASE is not defined. */
static inline void crash_kexec_stop_spus(void)
{
}
#endif /* CONFIG_SPU_BASE */
void default_machine_crash_shutdown(struct pt_regs *regs)
{
......@@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
crash_save_cpu(regs, crashing_cpu);
crash_kexec_prepare_cpus(crashing_cpu);
cpu_set(crashing_cpu, cpus_in_crash);
crash_kexec_stop_spus();
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
......@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
static long timezone_offset;
unsigned long ppc_proc_freq;
EXPORT_SYMBOL(ppc_proc_freq);
unsigned long ppc_tb_freq;
static u64 tb_last_jiffy __cacheline_aligned_in_smp;
......
......@@ -15,3 +15,10 @@ config OPROFILE
If unsure, say N.
config OPROFILE_CELL
bool "OProfile for Cell Broadband Engine"
depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
default y
help
Profiling of Cell BE SPUs requires special support enabled
by this option.
......@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
timer_int.o )
oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
cell/spu_profiler.o cell/vma_map.o \
cell/spu_task_sync.o
oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
oprofile-$(CONFIG_6xx) += op_model_7450.o
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef PR_UTIL_H
#define PR_UTIL_H
#include <linux/cpumask.h>
#include <linux/oprofile.h>
#include <asm/cell-pmu.h>
#include <asm/spu.h>
#include "../../platforms/cell/cbe_regs.h"
/* Defines used for sync_start */
#define SKIP_GENERIC_SYNC 0
#define SYNC_START_ERROR -1
#define DO_GENERIC_SYNC 1
struct spu_overlay_info { /* map of sections within an SPU overlay */
unsigned int vma; /* SPU virtual memory address from elf */
unsigned int size; /* size of section from elf */
unsigned int offset; /* offset of section into elf file */
unsigned int buf; /* index into _ovly_buf_table; values begin at 1 */
};
struct vma_to_fileoffset_map { /* map of sections within an SPU program */
struct vma_to_fileoffset_map *next; /* list pointer */
unsigned int vma; /* SPU virtual memory address from elf */
unsigned int size; /* size of section from elf */
unsigned int offset; /* offset of section into elf file */
unsigned int guard_ptr; /* 0 for non-overlay maps; otherwise see below */
unsigned int guard_val; /* value expected at guard_ptr; see below */
/*
* The guard pointer is an entry in the _ovly_buf_table,
* computed using ovly.buf as the index into the table. Since
* ovly.buf values begin at '1' to reference the first (or 0th)
* entry in the _ovly_buf_table, the computation subtracts 1
* from ovly.buf.
* The guard value is stored in the _ovly_buf_table entry and
* is an index (starting at 1) back to the _ovly_table entry
* that is pointing at this _ovly_buf_table entry. So, for
* example, for an overlay scenario with one overlay segment
* and two overlay sections:
* - Section 1 points to the first entry of the
* _ovly_buf_table, which contains a guard value
* of '1', referencing the first (index=0) entry of
* _ovly_table.
* - Section 2 points to the second entry of the
* _ovly_buf_table, which contains a guard value
* of '2', referencing the second (index=1) entry of
* _ovly_table.
*/
};
/* The three functions below are for maintaining and accessing
* the vma-to-fileoffset map.
*/
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
u64 objectid);
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
unsigned int vma, const struct spu *aSpu,
int *grd_val);
void vma_map_free(struct vma_to_fileoffset_map *map);
/*
* Entry point for SPU profiling.
* cycles_reset is the SPU_CYCLES count value specified by the user.
*/
int start_spu_profiling(unsigned int cycles_reset);
void stop_spu_profiling(void);
/* add the necessary profiling hooks */
int spu_sync_start(void);
/* remove the hooks */
int spu_sync_stop(void);
/* Record SPU program counter samples to the oprofile event buffer. */
void spu_sync_buffer(int spu_num, unsigned int *samples,
int num_samples);
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
#endif /* PR_UTIL_H */
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Authors: Maynard Johnson <maynardj@us.ibm.com>
* Carl Love <carll@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/hrtimer.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cell-pmu.h>
#include "pr_util.h"
#define TRACE_ARRAY_SIZE 1024
#define SCALE_SHIFT 14
static u32 *samples;
static int spu_prof_running;
static unsigned int profiling_interval;
#define NUM_SPU_BITS_TRBUF 16
#define SPUS_PER_TB_ENTRY 4
#define SPUS_PER_NODE 8
#define SPU_PC_MASK 0xFFFF
static DEFINE_SPINLOCK(sample_array_lock);
unsigned long sample_array_lock_flags;
/*
 * Compute the sampling timer period (profiling_interval, in nanoseconds)
 * from the CPU frequency and the user supplied cycle count.
 *
 * @freq_khz:     CPU frequency in kHz; 0 means "derive it from
 *                ppc_proc_freq".
 * @cycles_reset: number of cycles between samples.
 */
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
	unsigned int khz = freq_khz ? freq_khz : ppc_proc_freq / 1000;
	unsigned long scaled_ns_per_cycle;

	/* The basic formula is
	 *   ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
	 * Floating point is avoided with the scaled math technique
	 * described in linux/jiffies.h: the per-cycle time is kept
	 * multiplied by 2^SCALE_SHIFT and the scale is removed only
	 * after multiplying by cycles_reset.  That precision is close
	 * enough for the purpose at hand.
	 *
	 * The resulting timeout should be small enough that the hw
	 * trace buffer will not get more than about 1/3 full for the
	 * maximum user specified (the LFSR value) hw sampling frequency,
	 * so the trace buffer never fills even if kernel thread
	 * scheduling varies under a heavy system load.
	 */
	scaled_ns_per_cycle = (USEC_PER_SEC << SCALE_SHIFT) / khz;
	profiling_interval = (scaled_ns_per_cycle * cycles_reset) >> SCALE_SHIFT;
}
/*
 * Read one 128-bit trace buffer entry for @cpu and store the eight SPU
 * program counters it contains into column @entry of the samples array
 * (one row of TRACE_ARRAY_SIZE slots per SPU).
 */
static void spu_pc_extract(int cpu, int entry)
{
	/* One trace buffer entry is 128 bits, read as two 64-bit words. */
	u64 words[2];
	int half, slot;

	cbe_read_trace_buffer(cpu, words);

	/* Each SPU PC occupies 16 bits, so every 64-bit word carries four
	 * of them:
	 *   words[0] holds SPUs 0 1 2 3
	 *   words[1] holds SPUs 4 5 6 7
	 * The highest numbered SPU of a word sits in its low 16 bits, so
	 * peel the fields off from the bottom while counting down.
	 */
	for (half = 0; half < 2; half++) {
		u64 word = words[half];

		for (slot = SPUS_PER_TB_ENTRY - 1; slot >= 0; slot--) {
			int spu = half * SPUS_PER_TB_ENTRY + slot;

			/* The trace entry is the upper 16 bits of the
			 * 18 bit SPU program counter.
			 */
			samples[spu * TRACE_ARRAY_SIZE + entry]
				= (word & SPU_PC_MASK) << 2;
			word >>= NUM_SPU_BITS_TRBUF;
		}
	}
}
static int cell_spu_pc_collection(int cpu)
{
u32 trace_addr;
int entry;
/* process the collected SPU PC for the node */
entry = 0;
trace_addr = cbe_read_pm(cpu, trace_address);
while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
/* there is data in the trace buffer to process */
spu_pc_extract(cpu, entry);
entry++;
if (entry >= TRACE_ARRAY_SIZE)
/* spu_samples is full */
break;
trace_addr = cbe_read_pm(cpu, trace_address);
}
return entry;
}
/*
 * hrtimer callback: collect the SPU PC samples of every node and hand
 * them to spu_sync_buffer(), then re-arm the timer.
 *
 * Returns HRTIMER_RESTART while profiling is running and
 * HRTIMER_NORESTART once spu_prof_running has been cleared.
 */
static enum hrtimer_restart profile_spus(struct hrtimer *timer)
{
	ktime_t kt;
	int cpu, node, k, num_samples, spu_num;
	/* Saved IRQ state must live on this caller's stack: a shared
	 * variable could be overwritten by a concurrent caller before the
	 * lock is acquired, and the wrong state would then be restored.
	 */
	unsigned long flags;

	if (!spu_prof_running)
		goto stop;

	for_each_online_cpu(cpu) {
		/* only the first hardware thread of a node does the work */
		if (cbe_get_hw_thread_id(cpu))
			continue;

		node = cbe_cpu_to_node(cpu);

		/* There should only be one kernel thread at a time processing
		 * the samples.  In the very unlikely case that the processing
		 * is taking a very long time and multiple kernel threads are
		 * started to process the samples, make sure only one kernel
		 * thread is working on the samples array at a time.  The
		 * sample array must be loaded and then processed for a given
		 * cpu.  The sample array is not per cpu.
		 */
		spin_lock_irqsave(&sample_array_lock, flags);
		num_samples = cell_spu_pc_collection(cpu);

		if (num_samples != 0) {
			for (k = 0; k < SPUS_PER_NODE; k++) {
				spu_num = k + (node * SPUS_PER_NODE);
				spu_sync_buffer(spu_num,
						samples + (k * TRACE_ARRAY_SIZE),
						num_samples);
			}
		}

		spin_unlock_irqrestore(&sample_array_lock, flags);
	}
	smp_wmb();	/* ensure spu event buffer updates are written */
			/* don't want events intermingled... */

	kt = ktime_set(0, profiling_interval);
	if (!spu_prof_running)
		goto stop;
	hrtimer_forward(timer, timer->base->get_time(), kt);
	return HRTIMER_RESTART;

 stop:
	printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
	return HRTIMER_NORESTART;
}
static struct hrtimer timer;
/*
* Entry point for SPU profiling.
* NOTE: SPU profiling is done system-wide, not per-CPU.
*
* cycles_reset is the count value specified by the user when
* setting up OProfile to count SPU_CYCLES.
*/
int start_spu_profiling(unsigned int cycles_reset)
{
ktime_t kt;
pr_debug("timer resolution: %lu\n", TICK_NSEC);
kt = ktime_set(0, profiling_interval);
hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
timer.expires = kt;
timer.function = profile_spus;
/* Allocate arrays for collecting SPU PC samples */
samples = kzalloc(SPUS_PER_NODE *
TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
if (!samples)
return -ENOMEM;
spu_prof_running = 1;
hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
return 0;
}
void stop_spu_profiling(void)
{
spu_prof_running = 0;
hrtimer_cancel(&timer);
kfree(samples);
pr_debug("SPU_PROF: stop_spu_profiling issued\n");
}
This diff is collapsed.
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* The code in this source file is responsible for generating
* vma-to-fileOffset maps for both overlay and non-overlay SPU
* applications.
*/
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/elf.h>
#include "pr_util.h"
/* Free every entry of the list starting at @map.  A NULL @map is fine. */
void vma_map_free(struct vma_to_fileoffset_map *map)
{
	struct vma_to_fileoffset_map *doomed;

	for (doomed = map; doomed; doomed = map) {
		/* fetch the successor before the entry goes away */
		map = doomed->next;
		kfree(doomed);
	}
}
unsigned int
vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
const struct spu *aSpu, int *grd_val)
{
/*
* Default the offset to the physical address + a flag value.
* Addresses of dynamically generated code can't be found in the vma
* map. For those addresses the flagged value will be sent on to
* the user space tools so they can be reported rather than just
* thrown away.
*/
u32 offset = 0x10000000 + vma;
u32 ovly_grd;
for (; map; map = map->next) {
if (vma < map->vma || vma >= map->vma + map->size)
continue;
if (map->guard_ptr) {
ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
if (ovly_grd != map->guard_val)
continue;
*grd_val = ovly_grd;
}
offset = vma - map->vma + map->offset;
break;
}
return offset;
}
/*
 * Allocate a new map entry, fill it from the arguments and put it in
 * front of the list @map.  Returns the new list head.
 *
 * On allocation failure the whole existing list @map is freed and NULL
 * is returned, so the caller must not touch its old list pointer.
 */
static struct vma_to_fileoffset_map *
vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
	    unsigned int size, unsigned int offset, unsigned int guard_ptr,
	    unsigned int guard_val)
{
	struct vma_to_fileoffset_map *node;

	node = kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
	if (node) {
		node->next = map;
		node->vma = vma;
		node->size = size;
		node->offset = offset;
		node->guard_ptr = guard_ptr;
		node->guard_val = guard_val;
		return node;
	}

	printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
	       __FUNCTION__, __LINE__);
	vma_map_free(map);
	return NULL;
}
/* Parse SPE ELF header and generate a list of vma_maps.
* A pointer to the first vma_map in the generated list
* of vma_maps is returned. */
struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
unsigned long spu_elf_start)
{
static const unsigned char expected[EI_PAD] = {
[EI_MAG0] = ELFMAG0,
[EI_MAG1] = ELFMAG1,
[EI_MAG2] = ELFMAG2,
[EI_MAG3] = ELFMAG3,
[EI_CLASS] = ELFCLASS32,
[EI_DATA] = ELFDATA2MSB,
[EI_VERSION] = EV_CURRENT,
[EI_OSABI] = ELFOSABI_NONE
};
int grd_val;
struct vma_to_fileoffset_map *map = NULL;
struct spu_overlay_info ovly;
unsigned int overlay_tbl_offset = -1;
unsigned long phdr_start, shdr_start;
Elf32_Ehdr ehdr;
Elf32_Phdr phdr;
Elf32_Shdr shdr, shdr_str;
Elf32_Sym sym;
int i, j;
char name[32];
unsigned int ovly_table_sym = 0;
unsigned int ovly_buf_table_sym = 0;
unsigned int ovly_table_end_sym = 0;
unsigned int ovly_buf_table_end_sym = 0;
unsigned long ovly_table;
unsigned int n_ovlys;
/* Get and validate ELF header. */
if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
goto fail;
if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Unexpected e_ident parsing SPU ELF\n",
__FUNCTION__, __LINE__);
goto fail;
}
if (ehdr.e_machine != EM_SPU) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Unexpected e_machine parsing SPU ELF\n",
__FUNCTION__, __LINE__);
goto fail;
}
if (ehdr.e_type != ET_EXEC) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Unexpected e_type parsing SPU ELF\n",
__FUNCTION__, __LINE__);
goto fail;
}
phdr_start = spu_elf_start + ehdr.e_phoff;
shdr_start = spu_elf_start + ehdr.e_shoff;
/* Traverse program headers. */
for (i = 0; i < ehdr.e_phnum; i++) {
if (copy_from_user(&phdr,
(void *) (phdr_start + i * sizeof(phdr)),
sizeof(phdr)))
goto fail;
if (phdr.p_type != PT_LOAD)
continue;
if (phdr.p_flags & (1 << 27))
continue;
map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
phdr.p_offset, 0, 0);
if (!map)
goto fail;
}
pr_debug("SPU_PROF: Created non-overlay maps\n");
/* Traverse section table and search for overlay-related symbols. */
for (i = 0; i < ehdr.e_shnum; i++) {
if (copy_from_user(&shdr,
(void *) (shdr_start + i * sizeof(shdr)),
sizeof(shdr)))
goto fail;
if (shdr.sh_type != SHT_SYMTAB)
continue;
if (shdr.sh_entsize != sizeof (sym))
continue;
if (copy_from_user(&shdr_str,
(void *) (shdr_start + shdr.sh_link *
sizeof(shdr)),
sizeof(shdr)))
goto fail;
if (shdr_str.sh_type != SHT_STRTAB)
goto fail;;
for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
if (copy_from_user(&sym, (void *) (spu_elf_start +
shdr.sh_offset + j *
sizeof (sym)),
sizeof (sym)))
goto fail;
if (copy_from_user(name, (void *)
(spu_elf_start + shdr_str.sh_offset +
sym.st_name),
20))
goto fail;
if (memcmp(name, "_ovly_table", 12) == 0)
ovly_table_sym = sym.st_value;
if (memcmp(name, "_ovly_buf_table", 16) == 0)
ovly_buf_table_sym = sym.st_value;
if (memcmp(name, "_ovly_table_end", 16) == 0)
ovly_table_end_sym = sym.st_value;
if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
ovly_buf_table_end_sym = sym.st_value;
}
}
/* If we don't have overlays, we're done. */
if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
|| ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
pr_debug("SPU_PROF: No overlay table found\n");
goto out;
} else {
pr_debug("SPU_PROF: Overlay table found\n");
}
/* The _ovly_table symbol represents a table with one entry
* per overlay section. The _ovly_buf_table symbol represents
* a table with one entry per overlay region.
* The struct spu_overlay_info gives the structure of the _ovly_table
* entries. The structure of _ovly_table_buf is simply one
* u32 word per entry.
*/
overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
aSpu, &grd_val);
if (overlay_tbl_offset < 0) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Error finding SPU overlay table\n",
__FUNCTION__, __LINE__);
goto fail;
}
ovly_table = spu_elf_start + overlay_tbl_offset;
n_ovlys = (ovly_table_end_sym -
ovly_table_sym) / sizeof (ovly);
/* Traverse overlay table. */
for (i = 0; i < n_ovlys; i++) {
if (copy_from_user(&ovly, (void *)
(ovly_table + i * sizeof (ovly)),
sizeof (ovly)))
goto fail;
/* The ovly.vma/size/offset arguments are analogous to the same
* arguments used above for non-overlay maps. The final two
* args are referred to as the guard pointer and the guard
* value.
* The guard pointer is an entry in the _ovly_buf_table,
* computed using ovly.buf as the index into the table. Since
* ovly.buf values begin at '1' to reference the first (or 0th)
* entry in the _ovly_buf_table, the computation subtracts 1
* from ovly.buf.
* The guard value is stored in the _ovly_buf_table entry and
* is an index (starting at 1) back to the _ovly_table entry
* that is pointing at this _ovly_buf_table entry. So, for
* example, for an overlay scenario with one overlay segment
* and two overlay sections:
* - Section 1 points to the first entry of the
* _ovly_buf_table, which contains a guard value
* of '1', referencing the first (index=0) entry of
* _ovly_table.
* - Section 2 points to the second entry of the
* _ovly_buf_table, which contains a guard value
* of '2', referencing the second (index=1) entry of
* _ovly_table.
*/
map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
if (!map)
goto fail;
}
goto out;
fail:
map = NULL;
out:
return map;
}
......@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
static struct op_counter_config ctr[OP_MAX_COUNTER];
static struct op_system_config sys;
static int op_per_cpu_rc;
static void op_handle_interrupt(struct pt_regs *regs)
{
model->handle_interrupt(regs, ctr);
......@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
static void op_powerpc_cpu_setup(void *dummy)
{
model->cpu_setup(ctr);
int ret;
ret = model->cpu_setup(ctr);
if (ret != 0)
op_per_cpu_rc = ret;
}
static int op_powerpc_setup(void)
{
int err;
op_per_cpu_rc = 0;
/* Grab the hardware */
err = reserve_pmc_hardware(op_handle_interrupt);
if (err)
return err;
/* Pre-compute the values to stuff in the hardware registers. */
model->reg_setup(ctr, &sys, model->num_counters);
op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
/* Configure the registers on all cpus. */
if (op_per_cpu_rc)
goto out;
/* Configure the registers on all cpus. If an error occurs on one
* of the cpus, op_per_cpu_rc will be set to the error */
on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
return 0;
out: if (op_per_cpu_rc) {
/* error on setup release the performance counter hardware */
release_pmc_hardware();
}
return op_per_cpu_rc;
}
static void op_powerpc_shutdown(void)
......@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
static void op_powerpc_cpu_start(void *dummy)
{
model->start(ctr);
/* If any of the cpus has returned an error, set the
* global flag to the error so it can be returned
* to the generic OProfile caller.
*/
int ret;
ret = model->start(ctr);
if (ret != 0)
op_per_cpu_rc = ret;
}
static int op_powerpc_start(void)
{
op_per_cpu_rc = 0;
if (model->global_start)
model->global_start(ctr);
if (model->start)
return model->global_start(ctr);
if (model->start) {
on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
return 0;
return op_per_cpu_rc;
}
return -EIO; /* No start function is defined for this
power architecture */
}
static inline void op_powerpc_cpu_stop(void *dummy)
......@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
switch (cur_cpu_spec->oprofile_type) {
#ifdef CONFIG_PPC64
#ifdef CONFIG_PPC_CELL_NATIVE
#ifdef CONFIG_OPROFILE_CELL
case PPC_OPROFILE_CELL:
if (firmware_has_feature(FW_FEATURE_LPAR))
return -ENODEV;
model = &op_model_cell;
ops->sync_start = model->sync_start;
ops->sync_stop = model->sync_stop;
break;
#endif
case PPC_OPROFILE_RS64:
......
......@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
/* Configures the counters on this CPU based on the global
* settings */
static void fsl7450_cpu_setup(struct op_counter_config *ctr)
static int fsl7450_cpu_setup(struct op_counter_config *ctr)
{
/* freeze all counters */
pmc_stop_ctrs();
......@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
mtspr(SPRN_MMCR0, mmcr0_val);
mtspr(SPRN_MMCR1, mmcr1_val);
mtspr(SPRN_MMCR2, mmcr2_val);
return 0;
}
#define NUM_CTRS 6
/* Configures the global settings for the counters on all CPUs. */
static void fsl7450_reg_setup(struct op_counter_config *ctr,
static int fsl7450_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
| mmcr1_event6(ctr[5].event);
mmcr2_val = 0;
return 0;
}
/* Sets the counters on this CPU to the chosen values, and starts them */
static void fsl7450_start(struct op_counter_config *ctr)
static int fsl7450_start(struct op_counter_config *ctr)
{
int i;
......@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
pmc_start_ctrs();
oprofile_running = 1;
return 0;
}
/* Stop the counters on this CPU */
......@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
/* The freeze bit was set by the interrupt. */
/* Clear the freeze bit, and reenable the interrupt.
* The counters won't actually start until the rfi clears
* the PMM bit */
* the PM/M bit */
pmc_start_ctrs();
}
......
This diff is collapsed.
......@@ -244,7 +244,7 @@ static void dump_pmcs(void)
mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
}
static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
{
int i;
......@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
}
return 0;
}
static void fsl_booke_reg_setup(struct op_counter_config *ctr,
static int fsl_booke_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
for (i = 0; i < num_counters; ++i)
reset_value[i] = 0x80000000UL - ctr[i].count;
return 0;
}
static void fsl_booke_start(struct op_counter_config *ctr)
static int fsl_booke_start(struct op_counter_config *ctr)
{
int i;
......@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
mfpmr(PMRN_PMGC0));
return 0;
}
static void fsl_booke_stop(void)
......
......@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
/* precompute the values to stuff in the hardware registers */
static void pa6t_reg_setup(struct op_counter_config *ctr,
static int pa6t_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
pr_debug("reset_value for pmc%u inited to 0x%lx\n",
pmc, reset_value[pmc]);
}
return 0;
}
/* configure registers on this cpu */
static void pa6t_cpu_setup(struct op_counter_config *ctr)
static int pa6t_cpu_setup(struct op_counter_config *ctr)
{
u64 mmcr0 = mmcr0_val;
u64 mmcr1 = mmcr1_val;
......@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_PA6T_MMCR0));
pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
mfspr(SPRN_PA6T_MMCR1));
return 0;
}
static void pa6t_start(struct op_counter_config *ctr)
static int pa6t_start(struct op_counter_config *ctr)
{
int i;
......@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
oprofile_running = 1;
pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
return 0;
}
static void pa6t_stop(void)
......
......@@ -32,7 +32,7 @@ static u32 mmcr0_val;
static u64 mmcr1_val;
static u64 mmcra_val;
static void power4_reg_setup(struct op_counter_config *ctr,
static int power4_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
else
mmcr0_val |= MMCR0_PROBLEM_DISABLE;
return 0;
}
extern void ppc64_enable_pmcs(void);
......@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
return 0;
}
static void power4_cpu_setup(struct op_counter_config *ctr)
static int power4_cpu_setup(struct op_counter_config *ctr)
{
unsigned int mmcr0 = mmcr0_val;
unsigned long mmcra = mmcra_val;
......@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_MMCR1));
dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
mfspr(SPRN_MMCRA));
return 0;
}
static void power4_start(struct op_counter_config *ctr)
static int power4_start(struct op_counter_config *ctr)
{
int i;
unsigned int mmcr0;
......@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
oprofile_running = 1;
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
return 0;
}
static void power4_stop(void)
......
......@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
static int num_counters;
static void rs64_reg_setup(struct op_counter_config *ctr,
static int rs64_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
reset_value[i] = 0x80000000UL - ctr[i].count;
/* XXX setup user and kernel profiling */
return 0;
}
static void rs64_cpu_setup(struct op_counter_config *ctr)
static int rs64_cpu_setup(struct op_counter_config *ctr)
{
unsigned int mmcr0;
......@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_MMCR0));
dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
mfspr(SPRN_MMCR1));
return 0;
}
static void rs64_start(struct op_counter_config *ctr)
static int rs64_start(struct op_counter_config *ctr)
{
int i;
unsigned int mmcr0;
......@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
mtspr(SPRN_MMCR0, mmcr0);
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
return 0;
}
static void rs64_stop(void)
......
......@@ -272,4 +272,14 @@ config CPM2
you wish to build a kernel for a machine with a CPM2 coprocessor
on it (826x, 827x, 8560).
config AXON_RAM
tristate "Axon DDR2 memory device driver"
depends on PPC_IBM_CELL_BLADE
default m
help
It registers one block device per Axon's DDR2 memory bank found
on a system. Block devices are called axonram?, their major and
minor numbers are available in /proc/devices, /proc/partitions or
in /sys/block/axonram?/dev.
endmenu
......@@ -73,4 +73,14 @@ config CBE_CPUFREQ
For details, take a look at <file:Documentation/cpu-freq/>.
If you don't have such processor, say N
config CBE_CPUFREQ_PMI
tristate "CBE frequency scaling using PMI interface"
depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL
default n
help
Select this, if you want to use the PMI interface
to switch frequencies. Using PMI, the
processor will not only be able to run at lower speed,
but also at lower core voltage.
endmenu
......@@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \
obj-$(CONFIG_CBE_RAS) += ras.o
obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o
obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
ifeq ($(CONFIG_SMP),y)
obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
......@@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
$(spu-priv1-y) \
$(spu-manage-y) \
spufs/
obj-$(CONFIG_PCI_MSI) += axon_msi.o
This diff is collapsed.
/*
* cpufreq driver for the cell processor
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
......@@ -21,18 +21,11 @@
*/
#include <linux/cpufreq.h>
#include <linux/timer.h>
#include <asm/hw_irq.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/time.h>
#include <asm/pmi.h>
#include <asm/of_platform.h>
#include <asm/prom.h>
#include "cbe_regs.h"
#include "cbe_cpufreq.h"
static DEFINE_MUTEX(cbe_switch_mutex);
......@@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = {
{0, CPUFREQ_TABLE_END},
};
/* to write to MIC register */
static u64 MIC_Slow_Fast_Timer_table[] = {
[0 ... 7] = 0x007fc00000000000ull,
};
/* more values for the MIC */
static u64 MIC_Slow_Next_Timer_table[] = {
0x0000240000000000ull,
0x0000268000000000ull,
0x000029C000000000ull,
0x00002D0000000000ull,
0x0000300000000000ull,
0x0000334000000000ull,
0x000039C000000000ull,
0x00003FC000000000ull,
};
static unsigned int pmi_frequency_limit = 0;
/*
* hardware specific functions
*/
static struct of_device *pmi_dev;
#ifdef CONFIG_PPC_PMI
static int set_pmode_pmi(int cpu, unsigned int pmode)
{
int ret;
pmi_message_t pmi_msg;
#ifdef DEBUG
u64 time;
#endif
pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
pmi_msg.data1 = cbe_cpu_to_node(cpu);
pmi_msg.data2 = pmode;
#ifdef DEBUG
time = (u64) get_cycles();
#endif
pmi_send_message(pmi_dev, pmi_msg);
ret = pmi_msg.data2;
pr_debug("PMI returned slow mode %d\n", ret);
#ifdef DEBUG
time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */
time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */
pr_debug("had to wait %lu ns for a transition\n", time);
#endif
return ret;
}
#endif
static int get_pmode(int cpu)
static int set_pmode(unsigned int cpu, unsigned int slow_mode)
{
int ret;
struct cbe_pmd_regs __iomem *pmd_regs;
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
ret = in_be64(&pmd_regs->pmsr) & 0x07;
return ret;
}
static int set_pmode_reg(int cpu, unsigned int pmode)
{
struct cbe_pmd_regs __iomem *pmd_regs;
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
u64 flags;
u64 value;
local_irq_save(flags);
mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr);
pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0);
out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
value = in_be64(&pmd_regs->pmcr);
/* set bits to zero */
value &= 0xFFFFFFFFFFFFFFF8ull;
/* set bits to next pmode */
value |= pmode;
out_be64(&pmd_regs->pmcr, value);
/* wait until new pmode appears in status register */
value = in_be64(&pmd_regs->pmsr) & 0x07;
while(value != pmode) {
cpu_relax();
value = in_be64(&pmd_regs->pmsr) & 0x07;
}
local_irq_restore(flags);
return 0;
}
int rc;
static int set_pmode(int cpu, unsigned int slow_mode) {
#ifdef CONFIG_PPC_PMI
if (pmi_dev)
return set_pmode_pmi(cpu, slow_mode);
if (cbe_cpufreq_has_pmi)
rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
else
#endif
return set_pmode_reg(cpu, slow_mode);
}
static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg)
{
u8 cpu;
u8 cbe_pmode_new;
BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
cpu = cbe_node_to_cpu(pmi_msg.data1);
cbe_pmode_new = pmi_msg.data2;
pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency;
pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit);
}
static int pmi_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
struct cpufreq_policy *policy = data;
if (event != CPUFREQ_INCOMPATIBLE)
return 0;
cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit);
return 0;
return rc;
}
static struct notifier_block pmi_notifier_block = {
.notifier_call = pmi_notifier,
};
static struct pmi_handler cbe_pmi_handler = {
.type = PMI_TYPE_FREQ_CHANGE,
.handle_pmi_message = cbe_cpufreq_handle_pmi,
};
/*
* cpufreq functions
*/
......@@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
pr_debug("init cpufreq on CPU %d\n", policy->cpu);
/*
* Let's check we can actually get to the CELL regs
*/
if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
!cbe_get_cpu_mic_tm_regs(policy->cpu)) {
pr_info("invalid CBE regs pointers for cpufreq\n");
return -EINVAL;
}
max_freqp = of_get_property(cpu, "clock-frequency", NULL);
of_node_put(cpu);
if (!max_freqp)
return -EINVAL;
......@@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
/* if DEBUG is enabled set_pmode() measures the correct latency of a transition */
/* if DEBUG is enabled set_pmode() measures the latency
* of a transition */
policy->cpuinfo.transition_latency = 25000;
cur_pmode = get_pmode(policy->cpu);
cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
pr_debug("current pmode is at %d\n",cur_pmode);
policy->cur = cbe_freqs[cur_pmode].frequency;
......@@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
if (pmi_dev) {
/* frequency might get limited later, initialize limit with max_freq */
pmi_frequency_limit = max_freq;
cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
}
/* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */
/* this ensures that policy->cpuinfo_min
* and policy->cpuinfo_max are set correctly */
return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
}
static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
if (pmi_dev)
cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
cpufreq_frequency_table_put_attr(policy->cpu);
return 0;
}
......@@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
return cpufreq_frequency_table_verify(policy, cbe_freqs);
}
static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq,
unsigned int relation)
static int cbe_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
int rc;
struct cpufreq_freqs freqs;
int cbe_pmode_new;
unsigned int cbe_pmode_new;
cpufreq_frequency_table_target(policy,
cbe_freqs,
......@@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target
mutex_lock(&cbe_switch_mutex);
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
pr_debug("setting frequency for cpu %d to %d kHz, " \
"1/%d of max frequency\n",
policy->cpu,
cbe_freqs[cbe_pmode_new].frequency,
cbe_freqs[cbe_pmode_new].index);
rc = set_pmode(policy->cpu, cbe_pmode_new);
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
mutex_unlock(&cbe_switch_mutex);
......@@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = {
static int __init cbe_cpufreq_init(void)
{
#ifdef CONFIG_PPC_PMI
struct device_node *np;
#endif
if (!machine_is(cell))
return -ENODEV;
#ifdef CONFIG_PPC_PMI
np = of_find_node_by_type(NULL, "ibm,pmi");
pmi_dev = of_find_device_by_node(np);
if (pmi_dev)
pmi_register_handler(pmi_dev, &cbe_pmi_handler);
#endif
return cpufreq_register_driver(&cbe_cpufreq_driver);
}
static void __exit cbe_cpufreq_exit(void)
{
#ifdef CONFIG_PPC_PMI
if (pmi_dev)
pmi_unregister_handler(pmi_dev, &cbe_pmi_handler);
#endif
cpufreq_unregister_driver(&cbe_cpufreq_driver);
}
......
/*
* cbe_cpufreq.h
*
* This file contains the definitions used by the cbe_cpufreq driver.
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
*/
#include <linux/cpufreq.h>
#include <linux/types.h>
/* Pervasive-unit backend: program power mode @pmode for @cpu. */
int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
/* Pervasive-unit backend: read the power mode currently reported for @cpu. */
int cbe_cpufreq_get_pmode(int cpu);
/* PMI backend: request power mode @pmode for the node @cpu belongs to. */
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
/*
 * cbe_cpufreq_has_pmi is a real variable only when the PMI backend is
 * configured (built in or as a module); otherwise it collapses to the
 * constant 0 so that callers can test it unconditionally.
 */
#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
extern bool cbe_cpufreq_has_pmi;
#else
#define cbe_cpufreq_has_pmi (0)
#endif
/*
* pervasive backend for the cbe_cpufreq driver
*
* This driver makes use of the pervasive unit to
* engage the desired frequency.
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/time.h>
#include <asm/machdep.h>
#include <asm/hw_irq.h>
#include "cbe_regs.h"
#include "cbe_cpufreq.h"
/*
 * Value written to the MIC slow_fast_timer_0/1 registers, indexed by pmode.
 * All eight power modes use the same value.
 */
static u64 MIC_Slow_Fast_Timer_table[] = {
[0 ... 7] = 0x007fc00000000000ull,
};
/*
 * Value written to the MIC slow_next_timer_0/1 registers, indexed by pmode
 * (one entry per power mode, eight in total).
 */
static u64 MIC_Slow_Next_Timer_table[] = {
0x0000240000000000ull,
0x0000268000000000ull,
0x000029C000000000ull,
0x00002D0000000000ull,
0x0000300000000000ull,
0x0000334000000000ull,
0x000039C000000000ull,
0x00003FC000000000ull,
};
int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
{
struct cbe_pmd_regs __iomem *pmd_regs;
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
u64 flags;
u64 value;
#ifdef DEBUG
long time;
#endif
local_irq_save(flags);
mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
#ifdef DEBUG
time = jiffies;
#endif
out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
value = in_be64(&pmd_regs->pmcr);
/* set bits to zero */
value &= 0xFFFFFFFFFFFFFFF8ull;
/* set bits to next pmode */
value |= pmode;
out_be64(&pmd_regs->pmcr, value);
#ifdef DEBUG
/* wait until new pmode appears in status register */
value = in_be64(&pmd_regs->pmsr) & 0x07;
while (value != pmode) {
cpu_relax();
value = in_be64(&pmd_regs->pmsr) & 0x07;
}
time = jiffies - time;
time = jiffies_to_msecs(time);
pr_debug("had to wait %lu ms for a transition using " \
"pervasive unit\n", time);
#endif
local_irq_restore(flags);
return 0;
}
int cbe_cpufreq_get_pmode(int cpu)
{
int ret;
struct cbe_pmd_regs __iomem *pmd_regs;
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
ret = in_be64(&pmd_regs->pmsr) & 0x07;
return ret;
}
/*
* pmi backend for the cbe_cpufreq driver
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <asm/of_platform.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/pmi.h>
#ifdef DEBUG
#include <asm/time.h>
#endif
#include "cbe_regs.h"
#include "cbe_cpufreq.h"
static u8 pmi_slow_mode_limit[MAX_CBE];
bool cbe_cpufreq_has_pmi = false;
EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
/*
* hardware specific functions
*/
/*
 * cbe_cpufreq_set_pmode_pmi - request a power mode change through PMI
 * @cpu:   cpu whose node (via cbe_cpu_to_node()) is named in the request
 * @pmode: requested power (slow) mode
 *
 * Builds a PMI_TYPE_FREQ_CHANGE message and hands it to pmi_send_message().
 * With DEBUG defined, the time spent in pmi_send_message() is logged.
 *
 * Returns the data2 field of the local message after the send.
 * NOTE(review): pmi_msg is passed by value, so unless pmi_send_message() is a
 * macro, data2 still holds the requested @pmode here and the return value of
 * pmi_send_message() itself is dropped -- confirm this is intended.
 */
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
{
	int ret;
	pmi_message_t pmi_msg;
#ifdef DEBUG
	unsigned long time;	/* matches jiffies and the %lu below */
#endif

	pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
	pmi_msg.data1 = cbe_cpu_to_node(cpu);
	pmi_msg.data2 = pmode;

#ifdef DEBUG
	time = jiffies;
#endif
	pmi_send_message(pmi_msg);

#ifdef DEBUG
	time = jiffies - time;
	time = jiffies_to_msecs(time);
	pr_debug("had to wait %lu ms for a transition using " \
		 "PMI\n", time);
#endif
	ret = pmi_msg.data2;
	pr_debug("PMI returned slow mode %d\n", ret);

	return ret;
}
EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
/*
 * cbe_cpufreq_handle_pmi - handler for incoming PMI_TYPE_FREQ_CHANGE messages
 * @pmi_msg: received message; data1 carries the node, data2 the slow mode
 *
 * Records the slow mode as the limit for the given node in
 * pmi_slow_mode_limit[], where pmi_notifier() picks it up.  Messages naming
 * a node outside that array are ignored instead of writing out of bounds.
 */
static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
{
	u8 node, slow_mode;

	BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);

	node = pmi_msg.data1;
	slow_mode = pmi_msg.data2;

	/* node comes straight from the message: validate before indexing */
	if (node >= ARRAY_SIZE(pmi_slow_mode_limit)) {
		pr_debug("cbe_handle_pmi: ignoring invalid node %d\n", node);
		return;
	}

	pmi_slow_mode_limit[node] = slow_mode;

	pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
}
/*
 * pmi_notifier - cpufreq policy notifier applying the PMI slow mode limit
 * @nb:    notifier block (unused)
 * @event: notifier event, only logged
 * @data:  the struct cpufreq_policy being adjusted
 *
 * If a non-zero slow mode limit has been recorded for the policy cpu's node,
 * clamp the policy's upper bound to the frequency of that slow mode.
 * Always returns 0.
 */
static int pmi_notifier(struct notifier_block *nb,
			unsigned long event, void *data)
{
	struct cpufreq_policy *policy = data;
	struct cpufreq_frequency_table *freq_table =
		cpufreq_frequency_get_table(policy->cpu);
	u8 node = cbe_cpu_to_node(policy->cpu);

	pr_debug("got notified, event=%lu, node=%u\n", event, node);

	/* a limit of 0 means nothing to restrict for this node */
	if (pmi_slow_mode_limit[node] == 0)
		return 0;

	pr_debug("limiting node %d to slow mode %d\n",
		 node, pmi_slow_mode_limit[node]);

	cpufreq_verify_within_limits(policy, 0,
			freq_table[pmi_slow_mode_limit[node]].frequency);

	return 0;
}
/* Notifier block registered on the cpufreq policy notifier chain at init. */
static struct notifier_block pmi_notifier_block = {
.notifier_call = pmi_notifier,
};
/* PMI handler descriptor: routes PMI_TYPE_FREQ_CHANGE messages to us. */
static struct pmi_handler cbe_pmi_handler = {
.type = PMI_TYPE_FREQ_CHANGE,
.handle_pmi_message = cbe_cpufreq_handle_pmi,
};
/*
 * cbe_cpufreq_pmi_init - module init for the PMI cpufreq backend
 *
 * Registers the PMI message handler and, if that succeeds, the cpufreq policy
 * notifier.  cbe_cpufreq_has_pmi reflects whether the backend is usable.
 *
 * Returns 0 on success, -ENODEV if the PMI handler could not be registered,
 * or the error from cpufreq_register_notifier().
 */
static int __init cbe_cpufreq_pmi_init(void)
{
	int rc;

	cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;

	if (!cbe_cpufreq_has_pmi)
		return -ENODEV;

	rc = cpufreq_register_notifier(&pmi_notifier_block,
				       CPUFREQ_POLICY_NOTIFIER);
	if (rc) {
		/* don't leave a half-initialised backend behind */
		pmi_unregister_handler(&cbe_pmi_handler);
		cbe_cpufreq_has_pmi = false;
		return rc;
	}

	return 0;
}
/*
 * cbe_cpufreq_pmi_exit - module exit for the PMI cpufreq backend
 *
 * Tears down what cbe_cpufreq_pmi_init() set up: first the cpufreq policy
 * notifier, then the PMI message handler.
 */
static void __exit cbe_cpufreq_pmi_exit(void)
{
	cpufreq_unregister_notifier(&pmi_notifier_block,
				    CPUFREQ_POLICY_NOTIFIER);
	pmi_unregister_handler(&cbe_pmi_handler);
}
module_init(cbe_cpufreq_pmi_init);
module_exit(cbe_cpufreq_pmi_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
......@@ -174,6 +174,13 @@ static struct device_node *cbe_get_be_node(int cpu_id)
cpu_handle = of_get_property(np, "cpus", &len);
/*
* the CAB SLOF tree is non-compliant, so we just assume
* there is only one node
*/
if (WARN_ON_ONCE(!cpu_handle))
return np;
for (i=0; i<len; i++)
if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
return np;
......
......@@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = {
/*
* initialize throttling with default values
*/
static void __init init_default_values(void)
static int __init init_default_values(void)
{
int cpu;
struct cbe_pmd_regs __iomem *pmd_regs;
......@@ -339,25 +339,40 @@ static void __init init_default_values(void)
for_each_possible_cpu (cpu) {
pr_debug("processing cpu %d\n", cpu);
sysdev = get_cpu_sysdev(cpu);
if (!sysdev) {
pr_info("invalid sysdev pointer for cbe_thermal\n");
return -EINVAL;
}
pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
if (!pmd_regs) {
pr_info("invalid CBE regs pointer for cbe_thermal\n");
return -EINVAL;
}
out_be64(&pmd_regs->tm_str2, str2);
out_be64(&pmd_regs->tm_str1.val, str1.val);
out_be64(&pmd_regs->tm_tpr.val, tpr.val);
out_be64(&pmd_regs->tm_cr1.val, cr1.val);
out_be64(&pmd_regs->tm_cr2, cr2);
}
return 0;
}
static int __init thermal_init(void)
{
init_default_values();
int rc = init_default_values();
spu_add_sysdev_attr_group(&spu_attribute_group);
cpu_add_sysdev_attr_group(&ppe_attribute_group);
if (rc == 0) {
spu_add_sysdev_attr_group(&spu_attribute_group);
cpu_add_sysdev_attr_group(&ppe_attribute_group);
}
return 0;
return rc;
}
module_init(thermal_init);
......
This diff is collapsed.
......@@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = {
* this file is not used and the syscalls directly enter the fs code */
asmlinkage long sys_spu_create(const char __user *name,
unsigned int flags, mode_t mode)
unsigned int flags, mode_t mode, int neighbor_fd)
{
long ret;
struct module *owner = spufs_calls.owner;
struct file *neighbor;
int fput_needed;
ret = -ENOSYS;
if (owner && try_module_get(owner)) {
ret = spufs_calls.create_thread(name, flags, mode);
if (flags & SPU_CREATE_AFFINITY_SPU) {
neighbor = fget_light(neighbor_fd, &fput_needed);
if (neighbor) {
ret = spufs_calls.create_thread(name, flags,
mode, neighbor);
fput_light(neighbor, fput_needed);
}
}
else {
ret = spufs_calls.create_thread(name, flags,
mode, NULL);
}
module_put(owner);
}
return ret;
......
......@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/atomic.h>
#include <asm/spu.h>
......@@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
ctx->ops = &spu_backing_ops;
ctx->owner = get_task_mm(current);
INIT_LIST_HEAD(&ctx->rq);
INIT_LIST_HEAD(&ctx->aff_list);
if (gang)
spu_gang_add_ctx(gang, ctx);
ctx->cpus_allowed = current->cpus_allowed;
spu_set_timeslice(ctx);
ctx->stats.execution_state = SPUCTX_UTIL_USER;
ctx->stats.tstamp = jiffies;
ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
atomic_inc(&nr_spu_contexts);
goto out;
......@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
spu_fini_csa(&ctx->csa);
if (ctx->gang)
spu_gang_remove_ctx(ctx->gang, ctx);
if (ctx->prof_priv_kref)
kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
BUG_ON(!list_empty(&ctx->rq));
atomic_dec(&nr_spu_contexts);
kfree(ctx);
......@@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
void spu_acquire_saved(struct spu_context *ctx)
{
spu_acquire(ctx);
if (ctx->state != SPU_STATE_SAVED)
if (ctx->state != SPU_STATE_SAVED) {
set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
spu_deactivate(ctx);
}
}
/**
 * spu_release_saved - unlock spu context and return it to the runqueue
 * @ctx:	context to unlock
 *
 * The context must be in SPU_STATE_SAVED.  If SPU_SCHED_WAS_ACTIVE is set in
 * the context's sched_flags, the flag is cleared and the context is
 * activated again before the context is released.
 */
void spu_release_saved(struct spu_context *ctx)
{
	int was_active;

	BUG_ON(ctx->state != SPU_STATE_SAVED);

	was_active = test_and_clear_bit(SPU_SCHED_WAS_ACTIVE,
					&ctx->sched_flags);
	if (was_active)
		spu_activate(ctx, 0);

	spu_release(ctx);
}
/*
 * spu_set_profile_private_kref - attach profiler private data to a context
 * @ctx:               context to attach to
 * @prof_info_kref:    kref of the profiler's private data
 * @prof_info_release: release callback to use with that kref
 *
 * Only stores the two pointers in @ctx; no reference is taken here.
 */
void spu_set_profile_private_kref(struct spu_context *ctx,
				  struct kref *prof_info_kref,
				  void (*prof_info_release)(struct kref *kref))
{
	ctx->prof_priv_release = prof_info_release;
	ctx->prof_priv_kref = prof_info_kref;
}
EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
/*
 * spu_get_profile_private_kref - fetch the profiler kref stored in @ctx
 *
 * Returns whatever spu_set_profile_private_kref() stored last, as an
 * untyped pointer.
 */
void *spu_get_profile_private_kref(struct spu_context *ctx)
{
	void *priv = ctx->prof_priv_kref;

	return priv;
}
EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
......@@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file)
spu_acquire_saved(ctx_info->ctx);
for (j = 0; j < spufs_coredump_num_notes; j++)
spufs_arch_write_note(ctx_info, j, file);
spu_release(ctx_info->ctx);
spu_release_saved(ctx_info->ctx);
list_del(&ctx_info->list);
kfree(ctx_info);
}
......
......@@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx)
if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
return 0;
spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT);
spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
dsisr, ctx->state);
ctx->stats.hash_flt++;
if (ctx->state == SPU_STATE_RUNNABLE) {
if (ctx->state == SPU_STATE_RUNNABLE)
ctx->spu->stats.hash_flt++;
spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT);
}
/* we must not hold the lock when entering spu_handle_mm_fault */
spu_release(ctx);
......@@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx)
} else
spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
return ret;
}
EXPORT_SYMBOL_GPL(spufs_handle_class1);
......@@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer,
spu_acquire_saved(ctx);
ret = __spufs_regs_read(ctx, buffer, size, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer,
ret = copy_from_user(lscsa->gprs + *pos - size,
buffer, size) ? -EFAULT : size;
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
spu_acquire_saved(ctx);
ret = __spufs_fpcr_read(ctx, buffer, size, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer,
ret = copy_from_user((char *)&lscsa->fpcr + *pos - size,
buffer, size) ? -EFAULT : size;
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
spu_acquire_saved(ctx);
ret = __spufs_signal1_read(ctx, buf, len, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = {
.mmap = spufs_signal1_mmap,
};
/*
 * signal1 file operations variant that provides open, release, write and
 * mmap only -- there is no read handler.
 */
static const struct file_operations spufs_signal1_nosched_fops = {
.open = spufs_signal1_open,
.release = spufs_signal1_release,
.write = spufs_signal1_write,
.mmap = spufs_signal1_mmap,
};
static int spufs_signal2_open(struct inode *inode, struct file *file)
{
struct spufs_inode_info *i = SPUFS_I(inode);
......@@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
spu_acquire_saved(ctx);
ret = __spufs_signal2_read(ctx, buf, len, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = {
.mmap = spufs_signal2_mmap,
};
/*
 * signal2 file operations variant that provides open, release, write and
 * mmap only -- there is no read handler.
 */
static const struct file_operations spufs_signal2_nosched_fops = {
.open = spufs_signal2_open,
.release = spufs_signal2_release,
.write = spufs_signal2_write,
.mmap = spufs_signal2_mmap,
};
static void spufs_signal1_type_set(void *data, u64 val)
{
struct spu_context *ctx = data;
......@@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val)
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->decr.slot[0] = (u32) val;
spu_release(ctx);
spu_release_saved(ctx);
}
static u64 __spufs_decr_get(void *data)
......@@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = __spufs_decr_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
......@@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
static void spufs_decr_status_set(void *data, u64 val)
{
struct spu_context *ctx = data;
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->decr_status.slot[0] = (u32) val;
spu_release(ctx);
if (val)
ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
else
ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
spu_release_saved(ctx);
}
static u64 __spufs_decr_status_get(void *data)
{
struct spu_context *ctx = data;
struct spu_lscsa *lscsa = ctx->csa.lscsa;
return lscsa->decr_status.slot[0];
if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
return SPU_DECR_STATUS_RUNNING;
else
return 0;
}
static u64 spufs_decr_status_get(void *data)
......@@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = __spufs_decr_status_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
......@@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val)
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->event_mask.slot[0] = (u32) val;
spu_release(ctx);
spu_release_saved(ctx);
}
static u64 __spufs_event_mask_get(void *data)
......@@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = __spufs_event_mask_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
......@@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data)
spu_acquire_saved(ctx);
ret = __spufs_event_status_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
......@@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val)
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->srr0.slot[0] = (u32) val;
spu_release(ctx);
spu_release_saved(ctx);
}
static u64 spufs_srr0_get(void *data)
......@@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = lscsa->srr0.slot[0];
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
......@@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data)
spu_acquire_saved(ctx);
ret = __spufs_lslr_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_mbox_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_ibox_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_wbox_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_dma_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = {
};
static unsigned long long spufs_acct_time(struct spu_context *ctx,
enum spuctx_execution_state state)
enum spu_utilization_state state)
{
unsigned long time = ctx->stats.times[state];
struct timespec ts;
unsigned long long time = ctx->stats.times[state];
if (ctx->stats.execution_state == state)
time += jiffies - ctx->stats.tstamp;
/*
* In general, utilization statistics are updated by the controlling
* thread as the spu context moves through various well defined
* state transitions, but if the context is lazily loaded its
* utilization statistics are not updated as the controlling thread
* is not tightly coupled with the execution of the spu context. We
* calculate and apply the time delta from the last recorded state
* of the spu context.
*/
if (ctx->spu && ctx->stats.util_state == state) {
ktime_get_ts(&ts);
time += timespec_to_ns(&ts) - ctx->stats.tstamp;
}
return jiffies_to_msecs(time);
return time / NSEC_PER_MSEC;
}
static unsigned long long spufs_slb_flts(struct spu_context *ctx)
......@@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private)
spu_acquire(ctx);
seq_printf(s, "%s %llu %llu %llu %llu "
"%llu %llu %llu %llu %llu %llu %llu %llu\n",
ctx_state_names[ctx->stats.execution_state],
spufs_acct_time(ctx, SPUCTX_UTIL_USER),
spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM),
spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT),
spufs_acct_time(ctx, SPUCTX_UTIL_LOADED),
ctx_state_names[ctx->stats.util_state],
spufs_acct_time(ctx, SPU_UTIL_USER),
spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
ctx->stats.vol_ctx_switch,
ctx->stats.invol_ctx_switch,
spufs_slb_flts(ctx),
......@@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = {
{ "mbox_stat", &spufs_mbox_stat_fops, 0444, },
{ "ibox_stat", &spufs_ibox_stat_fops, 0444, },
{ "wbox_stat", &spufs_wbox_stat_fops, 0444, },
{ "signal1", &spufs_signal1_fops, 0666, },
{ "signal2", &spufs_signal2_fops, 0666, },
{ "signal1", &spufs_signal1_nosched_fops, 0222, },
{ "signal2", &spufs_signal2_nosched_fops, 0222, },
{ "signal1_type", &spufs_signal1_type, 0666, },
{ "signal2_type", &spufs_signal2_type, 0666, },
{ "mss", &spufs_mss_fops, 0666, },
......
......@@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void)
kref_init(&gang->kref);
mutex_init(&gang->mutex);
mutex_init(&gang->aff_mutex);
INIT_LIST_HEAD(&gang->list);
INIT_LIST_HEAD(&gang->aff_list_head);
out:
return gang;
......@@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
{
mutex_lock(&gang->mutex);
WARN_ON(ctx->gang != gang);
if (!list_empty(&ctx->aff_list)) {
list_del_init(&ctx->aff_list);
gang->aff_flags &= ~AFF_OFFSETS_SET;
}
list_del_init(&ctx->gang_list);
gang->contexts--;
mutex_unlock(&gang->mutex);
......
......@@ -316,11 +316,107 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
return ret;
}
static int spufs_create_context(struct inode *inode,
struct dentry *dentry,
struct vfsmount *mnt, int flags, int mode)
/*
 * spufs_assert_affinity - validate an affinity request for a new context
 * @flags: spu_create flags of the context being created
 * @gang:  gang the new context will belong to
 * @filp:  open file of the requested neighbor context; only examined when
 *         SPU_CREATE_AFFINITY_SPU is set
 *
 * Affinity is treated as supported when the first SPU on cbe_spu_info[0]'s
 * list has a non-empty aff_list.  The request is rejected when affinity is
 * unsupported, when combined with SPU_CREATE_GANG, when the gang already has
 * a memory-affinity reference context, or when the gang's affinity list has
 * been merged.  For SPU affinity, the neighbor must be a spufs context in the
 * same gang with a free side in the affinity list, and some node must have
 * enough unreserved SPUs for the resulting chain.
 *
 * Returns the neighbor context (NULL when SPU_CREATE_AFFINITY_SPU is not
 * set) or an ERR_PTR().  On success the neighbor is returned with the
 * reference taken by get_spu_context() still held; on every error path that
 * reference is dropped again.
 */
static struct spu_context *
spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
		      struct file *filp)
{
	struct spu_context *tmp, *neighbor;
	int count, node;
	int aff_supp;
	int err;

	aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
					struct spu, cbe_list))->aff_list);

	if (!aff_supp)
		return ERR_PTR(-EINVAL);

	if (flags & SPU_CREATE_GANG)
		return ERR_PTR(-EINVAL);

	if (flags & SPU_CREATE_AFFINITY_MEM &&
	    gang->aff_ref_ctx &&
	    gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
		return ERR_PTR(-EEXIST);

	if (gang->aff_flags & AFF_MERGED)
		return ERR_PTR(-EBUSY);

	neighbor = NULL;
	if (flags & SPU_CREATE_AFFINITY_SPU) {
		if (!filp || filp->f_op != &spufs_context_fops)
			return ERR_PTR(-EINVAL);

		neighbor = get_spu_context(
				SPUFS_I(filp->f_dentry->d_inode)->i_ctx);

		/* neighbor already has contexts on both sides */
		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
		    !list_entry(neighbor->aff_list.next, struct spu_context,
				aff_list)->aff_head) {
			err = -EEXIST;
			goto out_put_neighbor;
		}

		if (gang != neighbor->gang) {
			err = -EINVAL;
			goto out_put_neighbor;
		}

		/* new context + everything already on the affinity list */
		count = 1;
		list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
			count++;
		/* the neighbor itself will be added to the list as well */
		if (list_empty(&neighbor->aff_list))
			count++;

		for (node = 0; node < MAX_NUMNODES; node++) {
			if ((cbe_spu_info[node].n_spus - atomic_read(
				&cbe_spu_info[node].reserved_spus)) >= count)
				break;
		}

		if (node == MAX_NUMNODES) {
			err = -EEXIST;
			goto out_put_neighbor;
		}
	}

	return neighbor;

out_put_neighbor:
	/* drop the reference taken by get_spu_context() above */
	put_spu_context(neighbor);
	return ERR_PTR(err);
}
/*
 * spufs_set_affinity - record the affinity of a newly created context
 * @flags:    spu_create flags of the new context
 * @ctx:      the new context
 * @neighbor: neighbor context; only used when SPU_CREATE_AFFINITY_SPU is set
 *
 * For memory affinity, @ctx becomes the gang's affinity reference context.
 * For SPU affinity, @ctx is linked next to @neighbor on the gang's affinity
 * list, and becomes the reference context if the gang has none yet.
 */
static void
spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
struct spu_context *neighbor)
{
if (flags & SPU_CREATE_AFFINITY_MEM)
ctx->gang->aff_ref_ctx = ctx;
if (flags & SPU_CREATE_AFFINITY_SPU) {
/* neighbor not on any affinity list yet: append it and mark it as a head */
if (list_empty(&neighbor->aff_list)) {
list_add_tail(&neighbor->aff_list,
&ctx->gang->aff_list_head);
neighbor->aff_head = 1;
}
/*
 * If neighbor is the last list entry, or the entry after it is marked
 * aff_head, link ctx right after neighbor; otherwise link ctx right
 * before neighbor and, if neighbor was a head, move the head mark to ctx.
 */
if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
|| list_entry(neighbor->aff_list.next, struct spu_context,
aff_list)->aff_head) {
list_add(&ctx->aff_list, &neighbor->aff_list);
} else {
list_add_tail(&ctx->aff_list, &neighbor->aff_list);
if (neighbor->aff_head) {
neighbor->aff_head = 0;
ctx->aff_head = 1;
}
}
/* no reference context chosen so far: use the new context */
if (!ctx->gang->aff_ref_ctx)
ctx->gang->aff_ref_ctx = ctx;
}
}
static int
spufs_create_context(struct inode *inode, struct dentry *dentry,
struct vfsmount *mnt, int flags, int mode,
struct file *aff_filp)
{
int ret;
int affinity;
struct spu_gang *gang;
struct spu_context *neighbor;
ret = -EPERM;
if ((flags & SPU_CREATE_NOSCHED) &&
......@@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode,
if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
goto out_unlock;
gang = NULL;
neighbor = NULL;
affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
if (affinity) {
gang = SPUFS_I(inode)->i_gang;
ret = -EINVAL;
if (!gang)
goto out_unlock;
mutex_lock(&gang->aff_mutex);
neighbor = spufs_assert_affinity(flags, gang, aff_filp);
if (IS_ERR(neighbor)) {
ret = PTR_ERR(neighbor);
goto out_aff_unlock;
}
}
ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
if (ret)
goto out_unlock;
goto out_aff_unlock;
if (affinity)
spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
neighbor);
/*
* get references for dget and mntget, will be released
......@@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode,
goto out;
}
out_aff_unlock:
if (affinity)
mutex_unlock(&gang->aff_mutex);
out_unlock:
mutex_unlock(&inode->i_mutex);
out:
......@@ -450,7 +569,8 @@ static int spufs_create_gang(struct inode *inode,
static struct file_system_type spufs_type;
long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
struct file *filp)
{
struct dentry *dentry;
int ret;
......@@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
dentry, nd->mnt, mode);
else
return spufs_create_context(nd->dentry->d_inode,
dentry, nd->mnt, flags, mode);
dentry, nd->mnt, flags, mode, filp);
out_dput:
dput(dentry);
......
......@@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu)
wake_up_all(&ctx->stop_wq);
}
static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
static inline int spu_stopped(struct spu_context *ctx, u32 *stat)
{
struct spu *spu;
u64 pte_fault;
*stat = ctx->ops->status_read(ctx);
if (ctx->state != SPU_STATE_RUNNABLE)
return 1;
spu = ctx->spu;
if (ctx->state != SPU_STATE_RUNNABLE ||
test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
return 1;
pte_fault = spu->dsisr &
(MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ?
......@@ -124,8 +126,10 @@ static int spu_setup_isolated(struct spu_context *ctx)
return ret;
}
static int spu_run_init(struct spu_context *ctx, u32 * npc)
static int spu_run_init(struct spu_context *ctx, u32 *npc)
{
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
if (ctx->flags & SPU_CREATE_ISOLATE) {
unsigned long runcntl;
......@@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc)
ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
}
spuctx_switch_state(ctx, SPU_UTIL_USER);
return 0;
}
static int spu_run_fini(struct spu_context *ctx, u32 * npc,
u32 * status)
static int spu_run_fini(struct spu_context *ctx, u32 *npc,
u32 *status)
{
int ret = 0;
*status = ctx->ops->status_read(ctx);
*npc = ctx->ops->npc_read(ctx);
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
spu_release(ctx);
if (signal_pending(current))
......@@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx)
return ret;
}
long spufs_run_spu(struct file *file, struct spu_context *ctx,
u32 *npc, u32 *event)
long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
{
int ret;
struct spu *spu;
u32 status;
if (mutex_lock_interruptible(&ctx->run_mutex))
......@@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
if (unlikely(ret))
break;
spu = ctx->spu;
if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
&ctx->sched_flags))) {
if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
spu_switch_notify(spu, ctx);
continue;
}
}
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
(status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
ret = spu_process_callback(ctx);
......@@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
(ctx->state == SPU_STATE_RUNNABLE))
ctx->stats.libassist++;
ctx->ops->master_stop(ctx);
ret = spu_run_fini(ctx, npc, &status);
spu_yield(ctx);
......
This diff is collapsed.
......@@ -84,13 +84,13 @@ static inline void restore_decr(void)
unsigned int decr_running;
unsigned int decr;
/* Restore, Step 6:
/* Restore, Step 6(moved):
* If the LSCSA "decrementer running" flag is set
* then write the SPU_WrDec channel with the
* decrementer value from LSCSA.
*/
offset = LSCSA_QW_OFFSET(decr_status);
decr_running = regs_spill[offset].slot[0];
decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
if (decr_running) {
offset = LSCSA_QW_OFFSET(decr);
decr = regs_spill[offset].slot[0];
......@@ -318,10 +318,10 @@ int main()
build_dma_list(lscsa_ea); /* Step 3. */
restore_upper_240kb(lscsa_ea); /* Step 4. */
/* Step 5: done by 'exit'. */
restore_decr(); /* Step 6. */
enqueue_putllc(lscsa_ea); /* Step 7. */
set_tag_update(); /* Step 8. */
read_tag_status(); /* Step 9. */
restore_decr(); /* moved Step 6. */
read_llar_status(); /* Step 10. */
write_ppu_mb(); /* Step 11. */
write_ppuint_mb(); /* Step 12. */
......
......@@ -40,17 +40,13 @@ enum {
struct spu_context_ops;
struct spu_gang;
/*
* This is the state for spu utilization reporting to userspace.
* Because this state is visible to userspace it must never change and needs
* to be kept strictly separate from any internal state kept by the kernel.
*/
enum spuctx_execution_state {
SPUCTX_UTIL_USER = 0,
SPUCTX_UTIL_SYSTEM,
SPUCTX_UTIL_IOWAIT,
SPUCTX_UTIL_LOADED,
SPUCTX_UTIL_MAX
enum {
SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */
};
/*
 * ctx->sched_flags
 *
 * The enumerators are bit numbers within that word, not bit masks; they
 * are meant to be passed to the bitops helpers such as test_bit() and
 * test_and_clear_bit() together with &ctx->sched_flags.
 */
enum {
SPU_SCHED_NOTIFY_ACTIVE,
};
struct spu_context {
......@@ -89,6 +85,8 @@ struct spu_context {
struct list_head gang_list;
struct spu_gang *gang;
struct kref *prof_priv_kref;
void ( * prof_priv_release) (struct kref *kref);
/* owner thread */
pid_t tid;
......@@ -104,9 +102,9 @@ struct spu_context {
/* statistics */
struct {
/* updates protected by ctx->state_mutex */
enum spuctx_execution_state execution_state;
unsigned long tstamp; /* time of last ctx switch */
unsigned long times[SPUCTX_UTIL_MAX];
enum spu_utilization_state util_state;
unsigned long long tstamp; /* time of last state switch */
unsigned long long times[SPU_UTIL_MAX];
unsigned long long vol_ctx_switch;
unsigned long long invol_ctx_switch;
unsigned long long min_flt;
......@@ -118,6 +116,10 @@ struct spu_context {
unsigned long long class2_intr_base; /* # at last ctx switch */
unsigned long long libassist;
} stats;
struct list_head aff_list;
int aff_head;
int aff_offset;
};
struct spu_gang {
......@@ -125,8 +127,19 @@ struct spu_gang {
struct mutex mutex;
struct kref kref;
int contexts;
struct spu_context *aff_ref_ctx;
struct list_head aff_list_head;
struct mutex aff_mutex;
int aff_flags;
struct spu *aff_ref_spu;
atomic_t aff_sched_count;
};
/* Flag bits for spu_gang aff_flags */
#define AFF_OFFSETS_SET 1
#define AFF_MERGED 2
struct mfc_dma_command {
int32_t pad; /* reserved */
uint32_t lsa; /* local storage address */
......@@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[];
extern struct tree_descr spufs_dir_nosched_contents[];
/* system call implementation */
long spufs_run_spu(struct file *file,
struct spu_context *ctx, u32 *npc, u32 *status);
long spufs_create(struct nameidata *nd,
unsigned int flags, mode_t mode);
long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
long spufs_create(struct nameidata *nd, unsigned int flags,
mode_t mode, struct file *filp);
extern const struct file_operations spufs_context_fops;
/* gang management */
......@@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
/* fault handling */
int spufs_handle_class1(struct spu_context *ctx);
/* affinity */
struct spu *affinity_check(struct spu_context *ctx);
/* context management */
extern atomic_t nr_spu_contexts;
static inline void spu_acquire(struct spu_context *ctx)
......@@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx);
void spu_forget(struct spu_context *ctx);
int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
void spu_acquire_saved(struct spu_context *ctx);
void spu_release_saved(struct spu_context *ctx);
int spu_activate(struct spu_context *ctx, unsigned long flags);
void spu_deactivate(struct spu_context *ctx);
void spu_yield(struct spu_context *ctx);
void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
void spu_set_timeslice(struct spu_context *ctx);
void spu_update_sched_info(struct spu_context *ctx);
void __spu_update_sched_info(struct spu_context *ctx);
int __init spu_sched_init(void);
void __exit spu_sched_exit(void);
void spu_sched_exit(void);
extern char *isolated_loader;
......@@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes;
* line.
*/
static inline void spuctx_switch_state(struct spu_context *ctx,
enum spuctx_execution_state new_state)
enum spu_utilization_state new_state)
{
WARN_ON(!mutex_is_locked(&ctx->state_mutex));
if (ctx->stats.execution_state != new_state) {
unsigned long curtime = jiffies;
ctx->stats.times[ctx->stats.execution_state] +=
curtime - ctx->stats.tstamp;
ctx->stats.tstamp = curtime;
ctx->stats.execution_state = new_state;
}
}
unsigned long long curtime;
signed long long delta;
struct timespec ts;
struct spu *spu;
enum spu_utilization_state old_state;
static inline void spu_switch_state(struct spu *spu,
enum spuctx_execution_state new_state)
{
if (spu->stats.utilization_state != new_state) {
unsigned long curtime = jiffies;
ktime_get_ts(&ts);
curtime = timespec_to_ns(&ts);
delta = curtime - ctx->stats.tstamp;
spu->stats.times[spu->stats.utilization_state] +=
curtime - spu->stats.tstamp;
WARN_ON(!mutex_is_locked(&ctx->state_mutex));
WARN_ON(delta < 0);
spu = ctx->spu;
old_state = ctx->stats.util_state;
ctx->stats.util_state = new_state;
ctx->stats.tstamp = curtime;
/*
* Update the physical SPU utilization statistics.
*/
if (spu) {
ctx->stats.times[old_state] += delta;
spu->stats.times[old_state] += delta;
spu->stats.util_state = new_state;
spu->stats.tstamp = curtime;
spu->stats.utilization_state = new_state;
}
}
......
This diff is collapsed.
......@@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
goto out;
i = SPUFS_I(filp->f_path.dentry->d_inode);
ret = spufs_run_spu(filp, i->i_ctx, &npc, &status);
ret = spufs_run_spu(i->i_ctx, &npc, &status);
if (put_user(npc, unpc))
ret = -EFAULT;
......@@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
}
#endif
asmlinkage long sys_spu_create(const char __user *pathname,
unsigned int flags, mode_t mode)
asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
mode_t mode, struct file *neighbor)
{
char *tmp;
int ret;
......@@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname,
ret = path_lookup(tmp, LOOKUP_PARENT|
LOOKUP_OPEN|LOOKUP_CREATE, &nd);
if (!ret) {
ret = spufs_create(&nd, flags, mode);
ret = spufs_create(&nd, flags, mode, neighbor);
path_release(&nd);
}
putname(tmp);
......@@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname,
return ret;
}
#ifndef MODULE
/*
 * spu_create system call entry point; only compiled when this code is not
 * built as a module (see the surrounding #ifndef MODULE).
 *
 * @pathname:    user-space pointer to the path, handed on to do_spu_create()
 * @flags:       SPU_CREATE_* flags
 * @mode:        mode handed on to do_spu_create()
 * @neighbor_fd: file descriptor of the neighbor; it is only resolved when
 *               SPU_CREATE_AFFINITY_SPU is set in @flags
 *
 * Returns the result of do_spu_create(), or -EBADF when SPU affinity was
 * requested but @neighbor_fd could not be resolved to a file.
 */
asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
	mode_t mode, int neighbor_fd)
{
	struct file *neighbor_filp;
	int needs_fput;
	long result;

	/* Without SPU affinity there is no neighbor to look up. */
	if (!(flags & SPU_CREATE_AFFINITY_SPU))
		return do_spu_create(pathname, flags, mode, NULL);

	neighbor_filp = fget_light(neighbor_fd, &needs_fput);
	if (!neighbor_filp)
		return -EBADF;

	/* Hold the file reference for the duration of the create call. */
	result = do_spu_create(pathname, flags, mode, neighbor_filp);
	fput_light(neighbor_filp, needs_fput);

	return result;
}
#endif
struct spufs_calls spufs_calls = {
.create_thread = sys_spu_create,
.create_thread = do_spu_create,
.spu_run = do_spu_run,
.owner = THIS_MODULE,
};
......@@ -17,6 +17,7 @@ obj-$(CONFIG_QUICC_ENGINE) += qe_lib/
mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o
obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
obj-$(CONFIG_AXON_RAM) += axonram.o
# contains only the suspend handler for time
ifeq ($(CONFIG_RTC_CLASS),)
......
This diff is collapsed.
This diff is collapsed.
......@@ -26,8 +26,9 @@
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/oprofile.h>
#include <linux/sched.h>
#include "oprofile_stats.h"
#include "event_buffer.h"
#include "cpu_buffer.h"
......
......@@ -19,28 +19,10 @@ void free_event_buffer(void);
/* wake up the process sleeping on the event file */
void wake_up_buffer_waiter(void);
/* Each escaped entry is prefixed by ESCAPE_CODE
* then one of the following codes, then the
* relevant data.
*/
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
#define INVALID_COOKIE ~0UL
#define NO_COOKIE 0UL
/* add data to the event buffer */
void add_event_entry(unsigned long data);
extern const struct file_operations event_buffer_fops;
/* mutex between sync_cpu_buffers() and the
......
This diff is collapsed.
......@@ -39,14 +39,16 @@ struct op_system_config {
/* Per-arch configuration */
struct op_powerpc_model {
void (*reg_setup) (struct op_counter_config *,
int (*reg_setup) (struct op_counter_config *,
struct op_system_config *,
int num_counters);
void (*cpu_setup) (struct op_counter_config *);
void (*start) (struct op_counter_config *);
void (*global_start) (struct op_counter_config *);
int (*cpu_setup) (struct op_counter_config *);
int (*start) (struct op_counter_config *);
int (*global_start) (struct op_counter_config *);
void (*stop) (void);
void (*global_stop) (void);
int (*sync_start)(void);
int (*sync_stop)(void);
void (*handle_interrupt) (struct pt_regs *,
struct op_counter_config *);
int num_counters;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -12,6 +12,7 @@
#ifdef CONFIG_PROFILING
#include <linux/dcache.h>
#include <linux/types.h>
struct dcookie_user;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment