Commit ede13d81 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6

* 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6: (37 commits)
  [CELL] spufs: rework list management and associated locking
  [CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
  [CELL] oprofile: enable SPU switch notification to detect currently active SPU tasks
  [CELL] spu_base: locking cleanup
  [CELL] cell: indexing of SPUs based on firmware vicinity properties
  [CELL] spufs: integration of SPE affinity with the scheduller
  [CELL] cell: add placement computation for scheduling of affinity contexts
  [CELL] spufs: extension of spu_create to support affinity definition
  [CELL] cell: add hardcoded spu vicinity information for QS20
  [CELL] cell: add vicinity information on spus
  [CELL] cell: add per BE structure with info about its SPUs
  [CELL] spufs: use find_first_bit() instead of sched_find_first_bit()
  [CELL] spufs: remove unused file argument from spufs_run_spu()
  [CELL] spufs: change decrementer restore timing
  [CELL] spufs: dont halt decrementer at restore step 47
  [CELL] spufs: limit saving MFC_CNTL bits
  [CELL] spufs: fix read and write for decr_status file
  [CELL] spufs: fix decr_status meanings
  [CELL] spufs: remove needless context save/restore code
  [CELL] spufs: fix array size of channel index
  ...
parents 20082208 486acd48
......@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
# Instrumentation Support
#
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_OPROFILE=m
CONFIG_OPROFILE_CELL=y
# CONFIG_KPROBES is not set
#
......
......@@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs)
cpus_in_sr = CPU_MASK_NONE;
}
#endif
#ifdef CONFIG_SPU_BASE
#include <asm/spu.h>
#include <asm/spu_priv1.h>
struct crash_spu_info {
struct spu *spu;
u32 saved_spu_runcntl_RW;
u32 saved_spu_status_R;
u32 saved_spu_npc_RW;
u64 saved_mfc_sr1_RW;
u64 saved_mfc_dar;
u64 saved_mfc_dsisr;
};
#define CRASH_NUM_SPUS 16 /* Enough for current hardware */
static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
static void crash_kexec_stop_spus(void)
{
struct spu *spu;
int i;
u64 tmp;
for (i = 0; i < CRASH_NUM_SPUS; i++) {
if (!crash_spu_info[i].spu)
continue;
spu = crash_spu_info[i].spu;
crash_spu_info[i].saved_spu_runcntl_RW =
in_be32(&spu->problem->spu_runcntl_RW);
crash_spu_info[i].saved_spu_status_R =
in_be32(&spu->problem->spu_status_R);
crash_spu_info[i].saved_spu_npc_RW =
in_be32(&spu->problem->spu_npc_RW);
crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
tmp = spu_mfc_sr1_get(spu);
crash_spu_info[i].saved_mfc_sr1_RW = tmp;
tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
spu_mfc_sr1_set(spu, tmp);
__delay(200);
}
}
void crash_register_spus(struct list_head *list)
{
struct spu *spu;
list_for_each_entry(spu, list, full_list) {
if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
continue;
crash_spu_info[spu->number].spu = spu;
}
}
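/*
* Editorial note: the SPU base code is expected to hand its global
* SPU list to the hook above, along the lines of
*
*	crash_register_spus(&spu_full_list);
*
* where spu_full_list is the list maintained by
* arch/powerpc/platforms/cell/spu_base.c; the actual call site is
* outside this hunk.
*/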
#else
static inline void crash_kexec_stop_spus(void)
{
}
#endif /* CONFIG_SPU_BASE */
void default_machine_crash_shutdown(struct pt_regs *regs)
{
......@@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
crash_save_cpu(regs, crashing_cpu);
crash_kexec_prepare_cpus(crashing_cpu);
cpu_set(crashing_cpu, cpus_in_crash);
crash_kexec_stop_spus();
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
......@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
static long timezone_offset;
unsigned long ppc_proc_freq;
EXPORT_SYMBOL(ppc_proc_freq);
unsigned long ppc_tb_freq;
static u64 tb_last_jiffy __cacheline_aligned_in_smp;
......
......@@ -15,3 +15,10 @@ config OPROFILE
If unsure, say N.
config OPROFILE_CELL
bool "OProfile for Cell Broadband Engine"
depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
default y
help
Profiling of Cell BE SPUs requires special support enabled
by this option.
......@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
timer_int.o )
oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
cell/spu_profiler.o cell/vma_map.o \
cell/spu_task_sync.o
oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
oprofile-$(CONFIG_6xx) += op_model_7450.o
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef PR_UTIL_H
#define PR_UTIL_H
#include <linux/cpumask.h>
#include <linux/oprofile.h>
#include <asm/cell-pmu.h>
#include <asm/spu.h>
#include "../../platforms/cell/cbe_regs.h"
/* Defines used for sync_start */
#define SKIP_GENERIC_SYNC 0
#define SYNC_START_ERROR -1
#define DO_GENERIC_SYNC 1
struct spu_overlay_info { /* map of sections within an SPU overlay */
unsigned int vma; /* SPU virtual memory address from elf */
unsigned int size; /* size of section from elf */
unsigned int offset; /* offset of section into elf file */
unsigned int buf;
};
struct vma_to_fileoffset_map { /* map of sections within an SPU program */
struct vma_to_fileoffset_map *next; /* list pointer */
unsigned int vma; /* SPU virtual memory address from elf */
unsigned int size; /* size of section from elf */
unsigned int offset; /* offset of section into elf file */
unsigned int guard_ptr;
unsigned int guard_val;
/*
* The guard pointer is an entry in the _ovly_buf_table,
* computed using ovly.buf as the index into the table. Since
* ovly.buf values begin at '1' to reference the first (or 0th)
* entry in the _ovly_buf_table, the computation subtracts 1
* from ovly.buf.
* The guard value is stored in the _ovly_buf_table entry and
* is an index (starting at 1) back to the _ovly_table entry
* that is pointing at this _ovly_buf_table entry. So, for
* example, for an overlay scenario with one overlay segment
* and two overlay sections:
* - Section 1 points to the first entry of the
* _ovly_buf_table, which contains a guard value
* of '1', referencing the first (index=0) entry of
* _ovly_table.
* - Section 2 points to the second entry of the
* _ovly_buf_table, which contains a guard value
* of '2', referencing the second (index=1) entry of
* _ovly_table.
*/
};
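/*
* Editorial sketch: when the _ovly_table entry "ovly" at index i is
* added to the map (see the vma_map_add() call in vma_map.c below),
* the guard fields are filled in as
*
*	guard_ptr = ovly_buf_table_sym + (ovly.buf - 1) * 4;
*	guard_val = i + 1;
*
* i.e. guard_ptr is the local-store address of the one-u32
* _ovly_buf_table entry and guard_val is the 1-based _ovly_table
* index, exactly as the scenario above describes.
*/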
/* The three functions below are for maintaining and accessing
* the vma-to-fileoffset map.
*/
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
u64 objectid);
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
unsigned int vma, const struct spu *aSpu,
int *grd_val);
void vma_map_free(struct vma_to_fileoffset_map *map);
/*
* Entry point for SPU profiling.
* cycles_reset is the SPU_CYCLES count value specified by the user.
*/
int start_spu_profiling(unsigned int cycles_reset);
void stop_spu_profiling(void);
/* add the necessary profiling hooks */
int spu_sync_start(void);
/* remove the hooks */
int spu_sync_stop(void);
/* Record SPU program counter samples to the oprofile event buffer. */
void spu_sync_buffer(int spu_num, unsigned int *samples,
int num_samples);
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
#endif /* PR_UTIL_H */
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Authors: Maynard Johnson <maynardj@us.ibm.com>
* Carl Love <carll@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/hrtimer.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cell-pmu.h>
#include "pr_util.h"
#define TRACE_ARRAY_SIZE 1024
#define SCALE_SHIFT 14
static u32 *samples;
static int spu_prof_running;
static unsigned int profiling_interval;
#define NUM_SPU_BITS_TRBUF 16
#define SPUS_PER_TB_ENTRY 4
#define SPUS_PER_NODE 8
#define SPU_PC_MASK 0xFFFF
static DEFINE_SPINLOCK(sample_array_lock);
unsigned long sample_array_lock_flags;
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
unsigned long ns_per_cyc;
if (!freq_khz)
freq_khz = ppc_proc_freq/1000;
/* To calculate a timeout in nanoseconds, the basic
* formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
* To avoid floating point math, we use the scale math
* technique as described in linux/jiffies.h. We use
* a scale factor of SCALE_SHIFT, which provides 4 decimal places
* of precision. This is close enough for the purpose at hand.
*
* The value of the timeout should be small enough that the hw
* trace buffer will not get more than about 1/3 full for the
* maximum user specified (the LFSR value) hw sampling frequency.
* This is to ensure the trace buffer will never fill even if the
* kernel thread scheduling varies under a heavy system load.
*/
ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
}
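/*
* Worked example (editorial, assuming a 3.2 GHz Cell and a user
* cycles_reset of 100000):
*	ns_per_cyc         = (1000000 << 14) / 3200000 = 5120
*	profiling_interval = (5120 * 100000) >> 14     = 31250 ns
* i.e. the timer fires about every 31.25 us, which matches 100000
* cycles at 3.2 GHz.
*/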
/*
* Extract SPU PC from trace buffer entry
*/
static void spu_pc_extract(int cpu, int entry)
{
/* the trace buffer is 128 bits */
u64 trace_buffer[2];
u64 spu_mask;
int spu;
spu_mask = SPU_PC_MASK;
/* Each SPU PC is 16 bits; hence, four spus in each of
* the two 64-bit buffer entries that make up the
* 128-bit trace_buffer entry. Process two 64-bit values
* simultaneously.
* trace[0] SPU PC contents are: 0 1 2 3
* trace[1] SPU PC contents are: 4 5 6 7
*/
cbe_read_trace_buffer(cpu, trace_buffer);
for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
/* spu PC trace entry is upper 16 bits of the
* 18 bit SPU program counter
*/
samples[spu * TRACE_ARRAY_SIZE + entry]
= (spu_mask & trace_buffer[0]) << 2;
samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
= (spu_mask & trace_buffer[1]) << 2;
trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
}
}
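/*
* Worked example (editorial): with trace_buffer[0] = 0x000A000B000C000D,
* the loop above stores, per the "0 1 2 3" layout documented above:
*	SPU 3 sample = (0x000D & SPU_PC_MASK) << 2 = 0x34
*	SPU 2 sample = 0x30, SPU 1 sample = 0x2C, SPU 0 sample = 0x28
* The << 2 restores the two low bits of the 18-bit SPU PC that the
* hardware drops when packing each PC into 16 bits.
*/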
static int cell_spu_pc_collection(int cpu)
{
u32 trace_addr;
int entry;
/* process the collected SPU PC for the node */
entry = 0;
trace_addr = cbe_read_pm(cpu, trace_address);
while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
/* there is data in the trace buffer to process */
spu_pc_extract(cpu, entry);
entry++;
if (entry >= TRACE_ARRAY_SIZE)
/* samples array is full */
break;
trace_addr = cbe_read_pm(cpu, trace_address);
}
return entry;
}
static enum hrtimer_restart profile_spus(struct hrtimer *timer)
{
ktime_t kt;
int cpu, node, k, num_samples, spu_num;
if (!spu_prof_running)
goto stop;
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
continue;
node = cbe_cpu_to_node(cpu);
/* There should only be one kernel thread at a time processing
* the samples. In the very unlikely case that processing takes
* so long that multiple kernel threads get started, make sure
* only one of them works on the samples array at a time. The
* sample array must be loaded and then processed for a given
* cpu; it is not per cpu.
*/
spin_lock_irqsave(&sample_array_lock,
sample_array_lock_flags);
num_samples = cell_spu_pc_collection(cpu);
if (num_samples == 0) {
spin_unlock_irqrestore(&sample_array_lock,
sample_array_lock_flags);
continue;
}
for (k = 0; k < SPUS_PER_NODE; k++) {
spu_num = k + (node * SPUS_PER_NODE);
spu_sync_buffer(spu_num,
samples + (k * TRACE_ARRAY_SIZE),
num_samples);
}
spin_unlock_irqrestore(&sample_array_lock,
sample_array_lock_flags);
}
smp_wmb(); /* ensure spu event buffer updates are written */
/* don't want events intermingled... */
kt = ktime_set(0, profiling_interval);
if (!spu_prof_running)
goto stop;
hrtimer_forward(timer, timer->base->get_time(), kt);
return HRTIMER_RESTART;
stop:
printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
return HRTIMER_NORESTART;
}
static struct hrtimer timer;
/*
* Entry point for SPU profiling.
* NOTE: SPU profiling is done system-wide, not per-CPU.
*
* cycles_reset is the count value specified by the user when
* setting up OProfile to count SPU_CYCLES.
*/
int start_spu_profiling(unsigned int cycles_reset)
{
ktime_t kt;
pr_debug("timer resolution: %lu\n", TICK_NSEC);
kt = ktime_set(0, profiling_interval);
hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
timer.expires = kt;
timer.function = profile_spus;
/* Allocate arrays for collecting SPU PC samples */
samples = kzalloc(SPUS_PER_NODE *
TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
if (!samples)
return -ENOMEM;
spu_prof_running = 1;
hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
return 0;
}
void stop_spu_profiling(void)
{
spu_prof_running = 0;
hrtimer_cancel(&timer);
kfree(samples);
pr_debug("SPU_PROF: stop_spu_profiling issued\n");
}
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* The purpose of this file is to handle SPU event task switching
* and to record SPU context information into the OProfile
* event buffer.
*
* Additionally, the spu_sync_buffer function is provided as a helper
* for recording actual SPU program counter samples to the event buffer.
*/
#include <linux/dcookies.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/numa.h>
#include <linux/oprofile.h>
#include <linux/spinlock.h>
#include "pr_util.h"
#define RELEASE_ALL 9999
static DEFINE_SPINLOCK(buffer_lock);
static DEFINE_SPINLOCK(cache_lock);
static int num_spu_nodes;
int spu_prof_num_nodes;
int last_guard_val[MAX_NUMNODES * 8];
/* Container for caching information about an active SPU task. */
struct cached_info {
struct vma_to_fileoffset_map *map;
struct spu *the_spu; /* needed to access pointer to local_store */
struct kref cache_ref;
};
static struct cached_info *spu_info[MAX_NUMNODES * 8];
static void destroy_cached_info(struct kref *kref)
{
struct cached_info *info;
info = container_of(kref, struct cached_info, cache_ref);
vma_map_free(info->map);
kfree(info);
module_put(THIS_MODULE);
}
/* Return the cached_info for the passed SPU number.
* ATTENTION: Callers are responsible for obtaining the
* cache_lock if needed prior to invoking this function.
*/
static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
{
struct kref *ref;
struct cached_info *ret_info;
if (spu_num >= num_spu_nodes) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Invalid index %d into spu info cache\n",
__FUNCTION__, __LINE__, spu_num);
ret_info = NULL;
goto out;
}
if (!spu_info[spu_num] && the_spu) {
ref = spu_get_profile_private_kref(the_spu->ctx);
if (ref) {
spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
kref_get(&spu_info[spu_num]->cache_ref);
}
}
ret_info = spu_info[spu_num];
out:
return ret_info;
}
/* Looks for cached info for the passed spu. If not found, the
* cached info is created for the passed spu.
* Returns 0 for success; otherwise, a negative error code.
*/
static int
prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
{
unsigned long flags;
struct vma_to_fileoffset_map *new_map;
int retval = 0;
struct cached_info *info;
/* We won't bother getting cache_lock here since we
* don't do anything with the cached_info that's returned.
*/
info = get_cached_info(spu, spu->number);
if (info) {
pr_debug("Found cached SPU info.\n");
goto out;
}
/* Create cached_info and set spu_info[spu->number] to point to it.
* spu->number is a system-wide value, not a per-node value.
*/
info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
if (!info) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: create vma_map failed\n",
__FUNCTION__, __LINE__);
retval = -ENOMEM;
goto err_alloc;
}
new_map = create_vma_map(spu, objectId);
if (!new_map) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: create vma_map failed\n",
__FUNCTION__, __LINE__);
retval = -ENOMEM;
goto err_alloc;
}
pr_debug("Created vma_map\n");
info->map = new_map;
info->the_spu = spu;
kref_init(&info->cache_ref);
spin_lock_irqsave(&cache_lock, flags);
spu_info[spu->number] = info;
/* Increment count before passing off ref to SPUFS. */
kref_get(&info->cache_ref);
/* We increment the module refcount here since SPUFS is
* responsible for the final destruction of the cached_info,
* and it must be able to access the destroy_cached_info()
* function defined in the OProfile module. We decrement
* the module refcount in destroy_cached_info.
*/
try_module_get(THIS_MODULE);
spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
destroy_cached_info);
spin_unlock_irqrestore(&cache_lock, flags);
goto out;
err_alloc:
kfree(info);
out:
return retval;
}
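/*
* Editorial note on the reference counting above: kref_init() leaves
* the count at 1 and kref_get() raises it to 2 -- one reference for
* spu_info[spu->number] and one handed to SPUFS via
* spu_set_profile_private_kref(). release_cached_info() drops the
* array's reference; when SPUFS drops the last one,
* destroy_cached_info() runs and releases the module reference taken
* with try_module_get().
*/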
/*
* NOTE: The caller is responsible for locking the
* cache_lock prior to calling this function.
*/
static int release_cached_info(int spu_index)
{
int index, end;
if (spu_index == RELEASE_ALL) {
end = num_spu_nodes;
index = 0;
} else {
if (spu_index >= num_spu_nodes) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: "
"Invalid index %d into spu info cache\n",
__FUNCTION__, __LINE__, spu_index);
goto out;
}
end = spu_index + 1;
index = spu_index;
}
for (; index < end; index++) {
if (spu_info[index]) {
kref_put(&spu_info[index]->cache_ref,
destroy_cached_info);
spu_info[index] = NULL;
}
}
out:
return 0;
}
/* The source code for fast_get_dcookie was "borrowed"
* from drivers/oprofile/buffer_sync.c.
*/
/* Optimisation. We can manage without taking the dcookie sem
* because we cannot reach this code without at least one
* dcookie user still being registered (namely, the reader
* of the event buffer).
*/
static inline unsigned long fast_get_dcookie(struct dentry *dentry,
struct vfsmount *vfsmnt)
{
unsigned long cookie;
if (dentry->d_cookie)
return (unsigned long)dentry;
get_dcookie(dentry, vfsmnt, &cookie);
return cookie;
}
/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
* which corresponds loosely to "application name". Also, determine
* the offset for the SPU ELF object. If the computed offset is
* non-zero, it implies an embedded SPU object; otherwise, it's a
* separate SPU binary, in which case we retrieve its dcookie.
* For the embedded case, we must determine if SPU ELF is embedded
* in the executable application or another file (i.e., shared lib).
* If embedded in a shared lib, we must get the dcookie and return
* that to the caller.
*/
static unsigned long
get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
unsigned long *spu_bin_dcookie,
unsigned long spu_ref)
{
unsigned long app_cookie = 0;
unsigned int my_offset = 0;
struct file *app = NULL;
struct vm_area_struct *vma;
struct mm_struct *mm = spu->mm;
if (!mm)
goto out;
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!vma->vm_file)
continue;
if (!(vma->vm_flags & VM_EXECUTABLE))
continue;
app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
vma->vm_file->f_vfsmnt);
pr_debug("got dcookie for %s\n",
vma->vm_file->f_dentry->d_name.name);
app = vma->vm_file;
break;
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
continue;
my_offset = spu_ref - vma->vm_start;
if (!vma->vm_file)
goto fail_no_image_cookie;
pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
my_offset, spu_ref,
vma->vm_file->f_dentry->d_name.name);
*offsetp = my_offset;
break;
}
/* Fell off the end of the vma list without finding the SPU
* binary's vma; bail out rather than dereference a NULL vma.
*/
if (!vma)
goto fail_no_image_cookie;
*spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
vma->vm_file->f_vfsmnt);
pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
up_read(&mm->mmap_sem);
out:
return app_cookie;
fail_no_image_cookie:
up_read(&mm->mmap_sem);
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Cannot find dcookie for SPU binary\n",
__FUNCTION__, __LINE__);
goto out;
}
/* This function finds or creates cached context information for the
* passed SPU and records SPU context information into the OProfile
* event buffer.
*/
static int process_context_switch(struct spu *spu, unsigned long objectId)
{
unsigned long flags;
int retval;
unsigned int offset = 0;
unsigned long spu_cookie = 0, app_dcookie;
retval = prepare_cached_spu_info(spu, objectId);
if (retval)
goto out;
/* Get dcookie first because a mutex_lock is taken in that
* code path, so interrupts must not be disabled.
*/
app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
if (!app_dcookie || !spu_cookie) {
retval = -ENOENT;
goto out;
}
/* Record context info in event buffer */
spin_lock_irqsave(&buffer_lock, flags);
add_event_entry(ESCAPE_CODE);
add_event_entry(SPU_CTX_SWITCH_CODE);
add_event_entry(spu->number);
add_event_entry(spu->pid);
add_event_entry(spu->tgid);
add_event_entry(app_dcookie);
add_event_entry(spu_cookie);
add_event_entry(offset);
spin_unlock_irqrestore(&buffer_lock, flags);
smp_wmb(); /* ensure spu event buffer updates are written */
/* don't want entries intermingled... */
out:
return retval;
}
/*
* This function is invoked on either a bind_context or unbind_context.
* If called for an unbind_context, the val arg is 0; otherwise,
* it is the object-id value for the spu context.
* The data arg is of type 'struct spu *'.
*/
static int spu_active_notify(struct notifier_block *self, unsigned long val,
void *data)
{
int retval;
unsigned long flags;
struct spu *the_spu = data;
pr_debug("SPU event notification arrived\n");
if (!val) {
spin_lock_irqsave(&cache_lock, flags);
retval = release_cached_info(the_spu->number);
spin_unlock_irqrestore(&cache_lock, flags);
} else {
retval = process_context_switch(the_spu, val);
}
return retval;
}
static struct notifier_block spu_active = {
.notifier_call = spu_active_notify,
};
static int number_of_online_nodes(void)
{
u32 cpu;
u32 tmp;
int nodes = 0;
for_each_online_cpu(cpu) {
tmp = cbe_cpu_to_node(cpu) + 1;
if (tmp > nodes)
nodes++;
}
return nodes;
}
/* The main purpose of this function is to synchronize
* OProfile with SPUFS by registering to be notified of
* SPU task switches.
*
* NOTE: When profiling SPUs, we must ensure that only
* spu_sync_start is invoked and not the generic sync_start
* in drivers/oprofile/oprof.c. A return value of
* SKIP_GENERIC_SYNC or SYNC_START_ERROR will
* accomplish this.
*/
int spu_sync_start(void)
{
int k;
int ret = SKIP_GENERIC_SYNC;
int register_ret;
unsigned long flags = 0;
spu_prof_num_nodes = number_of_online_nodes();
num_spu_nodes = spu_prof_num_nodes * 8;
spin_lock_irqsave(&buffer_lock, flags);
add_event_entry(ESCAPE_CODE);
add_event_entry(SPU_PROFILING_CODE);
add_event_entry(num_spu_nodes);
spin_unlock_irqrestore(&buffer_lock, flags);
/* Register for SPU events */
register_ret = spu_switch_event_register(&spu_active);
if (register_ret) {
ret = SYNC_START_ERROR;
goto out;
}
for (k = 0; k < (MAX_NUMNODES * 8); k++)
last_guard_val[k] = 0;
pr_debug("spu_sync_start -- running.\n");
out:
return ret;
}
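/*
* Editorial sketch (the generic-side handling lives in
* drivers/oprofile and is not part of this file): the caller of this
* hook is expected to interpret the pr_util.h return codes roughly as
*
*	ret = ops->sync_start();
*	switch (ret) {
*	case SKIP_GENERIC_SYNC:
*		break;                  (SPU path did everything)
*	case DO_GENERIC_SYNC:
*		generic_sync_start();   (hypothetical generic helper)
*		break;
*	case SYNC_START_ERROR:
*		return -1;
*	}
*
* so returning SKIP_GENERIC_SYNC above keeps the generic sync_start
* from running, as the NOTE before this function requires.
*/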
/* Record SPU program counter samples to the oprofile event buffer. */
void spu_sync_buffer(int spu_num, unsigned int *samples,
int num_samples)
{
unsigned long long file_offset;
unsigned long flags;
int i;
struct vma_to_fileoffset_map *map;
struct spu *the_spu;
unsigned long long spu_num_ll = spu_num;
unsigned long long spu_num_shifted = spu_num_ll << 32;
struct cached_info *c_info;
/* We need to obtain the cache_lock here because it's
* possible that after getting the cached_info, the SPU job
* corresponding to this cached_info may end, thus resulting
* in the destruction of the cached_info.
*/
spin_lock_irqsave(&cache_lock, flags);
c_info = get_cached_info(NULL, spu_num);
if (!c_info) {
/* This legitimately happens when the SPU task ends before all
* samples are recorded.
* No big deal -- so we just drop a few samples.
*/
pr_debug("SPU_PROF: No cached SPU contex "
"for SPU #%d. Dropping samples.\n", spu_num);
goto out;
}
map = c_info->map;
the_spu = c_info->the_spu;
spin_lock(&buffer_lock);
for (i = 0; i < num_samples; i++) {
unsigned int sample = *(samples+i);
int grd_val = 0;
file_offset = 0;
if (sample == 0)
continue;
file_offset = vma_map_lookup(map, sample, the_spu, &grd_val);
/* If overlays are used by this SPU application, the guard
* value is non-zero, indicating which overlay section is in
* use. We need to discard samples taken during the time
* period during which an overlay swap occurs (i.e., the
* guard value changes).
*/
if (grd_val && grd_val != last_guard_val[spu_num]) {
last_guard_val[spu_num] = grd_val;
/* Drop the rest of the samples. */
break;
}
add_event_entry(file_offset | spu_num_shifted);
}
spin_unlock(&buffer_lock);
out:
spin_unlock_irqrestore(&cache_lock, flags);
}
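/*
* Editorial note: each entry added above packs the SPU number into
* the upper 32 bits and the file offset into the lower 32 bits:
*
*	u64 entry = file_offset | spu_num_shifted;
*	int spu   = entry >> 32;
*	u32 off   = entry & 0xFFFFFFFFULL;
*
* which is how a post-processing tool would be expected to split the
* sample back apart.
*/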
int spu_sync_stop(void)
{
unsigned long flags = 0;
int ret = spu_switch_event_unregister(&spu_active);
if (ret) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: spu_switch_event_unregister returned %d\n",
__FUNCTION__, __LINE__, ret);
goto out;
}
spin_lock_irqsave(&cache_lock, flags);
ret = release_cached_info(RELEASE_ALL);
spin_unlock_irqrestore(&cache_lock, flags);
out:
pr_debug("spu_sync_stop -- done.\n");
return ret;
}
/*
* Cell Broadband Engine OProfile Support
*
* (C) Copyright IBM Corporation 2006
*
* Author: Maynard Johnson <maynardj@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* The code in this source file is responsible for generating
* vma-to-fileOffset maps for both overlay and non-overlay SPU
* applications.
*/
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/elf.h>
#include "pr_util.h"
void vma_map_free(struct vma_to_fileoffset_map *map)
{
while (map) {
struct vma_to_fileoffset_map *next = map->next;
kfree(map);
map = next;
}
}
unsigned int
vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
const struct spu *aSpu, int *grd_val)
{
/*
* Default the offset to the physical address + a flag value.
* Addresses of dynamically generated code can't be found in the vma
* map. For those addresses the flagged value will be sent on to
* the user space tools so they can be reported rather than just
* thrown away.
*/
u32 offset = 0x10000000 + vma;
u32 ovly_grd;
for (; map; map = map->next) {
if (vma < map->vma || vma >= map->vma + map->size)
continue;
if (map->guard_ptr) {
ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
if (ovly_grd != map->guard_val)
continue;
*grd_val = ovly_grd;
}
offset = vma - map->vma + map->offset;
break;
}
return offset;
}
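/*
* Editorial note: a caller can tell a failed lookup from a real file
* offset by checking for the flag range set up above, e.g.
*
*	u32 off = vma_map_lookup(map, vma, spu, &grd);
*	if (off >= 0x10000000)
*		handle_flagged_vma(off);   (hypothetical handler)
*
* since genuine map->offset values are ordinary SPU ELF file offsets,
* far below the 0x10000000 flag base.
*/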
static struct vma_to_fileoffset_map *
vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
unsigned int size, unsigned int offset, unsigned int guard_ptr,
unsigned int guard_val)
{
struct vma_to_fileoffset_map *new =
kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
if (!new) {
printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
__FUNCTION__, __LINE__);
vma_map_free(map);
return NULL;
}
new->next = map;
new->vma = vma;
new->size = size;
new->offset = offset;
new->guard_ptr = guard_ptr;
new->guard_val = guard_val;
return new;
}
/* Parse SPE ELF header and generate a list of vma_maps.
* A pointer to the first vma_map in the generated list
* of vma_maps is returned. */
struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
unsigned long spu_elf_start)
{
static const unsigned char expected[EI_PAD] = {
[EI_MAG0] = ELFMAG0,
[EI_MAG1] = ELFMAG1,
[EI_MAG2] = ELFMAG2,
[EI_MAG3] = ELFMAG3,
[EI_CLASS] = ELFCLASS32,
[EI_DATA] = ELFDATA2MSB,
[EI_VERSION] = EV_CURRENT,
[EI_OSABI] = ELFOSABI_NONE
};
int grd_val;
struct vma_to_fileoffset_map *map = NULL;
struct spu_overlay_info ovly;
unsigned int overlay_tbl_offset = -1;
unsigned long phdr_start, shdr_start;
Elf32_Ehdr ehdr;
Elf32_Phdr phdr;
Elf32_Shdr shdr, shdr_str;
Elf32_Sym sym;
int i, j;
char name[32];
unsigned int ovly_table_sym = 0;
unsigned int ovly_buf_table_sym = 0;
unsigned int ovly_table_end_sym = 0;
unsigned int ovly_buf_table_end_sym = 0;
unsigned long ovly_table;
unsigned int n_ovlys;
/* Get and validate ELF header. */
if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
goto fail;
if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Unexpected e_ident parsing SPU ELF\n",
__FUNCTION__, __LINE__);
goto fail;
}
if (ehdr.e_machine != EM_SPU) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Unexpected e_machine parsing SPU ELF\n",
__FUNCTION__, __LINE__);
goto fail;
}
if (ehdr.e_type != ET_EXEC) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Unexpected e_type parsing SPU ELF\n",
__FUNCTION__, __LINE__);
goto fail;
}
phdr_start = spu_elf_start + ehdr.e_phoff;
shdr_start = spu_elf_start + ehdr.e_shoff;
/* Traverse program headers. */
for (i = 0; i < ehdr.e_phnum; i++) {
if (copy_from_user(&phdr,
(void *) (phdr_start + i * sizeof(phdr)),
sizeof(phdr)))
goto fail;
if (phdr.p_type != PT_LOAD)
continue;
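/* Editorial assumption: bit 27 of p_flags marks SPU overlay
* segments; those are mapped from the _ovly_table below rather
* than here.
*/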
if (phdr.p_flags & (1 << 27))
continue;
map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
phdr.p_offset, 0, 0);
if (!map)
goto fail;
}
pr_debug("SPU_PROF: Created non-overlay maps\n");
/* Traverse section table and search for overlay-related symbols. */
for (i = 0; i < ehdr.e_shnum; i++) {
if (copy_from_user(&shdr,
(void *) (shdr_start + i * sizeof(shdr)),
sizeof(shdr)))
goto fail;
if (shdr.sh_type != SHT_SYMTAB)
continue;
if (shdr.sh_entsize != sizeof (sym))
continue;
if (copy_from_user(&shdr_str,
(void *) (shdr_start + shdr.sh_link *
sizeof(shdr)),
sizeof(shdr)))
goto fail;
if (shdr_str.sh_type != SHT_STRTAB)
goto fail;
for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
if (copy_from_user(&sym, (void *) (spu_elf_start +
shdr.sh_offset + j *
sizeof (sym)),
sizeof (sym)))
goto fail;
if (copy_from_user(name, (void *)
(spu_elf_start + shdr_str.sh_offset +
sym.st_name),
20))
goto fail;
if (memcmp(name, "_ovly_table", 12) == 0)
ovly_table_sym = sym.st_value;
if (memcmp(name, "_ovly_buf_table", 16) == 0)
ovly_buf_table_sym = sym.st_value;
if (memcmp(name, "_ovly_table_end", 16) == 0)
ovly_table_end_sym = sym.st_value;
if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
ovly_buf_table_end_sym = sym.st_value;
}
}
/* If we don't have overlays, we're done. */
if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
|| ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
pr_debug("SPU_PROF: No overlay table found\n");
goto out;
} else {
pr_debug("SPU_PROF: Overlay table found\n");
}
/* The _ovly_table symbol represents a table with one entry
* per overlay section. The _ovly_buf_table symbol represents
* a table with one entry per overlay region.
* The struct spu_overlay_info gives the structure of the _ovly_table
* entries. The structure of _ovly_buf_table is simply one
* u32 word per entry.
*/
overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
aSpu, &grd_val);
/* vma_map_lookup() flags a failed lookup by returning
* 0x10000000 + vma; since overlay_tbl_offset is unsigned, the
* original "< 0" test could never fire.
*/
if (overlay_tbl_offset >= 0x10000000) {
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Error finding SPU overlay table\n",
__FUNCTION__, __LINE__);
goto fail;
}
ovly_table = spu_elf_start + overlay_tbl_offset;
n_ovlys = (ovly_table_end_sym -
ovly_table_sym) / sizeof (ovly);
/* Traverse overlay table. */
for (i = 0; i < n_ovlys; i++) {
if (copy_from_user(&ovly, (void *)
(ovly_table + i * sizeof (ovly)),
sizeof (ovly)))
goto fail;
/* The ovly.vma/size/offset arguments are analogous to the same
* arguments used above for non-overlay maps. The final two
* args are referred to as the guard pointer and the guard
* value.
* The guard pointer is an entry in the _ovly_buf_table,
* computed using ovly.buf as the index into the table. Since
* ovly.buf values begin at '1' to reference the first (or 0th)
* entry in the _ovly_buf_table, the computation subtracts 1
* from ovly.buf.
* The guard value is stored in the _ovly_buf_table entry and
* is an index (starting at 1) back to the _ovly_table entry
* that is pointing at this _ovly_buf_table entry. So, for
* example, for an overlay scenario with one overlay segment
* and two overlay sections:
* - Section 1 points to the first entry of the
* _ovly_buf_table, which contains a guard value
* of '1', referencing the first (index=0) entry of
* _ovly_table.
* - Section 2 points to the second entry of the
* _ovly_buf_table, which contains a guard value
* of '2', referencing the second (index=1) entry of
* _ovly_table.
*/
map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
if (!map)
goto fail;
}
goto out;
fail:
map = NULL;
out:
return map;
}
......@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
static struct op_counter_config ctr[OP_MAX_COUNTER];
static struct op_system_config sys;
static int op_per_cpu_rc;
static void op_handle_interrupt(struct pt_regs *regs)
{
model->handle_interrupt(regs, ctr);
......@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
static void op_powerpc_cpu_setup(void *dummy)
{
model->cpu_setup(ctr);
int ret;
ret = model->cpu_setup(ctr);
if (ret != 0)
op_per_cpu_rc = ret;
}
static int op_powerpc_setup(void)
{
int err;
op_per_cpu_rc = 0;
/* Grab the hardware */
err = reserve_pmc_hardware(op_handle_interrupt);
if (err)
return err;
/* Pre-compute the values to stuff in the hardware registers. */
model->reg_setup(ctr, &sys, model->num_counters);
op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
/* Configure the registers on all cpus. */
if (op_per_cpu_rc)
goto out;
/* Configure the registers on all cpus. If an error occurs on one
* of the cpus, op_per_cpu_rc will be set to the error */
on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
return 0;
out: if (op_per_cpu_rc) {
/* error on setup release the performance counter hardware */
release_pmc_hardware();
}
return op_per_cpu_rc;
}
static void op_powerpc_shutdown(void)
......@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
static void op_powerpc_cpu_start(void *dummy)
{
model->start(ctr);
/* If any of the cpus returned an error, set the
* global flag to the error so it can be returned
* to the generic OProfile caller.
*/
int ret;
ret = model->start(ctr);
if (ret != 0)
op_per_cpu_rc = ret;
}
static int op_powerpc_start(void)
{
op_per_cpu_rc = 0;
if (model->global_start)
model->global_start(ctr);
if (model->start)
return model->global_start(ctr);
if (model->start) {
on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
return 0;
return op_per_cpu_rc;
}
return -EIO; /* No start function is defined for this
power architecture */
}
static inline void op_powerpc_cpu_stop(void *dummy)
......@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
switch (cur_cpu_spec->oprofile_type) {
#ifdef CONFIG_PPC64
#ifdef CONFIG_PPC_CELL_NATIVE
#ifdef CONFIG_OPROFILE_CELL
case PPC_OPROFILE_CELL:
if (firmware_has_feature(FW_FEATURE_LPAR))
return -ENODEV;
model = &op_model_cell;
ops->sync_start = model->sync_start;
ops->sync_stop = model->sync_stop;
break;
#endif
case PPC_OPROFILE_RS64:
......
......@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
/* Configures the counters on this CPU based on the global
* settings */
static void fsl7450_cpu_setup(struct op_counter_config *ctr)
static int fsl7450_cpu_setup(struct op_counter_config *ctr)
{
/* freeze all counters */
pmc_stop_ctrs();
......@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
mtspr(SPRN_MMCR0, mmcr0_val);
mtspr(SPRN_MMCR1, mmcr1_val);
mtspr(SPRN_MMCR2, mmcr2_val);
return 0;
}
#define NUM_CTRS 6
/* Configures the global settings for the countes on all CPUs. */
static void fsl7450_reg_setup(struct op_counter_config *ctr,
static int fsl7450_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
| mmcr1_event6(ctr[5].event);
mmcr2_val = 0;
return 0;
}
/* Sets the counters on this CPU to the chosen values, and starts them */
static void fsl7450_start(struct op_counter_config *ctr)
static int fsl7450_start(struct op_counter_config *ctr)
{
int i;
......@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
pmc_start_ctrs();
oprofile_running = 1;
return 0;
}
/* Stop the counters on this CPU */
......@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
/* The freeze bit was set by the interrupt. */
/* Clear the freeze bit, and reenable the interrupt.
* The counters won't actually start until the rfi clears
* the PMM bit */
* the PM/M bit */
pmc_start_ctrs();
}
......
......@@ -38,6 +38,19 @@
#include "../platforms/cell/interrupt.h"
#include "../platforms/cell/cbe_regs.h"
#include "cell/pr_util.h"
static void cell_global_stop_spu(void);
/*
* spu_cycle_reset is the number of cycles between samples.
* This variable is used for SPU profiling and should ONLY be set
* at the beginning of cell_reg_setup; otherwise, it's read-only.
*/
static unsigned int spu_cycle_reset;
#define NUM_SPUS_PER_NODE 8
#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
......@@ -51,6 +64,7 @@
#define NUM_TRACE_BUS_WORDS 4
#define NUM_INPUT_BUS_WORDS 2
#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
struct pmc_cntrl_data {
unsigned long vcntr;
......@@ -62,10 +76,9 @@ struct pmc_cntrl_data {
/*
* ibm,cbe-perftools rtas parameters
*/
struct pm_signal {
u16 cpu; /* Processor to modify */
u16 sub_unit; /* hw subunit this applies to (if applicable) */
u16 sub_unit; /* hw subunit this applies to (if applicable)*/
short int signal_group; /* Signal Group to Enable/Disable */
u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
* Bus Word(s) (bitmask)
......@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
/* Interpretation of hdw_thread:
/*
* The CELL profiling code makes rtas calls to setup the debug bus to
* route the performance signals. Additionally, SPU profiling requires
* a second rtas call to setup the hardware to capture the SPU PCs.
* The EIO error value is returned if the token lookups or the rtas
* call fail. The EIO error number is the best choice of the existing
* error numbers. The probability of rtas related error is very low. But
* by returning EIO and printing additional information to dmsg the user
* will know that OProfile did not start and dmesg will tell them why.
* OProfile does not support returning errors on Stop. Not a huge issue
* since failure to reset the debug bus or stop the SPU PC collection is
* not a fatel issue. Chances are if the Stop failed, Start doesn't work
* either.
*/
/*
* Interpretation of hdw_thread:
* 0 - even virtual cpus 0, 2, 4,...
* 1 - odd virtual cpus 1, 3, 5, ...
*
* FIXME: this is strictly wrong, we need to clean this up in a number
* of places. It works for now. -arnd
*/
static u32 hdw_thread;
static u32 virt_cntr_inter_mask;
static struct timer_list timer_virt_cntr;
/* pm_signal needs to be global since it is initialized in
/*
* pm_signal needs to be global since it is initialized in
* cell_reg_setup at the time when the necessary information
* is available.
*/
static struct pm_signal pm_signal[NR_PHYS_CTRS];
static int pm_rtas_token;
static int pm_rtas_token; /* token for debug bus setup call */
static int spu_rtas_token; /* token for SPU cycle profiling */
static u32 reset_value[NR_PHYS_CTRS];
static int num_counters;
......@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru,
{
u64 paddr = __pa(address);
return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
paddr >> 32, paddr & 0xffffffff, length);
return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
passthru, paddr >> 32, paddr & 0xffffffff, length);
}
static void pm_rtas_reset_signals(u32 node)
......@@ -156,7 +190,8 @@ static void pm_rtas_reset_signals(u32 node)
int ret;
struct pm_signal pm_signal_local;
/* The debug bus is being set to the passthru disable state.
/*
* The debug bus is being set to the passthru disable state.
* However, the FW still expects at least one legal signal routing
* entry or it will return an error on the arguments. If we don't
* supply a valid entry, we must ignore all return values. Ignoring
......@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node)
&pm_signal_local,
sizeof(struct pm_signal));
if (ret)
if (unlikely(ret))
/*
* Not a fatal error. For Oprofile stop, the oprofile
* functions do not support returning an error for
* failure to stop OProfile.
*/
printk(KERN_WARNING "%s: rtas returned: %d\n",
__FUNCTION__, ret);
}
static void pm_rtas_activate_signals(u32 node, u32 count)
static int pm_rtas_activate_signals(u32 node, u32 count)
{
int ret;
int i, j;
struct pm_signal pm_signal_local[NR_PHYS_CTRS];
/* There is no debug setup required for the cycles event.
/*
* There is no debug setup required for the cycles event.
* Note that only events in the same group can be used.
* Otherwise, there will be conflicts in correctly routing
* the signals on the debug bus. It is the responsibility
......@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
pm_signal_local,
i * sizeof(struct pm_signal));
if (ret)
if (unlikely(ret)) {
printk(KERN_WARNING "%s: rtas returned: %d\n",
__FUNCTION__, ret);
return -EIO;
}
}
return 0;
}
/*
......@@ -260,7 +305,8 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
/* Some of the islands signal selection is based on 64 bit words.
/*
* Some of the islands signal selection is based on 64 bit words.
* The debug bus words are 32 bits, the input words to the performance
* counters are defined as 32 bits. Need to convert the 64 bit island
* specification to the appropriate 32 input bit and bus word for the
......@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
input_bus[j] = i;
pm_regs.group_control |=
(i << (31 - i));
break;
}
}
......@@ -309,7 +356,8 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
static void write_pm_cntrl(int cpu)
{
/* Oprofile will use 32 bit counters, set bits 7:10 to 0
/*
* Oprofile will use 32 bit counters, set bits 7:10 to 0
* pmregs.pm_cntrl is a global
*/
......@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu)
if (pm_regs.pm_cntrl.freeze == 1)
val |= CBE_PM_FREEZE_ALL_CTRS;
/* Routine set_count_mode must be called previously to set
/*
* Routine set_count_mode must be called previously to set
* the count mode based on the user selection of user and kernel.
*/
val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
......@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu)
static inline void
set_count_mode(u32 kernel, u32 user)
{
/* The user must specify user and kernel if they want them. If
/*
* The user must specify user and kernel if they want them. If
* neither is specified, OProfile will count in hypervisor mode.
* pm_regs.pm_cntrl is a global
*/
......@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
* pair of per-cpu arrays is used for storing the previous and next
* pmc values for a given node.
* NOTE: We use the per-cpu variable to improve cache performance.
*
* This routine will alternate loading the virtual counters for
* virtual CPUs
*/
static void cell_virtual_cntr(unsigned long data)
{
/* This routine will alternate loading the virtual counters for
* virtual CPUs
*/
int i, prev_hdw_thread, next_hdw_thread;
u32 cpu;
unsigned long flags;
/* Make sure that the interrupt_handler and
* the virt counter are not both playing with
* the counters on the same node.
/*
* Make sure that the interrupt_handler and the virt counter are
* not both playing with the counters on the same node.
*/
spin_lock_irqsave(&virt_cntr_lock, flags);
......@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data)
hdw_thread = 1 ^ hdw_thread;
next_hdw_thread = hdw_thread;
for (i = 0; i < num_counters; i++)
/* There are some per thread events. Must do the
/*
* There are some per thread events. Must do the
* set event, for the thread that is being started
*/
for (i = 0; i < num_counters; i++)
set_pm_event(i,
pmc_cntrl[next_hdw_thread][i].evnts,
pmc_cntrl[next_hdw_thread][i].masks);
/* The following is done only once per each node, but
/*
* The following is done only once per each node, but
* we need cpu #, not node #, to pass to the cbe_xxx functions.
*/
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
continue;
/* stop counters, save counter values, restore counts
/*
* stop counters, save counter values, restore counts
* for previous thread
*/
cbe_disable_pm(cpu);
......@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data)
next_hdw_thread)[i]);
}
/* Switch to the other thread. Change the interrupt
/*
* Switch to the other thread. Change the interrupt
* and control regs to be scheduled on the CPU
* corresponding to the thread to execute.
*/
for (i = 0; i < num_counters; i++) {
if (pmc_cntrl[next_hdw_thread][i].enabled) {
/* There are some per thread events.
/*
* There are some per thread events.
* Must do the set event, enable_cntr
* for each cpu.
*/
......@@ -482,17 +537,42 @@ static void start_virt_cntrs(void)
}
/* This function is called once for all cpus combined */
static void
cell_reg_setup(struct op_counter_config *ctr,
static int cell_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys, int num_ctrs)
{
int i, j, cpu;
spu_cycle_reset = 0;
if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
spu_cycle_reset = ctr[0].count;
/*
* Each node will need to make the rtas call to start
* and stop SPU profiling. Get the token once and store it.
*/
spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
printk(KERN_ERR
"%s: rtas token ibm,cbe-spu-perftools unknown\n",
__FUNCTION__);
return -EIO;
}
}
pm_rtas_token = rtas_token("ibm,cbe-perftools");
if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
/*
* For all events except PPU CYCLES, each node will need to make
* the rtas cbe-perftools call to setup and reset the debug bus.
* Make the token lookup call once and store it in the global
* variable pm_rtas_token.
*/
if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
printk(KERN_ERR
"%s: rtas token ibm,cbe-perftools unknown\n",
__FUNCTION__);
goto out;
return -EIO;
}
num_counters = num_ctrs;
......@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr,
per_cpu(pmc_values, j)[i] = 0;
}
/* Setup the thread 1 events, map the thread 0 event to the
/*
* Setup the thread 1 events, map the thread 0 event to the
* equivalent thread 1 event.
*/
for (i = 0; i < num_ctrs; ++i) {
......@@ -544,7 +625,8 @@ cell_reg_setup(struct op_counter_config *ctr,
for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
input_bus[i] = 0xff;
/* Our counters count up, and "count" refers to
/*
* Our counters count up, and "count" refers to
* how much before the next interrupt, and we interrupt
* on overflow. So we calculate the starting value
* which will give us "count" until overflow.
......@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr,
for (i = 0; i < num_counters; ++i) {
per_cpu(pmc_values, cpu)[i] = reset_value[i];
}
out:
;
return 0;
}
/* This function is called once for each cpu */
static void cell_cpu_setup(struct op_counter_config *cntr)
static int cell_cpu_setup(struct op_counter_config *cntr)
{
u32 cpu = smp_processor_id();
u32 num_enabled = 0;
int i;
if (spu_cycle_reset)
return 0;
/* There is one performance monitor per processor chip (i.e. node),
* so we only need to perform this function once per node.
*/
if (cbe_get_hw_thread_id(cpu))
goto out;
if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
__FUNCTION__);
goto out;
}
return 0;
/* Stop all counters */
cbe_disable_pm(cpu);
......@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
}
}
pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
/*
* pm_rtas_activate_signals() will return -EIO if the FW
* call fails.
*/
return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
}
#define ENTRIES 303
#define MAXLFSR 0xFFFFFF
/* precomputed table of 24 bit LFSR values */
static int initial_lfsr[] = {
8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
};
/*
* The hardware uses an LFSR counting sequence to determine when to capture
* the SPU PCs. An LFSR sequence is like a pseudo-random number sequence
* where each number occurs once in the sequence but the sequence is not in
* numerical order. The SPU PC capture is done when the LFSR sequence reaches
* the last value in the sequence. Hence the user specified value N
* corresponds to the LFSR number that is N from the end of the sequence.
*
* To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
* LFSR sequence is broken into four ranges. The spacing of the precomputed
* values is adjusted in each range so the error between the user-specified
* number (N) of events between samples and the actual number of events based
* on the precomputed value will be less than about 6.2%. Note, if the user
* specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
* This is to prevent the loss of samples because the trace buffer is full.
*
* User specified N Step between Index in
* precomputed values precomputed
* table
* 0 to 2^16-1 ---- 0
* 2^16 to 2^16+2^19-1 2^12 1 to 128
* 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256
* 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302
*
*
* For example, the LFSR values in the second range are computed for 2^16,
* 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
* 1, 2,..., 127, 128.
*
* The 24 bit LFSR value for the nth number in the sequence can be
* calculated using the following code:
*
* #define size 24
* int calculate_lfsr(int n)
* {
* int i;
* unsigned int newlfsr0;
* unsigned int lfsr = 0xFFFFFF;
* unsigned int howmany = n;
*
* for (i = 2; i < howmany + 2; i++) {
* newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
* ((lfsr >> (size - 1 - 1)) & 1) ^
* (((lfsr >> (size - 1 - 6)) & 1) ^
* ((lfsr >> (size - 1 - 23)) & 1)));
*
* lfsr >>= 1;
* lfsr = lfsr | (newlfsr0 << (size - 1));
* }
* return lfsr;
* }
*/
#define V2_16 (0x1 << 16)
#define V2_19 (0x1 << 19)
#define V2_22 (0x1 << 22)
static int calculate_lfsr(int n)
{
/*
* The ranges and steps are in powers of 2 so the calculations
* can be done using shifts rather than divides.
*/
int index;
if ((n >> 16) == 0)
index = 0;
else if (((n - V2_16) >> 19) == 0)
index = ((n - V2_16) >> 12) + 1;
else if (((n - V2_16 - V2_19) >> 22) == 0)
index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
else
index = ENTRIES-1;
/* make sure index is valid */
if ((index > ENTRIES) || (index < 0))
index = ENTRIES-1;
return initial_lfsr[index];
}
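/*
* Worked example (editorial): for a user value n = 100000,
* (n >> 16) != 0 while ((n - V2_16) >> 19) == 0, so the second range
* applies:
*	index = ((100000 - 65536) >> 12) + 1 = (34464 >> 12) + 1 = 9
* and initial_lfsr[9] (7458917) is returned.
*/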
static int pm_rtas_activate_spu_profiling(u32 node)
{
int ret, i;
struct pm_signal pm_signal_local[NR_PHYS_CTRS];
/*
* Set up the rtas call to configure the debug bus to
* route the SPU PCs. Setup the pm_signal for each SPU
*/
for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
pm_signal_local[i].cpu = node;
pm_signal_local[i].signal_group = 41;
/* spu i on word (i/2) */
pm_signal_local[i].bus_word = 1 << i / 2;
/* spu i */
pm_signal_local[i].sub_unit = i;
pm_signal_local[i].bit = 63;
}
ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
PASSTHRU_ENABLE, pm_signal_local,
(NUM_SPUS_PER_NODE
* sizeof(struct pm_signal)));
if (unlikely(ret)) {
printk(KERN_WARNING "%s: rtas returned: %d\n",
__FUNCTION__, ret);
return -EIO;
}
return 0;
}
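/*
* Editorial example: the bus_word assignment above puts two SPUs on
* each trace bus word: SPUs 0-1 -> bus_word 0x1, SPUs 2-3 -> 0x2,
* SPUs 4-5 -> 0x4, SPUs 6-7 -> 0x8.
*/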
#ifdef CONFIG_CPU_FREQ
static int
oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
{
int ret = 0;
struct cpufreq_freqs *frq = data;
if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
(val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
(val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
set_spu_profiling_frequency(frq->new, spu_cycle_reset);
return ret;
}
static struct notifier_block cpu_freq_notifier_block = {
.notifier_call = oprof_cpufreq_notify
};
#endif
static int cell_global_start_spu(struct op_counter_config *ctr)
{
int subfunc;
unsigned int lfsr_value;
int cpu;
int ret;
int rtas_error;
unsigned int cpu_khzfreq = 0;
/* The SPU profiling uses time-based profiling based on
* cpu frequency, so if configured with the CPU_FREQ
* option, we should detect frequency changes and react
* accordingly.
*/
#ifdef CONFIG_CPU_FREQ
ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
if (ret < 0)
/* this is not a fatal error */
printk(KERN_ERR "CPU freq change registration failed: %d\n",
ret);
else
cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
#endif
set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
continue;
/*
* Setup SPU cycle-based profiling.
* Set perf_mon_control bit 0 to a zero before
* enabling spu collection hardware.
*/
cbe_write_pm(cpu, pm_control, 0);
if (spu_cycle_reset > MAX_SPU_COUNT)
/* use largest possible value */
lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
else
lfsr_value = calculate_lfsr(spu_cycle_reset);
/* must use a non zero value. Zero disables data collection. */
if (lfsr_value == 0)
lfsr_value = calculate_lfsr(1);
lfsr_value = lfsr_value << 8; /* shift lfsr to correct
* register location
*/
/* debug bus setup */
ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
if (unlikely(ret)) {
rtas_error = ret;
goto out;
}
subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
/* start profiling */
ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
cbe_cpu_to_node(cpu), lfsr_value);
if (unlikely(ret != 0)) {
printk(KERN_ERR
"%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
__FUNCTION__, ret);
rtas_error = -EIO;
goto out;
}
}
rtas_error = start_spu_profiling(spu_cycle_reset);
if (rtas_error)
goto out_stop;
oprofile_running = 1;
return 0;
out_stop:
cell_global_stop_spu(); /* clean up the PMU/debug bus */
out:
	return rtas_error;
}
static void cell_global_start(struct op_counter_config *ctr)
static int cell_global_start_ppu(struct op_counter_config *ctr)
{
u32 cpu;
u32 cpu, i;
u32 interrupt_mask = 0;
u32 i;
/* This routine gets called once for the system.
* There is one performance monitor per node, so we
......@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr)
oprofile_running = 1;
smp_wmb();
/* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
/*
* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
* executed which manipulates the PMU. We start the "virtual counter"
* here so that we do not need to synchronize access to the PMU in
* the above for-loop.
*/
start_virt_cntrs();
return 0;
}
static void cell_global_stop(void)
static int cell_global_start(struct op_counter_config *ctr)
{
if (spu_cycle_reset)
return cell_global_start_spu(ctr);
else
return cell_global_start_ppu(ctr);
}
/*
* Note the generic OProfile stop calls do not support returning
 * an error on stop. Hence, we do not return an error if the FW
* calls fail on stop. Failure to reset the debug bus is not an issue.
* Failure to disable the SPU profiling is not an issue. The FW calls
* to enable the performance counters and debug bus will work even if
* the hardware was not cleanly reset.
*/
static void cell_global_stop_spu(void)
{
int subfunc, rtn_value;
unsigned int lfsr_value;
int cpu;
oprofile_running = 0;
#ifdef CONFIG_CPU_FREQ
cpufreq_unregister_notifier(&cpu_freq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
#endif
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
continue;
subfunc = 3; /*
* 2 - activate SPU tracing,
* 3 - deactivate
*/
lfsr_value = 0x8f100000;
rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
subfunc, cbe_cpu_to_node(cpu),
lfsr_value);
if (unlikely(rtn_value != 0)) {
printk(KERN_ERR
"%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
__FUNCTION__, rtn_value);
}
/* Deactivate the signals */
pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
}
stop_spu_profiling();
}
static void cell_global_stop_ppu(void)
{
int cpu;
/* This routine will be called once for the system.
/*
* This routine will be called once for the system.
* There is one performance monitor per node, so we
* only need to perform this function once per node.
*/
......@@ -687,8 +1098,16 @@ static void cell_global_stop(void)
}
}
static void
cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
static void cell_global_stop(void)
{
if (spu_cycle_reset)
cell_global_stop_spu();
else
cell_global_stop_ppu();
}
static void cell_handle_interrupt(struct pt_regs *regs,
struct op_counter_config *ctr)
{
u32 cpu;
u64 pc;
......@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
cpu = smp_processor_id();
/* Need to make sure the interrupt handler and the virt counter
/*
* Need to make sure the interrupt handler and the virt counter
* routine are not running at the same time. See the
* cell_virtual_cntr() routine for additional comments.
*/
spin_lock_irqsave(&virt_cntr_lock, flags);
/* Need to disable and reenable the performance counters
/*
* Need to disable and reenable the performance counters
* to get the desired behavior from the hardware. This
* is hardware specific.
*/
......@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
/* If the interrupt mask has been cleared, then the virt cntr
/*
* If the interrupt mask has been cleared, then the virt cntr
* has cleared the interrupt. When the thread that generated
* the interrupt is restored, the data count will be restored to
* 0xffffff0 to cause the interrupt to be regenerated.
......@@ -732,7 +1154,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
}
}
/* The counters were frozen by the interrupt.
/*
* The counters were frozen by the interrupt.
* Reenable the interrupt and restart the counters.
* If there was a race between the interrupt handler and
	 * the virtual counter routine, the virtual counter
......@@ -742,7 +1165,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
cbe_enable_pm_interrupts(cpu, hdw_thread,
virt_cntr_inter_mask);
/* The writes to the various performance counters only writes
/*
	 * The writes to the various performance counters only write
* to a latch. The new values (interrupt setting bits, reset
* counter value etc.) are not copied to the actual registers
* until the performance monitor is enabled. In order to get
......@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
spin_unlock_irqrestore(&virt_cntr_lock, flags);
}
/*
* This function is called from the generic OProfile
* driver. When profiling PPUs, we need to do the
* generic sync start; otherwise, do spu_sync_start.
*/
static int cell_sync_start(void)
{
if (spu_cycle_reset)
return spu_sync_start();
else
return DO_GENERIC_SYNC;
}
static int cell_sync_stop(void)
{
if (spu_cycle_reset)
return spu_sync_stop();
else
return 1;
}
struct op_powerpc_model op_model_cell = {
.reg_setup = cell_reg_setup,
.cpu_setup = cell_cpu_setup,
.global_start = cell_global_start,
.global_stop = cell_global_stop,
.sync_start = cell_sync_start,
.sync_stop = cell_sync_stop,
.handle_interrupt = cell_handle_interrupt,
};
......@@ -244,7 +244,7 @@ static void dump_pmcs(void)
mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
}
static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
{
int i;
......@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
}
return 0;
}
static void fsl_booke_reg_setup(struct op_counter_config *ctr,
static int fsl_booke_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
for (i = 0; i < num_counters; ++i)
reset_value[i] = 0x80000000UL - ctr[i].count;
return 0;
}
static void fsl_booke_start(struct op_counter_config *ctr)
static int fsl_booke_start(struct op_counter_config *ctr)
{
int i;
......@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
mfpmr(PMRN_PMGC0));
return 0;
}
static void fsl_booke_stop(void)
......
......@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
/* precompute the values to stuff in the hardware registers */
static void pa6t_reg_setup(struct op_counter_config *ctr,
static int pa6t_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
pr_debug("reset_value for pmc%u inited to 0x%lx\n",
pmc, reset_value[pmc]);
}
return 0;
}
/* configure registers on this cpu */
static void pa6t_cpu_setup(struct op_counter_config *ctr)
static int pa6t_cpu_setup(struct op_counter_config *ctr)
{
u64 mmcr0 = mmcr0_val;
u64 mmcr1 = mmcr1_val;
......@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_PA6T_MMCR0));
pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
mfspr(SPRN_PA6T_MMCR1));
return 0;
}
static void pa6t_start(struct op_counter_config *ctr)
static int pa6t_start(struct op_counter_config *ctr)
{
int i;
......@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
oprofile_running = 1;
pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
return 0;
}
static void pa6t_stop(void)
......
......@@ -32,7 +32,7 @@ static u32 mmcr0_val;
static u64 mmcr1_val;
static u64 mmcra_val;
static void power4_reg_setup(struct op_counter_config *ctr,
static int power4_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
else
mmcr0_val |= MMCR0_PROBLEM_DISABLE;
return 0;
}
extern void ppc64_enable_pmcs(void);
......@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
return 0;
}
static void power4_cpu_setup(struct op_counter_config *ctr)
static int power4_cpu_setup(struct op_counter_config *ctr)
{
unsigned int mmcr0 = mmcr0_val;
unsigned long mmcra = mmcra_val;
......@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_MMCR1));
dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
mfspr(SPRN_MMCRA));
return 0;
}
static void power4_start(struct op_counter_config *ctr)
static int power4_start(struct op_counter_config *ctr)
{
int i;
unsigned int mmcr0;
......@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
oprofile_running = 1;
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
return 0;
}
static void power4_stop(void)
......
......@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
static int num_counters;
static void rs64_reg_setup(struct op_counter_config *ctr,
static int rs64_reg_setup(struct op_counter_config *ctr,
struct op_system_config *sys,
int num_ctrs)
{
......@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
reset_value[i] = 0x80000000UL - ctr[i].count;
/* XXX setup user and kernel profiling */
return 0;
}
static void rs64_cpu_setup(struct op_counter_config *ctr)
static int rs64_cpu_setup(struct op_counter_config *ctr)
{
unsigned int mmcr0;
......@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
mfspr(SPRN_MMCR0));
dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
mfspr(SPRN_MMCR1));
return 0;
}
static void rs64_start(struct op_counter_config *ctr)
static int rs64_start(struct op_counter_config *ctr)
{
int i;
unsigned int mmcr0;
......@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
mtspr(SPRN_MMCR0, mmcr0);
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
return 0;
}
static void rs64_stop(void)
......
......@@ -272,4 +272,14 @@ config CPM2
you wish to build a kernel for a machine with a CPM2 coprocessor
on it (826x, 827x, 8560).
config AXON_RAM
tristate "Axon DDR2 memory device driver"
depends on PPC_IBM_CELL_BLADE
default m
help
	  This driver registers one block device per DDR2 memory bank found
	  on an Axon. The block devices are called axonram?; their major and
	  minor numbers are available in /proc/devices, /proc/partitions, and
	  in /sys/block/axonram?/dev.
endmenu
......@@ -73,4 +73,14 @@ config CBE_CPUFREQ
For details, take a look at <file:Documentation/cpu-freq/>.
	  If you don't have such a processor, say N.
config CBE_CPUFREQ_PMI
tristate "CBE frequency scaling using PMI interface"
depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL
default n
help
	  Select this if you want to use the PMI interface to switch
	  frequencies. Using PMI, the processor will not only be able to
	  run at a lower speed, but also at a lower core voltage.
endmenu
......@@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \
obj-$(CONFIG_CBE_RAS) += ras.o
obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o
obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
ifeq ($(CONFIG_SMP),y)
obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
......@@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
$(spu-priv1-y) \
$(spu-manage-y) \
spufs/
obj-$(CONFIG_PCI_MSI) += axon_msi.o
/*
* Copyright 2007, Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/msi.h>
#include <linux/reboot.h>
#include <asm/dcr.h>
#include <asm/machdep.h>
#include <asm/prom.h>
/*
* MSIC registers, specified as offsets from dcr_base
*/
#define MSIC_CTRL_REG 0x0
/* Base Address registers specify FIFO location in BE memory */
#define MSIC_BASE_ADDR_HI_REG 0x3
#define MSIC_BASE_ADDR_LO_REG 0x4
/* Hold the read/write offsets into the FIFO */
#define MSIC_READ_OFFSET_REG 0x5
#define MSIC_WRITE_OFFSET_REG 0x6
/* MSIC control register flags */
#define MSIC_CTRL_ENABLE 0x0001
#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
#define MSIC_CTRL_IRQ_ENABLE 0x0008
#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
/*
* The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
* Currently we're using a 64KB FIFO size.
*/
#define MSIC_FIFO_SIZE_SHIFT 16
#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT)
/*
* To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
* 8-9 of the MSIC control reg.
*/
#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
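/*
 * Sanity check (illustrative): with MSIC_FIFO_SIZE_SHIFT == 16, i.e. a
 * 64KB FIFO, MSIC_CTRL_FIFO_SIZE evaluates to ((16 - 15) << 8) & 0x300
 * == 0x100, which encodes "n - 15 == 1" in bits 8-9 as described above.
 */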
/*
* We need to mask the read/write offsets to make sure they stay within
* the bounds of the FIFO. Also they should always be 16-byte aligned.
*/
#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
#define MSIC_FIFO_ENTRY_SIZE 0x10
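/*
 * Entry layout (as consumed by the cascade handler below): only the
 * first 32-bit little-endian word of each 16-byte entry is read, and
 * its low 16 bits carry the virq stashed there via msg.data at setup.
 */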
struct axon_msic {
struct device_node *dn;
struct irq_host *irq_host;
__le32 *fifo;
dcr_host_t dcr_host;
struct list_head list;
u32 read_offset;
u32 dcr_base;
};
static LIST_HEAD(axon_msic_list);
static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
{
pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val);
}
static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n)
{
return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n);
}
static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
{
struct axon_msic *msic = get_irq_data(irq);
u32 write_offset, msi;
int idx;
write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG);
pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
/* write_offset doesn't wrap properly, so we have to mask it */
write_offset &= MSIC_FIFO_SIZE_MASK;
while (msic->read_offset != write_offset) {
idx = msic->read_offset / sizeof(__le32);
msi = le32_to_cpu(msic->fifo[idx]);
msi &= 0xFFFF;
pr_debug("axon_msi: woff %x roff %x msi %x\n",
write_offset, msic->read_offset, msi);
msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
msic->read_offset &= MSIC_FIFO_SIZE_MASK;
if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
generic_handle_irq(msi);
else
pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
}
desc->chip->eoi(irq);
}
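/*
 * Offset wrap, worked through (illustrative): with the 64KB FIFO,
 * MSIC_FIFO_SIZE_MASK is 0xFFF0, so a read_offset of 0xFFF0 (the last
 * entry) advances to (0xFFF0 + 0x10) & 0xFFF0 == 0x0 -- back to the
 * first entry, which is why the loop above needs no explicit wrap test.
 */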
static struct axon_msic *find_msi_translator(struct pci_dev *dev)
{
struct irq_host *irq_host;
struct device_node *dn, *tmp;
const phandle *ph;
struct axon_msic *msic = NULL;
dn = pci_device_to_OF_node(dev);
if (!dn) {
dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
return NULL;
}
for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
ph = of_get_property(dn, "msi-translator", NULL);
if (ph)
break;
}
if (!ph) {
dev_dbg(&dev->dev,
"axon_msi: no msi-translator property found\n");
goto out_error;
}
tmp = dn;
dn = of_find_node_by_phandle(*ph);
if (!dn) {
dev_dbg(&dev->dev,
"axon_msi: msi-translator doesn't point to a node\n");
goto out_error;
}
irq_host = irq_find_host(dn);
if (!irq_host) {
dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n",
dn->full_name);
goto out_error;
}
msic = irq_host->host_data;
out_error:
of_node_put(dn);
of_node_put(tmp);
return msic;
}
static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
if (!find_msi_translator(dev))
return -ENODEV;
return 0;
}
static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
{
struct device_node *dn, *tmp;
struct msi_desc *entry;
int len;
const u32 *prop;
dn = pci_device_to_OF_node(dev);
if (!dn) {
dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
return -ENODEV;
}
entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
if (entry->msi_attrib.is_64) {
prop = of_get_property(dn, "msi-address-64", &len);
if (prop)
break;
}
prop = of_get_property(dn, "msi-address-32", &len);
if (prop)
break;
}
if (!prop) {
dev_dbg(&dev->dev,
"axon_msi: no msi-address-(32|64) properties found\n");
return -ENOENT;
}
switch (len) {
case 8:
msg->address_hi = prop[0];
msg->address_lo = prop[1];
break;
case 4:
msg->address_hi = 0;
msg->address_lo = prop[0];
break;
default:
dev_dbg(&dev->dev,
"axon_msi: malformed msi-address-(32|64) property\n");
of_node_put(dn);
return -EINVAL;
}
of_node_put(dn);
return 0;
}
static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	unsigned int virq;
	int rc;
struct msi_desc *entry;
struct msi_msg msg;
struct axon_msic *msic;
msic = find_msi_translator(dev);
if (!msic)
return -ENODEV;
rc = setup_msi_msg_address(dev, &msg);
if (rc)
return rc;
/* We rely on being able to stash a virq in a u16 */
BUILD_BUG_ON(NR_IRQS > 65536);
list_for_each_entry(entry, &dev->msi_list, list) {
virq = irq_create_direct_mapping(msic->irq_host);
if (virq == NO_IRQ) {
dev_warn(&dev->dev,
"axon_msi: virq allocation failed!\n");
return -1;
}
dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
set_irq_msi(virq, entry);
msg.data = virq;
write_msi_msg(virq, &msg);
}
return 0;
}
static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
{
struct msi_desc *entry;
dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
list_for_each_entry(entry, &dev->msi_list, list) {
if (entry->irq == NO_IRQ)
continue;
set_irq_msi(entry->irq, NULL);
irq_dispose_mapping(entry->irq);
}
}
static struct irq_chip msic_irq_chip = {
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
	.shutdown = mask_msi_irq,
.typename = "AXON-MSI",
};
static int msic_host_map(struct irq_host *h, unsigned int virq,
irq_hw_number_t hw)
{
set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
return 0;
}
static int msic_host_match(struct irq_host *host, struct device_node *dn)
{
struct axon_msic *msic = host->host_data;
return msic->dn == dn;
}
static struct irq_host_ops msic_host_ops = {
.match = msic_host_match,
.map = msic_host_map,
};
static int axon_msi_notify_reboot(struct notifier_block *nb,
unsigned long code, void *data)
{
struct axon_msic *msic;
u32 tmp;
list_for_each_entry(msic, &axon_msic_list, list) {
pr_debug("axon_msi: disabling %s\n", msic->dn->full_name);
tmp = msic_dcr_read(msic, MSIC_CTRL_REG);
tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
}
return 0;
}
static struct notifier_block axon_msi_reboot_notifier = {
.notifier_call = axon_msi_notify_reboot
};
static int axon_msi_setup_one(struct device_node *dn)
{
struct page *page;
struct axon_msic *msic;
unsigned int virq;
int dcr_len;
pr_debug("axon_msi: setting up dn %s\n", dn->full_name);
msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
if (!msic) {
printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
dn->full_name);
goto out;
}
msic->dcr_base = dcr_resource_start(dn, 0);
dcr_len = dcr_resource_len(dn, 0);
if (msic->dcr_base == 0 || dcr_len == 0) {
printk(KERN_ERR
"axon_msi: couldn't parse dcr properties on %s\n",
dn->full_name);
goto out;
}
msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len);
if (!DCR_MAP_OK(msic->dcr_host)) {
printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
dn->full_name);
goto out_free_msic;
}
page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL,
get_order(MSIC_FIFO_SIZE_BYTES));
if (!page) {
printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
dn->full_name);
goto out_free_msic;
}
msic->fifo = page_address(page);
msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS,
&msic_host_ops, 0);
if (!msic->irq_host) {
printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n",
dn->full_name);
goto out_free_fifo;
}
msic->irq_host->host_data = msic;
virq = irq_of_parse_and_map(dn, 0);
if (virq == NO_IRQ) {
printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
dn->full_name);
goto out_free_host;
}
msic->dn = of_node_get(dn);
set_irq_data(virq, msic);
set_irq_chained_handler(virq, axon_msi_cascade);
pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq);
/* Enable the MSIC hardware */
msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32);
msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
(u64)msic->fifo & 0xFFFFFFFF);
msic_dcr_write(msic, MSIC_CTRL_REG,
MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
MSIC_CTRL_FIFO_SIZE);
list_add(&msic->list, &axon_msic_list);
printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
return 0;
out_free_host:
kfree(msic->irq_host);
out_free_fifo:
__free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES));
out_free_msic:
kfree(msic);
out:
return -1;
}
static int axon_msi_init(void)
{
struct device_node *dn;
int found = 0;
pr_debug("axon_msi: initialising ...\n");
for_each_compatible_node(dn, NULL, "ibm,axon-msic") {
if (axon_msi_setup_one(dn) == 0)
found++;
}
if (found) {
ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
ppc_md.msi_check_device = axon_msi_check_device;
register_reboot_notifier(&axon_msi_reboot_notifier);
pr_debug("axon_msi: registered callbacks!\n");
}
return 0;
}
arch_initcall(axon_msi_init);
/*
* cpufreq driver for the cell processor
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
......@@ -21,18 +21,11 @@
*/
#include <linux/cpufreq.h>
#include <linux/timer.h>
#include <asm/hw_irq.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/time.h>
#include <asm/pmi.h>
#include <asm/of_platform.h>
#include <asm/prom.h>
#include "cbe_regs.h"
#include "cbe_cpufreq.h"
static DEFINE_MUTEX(cbe_switch_mutex);
......@@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = {
{0, CPUFREQ_TABLE_END},
};
/* to write to MIC register */
static u64 MIC_Slow_Fast_Timer_table[] = {
[0 ... 7] = 0x007fc00000000000ull,
};
/* more values for the MIC */
static u64 MIC_Slow_Next_Timer_table[] = {
0x0000240000000000ull,
0x0000268000000000ull,
0x000029C000000000ull,
0x00002D0000000000ull,
0x0000300000000000ull,
0x0000334000000000ull,
0x000039C000000000ull,
0x00003FC000000000ull,
};
static unsigned int pmi_frequency_limit = 0;
/*
* hardware specific functions
*/
static struct of_device *pmi_dev;
#ifdef CONFIG_PPC_PMI
static int set_pmode_pmi(int cpu, unsigned int pmode)
static int set_pmode(unsigned int cpu, unsigned int slow_mode)
{
int ret;
pmi_message_t pmi_msg;
#ifdef DEBUG
u64 time;
#endif
pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
pmi_msg.data1 = cbe_cpu_to_node(cpu);
pmi_msg.data2 = pmode;
#ifdef DEBUG
time = (u64) get_cycles();
#endif
pmi_send_message(pmi_dev, pmi_msg);
ret = pmi_msg.data2;
pr_debug("PMI returned slow mode %d\n", ret);
#ifdef DEBUG
time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */
time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */
pr_debug("had to wait %lu ns for a transition\n", time);
#endif
return ret;
}
#endif
static int get_pmode(int cpu)
{
int ret;
struct cbe_pmd_regs __iomem *pmd_regs;
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
ret = in_be64(&pmd_regs->pmsr) & 0x07;
return ret;
}
static int set_pmode_reg(int cpu, unsigned int pmode)
{
struct cbe_pmd_regs __iomem *pmd_regs;
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
u64 flags;
u64 value;
local_irq_save(flags);
mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr);
pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0);
out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
value = in_be64(&pmd_regs->pmcr);
/* set bits to zero */
value &= 0xFFFFFFFFFFFFFFF8ull;
/* set bits to next pmode */
value |= pmode;
out_be64(&pmd_regs->pmcr, value);
/* wait until new pmode appears in status register */
value = in_be64(&pmd_regs->pmsr) & 0x07;
while(value != pmode) {
cpu_relax();
value = in_be64(&pmd_regs->pmsr) & 0x07;
}
local_irq_restore(flags);
return 0;
}
int rc;
static int set_pmode(int cpu, unsigned int slow_mode) {
#ifdef CONFIG_PPC_PMI
if (pmi_dev)
return set_pmode_pmi(cpu, slow_mode);
if (cbe_cpufreq_has_pmi)
rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
else
#endif
return set_pmode_reg(cpu, slow_mode);
}
rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg)
{
u8 cpu;
u8 cbe_pmode_new;
BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
cpu = cbe_node_to_cpu(pmi_msg.data1);
cbe_pmode_new = pmi_msg.data2;
pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency;
pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit);
}
static int pmi_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
struct cpufreq_policy *policy = data;
if (event != CPUFREQ_INCOMPATIBLE)
return 0;
cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit);
return 0;
return rc;
}
static struct notifier_block pmi_notifier_block = {
.notifier_call = pmi_notifier,
};
static struct pmi_handler cbe_pmi_handler = {
.type = PMI_TYPE_FREQ_CHANGE,
.handle_pmi_message = cbe_cpufreq_handle_pmi,
};
/*
* cpufreq functions
*/
......@@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
pr_debug("init cpufreq on CPU %d\n", policy->cpu);
/*
* Let's check we can actually get to the CELL regs
*/
if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
!cbe_get_cpu_mic_tm_regs(policy->cpu)) {
pr_info("invalid CBE regs pointers for cpufreq\n");
return -EINVAL;
}
max_freqp = of_get_property(cpu, "clock-frequency", NULL);
of_node_put(cpu);
if (!max_freqp)
return -EINVAL;
......@@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
/* if DEBUG is enabled set_pmode() measures the correct latency of a transition */
	/* if DEBUG is enabled, set_pmode() measures the
	 * latency of a transition */
policy->cpuinfo.transition_latency = 25000;
cur_pmode = get_pmode(policy->cpu);
cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
pr_debug("current pmode is at %d\n",cur_pmode);
policy->cur = cbe_freqs[cur_pmode].frequency;
......@@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
if (pmi_dev) {
/* frequency might get limited later, initialize limit with max_freq */
pmi_frequency_limit = max_freq;
cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
}
/* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */
/* this ensures that policy->cpuinfo_min
* and policy->cpuinfo_max are set correctly */
return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
}
static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
if (pmi_dev)
cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
cpufreq_frequency_table_put_attr(policy->cpu);
return 0;
}
......@@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
return cpufreq_frequency_table_verify(policy, cbe_freqs);
}
static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq,
static int cbe_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
int rc;
struct cpufreq_freqs freqs;
int cbe_pmode_new;
unsigned int cbe_pmode_new;
cpufreq_frequency_table_target(policy,
cbe_freqs,
......@@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target
mutex_lock(&cbe_switch_mutex);
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
pr_debug("setting frequency for cpu %d to %d kHz, " \
"1/%d of max frequency\n",
policy->cpu,
cbe_freqs[cbe_pmode_new].frequency,
cbe_freqs[cbe_pmode_new].index);
rc = set_pmode(policy->cpu, cbe_pmode_new);
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
mutex_unlock(&cbe_switch_mutex);
......@@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = {
static int __init cbe_cpufreq_init(void)
{
#ifdef CONFIG_PPC_PMI
struct device_node *np;
#endif
if (!machine_is(cell))
return -ENODEV;
#ifdef CONFIG_PPC_PMI
np = of_find_node_by_type(NULL, "ibm,pmi");
pmi_dev = of_find_device_by_node(np);
if (pmi_dev)
pmi_register_handler(pmi_dev, &cbe_pmi_handler);
#endif
return cpufreq_register_driver(&cbe_cpufreq_driver);
}
static void __exit cbe_cpufreq_exit(void)
{
#ifdef CONFIG_PPC_PMI
if (pmi_dev)
pmi_unregister_handler(pmi_dev, &cbe_pmi_handler);
#endif
cpufreq_unregister_driver(&cbe_cpufreq_driver);
}
......
/*
* cbe_cpufreq.h
*
* This file contains the definitions used by the cbe_cpufreq driver.
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
*/
#include <linux/cpufreq.h>
#include <linux/types.h>
int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
int cbe_cpufreq_get_pmode(int cpu);
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
extern bool cbe_cpufreq_has_pmi;
#else
#define cbe_cpufreq_has_pmi (0)
#endif
/*
* pervasive backend for the cbe_cpufreq driver
*
* This driver makes use of the pervasive unit to
* engage the desired frequency.
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/time.h>
#include <asm/machdep.h>
#include <asm/hw_irq.h>
#include "cbe_regs.h"
#include "cbe_cpufreq.h"
/* to write to MIC register */
static u64 MIC_Slow_Fast_Timer_table[] = {
[0 ... 7] = 0x007fc00000000000ull,
};
/* more values for the MIC */
static u64 MIC_Slow_Next_Timer_table[] = {
0x0000240000000000ull,
0x0000268000000000ull,
0x000029C000000000ull,
0x00002D0000000000ull,
0x0000300000000000ull,
0x0000334000000000ull,
0x000039C000000000ull,
0x00003FC000000000ull,
};
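/*
 * Both tables are indexed by pmode (0..7): the slow/fast timer value is
 * identical for all modes, while the slow_next values grow with pmode.
 * The encodings themselves are hardware-defined constants taken as-is.
 */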
int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
{
struct cbe_pmd_regs __iomem *pmd_regs;
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
u64 flags;
u64 value;
#ifdef DEBUG
long time;
#endif
local_irq_save(flags);
mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
#ifdef DEBUG
time = jiffies;
#endif
out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
value = in_be64(&pmd_regs->pmcr);
	/* clear the low three (pmode) bits */
	value &= 0xFFFFFFFFFFFFFFF8ull;
	/* set the new pmode */
	value |= pmode;
out_be64(&pmd_regs->pmcr, value);
#ifdef DEBUG
/* wait until new pmode appears in status register */
value = in_be64(&pmd_regs->pmsr) & 0x07;
while (value != pmode) {
cpu_relax();
value = in_be64(&pmd_regs->pmsr) & 0x07;
}
time = jiffies - time;
time = jiffies_to_msecs(time);
pr_debug("had to wait %lu ms for a transition using " \
"pervasive unit\n", time);
#endif
local_irq_restore(flags);
return 0;
}
int cbe_cpufreq_get_pmode(int cpu)
{
int ret;
struct cbe_pmd_regs __iomem *pmd_regs;
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
ret = in_be64(&pmd_regs->pmsr) & 0x07;
return ret;
}
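/*
 * Usage sketch (illustrative; assumes a valid cpu with mapped CBE
 * registers):
 *
 *	int cur = cbe_cpufreq_get_pmode(cpu);
 *	if (cur != want)
 *		cbe_cpufreq_set_pmode(cpu, want);
 *
 * set_pmode() in cbe_cpufreq.c picks this pervasive backend whenever
 * the PMI interface is absent.
 */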
/*
* pmi backend for the cbe_cpufreq driver
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
*
* Author: Christian Krafft <krafft@de.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <asm/of_platform.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/pmi.h>
#ifdef DEBUG
#include <asm/time.h>
#endif
#include "cbe_regs.h"
#include "cbe_cpufreq.h"
static u8 pmi_slow_mode_limit[MAX_CBE];
bool cbe_cpufreq_has_pmi = false;
EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
/*
* hardware specific functions
*/
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
{
int ret;
pmi_message_t pmi_msg;
#ifdef DEBUG
long time;
#endif
pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
pmi_msg.data1 = cbe_cpu_to_node(cpu);
pmi_msg.data2 = pmode;
#ifdef DEBUG
time = jiffies;
#endif
pmi_send_message(pmi_msg);
#ifdef DEBUG
time = jiffies - time;
time = jiffies_to_msecs(time);
pr_debug("had to wait %lu ms for a transition using " \
"PMI\n", time);
#endif
ret = pmi_msg.data2;
pr_debug("PMI returned slow mode %d\n", ret);
return ret;
}
EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
{
u8 node, slow_mode;
BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
node = pmi_msg.data1;
slow_mode = pmi_msg.data2;
pmi_slow_mode_limit[node] = slow_mode;
pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
}
static int pmi_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
struct cpufreq_policy *policy = data;
struct cpufreq_frequency_table *cbe_freqs;
u8 node;
cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
node = cbe_cpu_to_node(policy->cpu);
pr_debug("got notified, event=%lu, node=%u\n", event, node);
if (pmi_slow_mode_limit[node] != 0) {
pr_debug("limiting node %d to slow mode %d\n",
node, pmi_slow_mode_limit[node]);
cpufreq_verify_within_limits(policy, 0,
cbe_freqs[pmi_slow_mode_limit[node]].frequency);
}
return 0;
}
static struct notifier_block pmi_notifier_block = {
.notifier_call = pmi_notifier,
};
static struct pmi_handler cbe_pmi_handler = {
.type = PMI_TYPE_FREQ_CHANGE,
.handle_pmi_message = cbe_cpufreq_handle_pmi,
};
static int __init cbe_cpufreq_pmi_init(void)
{
cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;
if (!cbe_cpufreq_has_pmi)
return -ENODEV;
cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
return 0;
}
static void __exit cbe_cpufreq_pmi_exit(void)
{
cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
pmi_unregister_handler(&cbe_pmi_handler);
}
module_init(cbe_cpufreq_pmi_init);
module_exit(cbe_cpufreq_pmi_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
......@@ -174,6 +174,13 @@ static struct device_node *cbe_get_be_node(int cpu_id)
cpu_handle = of_get_property(np, "cpus", &len);
/*
	 * the CAB SLOF tree is non-compliant, so we just assume
* there is only one node
*/
if (WARN_ON_ONCE(!cpu_handle))
return np;
	for (i = 0; i < len; i++)
if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
return np;
......
......@@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = {
/*
* initialize throttling with default values
*/
static void __init init_default_values(void)
static int __init init_default_values(void)
{
int cpu;
struct cbe_pmd_regs __iomem *pmd_regs;
......@@ -339,25 +339,40 @@ static void __init init_default_values(void)
for_each_possible_cpu (cpu) {
pr_debug("processing cpu %d\n", cpu);
sysdev = get_cpu_sysdev(cpu);
if (!sysdev) {
pr_info("invalid sysdev pointer for cbe_thermal\n");
return -EINVAL;
}
pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
if (!pmd_regs) {
pr_info("invalid CBE regs pointer for cbe_thermal\n");
return -EINVAL;
}
out_be64(&pmd_regs->tm_str2, str2);
out_be64(&pmd_regs->tm_str1.val, str1.val);
out_be64(&pmd_regs->tm_tpr.val, tpr.val);
out_be64(&pmd_regs->tm_cr1.val, cr1.val);
out_be64(&pmd_regs->tm_cr2, cr2);
}
return 0;
}
static int __init thermal_init(void)
{
init_default_values();
int rc = init_default_values();
if (rc == 0) {
spu_add_sysdev_attr_group(&spu_attribute_group);
cpu_add_sysdev_attr_group(&ppe_attribute_group);
}
return 0;
return rc;
}
module_init(thermal_init);
......
......@@ -35,18 +35,37 @@
#include <asm/spu.h>
#include <asm/spu_priv1.h>
#include <asm/xmon.h>
#include <asm/prom.h>
#include "spu_priv1_mmio.h"
const struct spu_management_ops *spu_management_ops;
EXPORT_SYMBOL_GPL(spu_management_ops);
const struct spu_priv1_ops *spu_priv1_ops;
EXPORT_SYMBOL_GPL(spu_priv1_ops);
static struct list_head spu_list[MAX_NUMNODES];
static LIST_HEAD(spu_full_list);
static DEFINE_MUTEX(spu_mutex);
static DEFINE_SPINLOCK(spu_list_lock);
struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
EXPORT_SYMBOL_GPL(cbe_spu_info);
EXPORT_SYMBOL_GPL(spu_priv1_ops);
/*
* Protects cbe_spu_info and spu->number.
*/
static DEFINE_SPINLOCK(spu_lock);
/*
* List of all spus in the system.
*
* This list is iterated by callers from irq context and callers that
* want to sleep. Thus modifications need to be done with both
* spu_full_list_lock and spu_full_list_mutex held, while iterating
* through it requires either of these locks.
*
 * In addition spu_full_list_lock protects all assignments to
* spu->mm.
*/
static LIST_HEAD(spu_full_list);
static DEFINE_SPINLOCK(spu_full_list_lock);
static DEFINE_MUTEX(spu_full_list_mutex);
void spu_invalidate_slbs(struct spu *spu)
{
......@@ -65,12 +84,12 @@ void spu_flush_all_slbs(struct mm_struct *mm)
struct spu *spu;
unsigned long flags;
spin_lock_irqsave(&spu_list_lock, flags);
spin_lock_irqsave(&spu_full_list_lock, flags);
list_for_each_entry(spu, &spu_full_list, full_list) {
if (spu->mm == mm)
spu_invalidate_slbs(spu);
}
spin_unlock_irqrestore(&spu_list_lock, flags);
spin_unlock_irqrestore(&spu_full_list_lock, flags);
}
/* The hack below stinks... try to do something better one of
......@@ -88,9 +107,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
{
unsigned long flags;
spin_lock_irqsave(&spu_list_lock, flags);
spin_lock_irqsave(&spu_full_list_lock, flags);
spu->mm = mm;
spin_unlock_irqrestore(&spu_list_lock, flags);
spin_unlock_irqrestore(&spu_full_list_lock, flags);
if (mm)
mm_needs_global_tlbie(mm);
}
......@@ -390,7 +409,7 @@ static void spu_free_irqs(struct spu *spu)
free_irq(spu->irqs[2], spu);
}
static void spu_init_channels(struct spu *spu)
void spu_init_channels(struct spu *spu)
{
static const struct {
unsigned channel;
......@@ -423,46 +442,7 @@ static void spu_init_channels(struct spu *spu)
out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
}
}
struct spu *spu_alloc_node(int node)
{
struct spu *spu = NULL;
mutex_lock(&spu_mutex);
if (!list_empty(&spu_list[node])) {
spu = list_entry(spu_list[node].next, struct spu, list);
list_del_init(&spu->list);
pr_debug("Got SPU %d %d\n", spu->number, spu->node);
}
mutex_unlock(&spu_mutex);
if (spu)
spu_init_channels(spu);
return spu;
}
EXPORT_SYMBOL_GPL(spu_alloc_node);
struct spu *spu_alloc(void)
{
struct spu *spu = NULL;
int node;
for (node = 0; node < MAX_NUMNODES; node++) {
spu = spu_alloc_node(node);
if (spu)
break;
}
return spu;
}
void spu_free(struct spu *spu)
{
mutex_lock(&spu_mutex);
list_add_tail(&spu->list, &spu_list[spu->node]);
mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_free);
EXPORT_SYMBOL_GPL(spu_init_channels);
static int spu_shutdown(struct sys_device *sysdev)
{
......@@ -481,12 +461,12 @@ struct sysdev_class spu_sysdev_class = {
int spu_add_sysdev_attr(struct sysdev_attribute *attr)
{
struct spu *spu;
mutex_lock(&spu_mutex);
mutex_lock(&spu_full_list_mutex);
list_for_each_entry(spu, &spu_full_list, full_list)
sysdev_create_file(&spu->sysdev, attr);
mutex_unlock(&spu_full_list_mutex);
mutex_unlock(&spu_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
......@@ -494,12 +474,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
int spu_add_sysdev_attr_group(struct attribute_group *attrs)
{
struct spu *spu;
mutex_lock(&spu_mutex);
mutex_lock(&spu_full_list_mutex);
list_for_each_entry(spu, &spu_full_list, full_list)
sysfs_create_group(&spu->sysdev.kobj, attrs);
mutex_unlock(&spu_full_list_mutex);
mutex_unlock(&spu_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
......@@ -508,24 +488,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
{
struct spu *spu;
mutex_lock(&spu_mutex);
mutex_lock(&spu_full_list_mutex);
list_for_each_entry(spu, &spu_full_list, full_list)
sysdev_remove_file(&spu->sysdev, attr);
mutex_unlock(&spu_mutex);
mutex_unlock(&spu_full_list_mutex);
}
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
{
struct spu *spu;
mutex_lock(&spu_mutex);
mutex_lock(&spu_full_list_mutex);
list_for_each_entry(spu, &spu_full_list, full_list)
sysfs_remove_group(&spu->sysdev.kobj, attrs);
mutex_unlock(&spu_mutex);
mutex_unlock(&spu_full_list_mutex);
}
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
......@@ -553,16 +531,19 @@ static int __init create_spu(void *data)
int ret;
static int number;
unsigned long flags;
struct timespec ts;
ret = -ENOMEM;
spu = kzalloc(sizeof (*spu), GFP_KERNEL);
if (!spu)
goto out;
spu->alloc_state = SPU_FREE;
spin_lock_init(&spu->register_lock);
mutex_lock(&spu_mutex);
spin_lock(&spu_lock);
spu->number = number++;
mutex_unlock(&spu_mutex);
spin_unlock(&spu_lock);
ret = spu_create_spu(spu, data);
......@@ -579,15 +560,22 @@ static int __init create_spu(void *data)
if (ret)
goto out_free_irqs;
mutex_lock(&spu_mutex);
spin_lock_irqsave(&spu_list_lock, flags);
list_add(&spu->list, &spu_list[spu->node]);
mutex_lock(&cbe_spu_info[spu->node].list_mutex);
list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
cbe_spu_info[spu->node].n_spus++;
mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
mutex_lock(&spu_full_list_mutex);
spin_lock_irqsave(&spu_full_list_lock, flags);
list_add(&spu->full_list, &spu_full_list);
spin_unlock_irqrestore(&spu_list_lock, flags);
mutex_unlock(&spu_mutex);
spin_unlock_irqrestore(&spu_full_list_lock, flags);
mutex_unlock(&spu_full_list_mutex);
spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
ktime_get_ts(&ts);
spu->stats.tstamp = timespec_to_ns(&ts);
spu->stats.utilization_state = SPU_UTIL_IDLE;
spu->stats.tstamp = jiffies;
INIT_LIST_HEAD(&spu->aff_list);
goto out;
......@@ -608,12 +596,20 @@ static const char *spu_state_names[] = {
static unsigned long long spu_acct_time(struct spu *spu,
enum spu_utilization_state state)
{
struct timespec ts;
unsigned long long time = spu->stats.times[state];
if (spu->stats.utilization_state == state)
time += jiffies - spu->stats.tstamp;
/*
* If the spu is idle or the context is stopped, utilization
* statistics are not updated. Apply the time delta from the
* last recorded state of the spu.
*/
if (spu->stats.util_state == state) {
ktime_get_ts(&ts);
time += timespec_to_ns(&ts) - spu->stats.tstamp;
}
return jiffies_to_msecs(time);
return time / NSEC_PER_MSEC;
}
......@@ -623,11 +619,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
return sprintf(buf, "%s %llu %llu %llu %llu "
"%llu %llu %llu %llu %llu %llu %llu %llu\n",
spu_state_names[spu->stats.utilization_state],
spu_state_names[spu->stats.util_state],
spu_acct_time(spu, SPU_UTIL_USER),
spu_acct_time(spu, SPU_UTIL_SYSTEM),
spu_acct_time(spu, SPU_UTIL_IOWAIT),
spu_acct_time(spu, SPU_UTIL_IDLE),
spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
spu->stats.vol_ctx_switch,
spu->stats.invol_ctx_switch,
spu->stats.slb_flt,
......@@ -640,12 +636,146 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
/* Hardcoded affinity idxs for QS20 */
#define SPES_PER_BE 8
static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 };
static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
static struct spu *spu_lookup_reg(int node, u32 reg)
{
struct spu *spu;
	list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
		const u32 *p = get_property(spu_devnode(spu), "reg", NULL);

		if (p && *p == reg)
			return spu;
	}
return NULL;
}
static void init_aff_QS20_hardcoded(void)
{
int node, i;
struct spu *last_spu, *spu;
u32 reg;
for (node = 0; node < MAX_NUMNODES; node++) {
last_spu = NULL;
for (i = 0; i < SPES_PER_BE; i++) {
reg = QS20_reg_idxs[i];
spu = spu_lookup_reg(node, reg);
if (!spu)
continue;
spu->has_mem_affinity = QS20_reg_memory[reg];
if (last_spu)
list_add_tail(&spu->aff_list,
&last_spu->aff_list);
last_spu = spu;
}
}
}
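/*
 * Reading of the tables above (illustrative): the SPEs are chained in
 * physical vicinity order 0-2-4-6-7-5-3-1, and the two SPEs with reg
 * values 0 and 1 -- the ends of the chain -- are the ones marked with
 * memory affinity in QS20_reg_memory.
 */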
static int of_has_vicinity(void)
{
struct spu* spu;
spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list);
return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL;
}
static struct spu *aff_devnode_spu(int cbe, struct device_node *dn)
{
struct spu *spu;
list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
if (spu_devnode(spu) == dn)
return spu;
return NULL;
}
static struct spu *
aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid)
{
struct spu *spu;
const phandle *vic_handles;
int lenp, i;
list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
if (spu_devnode(spu) == avoid)
continue;
vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp);
for (i=0; i < (lenp / sizeof(phandle)); i++) {
if (vic_handles[i] == target->linux_phandle)
return spu;
}
}
return NULL;
}
static void init_aff_fw_vicinity_node(int cbe)
{
struct spu *spu, *last_spu;
struct device_node *vic_dn, *last_spu_dn;
phandle avoid_ph;
const phandle *vic_handles;
const char *name;
int lenp, i, added, mem_aff;
last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list);
avoid_ph = 0;
for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
last_spu_dn = spu_devnode(last_spu);
vic_handles = get_property(last_spu_dn, "vicinity", &lenp);
for (i = 0; i < (lenp / sizeof(phandle)); i++) {
if (vic_handles[i] == avoid_ph)
continue;
vic_dn = of_find_node_by_phandle(vic_handles[i]);
if (!vic_dn)
continue;
name = get_property(vic_dn, "name", NULL);
if (strcmp(name, "spe") == 0) {
spu = aff_devnode_spu(cbe, vic_dn);
avoid_ph = last_spu_dn->linux_phandle;
			} else {
mem_aff = strcmp(name, "mic-tm") == 0;
spu = aff_node_next_to(cbe, vic_dn, last_spu_dn);
if (!spu)
continue;
if (mem_aff) {
last_spu->has_mem_affinity = 1;
spu->has_mem_affinity = 1;
}
avoid_ph = vic_dn->linux_phandle;
}
list_add_tail(&spu->aff_list, &last_spu->aff_list);
last_spu = spu;
break;
}
}
}
static void init_aff_fw_vicinity(void)
{
int cbe;
	/*
	 * Sets has_mem_affinity for each spu, as well as the
	 * spu->aff_list list linking each spu to its neighbors.
	 */
for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
init_aff_fw_vicinity_node(cbe);
}
static int __init init_spu_base(void)
{
int i, ret = 0;
for (i = 0; i < MAX_NUMNODES; i++)
INIT_LIST_HEAD(&spu_list[i]);
for (i = 0; i < MAX_NUMNODES; i++) {
mutex_init(&cbe_spu_info[i].list_mutex);
INIT_LIST_HEAD(&cbe_spu_info[i].spus);
}
if (!spu_management_ops)
goto out;
......@@ -675,16 +805,25 @@ static int __init init_spu_base(void)
fb_append_extra_logo(&logo_spe_clut224, ret);
}
mutex_lock(&spu_full_list_mutex);
xmon_register_spus(&spu_full_list);
crash_register_spus(&spu_full_list);
mutex_unlock(&spu_full_list_mutex);
spu_add_sysdev_attr(&attr_stat);
if (of_has_vicinity()) {
init_aff_fw_vicinity();
} else {
long root = of_get_flat_dt_root();
if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
			init_aff_QS20_hardcoded();
}
return 0;
out_unregister_sysdev_class:
sysdev_class_unregister(&spu_sysdev_class);
out:
return ret;
}
module_init(init_spu_base);
......
......@@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = {
* this file is not used and the syscalls directly enter the fs code */
asmlinkage long sys_spu_create(const char __user *name,
unsigned int flags, mode_t mode)
unsigned int flags, mode_t mode, int neighbor_fd)
{
long ret;
struct module *owner = spufs_calls.owner;
struct file *neighbor;
int fput_needed;
ret = -ENOSYS;
if (owner && try_module_get(owner)) {
ret = spufs_calls.create_thread(name, flags, mode);
if (flags & SPU_CREATE_AFFINITY_SPU) {
neighbor = fget_light(neighbor_fd, &fput_needed);
if (neighbor) {
ret = spufs_calls.create_thread(name, flags,
mode, neighbor);
fput_light(neighbor, fput_needed);
}
		} else {
ret = spufs_calls.create_thread(name, flags,
mode, NULL);
}
module_put(owner);
}
return ret;
......
......@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/atomic.h>
#include <asm/spu.h>
......@@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
ctx->ops = &spu_backing_ops;
ctx->owner = get_task_mm(current);
INIT_LIST_HEAD(&ctx->rq);
INIT_LIST_HEAD(&ctx->aff_list);
if (gang)
spu_gang_add_ctx(gang, ctx);
ctx->cpus_allowed = current->cpus_allowed;
spu_set_timeslice(ctx);
ctx->stats.execution_state = SPUCTX_UTIL_USER;
ctx->stats.tstamp = jiffies;
ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
atomic_inc(&nr_spu_contexts);
goto out;
......@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
spu_fini_csa(&ctx->csa);
if (ctx->gang)
spu_gang_remove_ctx(ctx->gang, ctx);
if (ctx->prof_priv_kref)
kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
BUG_ON(!list_empty(&ctx->rq));
atomic_dec(&nr_spu_contexts);
kfree(ctx);
......@@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
void spu_acquire_saved(struct spu_context *ctx)
{
spu_acquire(ctx);
if (ctx->state != SPU_STATE_SAVED)
if (ctx->state != SPU_STATE_SAVED) {
set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
spu_deactivate(ctx);
}
}
/**
* spu_release_saved - unlock spu context and return it to the runqueue
* @ctx: context to unlock
*/
void spu_release_saved(struct spu_context *ctx)
{
BUG_ON(ctx->state != SPU_STATE_SAVED);
if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags))
spu_activate(ctx, 0);
spu_release(ctx);
}
void spu_set_profile_private_kref(struct spu_context *ctx,
struct kref *prof_info_kref,
		void (*prof_info_release)(struct kref *kref))
{
ctx->prof_priv_kref = prof_info_kref;
ctx->prof_priv_release = prof_info_release;
}
EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
void *spu_get_profile_private_kref(struct spu_context *ctx)
{
return ctx->prof_priv_kref;
}
EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
......@@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file)
spu_acquire_saved(ctx_info->ctx);
for (j = 0; j < spufs_coredump_num_notes; j++)
spufs_arch_write_note(ctx_info, j, file);
spu_release(ctx_info->ctx);
spu_release_saved(ctx_info->ctx);
list_del(&ctx_info->list);
kfree(ctx_info);
}
......
......@@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx)
if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
return 0;
spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT);
spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
dsisr, ctx->state);
ctx->stats.hash_flt++;
if (ctx->state == SPU_STATE_RUNNABLE) {
if (ctx->state == SPU_STATE_RUNNABLE)
ctx->spu->stats.hash_flt++;
spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT);
}
/* we must not hold the lock when entering spu_handle_mm_fault */
spu_release(ctx);
......@@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx)
} else
spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
return ret;
}
EXPORT_SYMBOL_GPL(spufs_handle_class1);
......@@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer,
spu_acquire_saved(ctx);
ret = __spufs_regs_read(ctx, buffer, size, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer,
ret = copy_from_user(lscsa->gprs + *pos - size,
buffer, size) ? -EFAULT : size;
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
spu_acquire_saved(ctx);
ret = __spufs_fpcr_read(ctx, buffer, size, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer,
ret = copy_from_user((char *)&lscsa->fpcr + *pos - size,
buffer, size) ? -EFAULT : size;
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
spu_acquire_saved(ctx);
ret = __spufs_signal1_read(ctx, buf, len, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = {
.mmap = spufs_signal1_mmap,
};
static const struct file_operations spufs_signal1_nosched_fops = {
.open = spufs_signal1_open,
.release = spufs_signal1_release,
.write = spufs_signal1_write,
.mmap = spufs_signal1_mmap,
};
static int spufs_signal2_open(struct inode *inode, struct file *file)
{
struct spufs_inode_info *i = SPUFS_I(inode);
......@@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
spu_acquire_saved(ctx);
ret = __spufs_signal2_read(ctx, buf, len, pos);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = {
.mmap = spufs_signal2_mmap,
};
static const struct file_operations spufs_signal2_nosched_fops = {
.open = spufs_signal2_open,
.release = spufs_signal2_release,
.write = spufs_signal2_write,
.mmap = spufs_signal2_mmap,
};
static void spufs_signal1_type_set(void *data, u64 val)
{
struct spu_context *ctx = data;
......@@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val)
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->decr.slot[0] = (u32) val;
spu_release(ctx);
spu_release_saved(ctx);
}
static u64 __spufs_decr_get(void *data)
......@@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = __spufs_decr_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
......@@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
static void spufs_decr_status_set(void *data, u64 val)
{
struct spu_context *ctx = data;
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->decr_status.slot[0] = (u32) val;
spu_release(ctx);
if (val)
ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
else
ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
spu_release_saved(ctx);
}
static u64 __spufs_decr_status_get(void *data)
{
struct spu_context *ctx = data;
struct spu_lscsa *lscsa = ctx->csa.lscsa;
return lscsa->decr_status.slot[0];
if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
return SPU_DECR_STATUS_RUNNING;
else
return 0;
}
static u64 spufs_decr_status_get(void *data)
......@@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = __spufs_decr_status_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
......@@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val)
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->event_mask.slot[0] = (u32) val;
spu_release(ctx);
spu_release_saved(ctx);
}
static u64 __spufs_event_mask_get(void *data)
......@@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = __spufs_event_mask_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
......@@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data)
spu_acquire_saved(ctx);
ret = __spufs_event_status_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
......@@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val)
struct spu_lscsa *lscsa = ctx->csa.lscsa;
spu_acquire_saved(ctx);
lscsa->srr0.slot[0] = (u32) val;
spu_release(ctx);
spu_release_saved(ctx);
}
static u64 spufs_srr0_get(void *data)
......@@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data)
u64 ret;
spu_acquire_saved(ctx);
ret = lscsa->srr0.slot[0];
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
......@@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data)
spu_acquire_saved(ctx);
ret = __spufs_lslr_get(data);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_mbox_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_ibox_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_wbox_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_dma_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
spin_lock(&ctx->csa.register_lock);
ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
spin_unlock(&ctx->csa.register_lock);
spu_release(ctx);
spu_release_saved(ctx);
return ret;
}
......@@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = {
};
static unsigned long long spufs_acct_time(struct spu_context *ctx,
enum spuctx_execution_state state)
enum spu_utilization_state state)
{
unsigned long time = ctx->stats.times[state];
struct timespec ts;
unsigned long long time = ctx->stats.times[state];
if (ctx->stats.execution_state == state)
time += jiffies - ctx->stats.tstamp;
/*
* In general, utilization statistics are updated by the controlling
* thread as the spu context moves through various well-defined
* state transitions. But if the context is lazily loaded, its
* utilization statistics are not updated, because the controlling
* thread is not tightly coupled with the execution of the spu
* context. We calculate and apply the time delta from the last
* recorded state of the spu context.
*/
if (ctx->spu && ctx->stats.util_state == state) {
ktime_get_ts(&ts);
time += timespec_to_ns(&ts) - ctx->stats.tstamp;
}
return jiffies_to_msecs(time);
return time / NSEC_PER_MSEC;
}
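/*
* Worked example (illustrative): if the context is loaded and has
* been in SPU_UTIL_USER for 3 ms since stats.tstamp, the branch above
* adds ~3000000 ns to the accumulated time before the division, so
* the reported millisecond count stays current even without an
* intervening state switch.
*/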
static unsigned long long spufs_slb_flts(struct spu_context *ctx)
......@@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private)
spu_acquire(ctx);
seq_printf(s, "%s %llu %llu %llu %llu "
"%llu %llu %llu %llu %llu %llu %llu %llu\n",
ctx_state_names[ctx->stats.execution_state],
spufs_acct_time(ctx, SPUCTX_UTIL_USER),
spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM),
spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT),
spufs_acct_time(ctx, SPUCTX_UTIL_LOADED),
ctx_state_names[ctx->stats.util_state],
spufs_acct_time(ctx, SPU_UTIL_USER),
spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
ctx->stats.vol_ctx_switch,
ctx->stats.invol_ctx_switch,
spufs_slb_flts(ctx),
......@@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = {
{ "mbox_stat", &spufs_mbox_stat_fops, 0444, },
{ "ibox_stat", &spufs_ibox_stat_fops, 0444, },
{ "wbox_stat", &spufs_wbox_stat_fops, 0444, },
{ "signal1", &spufs_signal1_fops, 0666, },
{ "signal2", &spufs_signal2_fops, 0666, },
{ "signal1", &spufs_signal1_nosched_fops, 0222, },
{ "signal2", &spufs_signal2_nosched_fops, 0222, },
{ "signal1_type", &spufs_signal1_type, 0666, },
{ "signal2_type", &spufs_signal2_type, 0666, },
{ "mss", &spufs_mss_fops, 0666, },
......
......@@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void)
kref_init(&gang->kref);
mutex_init(&gang->mutex);
mutex_init(&gang->aff_mutex);
INIT_LIST_HEAD(&gang->list);
INIT_LIST_HEAD(&gang->aff_list_head);
out:
return gang;
......@@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
{
mutex_lock(&gang->mutex);
WARN_ON(ctx->gang != gang);
if (!list_empty(&ctx->aff_list)) {
list_del_init(&ctx->aff_list);
gang->aff_flags &= ~AFF_OFFSETS_SET;
}
list_del_init(&ctx->gang_list);
gang->contexts--;
mutex_unlock(&gang->mutex);
......
......@@ -316,11 +316,107 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
return ret;
}
static int spufs_create_context(struct inode *inode,
struct dentry *dentry,
struct vfsmount *mnt, int flags, int mode)
static struct spu_context *
spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
struct file *filp)
{
struct spu_context *tmp, *neighbor;
int count, node;
int aff_supp;
aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
struct spu, cbe_list))->aff_list);
if (!aff_supp)
return ERR_PTR(-EINVAL);
if (flags & SPU_CREATE_GANG)
return ERR_PTR(-EINVAL);
if (flags & SPU_CREATE_AFFINITY_MEM &&
gang->aff_ref_ctx &&
gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
return ERR_PTR(-EEXIST);
if (gang->aff_flags & AFF_MERGED)
return ERR_PTR(-EBUSY);
neighbor = NULL;
if (flags & SPU_CREATE_AFFINITY_SPU) {
if (!filp || filp->f_op != &spufs_context_fops)
return ERR_PTR(-EINVAL);
neighbor = get_spu_context(
SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
!list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
!list_entry(neighbor->aff_list.next, struct spu_context,
aff_list)->aff_head)
return ERR_PTR(-EEXIST);
if (gang != neighbor->gang)
return ERR_PTR(-EINVAL);
count = 1;
list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
count++;
if (list_empty(&neighbor->aff_list))
count++;
for (node = 0; node < MAX_NUMNODES; node++) {
if ((cbe_spu_info[node].n_spus - atomic_read(
&cbe_spu_info[node].reserved_spus)) >= count)
break;
}
if (node == MAX_NUMNODES)
return ERR_PTR(-EEXIST);
}
return neighbor;
}
static void
spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
struct spu_context *neighbor)
{
if (flags & SPU_CREATE_AFFINITY_MEM)
ctx->gang->aff_ref_ctx = ctx;
if (flags & SPU_CREATE_AFFINITY_SPU) {
if (list_empty(&neighbor->aff_list)) {
list_add_tail(&neighbor->aff_list,
&ctx->gang->aff_list_head);
neighbor->aff_head = 1;
}
if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
|| list_entry(neighbor->aff_list.next, struct spu_context,
aff_list)->aff_head) {
list_add(&ctx->aff_list, &neighbor->aff_list);
} else {
list_add_tail(&ctx->aff_list, &neighbor->aff_list);
if (neighbor->aff_head) {
neighbor->aff_head = 0;
ctx->aff_head = 1;
}
}
if (!ctx->gang->aff_ref_ctx)
ctx->gang->aff_ref_ctx = ctx;
}
}
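/*
* Resulting list shape (illustrative): spufs_set_affinity() chains
* contexts on either side of their neighbor on the gang's aff_list,
* with aff_head marking the chain's first context. aff_set_offsets()
* later numbers the chain relative to the reference context, negative
* before it and non-negative from it onward.
*/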
static int
spufs_create_context(struct inode *inode, struct dentry *dentry,
struct vfsmount *mnt, int flags, int mode,
struct file *aff_filp)
{
int ret;
int affinity;
struct spu_gang *gang;
struct spu_context *neighbor;
ret = -EPERM;
if ((flags & SPU_CREATE_NOSCHED) &&
......@@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode,
if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
goto out_unlock;
gang = NULL;
neighbor = NULL;
affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
if (affinity) {
gang = SPUFS_I(inode)->i_gang;
ret = -EINVAL;
if (!gang)
goto out_unlock;
mutex_lock(&gang->aff_mutex);
neighbor = spufs_assert_affinity(flags, gang, aff_filp);
if (IS_ERR(neighbor)) {
ret = PTR_ERR(neighbor);
goto out_aff_unlock;
}
}
ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
if (ret)
goto out_unlock;
goto out_aff_unlock;
if (affinity)
spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
neighbor);
/*
* get references for dget and mntget, will be released
......@@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode,
goto out;
}
out_aff_unlock:
if (affinity)
mutex_unlock(&gang->aff_mutex);
out_unlock:
mutex_unlock(&inode->i_mutex);
out:
......@@ -450,7 +569,8 @@ static int spufs_create_gang(struct inode *inode,
static struct file_system_type spufs_type;
long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
struct file *filp)
{
struct dentry *dentry;
int ret;
......@@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
dentry, nd->mnt, mode);
else
return spufs_create_context(nd->dentry->d_inode,
dentry, nd->mnt, flags, mode);
dentry, nd->mnt, flags, mode, filp);
out_dput:
dput(dentry);
......
......@@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu)
wake_up_all(&ctx->stop_wq);
}
static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
static inline int spu_stopped(struct spu_context *ctx, u32 *stat)
{
struct spu *spu;
u64 pte_fault;
*stat = ctx->ops->status_read(ctx);
if (ctx->state != SPU_STATE_RUNNABLE)
return 1;
spu = ctx->spu;
if (ctx->state != SPU_STATE_RUNNABLE ||
test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
return 1;
pte_fault = spu->dsisr &
(MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ?
......@@ -124,8 +126,10 @@ static int spu_setup_isolated(struct spu_context *ctx)
return ret;
}
static int spu_run_init(struct spu_context *ctx, u32 * npc)
static int spu_run_init(struct spu_context *ctx, u32 *npc)
{
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
if (ctx->flags & SPU_CREATE_ISOLATE) {
unsigned long runcntl;
......@@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc)
ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
}
spuctx_switch_state(ctx, SPU_UTIL_USER);
return 0;
}
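/*
* spu_run_init() brackets its work with spuctx_switch_state(): the
* runcntl setup above is charged to SPU_UTIL_SYSTEM, and the context
* moves to SPU_UTIL_USER only once it is about to execute user code.
*/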
static int spu_run_fini(struct spu_context *ctx, u32 * npc,
u32 * status)
static int spu_run_fini(struct spu_context *ctx, u32 *npc,
u32 *status)
{
int ret = 0;
*status = ctx->ops->status_read(ctx);
*npc = ctx->ops->npc_read(ctx);
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
spu_release(ctx);
if (signal_pending(current))
......@@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx)
return ret;
}
long spufs_run_spu(struct file *file, struct spu_context *ctx,
u32 *npc, u32 *event)
long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
{
int ret;
struct spu *spu;
u32 status;
if (mutex_lock_interruptible(&ctx->run_mutex))
......@@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
if (unlikely(ret))
break;
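/*
* A profiler registering late sets SPU_SCHED_NOTIFY_ACTIVE on
* already-running contexts (see notify_spus_active()); replay the
* switch notification here unless the SPU stopped on its own, in
* which case normal stop processing takes precedence.
*/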
spu = ctx->spu;
if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
&ctx->sched_flags))) {
if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
spu_switch_notify(spu, ctx);
continue;
}
}
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
(status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
ret = spu_process_callback(ctx);
......@@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
(ctx->state == SPU_STATE_RUNNABLE))
ctx->stats.libassist++;
ctx->ops->master_stop(ctx);
ret = spu_run_fini(ctx, npc, &status);
spu_yield(ctx);
......
......@@ -51,9 +51,6 @@ struct spu_prio_array {
DECLARE_BITMAP(bitmap, MAX_PRIO);
struct list_head runq[MAX_PRIO];
spinlock_t runq_lock;
struct list_head active_list[MAX_NUMNODES];
struct mutex active_mutex[MAX_NUMNODES];
int nr_active[MAX_NUMNODES];
int nr_waiting;
};
......@@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
ctx->policy = current->policy;
/*
* A lot of places that don't hold active_mutex poke into
* A lot of places that don't hold list_mutex poke into
* cpus_allowed, including grab_runnable_context which
* already holds the runq_lock. So abuse runq_lock
* to protect this field as well.
......@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx)
{
int node = ctx->spu->node;
mutex_lock(&spu_prio->active_mutex[node]);
mutex_lock(&cbe_spu_info[node].list_mutex);
__spu_update_sched_info(ctx);
mutex_unlock(&spu_prio->active_mutex[node]);
mutex_unlock(&cbe_spu_info[node].list_mutex);
}
static int __node_allowed(struct spu_context *ctx, int node)
......@@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node)
return rval;
}
/**
* spu_add_to_active_list - add spu to active list
* @spu: spu to add to the active list
*/
static void spu_add_to_active_list(struct spu *spu)
{
int node = spu->node;
mutex_lock(&spu_prio->active_mutex[node]);
spu_prio->nr_active[node]++;
list_add_tail(&spu->list, &spu_prio->active_list[node]);
mutex_unlock(&spu_prio->active_mutex[node]);
}
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
static void __spu_remove_from_active_list(struct spu *spu)
void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
list_del_init(&spu->list);
spu_prio->nr_active[spu->node]--;
blocking_notifier_call_chain(&spu_switch_notifier,
ctx ? ctx->object_id : 0, spu);
}
/**
* spu_remove_from_active_list - remove spu from active list
* @spu: spu to remove from the active list
*/
static void spu_remove_from_active_list(struct spu *spu)
static void notify_spus_active(void)
{
int node = spu->node;
mutex_lock(&spu_prio->active_mutex[node]);
__spu_remove_from_active_list(spu);
mutex_unlock(&spu_prio->active_mutex[node]);
}
int node;
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
/*
* Wake up the active spu_contexts.
*
* When an awakened process sees its "notify_active" flag set,
* it will call spu_switch_notify().
*/
for_each_online_node(node) {
struct spu *spu;
static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
blocking_notifier_call_chain(&spu_switch_notifier,
ctx ? ctx->object_id : 0, spu);
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if (spu->alloc_state != SPU_FREE) {
struct spu_context *ctx = spu->ctx;
set_bit(SPU_SCHED_NOTIFY_ACTIVE,
&ctx->sched_flags);
mb();
wake_up_all(&ctx->stop_wq);
}
}
mutex_unlock(&cbe_spu_info[node].list_mutex);
}
}
int spu_switch_event_register(struct notifier_block * n)
{
return blocking_notifier_chain_register(&spu_switch_notifier, n);
int ret;
ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
if (!ret)
notify_spus_active();
return ret;
}
EXPORT_SYMBOL_GPL(spu_switch_event_register);
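/*
* Registering deliberately replays the currently loaded contexts via
* notify_spus_active(), so a late-attaching consumer such as OProfile
* learns about SPU tasks that started before it registered.
*/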
int spu_switch_event_unregister(struct notifier_block * n)
{
return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}
EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
/**
* spu_bind_context - bind spu context to physical spu
......@@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
spu->number, spu->node);
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
if (ctx->flags & SPU_CREATE_NOSCHED)
atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
if (!list_empty(&ctx->aff_list))
atomic_inc(&ctx->gang->aff_sched_count);
ctx->stats.slb_flt_base = spu->stats.slb_flt;
ctx->stats.class2_intr_base = spu->stats.class2_intr;
......@@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
ctx->spu = spu;
ctx->ops = &spu_hw_ops;
spu->pid = current->pid;
spu->tgid = current->tgid;
spu_associate_mm(spu, ctx->owner);
spu->ibox_callback = spufs_ibox_callback;
spu->wbox_callback = spufs_wbox_callback;
......@@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
spu_cpu_affinity_set(spu, raw_smp_processor_id());
spu_switch_notify(spu, ctx);
ctx->state = SPU_STATE_RUNNABLE;
spu_switch_state(spu, SPU_UTIL_SYSTEM);
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
}
/*
* Must be used with the list_mutex held.
*/
static inline int sched_spu(struct spu *spu)
{
BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
}
static void aff_merge_remaining_ctxs(struct spu_gang *gang)
{
struct spu_context *ctx;
list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
if (list_empty(&ctx->aff_list))
list_add(&ctx->aff_list, &gang->aff_list_head);
}
gang->aff_flags |= AFF_MERGED;
}
static void aff_set_offsets(struct spu_gang *gang)
{
struct spu_context *ctx;
int offset;
offset = -1;
list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
aff_list) {
if (&ctx->aff_list == &gang->aff_list_head)
break;
ctx->aff_offset = offset--;
}
offset = 0;
list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
if (&ctx->aff_list == &gang->aff_list_head)
break;
ctx->aff_offset = offset++;
}
gang->aff_flags |= AFF_OFFSETS_SET;
}
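/*
* Worked example (illustrative): with the gang's aff_list holding
* A <-> B <-> REF <-> C <-> D and REF == aff_ref_ctx, the reverse
* walk above assigns B=-1 and A=-2, and the forward walk assigns
* REF=0, C=1, D=2. ctx_location() later steps that many schedulable
* SPUs away from the gang's reference SPU.
*/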
static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
int group_size, int lowest_offset)
{
struct spu *spu;
int node, n;
/*
* TODO: A better algorithm could be used to find a good spu to
* serve as the reference location for the context chain.
*/
node = cpu_to_node(raw_smp_processor_id());
for (n = 0; n < MAX_NUMNODES; n++, node++) {
node = (node < MAX_NUMNODES) ? node : 0;
if (!node_allowed(ctx, node))
continue;
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if ((!mem_aff || spu->has_mem_affinity) &&
sched_spu(spu)) {
mutex_unlock(&cbe_spu_info[node].list_mutex);
return spu;
}
}
mutex_unlock(&cbe_spu_info[node].list_mutex);
}
return NULL;
}
static void aff_set_ref_point_location(struct spu_gang *gang)
{
int mem_aff, gs, lowest_offset;
struct spu_context *ctx;
struct spu *tmp;
mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
lowest_offset = 0;
gs = 0;
list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
gs++;
list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
aff_list) {
if (&ctx->aff_list == &gang->aff_list_head)
break;
lowest_offset = ctx->aff_offset;
}
gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
}
static struct spu *ctx_location(struct spu *ref, int offset, int node)
{
struct spu *spu;
spu = NULL;
if (offset >= 0) {
list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
BUG_ON(spu->node != node);
if (offset == 0)
break;
if (sched_spu(spu))
offset--;
}
} else {
list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
BUG_ON(spu->node != node);
if (offset == 0)
break;
if (sched_spu(spu))
offset++;
}
}
return spu;
}
/*
* has_affinity is called each time a context is going to be scheduled.
* It returns non-zero when the context's gang has an SPU affinity
* reference, computing the reference SPU placement on first use.
*/
static int has_affinity(struct spu_context *ctx)
{
struct spu_gang *gang = ctx->gang;
if (list_empty(&ctx->aff_list))
return 0;
mutex_lock(&gang->aff_mutex);
if (!gang->aff_ref_spu) {
if (!(gang->aff_flags & AFF_MERGED))
aff_merge_remaining_ctxs(gang);
if (!(gang->aff_flags & AFF_OFFSETS_SET))
aff_set_offsets(gang);
aff_set_ref_point_location(gang);
}
mutex_unlock(&gang->aff_mutex);
return gang->aff_ref_spu != NULL;
}
/**
......@@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
spu->pid, spu->number, spu->node);
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
spu_switch_state(spu, SPU_UTIL_IDLE);
if (spu->ctx->flags & SPU_CREATE_NOSCHED)
atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
if (!list_empty(&ctx->aff_list))
if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
ctx->gang->aff_ref_spu = NULL;
spu_switch_notify(spu, NULL);
spu_unmap_mappings(ctx);
spu_save(&ctx->csa, spu);
......@@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
spu->dma_callback = NULL;
spu_associate_mm(spu, NULL);
spu->pid = 0;
spu->tgid = 0;
ctx->ops = &spu_backing_ops;
ctx->spu = NULL;
spu->flags = 0;
spu->ctx = NULL;
......@@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
(spu->stats.slb_flt - ctx->stats.slb_flt_base);
ctx->stats.class2_intr +=
(spu->stats.class2_intr - ctx->stats.class2_intr_base);
/* This maps the underlying spu state to idle */
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
ctx->spu = NULL;
}
/**
......@@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx)
static struct spu *spu_get_idle(struct spu_context *ctx)
{
struct spu *spu = NULL;
int node = cpu_to_node(raw_smp_processor_id());
int n;
struct spu *spu;
int node, n;
if (has_affinity(ctx)) {
node = ctx->gang->aff_ref_spu->node;
mutex_lock(&cbe_spu_info[node].list_mutex);
spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
if (spu && spu->alloc_state == SPU_FREE)
goto found;
mutex_unlock(&cbe_spu_info[node].list_mutex);
return NULL;
}
node = cpu_to_node(raw_smp_processor_id());
for (n = 0; n < MAX_NUMNODES; n++, node++) {
node = (node < MAX_NUMNODES) ? node : 0;
if (!node_allowed(ctx, node))
continue;
spu = spu_alloc_node(node);
if (spu)
break;
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if (spu->alloc_state == SPU_FREE)
goto found;
}
mutex_unlock(&cbe_spu_info[node].list_mutex);
}
return NULL;
found:
spu->alloc_state = SPU_USED;
mutex_unlock(&cbe_spu_info[node].list_mutex);
pr_debug("Got SPU %d %d\n", spu->number, spu->node);
spu_init_channels(spu);
return spu;
}
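/*
* Two paths above: an affinity context may only run on the SPU
* computed from its gang's reference placement, so that lookup fails
* hard rather than falling back to the node scan. Either way the
* SPU_FREE -> SPU_USED transition happens under list_mutex.
*/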
......@@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx)
if (!node_allowed(ctx, node))
continue;
mutex_lock(&spu_prio->active_mutex[node]);
list_for_each_entry(spu, &spu_prio->active_list[node], list) {
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
struct spu_context *tmp = spu->ctx;
if (tmp->prio > ctx->prio &&
(!victim || tmp->prio > victim->prio))
victim = spu->ctx;
}
mutex_unlock(&spu_prio->active_mutex[node]);
mutex_unlock(&cbe_spu_info[node].list_mutex);
if (victim) {
/*
......@@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx)
victim = NULL;
goto restart;
}
spu_remove_from_active_list(spu);
mutex_lock(&cbe_spu_info[node].list_mutex);
cbe_spu_info[node].nr_active--;
mutex_unlock(&cbe_spu_info[node].list_mutex);
spu_unbind_context(spu, victim);
victim->stats.invol_ctx_switch++;
spu->stats.invol_ctx_switch++;
......@@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx)
*/
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
do {
struct spu *spu;
......@@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags)
if (!spu && rt_prio(ctx->prio))
spu = find_victim(ctx);
if (spu) {
int node = spu->node;
mutex_lock(&cbe_spu_info[node].list_mutex);
spu_bind_context(spu, ctx);
spu_add_to_active_list(spu);
cbe_spu_info[node].nr_active++;
mutex_unlock(&cbe_spu_info[node].list_mutex);
return 0;
}
......@@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node)
int best;
spin_lock(&spu_prio->runq_lock);
best = sched_find_first_bit(spu_prio->bitmap);
best = find_first_bit(spu_prio->bitmap, prio);
while (best < prio) {
struct list_head *rq = &spu_prio->runq[best];
......@@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
if (spu) {
new = grab_runnable_context(max_prio, spu->node);
if (new || force) {
spu_remove_from_active_list(spu);
int node = spu->node;
mutex_lock(&cbe_spu_info[node].list_mutex);
spu_unbind_context(spu, ctx);
spu->alloc_state = SPU_FREE;
cbe_spu_info[node].nr_active--;
mutex_unlock(&cbe_spu_info[node].list_mutex);
ctx->stats.vol_ctx_switch++;
spu->stats.vol_ctx_switch++;
spu_free(spu);
if (new)
wake_up(&new->stop_wq);
}
......@@ -550,17 +743,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
*/
void spu_deactivate(struct spu_context *ctx)
{
/*
* We must never reach this for a nosched context,
* but handle the case gracefully instead of panicking.
*/
if (ctx->flags & SPU_CREATE_NOSCHED) {
WARN_ON(1);
return;
}
__spu_deactivate(ctx, 1, MAX_PRIO);
spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
}
/**
......@@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx)
{
if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
mutex_lock(&ctx->state_mutex);
if (__spu_deactivate(ctx, 0, MAX_PRIO))
spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
else {
spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
spu_switch_state(ctx->spu, SPU_UTIL_USER);
}
__spu_deactivate(ctx, 0, MAX_PRIO);
mutex_unlock(&ctx->state_mutex);
}
}
static void spusched_tick(struct spu_context *ctx)
static noinline void spusched_tick(struct spu_context *ctx)
{
if (ctx->flags & SPU_CREATE_NOSCHED)
return;
......@@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx)
return;
/*
* Unfortunately active_mutex ranks outside of state_mutex, so
* Unfortunately list_mutex ranks outside of state_mutex, so
* we have to trylock here. If we fail give the context another
* tick and try again.
*/
......@@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx)
new = grab_runnable_context(ctx->prio + 1, spu->node);
if (new) {
__spu_remove_from_active_list(spu);
spu_unbind_context(spu, ctx);
ctx->stats.invol_ctx_switch++;
spu->stats.invol_ctx_switch++;
spu_free(spu);
spu->alloc_state = SPU_FREE;
cbe_spu_info[spu->node].nr_active--;
wake_up(&new->stop_wq);
/*
* We need to break out of the wait loop in
......@@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx)
*
* Return the number of tasks currently running or waiting to run.
*
* Note that we don't take runq_lock / active_mutex here. Reading
* Note that we don't take runq_lock / list_mutex here. Reading
* a single 32bit value is atomic on powerpc, and we don't care
* about memory ordering issues here.
*/
......@@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void)
int nr_active = 0, node;
for (node = 0; node < MAX_NUMNODES; node++)
nr_active += spu_prio->nr_active[node];
nr_active += cbe_spu_info[node].nr_active;
nr_active += spu_prio->nr_waiting;
return nr_active;
......@@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data)
static int spusched_thread(void *unused)
{
struct spu *spu, *next;
struct spu *spu;
int node;
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
for (node = 0; node < MAX_NUMNODES; node++) {
mutex_lock(&spu_prio->active_mutex[node]);
list_for_each_entry_safe(spu, next,
&spu_prio->active_list[node],
list)
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
if (spu->ctx)
spusched_tick(spu->ctx);
mutex_unlock(&spu_prio->active_mutex[node]);
mutex_unlock(&cbe_spu_info[node].list_mutex);
}
}
......@@ -751,10 +927,9 @@ int __init spu_sched_init(void)
INIT_LIST_HEAD(&spu_prio->runq[i]);
__clear_bit(i, spu_prio->bitmap);
}
__set_bit(MAX_PRIO, spu_prio->bitmap);
for (i = 0; i < MAX_NUMNODES; i++) {
mutex_init(&spu_prio->active_mutex[i]);
INIT_LIST_HEAD(&spu_prio->active_list[i]);
mutex_init(&cbe_spu_info[i].list_mutex);
INIT_LIST_HEAD(&cbe_spu_info[i].spus);
}
spin_lock_init(&spu_prio->runq_lock);
......@@ -783,9 +958,9 @@ int __init spu_sched_init(void)
return err;
}
void __exit spu_sched_exit(void)
void spu_sched_exit(void)
{
struct spu *spu, *tmp;
struct spu *spu;
int node;
remove_proc_entry("spu_loadavg", NULL);
......@@ -794,13 +969,11 @@ void __exit spu_sched_exit(void)
kthread_stop(spusched_task);
for (node = 0; node < MAX_NUMNODES; node++) {
mutex_lock(&spu_prio->active_mutex[node]);
list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
list) {
list_del_init(&spu->list);
spu_free(spu);
}
mutex_unlock(&spu_prio->active_mutex[node]);
mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
if (spu->alloc_state != SPU_FREE)
spu->alloc_state = SPU_FREE;
mutex_unlock(&cbe_spu_info[node].list_mutex);
}
kfree(spu_prio);
}
......@@ -84,13 +84,13 @@ static inline void restore_decr(void)
unsigned int decr_running;
unsigned int decr;
/* Restore, Step 6:
/* Restore, Step 6 (moved):
* If the LSCSA "decrementer running" flag is set
* then write the SPU_WrDec channel with the
* decrementer value from LSCSA.
*/
offset = LSCSA_QW_OFFSET(decr_status);
decr_running = regs_spill[offset].slot[0];
decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
if (decr_running) {
offset = LSCSA_QW_OFFSET(decr);
decr = regs_spill[offset].slot[0];
......@@ -318,10 +318,10 @@ int main()
build_dma_list(lscsa_ea); /* Step 3. */
restore_upper_240kb(lscsa_ea); /* Step 4. */
/* Step 5: done by 'exit'. */
restore_decr(); /* Step 6. */
enqueue_putllc(lscsa_ea); /* Step 7. */
set_tag_update(); /* Step 8. */
read_tag_status(); /* Step 9. */
restore_decr(); /* moved Step 6. */
read_llar_status(); /* Step 10. */
write_ppu_mb(); /* Step 11. */
write_ppuint_mb(); /* Step 12. */
......
......@@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
0x24fd8081,
0x1cd80081,
0x33001180,
0x42030003,
0x42034003,
0x33800284,
0x1c010204,
0x40200000,
......@@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
0x23fffd84,
0x1c100183,
0x217ffa85,
0x3080a000,
0x3080a201,
0x3080a402,
0x3080a603,
0x3080a804,
0x3080aa05,
0x3080ac06,
0x3080ae07,
0x3080b008,
0x3080b209,
0x3080b40a,
0x3080b60b,
0x3080b80c,
0x3080ba0d,
0x3080bc0e,
0x3080be0f,
0x3080b000,
0x3080b201,
0x3080b402,
0x3080b603,
0x3080b804,
0x3080ba05,
0x3080bc06,
0x3080be07,
0x3080c008,
0x3080c209,
0x3080c40a,
0x3080c60b,
0x3080c80c,
0x3080ca0d,
0x3080cc0e,
0x3080ce0f,
0x00003ffc,
0x00000000,
0x00000000,
......@@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
0x3ec00083,
0xb0a14103,
0x01a00204,
0x3ec10082,
0x4202800e,
0x04000703,
0xb0a14202,
0x21a00803,
0x3fbf028d,
0x3f20068d,
0x3fbe0682,
0x3ec10083,
0x4202c002,
0xb0a14203,
0x21a00802,
0x3fbf028a,
0x3f20050a,
0x3fbe0502,
0x3fe30102,
0x21a00882,
0x3f82028f,
0x3fe3078f,
0x3fbf0784,
0x3f82028b,
0x3fe3058b,
0x3fbf0584,
0x3f200204,
0x3fbe0204,
0x3fe30204,
......@@ -75,252 +74,285 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
0x21a00083,
0x40800082,
0x21a00b02,
0x10002818,
0x42a00002,
0x32800007,
0x4207000c,
0x18008208,
0x40a0000b,
0x4080020a,
0x40800709,
0x00200000,
0x42070002,
0x3ac30384,
0x10002612,
0x42a00003,
0x42074006,
0x1800c204,
0x40a00008,
0x40800789,
0x1c010305,
0x34000302,
0x1cffc489,
0x00200000,
0x18008383,
0x38830382,
0x4cffc486,
0x3ac28185,
0xb0408584,
0x28830382,
0x1c020387,
0x38828182,
0xb0408405,
0x1802c408,
0x28828182,
0x217ff886,
0x04000583,
0x21a00803,
0x3fbe0682,
0x3fe30102,
0x04000106,
0x21a00886,
0x04000603,
0x21a00903,
0x40803c02,
0x21a00982,
0x40800003,
0x04000184,
0x21a00a04,
0x3ec00303,
0x3ec00287,
0xb0408403,
0x24000302,
0x34000282,
0x1c020306,
0xb0408207,
0x18020204,
0x24000282,
0x217ffa09,
0x04000402,
0x21a00802,
0x3fbe0504,
0x3fe30204,
0x21a00884,
0x42074002,
0x21a00902,
0x40803c03,
0x21a00983,
0x04000485,
0x21a00a05,
0x40802202,
0x21a00a82,
0x42028005,
0x34208702,
0x21002282,
0x21a00804,
0x21a00886,
0x3fbf0782,
0x21a00805,
0x21a00884,
0x3fbf0582,
0x3f200102,
0x3fbe0102,
0x3fe30102,
0x21a00902,
0x40804003,
0x21a00983,
0x21a00a04,
0x21a00a05,
0x40805a02,
0x21a00a82,
0x40800083,
0x21a00b83,
0x01a00c02,
0x01a00d83,
0x3420c282,
0x30809c03,
0x34000182,
0x14004102,
0x21002082,
0x01a00d82,
0x3080a003,
0x34000182,
0x21a00e02,
0x34210283,
0x21a00f03,
0x34200284,
0x77400200,
0x3421c282,
0x3080a203,
0x34000182,
0x21a00f02,
0x3080a403,
0x34000182,
0x77400100,
0x3080a603,
0x34000182,
0x21a00702,
0x34218283,
0x21a00083,
0x34214282,
0x3080a803,
0x34000182,
0x21a00082,
0x3080aa03,
0x34000182,
0x21a00b02,
0x4200480c,
0x00200000,
0x1c010286,
0x34220284,
0x34220302,
0x0f608203,
0x5c024204,
0x3b81810b,
0x42013c02,
0x00200000,
0x18008185,
0x38808183,
0x3b814182,
0x21004e84,
0x4020007f,
0x3080ae02,
0x42004805,
0x3080ac04,
0x34000103,
0x34000202,
0x1cffc183,
0x3b810106,
0x0f608184,
0x42013802,
0x5c020183,
0x38810102,
0x3b810102,
0x21000e83,
0x4020007f,
0x35000100,
0x000004e0,
0x000002a0,
0x000002e8,
0x00000428,
0x00000470,
0x000002f8,
0x00000430,
0x00000360,
0x000002e8,
0x000004a0,
0x00000468,
0x000002f8,
0x000003c8,
0x000004a8,
0x00000298,
0x00000360,
0x00200000,
0x409ffe02,
0x30801203,
0x40800204,
0x3ec40085,
0x10009c09,
0x3ac10606,
0xb060c105,
0x4020007f,
0x4020007f,
0x40800208,
0x3ec40084,
0x40800407,
0x3ac20289,
0xb060c104,
0x3ac1c284,
0x20801203,
0x38810602,
0xb0408586,
0x28810602,
0x32004180,
0x34204702,
0x38820282,
0x41004003,
0xb0408189,
0x28820282,
0x3881c282,
0xb0408304,
0x2881c282,
0x00400000,
0x40800003,
0x35000000,
0x30809e03,
0x34000182,
0x21a00382,
0x4020007f,
0x327fdc80,
0x327fde00,
0x409ffe02,
0x30801203,
0x40800204,
0x3ec40087,
0x40800405,
0x00200000,
0x40800606,
0x3ac10608,
0x3ac14609,
0x3ac1860a,
0xb060c107,
0x40800206,
0x3ec40084,
0x40800407,
0x40800608,
0x3ac1828a,
0x3ac20289,
0xb060c104,
0x3ac1c284,
0x20801203,
0x38818282,
0x41004003,
0x38810602,
0x4020007f,
0xb0408188,
0x4020007f,
0x28810602,
0x41201002,
0x38814603,
0x10009c09,
0xb060c109,
0x4020007f,
0x28814603,
0xb040818a,
0x10005b0b,
0x41201003,
0x28818282,
0x3881c282,
0xb0408184,
0x41193f83,
0x38818602,
0x60ffc003,
0xb040818a,
0x28818602,
0x32003080,
0x2881c282,
0x38820282,
0xb0408189,
0x28820282,
0x327fef80,
0x409ffe02,
0x30801203,
0x40800204,
0x3ec40087,
0x41201008,
0x10009c14,
0x40800405,
0x3ac10609,
0x40800606,
0x3ac1460a,
0xb060c107,
0x3ac1860b,
0x40800207,
0x3ec40086,
0x4120100b,
0x10005b14,
0x40800404,
0x3ac1c289,
0x40800608,
0xb060c106,
0x3ac10286,
0x3ac2028a,
0x20801203,
0x38810602,
0xb0408409,
0x28810602,
0x38814603,
0xb060c40a,
0x4020007f,
0x28814603,
0x3881c282,
0x41193f83,
0x38818602,
0x60ffc003,
0xb040818b,
0x28818602,
0x32002380,
0x409ffe02,
0x30801204,
0x40800205,
0x3ec40083,
0x40800406,
0x3ac14607,
0x3ac18608,
0xb0810103,
0x41004002,
0x20801204,
0x4020007f,
0x38814603,
0x10009c0b,
0xb060c107,
0x4020007f,
0x4020007f,
0x28814603,
0x38818602,
0x4020007f,
0xb0408589,
0x2881c282,
0x38810282,
0xb0408586,
0x28810282,
0x38820282,
0xb040818a,
0x28820282,
0x4020007f,
0xb0408588,
0x28818602,
0x327fe280,
0x409ffe02,
0x30801203,
0x40800207,
0x3ec40084,
0x40800408,
0x10005b14,
0x40800609,
0x3ac1c28a,
0x3ac2028b,
0xb060c104,
0x3ac24284,
0x20801203,
0x41201003,
0x3881c282,
0xb040830a,
0x2881c282,
0x38820282,
0xb040818b,
0x41193f83,
0x60ffc003,
0x28820282,
0x38824282,
0xb0408184,
0x28824282,
0x4020007f,
0x32001780,
0x327fd580,
0x409ffe02,
0x1000640e,
0x40800204,
0x1000658e,
0x40800206,
0x30801203,
0x40800405,
0x3ec40087,
0x40800606,
0x3ac10608,
0x3ac14609,
0x3ac1860a,
0xb060c107,
0x40800407,
0x3ec40084,
0x40800608,
0x3ac1828a,
0x3ac20289,
0xb060c104,
0x3ac1c284,
0x20801203,
0x413d8003,
0x38810602,
0x38818282,
0x4020007f,
0x327fd780,
0x409ffe02,
0x10007f0c,
0x40800205,
0x30801204,
0x40800406,
0x3ec40083,
0x3ac14607,
0x3ac18608,
0xb0810103,
0x413d8002,
0x20801204,
0x38814603,
0x327fd800,
0x409ffe03,
0x30801202,
0x40800207,
0x3ec40084,
0x10005b09,
0x3ac1c288,
0xb0408184,
0x4020007f,
0x327feb80,
0x4020007f,
0x20801202,
0x3881c282,
0xb0408308,
0x2881c282,
0x327fc680,
0x409ffe02,
0x1000588b,
0x40800208,
0x30801203,
0x40800204,
0x3ec40087,
0x40800405,
0x1000650a,
0x40800606,
0x3ac10608,
0x3ac14609,
0x3ac1860a,
0xb060c107,
0x40800407,
0x3ec40084,
0x3ac20289,
0xb060c104,
0x3ac1c284,
0x20801203,
0x38810602,
0xb0408588,
0x4020007f,
0x327fc980,
0x00400000,
0x40800003,
0x4020007f,
0x35000000,
0x413d8003,
0x38820282,
0x327fbd80,
0x00200000,
0x00000da0,
0x00000000,
0x00000000,
0x00000000,
0x00000d90,
0x00000000,
0x00000000,
0x00000000,
0x00000db0,
0x00000000,
0x00000000,
0x00000000,
0x00000dc0,
0x00000000,
0x00000000,
0x00000000,
0x00000d80,
0x00000000,
0x00000000,
0x00000000,
0x00000df0,
0x00000000,
0x00000000,
0x00000000,
0x00000de0,
0x00000000,
0x00000000,
0x00000000,
0x00000dd0,
0x00000000,
0x00000000,
0x00000000,
0x00000e04,
0x00000000,
0x00000000,
0x00000000,
0x00000e00,
0x00000000,
0x00000000,
0x00000000,
......
......@@ -40,17 +40,13 @@ enum {
struct spu_context_ops;
struct spu_gang;
/*
* This is the state for spu utilization reporting to userspace.
* Because this state is visible to userspace it must never change and needs
* to be kept strictly separate from any internal state kept by the kernel.
*/
enum spuctx_execution_state {
SPUCTX_UTIL_USER = 0,
SPUCTX_UTIL_SYSTEM,
SPUCTX_UTIL_IOWAIT,
SPUCTX_UTIL_LOADED,
SPUCTX_UTIL_MAX
enum {
SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */
};
/* ctx->sched_flags */
enum {
SPU_SCHED_NOTIFY_ACTIVE,
};
struct spu_context {
......@@ -89,6 +85,8 @@ struct spu_context {
struct list_head gang_list;
struct spu_gang *gang;
struct kref *prof_priv_kref;
void (*prof_priv_release)(struct kref *kref);
/* owner thread */
pid_t tid;
......@@ -104,9 +102,9 @@ struct spu_context {
/* statistics */
struct {
/* updates protected by ctx->state_mutex */
enum spuctx_execution_state execution_state;
unsigned long tstamp; /* time of last ctx switch */
unsigned long times[SPUCTX_UTIL_MAX];
enum spu_utilization_state util_state;
unsigned long long tstamp; /* time of last state switch */
unsigned long long times[SPU_UTIL_MAX];
unsigned long long vol_ctx_switch;
unsigned long long invol_ctx_switch;
unsigned long long min_flt;
......@@ -118,6 +116,10 @@ struct spu_context {
unsigned long long class2_intr_base; /* # at last ctx switch */
unsigned long long libassist;
} stats;
struct list_head aff_list;
int aff_head;
int aff_offset;
};
struct spu_gang {
......@@ -125,8 +127,19 @@ struct spu_gang {
struct mutex mutex;
struct kref kref;
int contexts;
struct spu_context *aff_ref_ctx;
struct list_head aff_list_head;
struct mutex aff_mutex;
int aff_flags;
struct spu *aff_ref_spu;
atomic_t aff_sched_count;
};
/* Flag bits for spu_gang aff_flags */
#define AFF_OFFSETS_SET 1
#define AFF_MERGED 2
struct mfc_dma_command {
int32_t pad; /* reserved */
uint32_t lsa; /* local storage address */
......@@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[];
extern struct tree_descr spufs_dir_nosched_contents[];
/* system call implementation */
long spufs_run_spu(struct file *file,
struct spu_context *ctx, u32 *npc, u32 *status);
long spufs_create(struct nameidata *nd,
unsigned int flags, mode_t mode);
long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
long spufs_create(struct nameidata *nd, unsigned int flags,
mode_t mode, struct file *filp);
extern const struct file_operations spufs_context_fops;
/* gang management */
......@@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
/* fault handling */
int spufs_handle_class1(struct spu_context *ctx);
/* affinity */
struct spu *affinity_check(struct spu_context *ctx);
/* context management */
extern atomic_t nr_spu_contexts;
static inline void spu_acquire(struct spu_context *ctx)
......@@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx);
void spu_forget(struct spu_context *ctx);
int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
void spu_acquire_saved(struct spu_context *ctx);
void spu_release_saved(struct spu_context *ctx);
int spu_activate(struct spu_context *ctx, unsigned long flags);
void spu_deactivate(struct spu_context *ctx);
void spu_yield(struct spu_context *ctx);
void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
void spu_set_timeslice(struct spu_context *ctx);
void spu_update_sched_info(struct spu_context *ctx);
void __spu_update_sched_info(struct spu_context *ctx);
int __init spu_sched_init(void);
void __exit spu_sched_exit(void);
void spu_sched_exit(void);
extern char *isolated_loader;
......@@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes;
* line.
*/
static inline void spuctx_switch_state(struct spu_context *ctx,
enum spuctx_execution_state new_state)
enum spu_utilization_state new_state)
{
WARN_ON(!mutex_is_locked(&ctx->state_mutex));
unsigned long long curtime;
signed long long delta;
struct timespec ts;
struct spu *spu;
enum spu_utilization_state old_state;
if (ctx->stats.execution_state != new_state) {
unsigned long curtime = jiffies;
ktime_get_ts(&ts);
curtime = timespec_to_ns(&ts);
delta = curtime - ctx->stats.tstamp;
ctx->stats.times[ctx->stats.execution_state] +=
curtime - ctx->stats.tstamp;
ctx->stats.tstamp = curtime;
ctx->stats.execution_state = new_state;
}
}
WARN_ON(!mutex_is_locked(&ctx->state_mutex));
WARN_ON(delta < 0);
static inline void spu_switch_state(struct spu *spu,
enum spuctx_execution_state new_state)
{
if (spu->stats.utilization_state != new_state) {
unsigned long curtime = jiffies;
spu = ctx->spu;
old_state = ctx->stats.util_state;
ctx->stats.util_state = new_state;
ctx->stats.tstamp = curtime;
spu->stats.times[spu->stats.utilization_state] +=
curtime - spu->stats.tstamp;
/*
* Update the physical SPU utilization statistics.
*/
if (spu) {
ctx->stats.times[old_state] += delta;
spu->stats.times[old_state] += delta;
spu->stats.util_state = new_state;
spu->stats.tstamp = curtime;
spu->stats.utilization_state = new_state;
}
}
......
......@@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
case MFC_CNTL_SUSPEND_COMPLETE:
if (csa) {
csa->priv2.mfc_control_RW =
in_be64(&priv2->mfc_control_RW) |
MFC_CNTL_SUSPEND_MASK |
MFC_CNTL_SUSPEND_DMA_QUEUE;
}
break;
......@@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
MFC_CNTL_SUSPEND_COMPLETE);
if (csa) {
csa->priv2.mfc_control_RW =
in_be64(&priv2->mfc_control_RW) &
~MFC_CNTL_SUSPEND_DMA_QUEUE;
csa->priv2.mfc_control_RW = 0;
}
break;
}
......@@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu)
* Read MFC_CNTL[Ds]. Update saved copy of
* CSA.MFC_CNTL[Ds].
*/
if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) {
csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
csa->suspend_time = get_cycles();
out_be64(&priv2->spu_chnlcntptr_RW, 7ULL);
eieio();
csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW);
eieio();
} else {
csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
}
csa->priv2.mfc_control_RW |=
in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING;
}
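/*
* As the hunk above shows, this step now only mirrors MFC_CNTL[Ds]
* into the saved copy; the channel-7 spill and suspend_time stamp
* that used to sit here are no longer part of this step.
*/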
static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
......@@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
* Write MFC_CNTL[Dh] set to a '1' to halt
* the decrementer.
*/
out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED);
out_be64(&priv2->mfc_control_RW,
MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
eieio();
}
......@@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
{
struct spu_priv2 __iomem *priv2 = spu->priv2;
u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
int i;
/* Save, Step 42:
......@@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
/* Save the following CH: [0,3,4,24,25,27] */
for (i = 0; i < 7; i++) {
for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
idx = ch_indices[i];
out_be64(&priv2->spu_chnlcntptr_RW, idx);
eieio();
......@@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
*/
}
static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
struct spu *spu)
{
struct spu_priv2 __iomem *priv2 = spu->priv2;
/* Restore, Step 7:
* Restore, Step 47.
* Write MFC_Cntl[Dh,Sc]='1','1' to suspend
* Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
* the queue and halt the decrementer.
*/
out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
......@@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
{
struct spu_priv2 __iomem *priv2 = spu->priv2;
u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
u64 idx;
int i;
......@@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
out_be64(&priv2->spu_chnldata_RW, 0UL);
/* Reset the following CH: [0,3,4,24,25,27] */
for (i = 0; i < 7; i++) {
for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
idx = ch_indices[i];
out_be64(&priv2->spu_chnlcntptr_RW, idx);
eieio();
......@@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu)
cycles_t resume_time = get_cycles();
cycles_t delta_time = resume_time - csa->suspend_time;
csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
if (csa->lscsa->decr.slot[0] < delta_time) {
csa->lscsa->decr_status.slot[0] |=
SPU_DECR_STATUS_WRAPPED;
}
csa->lscsa->decr.slot[0] -= delta_time;
} else {
csa->lscsa->decr_status.slot[0] = 0;
}
}
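/*
* Rationale for the wrap check above: delta_time cycles elapsed while
* the context was saved, so a saved decrementer value smaller than
* delta_time would have underflowed in the meantime. Setting
* SPU_DECR_STATUS_WRAPPED lets restore_decr_wrapped() raise the timer
* event the context would otherwise have missed.
*/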
......@@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}
static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
{
struct spu_priv2 __iomem *priv2 = spu->priv2;
/* Restore, Step 47.
* Write MFC_Cntl[Sc,Sm]='1','0' to suspend
* the queue.
*/
out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
eieio();
}
static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
{
/* Restore, Step 49:
......@@ -1548,10 +1559,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
* "wrapped" flag is set, OR in a '1' to
* CSA.SPU_Event_Status[Tm].
*/
if (csa->lscsa->decr_status.slot[0] == 1) {
if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
csa->spu_chnldata_RW[0] |= 0x20;
}
if ((csa->lscsa->decr_status.slot[0] == 1) &&
if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) &&
(csa->spu_chnlcnt_RW[0] == 0 &&
((csa->spu_chnldata_RW[2] & 0x20) == 0x0) &&
((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) {
......@@ -1562,18 +1573,13 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
{
struct spu_priv2 __iomem *priv2 = spu->priv2;
u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
int i;
/* Restore, Step 59:
* Restore the following CH: [0,3,4,24,25,27]
*/
/* Restore CH 1 without count */
out_be64(&priv2->spu_chnlcntptr_RW, 1);
out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]);
/* Restore the following CH: [0,3,4,24,25,27] */
for (i = 0; i < 7; i++) {
for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
idx = ch_indices[i];
out_be64(&priv2->spu_chnlcntptr_RW, idx);
eieio();
......@@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
set_switch_pending(prev, spu); /* Step 5. */
stop_spu_isolate(spu); /* NEW. */
remove_other_spu_access(prev, spu); /* Step 6. */
suspend_mfc(prev, spu); /* Step 7. */
suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */
wait_suspend_mfc_complete(prev, spu); /* Step 8. */
if (!suspend_spe(prev, spu)) /* Step 9. */
clear_spu_status(prev, spu); /* Step 10. */
......
......@@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
goto out;
i = SPUFS_I(filp->f_path.dentry->d_inode);
ret = spufs_run_spu(filp, i->i_ctx, &npc, &status);
ret = spufs_run_spu(i->i_ctx, &npc, &status);
if (put_user(npc, unpc))
ret = -EFAULT;
......@@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
}
#endif
asmlinkage long sys_spu_create(const char __user *pathname,
unsigned int flags, mode_t mode)
asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
mode_t mode, struct file *neighbor)
{
char *tmp;
int ret;
......@@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname,
ret = path_lookup(tmp, LOOKUP_PARENT|
LOOKUP_OPEN|LOOKUP_CREATE, &nd);
if (!ret) {
ret = spufs_create(&nd, flags, mode);
ret = spufs_create(&nd, flags, mode, neighbor);
path_release(&nd);
}
putname(tmp);
......@@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname,
return ret;
}
#ifndef MODULE
asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
mode_t mode, int neighbor_fd)
{
int fput_needed;
struct file *neighbor;
long ret;
if (flags & SPU_CREATE_AFFINITY_SPU) {
ret = -EBADF;
neighbor = fget_light(neighbor_fd, &fput_needed);
if (neighbor) {
ret = do_spu_create(pathname, flags, mode, neighbor);
fput_light(neighbor, fput_needed);
}
} else {
ret = do_spu_create(pathname, flags, mode, NULL);
}
return ret;
}
#endif
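/*
* Illustrative call sequence (hypothetical userspace snippet, using
* the raw syscall): create a gang, then pass an already-open context
* fd as neighbor_fd when SPU_CREATE_AFFINITY_SPU is set:
*
*   gang = spu_create("/spu/mygang", SPU_CREATE_GANG, 0755);
*   ref  = spu_create("/spu/mygang/ctx0", 0, 0755);
*   spu_create("/spu/mygang/ctx1", SPU_CREATE_AFFINITY_SPU, 0755, ref);
*
* The fourth argument is ignored unless the affinity flag is set, as
* the branch above shows.
*/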
struct spufs_calls spufs_calls = {
.create_thread = sys_spu_create,
.create_thread = do_spu_create,
.spu_run = do_spu_run,
.owner = THIS_MODULE,
};
......@@ -17,6 +17,7 @@ obj-$(CONFIG_QUICC_ENGINE) += qe_lib/
mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o
obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
obj-$(CONFIG_AXON_RAM) += axonram.o
# contains only the suspend handler for time
ifeq ($(CONFIG_RTC_CLASS),)
......
/*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2006
*
* Author: Maxim Shchetynin <maxim@de.ibm.com>
*
* Axon DDR2 device driver.
* It registers one block device per Axon DDR2 memory bank found in the system.
* Block devices are called axonram?; their major and minor numbers are
* available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/irq.h>
#include <linux/irqreturn.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#include <asm/of_device.h>
#include <asm/of_platform.h>
#include <asm/page.h>
#include <asm/prom.h>
#define AXON_RAM_MODULE_NAME "axonram"
#define AXON_RAM_DEVICE_NAME "axonram"
#define AXON_RAM_MINORS_PER_DISK 16
#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT
#define AXON_RAM_BLOCK_SIZE (1 << AXON_RAM_BLOCK_SHIFT)
#define AXON_RAM_SECTOR_SHIFT 9
#define AXON_RAM_SECTOR_SIZE (1 << AXON_RAM_SECTOR_SHIFT)
#define AXON_RAM_IRQ_FLAGS (IRQF_SHARED | IRQF_TRIGGER_RISING)
struct axon_ram_bank {
struct of_device *device;
struct gendisk *disk;
unsigned int irq_correctable;
unsigned int irq_uncorrectable;
unsigned long ph_addr;
unsigned long io_addr;
unsigned long size;
unsigned long ecc_counter;
};
static ssize_t
axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
{
struct of_device *device = to_of_device(dev);
struct axon_ram_bank *bank = device->dev.platform_data;
BUG_ON(!bank);
return sprintf(buf, "%lu\n", bank->ecc_counter);
}
static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
/**
* axon_ram_irq_handler - interrupt handler for Axon RAM ECC
* @irq: interrupt ID
* @dev: pointer to of_device
*/
static irqreturn_t
axon_ram_irq_handler(int irq, void *dev)
{
struct of_device *device = dev;
struct axon_ram_bank *bank = device->dev.platform_data;
BUG_ON(!bank);
if (irq == bank->irq_correctable) {
dev_err(&device->dev, "Correctable memory error occured\n");
bank->ecc_counter++;
return IRQ_HANDLED;
} else if (irq == bank->irq_uncorrectable) {
dev_err(&device->dev, "Uncorrectable memory error occured\n");
panic("Critical ECC error on %s", device->node->full_name);
}
return IRQ_NONE;
}
/**
* axon_ram_make_request - make_request() method for block device
* @queue, @bio: see blk_queue_make_request()
*/
static int
axon_ram_make_request(struct request_queue *queue, struct bio *bio)
{
struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
unsigned long phys_mem, phys_end;
void *user_mem;
struct bio_vec *vec;
unsigned int transfered;
unsigned short idx;
int rc = 0;
phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
phys_end = bank->io_addr + bank->size;
transfered = 0;
bio_for_each_segment(vec, bio, idx) {
if (unlikely(phys_mem + vec->bv_len > phys_end)) {
bio_io_error(bio, bio->bi_size);
rc = -ERANGE;
break;
}
user_mem = page_address(vec->bv_page) + vec->bv_offset;
if (bio_data_dir(bio) == READ)
memcpy(user_mem, (void *) phys_mem, vec->bv_len);
else
memcpy((void *) phys_mem, user_mem, vec->bv_len);
phys_mem += vec->bv_len;
transfered += vec->bv_len;
}
bio_endio(bio, transfered, 0);
return rc;
}
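/*
* Because the whole DDR2 bank is ioremapped, requests complete
* synchronously via memcpy(); there is no request queue to drain and
* no DMA to set up, which is why the make_request method ends the
* bio itself.
*/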
/**
* axon_ram_direct_access - direct_access() method for block device
* @device, @sector, @data: see block_device_operations method
*/
static int
axon_ram_direct_access(struct block_device *device, sector_t sector,
unsigned long *data)
{
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset;
offset = sector << AXON_RAM_SECTOR_SHIFT;
if (offset >= bank->size) {
dev_err(&bank->device->dev, "Access outside of address space\n");
return -ERANGE;
}
*data = bank->ph_addr + offset;
return 0;
}
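/*
* direct_access() exposes the bank's physical address for a sector,
* letting callers (e.g. ext2 XIP) map the DDR2 memory directly
* instead of bouncing through axon_ram_make_request().
*/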
static struct block_device_operations axon_ram_devops = {
.owner = THIS_MODULE,
.direct_access = axon_ram_direct_access
};
/**
* axon_ram_probe - probe() method for platform driver
* @device, @device_id: see of_platform_driver method
*/
static int
axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
{
static int axon_ram_bank_id = -1;
struct axon_ram_bank *bank;
struct resource resource;
int rc = 0;
axon_ram_bank_id++;
dev_info(&device->dev, "Found memory controller on %s\n",
device->node->full_name);
bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
if (bank == NULL) {
dev_err(&device->dev, "Out of memory\n");
rc = -ENOMEM;
goto failed;
}
device->dev.platform_data = bank;
bank->device = device;
if (of_address_to_resource(device->node, 0, &resource) != 0) {
dev_err(&device->dev, "Cannot access device tree\n");
rc = -EFAULT;
goto failed;
}
bank->size = resource.end - resource.start + 1;
if (bank->size == 0) {
dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
rc = -ENODEV;
goto failed;
}
dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
bank->ph_addr = resource.start;
bank->io_addr = (unsigned long) ioremap_flags(
bank->ph_addr, bank->size, _PAGE_NO_CACHE);
if (bank->io_addr == 0) {
dev_err(&device->dev, "ioremap() failed\n");
rc = -EFAULT;
goto failed;
}
bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
if (bank->disk == NULL) {
dev_err(&device->dev, "Cannot register disk\n");
rc = -EFAULT;
goto failed;
}
bank->disk->first_minor = 0;
bank->disk->fops = &axon_ram_devops;
bank->disk->private_data = bank;
bank->disk->driverfs_dev = &device->dev;
sprintf(bank->disk->disk_name, "%s%d",
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
bank->disk->major = register_blkdev(0, bank->disk->disk_name);
if (bank->disk->major < 0) {
dev_err(&device->dev, "Cannot register block device\n");
rc = -EFAULT;
goto failed;
}
bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
if (bank->disk->queue == NULL) {
dev_err(&device->dev, "Cannot register disk queue\n");
rc = -EFAULT;
goto failed;
}
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
add_disk(bank->disk);
bank->irq_correctable = irq_of_parse_and_map(device->node, 0);
bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1);
if ((bank->irq_correctable <= 0) || (bank->irq_uncorrectable <= 0)) {
dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
rc = -EFAULT;
goto failed;
}
rc = request_irq(bank->irq_correctable, axon_ram_irq_handler,
AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
if (rc != 0) {
dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
bank->irq_correctable = bank->irq_uncorrectable = 0;
rc = -EFAULT;
goto failed;
}
rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler,
AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
if (rc != 0) {
dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
bank->irq_uncorrectable = 0;
rc = -EFAULT;
goto failed;
}
rc = device_create_file(&device->dev, &dev_attr_ecc);
if (rc != 0) {
dev_err(&device->dev, "Cannot create sysfs file\n");
rc = -EFAULT;
goto failed;
}
return 0;
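/* error path: release whatever was set up, in reverse order of acquisition */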
failed:
if (bank != NULL) {
if (bank->irq_uncorrectable > 0)
free_irq(bank->irq_uncorrectable, device);
if (bank->irq_correctable > 0)
free_irq(bank->irq_correctable, device);
if (bank->disk != NULL) {
if (bank->disk->queue != NULL)
blk_cleanup_queue(bank->disk->queue);
if (bank->disk->major > 0)
unregister_blkdev(bank->disk->major,
bank->disk->disk_name);
del_gendisk(bank->disk);
}
device->dev.platform_data = NULL;
if (bank->io_addr != 0)
iounmap((void __iomem *) bank->io_addr);
kfree(bank);
}
return rc;
}
/**
* axon_ram_remove - remove() method for platform driver
* @device: see of_platform_driver method
*/
static int
axon_ram_remove(struct of_device *device)
{
struct axon_ram_bank *bank = device->dev.platform_data;
BUG_ON(!bank || !bank->disk);
device_remove_file(&device->dev, &dev_attr_ecc);
free_irq(bank->irq_uncorrectable, device);
free_irq(bank->irq_correctable, device);
blk_cleanup_queue(bank->disk->queue);
unregister_blkdev(bank->disk->major, bank->disk->disk_name);
del_gendisk(bank->disk);
iounmap((void __iomem *) bank->io_addr);
kfree(bank);
return 0;
}
static struct of_device_id axon_ram_device_id[] = {
{
.type = "dma-memory"
},
{}
};
static struct of_platform_driver axon_ram_driver = {
.owner = THIS_MODULE,
.name = AXON_RAM_MODULE_NAME,
.match_table = axon_ram_device_id,
.probe = axon_ram_probe,
.remove = axon_ram_remove
};
/**
* axon_ram_init
*/
static int __init
axon_ram_init(void)
{
return of_register_platform_driver(&axon_ram_driver);
}
/**
* axon_ram_exit
*/
static void __exit
axon_ram_exit(void)
{
of_unregister_platform_driver(&axon_ram_driver);
}
module_init(axon_ram_init);
module_exit(axon_ram_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
......@@ -48,15 +48,13 @@ struct pmi_data {
struct work_struct work;
};
static struct pmi_data *data;
static int pmi_irq_handler(int irq, void *dev_id)
{
struct pmi_data *data;
u8 type;
int rc;
data = dev_id;
spin_lock(&data->pmi_spinlock);
type = ioread8(data->pmi_reg + PMI_READ_TYPE);
......@@ -111,16 +109,13 @@ MODULE_DEVICE_TABLE(of, pmi_match);
static void pmi_notify_handlers(struct work_struct *work)
{
struct pmi_data *data;
struct pmi_handler *handler;
data = container_of(work, struct pmi_data, work);
spin_lock(&data->handler_spinlock);
list_for_each_entry(handler, &data->handler, node) {
pr_debug("pmi: notifying handler %p\n", handler);
if (handler->type == data->msg.type)
handler->handle_pmi_message(data->dev, data->msg);
handler->handle_pmi_message(data->msg);
}
spin_unlock(&data->handler_spinlock);
}
......@@ -129,9 +124,14 @@ static int pmi_of_probe(struct of_device *dev,
const struct of_device_id *match)
{
struct device_node *np = dev->node;
struct pmi_data *data;
int rc;
if (data) {
printk(KERN_ERR "pmi: driver has already been initialized.\n");
rc = -EBUSY;
goto out;
}
data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
if (!data) {
printk(KERN_ERR "pmi: could not allocate memory.\n");
......@@ -154,7 +154,6 @@ static int pmi_of_probe(struct of_device *dev,
INIT_WORK(&data->work, pmi_notify_handlers);
dev->dev.driver_data = data;
data->dev = dev;
data->irq = irq_of_parse_and_map(np, 0);
......@@ -164,7 +163,7 @@ static int pmi_of_probe(struct of_device *dev,
goto error_cleanup_iomap;
}
rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data);
rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
if (rc) {
printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
data->irq, rc);
......@@ -187,12 +186,9 @@ static int pmi_of_probe(struct of_device *dev,
static int pmi_of_remove(struct of_device *dev)
{
struct pmi_data *data;
struct pmi_handler *handler, *tmp;
data = dev->dev.driver_data;
free_irq(data->irq, data);
free_irq(data->irq, NULL);
iounmap(data->pmi_reg);
spin_lock(&data->handler_spinlock);
......@@ -202,7 +198,8 @@ static int pmi_of_remove(struct of_device *dev)
spin_unlock(&data->handler_spinlock);
kfree(dev->dev.driver_data);
kfree(data);
data = NULL;
return 0;
}
......@@ -226,13 +223,13 @@ static void __exit pmi_module_exit(void)
}
module_exit(pmi_module_exit);
void pmi_send_message(struct of_device *device, pmi_message_t msg)
int pmi_send_message(pmi_message_t msg)
{
struct pmi_data *data;
unsigned long flags;
DECLARE_COMPLETION_ONSTACK(completion);
data = device->dev.driver_data;
if (!data)
return -ENODEV;
mutex_lock(&data->msg_mutex);
......@@ -256,30 +253,26 @@ void pmi_send_message(struct of_device *device, pmi_message_t msg)
data->completion = NULL;
mutex_unlock(&data->msg_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(pmi_send_message);
void pmi_register_handler(struct of_device *device,
struct pmi_handler *handler)
int pmi_register_handler(struct pmi_handler *handler)
{
struct pmi_data *data;
data = device->dev.driver_data;
if (!data)
return;
return -ENODEV;
spin_lock(&data->handler_spinlock);
list_add_tail(&handler->node, &data->handler);
spin_unlock(&data->handler_spinlock);
return 0;
}
EXPORT_SYMBOL_GPL(pmi_register_handler);
void pmi_unregister_handler(struct of_device *device,
struct pmi_handler *handler)
void pmi_unregister_handler(struct pmi_handler *handler)
{
struct pmi_data *data;
data = device->dev.driver_data;
if (!data)
return;
......
......@@ -26,6 +26,7 @@
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/oprofile.h>
#include <linux/sched.h>
#include "oprofile_stats.h"
......
......@@ -20,27 +20,9 @@ void free_event_buffer(void);
/* wake up the process sleeping on the event file */
void wake_up_buffer_waiter(void);
/* Each escaped entry is prefixed by ESCAPE_CODE
* then one of the following codes, then the
* relevant data.
*/
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
#define INVALID_COOKIE ~0UL
#define NO_COOKIE 0UL
/* add data to the event buffer */
void add_event_entry(unsigned long data);
extern const struct file_operations event_buffer_fops;
/* mutex between sync_cpu_buffers() and the
......
......@@ -53,9 +53,24 @@ int oprofile_setup(void)
* us missing task deaths and eventually oopsing
* when trying to process the event buffer.
*/
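/* sync_start return convention: 0 = arch handled it, 1 = run the generic sync_start(), negative = error */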
if (oprofile_ops.sync_start) {
int sync_ret = oprofile_ops.sync_start();
switch (sync_ret) {
case 0:
goto post_sync;
case 1:
goto do_generic;
case -1:
goto out3;
default:
goto out3;
}
}
do_generic:
if ((err = sync_start()))
goto out3;
post_sync:
is_setup = 1;
mutex_unlock(&start_mutex);
return 0;
......@@ -118,7 +133,20 @@ void oprofile_stop(void)
void oprofile_shutdown(void)
{
mutex_lock(&start_mutex);
if (oprofile_ops.sync_stop) {
int sync_ret = oprofile_ops.sync_stop();
switch (sync_ret) {
case 0:
goto post_sync;
case 1:
goto do_generic;
default:
goto post_sync;
}
}
do_generic:
sync_stop();
post_sync:
if (oprofile_ops.shutdown)
oprofile_ops.shutdown();
is_setup = 0;
......
......@@ -39,14 +39,16 @@ struct op_system_config {
/* Per-arch configuration */
struct op_powerpc_model {
void (*reg_setup) (struct op_counter_config *,
int (*reg_setup) (struct op_counter_config *,
struct op_system_config *,
int num_counters);
void (*cpu_setup) (struct op_counter_config *);
void (*start) (struct op_counter_config *);
void (*global_start) (struct op_counter_config *);
int (*cpu_setup) (struct op_counter_config *);
int (*start) (struct op_counter_config *);
int (*global_start) (struct op_counter_config *);
void (*stop) (void);
void (*global_stop) (void);
int (*sync_start)(void);
int (*sync_stop)(void);
void (*handle_interrupt) (struct pt_regs *,
struct op_counter_config *);
int num_counters;
......
......@@ -55,13 +55,13 @@ typedef struct {
struct pmi_handler {
struct list_head node;
u8 type;
void (*handle_pmi_message) (struct of_device *, pmi_message_t);
void (*handle_pmi_message) (pmi_message_t);
};
void pmi_register_handler(struct of_device *, struct pmi_handler *);
void pmi_unregister_handler(struct of_device *, struct pmi_handler *);
int pmi_register_handler(struct pmi_handler *);
void pmi_unregister_handler(struct pmi_handler *);
void pmi_send_message(struct of_device *, pmi_message_t);
int pmi_send_message(pmi_message_t);
#endif /* __KERNEL__ */
#endif /* _POWERPC_PMI_H */
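A minimal sketch of a client of the reworked PMI API, which no longer threads a struct of_device through every call. The message type value and the example_* names are illustrative, not part of this patch:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/pmi.h>

static void example_handle_pmi(pmi_message_t msg)
{
	pr_debug("pmi: got message type %d\n", msg.type);
}

static struct pmi_handler example_handler = {
	.type = 3,			/* illustrative message type */
	.handle_pmi_message = example_handle_pmi,
};

static int example_send(void)
{
	pmi_message_t msg = { .type = 3, .data1 = 0, .data2 = 0 };

	return pmi_send_message(msg);	/* now returns -ENODEV if the driver is absent */
}

static int __init example_init(void)
{
	/* likewise fails with -ENODEV when the PMI driver is not bound yet */
	return pmi_register_handler(&example_handler);
}

static void __exit example_exit(void)
{
	pmi_unregister_handler(&example_handler);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");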
......@@ -107,10 +107,10 @@ struct spu_runqueue;
struct device_node;
enum spu_utilization_state {
SPU_UTIL_SYSTEM,
SPU_UTIL_USER,
SPU_UTIL_SYSTEM,
SPU_UTIL_IOWAIT,
SPU_UTIL_IDLE,
SPU_UTIL_IDLE_LOADED,
SPU_UTIL_MAX
};
......@@ -121,9 +121,9 @@ struct spu {
unsigned long problem_phys;
struct spu_problem __iomem *problem;
struct spu_priv2 __iomem *priv2;
struct list_head list;
struct list_head sched_list;
struct list_head cbe_list;
struct list_head full_list;
enum { SPU_FREE, SPU_USED } alloc_state;
int number;
unsigned int irqs[3];
u32 node;
......@@ -137,6 +137,7 @@ struct spu {
struct spu_runqueue *rq;
unsigned long long timestamp;
pid_t pid;
pid_t tgid;
int class_0_pending;
spinlock_t register_lock;
......@@ -165,11 +166,14 @@ struct spu {
struct sys_device sysdev;
int has_mem_affinity;
struct list_head aff_list;
struct {
/* protected by interrupt reentrancy */
enum spu_utilization_state utilization_state;
unsigned long tstamp; /* time of last ctx switch */
unsigned long times[SPU_UTIL_MAX];
enum spu_utilization_state util_state;
unsigned long long tstamp;
unsigned long long times[SPU_UTIL_MAX];
unsigned long long vol_ctx_switch;
unsigned long long invol_ctx_switch;
unsigned long long min_flt;
......@@ -181,13 +185,29 @@ struct spu {
} stats;
};
struct spu *spu_alloc(void);
struct spu *spu_alloc_node(int node);
void spu_free(struct spu *spu);
struct cbe_spu_info {
struct mutex list_mutex;
struct list_head spus;
int n_spus;
int nr_active;
atomic_t reserved_spus;
};
extern struct cbe_spu_info cbe_spu_info[];
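For illustration, a caller can walk the new per-node lists under list_mutex; a minimal sketch, assuming the usual two Cell nodes per blade (the array size is not visible in this header) and counting via the cbe_list linkage added above:

#include <linux/list.h>
#include <linux/mutex.h>
#include <asm/spu.h>

static int count_all_spus(void)
{
	struct spu *spu;
	int node, total = 0;

	for (node = 0; node < 2; node++) {	/* assumed node count */
		mutex_lock(&cbe_spu_info[node].list_mutex);
		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
			total++;
		mutex_unlock(&cbe_spu_info[node].list_mutex);
	}
	return total;
}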
void spu_init_channels(struct spu *spu);
int spu_irq_class_0_bottom(struct spu *spu);
int spu_irq_class_1_bottom(struct spu *spu);
void spu_irq_setaffinity(struct spu *spu, int cpu);
#ifdef CONFIG_KEXEC
void crash_register_spus(struct list_head *list);
#else
static inline void crash_register_spus(struct list_head *list)
{
}
#endif
extern void spu_invalidate_slbs(struct spu *spu);
extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
......@@ -195,6 +215,20 @@ extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
struct mm_struct;
extern void spu_flush_all_slbs(struct mm_struct *mm);
/* This interface allows a profiler (e.g., OProfile) to store a ref
* to spu context information that it creates. This caching technique
* avoids the need to recreate this information after a save/restore operation.
*
* Assumes the caller has already incremented the ref count to
* profile_info; then spu_context_destroy must call kref_put
* on prof_info_kref.
*/
void spu_set_profile_private_kref(struct spu_context *ctx,
struct kref *prof_info_kref,
void (*prof_info_release)(struct kref *kref));
void *spu_get_profile_private_kref(struct spu_context *ctx);
/* system callbacks from the SPU */
struct spu_syscall_block {
u64 nr_ret;
......@@ -206,7 +240,8 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
struct file;
extern struct spufs_calls {
asmlinkage long (*create_thread)(const char __user *name,
unsigned int flags, mode_t mode);
unsigned int flags, mode_t mode,
struct file *neighbor);
asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc,
__u32 __user *ustatus);
struct module *owner;
......@@ -233,8 +268,10 @@ struct spu_coredump_calls {
#define SPU_CREATE_GANG 0x0002
#define SPU_CREATE_NOSCHED 0x0004
#define SPU_CREATE_ISOLATE 0x0008
#define SPU_CREATE_AFFINITY_SPU 0x0010
#define SPU_CREATE_AFFINITY_MEM 0x0020
#define SPU_CREATE_FLAG_ALL 0x000f /* mask of all valid flags */
#define SPU_CREATE_FLAG_ALL 0x003f /* mask of all valid flags */
#ifdef CONFIG_SPU_FS_MODULE
......@@ -403,6 +440,7 @@ struct spu_priv2 {
#define MFC_CNTL_RESUME_DMA_QUEUE (0ull << 0)
#define MFC_CNTL_SUSPEND_DMA_QUEUE (1ull << 0)
#define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK (1ull << 0)
#define MFC_CNTL_SUSPEND_MASK (1ull << 4)
#define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION (0ull << 8)
#define MFC_CNTL_SUSPEND_IN_PROGRESS (1ull << 8)
#define MFC_CNTL_SUSPEND_COMPLETE (3ull << 8)
......
......@@ -50,6 +50,12 @@
#define SPU_STOPPED_STATUS_P_I 8
#define SPU_STOPPED_STATUS_R 9
/*
* Definitions for software decrementer status flags.
*/
#define SPU_DECR_STATUS_RUNNING 0x1
#define SPU_DECR_STATUS_WRAPPED 0x2
#ifndef __ASSEMBLY__
/**
* spu_reg128 - generic 128-bit register definition.
......@@ -63,7 +69,7 @@ struct spu_reg128 {
* @gprs: Array of saved registers.
* @fpcr: Saved floating point status control register.
* @decr: Saved decrementer value.
* @decr_status: Indicates decrementer run status.
* @decr_status: Indicates software decrementer status flags.
* @ppu_mb: Saved PPU mailbox data.
* @ppuint_mb: Saved PPU interrupting mailbox data.
* @tag_mask: Saved tag group mask.
......
......@@ -12,6 +12,7 @@
#ifdef CONFIG_PROFILING
#include <linux/dcache.h>
#include <linux/types.h>
struct dcookie_user;
......
......@@ -21,6 +21,7 @@
#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
#define EM_PPC 20 /* PowerPC */
#define EM_PPC64 21 /* PowerPC64 */
#define EM_SPU 23 /* Cell BE SPU */
#define EM_SH 42 /* SuperH */
#define EM_SPARCV9 43 /* SPARC v9 64-bit */
#define EM_IA_64 50 /* HP/Intel IA-64 */
......
......@@ -17,6 +17,26 @@
#include <linux/spinlock.h>
#include <asm/atomic.h>
/* Each escaped entry is prefixed by ESCAPE_CODE
* then one of the following codes, then the
* relevant data.
* These #defines live in this file so that arch-specific
* buffer sync'ing code can access them.
*/
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
#define XEN_ENTER_SWITCH_CODE 10
#define SPU_PROFILING_CODE 11
#define SPU_CTX_SWITCH_CODE 12
struct super_block;
struct dentry;
struct file_operations;
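With these codes exported, arch buffer-sync code can emit escaped records itself; a minimal sketch (the payload order here is illustrative, not the exact record layout the Cell code uses, and the caller is assumed to hold whatever buffer locking the core requires):

#include <linux/oprofile.h>

static void record_spu_ctx_switch(unsigned long spu_num, unsigned long tgid)
{
	add_event_entry(ESCAPE_CODE);		/* marks an escaped entry */
	add_event_entry(SPU_CTX_SWITCH_CODE);	/* which kind of record follows */
	add_event_entry(spu_num);		/* illustrative payload */
	add_event_entry(tgid);
}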
......@@ -35,6 +55,14 @@ struct oprofile_operations {
int (*start)(void);
/* Stop delivering interrupts. */
void (*stop)(void);
/* Arch-specific buffer sync functions.
* Return value = 0: Success
* Return value = -1: Failure
* Return value = 1: Run generic sync function
*/
int (*sync_start)(void);
int (*sync_stop)(void);
/* Initiate a stack backtrace. Optional. */
void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
/* CPU identification string. */
......@@ -55,6 +83,13 @@ int oprofile_arch_init(struct oprofile_operations * ops);
*/
void oprofile_arch_exit(void);
/**
* Add data to the event buffer.
* The data passed is free-form, but typically consists of
* file offsets, dcookies, context information, and ESCAPE codes.
*/
void add_event_entry(unsigned long data);
/**
* Add a sample. This may be called from any context. Pass
* smp_processor_id() as cpu.
......
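A sketch of how arch code might wire the new hooks, following the return convention documented above (0 = handled, 1 = run the generic path, -1 = failure). The spu_prof_enabled flag and example_* names are hypothetical:

#include <linux/oprofile.h>

static int spu_prof_enabled;	/* hypothetical: set when SPU profiling is active */

static int example_sync_start(void)
{
	if (!spu_prof_enabled)
		return 1;	/* let oprofile_setup() fall back to the generic sync_start() */
	return 0;		/* arch-specific sync already in place */
}

static int example_sync_stop(void)
{
	return spu_prof_enabled ? 0 : 1;
}

int oprofile_arch_init(struct oprofile_operations *ops)
{
	ops->sync_start = example_sync_start;
	ops->sync_stop = example_sync_stop;
	return 0;
}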
......@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
__u32 __user *ustatus);
asmlinkage long sys_spu_create(const char __user *name,
unsigned int flags, mode_t mode);
unsigned int flags, mode_t mode, int fd);
asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
unsigned dev);
......
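A hypothetical user-space use of the extended syscall: create a second context with SPU affinity to the first. The spufs paths, mode, flag value, and direct syscall(2) usage are all illustrative (libspe normally wraps this, and __NR_spu_create exists only on powerpc):

#include <sys/syscall.h>
#include <unistd.h>

#define SPU_CREATE_AFFINITY_SPU 0x0010	/* mirrors the flag added in spu.h */

int main(void)
{
	int first, second;

	first = syscall(__NR_spu_create, "/spu/ctx0", 0, 0755);
	if (first < 0)
		return 1;

	/* the new fourth argument is an fd naming the neighbour context */
	second = syscall(__NR_spu_create, "/spu/ctx1",
			 SPU_CREATE_AFFINITY_SPU, 0755, first);
	return second < 0 ? 1 : 0;
}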