Merge branch 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6
* 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6: (37 commits) [CELL] spufs: rework list management and associated locking [CELL] oprofile: add support to OProfile for profiling CELL BE SPUs [CELL] oprofile: enable SPU switch notification to detect currently active SPU tasks [CELL] spu_base: locking cleanup [CELL] cell: indexing of SPUs based on firmware vicinity properties [CELL] spufs: integration of SPE affinity with the scheduller [CELL] cell: add placement computation for scheduling of affinity contexts [CELL] spufs: extension of spu_create to support affinity definition [CELL] cell: add hardcoded spu vicinity information for QS20 [CELL] cell: add vicinity information on spus [CELL] cell: add per BE structure with info about its SPUs [CELL] spufs: use find_first_bit() instead of sched_find_first_bit() [CELL] spufs: remove unused file argument from spufs_run_spu() [CELL] spufs: change decrementer restore timing [CELL] spufs: dont halt decrementer at restore step 47 [CELL] spufs: limit saving MFC_CNTL bits [CELL] spufs: fix read and write for decr_status file [CELL] spufs: fix decr_status meanings [CELL] spufs: remove needless context save/restore code [CELL] spufs: fix array size of channel index ...
This commit is contained in:
commit
ede13d81b4
|
@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
|
|||
# Instrumentation Support
|
||||
#
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_OPROFILE=y
|
||||
CONFIG_OPROFILE=m
|
||||
CONFIG_OPROFILE_CELL=y
|
||||
# CONFIG_KPROBES is not set
|
||||
|
||||
#
|
||||
|
|
|
@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs)
|
|||
cpus_in_sr = CPU_MASK_NONE;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_SPU_BASE
|
||||
|
||||
#include <asm/spu.h>
|
||||
#include <asm/spu_priv1.h>
|
||||
|
||||
struct crash_spu_info {
|
||||
struct spu *spu;
|
||||
u32 saved_spu_runcntl_RW;
|
||||
u32 saved_spu_status_R;
|
||||
u32 saved_spu_npc_RW;
|
||||
u64 saved_mfc_sr1_RW;
|
||||
u64 saved_mfc_dar;
|
||||
u64 saved_mfc_dsisr;
|
||||
};
|
||||
|
||||
#define CRASH_NUM_SPUS 16 /* Enough for current hardware */
|
||||
static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
|
||||
|
||||
static void crash_kexec_stop_spus(void)
|
||||
{
|
||||
struct spu *spu;
|
||||
int i;
|
||||
u64 tmp;
|
||||
|
||||
for (i = 0; i < CRASH_NUM_SPUS; i++) {
|
||||
if (!crash_spu_info[i].spu)
|
||||
continue;
|
||||
|
||||
spu = crash_spu_info[i].spu;
|
||||
|
||||
crash_spu_info[i].saved_spu_runcntl_RW =
|
||||
in_be32(&spu->problem->spu_runcntl_RW);
|
||||
crash_spu_info[i].saved_spu_status_R =
|
||||
in_be32(&spu->problem->spu_status_R);
|
||||
crash_spu_info[i].saved_spu_npc_RW =
|
||||
in_be32(&spu->problem->spu_npc_RW);
|
||||
|
||||
crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
|
||||
crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
|
||||
tmp = spu_mfc_sr1_get(spu);
|
||||
crash_spu_info[i].saved_mfc_sr1_RW = tmp;
|
||||
|
||||
tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
|
||||
spu_mfc_sr1_set(spu, tmp);
|
||||
|
||||
__delay(200);
|
||||
}
|
||||
}
|
||||
|
||||
void crash_register_spus(struct list_head *list)
|
||||
{
|
||||
struct spu *spu;
|
||||
|
||||
list_for_each_entry(spu, list, full_list) {
|
||||
if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
|
||||
continue;
|
||||
|
||||
crash_spu_info[spu->number].spu = spu;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void crash_kexec_stop_spus(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SPU_BASE */
|
||||
|
||||
void default_machine_crash_shutdown(struct pt_regs *regs)
|
||||
{
|
||||
|
@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
|
|||
crash_save_cpu(regs, crashing_cpu);
|
||||
crash_kexec_prepare_cpus(crashing_cpu);
|
||||
cpu_set(crashing_cpu, cpus_in_crash);
|
||||
crash_kexec_stop_spus();
|
||||
if (ppc_md.kexec_cpu_down)
|
||||
ppc_md.kexec_cpu_down(1, 0);
|
||||
}
|
||||
|
|
|
@ -122,6 +122,7 @@ extern struct timezone sys_tz;
|
|||
static long timezone_offset;
|
||||
|
||||
unsigned long ppc_proc_freq;
|
||||
EXPORT_SYMBOL(ppc_proc_freq);
|
||||
unsigned long ppc_tb_freq;
|
||||
|
||||
static u64 tb_last_jiffy __cacheline_aligned_in_smp;
|
||||
|
|
|
@ -15,3 +15,10 @@ config OPROFILE
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config OPROFILE_CELL
|
||||
bool "OProfile for Cell Broadband Engine"
|
||||
depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
|
||||
default y
|
||||
help
|
||||
Profiling of Cell BE SPUs requires special support enabled
|
||||
by this option.
|
||||
|
|
|
@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
|
|||
timer_int.o )
|
||||
|
||||
oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
|
||||
oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
|
||||
oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
|
||||
cell/spu_profiler.o cell/vma_map.o \
|
||||
cell/spu_task_sync.o
|
||||
oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
|
||||
oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
|
||||
oprofile-$(CONFIG_6xx) += op_model_7450.o
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Cell Broadband Engine OProfile Support
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2006
|
||||
*
|
||||
* Author: Maynard Johnson <maynardj@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#ifndef PR_UTIL_H
|
||||
#define PR_UTIL_H
|
||||
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/oprofile.h>
|
||||
#include <asm/cell-pmu.h>
|
||||
#include <asm/spu.h>
|
||||
|
||||
#include "../../platforms/cell/cbe_regs.h"
|
||||
|
||||
/* Defines used for sync_start */
|
||||
#define SKIP_GENERIC_SYNC 0
|
||||
#define SYNC_START_ERROR -1
|
||||
#define DO_GENERIC_SYNC 1
|
||||
|
||||
struct spu_overlay_info { /* map of sections within an SPU overlay */
|
||||
unsigned int vma; /* SPU virtual memory address from elf */
|
||||
unsigned int size; /* size of section from elf */
|
||||
unsigned int offset; /* offset of section into elf file */
|
||||
unsigned int buf;
|
||||
};
|
||||
|
||||
struct vma_to_fileoffset_map { /* map of sections within an SPU program */
|
||||
struct vma_to_fileoffset_map *next; /* list pointer */
|
||||
unsigned int vma; /* SPU virtual memory address from elf */
|
||||
unsigned int size; /* size of section from elf */
|
||||
unsigned int offset; /* offset of section into elf file */
|
||||
unsigned int guard_ptr;
|
||||
unsigned int guard_val;
|
||||
/*
|
||||
* The guard pointer is an entry in the _ovly_buf_table,
|
||||
* computed using ovly.buf as the index into the table. Since
|
||||
* ovly.buf values begin at '1' to reference the first (or 0th)
|
||||
* entry in the _ovly_buf_table, the computation subtracts 1
|
||||
* from ovly.buf.
|
||||
* The guard value is stored in the _ovly_buf_table entry and
|
||||
* is an index (starting at 1) back to the _ovly_table entry
|
||||
* that is pointing at this _ovly_buf_table entry. So, for
|
||||
* example, for an overlay scenario with one overlay segment
|
||||
* and two overlay sections:
|
||||
* - Section 1 points to the first entry of the
|
||||
* _ovly_buf_table, which contains a guard value
|
||||
* of '1', referencing the first (index=0) entry of
|
||||
* _ovly_table.
|
||||
* - Section 2 points to the second entry of the
|
||||
* _ovly_buf_table, which contains a guard value
|
||||
* of '2', referencing the second (index=1) entry of
|
||||
* _ovly_table.
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
/* The three functions below are for maintaining and accessing
|
||||
* the vma-to-fileoffset map.
|
||||
*/
|
||||
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
|
||||
u64 objectid);
|
||||
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
|
||||
unsigned int vma, const struct spu *aSpu,
|
||||
int *grd_val);
|
||||
void vma_map_free(struct vma_to_fileoffset_map *map);
|
||||
|
||||
/*
|
||||
* Entry point for SPU profiling.
|
||||
* cycles_reset is the SPU_CYCLES count value specified by the user.
|
||||
*/
|
||||
int start_spu_profiling(unsigned int cycles_reset);
|
||||
|
||||
void stop_spu_profiling(void);
|
||||
|
||||
|
||||
/* add the necessary profiling hooks */
|
||||
int spu_sync_start(void);
|
||||
|
||||
/* remove the hooks */
|
||||
int spu_sync_stop(void);
|
||||
|
||||
/* Record SPU program counter samples to the oprofile event buffer. */
|
||||
void spu_sync_buffer(int spu_num, unsigned int *samples,
|
||||
int num_samples);
|
||||
|
||||
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
|
||||
|
||||
#endif /* PR_UTIL_H */
|
|
@ -0,0 +1,221 @@
|
|||
/*
|
||||
* Cell Broadband Engine OProfile Support
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2006
|
||||
*
|
||||
* Authors: Maynard Johnson <maynardj@us.ibm.com>
|
||||
* Carl Love <carll@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/cell-pmu.h>
|
||||
#include "pr_util.h"
|
||||
|
||||
#define TRACE_ARRAY_SIZE 1024
|
||||
#define SCALE_SHIFT 14
|
||||
|
||||
static u32 *samples;
|
||||
|
||||
static int spu_prof_running;
|
||||
static unsigned int profiling_interval;
|
||||
|
||||
#define NUM_SPU_BITS_TRBUF 16
|
||||
#define SPUS_PER_TB_ENTRY 4
|
||||
#define SPUS_PER_NODE 8
|
||||
|
||||
#define SPU_PC_MASK 0xFFFF
|
||||
|
||||
static DEFINE_SPINLOCK(sample_array_lock);
|
||||
unsigned long sample_array_lock_flags;
|
||||
|
||||
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
|
||||
{
|
||||
unsigned long ns_per_cyc;
|
||||
|
||||
if (!freq_khz)
|
||||
freq_khz = ppc_proc_freq/1000;
|
||||
|
||||
/* To calculate a timeout in nanoseconds, the basic
|
||||
* formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
|
||||
* To avoid floating point math, we use the scale math
|
||||
* technique as described in linux/jiffies.h. We use
|
||||
* a scale factor of SCALE_SHIFT, which provides 4 decimal places
|
||||
* of precision. This is close enough for the purpose at hand.
|
||||
*
|
||||
* The value of the timeout should be small enough that the hw
|
||||
* trace buffer will not get more then about 1/3 full for the
|
||||
* maximum user specified (the LFSR value) hw sampling frequency.
|
||||
* This is to ensure the trace buffer will never fill even if the
|
||||
* kernel thread scheduling varies under a heavy system load.
|
||||
*/
|
||||
|
||||
ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
|
||||
profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract SPU PC from trace buffer entry
|
||||
*/
|
||||
static void spu_pc_extract(int cpu, int entry)
|
||||
{
|
||||
/* the trace buffer is 128 bits */
|
||||
u64 trace_buffer[2];
|
||||
u64 spu_mask;
|
||||
int spu;
|
||||
|
||||
spu_mask = SPU_PC_MASK;
|
||||
|
||||
/* Each SPU PC is 16 bits; hence, four spus in each of
|
||||
* the two 64-bit buffer entries that make up the
|
||||
* 128-bit trace_buffer entry. Process two 64-bit values
|
||||
* simultaneously.
|
||||
* trace[0] SPU PC contents are: 0 1 2 3
|
||||
* trace[1] SPU PC contents are: 4 5 6 7
|
||||
*/
|
||||
|
||||
cbe_read_trace_buffer(cpu, trace_buffer);
|
||||
|
||||
for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
|
||||
/* spu PC trace entry is upper 16 bits of the
|
||||
* 18 bit SPU program counter
|
||||
*/
|
||||
samples[spu * TRACE_ARRAY_SIZE + entry]
|
||||
= (spu_mask & trace_buffer[0]) << 2;
|
||||
samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
|
||||
= (spu_mask & trace_buffer[1]) << 2;
|
||||
|
||||
trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
|
||||
trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
|
||||
}
|
||||
}
|
||||
|
||||
static int cell_spu_pc_collection(int cpu)
|
||||
{
|
||||
u32 trace_addr;
|
||||
int entry;
|
||||
|
||||
/* process the collected SPU PC for the node */
|
||||
|
||||
entry = 0;
|
||||
|
||||
trace_addr = cbe_read_pm(cpu, trace_address);
|
||||
while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
|
||||
/* there is data in the trace buffer to process */
|
||||
spu_pc_extract(cpu, entry);
|
||||
|
||||
entry++;
|
||||
|
||||
if (entry >= TRACE_ARRAY_SIZE)
|
||||
/* spu_samples is full */
|
||||
break;
|
||||
|
||||
trace_addr = cbe_read_pm(cpu, trace_address);
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
|
||||
static enum hrtimer_restart profile_spus(struct hrtimer *timer)
|
||||
{
|
||||
ktime_t kt;
|
||||
int cpu, node, k, num_samples, spu_num;
|
||||
|
||||
if (!spu_prof_running)
|
||||
goto stop;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cbe_get_hw_thread_id(cpu))
|
||||
continue;
|
||||
|
||||
node = cbe_cpu_to_node(cpu);
|
||||
|
||||
/* There should only be one kernel thread at a time processing
|
||||
* the samples. In the very unlikely case that the processing
|
||||
* is taking a very long time and multiple kernel threads are
|
||||
* started to process the samples. Make sure only one kernel
|
||||
* thread is working on the samples array at a time. The
|
||||
* sample array must be loaded and then processed for a given
|
||||
* cpu. The sample array is not per cpu.
|
||||
*/
|
||||
spin_lock_irqsave(&sample_array_lock,
|
||||
sample_array_lock_flags);
|
||||
num_samples = cell_spu_pc_collection(cpu);
|
||||
|
||||
if (num_samples == 0) {
|
||||
spin_unlock_irqrestore(&sample_array_lock,
|
||||
sample_array_lock_flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (k = 0; k < SPUS_PER_NODE; k++) {
|
||||
spu_num = k + (node * SPUS_PER_NODE);
|
||||
spu_sync_buffer(spu_num,
|
||||
samples + (k * TRACE_ARRAY_SIZE),
|
||||
num_samples);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&sample_array_lock,
|
||||
sample_array_lock_flags);
|
||||
|
||||
}
|
||||
smp_wmb(); /* insure spu event buffer updates are written */
|
||||
/* don't want events intermingled... */
|
||||
|
||||
kt = ktime_set(0, profiling_interval);
|
||||
if (!spu_prof_running)
|
||||
goto stop;
|
||||
hrtimer_forward(timer, timer->base->get_time(), kt);
|
||||
return HRTIMER_RESTART;
|
||||
|
||||
stop:
|
||||
printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static struct hrtimer timer;
|
||||
/*
|
||||
* Entry point for SPU profiling.
|
||||
* NOTE: SPU profiling is done system-wide, not per-CPU.
|
||||
*
|
||||
* cycles_reset is the count value specified by the user when
|
||||
* setting up OProfile to count SPU_CYCLES.
|
||||
*/
|
||||
int start_spu_profiling(unsigned int cycles_reset)
|
||||
{
|
||||
ktime_t kt;
|
||||
|
||||
pr_debug("timer resolution: %lu\n", TICK_NSEC);
|
||||
kt = ktime_set(0, profiling_interval);
|
||||
hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
timer.expires = kt;
|
||||
timer.function = profile_spus;
|
||||
|
||||
/* Allocate arrays for collecting SPU PC samples */
|
||||
samples = kzalloc(SPUS_PER_NODE *
|
||||
TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
|
||||
|
||||
if (!samples)
|
||||
return -ENOMEM;
|
||||
|
||||
spu_prof_running = 1;
|
||||
hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void stop_spu_profiling(void)
|
||||
{
|
||||
spu_prof_running = 0;
|
||||
hrtimer_cancel(&timer);
|
||||
kfree(samples);
|
||||
pr_debug("SPU_PROF: stop_spu_profiling issued\n");
|
||||
}
|
|
@ -0,0 +1,484 @@
|
|||
/*
|
||||
* Cell Broadband Engine OProfile Support
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2006
|
||||
*
|
||||
* Author: Maynard Johnson <maynardj@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
/* The purpose of this file is to handle SPU event task switching
|
||||
* and to record SPU context information into the OProfile
|
||||
* event buffer.
|
||||
*
|
||||
* Additionally, the spu_sync_buffer function is provided as a helper
|
||||
* for recoding actual SPU program counter samples to the event buffer.
|
||||
*/
|
||||
#include <linux/dcookies.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/numa.h>
|
||||
#include <linux/oprofile.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include "pr_util.h"
|
||||
|
||||
#define RELEASE_ALL 9999
|
||||
|
||||
static DEFINE_SPINLOCK(buffer_lock);
|
||||
static DEFINE_SPINLOCK(cache_lock);
|
||||
static int num_spu_nodes;
|
||||
int spu_prof_num_nodes;
|
||||
int last_guard_val[MAX_NUMNODES * 8];
|
||||
|
||||
/* Container for caching information about an active SPU task. */
|
||||
struct cached_info {
|
||||
struct vma_to_fileoffset_map *map;
|
||||
struct spu *the_spu; /* needed to access pointer to local_store */
|
||||
struct kref cache_ref;
|
||||
};
|
||||
|
||||
static struct cached_info *spu_info[MAX_NUMNODES * 8];
|
||||
|
||||
static void destroy_cached_info(struct kref *kref)
|
||||
{
|
||||
struct cached_info *info;
|
||||
|
||||
info = container_of(kref, struct cached_info, cache_ref);
|
||||
vma_map_free(info->map);
|
||||
kfree(info);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
/* Return the cached_info for the passed SPU number.
|
||||
* ATTENTION: Callers are responsible for obtaining the
|
||||
* cache_lock if needed prior to invoking this function.
|
||||
*/
|
||||
static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
|
||||
{
|
||||
struct kref *ref;
|
||||
struct cached_info *ret_info;
|
||||
|
||||
if (spu_num >= num_spu_nodes) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: Invalid index %d into spu info cache\n",
|
||||
__FUNCTION__, __LINE__, spu_num);
|
||||
ret_info = NULL;
|
||||
goto out;
|
||||
}
|
||||
if (!spu_info[spu_num] && the_spu) {
|
||||
ref = spu_get_profile_private_kref(the_spu->ctx);
|
||||
if (ref) {
|
||||
spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
|
||||
kref_get(&spu_info[spu_num]->cache_ref);
|
||||
}
|
||||
}
|
||||
|
||||
ret_info = spu_info[spu_num];
|
||||
out:
|
||||
return ret_info;
|
||||
}
|
||||
|
||||
|
||||
/* Looks for cached info for the passed spu. If not found, the
|
||||
* cached info is created for the passed spu.
|
||||
* Returns 0 for success; otherwise, -1 for error.
|
||||
*/
|
||||
static int
|
||||
prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct vma_to_fileoffset_map *new_map;
|
||||
int retval = 0;
|
||||
struct cached_info *info;
|
||||
|
||||
/* We won't bother getting cache_lock here since
|
||||
* don't do anything with the cached_info that's returned.
|
||||
*/
|
||||
info = get_cached_info(spu, spu->number);
|
||||
|
||||
if (info) {
|
||||
pr_debug("Found cached SPU info.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Create cached_info and set spu_info[spu->number] to point to it.
|
||||
* spu->number is a system-wide value, not a per-node value.
|
||||
*/
|
||||
info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
|
||||
if (!info) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: create vma_map failed\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
retval = -ENOMEM;
|
||||
goto err_alloc;
|
||||
}
|
||||
new_map = create_vma_map(spu, objectId);
|
||||
if (!new_map) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: create vma_map failed\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
retval = -ENOMEM;
|
||||
goto err_alloc;
|
||||
}
|
||||
|
||||
pr_debug("Created vma_map\n");
|
||||
info->map = new_map;
|
||||
info->the_spu = spu;
|
||||
kref_init(&info->cache_ref);
|
||||
spin_lock_irqsave(&cache_lock, flags);
|
||||
spu_info[spu->number] = info;
|
||||
/* Increment count before passing off ref to SPUFS. */
|
||||
kref_get(&info->cache_ref);
|
||||
|
||||
/* We increment the module refcount here since SPUFS is
|
||||
* responsible for the final destruction of the cached_info,
|
||||
* and it must be able to access the destroy_cached_info()
|
||||
* function defined in the OProfile module. We decrement
|
||||
* the module refcount in destroy_cached_info.
|
||||
*/
|
||||
try_module_get(THIS_MODULE);
|
||||
spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
|
||||
destroy_cached_info);
|
||||
spin_unlock_irqrestore(&cache_lock, flags);
|
||||
goto out;
|
||||
|
||||
err_alloc:
|
||||
kfree(info);
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: The caller is responsible for locking the
|
||||
* cache_lock prior to calling this function.
|
||||
*/
|
||||
static int release_cached_info(int spu_index)
|
||||
{
|
||||
int index, end;
|
||||
|
||||
if (spu_index == RELEASE_ALL) {
|
||||
end = num_spu_nodes;
|
||||
index = 0;
|
||||
} else {
|
||||
if (spu_index >= num_spu_nodes) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: "
|
||||
"Invalid index %d into spu info cache\n",
|
||||
__FUNCTION__, __LINE__, spu_index);
|
||||
goto out;
|
||||
}
|
||||
end = spu_index + 1;
|
||||
index = spu_index;
|
||||
}
|
||||
for (; index < end; index++) {
|
||||
if (spu_info[index]) {
|
||||
kref_put(&spu_info[index]->cache_ref,
|
||||
destroy_cached_info);
|
||||
spu_info[index] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The source code for fast_get_dcookie was "borrowed"
|
||||
* from drivers/oprofile/buffer_sync.c.
|
||||
*/
|
||||
|
||||
/* Optimisation. We can manage without taking the dcookie sem
|
||||
* because we cannot reach this code without at least one
|
||||
* dcookie user still being registered (namely, the reader
|
||||
* of the event buffer).
|
||||
*/
|
||||
static inline unsigned long fast_get_dcookie(struct dentry *dentry,
|
||||
struct vfsmount *vfsmnt)
|
||||
{
|
||||
unsigned long cookie;
|
||||
|
||||
if (dentry->d_cookie)
|
||||
return (unsigned long)dentry;
|
||||
get_dcookie(dentry, vfsmnt, &cookie);
|
||||
return cookie;
|
||||
}
|
||||
|
||||
/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
|
||||
* which corresponds loosely to "application name". Also, determine
|
||||
* the offset for the SPU ELF object. If computed offset is
|
||||
* non-zero, it implies an embedded SPU object; otherwise, it's a
|
||||
* separate SPU binary, in which case we retrieve it's dcookie.
|
||||
* For the embedded case, we must determine if SPU ELF is embedded
|
||||
* in the executable application or another file (i.e., shared lib).
|
||||
* If embedded in a shared lib, we must get the dcookie and return
|
||||
* that to the caller.
|
||||
*/
|
||||
static unsigned long
|
||||
get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
|
||||
unsigned long *spu_bin_dcookie,
|
||||
unsigned long spu_ref)
|
||||
{
|
||||
unsigned long app_cookie = 0;
|
||||
unsigned int my_offset = 0;
|
||||
struct file *app = NULL;
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm = spu->mm;
|
||||
|
||||
if (!mm)
|
||||
goto out;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||
if (!vma->vm_file)
|
||||
continue;
|
||||
if (!(vma->vm_flags & VM_EXECUTABLE))
|
||||
continue;
|
||||
app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
|
||||
vma->vm_file->f_vfsmnt);
|
||||
pr_debug("got dcookie for %s\n",
|
||||
vma->vm_file->f_dentry->d_name.name);
|
||||
app = vma->vm_file;
|
||||
break;
|
||||
}
|
||||
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||
if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
|
||||
continue;
|
||||
my_offset = spu_ref - vma->vm_start;
|
||||
if (!vma->vm_file)
|
||||
goto fail_no_image_cookie;
|
||||
|
||||
pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
|
||||
my_offset, spu_ref,
|
||||
vma->vm_file->f_dentry->d_name.name);
|
||||
*offsetp = my_offset;
|
||||
break;
|
||||
}
|
||||
|
||||
*spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
|
||||
vma->vm_file->f_vfsmnt);
|
||||
pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
out:
|
||||
return app_cookie;
|
||||
|
||||
fail_no_image_cookie:
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: Cannot find dcookie for SPU binary\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function finds or creates cached context information for the
|
||||
* passed SPU and records SPU context information into the OProfile
|
||||
* event buffer.
|
||||
*/
|
||||
static int process_context_switch(struct spu *spu, unsigned long objectId)
|
||||
{
|
||||
unsigned long flags;
|
||||
int retval;
|
||||
unsigned int offset = 0;
|
||||
unsigned long spu_cookie = 0, app_dcookie;
|
||||
|
||||
retval = prepare_cached_spu_info(spu, objectId);
|
||||
if (retval)
|
||||
goto out;
|
||||
|
||||
/* Get dcookie first because a mutex_lock is taken in that
|
||||
* code path, so interrupts must not be disabled.
|
||||
*/
|
||||
app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
|
||||
if (!app_dcookie || !spu_cookie) {
|
||||
retval = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Record context info in event buffer */
|
||||
spin_lock_irqsave(&buffer_lock, flags);
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
add_event_entry(SPU_CTX_SWITCH_CODE);
|
||||
add_event_entry(spu->number);
|
||||
add_event_entry(spu->pid);
|
||||
add_event_entry(spu->tgid);
|
||||
add_event_entry(app_dcookie);
|
||||
add_event_entry(spu_cookie);
|
||||
add_event_entry(offset);
|
||||
spin_unlock_irqrestore(&buffer_lock, flags);
|
||||
smp_wmb(); /* insure spu event buffer updates are written */
|
||||
/* don't want entries intermingled... */
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is invoked on either a bind_context or unbind_context.
|
||||
* If called for an unbind_context, the val arg is 0; otherwise,
|
||||
* it is the object-id value for the spu context.
|
||||
* The data arg is of type 'struct spu *'.
|
||||
*/
|
||||
static int spu_active_notify(struct notifier_block *self, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
int retval;
|
||||
unsigned long flags;
|
||||
struct spu *the_spu = data;
|
||||
|
||||
pr_debug("SPU event notification arrived\n");
|
||||
if (!val) {
|
||||
spin_lock_irqsave(&cache_lock, flags);
|
||||
retval = release_cached_info(the_spu->number);
|
||||
spin_unlock_irqrestore(&cache_lock, flags);
|
||||
} else {
|
||||
retval = process_context_switch(the_spu, val);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
static struct notifier_block spu_active = {
|
||||
.notifier_call = spu_active_notify,
|
||||
};
|
||||
|
||||
static int number_of_online_nodes(void)
|
||||
{
|
||||
u32 cpu; u32 tmp;
|
||||
int nodes = 0;
|
||||
for_each_online_cpu(cpu) {
|
||||
tmp = cbe_cpu_to_node(cpu) + 1;
|
||||
if (tmp > nodes)
|
||||
nodes++;
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
/* The main purpose of this function is to synchronize
|
||||
* OProfile with SPUFS by registering to be notified of
|
||||
* SPU task switches.
|
||||
*
|
||||
* NOTE: When profiling SPUs, we must ensure that only
|
||||
* spu_sync_start is invoked and not the generic sync_start
|
||||
* in drivers/oprofile/oprof.c. A return value of
|
||||
* SKIP_GENERIC_SYNC or SYNC_START_ERROR will
|
||||
* accomplish this.
|
||||
*/
|
||||
int spu_sync_start(void)
|
||||
{
|
||||
int k;
|
||||
int ret = SKIP_GENERIC_SYNC;
|
||||
int register_ret;
|
||||
unsigned long flags = 0;
|
||||
|
||||
spu_prof_num_nodes = number_of_online_nodes();
|
||||
num_spu_nodes = spu_prof_num_nodes * 8;
|
||||
|
||||
spin_lock_irqsave(&buffer_lock, flags);
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
add_event_entry(SPU_PROFILING_CODE);
|
||||
add_event_entry(num_spu_nodes);
|
||||
spin_unlock_irqrestore(&buffer_lock, flags);
|
||||
|
||||
/* Register for SPU events */
|
||||
register_ret = spu_switch_event_register(&spu_active);
|
||||
if (register_ret) {
|
||||
ret = SYNC_START_ERROR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (k = 0; k < (MAX_NUMNODES * 8); k++)
|
||||
last_guard_val[k] = 0;
|
||||
pr_debug("spu_sync_start -- running.\n");
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Record SPU program counter samples to the oprofile event buffer. */
|
||||
void spu_sync_buffer(int spu_num, unsigned int *samples,
|
||||
int num_samples)
|
||||
{
|
||||
unsigned long long file_offset;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
struct vma_to_fileoffset_map *map;
|
||||
struct spu *the_spu;
|
||||
unsigned long long spu_num_ll = spu_num;
|
||||
unsigned long long spu_num_shifted = spu_num_ll << 32;
|
||||
struct cached_info *c_info;
|
||||
|
||||
/* We need to obtain the cache_lock here because it's
|
||||
* possible that after getting the cached_info, the SPU job
|
||||
* corresponding to this cached_info may end, thus resulting
|
||||
* in the destruction of the cached_info.
|
||||
*/
|
||||
spin_lock_irqsave(&cache_lock, flags);
|
||||
c_info = get_cached_info(NULL, spu_num);
|
||||
if (!c_info) {
|
||||
/* This legitimately happens when the SPU task ends before all
|
||||
* samples are recorded.
|
||||
* No big deal -- so we just drop a few samples.
|
||||
*/
|
||||
pr_debug("SPU_PROF: No cached SPU contex "
|
||||
"for SPU #%d. Dropping samples.\n", spu_num);
|
||||
goto out;
|
||||
}
|
||||
|
||||
map = c_info->map;
|
||||
the_spu = c_info->the_spu;
|
||||
spin_lock(&buffer_lock);
|
||||
for (i = 0; i < num_samples; i++) {
|
||||
unsigned int sample = *(samples+i);
|
||||
int grd_val = 0;
|
||||
file_offset = 0;
|
||||
if (sample == 0)
|
||||
continue;
|
||||
file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
|
||||
|
||||
/* If overlays are used by this SPU application, the guard
|
||||
* value is non-zero, indicating which overlay section is in
|
||||
* use. We need to discard samples taken during the time
|
||||
* period which an overlay occurs (i.e., guard value changes).
|
||||
*/
|
||||
if (grd_val && grd_val != last_guard_val[spu_num]) {
|
||||
last_guard_val[spu_num] = grd_val;
|
||||
/* Drop the rest of the samples. */
|
||||
break;
|
||||
}
|
||||
|
||||
add_event_entry(file_offset | spu_num_shifted);
|
||||
}
|
||||
spin_unlock(&buffer_lock);
|
||||
out:
|
||||
spin_unlock_irqrestore(&cache_lock, flags);
|
||||
}
|
||||
|
||||
|
||||
int spu_sync_stop(void)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
int ret = spu_switch_event_unregister(&spu_active);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: spu_switch_event_unregister returned %d\n",
|
||||
__FUNCTION__, __LINE__, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&cache_lock, flags);
|
||||
ret = release_cached_info(RELEASE_ALL);
|
||||
spin_unlock_irqrestore(&cache_lock, flags);
|
||||
out:
|
||||
pr_debug("spu_sync_stop -- done.\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,287 @@
|
|||
/*
|
||||
* Cell Broadband Engine OProfile Support
|
||||
*
|
||||
* (C) Copyright IBM Corporation 2006
|
||||
*
|
||||
* Author: Maynard Johnson <maynardj@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
/* The code in this source file is responsible for generating
|
||||
* vma-to-fileOffset maps for both overlay and non-overlay SPU
|
||||
* applications.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/elf.h>
|
||||
#include "pr_util.h"
|
||||
|
||||
|
||||
void vma_map_free(struct vma_to_fileoffset_map *map)
|
||||
{
|
||||
while (map) {
|
||||
struct vma_to_fileoffset_map *next = map->next;
|
||||
kfree(map);
|
||||
map = next;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int
|
||||
vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
|
||||
const struct spu *aSpu, int *grd_val)
|
||||
{
|
||||
/*
|
||||
* Default the offset to the physical address + a flag value.
|
||||
* Addresses of dynamically generated code can't be found in the vma
|
||||
* map. For those addresses the flagged value will be sent on to
|
||||
* the user space tools so they can be reported rather than just
|
||||
* thrown away.
|
||||
*/
|
||||
u32 offset = 0x10000000 + vma;
|
||||
u32 ovly_grd;
|
||||
|
||||
for (; map; map = map->next) {
|
||||
if (vma < map->vma || vma >= map->vma + map->size)
|
||||
continue;
|
||||
|
||||
if (map->guard_ptr) {
|
||||
ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
|
||||
if (ovly_grd != map->guard_val)
|
||||
continue;
|
||||
*grd_val = ovly_grd;
|
||||
}
|
||||
offset = vma - map->vma + map->offset;
|
||||
break;
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static struct vma_to_fileoffset_map *
|
||||
vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
|
||||
unsigned int size, unsigned int offset, unsigned int guard_ptr,
|
||||
unsigned int guard_val)
|
||||
{
|
||||
struct vma_to_fileoffset_map *new =
|
||||
kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
|
||||
if (!new) {
|
||||
printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
vma_map_free(map);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
new->next = map;
|
||||
new->vma = vma;
|
||||
new->size = size;
|
||||
new->offset = offset;
|
||||
new->guard_ptr = guard_ptr;
|
||||
new->guard_val = guard_val;
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
|
||||
/* Parse SPE ELF header and generate a list of vma_maps.
|
||||
* A pointer to the first vma_map in the generated list
|
||||
* of vma_maps is returned. */
|
||||
struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
|
||||
unsigned long spu_elf_start)
|
||||
{
|
||||
static const unsigned char expected[EI_PAD] = {
|
||||
[EI_MAG0] = ELFMAG0,
|
||||
[EI_MAG1] = ELFMAG1,
|
||||
[EI_MAG2] = ELFMAG2,
|
||||
[EI_MAG3] = ELFMAG3,
|
||||
[EI_CLASS] = ELFCLASS32,
|
||||
[EI_DATA] = ELFDATA2MSB,
|
||||
[EI_VERSION] = EV_CURRENT,
|
||||
[EI_OSABI] = ELFOSABI_NONE
|
||||
};
|
||||
|
||||
int grd_val;
|
||||
struct vma_to_fileoffset_map *map = NULL;
|
||||
struct spu_overlay_info ovly;
|
||||
unsigned int overlay_tbl_offset = -1;
|
||||
unsigned long phdr_start, shdr_start;
|
||||
Elf32_Ehdr ehdr;
|
||||
Elf32_Phdr phdr;
|
||||
Elf32_Shdr shdr, shdr_str;
|
||||
Elf32_Sym sym;
|
||||
int i, j;
|
||||
char name[32];
|
||||
|
||||
unsigned int ovly_table_sym = 0;
|
||||
unsigned int ovly_buf_table_sym = 0;
|
||||
unsigned int ovly_table_end_sym = 0;
|
||||
unsigned int ovly_buf_table_end_sym = 0;
|
||||
unsigned long ovly_table;
|
||||
unsigned int n_ovlys;
|
||||
|
||||
/* Get and validate ELF header. */
|
||||
|
||||
if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
|
||||
goto fail;
|
||||
|
||||
if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: Unexpected e_ident parsing SPU ELF\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
goto fail;
|
||||
}
|
||||
if (ehdr.e_machine != EM_SPU) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: Unexpected e_machine parsing SPU ELF\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
goto fail;
|
||||
}
|
||||
if (ehdr.e_type != ET_EXEC) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: Unexpected e_type parsing SPU ELF\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
goto fail;
|
||||
}
|
||||
phdr_start = spu_elf_start + ehdr.e_phoff;
|
||||
shdr_start = spu_elf_start + ehdr.e_shoff;
|
||||
|
||||
/* Traverse program headers. */
|
||||
for (i = 0; i < ehdr.e_phnum; i++) {
|
||||
if (copy_from_user(&phdr,
|
||||
(void *) (phdr_start + i * sizeof(phdr)),
|
||||
sizeof(phdr)))
|
||||
goto fail;
|
||||
|
||||
if (phdr.p_type != PT_LOAD)
|
||||
continue;
|
||||
if (phdr.p_flags & (1 << 27))
|
||||
continue;
|
||||
|
||||
map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
|
||||
phdr.p_offset, 0, 0);
|
||||
if (!map)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
pr_debug("SPU_PROF: Created non-overlay maps\n");
|
||||
/* Traverse section table and search for overlay-related symbols. */
|
||||
for (i = 0; i < ehdr.e_shnum; i++) {
|
||||
if (copy_from_user(&shdr,
|
||||
(void *) (shdr_start + i * sizeof(shdr)),
|
||||
sizeof(shdr)))
|
||||
goto fail;
|
||||
|
||||
if (shdr.sh_type != SHT_SYMTAB)
|
||||
continue;
|
||||
if (shdr.sh_entsize != sizeof (sym))
|
||||
continue;
|
||||
|
||||
if (copy_from_user(&shdr_str,
|
||||
(void *) (shdr_start + shdr.sh_link *
|
||||
sizeof(shdr)),
|
||||
sizeof(shdr)))
|
||||
goto fail;
|
||||
|
||||
if (shdr_str.sh_type != SHT_STRTAB)
|
||||
goto fail;;
|
||||
|
||||
for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
|
||||
if (copy_from_user(&sym, (void *) (spu_elf_start +
|
||||
shdr.sh_offset + j *
|
||||
sizeof (sym)),
|
||||
sizeof (sym)))
|
||||
goto fail;
|
||||
|
||||
if (copy_from_user(name, (void *)
|
||||
(spu_elf_start + shdr_str.sh_offset +
|
||||
sym.st_name),
|
||||
20))
|
||||
goto fail;
|
||||
|
||||
if (memcmp(name, "_ovly_table", 12) == 0)
|
||||
ovly_table_sym = sym.st_value;
|
||||
if (memcmp(name, "_ovly_buf_table", 16) == 0)
|
||||
ovly_buf_table_sym = sym.st_value;
|
||||
if (memcmp(name, "_ovly_table_end", 16) == 0)
|
||||
ovly_table_end_sym = sym.st_value;
|
||||
if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
|
||||
ovly_buf_table_end_sym = sym.st_value;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we don't have overlays, we're done. */
|
||||
if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
|
||||
|| ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
|
||||
pr_debug("SPU_PROF: No overlay table found\n");
|
||||
goto out;
|
||||
} else {
|
||||
pr_debug("SPU_PROF: Overlay table found\n");
|
||||
}
|
||||
|
||||
/* The _ovly_table symbol represents a table with one entry
|
||||
* per overlay section. The _ovly_buf_table symbol represents
|
||||
* a table with one entry per overlay region.
|
||||
* The struct spu_overlay_info gives the structure of the _ovly_table
|
||||
* entries. The structure of _ovly_table_buf is simply one
|
||||
* u32 word per entry.
|
||||
*/
|
||||
overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
|
||||
aSpu, &grd_val);
|
||||
if (overlay_tbl_offset < 0) {
|
||||
printk(KERN_ERR "SPU_PROF: "
|
||||
"%s, line %d: Error finding SPU overlay table\n",
|
||||
__FUNCTION__, __LINE__);
|
||||
goto fail;
|
||||
}
|
||||
ovly_table = spu_elf_start + overlay_tbl_offset;
|
||||
|
||||
n_ovlys = (ovly_table_end_sym -
|
||||
ovly_table_sym) / sizeof (ovly);
|
||||
|
||||
/* Traverse overlay table. */
|
||||
for (i = 0; i < n_ovlys; i++) {
|
||||
if (copy_from_user(&ovly, (void *)
|
||||
(ovly_table + i * sizeof (ovly)),
|
||||
sizeof (ovly)))
|
||||
goto fail;
|
||||
|
||||
/* The ovly.vma/size/offset arguments are analogous to the same
|
||||
* arguments used above for non-overlay maps. The final two
|
||||
* args are referred to as the guard pointer and the guard
|
||||
* value.
|
||||
* The guard pointer is an entry in the _ovly_buf_table,
|
||||
* computed using ovly.buf as the index into the table. Since
|
||||
* ovly.buf values begin at '1' to reference the first (or 0th)
|
||||
* entry in the _ovly_buf_table, the computation subtracts 1
|
||||
* from ovly.buf.
|
||||
* The guard value is stored in the _ovly_buf_table entry and
|
||||
* is an index (starting at 1) back to the _ovly_table entry
|
||||
* that is pointing at this _ovly_buf_table entry. So, for
|
||||
* example, for an overlay scenario with one overlay segment
|
||||
* and two overlay sections:
|
||||
* - Section 1 points to the first entry of the
|
||||
* _ovly_buf_table, which contains a guard value
|
||||
* of '1', referencing the first (index=0) entry of
|
||||
* _ovly_table.
|
||||
* - Section 2 points to the second entry of the
|
||||
* _ovly_buf_table, which contains a guard value
|
||||
* of '2', referencing the second (index=1) entry of
|
||||
* _ovly_table.
|
||||
*/
|
||||
map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
|
||||
ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
|
||||
if (!map)
|
||||
goto fail;
|
||||
}
|
||||
goto out;
|
||||
|
||||
fail:
|
||||
map = NULL;
|
||||
out:
|
||||
return map;
|
||||
}
|
|
@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
|
|||
static struct op_counter_config ctr[OP_MAX_COUNTER];
|
||||
static struct op_system_config sys;
|
||||
|
||||
static int op_per_cpu_rc;
|
||||
|
||||
static void op_handle_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
model->handle_interrupt(regs, ctr);
|
||||
|
@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
|
|||
|
||||
static void op_powerpc_cpu_setup(void *dummy)
|
||||
{
|
||||
model->cpu_setup(ctr);
|
||||
int ret;
|
||||
|
||||
ret = model->cpu_setup(ctr);
|
||||
|
||||
if (ret != 0)
|
||||
op_per_cpu_rc = ret;
|
||||
}
|
||||
|
||||
static int op_powerpc_setup(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
op_per_cpu_rc = 0;
|
||||
|
||||
/* Grab the hardware */
|
||||
err = reserve_pmc_hardware(op_handle_interrupt);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* Pre-compute the values to stuff in the hardware registers. */
|
||||
model->reg_setup(ctr, &sys, model->num_counters);
|
||||
op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
|
||||
|
||||
/* Configure the registers on all cpus. */
|
||||
if (op_per_cpu_rc)
|
||||
goto out;
|
||||
|
||||
/* Configure the registers on all cpus. If an error occurs on one
|
||||
* of the cpus, op_per_cpu_rc will be set to the error */
|
||||
on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
|
||||
|
||||
return 0;
|
||||
out: if (op_per_cpu_rc) {
|
||||
/* error on setup release the performance counter hardware */
|
||||
release_pmc_hardware();
|
||||
}
|
||||
|
||||
return op_per_cpu_rc;
|
||||
}
|
||||
|
||||
static void op_powerpc_shutdown(void)
|
||||
|
@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
|
|||
|
||||
static void op_powerpc_cpu_start(void *dummy)
|
||||
{
|
||||
model->start(ctr);
|
||||
/* If any of the cpus have return an error, set the
|
||||
* global flag to the error so it can be returned
|
||||
* to the generic OProfile caller.
|
||||
*/
|
||||
int ret;
|
||||
|
||||
ret = model->start(ctr);
|
||||
if (ret != 0)
|
||||
op_per_cpu_rc = ret;
|
||||
}
|
||||
|
||||
static int op_powerpc_start(void)
|
||||
{
|
||||
op_per_cpu_rc = 0;
|
||||
|
||||
if (model->global_start)
|
||||
model->global_start(ctr);
|
||||
if (model->start)
|
||||
return model->global_start(ctr);
|
||||
if (model->start) {
|
||||
on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
|
||||
return 0;
|
||||
return op_per_cpu_rc;
|
||||
}
|
||||
return -EIO; /* No start function is defined for this
|
||||
power architecture */
|
||||
}
|
||||
|
||||
static inline void op_powerpc_cpu_stop(void *dummy)
|
||||
|
@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
|
|||
|
||||
switch (cur_cpu_spec->oprofile_type) {
|
||||
#ifdef CONFIG_PPC64
|
||||
#ifdef CONFIG_PPC_CELL_NATIVE
|
||||
#ifdef CONFIG_OPROFILE_CELL
|
||||
case PPC_OPROFILE_CELL:
|
||||
if (firmware_has_feature(FW_FEATURE_LPAR))
|
||||
return -ENODEV;
|
||||
model = &op_model_cell;
|
||||
ops->sync_start = model->sync_start;
|
||||
ops->sync_stop = model->sync_stop;
|
||||
break;
|
||||
#endif
|
||||
case PPC_OPROFILE_RS64:
|
||||
|
|
|
@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
|
|||
|
||||
/* Configures the counters on this CPU based on the global
|
||||
* settings */
|
||||
static void fsl7450_cpu_setup(struct op_counter_config *ctr)
|
||||
static int fsl7450_cpu_setup(struct op_counter_config *ctr)
|
||||
{
|
||||
/* freeze all counters */
|
||||
pmc_stop_ctrs();
|
||||
|
@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
|
|||
mtspr(SPRN_MMCR0, mmcr0_val);
|
||||
mtspr(SPRN_MMCR1, mmcr1_val);
|
||||
mtspr(SPRN_MMCR2, mmcr2_val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define NUM_CTRS 6
|
||||
|
||||
/* Configures the global settings for the countes on all CPUs. */
|
||||
static void fsl7450_reg_setup(struct op_counter_config *ctr,
|
||||
static int fsl7450_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys,
|
||||
int num_ctrs)
|
||||
{
|
||||
|
@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
|
|||
| mmcr1_event6(ctr[5].event);
|
||||
|
||||
mmcr2_val = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sets the counters on this CPU to the chosen values, and starts them */
|
||||
static void fsl7450_start(struct op_counter_config *ctr)
|
||||
static int fsl7450_start(struct op_counter_config *ctr)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
|
|||
pmc_start_ctrs();
|
||||
|
||||
oprofile_running = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stop the counters on this CPU */
|
||||
|
@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
|
|||
/* The freeze bit was set by the interrupt. */
|
||||
/* Clear the freeze bit, and reenable the interrupt.
|
||||
* The counters won't actually start until the rfi clears
|
||||
* the PMM bit */
|
||||
* the PM/M bit */
|
||||
pmc_start_ctrs();
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*
|
||||
* Author: David Erb (djerb@us.ibm.com)
|
||||
* Modifications:
|
||||
* Carl Love <carll@us.ibm.com>
|
||||
* Maynard Johnson <maynardj@us.ibm.com>
|
||||
* Carl Love <carll@us.ibm.com>
|
||||
* Maynard Johnson <maynardj@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
|
@ -38,12 +38,25 @@
|
|||
|
||||
#include "../platforms/cell/interrupt.h"
|
||||
#include "../platforms/cell/cbe_regs.h"
|
||||
#include "cell/pr_util.h"
|
||||
|
||||
static void cell_global_stop_spu(void);
|
||||
|
||||
/*
|
||||
* spu_cycle_reset is the number of cycles between samples.
|
||||
* This variable is used for SPU profiling and should ONLY be set
|
||||
* at the beginning of cell_reg_setup; otherwise, it's read-only.
|
||||
*/
|
||||
static unsigned int spu_cycle_reset;
|
||||
|
||||
#define NUM_SPUS_PER_NODE 8
|
||||
#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
|
||||
|
||||
#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
|
||||
#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
|
||||
* PPU_CYCLES event
|
||||
*/
|
||||
#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
|
||||
#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
|
||||
* PPU_CYCLES event
|
||||
*/
|
||||
#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
|
||||
|
||||
#define NUM_THREADS 2 /* number of physical threads in
|
||||
* physical processor
|
||||
|
@ -51,6 +64,7 @@
|
|||
#define NUM_TRACE_BUS_WORDS 4
|
||||
#define NUM_INPUT_BUS_WORDS 2
|
||||
|
||||
#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
|
||||
|
||||
struct pmc_cntrl_data {
|
||||
unsigned long vcntr;
|
||||
|
@ -62,11 +76,10 @@ struct pmc_cntrl_data {
|
|||
/*
|
||||
* ibm,cbe-perftools rtas parameters
|
||||
*/
|
||||
|
||||
struct pm_signal {
|
||||
u16 cpu; /* Processor to modify */
|
||||
u16 sub_unit; /* hw subunit this applies to (if applicable) */
|
||||
short int signal_group; /* Signal Group to Enable/Disable */
|
||||
u16 sub_unit; /* hw subunit this applies to (if applicable)*/
|
||||
short int signal_group; /* Signal Group to Enable/Disable */
|
||||
u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
|
||||
* Bus Word(s) (bitmask)
|
||||
*/
|
||||
|
@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
|
|||
|
||||
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
|
||||
|
||||
/* Interpetation of hdw_thread:
|
||||
/*
|
||||
* The CELL profiling code makes rtas calls to setup the debug bus to
|
||||
* route the performance signals. Additionally, SPU profiling requires
|
||||
* a second rtas call to setup the hardware to capture the SPU PCs.
|
||||
* The EIO error value is returned if the token lookups or the rtas
|
||||
* call fail. The EIO error number is the best choice of the existing
|
||||
* error numbers. The probability of rtas related error is very low. But
|
||||
* by returning EIO and printing additional information to dmsg the user
|
||||
* will know that OProfile did not start and dmesg will tell them why.
|
||||
* OProfile does not support returning errors on Stop. Not a huge issue
|
||||
* since failure to reset the debug bus or stop the SPU PC collection is
|
||||
* not a fatel issue. Chances are if the Stop failed, Start doesn't work
|
||||
* either.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Interpetation of hdw_thread:
|
||||
* 0 - even virtual cpus 0, 2, 4,...
|
||||
* 1 - odd virtual cpus 1, 3, 5, ...
|
||||
*
|
||||
* FIXME: this is strictly wrong, we need to clean this up in a number
|
||||
* of places. It works for now. -arnd
|
||||
*/
|
||||
static u32 hdw_thread;
|
||||
|
||||
static u32 virt_cntr_inter_mask;
|
||||
static struct timer_list timer_virt_cntr;
|
||||
|
||||
/* pm_signal needs to be global since it is initialized in
|
||||
/*
|
||||
* pm_signal needs to be global since it is initialized in
|
||||
* cell_reg_setup at the time when the necessary information
|
||||
* is available.
|
||||
*/
|
||||
static struct pm_signal pm_signal[NR_PHYS_CTRS];
|
||||
static int pm_rtas_token;
|
||||
static int pm_rtas_token; /* token for debug bus setup call */
|
||||
static int spu_rtas_token; /* token for SPU cycle profiling */
|
||||
|
||||
static u32 reset_value[NR_PHYS_CTRS];
|
||||
static int num_counters;
|
||||
|
@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru,
|
|||
{
|
||||
u64 paddr = __pa(address);
|
||||
|
||||
return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
|
||||
paddr >> 32, paddr & 0xffffffff, length);
|
||||
return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
|
||||
passthru, paddr >> 32, paddr & 0xffffffff, length);
|
||||
}
|
||||
|
||||
static void pm_rtas_reset_signals(u32 node)
|
||||
|
@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node)
|
|||
int ret;
|
||||
struct pm_signal pm_signal_local;
|
||||
|
||||
/* The debug bus is being set to the passthru disable state.
|
||||
* However, the FW still expects atleast one legal signal routing
|
||||
* entry or it will return an error on the arguments. If we don't
|
||||
* supply a valid entry, we must ignore all return values. Ignoring
|
||||
* all return values means we might miss an error we should be
|
||||
* concerned about.
|
||||
/*
|
||||
* The debug bus is being set to the passthru disable state.
|
||||
* However, the FW still expects atleast one legal signal routing
|
||||
* entry or it will return an error on the arguments. If we don't
|
||||
* supply a valid entry, we must ignore all return values. Ignoring
|
||||
* all return values means we might miss an error we should be
|
||||
* concerned about.
|
||||
*/
|
||||
|
||||
/* fw expects physical cpu #. */
|
||||
|
@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node)
|
|||
&pm_signal_local,
|
||||
sizeof(struct pm_signal));
|
||||
|
||||
if (ret)
|
||||
if (unlikely(ret))
|
||||
/*
|
||||
* Not a fatal error. For Oprofile stop, the oprofile
|
||||
* functions do not support returning an error for
|
||||
* failure to stop OProfile.
|
||||
*/
|
||||
printk(KERN_WARNING "%s: rtas returned: %d\n",
|
||||
__FUNCTION__, ret);
|
||||
}
|
||||
|
||||
static void pm_rtas_activate_signals(u32 node, u32 count)
|
||||
static int pm_rtas_activate_signals(u32 node, u32 count)
|
||||
{
|
||||
int ret;
|
||||
int i, j;
|
||||
struct pm_signal pm_signal_local[NR_PHYS_CTRS];
|
||||
|
||||
/* There is no debug setup required for the cycles event.
|
||||
/*
|
||||
* There is no debug setup required for the cycles event.
|
||||
* Note that only events in the same group can be used.
|
||||
* Otherwise, there will be conflicts in correctly routing
|
||||
* the signals on the debug bus. It is the responsiblity
|
||||
|
@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
|
|||
pm_signal_local,
|
||||
i * sizeof(struct pm_signal));
|
||||
|
||||
if (ret)
|
||||
if (unlikely(ret)) {
|
||||
printk(KERN_WARNING "%s: rtas returned: %d\n",
|
||||
__FUNCTION__, ret);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
|
|||
pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
|
||||
pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
|
||||
|
||||
/* Some of the islands signal selection is based on 64 bit words.
|
||||
/*
|
||||
* Some of the islands signal selection is based on 64 bit words.
|
||||
* The debug bus words are 32 bits, the input words to the performance
|
||||
* counters are defined as 32 bits. Need to convert the 64 bit island
|
||||
* specification to the appropriate 32 input bit and bus word for the
|
||||
* performance counter event selection. See the CELL Performance
|
||||
* performance counter event selection. See the CELL Performance
|
||||
* monitoring signals manual and the Perf cntr hardware descriptions
|
||||
* for the details.
|
||||
*/
|
||||
|
@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
|
|||
input_bus[j] = i;
|
||||
pm_regs.group_control |=
|
||||
(i << (31 - i));
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -309,7 +356,8 @@ out:
|
|||
|
||||
static void write_pm_cntrl(int cpu)
|
||||
{
|
||||
/* Oprofile will use 32 bit counters, set bits 7:10 to 0
|
||||
/*
|
||||
* Oprofile will use 32 bit counters, set bits 7:10 to 0
|
||||
* pmregs.pm_cntrl is a global
|
||||
*/
|
||||
|
||||
|
@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu)
|
|||
if (pm_regs.pm_cntrl.freeze == 1)
|
||||
val |= CBE_PM_FREEZE_ALL_CTRS;
|
||||
|
||||
/* Routine set_count_mode must be called previously to set
|
||||
/*
|
||||
* Routine set_count_mode must be called previously to set
|
||||
* the count mode based on the user selection of user and kernel.
|
||||
*/
|
||||
val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
|
||||
|
@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu)
|
|||
static inline void
|
||||
set_count_mode(u32 kernel, u32 user)
|
||||
{
|
||||
/* The user must specify user and kernel if they want them. If
|
||||
/*
|
||||
* The user must specify user and kernel if they want them. If
|
||||
* neither is specified, OProfile will count in hypervisor mode.
|
||||
* pm_regs.pm_cntrl is a global
|
||||
*/
|
||||
|
@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
|
|||
|
||||
/*
|
||||
* Oprofile is expected to collect data on all CPUs simultaneously.
|
||||
* However, there is one set of performance counters per node. There are
|
||||
* However, there is one set of performance counters per node. There are
|
||||
* two hardware threads or virtual CPUs on each node. Hence, OProfile must
|
||||
* multiplex in time the performance counter collection on the two virtual
|
||||
* CPUs. The multiplexing of the performance counters is done by this
|
||||
|
@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
|
|||
* pair of per-cpu arrays is used for storing the previous and next
|
||||
* pmc values for a given node.
|
||||
* NOTE: We use the per-cpu variable to improve cache performance.
|
||||
*
|
||||
* This routine will alternate loading the virtual counters for
|
||||
* virtual CPUs
|
||||
*/
|
||||
static void cell_virtual_cntr(unsigned long data)
|
||||
{
|
||||
/* This routine will alternate loading the virtual counters for
|
||||
* virtual CPUs
|
||||
*/
|
||||
int i, prev_hdw_thread, next_hdw_thread;
|
||||
u32 cpu;
|
||||
unsigned long flags;
|
||||
|
||||
	/* Make sure that the interrupt_handler and
	 * the virt counter are not both playing with
	 * the counters on the same node.
	/*
	 * Make sure that the interrupt_handler and the virt counter are
	 * not both playing with the counters on the same node.
	 */

	spin_lock_irqsave(&virt_cntr_lock, flags);
|
||||
|
@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data)
|
|||
hdw_thread = 1 ^ hdw_thread;
|
||||
next_hdw_thread = hdw_thread;
|
||||
|
||||
for (i = 0; i < num_counters; i++)
|
||||
/* There are some per thread events. Must do the
|
||||
/*
|
||||
* There are some per thread events. Must do the
|
||||
* set event, for the thread that is being started
|
||||
*/
|
||||
for (i = 0; i < num_counters; i++)
|
||||
set_pm_event(i,
|
||||
pmc_cntrl[next_hdw_thread][i].evnts,
|
||||
pmc_cntrl[next_hdw_thread][i].masks);
|
||||
|
||||
/* The following is done only once per each node, but
|
||||
/*
|
||||
* The following is done only once per each node, but
|
||||
* we need cpu #, not node #, to pass to the cbe_xxx functions.
|
||||
*/
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cbe_get_hw_thread_id(cpu))
|
||||
continue;
|
||||
|
||||
/* stop counters, save counter values, restore counts
|
||||
/*
|
||||
* stop counters, save counter values, restore counts
|
||||
* for previous thread
|
||||
*/
|
||||
cbe_disable_pm(cpu);
|
||||
|
@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data)
|
|||
== 0xFFFFFFFF)
|
||||
/* If the cntr value is 0xffffffff, we must
|
||||
* reset that to 0xfffffff0 when the current
|
||||
* thread is restarted. This will generate a
|
||||
* thread is restarted. This will generate a
|
||||
* new interrupt and make sure that we never
|
||||
* restore the counters to the max value. If
|
||||
* the counters were restored to the max value,
|
||||
|
@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data)
|
|||
next_hdw_thread)[i]);
|
||||
}
|
||||
|
||||
/* Switch to the other thread. Change the interrupt
|
||||
/*
|
||||
* Switch to the other thread. Change the interrupt
|
||||
* and control regs to be scheduled on the CPU
|
||||
* corresponding to the thread to execute.
|
||||
*/
|
||||
for (i = 0; i < num_counters; i++) {
|
||||
if (pmc_cntrl[next_hdw_thread][i].enabled) {
|
||||
/* There are some per thread events.
|
||||
/*
|
||||
* There are some per thread events.
|
||||
* Must do the set event, enable_cntr
|
||||
* for each cpu.
|
||||
*/
|
||||
|
@ -482,17 +537,42 @@ static void start_virt_cntrs(void)
|
|||
}
|
||||
|
||||
/* This function is called once for all cpus combined */
|
||||
static void
|
||||
cell_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys, int num_ctrs)
|
||||
static int cell_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys, int num_ctrs)
|
||||
{
|
||||
int i, j, cpu;
|
||||
spu_cycle_reset = 0;
|
||||
|
||||
if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
|
||||
spu_cycle_reset = ctr[0].count;
|
||||
|
||||
/*
|
||||
* Each node will need to make the rtas call to start
|
||||
* and stop SPU profiling. Get the token once and store it.
|
||||
*/
|
||||
spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
|
||||
|
||||
if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
|
||||
printk(KERN_ERR
|
||||
"%s: rtas token ibm,cbe-spu-perftools unknown\n",
|
||||
__FUNCTION__);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
pm_rtas_token = rtas_token("ibm,cbe-perftools");
|
||||
if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
|
||||
printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
|
||||
|
||||
	/*
	 * For all events except PPU CYCLES, each node will need to make
	 * the rtas cbe-perftools call to setup and reset the debug bus.
	 * Make the token lookup call once and store it in the global
	 * variable pm_rtas_token.
	 */
if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
|
||||
printk(KERN_ERR
|
||||
"%s: rtas token ibm,cbe-perftools unknown\n",
|
||||
__FUNCTION__);
|
||||
goto out;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
num_counters = num_ctrs;
|
||||
|
@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr,
|
|||
per_cpu(pmc_values, j)[i] = 0;
|
||||
}
|
||||
|
||||
/* Setup the thread 1 events, map the thread 0 event to the
|
||||
/*
|
||||
* Setup the thread 1 events, map the thread 0 event to the
|
||||
* equivalent thread 1 event.
|
||||
*/
|
||||
for (i = 0; i < num_ctrs; ++i) {
|
||||
|
@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr,
|
|||
for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
|
||||
input_bus[i] = 0xff;
|
||||
|
||||
/* Our counters count up, and "count" refers to
|
||||
/*
|
||||
* Our counters count up, and "count" refers to
|
||||
* how much before the next interrupt, and we interrupt
|
||||
* on overflow. So we calculate the starting value
|
||||
* on overflow. So we calculate the starting value
|
||||
* which will give us "count" until overflow.
|
||||
* Then we set the events on the enabled counters.
|
||||
*/
|
||||
|
@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr,
|
|||
for (i = 0; i < num_counters; ++i) {
|
||||
per_cpu(pmc_values, cpu)[i] = reset_value[i];
|
||||
}
|
||||
out:
|
||||
;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function is called once for each cpu */
|
||||
static void cell_cpu_setup(struct op_counter_config *cntr)
|
||||
static int cell_cpu_setup(struct op_counter_config *cntr)
|
||||
{
|
||||
u32 cpu = smp_processor_id();
|
||||
u32 num_enabled = 0;
|
||||
int i;
|
||||
|
||||
if (spu_cycle_reset)
|
||||
return 0;
|
||||
|
||||
/* There is one performance monitor per processor chip (i.e. node),
|
||||
* so we only need to perform this function once per node.
|
||||
*/
|
||||
if (cbe_get_hw_thread_id(cpu))
|
||||
goto out;
|
||||
|
||||
if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
|
||||
printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
|
||||
__FUNCTION__);
|
||||
goto out;
|
||||
}
|
||||
return 0;
|
||||
|
||||
/* Stop all counters */
|
||||
cbe_disable_pm(cpu);
|
||||
|
@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
|
|||
}
|
||||
}
|
||||
|
||||
pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
|
||||
out:
|
||||
;
|
||||
/*
|
||||
* The pm_rtas_activate_signals will return -EIO if the FW
|
||||
* call failed.
|
||||
*/
|
||||
return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
|
||||
}
|
||||
|
||||
static void cell_global_start(struct op_counter_config *ctr)
|
||||
#define ENTRIES 303
|
||||
#define MAXLFSR 0xFFFFFF
|
||||
|
||||
/* precomputed table of 24 bit LFSR values */
|
||||
static int initial_lfsr[] = {
|
||||
8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
|
||||
15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
|
||||
4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
|
||||
3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
|
||||
9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
|
||||
2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
|
||||
3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
|
||||
14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
|
||||
11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
|
||||
6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
|
||||
15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
|
||||
7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
|
||||
16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
|
||||
15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
|
||||
15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
|
||||
10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
|
||||
3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
|
||||
3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
|
||||
8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
|
||||
8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
|
||||
4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
|
||||
16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
|
||||
2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
|
||||
14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
|
||||
1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
|
||||
6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
|
||||
10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
|
||||
10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
|
||||
14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
|
||||
7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
|
||||
9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
|
||||
14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
|
||||
13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
|
||||
5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
|
||||
3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
|
||||
6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
|
||||
7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
|
||||
6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
|
||||
};
|
||||
|
||||
/*
 * The hardware uses an LFSR counting sequence to determine when to capture
 * the SPU PCs.  An LFSR sequence is like a pseudo random number sequence
 * where each number occurs once in the sequence but the sequence is not in
 * numerical order.  The SPU PC capture is done when the LFSR sequence reaches
 * the last value in the sequence.  Hence the user specified value N
 * corresponds to the LFSR number that is N from the end of the sequence.
 *
 * To avoid the time to compute the LFSR, a lookup table is used.  The 24 bit
 * LFSR sequence is broken into four ranges.  The spacing of the precomputed
 * values is adjusted in each range so the error between the user specified
 * number (N) of events between samples and the actual number of events based
 * on the precomputed value will be less than about 6.2%.  Note, if the user
 * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
 * This is to prevent the loss of samples because the trace buffer is full.
 *
 *	User specified N		Step between		Index in
 *					precomputed values	precomputed
 *								table
 *	0 to 2^16-1			----			0
 *	2^16 to 2^16+2^19-1		2^12			1 to 128
 *	2^16+2^19 to 2^16+2^19+2^22-1	2^15			129 to 256
 *	2^16+2^19+2^22 to 2^24-1	2^18			257 to 302
 *
 *
 * For example, the LFSR values in the second range are computed for 2^16,
 * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
 * 1, 2,..., 127, 128.
 *
 * The 24 bit LFSR value for the nth number in the sequence can be
 * calculated using the following code:
 *
 * #define size 24
 * int calculate_lfsr(int n)
 * {
 *	int i;
 *	unsigned int newlfsr0;
 *	unsigned int lfsr = 0xFFFFFF;
 *	unsigned int howmany = n;
 *
 *	for (i = 2; i < howmany + 2; i++) {
 *		newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
 *			((lfsr >> (size - 1 - 1)) & 1) ^
 *			(((lfsr >> (size - 1 - 6)) & 1) ^
 *			((lfsr >> (size - 1 - 23)) & 1)));
 *
 *		lfsr >>= 1;
 *		lfsr = lfsr | (newlfsr0 << (size - 1));
 *	}
 *	return lfsr;
 * }
 */

#define V2_16  (0x1 << 16)
#define V2_19  (0x1 << 19)
#define V2_22  (0x1 << 22)

static int calculate_lfsr(int n)
{
	u32 cpu;
	/*
	 * The ranges and steps are in powers of 2 so the calculations
	 * can be done using shifts rather than divide.
	 */
	int index;

	if ((n >> 16) == 0)
		index = 0;
	else if (((n - V2_16) >> 19) == 0)
		index = ((n - V2_16) >> 12) + 1;
	else if (((n - V2_16 - V2_19) >> 22) == 0)
		index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;
	else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
		index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;
	else
		index = ENTRIES - 1;

	/* make sure index is valid */
	if ((index > ENTRIES) || (index < 0))
		index = ENTRIES - 1;

	return initial_lfsr[index];
}
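The commented algorithm above is self-contained enough to run in user space. The sketch below is only an illustrative program, not part of the kernel driver; the helper name lfsr_after() and the sample step count printed in main() are assumptions chosen for the example. It regenerates the 24-bit LFSR value after a given number of steps, which is the building block behind the precomputed initial_lfsr[] table; the range table in the comment above describes how a user-specified count maps to a table index.

/* Standalone sketch (not kernel code): compute the 24-bit LFSR value that
 * is n steps from the starting value 0xFFFFFF, using the taps from the
 * comment above (bits 0, 1, 6 and 23 of the 24-bit register).
 */
#include <stdio.h>

#define SIZE 24

static unsigned int lfsr_after(int n)
{
	unsigned int lfsr = 0xFFFFFF;	/* starting value, all ones */
	unsigned int newbit;
	int i;

	for (i = 2; i < n + 2; i++) {	/* n iterations, as in the comment */
		newbit = (((lfsr >> (SIZE - 1 - 0)) & 1) ^
			  ((lfsr >> (SIZE - 1 - 1)) & 1) ^
			  (((lfsr >> (SIZE - 1 - 6)) & 1) ^
			   ((lfsr >> (SIZE - 1 - 23)) & 1)));
		lfsr >>= 1;
		lfsr |= (newbit << (SIZE - 1));
	}
	return lfsr;
}

int main(void)
{
	/* Example step count (illustrative): value after 2^16 steps */
	printf("0x%06x\n", lfsr_after(1 << 16));
	return 0;
}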
static int pm_rtas_activate_spu_profiling(u32 node)
{
	int ret, i;
	struct pm_signal pm_signal_local[NR_PHYS_CTRS];

	/*
	 * Set up the rtas call to configure the debug bus to
	 * route the SPU PCs.  Setup the pm_signal for each SPU
	 */
	for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
		pm_signal_local[i].cpu = node;
		pm_signal_local[i].signal_group = 41;
		/* spu i on word (i/2) */
		pm_signal_local[i].bus_word = 1 << i / 2;
		/* spu i */
		pm_signal_local[i].sub_unit = i;
		pm_signal_local[i].bit = 63;
	}

	ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
				     PASSTHRU_ENABLE, pm_signal_local,
				     (NUM_SPUS_PER_NODE
				      * sizeof(struct pm_signal)));

	if (unlikely(ret)) {
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __FUNCTION__, ret);
		return -EIO;
	}

	return 0;
}
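One detail of the loop above is easy to misread: bus_word is a bit mask, and the expression 1 << i / 2 parses as 1 << (i / 2), so consecutive SPU pairs share one debug bus word. The small user-space sketch below is illustrative only; the per-node SPU count of 8 is an assumption matching current Cell hardware rather than a value taken from this hunk.

/* Illustrative only: show which debug bus word each SPU of a node selects */
#include <stdio.h>

int main(void)
{
	int i;

	for (i = 0; i < 8; i++)		/* assumed NUM_SPUS_PER_NODE */
		printf("spu %d -> bus_word mask 0x%x (word %d)\n",
		       i, 1 << i / 2, i / 2);
	return 0;
}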
#ifdef CONFIG_CPU_FREQ
|
||||
static int
|
||||
oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
|
||||
{
|
||||
int ret = 0;
|
||||
struct cpufreq_freqs *frq = data;
|
||||
if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
|
||||
(val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
|
||||
(val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
|
||||
set_spu_profiling_frequency(frq->new, spu_cycle_reset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct notifier_block cpu_freq_notifier_block = {
|
||||
.notifier_call = oprof_cpufreq_notify
|
||||
};
|
||||
#endif
|
||||
|
||||
static int cell_global_start_spu(struct op_counter_config *ctr)
|
||||
{
|
||||
int subfunc;
|
||||
unsigned int lfsr_value;
|
||||
int cpu;
|
||||
int ret;
|
||||
int rtas_error;
|
||||
unsigned int cpu_khzfreq = 0;
|
||||
|
||||
/* The SPU profiling uses time-based profiling based on
|
||||
* cpu frequency, so if configured with the CPU_FREQ
|
||||
* option, we should detect frequency changes and react
|
||||
* accordingly.
|
||||
*/
|
||||
#ifdef CONFIG_CPU_FREQ
|
||||
ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
if (ret < 0)
|
||||
/* this is not a fatal error */
|
||||
printk(KERN_ERR "CPU freq change registration failed: %d\n",
|
||||
ret);
|
||||
|
||||
else
|
||||
cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
|
||||
#endif
|
||||
|
||||
set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cbe_get_hw_thread_id(cpu))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Setup SPU cycle-based profiling.
|
||||
* Set perf_mon_control bit 0 to a zero before
|
||||
* enabling spu collection hardware.
|
||||
*/
|
||||
cbe_write_pm(cpu, pm_control, 0);
|
||||
|
||||
if (spu_cycle_reset > MAX_SPU_COUNT)
|
||||
/* use largest possible value */
|
||||
lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
|
||||
else
|
||||
lfsr_value = calculate_lfsr(spu_cycle_reset);
|
||||
|
||||
/* must use a non zero value. Zero disables data collection. */
|
||||
if (lfsr_value == 0)
|
||||
lfsr_value = calculate_lfsr(1);
|
||||
|
||||
lfsr_value = lfsr_value << 8; /* shift lfsr to correct
|
||||
* register location
|
||||
*/
|
||||
|
||||
/* debug bus setup */
|
||||
ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
|
||||
|
||||
if (unlikely(ret)) {
|
||||
rtas_error = ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
|
||||
|
||||
/* start profiling */
|
||||
ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
|
||||
cbe_cpu_to_node(cpu), lfsr_value);
|
||||
|
||||
if (unlikely(ret != 0)) {
|
||||
printk(KERN_ERR
|
||||
"%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
|
||||
__FUNCTION__, ret);
|
||||
rtas_error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
rtas_error = start_spu_profiling(spu_cycle_reset);
|
||||
if (rtas_error)
|
||||
goto out_stop;
|
||||
|
||||
oprofile_running = 1;
|
||||
return 0;
|
||||
|
||||
out_stop:
|
||||
cell_global_stop_spu(); /* clean up the PMU/debug bus */
|
||||
out:
|
||||
return rtas_error;
|
||||
}
|
||||
|
||||
static int cell_global_start_ppu(struct op_counter_config *ctr)
|
||||
{
|
||||
u32 cpu, i;
|
||||
u32 interrupt_mask = 0;
|
||||
u32 i;
|
||||
|
||||
/* This routine gets called once for the system.
|
||||
* There is one performance monitor per node, so we
|
||||
|
@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr)
|
|||
oprofile_running = 1;
|
||||
smp_wmb();
|
||||
|
||||
/* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
|
||||
* executed which manipulates the PMU. We start the "virtual counter"
|
||||
/*
|
||||
* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
|
||||
* executed which manipulates the PMU. We start the "virtual counter"
|
||||
* here so that we do not need to synchronize access to the PMU in
|
||||
* the above for-loop.
|
||||
*/
|
||||
start_virt_cntrs();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cell_global_stop(void)
|
||||
static int cell_global_start(struct op_counter_config *ctr)
|
||||
{
|
||||
if (spu_cycle_reset)
|
||||
return cell_global_start_spu(ctr);
|
||||
else
|
||||
return cell_global_start_ppu(ctr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note the generic OProfile stop calls do not support returning
|
||||
* an error on stop. Hence, will not return an error if the FW
|
||||
* calls fail on stop. Failure to reset the debug bus is not an issue.
|
||||
* Failure to disable the SPU profiling is not an issue. The FW calls
|
||||
* to enable the performance counters and debug bus will work even if
|
||||
* the hardware was not cleanly reset.
|
||||
*/
|
||||
static void cell_global_stop_spu(void)
|
||||
{
|
||||
int subfunc, rtn_value;
|
||||
unsigned int lfsr_value;
|
||||
int cpu;
|
||||
|
||||
oprofile_running = 0;
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ
|
||||
cpufreq_unregister_notifier(&cpu_freq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
#endif
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cbe_get_hw_thread_id(cpu))
|
||||
continue;
|
||||
|
||||
subfunc = 3; /*
|
||||
* 2 - activate SPU tracing,
|
||||
* 3 - deactivate
|
||||
*/
|
||||
lfsr_value = 0x8f100000;
|
||||
|
||||
rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
|
||||
subfunc, cbe_cpu_to_node(cpu),
|
||||
lfsr_value);
|
||||
|
||||
if (unlikely(rtn_value != 0)) {
|
||||
printk(KERN_ERR
|
||||
"%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
|
||||
__FUNCTION__, rtn_value);
|
||||
}
|
||||
|
||||
/* Deactivate the signals */
|
||||
pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
|
||||
}
|
||||
|
||||
stop_spu_profiling();
|
||||
}
|
||||
|
||||
static void cell_global_stop_ppu(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
/* This routine will be called once for the system.
|
||||
/*
|
||||
* This routine will be called once for the system.
|
||||
* There is one performance monitor per node, so we
|
||||
* only need to perform this function once per node.
|
||||
*/
|
||||
|
@ -687,8 +1098,16 @@ static void cell_global_stop(void)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
|
||||
static void cell_global_stop(void)
|
||||
{
|
||||
if (spu_cycle_reset)
|
||||
cell_global_stop_spu();
|
||||
else
|
||||
cell_global_stop_ppu();
|
||||
}
|
||||
|
||||
static void cell_handle_interrupt(struct pt_regs *regs,
|
||||
struct op_counter_config *ctr)
|
||||
{
|
||||
u32 cpu;
|
||||
u64 pc;
|
||||
|
@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
|
|||
|
||||
cpu = smp_processor_id();
|
||||
|
||||
/* Need to make sure the interrupt handler and the virt counter
|
||||
/*
|
||||
* Need to make sure the interrupt handler and the virt counter
|
||||
* routine are not running at the same time. See the
|
||||
* cell_virtual_cntr() routine for additional comments.
|
||||
*/
|
||||
spin_lock_irqsave(&virt_cntr_lock, flags);
|
||||
|
||||
/* Need to disable and reenable the performance counters
|
||||
/*
|
||||
* Need to disable and reenable the performance counters
|
||||
* to get the desired behavior from the hardware. This
|
||||
* is hardware specific.
|
||||
*/
|
||||
|
@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
|
|||
|
||||
interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
|
||||
|
||||
/* If the interrupt mask has been cleared, then the virt cntr
|
||||
/*
|
||||
* If the interrupt mask has been cleared, then the virt cntr
|
||||
* has cleared the interrupt. When the thread that generated
|
||||
* the interrupt is restored, the data count will be restored to
|
||||
* 0xffffff0 to cause the interrupt to be regenerated.
|
||||
|
@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
|
|||
}
|
||||
}
|
||||
|
||||
	/* The counters were frozen by the interrupt.
	/*
	 * The counters were frozen by the interrupt.
	 * Reenable the interrupt and restart the counters.
	 * If there was a race between the interrupt handler and
	 * the virtual counter routine, the virtual counter
	 * the virtual counter routine, the virtual counter
	 * routine may have cleared the interrupts. Hence must
	 * use the virt_cntr_inter_mask to re-enable the interrupts.
	 */
	cbe_enable_pm_interrupts(cpu, hdw_thread,
				 virt_cntr_inter_mask);

	/* The writes to the various performance counters only writes
	 * to a latch. The new values (interrupt setting bits, reset
	/*
	 * The writes to the various performance counters only writes
	 * to a latch. The new values (interrupt setting bits, reset
	 * counter value etc.) are not copied to the actual registers
	 * until the performance monitor is enabled. In order to get
	 * this to work as desired, the performance monitor needs to
|
@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
|
|||
spin_unlock_irqrestore(&virt_cntr_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called from the generic OProfile
|
||||
* driver. When profiling PPUs, we need to do the
|
||||
* generic sync start; otherwise, do spu_sync_start.
|
||||
*/
|
||||
static int cell_sync_start(void)
|
||||
{
|
||||
if (spu_cycle_reset)
|
||||
return spu_sync_start();
|
||||
else
|
||||
return DO_GENERIC_SYNC;
|
||||
}
|
||||
|
||||
static int cell_sync_stop(void)
|
||||
{
|
||||
if (spu_cycle_reset)
|
||||
return spu_sync_stop();
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct op_powerpc_model op_model_cell = {
|
||||
.reg_setup = cell_reg_setup,
|
||||
.cpu_setup = cell_cpu_setup,
|
||||
.global_start = cell_global_start,
|
||||
.global_stop = cell_global_stop,
|
||||
.sync_start = cell_sync_start,
|
||||
.sync_stop = cell_sync_stop,
|
||||
.handle_interrupt = cell_handle_interrupt,
|
||||
};
|
||||
|
|
|
@ -244,7 +244,7 @@ static void dump_pmcs(void)
|
|||
mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
|
||||
}
|
||||
|
||||
static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
|
||||
static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
|
|||
|
||||
set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void fsl_booke_reg_setup(struct op_counter_config *ctr,
|
||||
static int fsl_booke_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys,
|
||||
int num_ctrs)
|
||||
{
|
||||
|
@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
|
|||
for (i = 0; i < num_counters; ++i)
|
||||
reset_value[i] = 0x80000000UL - ctr[i].count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void fsl_booke_start(struct op_counter_config *ctr)
|
||||
static int fsl_booke_start(struct op_counter_config *ctr)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
|
|||
|
||||
pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
|
||||
mfpmr(PMRN_PMGC0));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void fsl_booke_stop(void)
|
||||
|
|
|
@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
|
|||
|
||||
|
||||
/* precompute the values to stuff in the hardware registers */
|
||||
static void pa6t_reg_setup(struct op_counter_config *ctr,
|
||||
static int pa6t_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys,
|
||||
int num_ctrs)
|
||||
{
|
||||
|
@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
|
|||
pr_debug("reset_value for pmc%u inited to 0x%lx\n",
|
||||
pmc, reset_value[pmc]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* configure registers on this cpu */
|
||||
static void pa6t_cpu_setup(struct op_counter_config *ctr)
|
||||
static int pa6t_cpu_setup(struct op_counter_config *ctr)
|
||||
{
|
||||
u64 mmcr0 = mmcr0_val;
|
||||
u64 mmcr1 = mmcr1_val;
|
||||
|
@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
|
|||
mfspr(SPRN_PA6T_MMCR0));
|
||||
pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
|
||||
mfspr(SPRN_PA6T_MMCR1));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pa6t_start(struct op_counter_config *ctr)
|
||||
static int pa6t_start(struct op_counter_config *ctr)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
|
|||
oprofile_running = 1;
|
||||
|
||||
pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pa6t_stop(void)
|
||||
|
|
|
@ -32,7 +32,7 @@ static u32 mmcr0_val;
|
|||
static u64 mmcr1_val;
|
||||
static u64 mmcra_val;
|
||||
|
||||
static void power4_reg_setup(struct op_counter_config *ctr,
|
||||
static int power4_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys,
|
||||
int num_ctrs)
|
||||
{
|
||||
|
@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
|
|||
mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
|
||||
else
|
||||
mmcr0_val |= MMCR0_PROBLEM_DISABLE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void ppc64_enable_pmcs(void);
|
||||
|
@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void power4_cpu_setup(struct op_counter_config *ctr)
|
||||
static int power4_cpu_setup(struct op_counter_config *ctr)
|
||||
{
|
||||
unsigned int mmcr0 = mmcr0_val;
|
||||
unsigned long mmcra = mmcra_val;
|
||||
|
@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
|
|||
mfspr(SPRN_MMCR1));
|
||||
dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
|
||||
mfspr(SPRN_MMCRA));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void power4_start(struct op_counter_config *ctr)
|
||||
static int power4_start(struct op_counter_config *ctr)
|
||||
{
|
||||
int i;
|
||||
unsigned int mmcr0;
|
||||
|
@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
|
|||
oprofile_running = 1;
|
||||
|
||||
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void power4_stop(void)
|
||||
|
|
|
@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
|
|||
|
||||
static int num_counters;
|
||||
|
||||
static void rs64_reg_setup(struct op_counter_config *ctr,
|
||||
static int rs64_reg_setup(struct op_counter_config *ctr,
|
||||
struct op_system_config *sys,
|
||||
int num_ctrs)
|
||||
{
|
||||
|
@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
|
|||
reset_value[i] = 0x80000000UL - ctr[i].count;
|
||||
|
||||
/* XXX setup user and kernel profiling */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rs64_cpu_setup(struct op_counter_config *ctr)
|
||||
static int rs64_cpu_setup(struct op_counter_config *ctr)
|
||||
{
|
||||
unsigned int mmcr0;
|
||||
|
||||
|
@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
|
|||
mfspr(SPRN_MMCR0));
|
||||
dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
|
||||
mfspr(SPRN_MMCR1));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rs64_start(struct op_counter_config *ctr)
|
||||
static int rs64_start(struct op_counter_config *ctr)
|
||||
{
|
||||
int i;
|
||||
unsigned int mmcr0;
|
||||
|
@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
|
|||
mtspr(SPRN_MMCR0, mmcr0);
|
||||
|
||||
dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rs64_stop(void)
|
||||
|
|
|
@ -272,4 +272,14 @@ config CPM2
|
|||
you wish to build a kernel for a machine with a CPM2 coprocessor
|
||||
on it (826x, 827x, 8560).
|
||||
|
||||
config AXON_RAM
|
||||
tristate "Axon DDR2 memory device driver"
|
||||
depends on PPC_IBM_CELL_BLADE
|
||||
default m
|
||||
help
|
||||
It registers one block device per Axon's DDR2 memory bank found
|
||||
on a system. Block devices are called axonram?, their major and
|
||||
minor numbers are available in /proc/devices, /proc/partitions or
|
||||
in /sys/block/axonram?/dev.
|
||||
|
||||
endmenu
|
||||
|
|
|
@ -73,4 +73,14 @@ config CBE_CPUFREQ
|
|||
For details, take a look at <file:Documentation/cpu-freq/>.
|
||||
If you don't have such processor, say N
|
||||
|
||||
config CBE_CPUFREQ_PMI
|
||||
tristate "CBE frequency scaling using PMI interface"
|
||||
depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL
|
||||
default n
|
||||
help
|
||||
Select this, if you want to use the PMI interface
|
||||
to switch frequencies. Using PMI, the
|
||||
processor will not only be able to run at lower speed,
|
||||
but also at lower core voltage.
|
||||
|
||||
endmenu
|
||||
|
|
|
@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \
|
|||
obj-$(CONFIG_CBE_RAS) += ras.o
|
||||
|
||||
obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
|
||||
obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o
|
||||
obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
|
||||
obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
|
||||
cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
|
||||
|
||||
ifeq ($(CONFIG_SMP),y)
|
||||
obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
|
||||
|
@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
|
|||
$(spu-priv1-y) \
|
||||
$(spu-manage-y) \
|
||||
spufs/
|
||||
|
||||
obj-$(CONFIG_PCI_MSI) += axon_msi.o
|
||||
|
|
|
@ -0,0 +1,445 @@
|
|||
/*
|
||||
* Copyright 2007, Michael Ellerman, IBM Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/msi.h>
|
||||
#include <linux/reboot.h>
|
||||
|
||||
#include <asm/dcr.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/prom.h>
|
||||
|
||||
|
||||
/*
|
||||
* MSIC registers, specified as offsets from dcr_base
|
||||
*/
|
||||
#define MSIC_CTRL_REG 0x0
|
||||
|
||||
/* Base Address registers specify FIFO location in BE memory */
|
||||
#define MSIC_BASE_ADDR_HI_REG 0x3
|
||||
#define MSIC_BASE_ADDR_LO_REG 0x4
|
||||
|
||||
/* Hold the read/write offsets into the FIFO */
|
||||
#define MSIC_READ_OFFSET_REG 0x5
|
||||
#define MSIC_WRITE_OFFSET_REG 0x6
|
||||
|
||||
|
||||
/* MSIC control register flags */
|
||||
#define MSIC_CTRL_ENABLE 0x0001
|
||||
#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
|
||||
#define MSIC_CTRL_IRQ_ENABLE 0x0008
|
||||
#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
|
||||
|
||||
/*
 * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
 * Currently we're using a 64KB FIFO size.
 */
#define MSIC_FIFO_SIZE_SHIFT	16
#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)

/*
 * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
 * 8-9 of the MSIC control reg.
 */
#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)

/*
 * We need to mask the read/write offsets to make sure they stay within
 * the bounds of the FIFO.  Also they should always be 16-byte aligned.
 */
#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)

/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
#define MSIC_FIFO_ENTRY_SIZE	0x10
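How these constants cooperate is easiest to see in isolation. The sketch below is an illustrative user-space program, not kernel code; it advances a read offset entry by entry and wraps it with the same mask, mirroring what axon_msi_cascade() does later in this file. The starting offset is an arbitrary example value.

/* Standalone sketch: step a FIFO read offset and wrap it with the mask.
 * Values mirror the #defines above (64KB FIFO, 16-byte entries).
 */
#include <stdio.h>

#define FIFO_SIZE_BYTES		(1 << 16)
#define FIFO_SIZE_MASK		((FIFO_SIZE_BYTES - 1) & ~0xFu)
#define FIFO_ENTRY_SIZE		0x10

int main(void)
{
	unsigned int read_offset = FIFO_SIZE_BYTES - 2 * FIFO_ENTRY_SIZE;
	int i;

	/* Reading the last two entries wraps the offset back to 0 */
	for (i = 0; i < 3; i++) {
		printf("entry at offset 0x%05x\n", read_offset);
		read_offset += FIFO_ENTRY_SIZE;
		read_offset &= FIFO_SIZE_MASK;
	}
	return 0;
}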
|
||||
|
||||
struct axon_msic {
|
||||
struct device_node *dn;
|
||||
struct irq_host *irq_host;
|
||||
__le32 *fifo;
|
||||
dcr_host_t dcr_host;
|
||||
struct list_head list;
|
||||
u32 read_offset;
|
||||
u32 dcr_base;
|
||||
};
|
||||
|
||||
static LIST_HEAD(axon_msic_list);
|
||||
|
||||
static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
|
||||
{
|
||||
pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
|
||||
|
||||
dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val);
|
||||
}
|
||||
|
||||
static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n)
|
||||
{
|
||||
return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n);
|
||||
}
|
||||
|
||||
static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
|
||||
{
|
||||
struct axon_msic *msic = get_irq_data(irq);
|
||||
u32 write_offset, msi;
|
||||
int idx;
|
||||
|
||||
write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG);
|
||||
pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
|
||||
|
||||
/* write_offset doesn't wrap properly, so we have to mask it */
|
||||
write_offset &= MSIC_FIFO_SIZE_MASK;
|
||||
|
||||
while (msic->read_offset != write_offset) {
|
||||
idx = msic->read_offset / sizeof(__le32);
|
||||
msi = le32_to_cpu(msic->fifo[idx]);
|
||||
msi &= 0xFFFF;
|
||||
|
||||
pr_debug("axon_msi: woff %x roff %x msi %x\n",
|
||||
write_offset, msic->read_offset, msi);
|
||||
|
||||
msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
|
||||
msic->read_offset &= MSIC_FIFO_SIZE_MASK;
|
||||
|
||||
if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
|
||||
generic_handle_irq(msi);
|
||||
else
|
||||
pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
|
||||
}
|
||||
|
||||
desc->chip->eoi(irq);
|
||||
}
|
||||
|
||||
static struct axon_msic *find_msi_translator(struct pci_dev *dev)
|
||||
{
|
||||
struct irq_host *irq_host;
|
||||
struct device_node *dn, *tmp;
|
||||
const phandle *ph;
|
||||
struct axon_msic *msic = NULL;
|
||||
|
||||
dn = pci_device_to_OF_node(dev);
|
||||
if (!dn) {
|
||||
dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
|
||||
ph = of_get_property(dn, "msi-translator", NULL);
|
||||
if (ph)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ph) {
|
||||
dev_dbg(&dev->dev,
|
||||
"axon_msi: no msi-translator property found\n");
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
tmp = dn;
|
||||
dn = of_find_node_by_phandle(*ph);
|
||||
if (!dn) {
|
||||
dev_dbg(&dev->dev,
|
||||
"axon_msi: msi-translator doesn't point to a node\n");
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
irq_host = irq_find_host(dn);
|
||||
if (!irq_host) {
|
||||
dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n",
|
||||
dn->full_name);
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
msic = irq_host->host_data;
|
||||
|
||||
out_error:
|
||||
of_node_put(dn);
|
||||
of_node_put(tmp);
|
||||
|
||||
return msic;
|
||||
}
|
||||
|
||||
static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
|
||||
{
|
||||
if (!find_msi_translator(dev))
|
||||
return -ENODEV;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
|
||||
{
|
||||
struct device_node *dn, *tmp;
|
||||
struct msi_desc *entry;
|
||||
int len;
|
||||
const u32 *prop;
|
||||
|
||||
dn = pci_device_to_OF_node(dev);
|
||||
if (!dn) {
|
||||
dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
|
||||
|
||||
for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
|
||||
if (entry->msi_attrib.is_64) {
|
||||
prop = of_get_property(dn, "msi-address-64", &len);
|
||||
if (prop)
|
||||
break;
|
||||
}
|
||||
|
||||
prop = of_get_property(dn, "msi-address-32", &len);
|
||||
if (prop)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!prop) {
|
||||
dev_dbg(&dev->dev,
|
||||
"axon_msi: no msi-address-(32|64) properties found\n");
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
switch (len) {
|
||||
case 8:
|
||||
msg->address_hi = prop[0];
|
||||
msg->address_lo = prop[1];
|
||||
break;
|
||||
case 4:
|
||||
msg->address_hi = 0;
|
||||
msg->address_lo = prop[0];
|
||||
break;
|
||||
default:
|
||||
dev_dbg(&dev->dev,
|
||||
"axon_msi: malformed msi-address-(32|64) property\n");
|
||||
of_node_put(dn);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
of_node_put(dn);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
||||
{
|
||||
unsigned int virq, rc;
|
||||
struct msi_desc *entry;
|
||||
struct msi_msg msg;
|
||||
struct axon_msic *msic;
|
||||
|
||||
msic = find_msi_translator(dev);
|
||||
if (!msic)
|
||||
return -ENODEV;
|
||||
|
||||
rc = setup_msi_msg_address(dev, &msg);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* We rely on being able to stash a virq in a u16 */
|
||||
BUILD_BUG_ON(NR_IRQS > 65536);
|
||||
|
||||
list_for_each_entry(entry, &dev->msi_list, list) {
|
||||
virq = irq_create_direct_mapping(msic->irq_host);
|
||||
if (virq == NO_IRQ) {
|
||||
dev_warn(&dev->dev,
|
||||
"axon_msi: virq allocation failed!\n");
|
||||
return -1;
|
||||
}
|
||||
dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
|
||||
|
||||
set_irq_msi(virq, entry);
|
||||
msg.data = virq;
|
||||
write_msi_msg(virq, &msg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
|
||||
{
|
||||
struct msi_desc *entry;
|
||||
|
||||
dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
|
||||
|
||||
list_for_each_entry(entry, &dev->msi_list, list) {
|
||||
if (entry->irq == NO_IRQ)
|
||||
continue;
|
||||
|
||||
set_irq_msi(entry->irq, NULL);
|
||||
irq_dispose_mapping(entry->irq);
|
||||
}
|
||||
}
|
||||
|
||||
static struct irq_chip msic_irq_chip = {
|
||||
.mask = mask_msi_irq,
|
||||
.unmask = unmask_msi_irq,
|
||||
.shutdown = unmask_msi_irq,
|
||||
.typename = "AXON-MSI",
|
||||
};
|
||||
|
||||
static int msic_host_map(struct irq_host *h, unsigned int virq,
|
||||
irq_hw_number_t hw)
|
||||
{
|
||||
set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int msic_host_match(struct irq_host *host, struct device_node *dn)
|
||||
{
|
||||
struct axon_msic *msic = host->host_data;
|
||||
|
||||
return msic->dn == dn;
|
||||
}
|
||||
|
||||
static struct irq_host_ops msic_host_ops = {
|
||||
.match = msic_host_match,
|
||||
.map = msic_host_map,
|
||||
};
|
||||
|
||||
static int axon_msi_notify_reboot(struct notifier_block *nb,
|
||||
unsigned long code, void *data)
|
||||
{
|
||||
struct axon_msic *msic;
|
||||
u32 tmp;
|
||||
|
||||
list_for_each_entry(msic, &axon_msic_list, list) {
|
||||
pr_debug("axon_msi: disabling %s\n", msic->dn->full_name);
|
||||
tmp = msic_dcr_read(msic, MSIC_CTRL_REG);
|
||||
tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
|
||||
msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct notifier_block axon_msi_reboot_notifier = {
|
||||
.notifier_call = axon_msi_notify_reboot
|
||||
};
|
||||
|
||||
static int axon_msi_setup_one(struct device_node *dn)
|
||||
{
|
||||
struct page *page;
|
||||
struct axon_msic *msic;
|
||||
unsigned int virq;
|
||||
int dcr_len;
|
||||
|
||||
pr_debug("axon_msi: setting up dn %s\n", dn->full_name);
|
||||
|
||||
msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
|
||||
if (!msic) {
|
||||
printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
|
||||
dn->full_name);
|
||||
goto out;
|
||||
}
|
||||
|
||||
msic->dcr_base = dcr_resource_start(dn, 0);
|
||||
dcr_len = dcr_resource_len(dn, 0);
|
||||
|
||||
if (msic->dcr_base == 0 || dcr_len == 0) {
|
||||
printk(KERN_ERR
|
||||
"axon_msi: couldn't parse dcr properties on %s\n",
|
||||
dn->full_name);
|
||||
goto out;
|
||||
}
|
||||
|
||||
msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len);
|
||||
if (!DCR_MAP_OK(msic->dcr_host)) {
|
||||
printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
|
||||
dn->full_name);
|
||||
goto out_free_msic;
|
||||
}
|
||||
|
||||
page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL,
|
||||
get_order(MSIC_FIFO_SIZE_BYTES));
|
||||
if (!page) {
|
||||
printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
|
||||
dn->full_name);
|
||||
goto out_free_msic;
|
||||
}
|
||||
|
||||
msic->fifo = page_address(page);
|
||||
|
||||
msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS,
|
||||
&msic_host_ops, 0);
|
||||
if (!msic->irq_host) {
|
||||
printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n",
|
||||
dn->full_name);
|
||||
goto out_free_fifo;
|
||||
}
|
||||
|
||||
msic->irq_host->host_data = msic;
|
||||
|
||||
virq = irq_of_parse_and_map(dn, 0);
|
||||
if (virq == NO_IRQ) {
|
||||
printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
|
||||
dn->full_name);
|
||||
goto out_free_host;
|
||||
}
|
||||
|
||||
msic->dn = of_node_get(dn);
|
||||
|
||||
set_irq_data(virq, msic);
|
||||
set_irq_chained_handler(virq, axon_msi_cascade);
|
||||
pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq);
|
||||
|
||||
/* Enable the MSIC hardware */
|
||||
msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32);
|
||||
msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
|
||||
(u64)msic->fifo & 0xFFFFFFFF);
|
||||
msic_dcr_write(msic, MSIC_CTRL_REG,
|
||||
MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
|
||||
MSIC_CTRL_FIFO_SIZE);
|
||||
|
||||
list_add(&msic->list, &axon_msic_list);
|
||||
|
||||
printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_host:
|
||||
kfree(msic->irq_host);
|
||||
out_free_fifo:
|
||||
__free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES));
|
||||
out_free_msic:
|
||||
kfree(msic);
|
||||
out:
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int axon_msi_init(void)
|
||||
{
|
||||
struct device_node *dn;
|
||||
int found = 0;
|
||||
|
||||
pr_debug("axon_msi: initialising ...\n");
|
||||
|
||||
for_each_compatible_node(dn, NULL, "ibm,axon-msic") {
|
||||
if (axon_msi_setup_one(dn) == 0)
|
||||
found++;
|
||||
}
|
||||
|
||||
if (found) {
|
||||
ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
|
||||
ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
|
||||
ppc_md.msi_check_device = axon_msi_check_device;
|
||||
|
||||
register_reboot_notifier(&axon_msi_reboot_notifier);
|
||||
|
||||
pr_debug("axon_msi: registered callbacks!\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(axon_msi_init);
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* cpufreq driver for the cell processor
|
||||
*
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
|
||||
*
|
||||
* Author: Christian Krafft <krafft@de.ibm.com>
|
||||
*
|
||||
|
@ -21,18 +21,11 @@
|
|||
*/
|
||||
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/time.h>
|
||||
#include <asm/pmi.h>
|
||||
#include <asm/of_platform.h>
|
||||
|
||||
#include <asm/prom.h>
|
||||
#include "cbe_regs.h"
|
||||
#include "cbe_cpufreq.h"
|
||||
|
||||
static DEFINE_MUTEX(cbe_switch_mutex);
|
||||
|
||||
|
@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = {
|
|||
{0, CPUFREQ_TABLE_END},
|
||||
};
|
||||
|
||||
/* to write to MIC register */
|
||||
static u64 MIC_Slow_Fast_Timer_table[] = {
|
||||
[0 ... 7] = 0x007fc00000000000ull,
|
||||
};
|
||||
|
||||
/* more values for the MIC */
|
||||
static u64 MIC_Slow_Next_Timer_table[] = {
|
||||
0x0000240000000000ull,
|
||||
0x0000268000000000ull,
|
||||
0x000029C000000000ull,
|
||||
0x00002D0000000000ull,
|
||||
0x0000300000000000ull,
|
||||
0x0000334000000000ull,
|
||||
0x000039C000000000ull,
|
||||
0x00003FC000000000ull,
|
||||
};
|
||||
|
||||
static unsigned int pmi_frequency_limit = 0;
|
||||
/*
|
||||
* hardware specific functions
|
||||
*/
|
||||
|
||||
static struct of_device *pmi_dev;
|
||||
|
||||
#ifdef CONFIG_PPC_PMI
|
||||
static int set_pmode_pmi(int cpu, unsigned int pmode)
|
||||
static int set_pmode(unsigned int cpu, unsigned int slow_mode)
|
||||
{
|
||||
int ret;
|
||||
pmi_message_t pmi_msg;
|
||||
#ifdef DEBUG
|
||||
u64 time;
|
||||
#endif
|
||||
int rc;
|
||||
|
||||
pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
|
||||
pmi_msg.data1 = cbe_cpu_to_node(cpu);
|
||||
pmi_msg.data2 = pmode;
|
||||
|
||||
#ifdef DEBUG
|
||||
time = (u64) get_cycles();
|
||||
#endif
|
||||
|
||||
pmi_send_message(pmi_dev, pmi_msg);
|
||||
ret = pmi_msg.data2;
|
||||
|
||||
pr_debug("PMI returned slow mode %d\n", ret);
|
||||
|
||||
#ifdef DEBUG
|
||||
time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */
|
||||
time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */
|
||||
pr_debug("had to wait %lu ns for a transition\n", time);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int get_pmode(int cpu)
|
||||
{
|
||||
int ret;
|
||||
struct cbe_pmd_regs __iomem *pmd_regs;
|
||||
|
||||
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
|
||||
ret = in_be64(&pmd_regs->pmsr) & 0x07;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int set_pmode_reg(int cpu, unsigned int pmode)
|
||||
{
|
||||
struct cbe_pmd_regs __iomem *pmd_regs;
|
||||
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
|
||||
u64 flags;
|
||||
u64 value;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
|
||||
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
|
||||
|
||||
pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr);
|
||||
pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0);
|
||||
|
||||
out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
|
||||
out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
|
||||
|
||||
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
|
||||
out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
|
||||
|
||||
value = in_be64(&pmd_regs->pmcr);
|
||||
/* set bits to zero */
|
||||
value &= 0xFFFFFFFFFFFFFFF8ull;
|
||||
/* set bits to next pmode */
|
||||
value |= pmode;
|
||||
|
||||
out_be64(&pmd_regs->pmcr, value);
|
||||
|
||||
/* wait until new pmode appears in status register */
|
||||
value = in_be64(&pmd_regs->pmsr) & 0x07;
|
||||
while(value != pmode) {
|
||||
cpu_relax();
|
||||
value = in_be64(&pmd_regs->pmsr) & 0x07;
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_pmode(int cpu, unsigned int slow_mode) {
|
||||
#ifdef CONFIG_PPC_PMI
|
||||
if (pmi_dev)
|
||||
return set_pmode_pmi(cpu, slow_mode);
|
||||
if (cbe_cpufreq_has_pmi)
|
||||
rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
|
||||
else
|
||||
#endif
|
||||
return set_pmode_reg(cpu, slow_mode);
|
||||
rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
|
||||
|
||||
pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg)
|
||||
{
|
||||
u8 cpu;
|
||||
u8 cbe_pmode_new;
|
||||
|
||||
BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
|
||||
|
||||
cpu = cbe_node_to_cpu(pmi_msg.data1);
|
||||
cbe_pmode_new = pmi_msg.data2;
|
||||
|
||||
pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency;
|
||||
|
||||
pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit);
|
||||
}
|
||||
|
||||
static int pmi_notifier(struct notifier_block *nb,
|
||||
unsigned long event, void *data)
|
||||
{
|
||||
struct cpufreq_policy *policy = data;
|
||||
|
||||
if (event != CPUFREQ_INCOMPATIBLE)
|
||||
return 0;
|
||||
|
||||
cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct notifier_block pmi_notifier_block = {
|
||||
.notifier_call = pmi_notifier,
|
||||
};
|
||||
|
||||
static struct pmi_handler cbe_pmi_handler = {
|
||||
.type = PMI_TYPE_FREQ_CHANGE,
|
||||
.handle_pmi_message = cbe_cpufreq_handle_pmi,
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* cpufreq functions
|
||||
*/
|
||||
|
@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
|
|||
|
||||
pr_debug("init cpufreq on CPU %d\n", policy->cpu);
|
||||
|
||||
/*
|
||||
* Let's check we can actually get to the CELL regs
|
||||
*/
|
||||
if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
|
||||
!cbe_get_cpu_mic_tm_regs(policy->cpu)) {
|
||||
pr_info("invalid CBE regs pointers for cpufreq\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
max_freqp = of_get_property(cpu, "clock-frequency", NULL);
|
||||
|
||||
of_node_put(cpu);
|
||||
|
||||
if (!max_freqp)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
|
|||
}
|
||||
|
||||
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
|
||||
/* if DEBUG is enabled set_pmode() measures the correct latency of a transition */
|
||||
|
||||
/* if DEBUG is enabled set_pmode() measures the latency
|
||||
* of a transition */
|
||||
policy->cpuinfo.transition_latency = 25000;
|
||||
|
||||
cur_pmode = get_pmode(policy->cpu);
|
||||
cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
|
||||
pr_debug("current pmode is at %d\n",cur_pmode);
|
||||
|
||||
policy->cur = cbe_freqs[cur_pmode].frequency;
|
||||
|
@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
|
|||
|
||||
cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
|
||||
|
||||
if (pmi_dev) {
|
||||
/* frequency might get limited later, initialize limit with max_freq */
|
||||
pmi_frequency_limit = max_freq;
|
||||
cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
|
||||
}
|
||||
|
||||
/* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */
|
||||
/* this ensures that policy->cpuinfo_min
|
||||
* and policy->cpuinfo_max are set correctly */
|
||||
return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
|
||||
}
|
||||
|
||||
static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
|
||||
{
|
||||
if (pmi_dev)
|
||||
cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
|
||||
|
||||
cpufreq_frequency_table_put_attr(policy->cpu);
|
||||
return 0;
|
||||
}
|
||||
|
@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
|
|||
return cpufreq_frequency_table_verify(policy, cbe_freqs);
|
||||
}
|
||||
|
||||
|
||||
static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq,
|
||||
unsigned int relation)
|
||||
static int cbe_cpufreq_target(struct cpufreq_policy *policy,
|
||||
unsigned int target_freq,
|
||||
unsigned int relation)
|
||||
{
|
||||
int rc;
|
||||
struct cpufreq_freqs freqs;
|
||||
int cbe_pmode_new;
|
||||
unsigned int cbe_pmode_new;
|
||||
|
||||
cpufreq_frequency_table_target(policy,
|
||||
cbe_freqs,
|
||||
|
@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target
|
|||
mutex_lock(&cbe_switch_mutex);
|
||||
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
|
||||
|
||||
pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
|
||||
pr_debug("setting frequency for cpu %d to %d kHz, " \
|
||||
"1/%d of max frequency\n",
|
||||
policy->cpu,
|
||||
cbe_freqs[cbe_pmode_new].frequency,
|
||||
cbe_freqs[cbe_pmode_new].index);
|
||||
|
||||
rc = set_pmode(policy->cpu, cbe_pmode_new);
|
||||
|
||||
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
|
||||
mutex_unlock(&cbe_switch_mutex);
|
||||
|
||||
|
@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = {
|
|||
|
||||
static int __init cbe_cpufreq_init(void)
|
||||
{
|
||||
#ifdef CONFIG_PPC_PMI
|
||||
struct device_node *np;
|
||||
#endif
|
||||
if (!machine_is(cell))
|
||||
return -ENODEV;
|
||||
#ifdef CONFIG_PPC_PMI
|
||||
np = of_find_node_by_type(NULL, "ibm,pmi");
|
||||
|
||||
pmi_dev = of_find_device_by_node(np);
|
||||
|
||||
if (pmi_dev)
|
||||
pmi_register_handler(pmi_dev, &cbe_pmi_handler);
|
||||
#endif
|
||||
return cpufreq_register_driver(&cbe_cpufreq_driver);
|
||||
}
|
||||
|
||||
static void __exit cbe_cpufreq_exit(void)
|
||||
{
|
||||
#ifdef CONFIG_PPC_PMI
|
||||
if (pmi_dev)
|
||||
pmi_unregister_handler(pmi_dev, &cbe_pmi_handler);
|
||||
#endif
|
||||
cpufreq_unregister_driver(&cbe_cpufreq_driver);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* cbe_cpufreq.h
|
||||
*
|
||||
* This file contains the definitions used by the cbe_cpufreq driver.
|
||||
*
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
|
||||
*
|
||||
* Author: Christian Krafft <krafft@de.ibm.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
|
||||
int cbe_cpufreq_get_pmode(int cpu);
|
||||
|
||||
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
|
||||
|
||||
#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
|
||||
extern bool cbe_cpufreq_has_pmi;
|
||||
#else
|
||||
#define cbe_cpufreq_has_pmi (0)
|
||||
#endif
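Together with the cbe_cpufreq.c hunk earlier in this series, this header lets the core driver pick a frequency-setting backend at run time: the PMI path when it is built in, otherwise the pervasive-register path. The fragment below is a condensed sketch of that dispatch, simplified for illustration rather than the verbatim kernel code.

/* Condensed sketch of the backend dispatch in cbe_cpufreq.c (simplified) */
#include "cbe_cpufreq.h"

static int set_pmode(unsigned int cpu, unsigned int slow_mode)
{
	int rc;

	if (cbe_cpufreq_has_pmi)
		rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
	else
		rc = cbe_cpufreq_set_pmode(cpu, slow_mode);

	return rc;
}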
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* pervasive backend for the cbe_cpufreq driver
|
||||
*
|
||||
* This driver makes use of the pervasive unit to
|
||||
* engage the desired frequency.
|
||||
*
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
|
||||
*
|
||||
* Author: Christian Krafft <krafft@de.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <linux/io.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/time.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/hw_irq.h>
|
||||
|
||||
#include "cbe_regs.h"
|
||||
#include "cbe_cpufreq.h"
|
||||
|
||||
/* to write to MIC register */
|
||||
static u64 MIC_Slow_Fast_Timer_table[] = {
|
||||
[0 ... 7] = 0x007fc00000000000ull,
|
||||
};
|
||||
|
||||
/* more values for the MIC */
|
||||
static u64 MIC_Slow_Next_Timer_table[] = {
|
||||
0x0000240000000000ull,
|
||||
0x0000268000000000ull,
|
||||
0x000029C000000000ull,
|
||||
0x00002D0000000000ull,
|
||||
0x0000300000000000ull,
|
||||
0x0000334000000000ull,
|
||||
0x000039C000000000ull,
|
||||
0x00003FC000000000ull,
|
||||
};
|
||||
|
||||
|
||||
int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
|
||||
{
|
||||
struct cbe_pmd_regs __iomem *pmd_regs;
|
||||
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
|
||||
u64 flags;
|
||||
u64 value;
|
||||
#ifdef DEBUG
|
||||
long time;
|
||||
#endif
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
|
||||
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
|
||||
|
||||
#ifdef DEBUG
|
||||
time = jiffies;
|
||||
#endif
|
||||
|
||||
out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
|
||||
out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
|
||||
|
||||
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
|
||||
out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
|
||||
|
||||
value = in_be64(&pmd_regs->pmcr);
|
||||
/* set bits to zero */
|
||||
value &= 0xFFFFFFFFFFFFFFF8ull;
|
||||
/* set bits to next pmode */
|
||||
value |= pmode;
|
||||
|
||||
out_be64(&pmd_regs->pmcr, value);
|
||||
|
||||
#ifdef DEBUG
|
||||
/* wait until new pmode appears in status register */
|
||||
value = in_be64(&pmd_regs->pmsr) & 0x07;
|
||||
while (value != pmode) {
|
||||
cpu_relax();
|
||||
value = in_be64(&pmd_regs->pmsr) & 0x07;
|
||||
}
|
||||
|
||||
time = jiffies - time;
|
||||
time = jiffies_to_msecs(time);
|
||||
pr_debug("had to wait %lu ms for a transition using " \
|
||||
"pervasive unit\n", time);
|
||||
#endif
|
||||
local_irq_restore(flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int cbe_cpufreq_get_pmode(int cpu)
|
||||
{
|
||||
int ret;
|
||||
struct cbe_pmd_regs __iomem *pmd_regs;
|
||||
|
||||
pmd_regs = cbe_get_cpu_pmd_regs(cpu);
|
||||
ret = in_be64(&pmd_regs->pmsr) & 0x07;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
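
The pervasive backend programs the MIC slow/fast timer registers and then writes the requested pmode into the low three bits of the PMD pmcr register; pmsr reports the mode the hardware has actually engaged. A short usage sketch follows; the wrapper function is illustrative only and not part of the patch.

#include "cbe_cpufreq.h"

/* illustrative caller: only touch the registers when the mode changes */
static void example_switch_pmode(int cpu, unsigned int new_pmode)
{
	/* pmsr holds the mode currently engaged by the hardware */
	if (cbe_cpufreq_get_pmode(cpu) == new_pmode)
		return;

	/* writes the MIC timer tables and the low 3 bits of pmcr */
	cbe_cpufreq_set_pmode(cpu, new_pmode);
}
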
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* pmi backend for the cbe_cpufreq driver
|
||||
*
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
|
||||
*
|
||||
* Author: Christian Krafft <krafft@de.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/timer.h>
|
||||
#include <asm/of_platform.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/pmi.h>
|
||||
|
||||
#ifdef DEBUG
|
||||
#include <asm/time.h>
|
||||
#endif
|
||||
|
||||
#include "cbe_regs.h"
|
||||
#include "cbe_cpufreq.h"
|
||||
|
||||
static u8 pmi_slow_mode_limit[MAX_CBE];
|
||||
|
||||
bool cbe_cpufreq_has_pmi = false;
|
||||
EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
|
||||
|
||||
/*
|
||||
* hardware specific functions
|
||||
*/
|
||||
|
||||
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
|
||||
{
|
||||
int ret;
|
||||
pmi_message_t pmi_msg;
|
||||
#ifdef DEBUG
|
||||
long time;
|
||||
#endif
|
||||
pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
|
||||
pmi_msg.data1 = cbe_cpu_to_node(cpu);
|
||||
pmi_msg.data2 = pmode;
|
||||
|
||||
#ifdef DEBUG
|
||||
time = jiffies;
|
||||
#endif
|
||||
pmi_send_message(pmi_msg);
|
||||
|
||||
#ifdef DEBUG
|
||||
time = jiffies - time;
|
||||
time = jiffies_to_msecs(time);
|
||||
pr_debug("had to wait %lu ms for a transition using " \
|
||||
"PMI\n", time);
|
||||
#endif
|
||||
ret = pmi_msg.data2;
|
||||
pr_debug("PMI returned slow mode %d\n", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
|
||||
|
||||
|
||||
static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
|
||||
{
|
||||
u8 node, slow_mode;
|
||||
|
||||
BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
|
||||
|
||||
node = pmi_msg.data1;
|
||||
slow_mode = pmi_msg.data2;
|
||||
|
||||
pmi_slow_mode_limit[node] = slow_mode;
|
||||
|
||||
pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
|
||||
}
|
||||
|
||||
static int pmi_notifier(struct notifier_block *nb,
|
||||
unsigned long event, void *data)
|
||||
{
|
||||
struct cpufreq_policy *policy = data;
|
||||
struct cpufreq_frequency_table *cbe_freqs;
|
||||
u8 node;
|
||||
|
||||
cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
|
||||
node = cbe_cpu_to_node(policy->cpu);
|
||||
|
||||
pr_debug("got notified, event=%lu, node=%u\n", event, node);
|
||||
|
||||
if (pmi_slow_mode_limit[node] != 0) {
|
||||
pr_debug("limiting node %d to slow mode %d\n",
|
||||
node, pmi_slow_mode_limit[node]);
|
||||
|
||||
cpufreq_verify_within_limits(policy, 0,
|
||||
|
||||
cbe_freqs[pmi_slow_mode_limit[node]].frequency);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct notifier_block pmi_notifier_block = {
|
||||
.notifier_call = pmi_notifier,
|
||||
};
|
||||
|
||||
static struct pmi_handler cbe_pmi_handler = {
|
||||
.type = PMI_TYPE_FREQ_CHANGE,
|
||||
.handle_pmi_message = cbe_cpufreq_handle_pmi,
|
||||
};
|
||||
|
||||
|
||||
|
||||
static int __init cbe_cpufreq_pmi_init(void)
|
||||
{
|
||||
cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;
|
||||
|
||||
if (!cbe_cpufreq_has_pmi)
|
||||
return -ENODEV;
|
||||
|
||||
cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit cbe_cpufreq_pmi_exit(void)
|
||||
{
|
||||
cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
|
||||
pmi_unregister_handler(&cbe_pmi_handler);
|
||||
}
|
||||
|
||||
module_init(cbe_cpufreq_pmi_init);
|
||||
module_exit(cbe_cpufreq_pmi_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
|
|
@ -174,6 +174,13 @@ static struct device_node *cbe_get_be_node(int cpu_id)
|
|||
|
||||
cpu_handle = of_get_property(np, "cpus", &len);
|
||||
|
||||
/*
|
||||
* the CAB SLOF tree is non compliant, so we just assume
|
||||
* there is only one node
|
||||
*/
|
||||
if (WARN_ON_ONCE(!cpu_handle))
|
||||
return np;
|
||||
|
||||
for (i=0; i<len; i++)
|
||||
if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
|
||||
return np;
|
||||
|
|
|
@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = {
|
|||
/*
|
||||
* initialize throttling with default values
|
||||
*/
|
||||
static void __init init_default_values(void)
|
||||
static int __init init_default_values(void)
|
||||
{
|
||||
int cpu;
|
||||
struct cbe_pmd_regs __iomem *pmd_regs;
|
||||
|
@ -339,25 +339,40 @@ static void __init init_default_values(void)
|
|||
for_each_possible_cpu (cpu) {
|
||||
pr_debug("processing cpu %d\n", cpu);
|
||||
sysdev = get_cpu_sysdev(cpu);
|
||||
|
||||
if (!sysdev) {
|
||||
pr_info("invalid sysdev pointer for cbe_thermal\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
|
||||
|
||||
if (!pmd_regs) {
|
||||
pr_info("invalid CBE regs pointer for cbe_thermal\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
out_be64(&pmd_regs->tm_str2, str2);
|
||||
out_be64(&pmd_regs->tm_str1.val, str1.val);
|
||||
out_be64(&pmd_regs->tm_tpr.val, tpr.val);
|
||||
out_be64(&pmd_regs->tm_cr1.val, cr1.val);
|
||||
out_be64(&pmd_regs->tm_cr2, cr2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int __init thermal_init(void)
|
||||
{
|
||||
init_default_values();
|
||||
int rc = init_default_values();
|
||||
|
||||
spu_add_sysdev_attr_group(&spu_attribute_group);
|
||||
cpu_add_sysdev_attr_group(&ppe_attribute_group);
|
||||
if (rc == 0) {
|
||||
spu_add_sysdev_attr_group(&spu_attribute_group);
|
||||
cpu_add_sysdev_attr_group(&ppe_attribute_group);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return rc;
|
||||
}
|
||||
module_init(thermal_init);
|
||||
|
||||
|
|
|
@ -35,19 +35,38 @@
|
|||
#include <asm/spu.h>
|
||||
#include <asm/spu_priv1.h>
|
||||
#include <asm/xmon.h>
|
||||
#include <asm/prom.h>
|
||||
#include "spu_priv1_mmio.h"
|
||||
|
||||
const struct spu_management_ops *spu_management_ops;
|
||||
EXPORT_SYMBOL_GPL(spu_management_ops);
|
||||
|
||||
const struct spu_priv1_ops *spu_priv1_ops;
|
||||
|
||||
static struct list_head spu_list[MAX_NUMNODES];
|
||||
static LIST_HEAD(spu_full_list);
|
||||
static DEFINE_MUTEX(spu_mutex);
|
||||
static DEFINE_SPINLOCK(spu_list_lock);
|
||||
|
||||
EXPORT_SYMBOL_GPL(spu_priv1_ops);
|
||||
|
||||
struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
|
||||
EXPORT_SYMBOL_GPL(cbe_spu_info);
|
||||
|
||||
/*
|
||||
* Protects cbe_spu_info and spu->number.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(spu_lock);
|
||||
|
||||
/*
|
||||
* List of all spus in the system.
|
||||
*
|
||||
* This list is iterated by callers from irq context and callers that
|
||||
* want to sleep. Thus modifications need to be done with both
|
||||
* spu_full_list_lock and spu_full_list_mutex held, while iterating
|
||||
* through it requires either of these locks.
|
||||
*
|
||||
* In addition spu_full_list_lock protects all assignments to
|
||||
* spu->mm.
|
||||
*/
|
||||
static LIST_HEAD(spu_full_list);
|
||||
static DEFINE_SPINLOCK(spu_full_list_lock);
|
||||
static DEFINE_MUTEX(spu_full_list_mutex);
|
||||
|
||||
void spu_invalidate_slbs(struct spu *spu)
|
||||
{
|
||||
struct spu_priv2 __iomem *priv2 = spu->priv2;
|
||||
|
@ -65,12 +84,12 @@ void spu_flush_all_slbs(struct mm_struct *mm)
|
|||
struct spu *spu;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&spu_list_lock, flags);
|
||||
spin_lock_irqsave(&spu_full_list_lock, flags);
|
||||
list_for_each_entry(spu, &spu_full_list, full_list) {
|
||||
if (spu->mm == mm)
|
||||
spu_invalidate_slbs(spu);
|
||||
}
|
||||
spin_unlock_irqrestore(&spu_list_lock, flags);
|
||||
spin_unlock_irqrestore(&spu_full_list_lock, flags);
|
||||
}
|
||||
|
||||
/* The hack below stinks... try to do something better one of
|
||||
|
@ -88,9 +107,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
|
|||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&spu_list_lock, flags);
|
||||
spin_lock_irqsave(&spu_full_list_lock, flags);
|
||||
spu->mm = mm;
|
||||
spin_unlock_irqrestore(&spu_list_lock, flags);
|
||||
spin_unlock_irqrestore(&spu_full_list_lock, flags);
|
||||
if (mm)
|
||||
mm_needs_global_tlbie(mm);
|
||||
}
|
||||
|
@ -390,7 +409,7 @@ static void spu_free_irqs(struct spu *spu)
|
|||
free_irq(spu->irqs[2], spu);
|
||||
}
|
||||
|
||||
static void spu_init_channels(struct spu *spu)
|
||||
void spu_init_channels(struct spu *spu)
|
||||
{
|
||||
static const struct {
|
||||
unsigned channel;
|
||||
|
@ -423,46 +442,7 @@ static void spu_init_channels(struct spu *spu)
|
|||
out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
|
||||
}
|
||||
}
|
||||
|
||||
struct spu *spu_alloc_node(int node)
|
||||
{
|
||||
struct spu *spu = NULL;
|
||||
|
||||
mutex_lock(&spu_mutex);
|
||||
if (!list_empty(&spu_list[node])) {
|
||||
spu = list_entry(spu_list[node].next, struct spu, list);
|
||||
list_del_init(&spu->list);
|
||||
pr_debug("Got SPU %d %d\n", spu->number, spu->node);
|
||||
}
|
||||
mutex_unlock(&spu_mutex);
|
||||
|
||||
if (spu)
|
||||
spu_init_channels(spu);
|
||||
return spu;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_alloc_node);
|
||||
|
||||
struct spu *spu_alloc(void)
|
||||
{
|
||||
struct spu *spu = NULL;
|
||||
int node;
|
||||
|
||||
for (node = 0; node < MAX_NUMNODES; node++) {
|
||||
spu = spu_alloc_node(node);
|
||||
if (spu)
|
||||
break;
|
||||
}
|
||||
|
||||
return spu;
|
||||
}
|
||||
|
||||
void spu_free(struct spu *spu)
|
||||
{
|
||||
mutex_lock(&spu_mutex);
|
||||
list_add_tail(&spu->list, &spu_list[spu->node]);
|
||||
mutex_unlock(&spu_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_free);
|
||||
EXPORT_SYMBOL_GPL(spu_init_channels);
|
||||
|
||||
static int spu_shutdown(struct sys_device *sysdev)
|
||||
{
|
||||
|
@ -481,12 +461,12 @@ struct sysdev_class spu_sysdev_class = {
|
|||
int spu_add_sysdev_attr(struct sysdev_attribute *attr)
|
||||
{
|
||||
struct spu *spu;
|
||||
mutex_lock(&spu_mutex);
|
||||
|
||||
mutex_lock(&spu_full_list_mutex);
|
||||
list_for_each_entry(spu, &spu_full_list, full_list)
|
||||
sysdev_create_file(&spu->sysdev, attr);
|
||||
mutex_unlock(&spu_full_list_mutex);
|
||||
|
||||
mutex_unlock(&spu_mutex);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
|
||||
|
@ -494,12 +474,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
|
|||
int spu_add_sysdev_attr_group(struct attribute_group *attrs)
|
||||
{
|
||||
struct spu *spu;
|
||||
mutex_lock(&spu_mutex);
|
||||
|
||||
mutex_lock(&spu_full_list_mutex);
|
||||
list_for_each_entry(spu, &spu_full_list, full_list)
|
||||
sysfs_create_group(&spu->sysdev.kobj, attrs);
|
||||
mutex_unlock(&spu_full_list_mutex);
|
||||
|
||||
mutex_unlock(&spu_mutex);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
|
||||
|
@ -508,24 +488,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
|
|||
void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
|
||||
{
|
||||
struct spu *spu;
|
||||
mutex_lock(&spu_mutex);
|
||||
|
||||
mutex_lock(&spu_full_list_mutex);
|
||||
list_for_each_entry(spu, &spu_full_list, full_list)
|
||||
sysdev_remove_file(&spu->sysdev, attr);
|
||||
|
||||
mutex_unlock(&spu_mutex);
|
||||
mutex_unlock(&spu_full_list_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
|
||||
|
||||
void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
|
||||
{
|
||||
struct spu *spu;
|
||||
mutex_lock(&spu_mutex);
|
||||
|
||||
mutex_lock(&spu_full_list_mutex);
|
||||
list_for_each_entry(spu, &spu_full_list, full_list)
|
||||
sysfs_remove_group(&spu->sysdev.kobj, attrs);
|
||||
|
||||
mutex_unlock(&spu_mutex);
|
||||
mutex_unlock(&spu_full_list_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
|
||||
|
||||
|
@ -553,16 +531,19 @@ static int __init create_spu(void *data)
|
|||
int ret;
|
||||
static int number;
|
||||
unsigned long flags;
|
||||
struct timespec ts;
|
||||
|
||||
ret = -ENOMEM;
|
||||
spu = kzalloc(sizeof (*spu), GFP_KERNEL);
|
||||
if (!spu)
|
||||
goto out;
|
||||
|
||||
spu->alloc_state = SPU_FREE;
|
||||
|
||||
spin_lock_init(&spu->register_lock);
|
||||
mutex_lock(&spu_mutex);
|
||||
spin_lock(&spu_lock);
|
||||
spu->number = number++;
|
||||
mutex_unlock(&spu_mutex);
|
||||
spin_unlock(&spu_lock);
|
||||
|
||||
ret = spu_create_spu(spu, data);
|
||||
|
||||
|
@ -579,15 +560,22 @@ static int __init create_spu(void *data)
|
|||
if (ret)
|
||||
goto out_free_irqs;
|
||||
|
||||
mutex_lock(&spu_mutex);
|
||||
spin_lock_irqsave(&spu_list_lock, flags);
|
||||
list_add(&spu->list, &spu_list[spu->node]);
|
||||
list_add(&spu->full_list, &spu_full_list);
|
||||
spin_unlock_irqrestore(&spu_list_lock, flags);
|
||||
mutex_unlock(&spu_mutex);
|
||||
mutex_lock(&cbe_spu_info[spu->node].list_mutex);
|
||||
list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
|
||||
cbe_spu_info[spu->node].n_spus++;
|
||||
mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
|
||||
|
||||
spu->stats.utilization_state = SPU_UTIL_IDLE;
|
||||
spu->stats.tstamp = jiffies;
|
||||
mutex_lock(&spu_full_list_mutex);
|
||||
spin_lock_irqsave(&spu_full_list_lock, flags);
|
||||
list_add(&spu->full_list, &spu_full_list);
|
||||
spin_unlock_irqrestore(&spu_full_list_lock, flags);
|
||||
mutex_unlock(&spu_full_list_mutex);
|
||||
|
||||
spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
|
||||
ktime_get_ts(&ts);
|
||||
spu->stats.tstamp = timespec_to_ns(&ts);
|
||||
|
||||
INIT_LIST_HEAD(&spu->aff_list);
|
||||
|
||||
goto out;
|
||||
|
||||
|
@ -608,12 +596,20 @@ static const char *spu_state_names[] = {
static unsigned long long spu_acct_time(struct spu *spu,
enum spu_utilization_state state)
{
struct timespec ts;
unsigned long long time = spu->stats.times[state];

if (spu->stats.utilization_state == state)
time += jiffies - spu->stats.tstamp;
/*
* If the spu is idle or the context is stopped, utilization
* statistics are not updated. Apply the time delta from the
* last recorded state of the spu.
*/
if (spu->stats.util_state == state) {
ktime_get_ts(&ts);
time += timespec_to_ns(&ts) - spu->stats.tstamp;
}

return jiffies_to_msecs(time);
return time / NSEC_PER_MSEC;
}
|
||||
|
||||
|
||||
|
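
With this hunk the per-state SPU times are kept in nanoseconds (timespec_to_ns of a monotonic timestamp) rather than jiffies, and only converted to milliseconds when reported. A minimal standalone sketch of the same accounting pattern, using an invented stats structure purely for illustration:

#include <linux/time.h>

struct util_stats {
	int cur_state;			/* state currently being accumulated  */
	unsigned long long tstamp;	/* ns timestamp of the last transition */
	unsigned long long times[4];	/* accumulated ns per state            */
};

/* illustrative only: report time spent in @state, in milliseconds */
static unsigned long long acct_time_ms(struct util_stats *s, int state)
{
	struct timespec ts;
	unsigned long long t = s->times[state];

	/* the active state has an open interval; close it at read time */
	if (s->cur_state == state) {
		ktime_get_ts(&ts);
		t += timespec_to_ns(&ts) - s->tstamp;
	}
	return t / NSEC_PER_MSEC;
}

Because the conversion to milliseconds happens only at reporting time, short intervals are no longer rounded away at jiffy granularity.
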
@ -623,11 +619,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
|
|||
|
||||
return sprintf(buf, "%s %llu %llu %llu %llu "
|
||||
"%llu %llu %llu %llu %llu %llu %llu %llu\n",
|
||||
spu_state_names[spu->stats.utilization_state],
|
||||
spu_state_names[spu->stats.util_state],
|
||||
spu_acct_time(spu, SPU_UTIL_USER),
|
||||
spu_acct_time(spu, SPU_UTIL_SYSTEM),
|
||||
spu_acct_time(spu, SPU_UTIL_IOWAIT),
|
||||
spu_acct_time(spu, SPU_UTIL_IDLE),
|
||||
spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
|
||||
spu->stats.vol_ctx_switch,
|
||||
spu->stats.invol_ctx_switch,
|
||||
spu->stats.slb_flt,
|
||||
|
@ -640,12 +636,146 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
|
|||
|
||||
static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
|
||||
|
||||
/* Hardcoded affinity idxs for QS20 */
|
||||
#define SPES_PER_BE 8
|
||||
static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 };
|
||||
static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
static struct spu *spu_lookup_reg(int node, u32 reg)
|
||||
{
|
||||
struct spu *spu;
|
||||
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
|
||||
if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg)
|
||||
return spu;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void init_aff_QS20_harcoded(void)
|
||||
{
|
||||
int node, i;
|
||||
struct spu *last_spu, *spu;
|
||||
u32 reg;
|
||||
|
||||
for (node = 0; node < MAX_NUMNODES; node++) {
|
||||
last_spu = NULL;
|
||||
for (i = 0; i < SPES_PER_BE; i++) {
|
||||
reg = QS20_reg_idxs[i];
|
||||
spu = spu_lookup_reg(node, reg);
|
||||
if (!spu)
|
||||
continue;
|
||||
spu->has_mem_affinity = QS20_reg_memory[reg];
|
||||
if (last_spu)
|
||||
list_add_tail(&spu->aff_list,
|
||||
&last_spu->aff_list);
|
||||
last_spu = spu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int of_has_vicinity(void)
|
||||
{
|
||||
struct spu* spu;
|
||||
|
||||
spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list);
|
||||
return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL;
|
||||
}
|
||||
|
||||
static struct spu *aff_devnode_spu(int cbe, struct device_node *dn)
|
||||
{
|
||||
struct spu *spu;
|
||||
|
||||
list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
|
||||
if (spu_devnode(spu) == dn)
|
||||
return spu;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct spu *
|
||||
aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid)
|
||||
{
|
||||
struct spu *spu;
|
||||
const phandle *vic_handles;
|
||||
int lenp, i;
|
||||
|
||||
list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
|
||||
if (spu_devnode(spu) == avoid)
|
||||
continue;
|
||||
vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp);
|
||||
for (i=0; i < (lenp / sizeof(phandle)); i++) {
|
||||
if (vic_handles[i] == target->linux_phandle)
|
||||
return spu;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void init_aff_fw_vicinity_node(int cbe)
|
||||
{
|
||||
struct spu *spu, *last_spu;
|
||||
struct device_node *vic_dn, *last_spu_dn;
|
||||
phandle avoid_ph;
|
||||
const phandle *vic_handles;
|
||||
const char *name;
|
||||
int lenp, i, added, mem_aff;
|
||||
|
||||
last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list);
|
||||
avoid_ph = 0;
|
||||
for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
|
||||
last_spu_dn = spu_devnode(last_spu);
|
||||
vic_handles = get_property(last_spu_dn, "vicinity", &lenp);
|
||||
|
||||
for (i = 0; i < (lenp / sizeof(phandle)); i++) {
|
||||
if (vic_handles[i] == avoid_ph)
|
||||
continue;
|
||||
|
||||
vic_dn = of_find_node_by_phandle(vic_handles[i]);
|
||||
if (!vic_dn)
|
||||
continue;
|
||||
|
||||
name = get_property(vic_dn, "name", NULL);
|
||||
if (strcmp(name, "spe") == 0) {
|
||||
spu = aff_devnode_spu(cbe, vic_dn);
|
||||
avoid_ph = last_spu_dn->linux_phandle;
|
||||
}
|
||||
else {
|
||||
mem_aff = strcmp(name, "mic-tm") == 0;
|
||||
spu = aff_node_next_to(cbe, vic_dn, last_spu_dn);
|
||||
if (!spu)
|
||||
continue;
|
||||
if (mem_aff) {
|
||||
last_spu->has_mem_affinity = 1;
|
||||
spu->has_mem_affinity = 1;
|
||||
}
|
||||
avoid_ph = vic_dn->linux_phandle;
|
||||
}
|
||||
list_add_tail(&spu->aff_list, &last_spu->aff_list);
|
||||
last_spu = spu;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void init_aff_fw_vicinity(void)
|
||||
{
|
||||
int cbe;
|
||||
|
||||
/* sets has_mem_affinity for each spu, as well as the
* spu->aff_list list linking each spu to its neighbors
*/
|
||||
for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
|
||||
init_aff_fw_vicinity_node(cbe);
|
||||
}
|
||||
|
||||
static int __init init_spu_base(void)
|
||||
{
|
||||
int i, ret = 0;
|
||||
|
||||
for (i = 0; i < MAX_NUMNODES; i++)
|
||||
INIT_LIST_HEAD(&spu_list[i]);
|
||||
for (i = 0; i < MAX_NUMNODES; i++) {
|
||||
mutex_init(&cbe_spu_info[i].list_mutex);
|
||||
INIT_LIST_HEAD(&cbe_spu_info[i].spus);
|
||||
}
|
||||
|
||||
if (!spu_management_ops)
|
||||
goto out;
|
||||
|
@ -675,16 +805,25 @@ static int __init init_spu_base(void)
|
|||
fb_append_extra_logo(&logo_spe_clut224, ret);
|
||||
}
|
||||
|
||||
mutex_lock(&spu_full_list_mutex);
|
||||
xmon_register_spus(&spu_full_list);
|
||||
|
||||
crash_register_spus(&spu_full_list);
|
||||
mutex_unlock(&spu_full_list_mutex);
|
||||
spu_add_sysdev_attr(&attr_stat);
|
||||
|
||||
if (of_has_vicinity()) {
|
||||
init_aff_fw_vicinity();
|
||||
} else {
|
||||
long root = of_get_flat_dt_root();
|
||||
if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
|
||||
init_aff_QS20_harcoded();
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_unregister_sysdev_class:
|
||||
sysdev_class_unregister(&spu_sysdev_class);
|
||||
out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
module_init(init_spu_base);
|
||||
|
|
|
@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = {
* this file is not used and the syscalls directly enter the fs code */

asmlinkage long sys_spu_create(const char __user *name,
unsigned int flags, mode_t mode)
unsigned int flags, mode_t mode, int neighbor_fd)
{
long ret;
struct module *owner = spufs_calls.owner;
struct file *neighbor;
int fput_needed;

ret = -ENOSYS;
if (owner && try_module_get(owner)) {
ret = spufs_calls.create_thread(name, flags, mode);
if (flags & SPU_CREATE_AFFINITY_SPU) {
neighbor = fget_light(neighbor_fd, &fput_needed);
if (neighbor) {
ret = spufs_calls.create_thread(name, flags,
mode, neighbor);
fput_light(neighbor, fput_needed);
}
}
else {
ret = spufs_calls.create_thread(name, flags,
mode, NULL);
}
module_put(owner);
}
return ret;
|
||||
|
|
|
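
sys_spu_create gains a fourth argument, the file descriptor of an already created context, which is only dereferenced when SPU_CREATE_AFFINITY_SPU is set. A hedged user-space sketch of the intended flow follows; the syscall wrapper is an assumption (glibc provides none), the flag values shown are illustrative and really come from the kernel headers, and /spu is assumed to be the spufs mount point.

#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* illustrative values only; use the kernel's SPU_CREATE_* definitions */
#define SPU_CREATE_GANG			0x0002
#define SPU_CREATE_AFFINITY_SPU		0x0010

static long spu_create4(const char *name, unsigned int flags, mode_t mode, int nfd)
{
	return syscall(SYS_spu_create, name, flags, mode, nfd);
}

int main(void)
{
	/* a gang, a reference context, and a neighbour placed next to it;
	 * the reference context's fd is passed as the fourth argument */
	long gang = spu_create4("/spu/mygang", SPU_CREATE_GANG, 0755, -1);
	long ref = spu_create4("/spu/mygang/ref", 0, 0755, -1);
	long nb = spu_create4("/spu/mygang/n1", SPU_CREATE_AFFINITY_SPU, 0755, ref);

	return (gang < 0 || ref < 0 || nb < 0) ? 1 : 0;
}
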
@ -22,6 +22,7 @@
|
|||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/spu.h>
|
||||
|
@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
|
|||
ctx->ops = &spu_backing_ops;
|
||||
ctx->owner = get_task_mm(current);
|
||||
INIT_LIST_HEAD(&ctx->rq);
|
||||
INIT_LIST_HEAD(&ctx->aff_list);
|
||||
if (gang)
|
||||
spu_gang_add_ctx(gang, ctx);
|
||||
ctx->cpus_allowed = current->cpus_allowed;
|
||||
spu_set_timeslice(ctx);
|
||||
ctx->stats.execution_state = SPUCTX_UTIL_USER;
|
||||
ctx->stats.tstamp = jiffies;
|
||||
ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
|
||||
|
||||
atomic_inc(&nr_spu_contexts);
|
||||
goto out;
|
||||
|
@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
|
|||
spu_fini_csa(&ctx->csa);
|
||||
if (ctx->gang)
|
||||
spu_gang_remove_ctx(ctx->gang, ctx);
|
||||
if (ctx->prof_priv_kref)
|
||||
kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
|
||||
BUG_ON(!list_empty(&ctx->rq));
|
||||
atomic_dec(&nr_spu_contexts);
|
||||
kfree(ctx);
|
||||
|
@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
|
|||
void spu_acquire_saved(struct spu_context *ctx)
|
||||
{
|
||||
spu_acquire(ctx);
|
||||
if (ctx->state != SPU_STATE_SAVED)
|
||||
if (ctx->state != SPU_STATE_SAVED) {
|
||||
set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
|
||||
spu_deactivate(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* spu_release_saved - unlock spu context and return it to the runqueue
|
||||
* @ctx: context to unlock
|
||||
*/
|
||||
void spu_release_saved(struct spu_context *ctx)
|
||||
{
|
||||
BUG_ON(ctx->state != SPU_STATE_SAVED);
|
||||
|
||||
if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags))
|
||||
spu_activate(ctx, 0);
|
||||
|
||||
spu_release(ctx);
|
||||
}
|
||||
|
||||
void spu_set_profile_private_kref(struct spu_context *ctx,
|
||||
struct kref *prof_info_kref,
|
||||
void ( * prof_info_release) (struct kref *kref))
|
||||
{
|
||||
ctx->prof_priv_kref = prof_info_kref;
|
||||
ctx->prof_priv_release = prof_info_release;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
|
||||
|
||||
void *spu_get_profile_private_kref(struct spu_context *ctx)
|
||||
{
|
||||
return ctx->prof_priv_kref;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
|
||||
|
||||
|
||||
|
|
|
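
spu_acquire_saved now records whether it had to deactivate a running context (SPU_SCHED_WAS_ACTIVE), and the new spu_release_saved reactivates it, so every caller that pokes at saved state must pair the two. A sketch of the resulting pattern, assuming the spufs internal headers and a context whose local store save area is being read (the function itself is illustrative):

/* illustrative pattern for any spufs file that reads saved state */
static u64 example_saved_read(struct spu_context *ctx)
{
	u64 val;

	spu_acquire_saved(ctx);		/* deactivates a running context      */
	val = ctx->csa.lscsa->decr.slot[0];
	spu_release_saved(ctx);		/* reactivates it if it was running   */

	return val;
}

The hunks that follow convert the existing spufs files from spu_release() to spu_release_saved() to match this pairing.
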
@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file)
|
|||
spu_acquire_saved(ctx_info->ctx);
|
||||
for (j = 0; j < spufs_coredump_num_notes; j++)
|
||||
spufs_arch_write_note(ctx_info, j, file);
|
||||
spu_release(ctx_info->ctx);
|
||||
spu_release_saved(ctx_info->ctx);
|
||||
list_del(&ctx_info->list);
|
||||
kfree(ctx_info);
|
||||
}
|
||||
|
|
|
@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx)
|
|||
if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
|
||||
return 0;
|
||||
|
||||
spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT);
|
||||
spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
|
||||
|
||||
pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
|
||||
dsisr, ctx->state);
|
||||
|
||||
ctx->stats.hash_flt++;
|
||||
if (ctx->state == SPU_STATE_RUNNABLE) {
|
||||
if (ctx->state == SPU_STATE_RUNNABLE)
|
||||
ctx->spu->stats.hash_flt++;
|
||||
spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT);
|
||||
}
|
||||
|
||||
/* we must not hold the lock when entering spu_handle_mm_fault */
|
||||
spu_release(ctx);
|
||||
|
@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx)
|
|||
} else
|
||||
spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
|
||||
|
||||
spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
|
||||
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spufs_handle_class1);
|
||||
|
|
|
@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer,
|
|||
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_regs_read(ctx, buffer, size, pos);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer,
|
|||
ret = copy_from_user(lscsa->gprs + *pos - size,
|
||||
buffer, size) ? -EFAULT : size;
|
||||
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
|
|||
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_fpcr_read(ctx, buffer, size, pos);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer,
|
|||
ret = copy_from_user((char *)&lscsa->fpcr + *pos - size,
|
||||
buffer, size) ? -EFAULT : size;
|
||||
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
|
|||
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_signal1_read(ctx, buf, len, pos);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = {
|
|||
.mmap = spufs_signal1_mmap,
|
||||
};
|
||||
|
||||
static const struct file_operations spufs_signal1_nosched_fops = {
|
||||
.open = spufs_signal1_open,
|
||||
.release = spufs_signal1_release,
|
||||
.write = spufs_signal1_write,
|
||||
.mmap = spufs_signal1_mmap,
|
||||
};
|
||||
|
||||
static int spufs_signal2_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct spufs_inode_info *i = SPUFS_I(inode);
|
||||
|
@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
|
|||
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_signal2_read(ctx, buf, len, pos);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = {
|
|||
.mmap = spufs_signal2_mmap,
|
||||
};
|
||||
|
||||
static const struct file_operations spufs_signal2_nosched_fops = {
|
||||
.open = spufs_signal2_open,
|
||||
.release = spufs_signal2_release,
|
||||
.write = spufs_signal2_write,
|
||||
.mmap = spufs_signal2_mmap,
|
||||
};
|
||||
|
||||
static void spufs_signal1_type_set(void *data, u64 val)
|
||||
{
|
||||
struct spu_context *ctx = data;
|
||||
|
@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val)
|
|||
struct spu_lscsa *lscsa = ctx->csa.lscsa;
|
||||
spu_acquire_saved(ctx);
|
||||
lscsa->decr.slot[0] = (u32) val;
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
}
|
||||
|
||||
static u64 __spufs_decr_get(void *data)
|
||||
|
@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data)
|
|||
u64 ret;
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_decr_get(data);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
|
||||
|
@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
|
|||
static void spufs_decr_status_set(void *data, u64 val)
|
||||
{
|
||||
struct spu_context *ctx = data;
|
||||
struct spu_lscsa *lscsa = ctx->csa.lscsa;
|
||||
spu_acquire_saved(ctx);
|
||||
lscsa->decr_status.slot[0] = (u32) val;
|
||||
spu_release(ctx);
|
||||
if (val)
|
||||
ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
|
||||
else
|
||||
ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
|
||||
spu_release_saved(ctx);
|
||||
}
|
||||
|
||||
static u64 __spufs_decr_status_get(void *data)
|
||||
{
|
||||
struct spu_context *ctx = data;
|
||||
struct spu_lscsa *lscsa = ctx->csa.lscsa;
|
||||
return lscsa->decr_status.slot[0];
|
||||
if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
|
||||
return SPU_DECR_STATUS_RUNNING;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 spufs_decr_status_get(void *data)
|
||||
|
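
After this change decr_status no longer exposes the raw local-store save slot: writing a non-zero value marks the decrementer as running in the saved MFC_CNTL image, and reading reports whether that bit is set. A small user-space sketch of poking the file for a saved context; the path, the file being writable, and the exact text format are all assumptions, since spufs attribute files simply accept and print plain numeric strings.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32];
	ssize_t n;
	/* path assumes a context created at /spu/myctx */
	int fd = open("/spu/myctx/decr_status", O_RDWR);

	if (fd < 0)
		return 1;

	write(fd, "1", 1);			/* mark the decrementer as running */

	lseek(fd, 0, SEEK_SET);
	n = read(fd, buf, sizeof(buf) - 1);	/* read back the running flag */
	if (n > 0) {
		buf[n] = '\0';
		printf("decr_status: %s", buf);
	}
	close(fd);
	return 0;
}
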
@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data)
|
|||
u64 ret;
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_decr_status_get(data);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
|
||||
|
@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val)
|
|||
struct spu_lscsa *lscsa = ctx->csa.lscsa;
|
||||
spu_acquire_saved(ctx);
|
||||
lscsa->event_mask.slot[0] = (u32) val;
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
}
|
||||
|
||||
static u64 __spufs_event_mask_get(void *data)
|
||||
|
@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data)
|
|||
u64 ret;
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_event_mask_get(data);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
|
||||
|
@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data)
|
|||
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_event_status_get(data);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
|
||||
|
@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val)
|
|||
struct spu_lscsa *lscsa = ctx->csa.lscsa;
|
||||
spu_acquire_saved(ctx);
|
||||
lscsa->srr0.slot[0] = (u32) val;
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
}
|
||||
|
||||
static u64 spufs_srr0_get(void *data)
|
||||
|
@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data)
|
|||
u64 ret;
|
||||
spu_acquire_saved(ctx);
|
||||
ret = lscsa->srr0.slot[0];
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
return ret;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
|
||||
|
@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data)
|
|||
|
||||
spu_acquire_saved(ctx);
|
||||
ret = __spufs_lslr_get(data);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
|
|||
spin_lock(&ctx->csa.register_lock);
|
||||
ret = __spufs_mbox_info_read(ctx, buf, len, pos);
|
||||
spin_unlock(&ctx->csa.register_lock);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
|
|||
spin_lock(&ctx->csa.register_lock);
|
||||
ret = __spufs_ibox_info_read(ctx, buf, len, pos);
|
||||
spin_unlock(&ctx->csa.register_lock);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
|
|||
spin_lock(&ctx->csa.register_lock);
|
||||
ret = __spufs_wbox_info_read(ctx, buf, len, pos);
|
||||
spin_unlock(&ctx->csa.register_lock);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
|
|||
spin_lock(&ctx->csa.register_lock);
|
||||
ret = __spufs_dma_info_read(ctx, buf, len, pos);
|
||||
spin_unlock(&ctx->csa.register_lock);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
|
|||
spin_lock(&ctx->csa.register_lock);
|
||||
ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
|
||||
spin_unlock(&ctx->csa.register_lock);
|
||||
spu_release(ctx);
|
||||
spu_release_saved(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = {
|
|||
};
|
||||
|
||||
static unsigned long long spufs_acct_time(struct spu_context *ctx,
|
||||
enum spuctx_execution_state state)
|
||||
enum spu_utilization_state state)
|
||||
{
|
||||
unsigned long time = ctx->stats.times[state];
|
||||
struct timespec ts;
|
||||
unsigned long long time = ctx->stats.times[state];
|
||||
|
||||
if (ctx->stats.execution_state == state)
|
||||
time += jiffies - ctx->stats.tstamp;
|
||||
/*
|
||||
* In general, utilization statistics are updated by the controlling
|
||||
* thread as the spu context moves through various well defined
|
||||
* state transitions, but if the context is lazily loaded its
|
||||
* utilization statistics are not updated as the controlling thread
|
||||
* is not tightly coupled with the execution of the spu context. We
|
||||
* calculate and apply the time delta from the last recorded state
|
||||
* of the spu context.
|
||||
*/
|
||||
if (ctx->spu && ctx->stats.util_state == state) {
|
||||
ktime_get_ts(&ts);
|
||||
time += timespec_to_ns(&ts) - ctx->stats.tstamp;
|
||||
}
|
||||
|
||||
return jiffies_to_msecs(time);
|
||||
return time / NSEC_PER_MSEC;
|
||||
}
|
||||
|
||||
static unsigned long long spufs_slb_flts(struct spu_context *ctx)
|
||||
|
@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private)
|
|||
spu_acquire(ctx);
|
||||
seq_printf(s, "%s %llu %llu %llu %llu "
|
||||
"%llu %llu %llu %llu %llu %llu %llu %llu\n",
|
||||
ctx_state_names[ctx->stats.execution_state],
|
||||
spufs_acct_time(ctx, SPUCTX_UTIL_USER),
|
||||
spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM),
|
||||
spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT),
|
||||
spufs_acct_time(ctx, SPUCTX_UTIL_LOADED),
|
||||
ctx_state_names[ctx->stats.util_state],
|
||||
spufs_acct_time(ctx, SPU_UTIL_USER),
|
||||
spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
|
||||
spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
|
||||
spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
|
||||
ctx->stats.vol_ctx_switch,
|
||||
ctx->stats.invol_ctx_switch,
|
||||
spufs_slb_flts(ctx),
|
||||
|
@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = {
|
|||
{ "mbox_stat", &spufs_mbox_stat_fops, 0444, },
|
||||
{ "ibox_stat", &spufs_ibox_stat_fops, 0444, },
|
||||
{ "wbox_stat", &spufs_wbox_stat_fops, 0444, },
|
||||
{ "signal1", &spufs_signal1_fops, 0666, },
|
||||
{ "signal2", &spufs_signal2_fops, 0666, },
|
||||
{ "signal1", &spufs_signal1_nosched_fops, 0222, },
|
||||
{ "signal2", &spufs_signal2_nosched_fops, 0222, },
|
||||
{ "signal1_type", &spufs_signal1_type, 0666, },
|
||||
{ "signal2_type", &spufs_signal2_type, 0666, },
|
||||
{ "mss", &spufs_mss_fops, 0666, },
|
||||
|
|
|
@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void)
|
|||
|
||||
kref_init(&gang->kref);
|
||||
mutex_init(&gang->mutex);
|
||||
mutex_init(&gang->aff_mutex);
|
||||
INIT_LIST_HEAD(&gang->list);
|
||||
INIT_LIST_HEAD(&gang->aff_list_head);
|
||||
|
||||
out:
|
||||
return gang;
|
||||
|
@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
|
|||
{
|
||||
mutex_lock(&gang->mutex);
|
||||
WARN_ON(ctx->gang != gang);
|
||||
if (!list_empty(&ctx->aff_list)) {
|
||||
list_del_init(&ctx->aff_list);
|
||||
gang->aff_flags &= ~AFF_OFFSETS_SET;
|
||||
}
|
||||
list_del_init(&ctx->gang_list);
|
||||
gang->contexts--;
|
||||
mutex_unlock(&gang->mutex);
|
||||
|
|
|
@ -316,11 +316,107 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int spufs_create_context(struct inode *inode,
|
||||
struct dentry *dentry,
|
||||
struct vfsmount *mnt, int flags, int mode)
|
||||
static struct spu_context *
|
||||
spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
|
||||
struct file *filp)
|
||||
{
|
||||
struct spu_context *tmp, *neighbor;
|
||||
int count, node;
|
||||
int aff_supp;
|
||||
|
||||
aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
|
||||
struct spu, cbe_list))->aff_list);
|
||||
|
||||
if (!aff_supp)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (flags & SPU_CREATE_GANG)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (flags & SPU_CREATE_AFFINITY_MEM &&
|
||||
gang->aff_ref_ctx &&
|
||||
gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
|
||||
return ERR_PTR(-EEXIST);
|
||||
|
||||
if (gang->aff_flags & AFF_MERGED)
|
||||
return ERR_PTR(-EBUSY);
|
||||
|
||||
neighbor = NULL;
|
||||
if (flags & SPU_CREATE_AFFINITY_SPU) {
|
||||
if (!filp || filp->f_op != &spufs_context_fops)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
neighbor = get_spu_context(
|
||||
SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
|
||||
|
||||
if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
|
||||
!list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
|
||||
!list_entry(neighbor->aff_list.next, struct spu_context,
|
||||
aff_list)->aff_head)
|
||||
return ERR_PTR(-EEXIST);
|
||||
|
||||
if (gang != neighbor->gang)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
count = 1;
|
||||
list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
|
||||
count++;
|
||||
if (list_empty(&neighbor->aff_list))
|
||||
count++;
|
||||
|
||||
for (node = 0; node < MAX_NUMNODES; node++) {
|
||||
if ((cbe_spu_info[node].n_spus - atomic_read(
|
||||
&cbe_spu_info[node].reserved_spus)) >= count)
|
||||
break;
|
||||
}
|
||||
|
||||
if (node == MAX_NUMNODES)
|
||||
return ERR_PTR(-EEXIST);
|
||||
}
|
||||
|
||||
return neighbor;
|
||||
}
|
||||
|
||||
static void
|
||||
spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
|
||||
struct spu_context *neighbor)
|
||||
{
|
||||
if (flags & SPU_CREATE_AFFINITY_MEM)
|
||||
ctx->gang->aff_ref_ctx = ctx;
|
||||
|
||||
if (flags & SPU_CREATE_AFFINITY_SPU) {
|
||||
if (list_empty(&neighbor->aff_list)) {
|
||||
list_add_tail(&neighbor->aff_list,
|
||||
&ctx->gang->aff_list_head);
|
||||
neighbor->aff_head = 1;
|
||||
}
|
||||
|
||||
if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
|
||||
|| list_entry(neighbor->aff_list.next, struct spu_context,
|
||||
aff_list)->aff_head) {
|
||||
list_add(&ctx->aff_list, &neighbor->aff_list);
|
||||
} else {
|
||||
list_add_tail(&ctx->aff_list, &neighbor->aff_list);
|
||||
if (neighbor->aff_head) {
|
||||
neighbor->aff_head = 0;
|
||||
ctx->aff_head = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ctx->gang->aff_ref_ctx)
|
||||
ctx->gang->aff_ref_ctx = ctx;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
spufs_create_context(struct inode *inode, struct dentry *dentry,
|
||||
struct vfsmount *mnt, int flags, int mode,
|
||||
struct file *aff_filp)
|
||||
{
|
||||
int ret;
|
||||
int affinity;
|
||||
struct spu_gang *gang;
|
||||
struct spu_context *neighbor;
|
||||
|
||||
ret = -EPERM;
|
||||
if ((flags & SPU_CREATE_NOSCHED) &&
|
||||
|
@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode,
|
|||
if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
|
||||
goto out_unlock;
|
||||
|
||||
gang = NULL;
|
||||
neighbor = NULL;
|
||||
affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
|
||||
if (affinity) {
|
||||
gang = SPUFS_I(inode)->i_gang;
|
||||
ret = -EINVAL;
|
||||
if (!gang)
|
||||
goto out_unlock;
|
||||
mutex_lock(&gang->aff_mutex);
|
||||
neighbor = spufs_assert_affinity(flags, gang, aff_filp);
|
||||
if (IS_ERR(neighbor)) {
|
||||
ret = PTR_ERR(neighbor);
|
||||
goto out_aff_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
goto out_aff_unlock;
|
||||
|
||||
if (affinity)
|
||||
spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
|
||||
neighbor);
|
||||
|
||||
/*
|
||||
* get references for dget and mntget, will be released
|
||||
|
@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode,
|
|||
goto out;
|
||||
}
|
||||
|
||||
out_aff_unlock:
|
||||
if (affinity)
|
||||
mutex_unlock(&gang->aff_mutex);
|
||||
out_unlock:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
out:
|
||||
|
@ -450,7 +569,8 @@ out:
|
|||
|
||||
static struct file_system_type spufs_type;
|
||||
|
||||
long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
|
||||
long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
|
||||
struct file *filp)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
int ret;
|
||||
|
@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
|
|||
dentry, nd->mnt, mode);
|
||||
else
|
||||
return spufs_create_context(nd->dentry->d_inode,
|
||||
dentry, nd->mnt, flags, mode);
|
||||
dentry, nd->mnt, flags, mode, filp);
|
||||
|
||||
out_dput:
|
||||
dput(dentry);
|
||||
|
|
|
@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu)
|
|||
wake_up_all(&ctx->stop_wq);
|
||||
}
|
||||
|
||||
static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
|
||||
static inline int spu_stopped(struct spu_context *ctx, u32 *stat)
|
||||
{
|
||||
struct spu *spu;
|
||||
u64 pte_fault;
|
||||
|
||||
*stat = ctx->ops->status_read(ctx);
|
||||
if (ctx->state != SPU_STATE_RUNNABLE)
|
||||
return 1;
|
||||
|
||||
spu = ctx->spu;
|
||||
if (ctx->state != SPU_STATE_RUNNABLE ||
|
||||
test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
|
||||
return 1;
|
||||
pte_fault = spu->dsisr &
|
||||
(MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
|
||||
return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ?
|
||||
|
@ -124,8 +126,10 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int spu_run_init(struct spu_context *ctx, u32 * npc)
|
||||
static int spu_run_init(struct spu_context *ctx, u32 *npc)
|
||||
{
|
||||
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
|
||||
|
||||
if (ctx->flags & SPU_CREATE_ISOLATE) {
|
||||
unsigned long runcntl;
|
||||
|
||||
|
@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc)
|
|||
ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
|
||||
}
|
||||
|
||||
spuctx_switch_state(ctx, SPU_UTIL_USER);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int spu_run_fini(struct spu_context *ctx, u32 * npc,
|
||||
u32 * status)
|
||||
static int spu_run_fini(struct spu_context *ctx, u32 *npc,
|
||||
u32 *status)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
*status = ctx->ops->status_read(ctx);
|
||||
*npc = ctx->ops->npc_read(ctx);
|
||||
|
||||
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
|
||||
spu_release(ctx);
|
||||
|
||||
if (signal_pending(current))
|
||||
|
@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx)
|
|||
return ret;
|
||||
}
|
||||
|
||||
long spufs_run_spu(struct file *file, struct spu_context *ctx,
|
||||
u32 *npc, u32 *event)
|
||||
long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
|
||||
{
|
||||
int ret;
|
||||
struct spu *spu;
|
||||
u32 status;
|
||||
|
||||
if (mutex_lock_interruptible(&ctx->run_mutex))
|
||||
|
@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
|
|||
ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
|
||||
if (unlikely(ret))
|
||||
break;
|
||||
spu = ctx->spu;
|
||||
if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
|
||||
&ctx->sched_flags))) {
|
||||
if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
|
||||
spu_switch_notify(spu, ctx);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
|
||||
|
||||
if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
|
||||
(status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
|
||||
ret = spu_process_callback(ctx);
|
||||
|
@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
|
|||
(ctx->state == SPU_STATE_RUNNABLE))
|
||||
ctx->stats.libassist++;
|
||||
|
||||
|
||||
ctx->ops->master_stop(ctx);
|
||||
ret = spu_run_fini(ctx, npc, &status);
|
||||
spu_yield(ctx);
|
||||
|
|
|
@ -51,9 +51,6 @@ struct spu_prio_array {
|
|||
DECLARE_BITMAP(bitmap, MAX_PRIO);
|
||||
struct list_head runq[MAX_PRIO];
|
||||
spinlock_t runq_lock;
|
||||
struct list_head active_list[MAX_NUMNODES];
|
||||
struct mutex active_mutex[MAX_NUMNODES];
|
||||
int nr_active[MAX_NUMNODES];
|
||||
int nr_waiting;
|
||||
};
|
||||
|
||||
|
@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
|
|||
ctx->policy = current->policy;
|
||||
|
||||
/*
|
||||
* A lot of places that don't hold active_mutex poke into
|
||||
* A lot of places that don't hold list_mutex poke into
|
||||
* cpus_allowed, including grab_runnable_context which
|
||||
* already holds the runq_lock. So abuse runq_lock
|
||||
* to protect this field as well.
|
||||
|
@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx)
|
|||
{
|
||||
int node = ctx->spu->node;
|
||||
|
||||
mutex_lock(&spu_prio->active_mutex[node]);
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
__spu_update_sched_info(ctx);
|
||||
mutex_unlock(&spu_prio->active_mutex[node]);
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
}
|
||||
|
||||
static int __node_allowed(struct spu_context *ctx, int node)
|
||||
|
@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node)
|
|||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* spu_add_to_active_list - add spu to active list
|
||||
* @spu: spu to add to the active list
|
||||
*/
|
||||
static void spu_add_to_active_list(struct spu *spu)
|
||||
{
|
||||
int node = spu->node;
|
||||
|
||||
mutex_lock(&spu_prio->active_mutex[node]);
|
||||
spu_prio->nr_active[node]++;
|
||||
list_add_tail(&spu->list, &spu_prio->active_list[node]);
|
||||
mutex_unlock(&spu_prio->active_mutex[node]);
|
||||
}
|
||||
|
||||
static void __spu_remove_from_active_list(struct spu *spu)
|
||||
{
|
||||
list_del_init(&spu->list);
|
||||
spu_prio->nr_active[spu->node]--;
|
||||
}
|
||||
|
||||
/**
|
||||
* spu_remove_from_active_list - remove spu from active list
|
||||
* @spu: spu to remove from the active list
|
||||
*/
|
||||
static void spu_remove_from_active_list(struct spu *spu)
|
||||
{
|
||||
int node = spu->node;
|
||||
|
||||
mutex_lock(&spu_prio->active_mutex[node]);
|
||||
__spu_remove_from_active_list(spu);
|
||||
mutex_unlock(&spu_prio->active_mutex[node]);
|
||||
}
|
||||
|
||||
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
|
||||
|
||||
static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
|
||||
void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
|
||||
{
|
||||
blocking_notifier_call_chain(&spu_switch_notifier,
|
||||
ctx ? ctx->object_id : 0, spu);
|
||||
}
|
||||
|
||||
static void notify_spus_active(void)
|
||||
{
|
||||
int node;
|
||||
|
||||
/*
|
||||
* Wake up the active spu_contexts.
|
||||
*
|
||||
* When the awakened processes see their "notify_active" flag is set,
|
||||
* they will call spu_switch_notify();
|
||||
*/
|
||||
for_each_online_node(node) {
|
||||
struct spu *spu;
|
||||
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
|
||||
if (spu->alloc_state != SPU_FREE) {
|
||||
struct spu_context *ctx = spu->ctx;
|
||||
set_bit(SPU_SCHED_NOTIFY_ACTIVE,
|
||||
&ctx->sched_flags);
|
||||
mb();
|
||||
wake_up_all(&ctx->stop_wq);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
int spu_switch_event_register(struct notifier_block * n)
|
||||
{
|
||||
return blocking_notifier_chain_register(&spu_switch_notifier, n);
|
||||
int ret;
|
||||
ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
|
||||
if (!ret)
|
||||
notify_spus_active();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_switch_event_register);
|
||||
|
||||
int spu_switch_event_unregister(struct notifier_block * n)
|
||||
{
|
||||
return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
|
||||
|
||||
/**
|
||||
* spu_bind_context - bind spu context to physical spu
|
||||
|
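
Registering a switch notifier now also kicks every currently loaded context: notify_spus_active sets SPU_SCHED_NOTIFY_ACTIVE and wakes the contexts, whose run loop then calls spu_switch_notify, so a late subscriber such as OProfile immediately learns which context occupies which SPU. A minimal sketch of such a subscriber module, assuming only the exported spu_switch_event_register/unregister API shown in this hunk:

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/spu.h>

/* called with the context's object_id and the struct spu it now occupies */
static int example_spu_switch(struct notifier_block *nb,
			      unsigned long object_id, void *data)
{
	struct spu *spu = data;

	pr_debug("object %lx now active on spu %d\n", object_id, spu->number);
	return NOTIFY_OK;
}

static struct notifier_block example_spu_switch_nb = {
	.notifier_call = example_spu_switch,
};

static int __init example_init(void)
{
	/* also triggers notifications for contexts that are already running */
	return spu_switch_event_register(&example_spu_switch_nb);
}

static void __exit example_exit(void)
{
	spu_switch_event_unregister(&example_spu_switch_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

The chain is a blocking notifier, so the callback runs in process context and may sleep.
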
@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
|
|||
{
|
||||
pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
|
||||
spu->number, spu->node);
|
||||
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
|
||||
|
||||
if (ctx->flags & SPU_CREATE_NOSCHED)
|
||||
atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
|
||||
if (!list_empty(&ctx->aff_list))
|
||||
atomic_inc(&ctx->gang->aff_sched_count);
|
||||
|
||||
ctx->stats.slb_flt_base = spu->stats.slb_flt;
|
||||
ctx->stats.class2_intr_base = spu->stats.class2_intr;
|
||||
|
@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
|
|||
ctx->spu = spu;
|
||||
ctx->ops = &spu_hw_ops;
|
||||
spu->pid = current->pid;
|
||||
spu->tgid = current->tgid;
|
||||
spu_associate_mm(spu, ctx->owner);
|
||||
spu->ibox_callback = spufs_ibox_callback;
|
||||
spu->wbox_callback = spufs_wbox_callback;
|
||||
|
@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
|
|||
spu_cpu_affinity_set(spu, raw_smp_processor_id());
|
||||
spu_switch_notify(spu, ctx);
|
||||
ctx->state = SPU_STATE_RUNNABLE;
|
||||
spu_switch_state(spu, SPU_UTIL_SYSTEM);
|
||||
|
||||
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be used with the list_mutex held.
|
||||
*/
|
||||
static inline int sched_spu(struct spu *spu)
|
||||
{
|
||||
BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
|
||||
|
||||
return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
|
||||
}
|
||||
|
||||
static void aff_merge_remaining_ctxs(struct spu_gang *gang)
|
||||
{
|
||||
struct spu_context *ctx;
|
||||
|
||||
list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
|
||||
if (list_empty(&ctx->aff_list))
|
||||
list_add(&ctx->aff_list, &gang->aff_list_head);
|
||||
}
|
||||
gang->aff_flags |= AFF_MERGED;
|
||||
}
|
||||
|
||||
static void aff_set_offsets(struct spu_gang *gang)
|
||||
{
|
||||
struct spu_context *ctx;
|
||||
int offset;
|
||||
|
||||
offset = -1;
|
||||
list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
|
||||
aff_list) {
|
||||
if (&ctx->aff_list == &gang->aff_list_head)
|
||||
break;
|
||||
ctx->aff_offset = offset--;
|
||||
}
|
||||
|
||||
offset = 0;
|
||||
list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
|
||||
if (&ctx->aff_list == &gang->aff_list_head)
|
||||
break;
|
||||
ctx->aff_offset = offset++;
|
||||
}
|
||||
|
||||
gang->aff_flags |= AFF_OFFSETS_SET;
|
||||
}
|
||||
|
||||
static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
|
||||
int group_size, int lowest_offset)
|
||||
{
|
||||
struct spu *spu;
|
||||
int node, n;
|
||||
|
||||
/*
|
||||
* TODO: A better algorithm could be used to find a good spu to be
|
||||
* used as reference location for the ctxs chain.
|
||||
*/
|
||||
node = cpu_to_node(raw_smp_processor_id());
|
||||
for (n = 0; n < MAX_NUMNODES; n++, node++) {
|
||||
node = (node < MAX_NUMNODES) ? node : 0;
|
||||
if (!node_allowed(ctx, node))
|
||||
continue;
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
|
||||
if ((!mem_aff || spu->has_mem_affinity) &&
|
||||
sched_spu(spu)) {
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
return spu;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void aff_set_ref_point_location(struct spu_gang *gang)
|
||||
{
|
||||
int mem_aff, gs, lowest_offset;
|
||||
struct spu_context *ctx;
|
||||
struct spu *tmp;
|
||||
|
||||
mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
|
||||
lowest_offset = 0;
|
||||
gs = 0;
|
||||
|
||||
list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
|
||||
gs++;
|
||||
|
||||
list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
|
||||
aff_list) {
|
||||
if (&ctx->aff_list == &gang->aff_list_head)
|
||||
break;
|
||||
lowest_offset = ctx->aff_offset;
|
||||
}
|
||||
|
||||
gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
|
||||
}
|
||||
|
||||
static struct spu *ctx_location(struct spu *ref, int offset, int node)
{
	struct spu *spu;

	spu = NULL;
	if (offset >= 0) {
		list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
			BUG_ON(spu->node != node);
			if (offset == 0)
				break;
			if (sched_spu(spu))
				offset--;
		}
	} else {
		list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
			BUG_ON(spu->node != node);
			if (offset == 0)
				break;
			if (sched_spu(spu))
				offset++;
		}
	}

	return spu;
}

/*
 * has_affinity is called each time a context is going to be scheduled.
 * It lazily sets up the gang's affinity reference SPU and reports
 * whether this context takes part in affinity placement.
 */
static int has_affinity(struct spu_context *ctx)
{
	struct spu_gang *gang = ctx->gang;

	if (list_empty(&ctx->aff_list))
		return 0;

	mutex_lock(&gang->aff_mutex);
	if (!gang->aff_ref_spu) {
		if (!(gang->aff_flags & AFF_MERGED))
			aff_merge_remaining_ctxs(gang);
		if (!(gang->aff_flags & AFF_OFFSETS_SET))
			aff_set_offsets(gang);
		aff_set_ref_point_location(gang);
	}
	mutex_unlock(&gang->aff_mutex);

	return gang->aff_ref_spu != NULL;
}

/**
@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
|
|||
{
|
||||
pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
|
||||
spu->pid, spu->number, spu->node);
|
||||
spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
|
||||
|
||||
spu_switch_state(spu, SPU_UTIL_IDLE);
|
||||
|
||||
if (spu->ctx->flags & SPU_CREATE_NOSCHED)
|
||||
atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
|
||||
if (!list_empty(&ctx->aff_list))
|
||||
if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
|
||||
ctx->gang->aff_ref_spu = NULL;
|
||||
spu_switch_notify(spu, NULL);
|
||||
spu_unmap_mappings(ctx);
|
||||
spu_save(&ctx->csa, spu);
|
||||
|
@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
|
|||
spu->dma_callback = NULL;
|
||||
spu_associate_mm(spu, NULL);
|
||||
spu->pid = 0;
|
||||
spu->tgid = 0;
|
||||
ctx->ops = &spu_backing_ops;
|
||||
ctx->spu = NULL;
|
||||
spu->flags = 0;
|
||||
spu->ctx = NULL;
|
||||
|
||||
|
@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
|
|||
(spu->stats.slb_flt - ctx->stats.slb_flt_base);
|
||||
ctx->stats.class2_intr +=
|
||||
(spu->stats.class2_intr - ctx->stats.class2_intr_base);
|
||||
|
||||
/* This maps the underlying spu state to idle */
|
||||
spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
|
||||
ctx->spu = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx)
|
|||
|
||||
static struct spu *spu_get_idle(struct spu_context *ctx)
|
||||
{
|
||||
struct spu *spu = NULL;
|
||||
int node = cpu_to_node(raw_smp_processor_id());
|
||||
int n;
|
||||
struct spu *spu;
|
||||
int node, n;
|
||||
|
||||
if (has_affinity(ctx)) {
|
||||
node = ctx->gang->aff_ref_spu->node;
|
||||
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
|
||||
if (spu && spu->alloc_state == SPU_FREE)
|
||||
goto found;
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
node = cpu_to_node(raw_smp_processor_id());
|
||||
for (n = 0; n < MAX_NUMNODES; n++, node++) {
|
||||
node = (node < MAX_NUMNODES) ? node : 0;
|
||||
if (!node_allowed(ctx, node))
|
||||
continue;
|
||||
spu = spu_alloc_node(node);
|
||||
if (spu)
|
||||
break;
|
||||
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
|
||||
if (spu->alloc_state == SPU_FREE)
|
||||
goto found;
|
||||
}
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
found:
|
||||
spu->alloc_state = SPU_USED;
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
pr_debug("Got SPU %d %d\n", spu->number, spu->node);
|
||||
spu_init_channels(spu);
|
||||
return spu;
|
||||
}
|
||||
|
||||
|
@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx)
|
|||
if (!node_allowed(ctx, node))
|
||||
continue;
|
||||
|
||||
mutex_lock(&spu_prio->active_mutex[node]);
|
||||
list_for_each_entry(spu, &spu_prio->active_list[node], list) {
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
|
||||
struct spu_context *tmp = spu->ctx;
|
||||
|
||||
if (tmp->prio > ctx->prio &&
|
||||
(!victim || tmp->prio > victim->prio))
|
||||
victim = spu->ctx;
|
||||
}
|
||||
mutex_unlock(&spu_prio->active_mutex[node]);
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
|
||||
if (victim) {
|
||||
/*
|
||||
|
@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx)
|
|||
victim = NULL;
|
||||
goto restart;
|
||||
}
|
||||
spu_remove_from_active_list(spu);
|
||||
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
cbe_spu_info[node].nr_active--;
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
|
||||
spu_unbind_context(spu, victim);
|
||||
victim->stats.invol_ctx_switch++;
|
||||
spu->stats.invol_ctx_switch++;
|
||||
|
@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx)
|
|||
*/
|
||||
int spu_activate(struct spu_context *ctx, unsigned long flags)
|
||||
{
|
||||
spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
|
||||
|
||||
do {
|
||||
struct spu *spu;
|
||||
|
||||
|
@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags)
|
|||
if (!spu && rt_prio(ctx->prio))
|
||||
spu = find_victim(ctx);
|
||||
if (spu) {
|
||||
int node = spu->node;
|
||||
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
spu_bind_context(spu, ctx);
|
||||
spu_add_to_active_list(spu);
|
||||
cbe_spu_info[node].nr_active++;
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node)
|
|||
int best;
|
||||
|
||||
spin_lock(&spu_prio->runq_lock);
|
||||
best = sched_find_first_bit(spu_prio->bitmap);
|
||||
best = find_first_bit(spu_prio->bitmap, prio);
|
||||
while (best < prio) {
|
||||
struct list_head *rq = &spu_prio->runq[best];
|
||||
|
||||
|
@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
|
|||
if (spu) {
|
||||
new = grab_runnable_context(max_prio, spu->node);
|
||||
if (new || force) {
|
||||
spu_remove_from_active_list(spu);
|
||||
int node = spu->node;
|
||||
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
spu_unbind_context(spu, ctx);
|
||||
spu->alloc_state = SPU_FREE;
|
||||
cbe_spu_info[node].nr_active--;
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
|
||||
ctx->stats.vol_ctx_switch++;
|
||||
spu->stats.vol_ctx_switch++;
|
||||
spu_free(spu);
|
||||
|
||||
if (new)
|
||||
wake_up(&new->stop_wq);
|
||||
}
|
||||
|
@ -550,21 +743,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
 */
void spu_deactivate(struct spu_context *ctx)
{
	/*
	 * We must never reach this for a nosched context,
	 * but handle the case gracefully instead of panicking.
	 */
	if (ctx->flags & SPU_CREATE_NOSCHED) {
		WARN_ON(1);
		return;
	}

	__spu_deactivate(ctx, 1, MAX_PRIO);
	spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
}

/**
|
||||
* spu_yield - yield a physical spu if others are waiting
|
||||
* spu_yield - yield a physical spu if others are waiting
|
||||
* @ctx: spu context to yield
|
||||
*
|
||||
* Check if there is a higher priority context waiting and if yes
|
||||
|
@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx)
|
|||
{
|
||||
if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
|
||||
mutex_lock(&ctx->state_mutex);
|
||||
if (__spu_deactivate(ctx, 0, MAX_PRIO))
|
||||
spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
|
||||
else {
|
||||
spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
|
||||
spu_switch_state(ctx->spu, SPU_UTIL_USER);
|
||||
}
|
||||
__spu_deactivate(ctx, 0, MAX_PRIO);
|
||||
mutex_unlock(&ctx->state_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
static void spusched_tick(struct spu_context *ctx)
|
||||
static noinline void spusched_tick(struct spu_context *ctx)
|
||||
{
|
||||
if (ctx->flags & SPU_CREATE_NOSCHED)
|
||||
return;
|
||||
|
@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx)
|
|||
return;
|
||||
|
||||
/*
|
||||
* Unfortunately active_mutex ranks outside of state_mutex, so
|
||||
* Unfortunately list_mutex ranks outside of state_mutex, so
|
||||
* we have to trylock here. If we fail give the context another
|
||||
* tick and try again.
|
||||
*/
|
||||
|
@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx)
|
|||
|
||||
new = grab_runnable_context(ctx->prio + 1, spu->node);
|
||||
if (new) {
|
||||
|
||||
__spu_remove_from_active_list(spu);
|
||||
spu_unbind_context(spu, ctx);
|
||||
ctx->stats.invol_ctx_switch++;
|
||||
spu->stats.invol_ctx_switch++;
|
||||
spu_free(spu);
|
||||
spu->alloc_state = SPU_FREE;
|
||||
cbe_spu_info[spu->node].nr_active--;
|
||||
wake_up(&new->stop_wq);
|
||||
/*
|
||||
* We need to break out of the wait loop in
|
||||
|
@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx)
|
|||
*
|
||||
* Return the number of tasks currently running or waiting to run.
|
||||
*
|
||||
* Note that we don't take runq_lock / active_mutex here. Reading
|
||||
* Note that we don't take runq_lock / list_mutex here. Reading
|
||||
* a single 32bit value is atomic on powerpc, and we don't care
|
||||
* about memory ordering issues here.
|
||||
*/
|
||||
|
@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void)
|
|||
int nr_active = 0, node;
|
||||
|
||||
for (node = 0; node < MAX_NUMNODES; node++)
|
||||
nr_active += spu_prio->nr_active[node];
|
||||
nr_active += cbe_spu_info[node].nr_active;
|
||||
nr_active += spu_prio->nr_waiting;
|
||||
|
||||
return nr_active;
|
||||
|
@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data)
|
|||
|
||||
static int spusched_thread(void *unused)
|
||||
{
|
||||
struct spu *spu, *next;
|
||||
struct spu *spu;
|
||||
int node;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
for (node = 0; node < MAX_NUMNODES; node++) {
|
||||
mutex_lock(&spu_prio->active_mutex[node]);
|
||||
list_for_each_entry_safe(spu, next,
|
||||
&spu_prio->active_list[node],
|
||||
list)
|
||||
spusched_tick(spu->ctx);
|
||||
mutex_unlock(&spu_prio->active_mutex[node]);
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
|
||||
if (spu->ctx)
|
||||
spusched_tick(spu->ctx);
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -751,10 +927,9 @@ int __init spu_sched_init(void)
|
|||
INIT_LIST_HEAD(&spu_prio->runq[i]);
|
||||
__clear_bit(i, spu_prio->bitmap);
|
||||
}
|
||||
__set_bit(MAX_PRIO, spu_prio->bitmap);
|
||||
for (i = 0; i < MAX_NUMNODES; i++) {
|
||||
mutex_init(&spu_prio->active_mutex[i]);
|
||||
INIT_LIST_HEAD(&spu_prio->active_list[i]);
|
||||
mutex_init(&cbe_spu_info[i].list_mutex);
|
||||
INIT_LIST_HEAD(&cbe_spu_info[i].spus);
|
||||
}
|
||||
spin_lock_init(&spu_prio->runq_lock);
|
||||
|
||||
|
@ -783,9 +958,9 @@ int __init spu_sched_init(void)
|
|||
return err;
|
||||
}
|
||||
|
||||
void __exit spu_sched_exit(void)
|
||||
void spu_sched_exit(void)
|
||||
{
|
||||
struct spu *spu, *tmp;
|
||||
struct spu *spu;
|
||||
int node;
|
||||
|
||||
remove_proc_entry("spu_loadavg", NULL);
|
||||
|
@ -794,13 +969,11 @@ void __exit spu_sched_exit(void)
|
|||
kthread_stop(spusched_task);
|
||||
|
||||
for (node = 0; node < MAX_NUMNODES; node++) {
|
||||
mutex_lock(&spu_prio->active_mutex[node]);
|
||||
list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
|
||||
list) {
|
||||
list_del_init(&spu->list);
|
||||
spu_free(spu);
|
||||
}
|
||||
mutex_unlock(&spu_prio->active_mutex[node]);
|
||||
mutex_lock(&cbe_spu_info[node].list_mutex);
|
||||
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
|
||||
if (spu->alloc_state != SPU_FREE)
|
||||
spu->alloc_state = SPU_FREE;
|
||||
mutex_unlock(&cbe_spu_info[node].list_mutex);
|
||||
}
|
||||
kfree(spu_prio);
|
||||
}
|
||||
|
|
|
@ -84,13 +84,13 @@ static inline void restore_decr(void)
|
|||
unsigned int decr_running;
|
||||
unsigned int decr;
|
||||
|
||||
/* Restore, Step 6:
|
||||
/* Restore, Step 6(moved):
|
||||
* If the LSCSA "decrementer running" flag is set
|
||||
* then write the SPU_WrDec channel with the
|
||||
* decrementer value from LSCSA.
|
||||
*/
|
||||
offset = LSCSA_QW_OFFSET(decr_status);
|
||||
decr_running = regs_spill[offset].slot[0];
|
||||
decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
|
||||
if (decr_running) {
|
||||
offset = LSCSA_QW_OFFSET(decr);
|
||||
decr = regs_spill[offset].slot[0];
|
||||
|
@ -318,10 +318,10 @@ int main()
|
|||
build_dma_list(lscsa_ea); /* Step 3. */
|
||||
restore_upper_240kb(lscsa_ea); /* Step 4. */
|
||||
/* Step 5: done by 'exit'. */
|
||||
restore_decr(); /* Step 6. */
|
||||
enqueue_putllc(lscsa_ea); /* Step 7. */
|
||||
set_tag_update(); /* Step 8. */
|
||||
read_tag_status(); /* Step 9. */
|
||||
restore_decr(); /* moved Step 6. */
|
||||
read_llar_status(); /* Step 10. */
|
||||
write_ppu_mb(); /* Step 11. */
|
||||
write_ppuint_mb(); /* Step 12. */
|
||||
|
|
|
@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
|
|||
0x24fd8081,
|
||||
0x1cd80081,
|
||||
0x33001180,
|
||||
0x42030003,
|
||||
0x42034003,
|
||||
0x33800284,
|
||||
0x1c010204,
|
||||
0x40200000,
|
||||
|
@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
|
|||
0x23fffd84,
|
||||
0x1c100183,
|
||||
0x217ffa85,
|
||||
0x3080a000,
|
||||
0x3080a201,
|
||||
0x3080a402,
|
||||
0x3080a603,
|
||||
0x3080a804,
|
||||
0x3080aa05,
|
||||
0x3080ac06,
|
||||
0x3080ae07,
|
||||
0x3080b008,
|
||||
0x3080b209,
|
||||
0x3080b40a,
|
||||
0x3080b60b,
|
||||
0x3080b80c,
|
||||
0x3080ba0d,
|
||||
0x3080bc0e,
|
||||
0x3080be0f,
|
||||
0x3080b000,
|
||||
0x3080b201,
|
||||
0x3080b402,
|
||||
0x3080b603,
|
||||
0x3080b804,
|
||||
0x3080ba05,
|
||||
0x3080bc06,
|
||||
0x3080be07,
|
||||
0x3080c008,
|
||||
0x3080c209,
|
||||
0x3080c40a,
|
||||
0x3080c60b,
|
||||
0x3080c80c,
|
||||
0x3080ca0d,
|
||||
0x3080cc0e,
|
||||
0x3080ce0f,
|
||||
0x00003ffc,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
|
@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
|
|||
0x3ec00083,
|
||||
0xb0a14103,
|
||||
0x01a00204,
|
||||
0x3ec10082,
|
||||
0x4202800e,
|
||||
0x04000703,
|
||||
0xb0a14202,
|
||||
0x21a00803,
|
||||
0x3fbf028d,
|
||||
0x3f20068d,
|
||||
0x3fbe0682,
|
||||
0x3ec10083,
|
||||
0x4202c002,
|
||||
0xb0a14203,
|
||||
0x21a00802,
|
||||
0x3fbf028a,
|
||||
0x3f20050a,
|
||||
0x3fbe0502,
|
||||
0x3fe30102,
|
||||
0x21a00882,
|
||||
0x3f82028f,
|
||||
0x3fe3078f,
|
||||
0x3fbf0784,
|
||||
0x3f82028b,
|
||||
0x3fe3058b,
|
||||
0x3fbf0584,
|
||||
0x3f200204,
|
||||
0x3fbe0204,
|
||||
0x3fe30204,
|
||||
|
@ -75,254 +74,287 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
|
|||
0x21a00083,
|
||||
0x40800082,
|
||||
0x21a00b02,
|
||||
0x10002818,
|
||||
0x42a00002,
|
||||
0x32800007,
|
||||
0x4207000c,
|
||||
0x18008208,
|
||||
0x40a0000b,
|
||||
0x4080020a,
|
||||
0x40800709,
|
||||
0x00200000,
|
||||
0x42070002,
|
||||
0x3ac30384,
|
||||
0x10002612,
|
||||
0x42a00003,
|
||||
0x42074006,
|
||||
0x1800c204,
|
||||
0x40a00008,
|
||||
0x40800789,
|
||||
0x1c010305,
|
||||
0x34000302,
|
||||
0x1cffc489,
|
||||
0x00200000,
|
||||
0x18008383,
|
||||
0x38830382,
|
||||
0x4cffc486,
|
||||
0x3ac28185,
|
||||
0xb0408584,
|
||||
0x28830382,
|
||||
0x1c020387,
|
||||
0x38828182,
|
||||
0xb0408405,
|
||||
0x1802c408,
|
||||
0x28828182,
|
||||
0x217ff886,
|
||||
0x04000583,
|
||||
0x21a00803,
|
||||
0x3fbe0682,
|
||||
0x3fe30102,
|
||||
0x04000106,
|
||||
0x21a00886,
|
||||
0x04000603,
|
||||
0x21a00903,
|
||||
0x40803c02,
|
||||
0x21a00982,
|
||||
0x40800003,
|
||||
0x04000184,
|
||||
0x21a00a04,
|
||||
0x3ec00303,
|
||||
0x3ec00287,
|
||||
0xb0408403,
|
||||
0x24000302,
|
||||
0x34000282,
|
||||
0x1c020306,
|
||||
0xb0408207,
|
||||
0x18020204,
|
||||
0x24000282,
|
||||
0x217ffa09,
|
||||
0x04000402,
|
||||
0x21a00802,
|
||||
0x3fbe0504,
|
||||
0x3fe30204,
|
||||
0x21a00884,
|
||||
0x42074002,
|
||||
0x21a00902,
|
||||
0x40803c03,
|
||||
0x21a00983,
|
||||
0x04000485,
|
||||
0x21a00a05,
|
||||
0x40802202,
|
||||
0x21a00a82,
|
||||
0x42028005,
|
||||
0x34208702,
|
||||
0x21002282,
|
||||
0x21a00804,
|
||||
0x21a00886,
|
||||
0x3fbf0782,
|
||||
0x21a00805,
|
||||
0x21a00884,
|
||||
0x3fbf0582,
|
||||
0x3f200102,
|
||||
0x3fbe0102,
|
||||
0x3fe30102,
|
||||
0x21a00902,
|
||||
0x40804003,
|
||||
0x21a00983,
|
||||
0x21a00a04,
|
||||
0x21a00a05,
|
||||
0x40805a02,
|
||||
0x21a00a82,
|
||||
0x40800083,
|
||||
0x21a00b83,
|
||||
0x01a00c02,
|
||||
0x01a00d83,
|
||||
0x3420c282,
|
||||
0x30809c03,
|
||||
0x34000182,
|
||||
0x14004102,
|
||||
0x21002082,
|
||||
0x01a00d82,
|
||||
0x3080a003,
|
||||
0x34000182,
|
||||
0x21a00e02,
|
||||
0x34210283,
|
||||
0x21a00f03,
|
||||
0x34200284,
|
||||
0x77400200,
|
||||
0x3421c282,
|
||||
0x3080a203,
|
||||
0x34000182,
|
||||
0x21a00f02,
|
||||
0x3080a403,
|
||||
0x34000182,
|
||||
0x77400100,
|
||||
0x3080a603,
|
||||
0x34000182,
|
||||
0x21a00702,
|
||||
0x34218283,
|
||||
0x21a00083,
|
||||
0x34214282,
|
||||
0x3080a803,
|
||||
0x34000182,
|
||||
0x21a00082,
|
||||
0x3080aa03,
|
||||
0x34000182,
|
||||
0x21a00b02,
|
||||
0x4200480c,
|
||||
0x00200000,
|
||||
0x1c010286,
|
||||
0x34220284,
|
||||
0x34220302,
|
||||
0x0f608203,
|
||||
0x5c024204,
|
||||
0x3b81810b,
|
||||
0x42013c02,
|
||||
0x00200000,
|
||||
0x18008185,
|
||||
0x38808183,
|
||||
0x3b814182,
|
||||
0x21004e84,
|
||||
0x4020007f,
|
||||
0x3080ae02,
|
||||
0x42004805,
|
||||
0x3080ac04,
|
||||
0x34000103,
|
||||
0x34000202,
|
||||
0x1cffc183,
|
||||
0x3b810106,
|
||||
0x0f608184,
|
||||
0x42013802,
|
||||
0x5c020183,
|
||||
0x38810102,
|
||||
0x3b810102,
|
||||
0x21000e83,
|
||||
0x4020007f,
|
||||
0x35000100,
|
||||
0x000004e0,
|
||||
0x000002a0,
|
||||
0x000002e8,
|
||||
0x00000428,
|
||||
0x00000470,
|
||||
0x000002f8,
|
||||
0x00000430,
|
||||
0x00000360,
|
||||
0x000002e8,
|
||||
0x000004a0,
|
||||
0x00000468,
|
||||
0x000002f8,
|
||||
0x000003c8,
|
||||
0x000004a8,
|
||||
0x00000298,
|
||||
0x00000360,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800204,
|
||||
0x3ec40085,
|
||||
0x10009c09,
|
||||
0x3ac10606,
|
||||
0xb060c105,
|
||||
0x4020007f,
|
||||
0x4020007f,
|
||||
0x20801203,
|
||||
0x38810602,
|
||||
0xb0408586,
|
||||
0x28810602,
|
||||
0x32004180,
|
||||
0x34204702,
|
||||
0x21a00382,
|
||||
0x4020007f,
|
||||
0x327fdc80,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800204,
|
||||
0x3ec40087,
|
||||
0x40800405,
|
||||
0x00200000,
|
||||
0x40800606,
|
||||
0x3ac10608,
|
||||
0x3ac14609,
|
||||
0x3ac1860a,
|
||||
0xb060c107,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800208,
|
||||
0x3ec40084,
|
||||
0x40800407,
|
||||
0x3ac20289,
|
||||
0xb060c104,
|
||||
0x3ac1c284,
|
||||
0x20801203,
|
||||
0x38820282,
|
||||
0x41004003,
|
||||
0x38810602,
|
||||
0x4020007f,
|
||||
0xb0408188,
|
||||
0x4020007f,
|
||||
0x28810602,
|
||||
0x41201002,
|
||||
0x38814603,
|
||||
0x10009c09,
|
||||
0xb060c109,
|
||||
0x4020007f,
|
||||
0x28814603,
|
||||
0x41193f83,
|
||||
0x38818602,
|
||||
0x60ffc003,
|
||||
0xb040818a,
|
||||
0x28818602,
|
||||
0x32003080,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800204,
|
||||
0x3ec40087,
|
||||
0x41201008,
|
||||
0x10009c14,
|
||||
0x40800405,
|
||||
0x3ac10609,
|
||||
0x40800606,
|
||||
0x3ac1460a,
|
||||
0xb060c107,
|
||||
0x3ac1860b,
|
||||
0x20801203,
|
||||
0x38810602,
|
||||
0xb0408409,
|
||||
0x28810602,
|
||||
0x38814603,
|
||||
0xb060c40a,
|
||||
0x4020007f,
|
||||
0x28814603,
|
||||
0x41193f83,
|
||||
0x38818602,
|
||||
0x60ffc003,
|
||||
0xb040818b,
|
||||
0x28818602,
|
||||
0x32002380,
|
||||
0x409ffe02,
|
||||
0x30801204,
|
||||
0x40800205,
|
||||
0x3ec40083,
|
||||
0x40800406,
|
||||
0x3ac14607,
|
||||
0x3ac18608,
|
||||
0xb0810103,
|
||||
0x41004002,
|
||||
0x20801204,
|
||||
0x4020007f,
|
||||
0x38814603,
|
||||
0x10009c0b,
|
||||
0xb060c107,
|
||||
0x4020007f,
|
||||
0x4020007f,
|
||||
0x28814603,
|
||||
0x38818602,
|
||||
0x4020007f,
|
||||
0x4020007f,
|
||||
0xb0408588,
|
||||
0x28818602,
|
||||
0x4020007f,
|
||||
0x32001780,
|
||||
0x409ffe02,
|
||||
0x1000640e,
|
||||
0x40800204,
|
||||
0x30801203,
|
||||
0x40800405,
|
||||
0x3ec40087,
|
||||
0x40800606,
|
||||
0x3ac10608,
|
||||
0x3ac14609,
|
||||
0x3ac1860a,
|
||||
0xb060c107,
|
||||
0x20801203,
|
||||
0x413d8003,
|
||||
0x38810602,
|
||||
0x4020007f,
|
||||
0x327fd780,
|
||||
0x409ffe02,
|
||||
0x10007f0c,
|
||||
0x40800205,
|
||||
0x30801204,
|
||||
0x40800406,
|
||||
0x3ec40083,
|
||||
0x3ac14607,
|
||||
0x3ac18608,
|
||||
0xb0810103,
|
||||
0x413d8002,
|
||||
0x20801204,
|
||||
0x38814603,
|
||||
0x4020007f,
|
||||
0x327feb80,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800204,
|
||||
0x3ec40087,
|
||||
0x40800405,
|
||||
0x1000650a,
|
||||
0x40800606,
|
||||
0x3ac10608,
|
||||
0x3ac14609,
|
||||
0x3ac1860a,
|
||||
0xb060c107,
|
||||
0x20801203,
|
||||
0x38810602,
|
||||
0xb0408588,
|
||||
0x4020007f,
|
||||
0x327fc980,
|
||||
0xb0408189,
|
||||
0x28820282,
|
||||
0x3881c282,
|
||||
0xb0408304,
|
||||
0x2881c282,
|
||||
0x00400000,
|
||||
0x40800003,
|
||||
0x4020007f,
|
||||
0x35000000,
|
||||
0x30809e03,
|
||||
0x34000182,
|
||||
0x21a00382,
|
||||
0x4020007f,
|
||||
0x327fde00,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800206,
|
||||
0x3ec40084,
|
||||
0x40800407,
|
||||
0x40800608,
|
||||
0x3ac1828a,
|
||||
0x3ac20289,
|
||||
0xb060c104,
|
||||
0x3ac1c284,
|
||||
0x20801203,
|
||||
0x38818282,
|
||||
0x41004003,
|
||||
0xb040818a,
|
||||
0x10005b0b,
|
||||
0x41201003,
|
||||
0x28818282,
|
||||
0x3881c282,
|
||||
0xb0408184,
|
||||
0x41193f83,
|
||||
0x60ffc003,
|
||||
0x2881c282,
|
||||
0x38820282,
|
||||
0xb0408189,
|
||||
0x28820282,
|
||||
0x327fef80,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800207,
|
||||
0x3ec40086,
|
||||
0x4120100b,
|
||||
0x10005b14,
|
||||
0x40800404,
|
||||
0x3ac1c289,
|
||||
0x40800608,
|
||||
0xb060c106,
|
||||
0x3ac10286,
|
||||
0x3ac2028a,
|
||||
0x20801203,
|
||||
0x3881c282,
|
||||
0x41193f83,
|
||||
0x60ffc003,
|
||||
0xb0408589,
|
||||
0x2881c282,
|
||||
0x38810282,
|
||||
0xb0408586,
|
||||
0x28810282,
|
||||
0x38820282,
|
||||
0xb040818a,
|
||||
0x28820282,
|
||||
0x4020007f,
|
||||
0x327fe280,
|
||||
0x409ffe02,
|
||||
0x30801203,
|
||||
0x40800207,
|
||||
0x3ec40084,
|
||||
0x40800408,
|
||||
0x10005b14,
|
||||
0x40800609,
|
||||
0x3ac1c28a,
|
||||
0x3ac2028b,
|
||||
0xb060c104,
|
||||
0x3ac24284,
|
||||
0x20801203,
|
||||
0x41201003,
|
||||
0x3881c282,
|
||||
0xb040830a,
|
||||
0x2881c282,
|
||||
0x38820282,
|
||||
0xb040818b,
|
||||
0x41193f83,
|
||||
0x60ffc003,
|
||||
0x28820282,
|
||||
0x38824282,
|
||||
0xb0408184,
|
||||
0x28824282,
|
||||
0x4020007f,
|
||||
0x327fd580,
|
||||
0x409ffe02,
|
||||
0x1000658e,
|
||||
0x40800206,
|
||||
0x30801203,
|
||||
0x40800407,
|
||||
0x3ec40084,
|
||||
0x40800608,
|
||||
0x3ac1828a,
|
||||
0x3ac20289,
|
||||
0xb060c104,
|
||||
0x3ac1c284,
|
||||
0x20801203,
|
||||
0x413d8003,
|
||||
0x38818282,
|
||||
0x4020007f,
|
||||
0x327fd800,
|
||||
0x409ffe03,
|
||||
0x30801202,
|
||||
0x40800207,
|
||||
0x3ec40084,
|
||||
0x10005b09,
|
||||
0x3ac1c288,
|
||||
0xb0408184,
|
||||
0x4020007f,
|
||||
0x4020007f,
|
||||
0x20801202,
|
||||
0x3881c282,
|
||||
0xb0408308,
|
||||
0x2881c282,
|
||||
0x327fc680,
|
||||
0x409ffe02,
|
||||
0x1000588b,
|
||||
0x40800208,
|
||||
0x30801203,
|
||||
0x40800407,
|
||||
0x3ec40084,
|
||||
0x3ac20289,
|
||||
0xb060c104,
|
||||
0x3ac1c284,
|
||||
0x20801203,
|
||||
0x413d8003,
|
||||
0x38820282,
|
||||
0x327fbd80,
|
||||
0x00200000,
|
||||
0x00000da0,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000d90,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000db0,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000dc0,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000d80,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000df0,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000de0,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000dd0,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000e04,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000e00,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
|
|
|
@ -40,17 +40,13 @@ enum {
|
|||
struct spu_context_ops;
|
||||
struct spu_gang;
|
||||
|
||||
/*
|
||||
* This is the state for spu utilization reporting to userspace.
|
||||
* Because this state is visible to userspace it must never change and needs
|
||||
* to be kept strictly separate from any internal state kept by the kernel.
|
||||
*/
|
||||
enum spuctx_execution_state {
|
||||
SPUCTX_UTIL_USER = 0,
|
||||
SPUCTX_UTIL_SYSTEM,
|
||||
SPUCTX_UTIL_IOWAIT,
|
||||
SPUCTX_UTIL_LOADED,
|
||||
SPUCTX_UTIL_MAX
|
||||
enum {
|
||||
SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */
|
||||
};
|
||||
|
||||
/* ctx->sched_flags */
|
||||
enum {
|
||||
SPU_SCHED_NOTIFY_ACTIVE,
|
||||
};
|
||||
|
||||
struct spu_context {
|
||||
|
@ -89,6 +85,8 @@ struct spu_context {
|
|||
|
||||
struct list_head gang_list;
|
||||
struct spu_gang *gang;
|
||||
struct kref *prof_priv_kref;
|
||||
void ( * prof_priv_release) (struct kref *kref);
|
||||
|
||||
/* owner thread */
|
||||
pid_t tid;
|
||||
|
@ -104,9 +102,9 @@ struct spu_context {
|
|||
/* statistics */
|
||||
struct {
|
||||
/* updates protected by ctx->state_mutex */
|
||||
enum spuctx_execution_state execution_state;
|
||||
unsigned long tstamp; /* time of last ctx switch */
|
||||
unsigned long times[SPUCTX_UTIL_MAX];
|
||||
enum spu_utilization_state util_state;
|
||||
unsigned long long tstamp; /* time of last state switch */
|
||||
unsigned long long times[SPU_UTIL_MAX];
|
||||
unsigned long long vol_ctx_switch;
|
||||
unsigned long long invol_ctx_switch;
|
||||
unsigned long long min_flt;
|
||||
|
@ -118,6 +116,10 @@ struct spu_context {
|
|||
unsigned long long class2_intr_base; /* # at last ctx switch */
|
||||
unsigned long long libassist;
|
||||
} stats;
|
||||
|
||||
struct list_head aff_list;
|
||||
int aff_head;
|
||||
int aff_offset;
|
||||
};
|
||||
|
||||
struct spu_gang {
|
||||
|
@ -125,8 +127,19 @@ struct spu_gang {
|
|||
struct mutex mutex;
|
||||
struct kref kref;
|
||||
int contexts;
|
||||
|
||||
struct spu_context *aff_ref_ctx;
|
||||
struct list_head aff_list_head;
|
||||
struct mutex aff_mutex;
|
||||
int aff_flags;
|
||||
struct spu *aff_ref_spu;
|
||||
atomic_t aff_sched_count;
|
||||
};
|
||||
|
||||
/* Flag bits for spu_gang aff_flags */
|
||||
#define AFF_OFFSETS_SET 1
|
||||
#define AFF_MERGED 2
|
||||
|
||||
struct mfc_dma_command {
|
||||
int32_t pad; /* reserved */
|
||||
uint32_t lsa; /* local storage address */
|
||||
|
@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[];
|
|||
extern struct tree_descr spufs_dir_nosched_contents[];
|
||||
|
||||
/* system call implementation */
|
||||
long spufs_run_spu(struct file *file,
|
||||
struct spu_context *ctx, u32 *npc, u32 *status);
|
||||
long spufs_create(struct nameidata *nd,
|
||||
unsigned int flags, mode_t mode);
|
||||
long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
|
||||
long spufs_create(struct nameidata *nd, unsigned int flags,
|
||||
mode_t mode, struct file *filp);
|
||||
extern const struct file_operations spufs_context_fops;
|
||||
|
||||
/* gang management */
|
||||
|
@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
|
|||
/* fault handling */
|
||||
int spufs_handle_class1(struct spu_context *ctx);
|
||||
|
||||
/* affinity */
|
||||
struct spu *affinity_check(struct spu_context *ctx);
|
||||
|
||||
/* context management */
|
||||
extern atomic_t nr_spu_contexts;
|
||||
static inline void spu_acquire(struct spu_context *ctx)
|
||||
|
@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx);
|
|||
void spu_forget(struct spu_context *ctx);
|
||||
int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
|
||||
void spu_acquire_saved(struct spu_context *ctx);
|
||||
void spu_release_saved(struct spu_context *ctx);
|
||||
|
||||
int spu_activate(struct spu_context *ctx, unsigned long flags);
|
||||
void spu_deactivate(struct spu_context *ctx);
|
||||
void spu_yield(struct spu_context *ctx);
|
||||
void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
|
||||
void spu_set_timeslice(struct spu_context *ctx);
|
||||
void spu_update_sched_info(struct spu_context *ctx);
|
||||
void __spu_update_sched_info(struct spu_context *ctx);
|
||||
int __init spu_sched_init(void);
|
||||
void __exit spu_sched_exit(void);
|
||||
void spu_sched_exit(void);
|
||||
|
||||
extern char *isolated_loader;
|
||||
|
||||
|
@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes;
 * line.
 */
static inline void spuctx_switch_state(struct spu_context *ctx,
		enum spuctx_execution_state new_state)
		enum spu_utilization_state new_state)
{
	unsigned long long curtime;
	signed long long delta;
	struct timespec ts;
	struct spu *spu;
	enum spu_utilization_state old_state;

	ktime_get_ts(&ts);
	curtime = timespec_to_ns(&ts);
	delta = curtime - ctx->stats.tstamp;

	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
	WARN_ON(delta < 0);

	if (ctx->stats.execution_state != new_state) {
		unsigned long curtime = jiffies;
	spu = ctx->spu;
	old_state = ctx->stats.util_state;
	ctx->stats.util_state = new_state;
	ctx->stats.tstamp = curtime;

		ctx->stats.times[ctx->stats.execution_state] +=
			curtime - ctx->stats.tstamp;
		ctx->stats.tstamp = curtime;
		ctx->stats.execution_state = new_state;
	}
}

static inline void spu_switch_state(struct spu *spu,
		enum spuctx_execution_state new_state)
{
	if (spu->stats.utilization_state != new_state) {
		unsigned long curtime = jiffies;

		spu->stats.times[spu->stats.utilization_state] +=
			curtime - spu->stats.tstamp;
	/*
	 * Update the physical SPU utilization statistics.
	 */
	if (spu) {
		ctx->stats.times[old_state] += delta;
		spu->stats.times[old_state] += delta;
		spu->stats.util_state = new_state;
		spu->stats.tstamp = curtime;
		spu->stats.utilization_state = new_state;
	}
}
|
||||
|
|
|
@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
|
|||
case MFC_CNTL_SUSPEND_COMPLETE:
|
||||
if (csa) {
|
||||
csa->priv2.mfc_control_RW =
|
||||
in_be64(&priv2->mfc_control_RW) |
|
||||
MFC_CNTL_SUSPEND_MASK |
|
||||
MFC_CNTL_SUSPEND_DMA_QUEUE;
|
||||
}
|
||||
break;
|
||||
|
@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
|
|||
MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
|
||||
MFC_CNTL_SUSPEND_COMPLETE);
|
||||
if (csa) {
|
||||
csa->priv2.mfc_control_RW =
|
||||
in_be64(&priv2->mfc_control_RW) &
|
||||
~MFC_CNTL_SUSPEND_DMA_QUEUE;
|
||||
csa->priv2.mfc_control_RW = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu)
|
|||
* Read MFC_CNTL[Ds]. Update saved copy of
|
||||
* CSA.MFC_CNTL[Ds].
|
||||
*/
|
||||
if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) {
|
||||
csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
|
||||
csa->suspend_time = get_cycles();
|
||||
out_be64(&priv2->spu_chnlcntptr_RW, 7ULL);
|
||||
eieio();
|
||||
csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW);
|
||||
eieio();
|
||||
} else {
|
||||
csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
|
||||
}
|
||||
csa->priv2.mfc_control_RW |=
|
||||
in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING;
|
||||
}
|
||||
|
||||
static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
|
||||
|
@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
|
|||
* Write MFC_CNTL[Dh] set to a '1' to halt
|
||||
* the decrementer.
|
||||
*/
|
||||
out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED);
|
||||
out_be64(&priv2->mfc_control_RW,
|
||||
MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
|
||||
eieio();
|
||||
}
|
||||
|
||||
|
@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
|
|||
static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
|
||||
{
|
||||
struct spu_priv2 __iomem *priv2 = spu->priv2;
|
||||
u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
|
||||
u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
|
||||
int i;
|
||||
|
||||
/* Save, Step 42:
|
||||
|
@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
|
|||
csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
|
||||
|
||||
/* Save the following CH: [0,3,4,24,25,27] */
|
||||
for (i = 0; i < 7; i++) {
|
||||
for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
|
||||
idx = ch_indices[i];
|
||||
out_be64(&priv2->spu_chnlcntptr_RW, idx);
|
||||
eieio();
|
||||
|
@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
|
|||
*/
|
||||
}
|
||||
|
||||
static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
|
||||
static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
|
||||
struct spu *spu)
|
||||
{
|
||||
struct spu_priv2 __iomem *priv2 = spu->priv2;
|
||||
|
||||
/* Restore, Step 7:
|
||||
* Restore, Step 47.
|
||||
* Write MFC_Cntl[Dh,Sc]='1','1' to suspend
|
||||
* Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
|
||||
* the queue and halt the decrementer.
|
||||
*/
|
||||
out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
|
||||
|
@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
|
|||
static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
|
||||
{
|
||||
struct spu_priv2 __iomem *priv2 = spu->priv2;
|
||||
u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
|
||||
u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
|
||||
u64 idx;
|
||||
int i;
|
||||
|
||||
|
@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
|
|||
out_be64(&priv2->spu_chnldata_RW, 0UL);
|
||||
|
||||
/* Reset the following CH: [0,3,4,24,25,27] */
|
||||
for (i = 0; i < 7; i++) {
|
||||
for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
|
||||
idx = ch_indices[i];
|
||||
out_be64(&priv2->spu_chnlcntptr_RW, idx);
|
||||
eieio();
|
||||
|
@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu)
		cycles_t resume_time = get_cycles();
		cycles_t delta_time = resume_time - csa->suspend_time;

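		/*
		 * Editor's note (descriptive comment, derived from the code
		 * below): the decrementer kept conceptually running while the
		 * context was saved, so the elapsed cycles are charged against
		 * the saved value and a wrap is flagged if it underflowed.
		 */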
		csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
		if (csa->lscsa->decr.slot[0] < delta_time) {
			csa->lscsa->decr_status.slot[0] |=
				SPU_DECR_STATUS_WRAPPED;
		}

		csa->lscsa->decr.slot[0] -= delta_time;
	} else {
		csa->lscsa->decr_status.slot[0] = 0;
	}
}

@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
|
|||
send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
|
||||
}
|
||||
|
||||
static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
|
||||
{
|
||||
struct spu_priv2 __iomem *priv2 = spu->priv2;
|
||||
|
||||
/* Restore, Step 47.
|
||||
* Write MFC_Cntl[Sc,Sm]='1','0' to suspend
|
||||
* the queue.
|
||||
*/
|
||||
out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
|
||||
eieio();
|
||||
}
|
||||
|
||||
static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
|
||||
{
|
||||
/* Restore, Step 49:
|
||||
|
@ -1548,10 +1559,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
|
|||
* "wrapped" flag is set, OR in a '1' to
|
||||
* CSA.SPU_Event_Status[Tm].
|
||||
*/
|
||||
if (csa->lscsa->decr_status.slot[0] == 1) {
|
||||
if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
|
||||
csa->spu_chnldata_RW[0] |= 0x20;
|
||||
}
|
||||
if ((csa->lscsa->decr_status.slot[0] == 1) &&
|
||||
if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) &&
|
||||
(csa->spu_chnlcnt_RW[0] == 0 &&
|
||||
((csa->spu_chnldata_RW[2] & 0x20) == 0x0) &&
|
||||
((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) {
|
||||
|
@ -1562,18 +1573,13 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
|
|||
static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
|
||||
{
|
||||
struct spu_priv2 __iomem *priv2 = spu->priv2;
|
||||
u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
|
||||
u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
|
||||
int i;
|
||||
|
||||
/* Restore, Step 59:
|
||||
* Restore the following CH: [0,3,4,24,25,27]
|
||||
*/
|
||||
|
||||
/* Restore CH 1 without count */
|
||||
out_be64(&priv2->spu_chnlcntptr_RW, 1);
|
||||
out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]);
|
||||
|
||||
/* Restore the following CH: [0,3,4,24,25,27] */
|
||||
for (i = 0; i < 7; i++) {
|
||||
for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
|
||||
idx = ch_indices[i];
|
||||
out_be64(&priv2->spu_chnlcntptr_RW, idx);
|
||||
eieio();
|
||||
|
@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
|
|||
set_switch_pending(prev, spu); /* Step 5. */
|
||||
stop_spu_isolate(spu); /* NEW. */
|
||||
remove_other_spu_access(prev, spu); /* Step 6. */
|
||||
suspend_mfc(prev, spu); /* Step 7. */
|
||||
suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */
|
||||
wait_suspend_mfc_complete(prev, spu); /* Step 8. */
|
||||
if (!suspend_spe(prev, spu)) /* Step 9. */
|
||||
clear_spu_status(prev, spu); /* Step 10. */
|
||||
|
|
|
@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
		goto out;

	i = SPUFS_I(filp->f_path.dentry->d_inode);
	ret = spufs_run_spu(filp, i->i_ctx, &npc, &status);
	ret = spufs_run_spu(i->i_ctx, &npc, &status);

	if (put_user(npc, unpc))
		ret = -EFAULT;

@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
}
#endif

asmlinkage long sys_spu_create(const char __user *pathname,
					unsigned int flags, mode_t mode)
asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
					mode_t mode, struct file *neighbor)
{
	char *tmp;
	int ret;

@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname,
	ret = path_lookup(tmp, LOOKUP_PARENT|
			LOOKUP_OPEN|LOOKUP_CREATE, &nd);
	if (!ret) {
		ret = spufs_create(&nd, flags, mode);
		ret = spufs_create(&nd, flags, mode, neighbor);
		path_release(&nd);
	}
	putname(tmp);

@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname,
	return ret;
}

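/*
 * Editor's note (descriptive comment, derived from the code below): the
 * built-in variant of the spu_create syscall resolves the neighbour file
 * descriptor when SPU_CREATE_AFFINITY_SPU is passed and hands the struct
 * file on to do_spu_create().
 */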
#ifndef MODULE
asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
					mode_t mode, int neighbor_fd)
{
	int fput_needed;
	struct file *neighbor;
	long ret;

	if (flags & SPU_CREATE_AFFINITY_SPU) {
		ret = -EBADF;
		neighbor = fget_light(neighbor_fd, &fput_needed);
		if (neighbor) {
			ret = do_spu_create(pathname, flags, mode, neighbor);
			fput_light(neighbor, fput_needed);
		}
	} else {
		ret = do_spu_create(pathname, flags, mode, NULL);
	}

	return ret;
}
#endif

struct spufs_calls spufs_calls = {
	.create_thread = sys_spu_create,
	.create_thread = do_spu_create,
	.spu_run = do_spu_run,
	.owner = THIS_MODULE,
};
|
|
|
@ -17,6 +17,7 @@ obj-$(CONFIG_QUICC_ENGINE) += qe_lib/
|
|||
mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
|
||||
obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o
|
||||
obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
|
||||
obj-$(CONFIG_AXON_RAM) += axonram.o
|
||||
|
||||
# contains only the suspend handler for time
|
||||
ifeq ($(CONFIG_RTC_CLASS),)
|
||||
|
|
|
@ -0,0 +1,381 @@
|
|||
/*
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2006
|
||||
*
|
||||
* Author: Maxim Shchetynin <maxim@de.ibm.com>
|
||||
*
|
||||
* Axon DDR2 device driver.
|
||||
* It registers one block device per Axon's DDR2 memory bank found on a system.
|
||||
* Block devices are called axonram?, their major and minor numbers are
|
||||
* available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/irqreturn.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mod_devicetable.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <asm/of_device.h>
|
||||
#include <asm/of_platform.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/prom.h>
|
||||
|
||||
#define AXON_RAM_MODULE_NAME "axonram"
|
||||
#define AXON_RAM_DEVICE_NAME "axonram"
|
||||
#define AXON_RAM_MINORS_PER_DISK 16
|
||||
#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT
|
||||
#define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT
|
||||
#define AXON_RAM_SECTOR_SHIFT 9
|
||||
#define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT
|
||||
#define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING
|
||||
|
||||
struct axon_ram_bank {
|
||||
struct of_device *device;
|
||||
struct gendisk *disk;
|
||||
unsigned int irq_correctable;
|
||||
unsigned int irq_uncorrectable;
|
||||
unsigned long ph_addr;
|
||||
unsigned long io_addr;
|
||||
unsigned long size;
|
||||
unsigned long ecc_counter;
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct of_device *device = to_of_device(dev);
|
||||
struct axon_ram_bank *bank = device->dev.platform_data;
|
||||
|
||||
BUG_ON(!bank);
|
||||
|
||||
return sprintf(buf, "%ld\n", bank->ecc_counter);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
|
||||
|
||||
/**
|
||||
* axon_ram_irq_handler - interrupt handler for Axon RAM ECC
|
||||
* @irq: interrupt ID
|
||||
* @dev: pointer to of_device
|
||||
*/
|
||||
static irqreturn_t
|
||||
axon_ram_irq_handler(int irq, void *dev)
|
||||
{
|
||||
struct of_device *device = dev;
|
||||
struct axon_ram_bank *bank = device->dev.platform_data;
|
||||
|
||||
BUG_ON(!bank);
|
||||
|
||||
if (irq == bank->irq_correctable) {
|
||||
dev_err(&device->dev, "Correctable memory error occured\n");
|
||||
bank->ecc_counter++;
|
||||
return IRQ_HANDLED;
|
||||
} else if (irq == bank->irq_uncorrectable) {
|
||||
dev_err(&device->dev, "Uncorrectable memory error occured\n");
|
||||
panic("Critical ECC error on %s", device->node->full_name);
|
||||
}
|
||||
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* axon_ram_make_request - make_request() method for block device
|
||||
* @queue, @bio: see blk_queue_make_request()
|
||||
*/
|
||||
static int
|
||||
axon_ram_make_request(struct request_queue *queue, struct bio *bio)
|
||||
{
|
||||
struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
|
||||
unsigned long phys_mem, phys_end;
|
||||
void *user_mem;
|
||||
struct bio_vec *vec;
|
||||
unsigned int transfered;
|
||||
unsigned short idx;
|
||||
int rc = 0;
|
||||
|
||||
phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
|
||||
phys_end = bank->io_addr + bank->size;
|
||||
transfered = 0;
|
||||
bio_for_each_segment(vec, bio, idx) {
|
||||
if (unlikely(phys_mem + vec->bv_len > phys_end)) {
|
||||
bio_io_error(bio, bio->bi_size);
|
||||
rc = -ERANGE;
|
||||
break;
|
||||
}
|
||||
|
||||
user_mem = page_address(vec->bv_page) + vec->bv_offset;
|
||||
if (bio_data_dir(bio) == READ)
|
||||
memcpy(user_mem, (void *) phys_mem, vec->bv_len);
|
||||
else
|
||||
memcpy((void *) phys_mem, user_mem, vec->bv_len);
|
||||
|
||||
phys_mem += vec->bv_len;
|
||||
transfered += vec->bv_len;
|
||||
}
|
||||
bio_endio(bio, transfered, 0);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* axon_ram_direct_access - direct_access() method for block device
|
||||
* @device, @sector, @data: see block_device_operations method
|
||||
*/
|
||||
static int
|
||||
axon_ram_direct_access(struct block_device *device, sector_t sector,
|
||||
unsigned long *data)
|
||||
{
|
||||
struct axon_ram_bank *bank = device->bd_disk->private_data;
|
||||
loff_t offset;
|
||||
|
||||
offset = sector << AXON_RAM_SECTOR_SHIFT;
|
||||
if (offset >= bank->size) {
|
||||
dev_err(&bank->device->dev, "Access outside of address space\n");
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
*data = bank->ph_addr + offset;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct block_device_operations axon_ram_devops = {
|
||||
.owner = THIS_MODULE,
|
||||
.direct_access = axon_ram_direct_access
|
||||
};
|
||||
|
||||
/**
|
||||
* axon_ram_probe - probe() method for platform driver
|
||||
* @device, @device_id: see of_platform_driver method
|
||||
*/
|
||||
static int
|
||||
axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
|
||||
{
|
||||
static int axon_ram_bank_id = -1;
|
||||
struct axon_ram_bank *bank;
|
||||
struct resource resource;
|
||||
int rc = 0;
|
||||
|
||||
axon_ram_bank_id++;
|
||||
|
||||
dev_info(&device->dev, "Found memory controller on %s\n",
|
||||
device->node->full_name);
|
||||
|
||||
bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
|
||||
if (bank == NULL) {
|
||||
dev_err(&device->dev, "Out of memory\n");
|
||||
rc = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
device->dev.platform_data = bank;
|
||||
|
||||
bank->device = device;
|
||||
|
||||
if (of_address_to_resource(device->node, 0, &resource) != 0) {
|
||||
dev_err(&device->dev, "Cannot access device tree\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
bank->size = resource.end - resource.start + 1;
|
||||
|
||||
if (bank->size == 0) {
|
||||
dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
|
||||
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
|
||||
rc = -ENODEV;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
|
||||
AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
|
||||
|
||||
bank->ph_addr = resource.start;
|
||||
bank->io_addr = (unsigned long) ioremap_flags(
|
||||
bank->ph_addr, bank->size, _PAGE_NO_CACHE);
|
||||
if (bank->io_addr == 0) {
|
||||
dev_err(&device->dev, "ioremap() failed\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
|
||||
if (bank->disk == NULL) {
|
||||
dev_err(&device->dev, "Cannot register disk\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
bank->disk->first_minor = 0;
|
||||
bank->disk->fops = &axon_ram_devops;
|
||||
bank->disk->private_data = bank;
|
||||
bank->disk->driverfs_dev = &device->dev;
|
||||
|
||||
sprintf(bank->disk->disk_name, "%s%d",
|
||||
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
|
||||
bank->disk->major = register_blkdev(0, bank->disk->disk_name);
|
||||
if (bank->disk->major < 0) {
|
||||
dev_err(&device->dev, "Cannot register block device\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
|
||||
if (bank->disk->queue == NULL) {
|
||||
dev_err(&device->dev, "Cannot register disk queue\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
|
||||
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
|
||||
blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
|
||||
add_disk(bank->disk);
|
||||
|
||||
bank->irq_correctable = irq_of_parse_and_map(device->node, 0);
|
||||
bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1);
|
||||
if ((bank->irq_correctable <= 0) || (bank->irq_uncorrectable <= 0)) {
|
||||
dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
rc = request_irq(bank->irq_correctable, axon_ram_irq_handler,
|
||||
AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
|
||||
if (rc != 0) {
|
||||
dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
|
||||
bank->irq_correctable = bank->irq_uncorrectable = 0;
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler,
|
||||
AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
|
||||
if (rc != 0) {
|
||||
dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
|
||||
bank->irq_uncorrectable = 0;
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
rc = device_create_file(&device->dev, &dev_attr_ecc);
|
||||
if (rc != 0) {
|
||||
dev_err(&device->dev, "Cannot create sysfs file\n");
|
||||
rc = -EFAULT;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
failed:
|
||||
if (bank != NULL) {
|
||||
if (bank->irq_uncorrectable > 0)
|
||||
free_irq(bank->irq_uncorrectable, device);
|
||||
if (bank->irq_correctable > 0)
|
||||
free_irq(bank->irq_correctable, device);
|
||||
if (bank->disk != NULL) {
|
||||
if (bank->disk->queue != NULL)
|
||||
blk_cleanup_queue(bank->disk->queue);
|
||||
if (bank->disk->major > 0)
|
||||
unregister_blkdev(bank->disk->major,
|
||||
bank->disk->disk_name);
|
||||
del_gendisk(bank->disk);
|
||||
}
|
||||
device->dev.platform_data = NULL;
|
||||
if (bank->io_addr != 0)
|
||||
iounmap((void __iomem *) bank->io_addr);
|
||||
kfree(bank);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* axon_ram_remove - remove() method for platform driver
|
||||
* @device: see of_platform_driver method
|
||||
*/
|
||||
static int
|
||||
axon_ram_remove(struct of_device *device)
|
||||
{
|
||||
struct axon_ram_bank *bank = device->dev.platform_data;
|
||||
|
||||
BUG_ON(!bank || !bank->disk);
|
||||
|
||||
device_remove_file(&device->dev, &dev_attr_ecc);
|
||||
free_irq(bank->irq_uncorrectable, device);
|
||||
free_irq(bank->irq_correctable, device);
|
||||
blk_cleanup_queue(bank->disk->queue);
|
||||
unregister_blkdev(bank->disk->major, bank->disk->disk_name);
|
||||
del_gendisk(bank->disk);
|
||||
iounmap((void __iomem *) bank->io_addr);
|
||||
kfree(bank);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct of_device_id axon_ram_device_id[] = {
|
||||
{
|
||||
.type = "dma-memory"
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct of_platform_driver axon_ram_driver = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = AXON_RAM_MODULE_NAME,
|
||||
.match_table = axon_ram_device_id,
|
||||
.probe = axon_ram_probe,
|
||||
.remove = axon_ram_remove
|
||||
};
|
||||
|
||||
/**
|
||||
* axon_ram_init
|
||||
*/
|
||||
static int __init
|
||||
axon_ram_init(void)
|
||||
{
|
||||
return of_register_platform_driver(&axon_ram_driver);
|
||||
}
|
||||
|
||||
/**
|
||||
* axon_ram_exit
|
||||
*/
|
||||
static void __exit
|
||||
axon_ram_exit(void)
|
||||
{
|
||||
of_unregister_platform_driver(&axon_ram_driver);
|
||||
}
|
||||
|
||||
module_init(axon_ram_init);
|
||||
module_exit(axon_ram_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
|
||||
MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
|
|
@ -48,15 +48,13 @@ struct pmi_data {
|
|||
struct work_struct work;
|
||||
};
|
||||
|
||||
static struct pmi_data *data;
|
||||
|
||||
static int pmi_irq_handler(int irq, void *dev_id)
|
||||
{
|
||||
struct pmi_data *data;
|
||||
u8 type;
|
||||
int rc;
|
||||
|
||||
data = dev_id;
|
||||
|
||||
spin_lock(&data->pmi_spinlock);
|
||||
|
||||
type = ioread8(data->pmi_reg + PMI_READ_TYPE);
|
||||
|
@ -111,16 +109,13 @@ MODULE_DEVICE_TABLE(of, pmi_match);
|
|||
|
||||
static void pmi_notify_handlers(struct work_struct *work)
|
||||
{
|
||||
struct pmi_data *data;
|
||||
struct pmi_handler *handler;
|
||||
|
||||
data = container_of(work, struct pmi_data, work);
|
||||
|
||||
spin_lock(&data->handler_spinlock);
|
||||
list_for_each_entry(handler, &data->handler, node) {
|
||||
pr_debug("pmi: notifying handler %p\n", handler);
|
||||
if (handler->type == data->msg.type)
|
||||
handler->handle_pmi_message(data->dev, data->msg);
|
||||
handler->handle_pmi_message(data->msg);
|
||||
}
|
||||
spin_unlock(&data->handler_spinlock);
|
||||
}
|
||||
|
@ -129,9 +124,14 @@ static int pmi_of_probe(struct of_device *dev,
|
|||
const struct of_device_id *match)
|
||||
{
|
||||
struct device_node *np = dev->node;
|
||||
struct pmi_data *data;
|
||||
int rc;
|
||||
|
||||
if (data) {
|
||||
printk(KERN_ERR "pmi: driver has already been initialized.\n");
|
||||
rc = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
|
||||
if (!data) {
|
||||
printk(KERN_ERR "pmi: could not allocate memory.\n");
|
||||
|
@ -154,7 +154,6 @@ static int pmi_of_probe(struct of_device *dev,
|
|||
|
||||
INIT_WORK(&data->work, pmi_notify_handlers);
|
||||
|
||||
dev->dev.driver_data = data;
|
||||
data->dev = dev;
|
||||
|
||||
data->irq = irq_of_parse_and_map(np, 0);
|
||||
|
@ -164,7 +163,7 @@ static int pmi_of_probe(struct of_device *dev,
|
|||
goto error_cleanup_iomap;
|
||||
}
|
||||
|
||||
rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data);
|
||||
rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
|
||||
if (rc) {
|
||||
printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
|
||||
data->irq, rc);
|
||||
|
@ -187,12 +186,9 @@ out:
|
|||
|
||||
static int pmi_of_remove(struct of_device *dev)
|
||||
{
|
||||
struct pmi_data *data;
|
||||
struct pmi_handler *handler, *tmp;
|
||||
|
||||
data = dev->dev.driver_data;
|
||||
|
||||
free_irq(data->irq, data);
|
||||
free_irq(data->irq, NULL);
|
||||
iounmap(data->pmi_reg);
|
||||
|
||||
spin_lock(&data->handler_spinlock);
|
||||
|
@ -202,7 +198,8 @@ static int pmi_of_remove(struct of_device *dev)
|
|||
|
||||
spin_unlock(&data->handler_spinlock);
|
||||
|
||||
kfree(dev->dev.driver_data);
|
||||
kfree(data);
|
||||
data = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -226,13 +223,13 @@ static void __exit pmi_module_exit(void)
|
|||
}
|
||||
module_exit(pmi_module_exit);
|
||||
|
||||
void pmi_send_message(struct of_device *device, pmi_message_t msg)
|
||||
int pmi_send_message(pmi_message_t msg)
|
||||
{
|
||||
struct pmi_data *data;
|
||||
unsigned long flags;
|
||||
DECLARE_COMPLETION_ONSTACK(completion);
|
||||
|
||||
data = device->dev.driver_data;
|
||||
if (!data)
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&data->msg_mutex);
|
||||
|
||||
|
@ -256,30 +253,26 @@ void pmi_send_message(struct of_device *device, pmi_message_t msg)
|
|||
data->completion = NULL;
|
||||
|
||||
mutex_unlock(&data->msg_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pmi_send_message);
|
||||
|
||||
void pmi_register_handler(struct of_device *device,
|
||||
struct pmi_handler *handler)
|
||||
int pmi_register_handler(struct pmi_handler *handler)
|
||||
{
|
||||
struct pmi_data *data;
|
||||
data = device->dev.driver_data;
|
||||
|
||||
if (!data)
|
||||
return;
|
||||
return -ENODEV;
|
||||
|
||||
spin_lock(&data->handler_spinlock);
|
||||
list_add_tail(&handler->node, &data->handler);
|
||||
spin_unlock(&data->handler_spinlock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pmi_register_handler);
|
||||
|
||||
void pmi_unregister_handler(struct of_device *device,
|
||||
struct pmi_handler *handler)
|
||||
void pmi_unregister_handler(struct pmi_handler *handler)
|
||||
{
|
||||
struct pmi_data *data;
|
||||
data = device->dev.driver_data;
|
||||
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
|
|
|
@@ -26,8 +26,9 @@
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/oprofile.h>
#include <linux/sched.h>

#include "oprofile_stats.h"
#include "event_buffer.h"
#include "cpu_buffer.h"

@@ -19,28 +19,10 @@ void free_event_buffer(void);

/* wake up the process sleeping on the event file */
void wake_up_buffer_waiter(void);

/* Each escaped entry is prefixed by ESCAPE_CODE
 * then one of the following codes, then the
 * relevant data.
 */
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9

#define INVALID_COOKIE ~0UL
#define NO_COOKIE 0UL

/* add data to the event buffer */
void add_event_entry(unsigned long data);

extern const struct file_operations event_buffer_fops;

/* mutex between sync_cpu_buffers() and the
@@ -53,9 +53,24 @@ int oprofile_setup(void)
         * us missing task deaths and eventually oopsing
         * when trying to process the event buffer.
         */
        if (oprofile_ops.sync_start) {
                int sync_ret = oprofile_ops.sync_start();
                switch (sync_ret) {
                case 0:
                        goto post_sync;
                case 1:
                        goto do_generic;
                case -1:
                        goto out3;
                default:
                        goto out3;
                }
        }
do_generic:
        if ((err = sync_start()))
                goto out3;

post_sync:
        is_setup = 1;
        mutex_unlock(&start_mutex);
        return 0;
@@ -118,7 +133,20 @@ out:
void oprofile_shutdown(void)
{
        mutex_lock(&start_mutex);
        if (oprofile_ops.sync_stop) {
                int sync_ret = oprofile_ops.sync_stop();
                switch (sync_ret) {
                case 0:
                        goto post_sync;
                case 1:
                        goto do_generic;
                default:
                        goto post_sync;
                }
        }
do_generic:
        sync_stop();
post_sync:
        if (oprofile_ops.shutdown)
                oprofile_ops.shutdown();
        is_setup = 0;
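
The setup and shutdown paths above branch on the return value of the optional sync_start/sync_stop hooks: 0 means the architecture handled buffer syncing itself, 1 means fall back to the generic sync code, and a negative value is an error. A minimal sketch of an arch hook honouring that contract might look as follows; the policy flag and helper function are invented for illustration and are not part of this series.

static int example_spu_profiling;               /* made-up policy flag */

static int example_start_spu_sync(void)         /* made-up arch helper */
{
        return 0;
}

static int example_sync_start(void)
{
        if (!example_spu_profiling)
                return 1;       /* let the generic sync_start() run */
        if (example_start_spu_sync() < 0)
                return -1;      /* reported as failure by oprofile_setup() */
        return 0;               /* handled here; skip the generic path */
}
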
@@ -39,14 +39,16 @@ struct op_system_config {

/* Per-arch configuration */
struct op_powerpc_model {
        void (*reg_setup) (struct op_counter_config *,
        int (*reg_setup) (struct op_counter_config *,
                           struct op_system_config *,
                           int num_counters);
        void (*cpu_setup) (struct op_counter_config *);
        void (*start) (struct op_counter_config *);
        void (*global_start) (struct op_counter_config *);
        int (*cpu_setup) (struct op_counter_config *);
        int (*start) (struct op_counter_config *);
        int (*global_start) (struct op_counter_config *);
        void (*stop) (void);
        void (*global_stop) (void);
        int (*sync_start)(void);
        int (*sync_stop)(void);
        void (*handle_interrupt) (struct pt_regs *,
                                  struct op_counter_config *);
        int num_counters;
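
Since reg_setup, cpu_setup, start and global_start now return int and the model gains sync_start/sync_stop members, an op_powerpc_model definition wires up roughly as in the sketch below. Only two hooks are filled in to keep it short, and the stub bodies and counter count are invented for the example.

static int example_global_start(struct op_counter_config *ctr)
{
        return 0;       /* a real model would program the PMU here */
}

static void example_global_stop(void)
{
}

static struct op_powerpc_model op_model_example = {
        .global_start   = example_global_start,
        .global_stop    = example_global_stop,
        .num_counters   = 4,    /* made-up counter count */
};
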
@@ -55,13 +55,13 @@ typedef struct {
struct pmi_handler {
        struct list_head node;
        u8 type;
        void (*handle_pmi_message) (struct of_device *, pmi_message_t);
        void (*handle_pmi_message) (pmi_message_t);
};

void pmi_register_handler(struct of_device *, struct pmi_handler *);
void pmi_unregister_handler(struct of_device *, struct pmi_handler *);
int pmi_register_handler(struct pmi_handler *);
void pmi_unregister_handler(struct pmi_handler *);

void pmi_send_message(struct of_device *, pmi_message_t);
int pmi_send_message(pmi_message_t);

#endif /* __KERNEL__ */
#endif /* _POWERPC_PMI_H */
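
The reworked PMI interface drops the struct of_device argument and instead lets registration and message sending fail with -ENODEV when no PMI device has been probed. A hypothetical client of the new API might look like the sketch below; the handler type value and all function names are invented for the example.

#include <asm/pmi.h>

static void example_handle_pmi(pmi_message_t msg)
{
        /* react to the firmware message here */
}

static struct pmi_handler example_handler = {
        .type                   = 3,    /* made-up message type */
        .handle_pmi_message     = example_handle_pmi,
};

static int example_pmi_client_init(void)
{
        int rc;

        rc = pmi_register_handler(&example_handler);
        if (rc)
                return rc;      /* -ENODEV: no PMI device present */
        return 0;
}
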
@@ -107,10 +107,10 @@ struct spu_runqueue;
struct device_node;

enum spu_utilization_state {
        SPU_UTIL_SYSTEM,
        SPU_UTIL_USER,
        SPU_UTIL_SYSTEM,
        SPU_UTIL_IOWAIT,
        SPU_UTIL_IDLE,
        SPU_UTIL_IDLE_LOADED,
        SPU_UTIL_MAX
};
@@ -121,9 +121,9 @@ struct spu {
        unsigned long problem_phys;
        struct spu_problem __iomem *problem;
        struct spu_priv2 __iomem *priv2;
        struct list_head list;
        struct list_head sched_list;
        struct list_head cbe_list;
        struct list_head full_list;
        enum { SPU_FREE, SPU_USED } alloc_state;
        int number;
        unsigned int irqs[3];
        u32 node;
@@ -137,6 +137,7 @@ struct spu {
        struct spu_runqueue *rq;
        unsigned long long timestamp;
        pid_t pid;
        pid_t tgid;
        int class_0_pending;
        spinlock_t register_lock;
@@ -165,11 +166,14 @@ struct spu {

        struct sys_device sysdev;

        int has_mem_affinity;
        struct list_head aff_list;

        struct {
                /* protected by interrupt reentrancy */
                enum spu_utilization_state utilization_state;
                unsigned long tstamp; /* time of last ctx switch */
                unsigned long times[SPU_UTIL_MAX];
                enum spu_utilization_state util_state;
                unsigned long long tstamp;
                unsigned long long times[SPU_UTIL_MAX];
                unsigned long long vol_ctx_switch;
                unsigned long long invol_ctx_switch;
                unsigned long long min_flt;
@@ -181,13 +185,29 @@ struct spu {
        } stats;
};

struct spu *spu_alloc(void);
struct spu *spu_alloc_node(int node);
void spu_free(struct spu *spu);
struct cbe_spu_info {
        struct mutex list_mutex;
        struct list_head spus;
        int n_spus;
        int nr_active;
        atomic_t reserved_spus;
};

extern struct cbe_spu_info cbe_spu_info[];

void spu_init_channels(struct spu *spu);
int spu_irq_class_0_bottom(struct spu *spu);
int spu_irq_class_1_bottom(struct spu *spu);
void spu_irq_setaffinity(struct spu *spu, int cpu);

#ifdef CONFIG_KEXEC
void crash_register_spus(struct list_head *list);
#else
static inline void crash_register_spus(struct list_head *list)
{
}
#endif

extern void spu_invalidate_slbs(struct spu *spu);
extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
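
The new cbe_spu_info[] array keeps one SPU list per BE node, protected by list_mutex, with each struct spu linked in through its cbe_list member. A rough illustration of walking it follows; it assumes, as elsewhere in the tree, that the array is indexed by node number up to MAX_NUMNODES.

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/nodemask.h>
#include <asm/spu.h>

static int example_count_spus(void)
{
        struct spu *spu;
        int node, total = 0;

        for (node = 0; node < MAX_NUMNODES; node++) {
                mutex_lock(&cbe_spu_info[node].list_mutex);
                list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
                        total++;
                mutex_unlock(&cbe_spu_info[node].list_mutex);
        }

        return total;
}
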
@@ -195,6 +215,20 @@ extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
struct mm_struct;
extern void spu_flush_all_slbs(struct mm_struct *mm);

/* This interface allows a profiler (e.g., OProfile) to store a ref
 * to spu context information that it creates. This caching technique
 * avoids the need to recreate this information after a save/restore operation.
 *
 * Assumes the caller has already incremented the ref count to
 * profile_info; then spu_context_destroy must call kref_put
 * on prof_info_kref.
 */
void spu_set_profile_private_kref(struct spu_context *ctx,
                                  struct kref *prof_info_kref,
                                  void ( * prof_info_release) (struct kref *kref));

void *spu_get_profile_private_kref(struct spu_context *ctx);

/* system callbacks from the SPU */
struct spu_syscall_block {
        u64 nr_ret;
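
The two kref hooks above let a profiler cache per-context data across SPU save/restore cycles: the caller takes the initial reference, hands it to the context, and the context drops it through the supplied release function when it is destroyed. A usage sketch, with the cached-info structure invented for the example:

#include <linux/kref.h>
#include <linux/slab.h>

struct example_cached_info {
        struct kref cache_ref;
        /* profiler-private data would live here */
};

static void example_release_info(struct kref *kref)
{
        kfree(container_of(kref, struct example_cached_info, cache_ref));
}

static int example_attach_info(struct spu_context *ctx)
{
        struct example_cached_info *info;

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info)
                return -ENOMEM;

        kref_init(&info->cache_ref);    /* the reference the context will own */
        spu_set_profile_private_kref(ctx, &info->cache_ref,
                                     example_release_info);
        return 0;
}
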
@@ -206,7 +240,8 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
struct file;
extern struct spufs_calls {
        asmlinkage long (*create_thread)(const char __user *name,
                        unsigned int flags, mode_t mode);
                        unsigned int flags, mode_t mode,
                        struct file *neighbor);
        asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc,
                        __u32 __user *ustatus);
        struct module *owner;
@@ -233,8 +268,10 @@ struct spu_coredump_calls {
#define SPU_CREATE_GANG 0x0002
#define SPU_CREATE_NOSCHED 0x0004
#define SPU_CREATE_ISOLATE 0x0008
#define SPU_CREATE_AFFINITY_SPU 0x0010
#define SPU_CREATE_AFFINITY_MEM 0x0020

#define SPU_CREATE_FLAG_ALL 0x000f /* mask of all valid flags */
#define SPU_CREATE_FLAG_ALL 0x003f /* mask of all valid flags */

#ifdef CONFIG_SPU_FS_MODULE
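
The two affinity flags widen the valid-flag mask from 0x000f to 0x003f, and SPU_CREATE_AFFINITY_SPU pairs with the new neighbour file descriptor passed to spu_create. A small, purely illustrative check of how a consumer might treat the widened mask; the return-value convention here is made up for the sketch.

static int example_check_spu_create_flags(unsigned int flags)
{
        if (flags & ~SPU_CREATE_FLAG_ALL)
                return -EINVAL;         /* unknown flag bits */

        if (flags & SPU_CREATE_AFFINITY_SPU)
                return 1;               /* made-up hint: a neighbour fd is expected */

        return 0;
}
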
@@ -403,6 +440,7 @@ struct spu_priv2 {
#define MFC_CNTL_RESUME_DMA_QUEUE (0ull << 0)
#define MFC_CNTL_SUSPEND_DMA_QUEUE (1ull << 0)
#define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK (1ull << 0)
#define MFC_CNTL_SUSPEND_MASK (1ull << 4)
#define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION (0ull << 8)
#define MFC_CNTL_SUSPEND_IN_PROGRESS (1ull << 8)
#define MFC_CNTL_SUSPEND_COMPLETE (3ull << 8)
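
MFC_CNTL_SUSPEND_DMA_QUEUE_MASK and MFC_CNTL_SUSPEND_COMPLETE are aimed at code that suspends the MFC DMA queue and then waits for the suspension to settle. A simplified polling sketch is below; timeouts and error handling are omitted, and the mfc_control_RW register name is assumed from the struct spu_priv2 layout elsewhere in this header.

#include <asm/io.h>
#include <asm/spu.h>

static void example_suspend_mfc(struct spu *spu)
{
        struct spu_priv2 __iomem *priv2 = spu->priv2;

        out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);

        /* wait until both status bits read back as "suspend complete" */
        while ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_SUSPEND_COMPLETE)
                        != MFC_CNTL_SUSPEND_COMPLETE)
                cpu_relax();
}
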
@@ -50,6 +50,12 @@
#define SPU_STOPPED_STATUS_P_I 8
#define SPU_STOPPED_STATUS_R 9

/*
 * Definitions for software decrementer status flag.
 */
#define SPU_DECR_STATUS_RUNNING 0x1
#define SPU_DECR_STATUS_WRAPPED 0x2

#ifndef __ASSEMBLY__
/**
 * spu_reg128 - generic 128-bit register definition.
@@ -63,7 +69,7 @@ struct spu_reg128 {
 * @gprs: Array of saved registers.
 * @fpcr: Saved floating point status control register.
 * @decr: Saved decrementer value.
 * @decr_status: Indicates decrementer run status.
 * @decr_status: Indicates software decrementer status flags.
 * @ppu_mb: Saved PPU mailbox data.
 * @ppuint_mb: Saved PPU interrupting mailbox data.
 * @tag_mask: Saved tag group mask.
@@ -12,6 +12,7 @@

#ifdef CONFIG_PROFILING

#include <linux/dcache.h>
#include <linux/types.h>

struct dcookie_user;

@@ -20,7 +20,8 @@
#define EM_PARISC 15 /* HPPA */
#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
#define EM_PPC 20 /* PowerPC */
#define EM_PPC64 21 /* PowerPC64 */
#define EM_PPC64 21 /* PowerPC64 */
#define EM_SPU 23 /* Cell BE SPU */
#define EM_SH 42 /* SuperH */
#define EM_SPARCV9 43 /* SPARC v9 64-bit */
#define EM_IA_64 50 /* HP/Intel IA-64 */
@@ -17,6 +17,26 @@
#include <linux/spinlock.h>
#include <asm/atomic.h>

/* Each escaped entry is prefixed by ESCAPE_CODE
 * then one of the following codes, then the
 * relevant data.
 * These #defines live in this file so that arch-specific
 * buffer sync'ing code can access them.
 */
#define ESCAPE_CODE ~0UL
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
#define KERNEL_EXIT_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
#define XEN_ENTER_SWITCH_CODE 10
#define SPU_PROFILING_CODE 11
#define SPU_CTX_SWITCH_CODE 12

struct super_block;
struct dentry;
struct file_operations;
@@ -35,6 +55,14 @@ struct oprofile_operations {
        int (*start)(void);
        /* Stop delivering interrupts. */
        void (*stop)(void);
        /* Arch-specific buffer sync functions.
         * Return value = 0: Success
         * Return value = -1: Failure
         * Return value = 1: Run generic sync function
         */
        int (*sync_start)(void);
        int (*sync_stop)(void);

        /* Initiate a stack backtrace. Optional. */
        void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
        /* CPU identification string. */
@@ -55,6 +83,13 @@ int oprofile_arch_init(struct oprofile_operations * ops);
 */
void oprofile_arch_exit(void);

/**
 * Add data to the event buffer.
 * The data passed is free-form, but typically consists of
 * file offsets, dcookies, context information, and ESCAPE codes.
 */
void add_event_entry(unsigned long data);

/**
 * Add a sample. This may be called from any context. Pass
 * smp_processor_id() as cpu.
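
Records written with add_event_entry() follow the ESCAPE_CODE convention listed above: an escape word, a code word, then code-specific data. A toy example emitting one of the new SPU records is shown below; the single payload word is made up, since the real record layouts are defined by the SPU sync code and the userspace daemon.

#include <linux/oprofile.h>

static void example_emit_spu_header(unsigned long num_spu_nodes)
{
        add_event_entry(ESCAPE_CODE);
        add_event_entry(SPU_PROFILING_CODE);
        add_event_entry(num_spu_nodes);
}
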
@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
                                __u32 __user *ustatus);
asmlinkage long sys_spu_create(const char __user *name,
                                unsigned int flags, mode_t mode);
                                unsigned int flags, mode_t mode, int fd);

asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
                                unsigned dev);