Commit 99ee21ee authored by John Levon, committed by Linus Torvalds

[PATCH] oprofile - core

Add the oprofile core.  The core design is very similar to what we
discussed in private mail.  The nasty details are documented in the
patch below.
parent 7b401a13
/**
* @file buffer_sync.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*
* This is the core of the buffer management. Each
* CPU buffer is processed and entered into the
* global event buffer. Such processing is necessary
* in several circumstances, mentioned below.
*
* The processing does the job of converting the
* transitory EIP value into a persistent dentry/offset
* value that the profiler can record at its leisure.
*
* See fs/dcookies.c for a description of the dentry/offset
* objects.
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/timer.h>
#include <linux/dcookies.h>
#include <linux/notifier.h>
#include <linux/profile.h>
#include <linux/workqueue.h>
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "oprofile_stats.h"
#define DEFAULT_EXPIRE (HZ / 4)
static void wq_sync_buffers(void *);
static DECLARE_WORK(sync_wq, wq_sync_buffers, 0);
static struct timer_list sync_timer;
static void timer_ping(unsigned long data);
static void sync_cpu_buffers(void);
/* We must make sure to process every entry in the CPU buffers
 * before a task gets the PF_EXITING flag; otherwise we would hold
 * references to a possibly-freed task_struct. We are safe with
 * samples past the PF_EXITING point in do_exit(), because we
 * explicitly check for that in cpu_buffer.c
 */
static int exit_task_notify(struct notifier_block * self, unsigned long val, void * data)
{
sync_cpu_buffers();
return 0;
}
/* There are two cases of a task modifying the task->mm->mmap list
 * that we must concern ourselves with. First, when a task is about
 * to exit (exit_mmap()), we should process the buffer to deal with
 * any samples in the CPU buffer before we lose the ->mmap information
 * we need. Second, a task may unmap (part of) an executable mapping,
 * so we want to process samples before that happens too.
 */
static int mm_notify(struct notifier_block * self, unsigned long val, void * data)
{
sync_cpu_buffers();
return 0;
}
static struct notifier_block exit_task_nb = {
.notifier_call = exit_task_notify,
};
static struct notifier_block exec_unmap_nb = {
.notifier_call = mm_notify,
};
static struct notifier_block exit_mmap_nb = {
.notifier_call = mm_notify,
};
int sync_start(void)
{
int err = profile_event_register(EXIT_TASK, &exit_task_nb);
if (err)
goto out;
err = profile_event_register(EXIT_MMAP, &exit_mmap_nb);
if (err)
goto out2;
err = profile_event_register(EXEC_UNMAP, &exec_unmap_nb);
if (err)
goto out3;
init_timer(&sync_timer);
sync_timer.function = timer_ping;
sync_timer.expires = jiffies + DEFAULT_EXPIRE;
add_timer(&sync_timer);
out:
return err;
out3:
profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
out2:
profile_event_unregister(EXIT_TASK, &exit_task_nb);
goto out;
}
void sync_stop(void)
{
profile_event_unregister(EXIT_TASK, &exit_task_nb);
profile_event_unregister(EXIT_MMAP, &exit_mmap_nb);
profile_event_unregister(EXEC_UNMAP, &exec_unmap_nb);
del_timer_sync(&sync_timer);
}
/* Optimisation. We can manage without taking the dcookie sem
* because we cannot reach this code without at least one
* dcookie user still being registered (namely, the reader
* of the event buffer). */
static inline unsigned long fast_get_dcookie(struct dentry * dentry,
struct vfsmount * vfsmnt)
{
unsigned long cookie;
if (dentry->d_cookie)
return (unsigned long)dentry;
get_dcookie(dentry, vfsmnt, &cookie);
return cookie;
}
/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
 * which corresponds loosely to "application name". This is
 * not strictly necessary but allows oprofile to associate
 * shared-library samples with particular applications.
 */
static unsigned long get_exec_dcookie(struct mm_struct * mm)
{
unsigned long cookie = 0;
struct vm_area_struct * vma;
if (!mm)
goto out;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!vma->vm_file)
continue;
if (!(vma->vm_flags & VM_EXECUTABLE))
continue;
cookie = fast_get_dcookie(vma->vm_file->f_dentry,
vma->vm_file->f_vfsmnt);
break;
}
out:
return cookie;
}
/* Convert the EIP value of a sample into a persistent dentry/offset
* pair that can then be added to the global event buffer. We make
* sure to do this lookup before a mm->mmap modification happens so
* we don't lose track.
*/
static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
{
unsigned long cookie = 0;
struct vm_area_struct * vma;
for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
if (!vma)
goto out;
if (!vma->vm_file)
continue;
if (addr < vma->vm_start || addr >= vma->vm_end)
continue;
cookie = fast_get_dcookie(vma->vm_file->f_dentry,
vma->vm_file->f_vfsmnt);
*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
break;
}
out:
return cookie;
}
static unsigned long last_cookie = ~0UL;
static void add_cpu_switch(int i)
{
add_event_entry(ESCAPE_CODE);
add_event_entry(CPU_SWITCH_CODE);
add_event_entry(i);
last_cookie = ~0UL;
}
static void add_ctx_switch(pid_t pid, unsigned long cookie)
{
add_event_entry(ESCAPE_CODE);
add_event_entry(CTX_SWITCH_CODE);
add_event_entry(pid);
add_event_entry(cookie);
}
static void add_cookie_switch(unsigned long cookie)
{
add_event_entry(ESCAPE_CODE);
add_event_entry(COOKIE_SWITCH_CODE);
add_event_entry(cookie);
}
static void add_sample_entry(unsigned long offset, unsigned long event)
{
add_event_entry(offset);
add_event_entry(event);
}
static void add_us_sample(struct mm_struct * mm, struct op_sample * s)
{
unsigned long cookie;
off_t offset;
cookie = lookup_dcookie(mm, s->eip, &offset);
if (!cookie)
return;
if (cookie != last_cookie) {
add_cookie_switch(cookie);
last_cookie = cookie;
}
add_sample_entry(offset, s->event);
}
static inline int is_kernel(unsigned long val)
{
return val > __PAGE_OFFSET;
}
/* Add a sample to the global event buffer. If possible the
* sample is converted into a persistent dentry/offset pair
* for later lookup from userspace.
*/
static void add_sample(struct mm_struct * mm, struct op_sample * s)
{
if (is_kernel(s->eip)) {
add_sample_entry(s->eip, s->event);
} else if (mm) {
add_us_sample(mm, s);
}
}
static void release_mm(struct mm_struct * mm)
{
if (mm)
up_read(&mm->mmap_sem);
}
/* Take the task's mmap_sem to protect ourselves from
* races when we do lookup_dcookie().
*/
static struct mm_struct * take_task_mm(struct task_struct * task)
{
struct mm_struct * mm;
task_lock(task);
mm = task->mm;
task_unlock(task);
/* If task->mm is not NULL, mm_count must be at least 1. It cannot
 * drop to 0 without the task exiting, and the exit path has to sleep
 * on buffer_sem first. So we do not need to take our own reference
 * on the mm.
 */
if (mm) {
/* More ugliness. If a task took its mmap_sem
 * and then went to sleep on buffer_sem, we would
 * deadlock waiting for it. So all we can do is
 * trylock. This will lose samples :/
 */
if (!down_read_trylock(&mm->mmap_sem)) {
/* FIXME: this underestimates samples lost */
atomic_inc(&oprofile_stats.sample_lost_mmap_sem);
mm = NULL;
}
}
return mm;
}
static inline int is_ctx_switch(unsigned long val)
{
return val == ~0UL;
}
/* Sync one of the CPU's buffers into the global event buffer.
 * Here we need to go through each batch of samples punctuated
 * by context-switch notes, taking the task's mmap_sem and doing
 * a lookup in task->mm->mmap to convert each EIP into a
 * dcookie/offset pair.
 */
static void sync_buffer(struct oprofile_cpu_buffer * cpu_buf)
{
struct mm_struct * mm = 0;
struct task_struct * new;
unsigned long cookie;
int i;
for (i=0; i < cpu_buf->pos; ++i) {
struct op_sample * s = &cpu_buf->buffer[i];
if (is_ctx_switch(s->eip)) {
new = (struct task_struct *)s->event;
release_mm(mm);
mm = take_task_mm(new);
cookie = get_exec_dcookie(mm);
add_ctx_switch(new->pid, cookie);
} else {
add_sample(mm, s);
}
}
release_mm(mm);
cpu_buf->pos = 0;
}
/* Process each CPU's local buffer into the global
* event buffer.
*/
static void sync_cpu_buffers(void)
{
int i;
down(&buffer_sem);
for (i = 0; i < NR_CPUS; ++i) {
struct oprofile_cpu_buffer * cpu_buf;
if (!cpu_possible(i))
continue;
cpu_buf = &cpu_buffer[i];
/* We take a spin lock even though we might
 * sleep. It's OK because the other users only
 * ever trylock, and this region is already
 * serialised by buffer_sem. It's raw to stop
 * the preempt bogometer firing. Fruity, huh? */
_raw_spin_lock(&cpu_buf->int_lock);
add_cpu_switch(i);
sync_buffer(cpu_buf);
_raw_spin_unlock(&cpu_buf->int_lock);
}
up(&buffer_sem);
mod_timer(&sync_timer, jiffies + DEFAULT_EXPIRE);
}
static void wq_sync_buffers(void * data)
{
sync_cpu_buffers();
}
/* It is possible that we could have no munmap() or
 * other events for a period of time. This would cause
 * the CPU buffers to overflow and lose samples and
 * context switches. We reduce the problem by syncing
 * on a timeout when nothing has happened for a while.
 */
static void timer_ping(unsigned long data)
{
schedule_work(&sync_wq);
/* timer is re-added by the scheduled task */
}
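The cookie values that sync_buffer() emits are opaque handles; userspace resolves them back to pathnames with the lookup_dcookie() system call provided by the companion fs/dcookies.c patch referenced above. A minimal, hedged sketch of that resolution step, assuming a libc whose <sys/syscall.h> defines SYS_lookup_dcookie and a 64-bit platform where the u64 cookie can be passed as a single syscall() argument:

/* Hedged userspace sketch only: resolve an event-buffer cookie to a path.
 * Assumes SYS_lookup_dcookie is available and that a u64 cookie can be
 * passed as one syscall() argument (true on 64-bit platforms). */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define DCOOKIE_BUF_LEN 4096

static int print_cookie_path(unsigned long long cookie)
{
	char path[DCOOKIE_BUF_LEN];
	/* returns the path length on success, a negative errno on failure */
	long len = syscall(SYS_lookup_dcookie, cookie, path, sizeof(path));
	if (len < 0)
		return (int)len;
	printf("%.*s\n", (int)len, path);
	return 0;
}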
/**
* @file buffer_sync.h
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#ifndef OPROFILE_BUFFER_SYNC_H
#define OPROFILE_BUFFER_SYNC_H
/* add the necessary profiling hooks */
int sync_start(void);
/* remove the hooks */
void sync_stop(void);
#endif /* OPROFILE_BUFFER_SYNC_H */
/**
* @file cpu_buffer.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*
* Each CPU has a local buffer that stores PC value/event
* pairs. We also log context switches when we notice them.
* Eventually each CPU's buffer is processed into the global
* event buffer by sync_cpu_buffers().
*
* We use a local buffer for two reasons: an NMI or similar
* interrupt cannot synchronise with the rest of the kernel,
* and high sampling rates would lead to catastrophic
* synchronisation overhead if a single global buffer were used.
*/
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include "cpu_buffer.h"
#include "oprof.h"
#include "oprofile_stats.h"
struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;
static unsigned long buffer_size;
static void __free_cpu_buffers(int num)
{
int i;
for (i=0; i < num; ++i) {
struct oprofile_cpu_buffer * b = &cpu_buffer[i];
if (!cpu_possible(i))
continue;
vfree(b->buffer);
}
}
int alloc_cpu_buffers(void)
{
int i;
buffer_size = fs_cpu_buffer_size;
for (i=0; i < NR_CPUS; ++i) {
struct oprofile_cpu_buffer * b = &cpu_buffer[i];
if (!cpu_possible(i))
continue;
b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size);
if (!b->buffer)
goto fail;
spin_lock_init(&b->int_lock);
b->pos = 0;
b->last_task = 0;
b->sample_received = 0;
b->sample_lost_locked = 0;
b->sample_lost_overflow = 0;
}
return 0;
fail:
__free_cpu_buffers(i);
return -ENOMEM;
}
void free_cpu_buffers(void)
{
__free_cpu_buffers(NR_CPUS);
}
/* Note we can't use a semaphore here as this is supposed to
* be safe from any context. Instead we trylock the CPU's int_lock.
* int_lock is taken by the processing code in sync_cpu_buffers()
* so we avoid disturbing that.
*/
void oprofile_add_sample(unsigned long eip, unsigned long event, int cpu)
{
struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
struct task_struct * task;
/* temporary ? */
BUG_ON(!oprofile_started);
cpu_buf->sample_received++;
if (!spin_trylock(&cpu_buf->int_lock)) {
cpu_buf->sample_lost_locked++;
return;
}
if (cpu_buf->pos > buffer_size - 2) {
cpu_buf->sample_lost_overflow++;
goto out;
}
task = current;
/* notice a task switch */
if (cpu_buf->last_task != task) {
cpu_buf->last_task = task;
if (!(task->flags & PF_EXITING)) {
cpu_buf->buffer[cpu_buf->pos].eip = ~0UL;
cpu_buf->buffer[cpu_buf->pos].event = (unsigned long)task;
cpu_buf->pos++;
}
}
/* If the task is exiting it's not safe to take a sample
* as the task_struct is about to be freed. We can't just
* notify at release_task() time because of CLONE_DETACHED
* tasks that release_task() themselves.
*/
if (task->flags & PF_EXITING) {
cpu_buf->sample_lost_task_exit++;
goto out;
}
cpu_buf->buffer[cpu_buf->pos].eip = eip;
cpu_buf->buffer[cpu_buf->pos].event = event;
cpu_buf->pos++;
out:
spin_unlock(&cpu_buf->int_lock);
}
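The only entry point a sampling interrupt needs is oprofile_add_sample(), which, as the comment above notes, merely trylocks the per-CPU buffer and is therefore safe from any context, including NMI. A hedged sketch of how a hypothetical counter-overflow handler would feed it (the handler name and how it obtains eip/event are invented for illustration; the real per-architecture drivers are not part of this patch):

/* Hedged sketch: a hypothetical per-CPU counter-overflow handler.
 * Only the oprofile_add_sample() call reflects this patch. */
#include <linux/smp.h>
#include <linux/oprofile.h>

static void hypothetical_overflow_handler(unsigned long eip, unsigned long event)
{
	/* Safe even from NMI context: the CPU buffer is only ever trylocked. */
	oprofile_add_sample(eip, event, smp_processor_id());
}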
/**
* @file cpu_buffer.h
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#ifndef OPROFILE_CPU_BUFFER_H
#define OPROFILE_CPU_BUFFER_H
#include <linux/types.h>
#include <linux/spinlock.h>
struct task_struct;
/* allocate a sample buffer for each CPU */
int alloc_cpu_buffers(void);
void free_cpu_buffers(void);
/* CPU buffer is composed of such entries (which are
* also used for context switch notes)
*/
struct op_sample {
unsigned long eip;
unsigned long event;
};
struct oprofile_cpu_buffer {
spinlock_t int_lock;
/* protected by int_lock */
unsigned long pos;
struct task_struct * last_task;
struct op_sample * buffer;
unsigned long sample_received;
unsigned long sample_lost_locked;
unsigned long sample_lost_overflow;
unsigned long sample_lost_task_exit;
} ____cacheline_aligned;
extern struct oprofile_cpu_buffer cpu_buffer[];
#endif /* OPROFILE_CPU_BUFFER_H */
/**
* @file event_buffer.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*
* This is the global event buffer that the user-space
* daemon reads from. The event buffer is an untyped array
* of unsigned longs. Non-sample records are prefixed by the
* escape value ESCAPE_CODE, followed by an identifying code.
*/
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/dcookies.h>
#include <linux/oprofile.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "oprof.h"
#include "oprofile_stats.h"
DECLARE_MUTEX(buffer_sem);
static unsigned long buffer_opened;
static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
static unsigned long * event_buffer;
static unsigned long buffer_size;
static unsigned long buffer_watershed;
static size_t buffer_pos;
/* atomic_t because wait_event checks it outside of buffer_sem */
static atomic_t buffer_ready = ATOMIC_INIT(0);
/* Add an entry to the event buffer. When we
* get near to the end we wake up the process
* sleeping on the read() of the file.
*/
void add_event_entry(unsigned long value)
{
if (buffer_pos == buffer_size) {
atomic_inc(&oprofile_stats.event_lost_overflow);
return;
}
event_buffer[buffer_pos] = value;
if (++buffer_pos == buffer_size - buffer_watershed) {
atomic_set(&buffer_ready, 1);
wake_up(&buffer_wait);
}
}
/* Wake up the waiting process if any. This happens
* on "echo 0 >/dev/oprofile/enable" so the daemon
* processes the data remaining in the event buffer.
*/
void wake_up_buffer_waiter(void)
{
down(&buffer_sem);
atomic_set(&buffer_ready, 1);
wake_up(&buffer_wait);
up(&buffer_sem);
}
int alloc_event_buffer(void)
{
int err = -ENOMEM;
spin_lock(&oprofilefs_lock);
buffer_size = fs_buffer_size;
buffer_watershed = fs_buffer_watershed;
spin_unlock(&oprofilefs_lock);
if (buffer_watershed >= buffer_size)
return -EINVAL;
event_buffer = vmalloc(sizeof(unsigned long) * buffer_size);
if (!event_buffer)
goto out;
err = 0;
out:
return err;
}
void free_event_buffer(void)
{
vfree(event_buffer);
}
int event_buffer_open(struct inode * inode, struct file * file)
{
int err = -EPERM;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (test_and_set_bit(0, &buffer_opened))
return -EBUSY;
/* Register as a user of dcookies
* to ensure they persist for the lifetime of
* the open event file
*/
err = -EINVAL;
file->private_data = dcookie_register();
if (!file->private_data)
goto out;
if ((err = oprofile_setup()))
goto fail;
/* NB: the actual start happens from userspace
* echo 1 >/dev/oprofile/enable
*/
return 0;
fail:
dcookie_unregister(file->private_data);
out:
clear_bit(0, &buffer_opened);
return err;
}
int event_buffer_release(struct inode * inode, struct file * file)
{
oprofile_stop();
oprofile_shutdown();
dcookie_unregister(file->private_data);
buffer_pos = 0;
atomic_set(&buffer_ready, 0);
clear_bit(0, &buffer_opened);
return 0;
}
ssize_t event_buffer_read(struct file * file, char * buf, size_t count, loff_t * offset)
{
int retval = -EINVAL;
size_t const max = buffer_size * sizeof(unsigned long);
/* handling partial reads is more trouble than it's worth */
if (count != max || *offset)
return -EINVAL;
/* wait for the event buffer to fill up with some data */
wait_event_interruptible(buffer_wait, atomic_read(&buffer_ready));
if (signal_pending(current))
return -EINTR;
down(&buffer_sem);
atomic_set(&buffer_ready, 0);
retval = -EFAULT;
count = buffer_pos * sizeof(unsigned long);
if (copy_to_user(buf, event_buffer, count))
goto out;
retval = count;
buffer_pos = 0;
out:
up(&buffer_sem);
return retval;
}
struct file_operations event_buffer_fops = {
.open = event_buffer_open,
.release = event_buffer_release,
.read = event_buffer_read,
};
/**
* @file event_buffer.h
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#ifndef EVENT_BUFFER_H
#define EVENT_BUFFER_H
#include <linux/types.h>
#include <linux/sem.h>
int alloc_event_buffer(void);
void free_event_buffer(void);
/* wake up the process sleeping on the event file */
void wake_up_buffer_waiter(void);
/* Each escaped entry is prefixed by ESCAPE_CODE
* then one of the following codes, then the
* relevant data.
*/
#define ESCAPE_CODE (~0UL)
#define CTX_SWITCH_CODE 1
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
/* add data to the event buffer */
void add_event_entry(unsigned long data);
extern struct file_operations event_buffer_fops;
/* mutex between sync_cpu_buffers() and the
* file reading code.
*/
extern struct semaphore buffer_sem;
#endif /* EVENT_BUFFER_H */
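Everything the daemon read()s from the buffer file is a flat stream of unsigned longs in the record formats produced by buffer_sync.c: escaped CPU/context/cookie switch notes interleaved with plain offset/event sample pairs. A hedged userspace sketch of a decoder for one such read, assuming complete records and the native word size (the printf output is of course only illustrative):

/* Hedged userspace sketch: decode one buffer's worth of event data.
 * Record layouts follow add_cpu_switch(), add_ctx_switch(),
 * add_cookie_switch() and add_sample_entry() in buffer_sync.c. */
#include <stdio.h>
#include <stddef.h>

#define ESCAPE_CODE		(~0UL)
#define CTX_SWITCH_CODE		1
#define CPU_SWITCH_CODE		2
#define COOKIE_SWITCH_CODE	3

static void decode_events(unsigned long const * buf, size_t nwords)
{
	size_t i = 0;
	while (i + 1 < nwords) {
		if (buf[i] == ESCAPE_CODE) {
			switch (buf[i + 1]) {
			case CPU_SWITCH_CODE:	/* samples that follow come from this CPU */
				printf("cpu %lu\n", buf[i + 2]);
				i += 3;
				break;
			case CTX_SWITCH_CODE:	/* pid plus the task's application cookie */
				printf("task %lu app cookie %lx\n", buf[i + 2], buf[i + 3]);
				i += 4;
				break;
			case COOKIE_SWITCH_CODE: /* offsets that follow belong to this dentry */
				printf("cookie %lx\n", buf[i + 2]);
				i += 3;
				break;
			default:		/* unknown escape: skip the pair */
				i += 2;
				break;
			}
		} else {
			/* plain sample: file offset (or raw kernel EIP), then event number */
			printf("sample %lx event %lu\n", buf[i], buf[i + 1]);
			i += 2;
		}
	}
}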
/**
* @file oprof.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/dcookies.h>
#include <linux/notifier.h>
#include <linux/profile.h>
#include <linux/oprofile.h>
#include "oprof.h"
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprofile_stats.h"
struct oprofile_operations * oprofile_ops;
enum oprofile_cpu oprofile_cpu_type;
unsigned long oprofile_started;
static unsigned long is_setup;
static DECLARE_MUTEX(start_sem);
int oprofile_setup(void)
{
int err;
if ((err = alloc_cpu_buffers()))
goto out;
if ((err = alloc_event_buffer()))
goto out1;
if (oprofile_ops->setup && (err = oprofile_ops->setup()))
goto out2;
/* Note that even though this means part of the
 * profiling overhead starts already at setup time,
 * it's necessary to prevent us from missing task
 * deaths and eventually oopsing when trying to
 * process the event buffer.
 */
if ((err = sync_start()))
goto out3;
down(&start_sem);
is_setup = 1;
up(&start_sem);
return 0;
out3:
if (oprofile_ops->shutdown)
oprofile_ops->shutdown();
out2:
free_event_buffer();
out1:
free_cpu_buffers();
out:
return err;
}
/* Actually start profiling (echo 1>/dev/oprofile/enable) */
int oprofile_start(void)
{
int err = -EINVAL;
down(&start_sem);
if (!is_setup)
goto out;
err = 0;
if (oprofile_started)
goto out;
if ((err = oprofile_ops->start()))
goto out;
oprofile_started = 1;
oprofile_reset_stats();
out:
up(&start_sem);
return err;
}
/* echo 0>/dev/oprofile/enable */
void oprofile_stop(void)
{
down(&start_sem);
if (!oprofile_started)
goto out;
oprofile_ops->stop();
oprofile_started = 0;
/* wake up the daemon to read what remains */
wake_up_buffer_waiter();
out:
up(&start_sem);
}
void oprofile_shutdown(void)
{
sync_stop();
if (oprofile_ops->shutdown)
oprofile_ops->shutdown();
/* down() is also necessary to synchronise all pending events
* before freeing */
down(&buffer_sem);
is_setup = 0;
up(&buffer_sem);
free_event_buffer();
free_cpu_buffers();
}
static int __init oprofile_init(void)
{
int err;
/* Architecture must fill in the interrupt ops and the
* logical CPU type.
*/
err = oprofile_arch_init(&oprofile_ops, &oprofile_cpu_type);
if (err)
goto out;
err = oprofilefs_register();
if (err)
goto out;
out:
return err;
}
static void __exit oprofile_exit(void)
{
oprofilefs_unregister();
}
MODULE_LICENSE("GPL");
module_init(oprofile_init);
module_exit(oprofile_exit);
/**
* @file oprof.h
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#ifndef OPROF_H
#define OPROF_H
#include <linux/spinlock.h>
#include <linux/oprofile.h>
int oprofile_setup(void);
void oprofile_shutdown(void);
int oprofilefs_register(void);
void oprofilefs_unregister(void);
int oprofile_start(void);
void oprofile_stop(void);
extern unsigned long fs_buffer_size;
extern unsigned long fs_cpu_buffer_size;
extern unsigned long fs_buffer_watershed;
extern enum oprofile_cpu oprofile_cpu_type;
extern struct oprofile_operations * oprofile_ops;
extern unsigned long oprofile_started;
void oprofile_create_files(struct super_block * sb, struct dentry * root);
#endif /* OPROF_H */
/**
* @file oprofile_files.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#include <linux/oprofile.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include "oprof.h"
#include "event_buffer.h"
#include "oprofile_stats.h"
unsigned long fs_buffer_size = 131072;
unsigned long fs_cpu_buffer_size = 8192;
unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
static int simple_open(struct inode * inode, struct file * filp)
{
return 0;
}
static ssize_t cpu_type_read(struct file * file, char * buf, size_t count, loff_t * offset)
{
unsigned long cpu_type = oprofile_cpu_type;
return oprofilefs_ulong_to_user(&cpu_type, buf, count, offset);
}
static struct file_operations cpu_type_fops = {
.open = simple_open,
.read = cpu_type_read,
};
static ssize_t enable_read(struct file * file, char * buf, size_t count, loff_t * offset)
{
return oprofilefs_ulong_to_user(&oprofile_started, buf, count, offset);
}
static ssize_t enable_write(struct file *file, char const * buf, size_t count, loff_t * offset)
{
unsigned long val;
int retval;
if (*offset)
return -EINVAL;
retval = oprofilefs_ulong_from_user(&val, buf, count);
if (retval)
return retval;
if (val)
retval = oprofile_start();
else
oprofile_stop();
if (retval)
return retval;
return count;
}
static struct file_operations enable_fops = {
.open = simple_open,
.read = enable_read,
.write = enable_write,
};
void oprofile_create_files(struct super_block * sb, struct dentry * root)
{
oprofilefs_create_file(sb, root, "enable", &enable_fops);
oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size);
oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops);
oprofile_create_stats_files(sb, root);
if (oprofile_ops->create_files)
oprofile_ops->create_files(sb, root);
}
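With oprofilefs mounted on /dev/oprofile (by convention), all of the files created above are driven by plain text reads and writes: oprofilefs_ulong_from_user() parses a decimal string, and opening the buffer file is what triggers oprofile_setup(). A hedged userspace sketch of the start-up sequence a daemon would follow, assuming that mount point:

/* Hedged userspace sketch: configure and start profiling through the
 * files created by oprofile_create_files(). Assumes oprofilefs is
 * mounted on /dev/oprofile; error handling is kept minimal. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(char const * path, char const * val)
{
	int fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ssize_t n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* sizes must be set before the buffer is opened, since
	 * event_buffer_open() -> oprofile_setup() reads them */
	write_str("/dev/oprofile/buffer_size", "131072");
	write_str("/dev/oprofile/cpu_buffer_size", "8192");

	int buf_fd = open("/dev/oprofile/buffer", O_RDONLY);
	if (buf_fd < 0) {
		perror("/dev/oprofile/buffer");
		return 1;
	}

	/* only now do the sampling interrupts actually start */
	write_str("/dev/oprofile/enable", "1");

	/* the daemon would then loop on read(buf_fd, ...), asking for exactly
	 * buffer_size * sizeof(unsigned long) bytes, and decode the stream
	 * as in the sketch after event_buffer.h above */
	return 0;
}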
/**
* @file oprofile_stats.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
*/
#include <linux/oprofile.h>
#include <linux/smp.h>
#include "oprofile_stats.h"
#include "cpu_buffer.h"
struct oprofile_stat_struct oprofile_stats;
void oprofile_reset_stats(void)
{
struct oprofile_cpu_buffer * cpu_buf;
int i;
for (i = 0; i < NR_CPUS; ++i) {
if (!cpu_possible(i))
continue;
cpu_buf = &cpu_buffer[i];
cpu_buf->sample_received = 0;
cpu_buf->sample_lost_locked = 0;
cpu_buf->sample_lost_overflow = 0;
cpu_buf->sample_lost_task_exit = 0;
}
atomic_set(&oprofile_stats.sample_lost_mmap_sem, 0);
atomic_set(&oprofile_stats.event_lost_overflow, 0);
}
void oprofile_create_stats_files(struct super_block * sb, struct dentry * root)
{
struct oprofile_cpu_buffer * cpu_buf;
struct dentry * cpudir;
struct dentry * dir;
char buf[10];
int i;
dir = oprofilefs_mkdir(sb, root, "stats");
if (!dir)
return;
for (i = 0; i < NR_CPUS; ++i) {
if (!cpu_possible(i))
continue;
cpu_buf = &cpu_buffer[i];
snprintf(buf, sizeof(buf), "cpu%d", i);
cpudir = oprofilefs_mkdir(sb, dir, buf);
/* Strictly speaking access to these ulongs is racy,
* but we can't simply lock them, and they are
* informational only.
*/
oprofilefs_create_ro_ulong(sb, cpudir, "sample_received",
&cpu_buf->sample_received);
oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_locked",
&cpu_buf->sample_lost_locked);
oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow",
&cpu_buf->sample_lost_overflow);
oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_task_exit",
&cpu_buf->sample_lost_task_exit);
}
oprofilefs_create_ro_atomic(sb, dir, "sample_lost_mmap_sem",
&oprofile_stats.sample_lost_mmap_sem);
oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow",
&oprofile_stats.event_lost_overflow);
}
/**
* @file oprofile_stats.h
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
*/
#ifndef OPROFILE_STATS_H
#define OPROFILE_STATS_H
#include <asm/atomic.h>
struct oprofile_stat_struct {
atomic_t sample_lost_mmap_sem;
atomic_t event_lost_overflow;
};
extern struct oprofile_stat_struct oprofile_stats;
/* reset all stats to zero */
void oprofile_reset_stats(void);
struct super_block;
struct dentry;
/* create the stats/ dir */
void oprofile_create_stats_files(struct super_block * sb, struct dentry * root);
#endif /* OPROFILE_STATS_H */
/**
* @file oprofilefs.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
*
* A simple filesystem for configuration and
* access of oprofile.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/file.h>
#include <linux/namei.h>
#include <linux/oprofile.h>
#include <asm/uaccess.h>
#include "oprof.h"
#define OPROFILEFS_MAGIC 0x6f70726f
spinlock_t oprofilefs_lock = SPIN_LOCK_UNLOCKED;
static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode)
{
struct inode * inode = new_inode(sb);
if (inode) {
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
}
return inode;
}
static struct super_operations s_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
};
#define TMPBUFSIZE 50
ssize_t oprofilefs_ulong_to_user(unsigned long * val, char * buf, size_t count, loff_t * offset)
{
char tmpbuf[TMPBUFSIZE];
size_t maxlen;
if (!count)
return 0;
spin_lock(&oprofilefs_lock);
maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", *val);
spin_unlock(&oprofilefs_lock);
if (maxlen > TMPBUFSIZE)
maxlen = TMPBUFSIZE;
if (*offset > maxlen)
return 0;
if (count > maxlen - *offset)
count = maxlen - *offset;
if (copy_to_user(buf, tmpbuf + *offset, count))
return -EFAULT;
*offset += count;
return count;
}
int oprofilefs_ulong_from_user(unsigned long * val, char const * buf, size_t count)
{
char tmpbuf[TMPBUFSIZE];
if (!count)
return 0;
if (count > TMPBUFSIZE - 1)
return -EINVAL;
memset(tmpbuf, 0x0, TMPBUFSIZE);
if (copy_from_user(tmpbuf, buf, count))
return -EFAULT;
spin_lock(&oprofilefs_lock);
*val = simple_strtoul(tmpbuf, NULL, 10);
spin_unlock(&oprofilefs_lock);
return 0;
}
static ssize_t ulong_read_file(struct file * file, char * buf, size_t count, loff_t * offset)
{
return oprofilefs_ulong_to_user(file->private_data, buf, count, offset);
}
static ssize_t ulong_write_file(struct file * file, char const * buf, size_t count, loff_t * offset)
{
unsigned long * value = file->private_data;
int retval;
if (*offset)
return -EINVAL;
retval = oprofilefs_ulong_from_user(value, buf, count);
if (retval)
return retval;
return count;
}
static int default_open(struct inode * inode, struct file * filp)
{
if (inode->u.generic_ip)
filp->private_data = inode->u.generic_ip;
return 0;
}
static struct file_operations ulong_fops = {
.read = ulong_read_file,
.write = ulong_write_file,
.open = default_open,
};
static struct file_operations ulong_ro_fops = {
.read = ulong_read_file,
.open = default_open,
};
static struct dentry * __oprofilefs_create_file(struct super_block * sb,
struct dentry * root, char const * name, struct file_operations * fops)
{
struct dentry * dentry;
struct inode * inode;
struct qstr qname;
qname.name = name;
qname.len = strlen(name);
qname.hash = full_name_hash(qname.name, qname.len);
dentry = d_alloc(root, &qname);
if (!dentry)
return 0;
inode = oprofilefs_get_inode(sb, S_IFREG | 0644);
if (!inode) {
dput(dentry);
return 0;
}
inode->i_fop = fops;
d_add(dentry, inode);
return dentry;
}
int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
char const * name, unsigned long * val)
{
struct dentry * d = __oprofilefs_create_file(sb, root, name, &ulong_fops);
if (!d)
return -EFAULT;
d->d_inode->u.generic_ip = val;
return 0;
}
int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
char const * name, unsigned long * val)
{
struct dentry * d = __oprofilefs_create_file(sb, root, name, &ulong_ro_fops);
if (!d)
return -EFAULT;
d->d_inode->u.generic_ip = val;
return 0;
}
static ssize_t atomic_read_file(struct file * file, char * buf, size_t count, loff_t * offset)
{
atomic_t * aval = file->private_data;
unsigned long val = atomic_read(aval);
return oprofilefs_ulong_to_user(&val, buf, count, offset);
}
static struct file_operations atomic_ro_fops = {
.read = atomic_read_file,
.open = default_open,
};
int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
char const * name, atomic_t * val)
{
struct dentry * d = __oprofilefs_create_file(sb, root, name, &atomic_ro_fops);
if (!d)
return -EFAULT;
d->d_inode->u.generic_ip = val;
return 0;
}
int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
char const * name, struct file_operations * fops)
{
if (!__oprofilefs_create_file(sb, root, name, fops))
return -EFAULT;
return 0;
}
struct dentry * oprofilefs_mkdir(struct super_block * sb,
struct dentry * root, char const * name)
{
struct dentry * dentry;
struct inode * inode;
struct qstr qname;
qname.name = name;
qname.len = strlen(name);
qname.hash = full_name_hash(qname.name, qname.len);
dentry = d_alloc(root, &qname);
if (!dentry)
return 0;
inode = oprofilefs_get_inode(sb, S_IFDIR | 0755);
if (!inode) {
dput(dentry);
return 0;
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
d_add(dentry, inode);
return dentry;
}
static int oprofilefs_fill_super(struct super_block * sb, void * data, int silent)
{
struct inode * root_inode;
struct dentry * root_dentry;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = OPROFILEFS_MAGIC;
sb->s_op = &s_ops;
root_inode = oprofilefs_get_inode(sb, S_IFDIR | 0755);
if (!root_inode)
return -ENOMEM;
root_inode->i_op = &simple_dir_inode_operations;
root_inode->i_fop = &simple_dir_operations;
root_dentry = d_alloc_root(root_inode);
if (!root_dentry) {
iput(root_inode);
return -ENOMEM;
}
sb->s_root = root_dentry;
oprofile_create_files(sb, root_dentry);
// FIXME: verify kill_litter_super removes our dentries
return 0;
}
static struct super_block * oprofilefs_get_sb(struct file_system_type * fs_type,
int flags, char * dev_name, void * data)
{
return get_sb_single(fs_type, flags, data, oprofilefs_fill_super);
}
static struct file_system_type oprofilefs_type = {
.owner = THIS_MODULE,
.name = "oprofilefs",
.get_sb = oprofilefs_get_sb,
.kill_sb = kill_litter_super,
};
int __init oprofilefs_register(void)
{
return register_filesystem(&oprofilefs_type);
}
void __exit oprofilefs_unregister(void)
{
unregister_filesystem(&oprofilefs_type);
}
/**
* @file oprofile.h
*
* API for machine-specific interrupts to interface
* to oprofile.
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#ifndef OPROFILE_H
#define OPROFILE_H
#include <linux/types.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>
struct super_block;
struct dentry;
struct file_operations;
enum oprofile_cpu {
OPROFILE_CPU_PPRO,
OPROFILE_CPU_PII,
OPROFILE_CPU_PIII,
OPROFILE_CPU_ATHLON,
OPROFILE_CPU_TIMER
};
/* Operations structure to be filled in */
struct oprofile_operations {
/* create any necessary configuration files in the oprofile fs.
* Optional. */
int (*create_files)(struct super_block * sb, struct dentry * root);
/* Do any necessary interrupt setup. Optional. */
int (*setup)(void);
/* Do any necessary interrupt shutdown. Optional. */
void (*shutdown)(void);
/* Start delivering interrupts. */
int (*start)(void);
/* Stop delivering interrupts. */
void (*stop)(void);
};
/**
* One-time initialisation. *ops must be set to a filled-in
* operations structure. oprofile_cpu_type must be set.
* Return 0 on success.
*/
int oprofile_arch_init(struct oprofile_operations ** ops, enum oprofile_cpu * cpu);
/**
* Add a sample. This may be called from any context. Pass
* smp_processor_id() as cpu.
*/
extern void FASTCALL(oprofile_add_sample(unsigned long eip, unsigned long event, int cpu));
/**
* Create a file of the given name as a child of the given root, with
* the specified file operations.
*/
int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
char const * name, struct file_operations * fops);
/** Create a file for read/write access to an unsigned long. */
int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
char const * name, ulong * val);
/** Create a file for read-only access to an unsigned long. */
int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
char const * name, ulong * val);
/** Create a file for read-only access to an atomic_t. */
int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
char const * name, atomic_t * val);
/** create a directory */
struct dentry * oprofilefs_mkdir(struct super_block * sb, struct dentry * root,
char const * name);
/**
* Convert an unsigned long value into ASCII and copy it to the user buffer @buf,
* updating *offset appropriately. Returns bytes written or -EFAULT.
*/
ssize_t oprofilefs_ulong_to_user(unsigned long * val, char * buf, size_t count, loff_t * offset);
/**
* Read an ASCII string for a number from a userspace buffer and fill *val on success.
* Returns 0 on success, < 0 on error.
*/
int oprofilefs_ulong_from_user(unsigned long * val, char const * buf, size_t count);
/** lock for read/write safety */
extern spinlock_t oprofilefs_lock;
#endif /* OPROFILE_H */
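An architecture plugs into this core by implementing oprofile_arch_init() and handing back a filled-in oprofile_operations structure as described above. A hedged sketch of the smallest possible driver (everything prefixed hypothetical_ is invented for illustration; only the structure layout and the init signature come from this header):

/* Hedged sketch of a minimal architecture driver. The counter
 * programming behind start/stop is invented; only the
 * oprofile_operations contract is taken from this header. */
#include <linux/oprofile.h>

static int hypothetical_start(void)
{
	/* would program and unmask the sampling interrupt here,
	 * arranging for it to call oprofile_add_sample() */
	return 0;
}

static void hypothetical_stop(void)
{
	/* would mask the sampling interrupt again here */
}

static struct oprofile_operations hypothetical_ops = {
	/* .create_files, .setup and .shutdown are optional and omitted */
	.start = hypothetical_start,
	.stop = hypothetical_stop,
};

int oprofile_arch_init(struct oprofile_operations ** ops, enum oprofile_cpu * cpu)
{
	*ops = &hypothetical_ops;
	*cpu = OPROFILE_CPU_TIMER;	/* reported via the cpu_type file */
	return 0;
}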