Commit 9658e7b7 authored by Linus Torvalds

Merge http://lia64.bkbits.net/to-linus-2.5

into ppc970.osdl.org:/home/torvalds/v2.5/linux
parents 26d5aa5a e455002a
...@@ -288,39 +288,6 @@ config FORCE_MAX_ZONEORDER ...@@ -288,39 +288,6 @@ config FORCE_MAX_ZONEORDER
int int
default "18" default "18"
# Mutually exclusive selection of the huge TLB page size.
# The choice is only presented when HUGETLB_PAGE is enabled; the 16MB
# entry is the preselected default.
choice
prompt "Huge TLB page size"
depends on HUGETLB_PAGE
default HUGETLB_PAGE_SIZE_16MB
# The two largest sizes (4GB, 1GB) are additionally gated on MCKINLEY.
config HUGETLB_PAGE_SIZE_4GB
depends on MCKINLEY
bool "4GB"
config HUGETLB_PAGE_SIZE_1GB
depends on MCKINLEY
bool "1GB"
# The remaining sizes carry no dependency beyond the choice's own.
config HUGETLB_PAGE_SIZE_256MB
bool "256MB"
config HUGETLB_PAGE_SIZE_64MB
bool "64MB"
config HUGETLB_PAGE_SIZE_16MB
bool "16MB"
config HUGETLB_PAGE_SIZE_4MB
bool "4MB"
config HUGETLB_PAGE_SIZE_1MB
bool "1MB"
config HUGETLB_PAGE_SIZE_256KB
bool "256KB"
endchoice
config IA64_PAL_IDLE config IA64_PAL_IDLE
bool "Use PAL_HALT_LIGHT in idle loop" bool "Use PAL_HALT_LIGHT in idle loop"
help help
......
...@@ -816,6 +816,19 @@ GLOBAL_ENTRY(ia64_delay_loop) ...@@ -816,6 +816,19 @@ GLOBAL_ENTRY(ia64_delay_loop)
br.ret.sptk.many rp br.ret.sptk.many rp
END(ia64_delay_loop) END(ia64_delay_loop)
/*
 * ia64_invoke_kernel_thread_helper: trampoline that calls
 * kernel_thread_helper(r9, r11) and then sys_exit(r8).
 *
 * Inputs (as read by the code below):
 *   r9  - copied to out0, first argument of kernel_thread_helper
 *   r11 - copied to out1, second argument of kernel_thread_helper
 *
 * Neither call is expected to return control past sys_exit; a self-branch
 * guards the end of the routine.
 * NOTE(review): what the caller stores in r9/r11 is not visible here;
 * presumably the thread function and its argument -- confirm at the call site.
 */
GLOBAL_ENTRY(ia64_invoke_kernel_thread_helper)
.prologue
.save rp, r0 // this is the end of the call-chain
.body
alloc r2 = ar.pfs, 0, 0, 2, 0 // set up a frame for the out0/out1 registers used below
mov out0 = r9 // first argument for kernel_thread_helper
mov out1 = r11;; // second argument; stop bit closes the instruction group before the call
br.call.sptk.many rp = kernel_thread_helper;;
mov out0 = r8 // r8 is passed on as the argument of sys_exit (presumably the helper's return value -- confirm)
br.call.sptk.many rp = sys_exit;;
1: br.sptk.few 1b // not reached; spins in place if sys_exit ever returns
END(ia64_invoke_kernel_thread_helper)
#ifdef CONFIG_IA64_BRL_EMU #ifdef CONFIG_IA64_BRL_EMU
/* /*
......
...@@ -103,6 +103,7 @@ static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; ...@@ -103,6 +103,7 @@ static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED;
static struct iosapic_intr_info { static struct iosapic_intr_info {
char *addr; /* base address of IOSAPIC */ char *addr; /* base address of IOSAPIC */
u32 low32; /* current value of low word of Redirection table entry */
unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */ char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */
unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
...@@ -213,6 +214,7 @@ set_rte (unsigned int vector, unsigned int dest) ...@@ -213,6 +214,7 @@ set_rte (unsigned int vector, unsigned int dest)
writel(high32, addr + IOSAPIC_WINDOW); writel(high32, addr + IOSAPIC_WINDOW);
writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT); writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT);
writel(low32, addr + IOSAPIC_WINDOW); writel(low32, addr + IOSAPIC_WINDOW);
iosapic_intr_info[vector].low32 = low32;
} }
static void static void
...@@ -239,9 +241,10 @@ mask_irq (unsigned int irq) ...@@ -239,9 +241,10 @@ mask_irq (unsigned int irq)
spin_lock_irqsave(&iosapic_lock, flags); spin_lock_irqsave(&iosapic_lock, flags);
{ {
writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT); writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT);
low32 = readl(addr + IOSAPIC_WINDOW);
low32 |= (1 << IOSAPIC_MASK_SHIFT); /* set only the mask bit */ /* set only the mask bit */
low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
writel(low32, addr + IOSAPIC_WINDOW); writel(low32, addr + IOSAPIC_WINDOW);
} }
spin_unlock_irqrestore(&iosapic_lock, flags); spin_unlock_irqrestore(&iosapic_lock, flags);
...@@ -264,9 +267,7 @@ unmask_irq (unsigned int irq) ...@@ -264,9 +267,7 @@ unmask_irq (unsigned int irq)
spin_lock_irqsave(&iosapic_lock, flags); spin_lock_irqsave(&iosapic_lock, flags);
{ {
writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT); writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT);
low32 = readl(addr + IOSAPIC_WINDOW); low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
low32 &= ~(1 << IOSAPIC_MASK_SHIFT); /* clear only the mask bit */
writel(low32, addr + IOSAPIC_WINDOW); writel(low32, addr + IOSAPIC_WINDOW);
} }
spin_unlock_irqrestore(&iosapic_lock, flags); spin_unlock_irqrestore(&iosapic_lock, flags);
...@@ -307,9 +308,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) ...@@ -307,9 +308,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
{ {
/* get current delivery mode by reading the low32 */ /* get current delivery mode by reading the low32 */
writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT); writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT);
low32 = readl(addr + IOSAPIC_WINDOW); low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
low32 &= ~(7 << IOSAPIC_DELIVERY_SHIFT);
if (redir) if (redir)
/* change delivery mode to lowest priority */ /* change delivery mode to lowest priority */
low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
...@@ -317,6 +316,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) ...@@ -317,6 +316,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
/* change delivery mode to fixed */ /* change delivery mode to fixed */
low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
iosapic_intr_info[vec].low32 = low32;
writel(IOSAPIC_RTE_HIGH(rte_index), addr + IOSAPIC_REG_SELECT); writel(IOSAPIC_RTE_HIGH(rte_index), addr + IOSAPIC_REG_SELECT);
writel(high32, addr + IOSAPIC_WINDOW); writel(high32, addr + IOSAPIC_WINDOW);
writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT); writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT);
......
...@@ -455,7 +455,6 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs) ...@@ -455,7 +455,6 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs)
unsigned int status; unsigned int status;
int cpu; int cpu;
irq_enter();
cpu = smp_processor_id(); /* for CONFIG_PREEMPT, this must come after irq_enter()! */ cpu = smp_processor_id(); /* for CONFIG_PREEMPT, this must come after irq_enter()! */
kstat_cpu(cpu).irqs[irq]++; kstat_cpu(cpu).irqs[irq]++;
...@@ -525,7 +524,6 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs) ...@@ -525,7 +524,6 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs)
desc->handler->end(irq); desc->handler->end(irq);
spin_unlock(&desc->lock); spin_unlock(&desc->lock);
} }
irq_exit();
return 1; return 1;
} }
......
...@@ -120,6 +120,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) ...@@ -120,6 +120,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
* 16 (without this, it would be ~240, which could easily lead * 16 (without this, it would be ~240, which could easily lead
* to kernel stack overflows). * to kernel stack overflows).
*/ */
irq_enter();
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d(); ia64_srlz_d();
while (vector != IA64_SPURIOUS_INT_VECTOR) { while (vector != IA64_SPURIOUS_INT_VECTOR) {
...@@ -143,8 +144,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) ...@@ -143,8 +144,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
* handler needs to be able to wait for further keyboard interrupts, which can't * handler needs to be able to wait for further keyboard interrupts, which can't
* come through until ia64_eoi() has been done. * come through until ia64_eoi() has been done.
*/ */
if (local_softirq_pending()) irq_exit();
do_softirq();
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -118,10 +118,11 @@ ENTRY(vhpt_miss) ...@@ -118,10 +118,11 @@ ENTRY(vhpt_miss)
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
extr.u r26=r25,2,6 extr.u r26=r25,2,6
;; ;;
cmp.eq p8,p0=HPAGE_SHIFT,r26 cmp.ne p8,p0=r18,r26
sub r27=r26,r18
;; ;;
(p8) dep r25=r18,r25,2,6 (p8) dep r25=r18,r25,2,6
(p8) shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT (p8) shr r22=r22,r27
#endif #endif
;; ;;
cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5?
......
...@@ -82,7 +82,7 @@ ...@@ -82,7 +82,7 @@
#define PFM_REG_IMPL 0x1 /* register implemented */ #define PFM_REG_IMPL 0x1 /* register implemented */
#define PFM_REG_END 0x2 /* end marker */ #define PFM_REG_END 0x2 /* end marker */
#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ #define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR|PFM_REG_IMPL) /* a monitor + pmc.oi+ PMD used as a counter */ #define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ #define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ #define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ #define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
...@@ -109,14 +109,15 @@ ...@@ -109,14 +109,15 @@
#define PMD_PMD_DEP(i) pmu_conf.pmd_desc[i].dep_pmd[0] #define PMD_PMD_DEP(i) pmu_conf.pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i) pmu_conf.pmc_desc[i].dep_pmd[0] #define PMC_PMD_DEP(i) pmu_conf.pmc_desc[i].dep_pmd[0]
/* k assumed unsigned (up to 64 registers) */ #define PFM_NUM_IBRS IA64_NUM_DBG_REGS
#define IBR_IS_IMPL(k) (k< IA64_NUM_DBG_REGS) #define PFM_NUM_DBRS IA64_NUM_DBG_REGS
#define DBR_IS_IMPL(k) (k< IA64_NUM_DBG_REGS)
#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) #define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) #define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h) (h)->ctx_task #define PFM_CTX_TASK(h) (h)->ctx_task
#define PMU_PMC_OI 5 /* position of pmc.oi bit */
/* XXX: does not support more than 64 PMDs */ /* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
...@@ -218,6 +219,8 @@ ...@@ -218,6 +219,8 @@
/* /*
* debugging * debugging
*/ */
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
#define DPRINT(a) \ #define DPRINT(a) \
do { \ do { \
if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
...@@ -227,18 +230,7 @@ ...@@ -227,18 +230,7 @@
do { \ do { \
if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
} while (0) } while (0)
/* #endif
* Architected PMC structure
*/
/*
 * Bitfield overlay for a 64-bit monitor PMC value; the field widths below
 * sum to 64 bits (4+1+1+1+1+8+48).
 * NOTE(review): the bit positions given per field assume the compiler
 * allocates bitfields starting at the least significant bit -- confirm for
 * the target ABI.
 */
typedef struct {
unsigned long pmc_plm:4; /* privilege level mask (4 bits, presumably bits 0-3) */
unsigned long pmc_ev:1; /* external visibility (1 bit, presumably bit 4) */
unsigned long pmc_oi:1; /* overflow interrupt enable (1 bit, presumably bit 5) */
unsigned long pmc_pm:1; /* privileged monitor (1 bit, presumably bit 6) */
unsigned long pmc_ig1:1; /* reserved (1 bit) */
unsigned long pmc_es:8; /* event select (8 bits) */
unsigned long pmc_ig2:48; /* reserved (remaining 48 bits) */
} pfm_monitor_t;
/* /*
* 64-bit software counter structure * 64-bit software counter structure
...@@ -469,20 +461,13 @@ typedef struct { ...@@ -469,20 +461,13 @@ typedef struct {
#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ #define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
#define PFM_CMD_IDX(cmd) (cmd) #define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
#define PFM_CMD_IS_VALID(cmd) ((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \ #define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
&& pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL) #define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_NAME(cmd) pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_name #define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_STOP)
#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ #define PFM_CMD_ARG_MANY -1 /* cannot be zero */
#define PFM_CMD_NARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
#define PFM_CMD_ARG_SIZE(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
#define PFM_CMD_GETSIZE(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_getsize)
typedef struct { typedef struct {
int debug; /* turn on/off debugging via syslog */ int debug; /* turn on/off debugging via syslog */
...@@ -2834,10 +2819,11 @@ static int ...@@ -2834,10 +2819,11 @@ static int
pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{ {
struct thread_struct *thread = NULL; struct thread_struct *thread = NULL;
struct task_struct *task;
pfarg_reg_t *req = (pfarg_reg_t *)arg; pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned long value; unsigned long value, pmc_pm;
unsigned long smpl_pmds, reset_pmds; unsigned long smpl_pmds, reset_pmds, impl_pmds;
unsigned int cnum, reg_flags, flags; unsigned int cnum, reg_flags, flags, pmc_type;
int i, can_access_pmu = 0, is_loaded, is_system; int i, can_access_pmu = 0, is_loaded, is_system;
int is_monitor, is_counting, state; int is_monitor, is_counting, state;
int ret = -EINVAL; int ret = -EINVAL;
...@@ -2846,12 +2832,13 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2846,12 +2832,13 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
state = ctx->ctx_state; state = ctx->ctx_state;
is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
is_system = ctx->ctx_fl_system; is_system = ctx->ctx_fl_system;
task = ctx->ctx_task;
impl_pmds = pmu_conf.impl_pmds[0];
if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL; if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL;
if (is_loaded) { if (is_loaded) {
thread = &ctx->ctx_task->thread; thread = &task->thread;
/* /*
* In system wide and when the context is loaded, access can only happen * In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session. * when the caller is running on the CPU being monitored by the session.
...@@ -2861,7 +2848,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2861,7 +2848,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu)); DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
return -EBUSY; return -EBUSY;
} }
can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0; can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
} }
for (i = 0; i < count; i++, req++) { for (i = 0; i < count; i++, req++) {
...@@ -2873,16 +2860,24 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2873,16 +2860,24 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
reset_pmds = req->reg_reset_pmds[0]; reset_pmds = req->reg_reset_pmds[0];
flags = 0; flags = 0;
is_counting = PMC_IS_COUNTING(cnum);
is_monitor = PMC_IS_MONITOR(cnum); if (cnum >= PMU_MAX_PMCS) {
DPRINT(("pmc%u is invalid\n", cnum));
goto error;
}
pmc_type = pmu_conf.pmc_desc[cnum].type;
pmc_pm = (value >> pmu_conf.pmc_desc[cnum].pm_pos) & 0x1;
is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
/* /*
* we reject all non implemented PMC as well * we reject all non implemented PMC as well
* as attempts to modify PMC[0-3] which are used * as attempts to modify PMC[0-3] which are used
* as status registers by the PMU * as status registers by the PMU
*/ */
if (PMC_IS_IMPL(cnum) == 0 || PMC_IS_CONTROL(cnum)) { if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
DPRINT(("pmc%u is unimplemented or invalid\n", cnum)); DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
goto error; goto error;
} }
/* /*
...@@ -2890,21 +2885,20 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2890,21 +2885,20 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* - system-wide session: PMCx.pm=1 (privileged monitor) * - system-wide session: PMCx.pm=1 (privileged monitor)
* - per-task : PMCx.pm=0 (user monitor) * - per-task : PMCx.pm=0 (user monitor)
*/ */
if ((is_monitor || is_counting) && value != PMC_DFL_VAL(cnum) && PFM_CHECK_PMC_PM(ctx, cnum, value)) { if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
DPRINT(("pmc%u pmc_pm=%ld fl_system=%d\n", DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
cnum, cnum,
PMC_PM(cnum, value), pmc_pm,
ctx->ctx_fl_system)); is_system));
goto error; goto error;
} }
if (is_counting) { if (is_counting) {
pfm_monitor_t *p = (pfm_monitor_t *)&value;
/* /*
* enforce generation of overflow interrupt. Necessary on all * enforce generation of overflow interrupt. Necessary on all
* CPUs. * CPUs.
*/ */
p->pmc_oi = 1; value |= 1 << PMU_PMC_OI;
if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
flags |= PFM_REGFL_OVFL_NOTIFY; flags |= PFM_REGFL_OVFL_NOTIFY;
...@@ -2913,13 +2907,13 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2913,13 +2907,13 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
/* verify validity of smpl_pmds */ /* verify validity of smpl_pmds */
if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) { if ((smpl_pmds & impl_pmds) != smpl_pmds) {
DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
goto error; goto error;
} }
/* verify validity of reset_pmds */ /* verify validity of reset_pmds */
if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) { if ((reset_pmds & impl_pmds) != reset_pmds) {
DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
goto error; goto error;
} }
...@@ -2935,7 +2929,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2935,7 +2929,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* execute write checker, if any * execute write checker, if any
*/ */
if (PMC_WR_FUNC(cnum)) { if (PMC_WR_FUNC(cnum)) {
ret = PMC_WR_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &value, regs); ret = PMC_WR_FUNC(cnum)(task, ctx, cnum, &value, regs);
if (ret) goto error; if (ret) goto error;
ret = -EINVAL; ret = -EINVAL;
} }
...@@ -2997,7 +2991,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -2997,7 +2991,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* *
* The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
* *
* The value in t->pmc[] may be modified on overflow, i.e., when * The value in thread->pmcs[] may be modified on overflow, i.e., when
* monitoring needs to be stopped. * monitoring needs to be stopped.
*/ */
if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
...@@ -3056,11 +3050,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3056,11 +3050,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
return 0; return 0;
error: error:
PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
req->reg_flags = PFM_REG_RETFL_EINVAL;
DPRINT(("pmc[%u]=0x%lx error %d\n", cnum, value, ret));
return ret; return ret;
} }
...@@ -3068,6 +3057,7 @@ static int ...@@ -3068,6 +3057,7 @@ static int
pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{ {
struct thread_struct *thread = NULL; struct thread_struct *thread = NULL;
struct task_struct *task;
pfarg_reg_t *req = (pfarg_reg_t *)arg; pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned long value, hw_value, ovfl_mask; unsigned long value, hw_value, ovfl_mask;
unsigned int cnum; unsigned int cnum;
...@@ -3080,25 +3070,26 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3080,25 +3070,26 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
is_system = ctx->ctx_fl_system; is_system = ctx->ctx_fl_system;
ovfl_mask = pmu_conf.ovfl_val; ovfl_mask = pmu_conf.ovfl_val;
task = ctx->ctx_task;
if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL; if (unlikely(state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE)) return -EINVAL;
/* /*
* on both UP and SMP, we can only write to the PMC when the task is * on both UP and SMP, we can only write to the PMC when the task is
* the owner of the local PMU. * the owner of the local PMU.
*/ */
if (is_loaded) { if (likely(is_loaded)) {
thread = &ctx->ctx_task->thread; thread = &task->thread;
/* /*
* In system wide and when the context is loaded, access can only happen * In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session. * when the caller is running on the CPU being monitored by the session.
* It does not have to be the owner (ctx_task) of the context per se. * It does not have to be the owner (ctx_task) of the context per se.
*/ */
if (is_system && ctx->ctx_cpu != smp_processor_id()) { if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu)); DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
return -EBUSY; return -EBUSY;
} }
can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0; can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
} }
for (i = 0; i < count; i++, req++) { for (i = 0; i < count; i++, req++) {
...@@ -3118,7 +3109,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3118,7 +3109,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
if (PMD_WR_FUNC(cnum)) { if (PMD_WR_FUNC(cnum)) {
unsigned long v = value; unsigned long v = value;
ret = PMD_WR_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &v, regs); ret = PMD_WR_FUNC(cnum)(task, ctx, cnum, &v, regs);
if (ret) goto abort_mission; if (ret) goto abort_mission;
value = v; value = v;
...@@ -3243,16 +3234,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3243,16 +3234,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* for now, we have only one possibility for error * for now, we have only one possibility for error
*/ */
PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
/*
* we change the return value to EFAULT in case we cannot write register return code.
* The caller first must correct this error, then a resubmission of the request will
* eventually yield the EINVAL.
*/
req->reg_flags = PFM_REG_RETFL_EINVAL;
DPRINT(("pmd[%u]=0x%lx ret %d\n", cnum, value, ret));
return ret; return ret;
} }
...@@ -3269,11 +3250,12 @@ static int ...@@ -3269,11 +3250,12 @@ static int
pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{ {
struct thread_struct *thread = NULL; struct thread_struct *thread = NULL;
unsigned long val = 0UL, lval, ovfl_mask; struct task_struct *task;
unsigned long val = 0UL, lval, ovfl_mask, sval;
pfarg_reg_t *req = (pfarg_reg_t *)arg; pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned int cnum, reg_flags = 0; unsigned int cnum, reg_flags = 0;
int i, can_access_pmu = 0, state; int i, can_access_pmu = 0, state;
int is_loaded, is_system; int is_loaded, is_system, is_counting;
int ret = -EINVAL; int ret = -EINVAL;
/* /*
...@@ -3285,32 +3267,33 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3285,32 +3267,33 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
is_system = ctx->ctx_fl_system; is_system = ctx->ctx_fl_system;
ovfl_mask = pmu_conf.ovfl_val; ovfl_mask = pmu_conf.ovfl_val;
task = ctx->ctx_task;
if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (state == PFM_CTX_ZOMBIE) return -EINVAL;
if (is_loaded) { if (likely(is_loaded)) {
thread = &ctx->ctx_task->thread; thread = &task->thread;
/* /*
* In system wide and when the context is loaded, access can only happen * In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session. * when the caller is running on the CPU being monitored by the session.
* It does not have to be the owner (ctx_task) of the context per se. * It does not have to be the owner (ctx_task) of the context per se.
*/ */
if (is_system && ctx->ctx_cpu != smp_processor_id()) { if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu)); DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
return -EBUSY; return -EBUSY;
} }
/* /*
* this can be true when not self-monitoring only in UP * this can be true when not self-monitoring only in UP
*/ */
can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0; can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
if (can_access_pmu) ia64_srlz_d(); if (can_access_pmu) ia64_srlz_d();
} }
DPRINT(("enter loaded=%d access_pmu=%d ctx_state=%d\n", DPRINT(("loaded=%d access_pmu=%d ctx_state=%d\n",
is_loaded, is_loaded,
can_access_pmu, can_access_pmu,
ctx->ctx_state)); state));
/* /*
* on both UP and SMP, we can only read the PMD from the hardware register when * on both UP and SMP, we can only read the PMD from the hardware register when
...@@ -3319,11 +3302,10 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3319,11 +3302,10 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
for (i = 0; i < count; i++, req++) { for (i = 0; i < count; i++, req++) {
lval = 0UL;
cnum = req->reg_num; cnum = req->reg_num;
reg_flags = req->reg_flags; reg_flags = req->reg_flags;
if (!PMD_IS_IMPL(cnum)) goto error; if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
/* /*
* we can only read the register that we use. That includes * we can only read the register that we use. That includes
* the one we explicitely initialize AND the one we want included * the one we explicitely initialize AND the one we want included
...@@ -3332,7 +3314,11 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3332,7 +3314,11 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* Having this restriction allows optimization in the ctxsw routine * Having this restriction allows optimization in the ctxsw routine
* without compromising security (leaks) * without compromising security (leaks)
*/ */
if (!CTX_IS_USED_PMD(ctx, cnum)) goto error; if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
sval = ctx->ctx_pmds[cnum].val;
lval = ctx->ctx_pmds[cnum].lval;
is_counting = PMD_IS_COUNTING(cnum);
/* /*
* If the task is not the current one, then we check if the * If the task is not the current one, then we check if the
...@@ -3347,23 +3333,21 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3347,23 +3333,21 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* if context is zombie, then task does not exist anymore. * if context is zombie, then task does not exist anymore.
* In this case, we use the full value saved in the context (pfm_flush_regs()). * In this case, we use the full value saved in the context (pfm_flush_regs()).
*/ */
val = state == PFM_CTX_LOADED ? thread->pmds[cnum] : 0UL; val = is_loaded ? thread->pmds[cnum] : 0UL;
} }
if (PMD_IS_COUNTING(cnum)) { if (is_counting) {
/* /*
* XXX: need to check for overflow when loaded * XXX: need to check for overflow when loaded
*/ */
val &= ovfl_mask; val &= ovfl_mask;
val += ctx->ctx_pmds[cnum].val; val += sval;
lval = ctx->ctx_pmds[cnum].lval;
} }
/* /*
* execute read checker, if any * execute read checker, if any
*/ */
if (PMD_RD_FUNC(cnum)) { if (unlikely(PMD_RD_FUNC(cnum))) {
unsigned long v = val; unsigned long v = val;
ret = PMD_RD_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &v, regs); ret = PMD_RD_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &v, regs);
if (ret) goto error; if (ret) goto error;
...@@ -3373,12 +3357,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3373,12 +3357,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
PFM_REG_RETFLAG_SET(reg_flags, 0); PFM_REG_RETFLAG_SET(reg_flags, 0);
DPRINT(("pmd[%u]=0x%lx loaded=%d access_pmu=%d ctx_state=%d\n", DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
cnum,
val,
is_loaded,
can_access_pmu,
ctx->ctx_state));
/* /*
* update register return value, abort all if problem during copy. * update register return value, abort all if problem during copy.
...@@ -3393,12 +3372,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3393,12 +3372,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
return 0; return 0;
error: error:
PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL); PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
req->reg_flags = PFM_REG_RETFL_EINVAL;
DPRINT(("error pmd[%u]=0x%lx\n", cnum, val));
return ret; return ret;
} }
...@@ -3628,7 +3602,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ...@@ -3628,7 +3602,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
prefetch(ctx->ctx_smpl_hdr); prefetch(ctx->ctx_smpl_hdr);
rst_ctrl.bits.mask_monitoring = 0; rst_ctrl.bits.mask_monitoring = 0;
rst_ctrl.bits.reset_ovfl_pmds = 1; rst_ctrl.bits.reset_ovfl_pmds = 0;
if (state == PFM_CTX_LOADED) if (state == PFM_CTX_LOADED)
ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
...@@ -3748,6 +3722,7 @@ static int ...@@ -3748,6 +3722,7 @@ static int
pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{ {
struct thread_struct *thread = NULL; struct thread_struct *thread = NULL;
struct task_struct *task;
pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
unsigned long flags; unsigned long flags;
dbreg_t dbreg; dbreg_t dbreg;
...@@ -3762,6 +3737,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ ...@@ -3762,6 +3737,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
state = ctx->ctx_state; state = ctx->ctx_state;
is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
is_system = ctx->ctx_fl_system; is_system = ctx->ctx_fl_system;
task = ctx->ctx_task;
if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL; if (state == PFM_CTX_TERMINATED || state == PFM_CTX_ZOMBIE) return -EINVAL;
...@@ -3770,17 +3746,17 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ ...@@ -3770,17 +3746,17 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
* the owner of the local PMU. * the owner of the local PMU.
*/ */
if (is_loaded) { if (is_loaded) {
thread = &ctx->ctx_task->thread; thread = &task->thread;
/* /*
* In system wide and when the context is loaded, access can only happen * In system wide and when the context is loaded, access can only happen
* when the caller is running on the CPU being monitored by the session. * when the caller is running on the CPU being monitored by the session.
* It does not have to be the owner (ctx_task) of the context per se. * It does not have to be the owner (ctx_task) of the context per se.
*/ */
if (is_system && ctx->ctx_cpu != smp_processor_id()) { if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu)); DPRINT(("[%d] should be running on CPU%d\n", current->pid, ctx->ctx_cpu));
return -EBUSY; return -EBUSY;
} }
can_access_pmu = GET_PMU_OWNER() == ctx->ctx_task || is_system ? 1 : 0; can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
} }
/* /*
...@@ -3796,7 +3772,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ ...@@ -3796,7 +3772,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
* don't bother if we are loaded and task is being debugged * don't bother if we are loaded and task is being debugged
*/ */
if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
DPRINT(("debug registers already in use for [%d]\n", ctx->ctx_task->pid)); DPRINT(("debug registers already in use for [%d]\n", task->pid));
return -EBUSY; return -EBUSY;
} }
...@@ -3837,7 +3813,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ ...@@ -3837,7 +3813,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
* is shared by all processes running on it * is shared by all processes running on it
*/ */
if (first_time && can_access_pmu) { if (first_time && can_access_pmu) {
DPRINT(("[%d] clearing ibrs, dbrs\n", ctx->ctx_task->pid)); DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
for (i=0; i < pmu_conf.num_ibrs; i++) { for (i=0; i < pmu_conf.num_ibrs; i++) {
ia64_set_ibr(i, 0UL); ia64_set_ibr(i, 0UL);
ia64_srlz_i(); ia64_srlz_i();
...@@ -3860,7 +3836,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ ...@@ -3860,7 +3836,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
ret = -EINVAL; ret = -EINVAL;
if ((mode == PFM_CODE_RR && !IBR_IS_IMPL(rnum)) || ((mode == PFM_DATA_RR) && !DBR_IS_IMPL(rnum))) { if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
rnum, dbreg.val, mode, i, count)); rnum, dbreg.val, mode, i, count));
...@@ -4434,6 +4410,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg ...@@ -4434,6 +4410,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
struct task_struct *task = PFM_CTX_TASK(ctx); struct task_struct *task = PFM_CTX_TASK(ctx);
struct pt_regs *tregs; struct pt_regs *tregs;
int state, is_system; int state, is_system;
int ret;
DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1)); DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
...@@ -4451,7 +4428,8 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg ...@@ -4451,7 +4428,8 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
/* /*
* clear psr and dcr bits * clear psr and dcr bits
*/ */
pfm_stop(ctx, NULL, 0, regs); ret = pfm_stop(ctx, NULL, 0, regs);
if (ret) return ret;
ctx->ctx_state = state = PFM_CTX_UNLOADED; ctx->ctx_state = state = PFM_CTX_UNLOADED;
...@@ -4760,37 +4738,45 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon ...@@ -4760,37 +4738,45 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon
void *args_k = NULL; void *args_k = NULL;
long ret; /* will expand int return types */ long ret; /* will expand int return types */
size_t base_sz, sz, xtra_sz = 0; size_t base_sz, sz, xtra_sz = 0;
int narg, completed_args = 0, call_made = 0; int narg, completed_args = 0, call_made = 0, cmd_flags;
int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
int (*getsize)(void *arg, size_t *sz);
#define PFM_MAX_ARGSIZE 4096 #define PFM_MAX_ARGSIZE 4096
/* /*
* reject any call if perfmon was disabled at initialization time * reject any call if perfmon was disabled at initialization
mask*/ */
if (PFM_IS_DISABLED()) return -ENOSYS; if (unlikely(PFM_IS_DISABLED())) return -ENOSYS;
if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
DPRINT(("[%d] invalid cmd=%d\n", current->pid, cmd));
return -EINVAL;
}
func = pfm_cmd_tab[cmd].cmd_func;
narg = pfm_cmd_tab[cmd].cmd_narg;
base_sz = pfm_cmd_tab[cmd].cmd_argsize;
getsize = pfm_cmd_tab[cmd].cmd_getsize;
cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
if (unlikely(PFM_CMD_IS_VALID(cmd) == 0)) { if (unlikely(func == NULL)) {
DPRINT(("[%d] invalid cmd=%d\n", current->pid, cmd)); DPRINT(("[%d] invalid cmd=%d\n", current->pid, cmd));
return -EINVAL; return -EINVAL;
} }
DPRINT(("cmd=%s idx=%d valid=%d narg=0x%x argsz=%lu count=%d\n", DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
PFM_CMD_NAME(cmd), PFM_CMD_NAME(cmd),
PFM_CMD_IDX(cmd), cmd,
PFM_CMD_IS_VALID(cmd), narg,
PFM_CMD_NARG(cmd), base_sz,
PFM_CMD_ARG_SIZE(cmd),
count)); count));
/* /*
* check if number of arguments matches what the command expects * check if number of arguments matches what the command expects
*/ */
narg = PFM_CMD_NARG(cmd); if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
if ((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))
return -EINVAL; return -EINVAL;
/* get single argument size */
base_sz = PFM_CMD_ARG_SIZE(cmd);
restart_args: restart_args:
sz = xtra_sz + base_sz*count; sz = xtra_sz + base_sz*count;
/* /*
...@@ -4804,7 +4790,7 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon ...@@ -4804,7 +4790,7 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon
/* /*
* allocate default-sized argument buffer * allocate default-sized argument buffer
*/ */
if (count && args_k == NULL) { if (likely(count && args_k == NULL)) {
args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
if (args_k == NULL) return -ENOMEM; if (args_k == NULL) return -ENOMEM;
} }
...@@ -4824,11 +4810,11 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon ...@@ -4824,11 +4810,11 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon
/* /*
* check if command supports extra parameters * check if command supports extra parameters
*/ */
if (completed_args == 0 && PFM_CMD_GETSIZE(cmd)) { if (completed_args == 0 && getsize) {
/* /*
* get extra parameters size (based on main argument) * get extra parameters size (based on main argument)
*/ */
ret = PFM_CMD_GETSIZE(cmd)(args_k, &xtra_sz); ret = (*getsize)(args_k, &xtra_sz);
if (ret) goto error_args; if (ret) goto error_args;
completed_args = 1; completed_args = 1;
...@@ -4836,29 +4822,29 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon ...@@ -4836,29 +4822,29 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon
DPRINT(("[%d] restart_args sz=%lu xtra_sz=%lu\n", current->pid, sz, xtra_sz)); DPRINT(("[%d] restart_args sz=%lu xtra_sz=%lu\n", current->pid, sz, xtra_sz));
/* retry if necessary */ /* retry if necessary */
if (xtra_sz) goto restart_args; if (likely(xtra_sz)) goto restart_args;
} }
if (PFM_CMD_USE_FD(cmd)) { if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
ret = -EBADF; ret = -EBADF;
file = fget(fd); file = fget(fd);
if (file == NULL) { if (unlikely(file == NULL)) {
DPRINT(("[%d] invalid fd %d\n", current->pid, fd)); DPRINT(("[%d] invalid fd %d\n", current->pid, fd));
goto error_args; goto error_args;
} }
if (PFM_IS_FILE(file) == 0) { if (unlikely(PFM_IS_FILE(file) == 0)) {
DPRINT(("[%d] fd %d not related to perfmon\n", current->pid, fd)); DPRINT(("[%d] fd %d not related to perfmon\n", current->pid, fd));
goto error_args; goto error_args;
} }
ctx = (pfm_context_t *)file->private_data; ctx = (pfm_context_t *)file->private_data;
if (ctx == NULL) { if (unlikely(ctx == NULL)) {
DPRINT(("[%d] no context for fd %d\n", current->pid, fd)); DPRINT(("[%d] no context for fd %d\n", current->pid, fd));
goto error_args; goto error_args;
} }
prefetch(&ctx->ctx_state);
PROTECT_CTX(ctx, flags); PROTECT_CTX(ctx, flags);
...@@ -4866,15 +4852,15 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon ...@@ -4866,15 +4852,15 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon
* check task is stopped * check task is stopped
*/ */
ret = pfm_check_task_state(ctx, cmd, flags); ret = pfm_check_task_state(ctx, cmd, flags);
if (ret) goto abort_locked; if (unlikely(ret)) goto abort_locked;
}
ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(ctx, args_k, count, regs); skip_fd:
ret = (*func)(ctx, args_k, count, regs);
call_made = 1; call_made = 1;
abort_locked: abort_locked:
if (ctx) { if (likely(ctx)) {
DPRINT(("[%d] context unlocked\n", current->pid)); DPRINT(("[%d] context unlocked\n", current->pid));
UNPROTECT_CTX(ctx, flags); UNPROTECT_CTX(ctx, flags);
fput(file); fput(file);
...@@ -4907,7 +4893,7 @@ pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_reg ...@@ -4907,7 +4893,7 @@ pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_reg
if (CTX_HAS_SMPL(ctx)) { if (CTX_HAS_SMPL(ctx)) {
rst_ctrl.bits.mask_monitoring = 0; rst_ctrl.bits.mask_monitoring = 0;
rst_ctrl.bits.reset_ovfl_pmds = 1; rst_ctrl.bits.reset_ovfl_pmds = 0;
if (state == PFM_CTX_LOADED) if (state == PFM_CTX_LOADED)
ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
...@@ -5096,7 +5082,7 @@ pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) ...@@ -5096,7 +5082,7 @@ pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
msg->pfm_ovfl_msg.msg_tstamp = ia64_get_itc(); /* relevant on UP only */ msg->pfm_ovfl_msg.msg_tstamp = 0UL;
} }
DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d pid=%d ovfl_pmds=0x%lx\n", DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d pid=%d ovfl_pmds=0x%lx\n",
...@@ -5119,10 +5105,12 @@ pfm_end_notify_user(pfm_context_t *ctx) ...@@ -5119,10 +5105,12 @@ pfm_end_notify_user(pfm_context_t *ctx)
printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
return -1; return -1;
} }
/* no leak */
memset(msg, 0, sizeof(*msg));
msg->pfm_end_msg.msg_type = PFM_MSG_END; msg->pfm_end_msg.msg_type = PFM_MSG_END;
msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd;
msg->pfm_ovfl_msg.msg_tstamp = ia64_get_itc(); /* relevant on UP only */ msg->pfm_ovfl_msg.msg_tstamp = 0UL;
DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d pid=%d\n", DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d pid=%d\n",
msg, msg,
...@@ -5141,8 +5129,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5141,8 +5129,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
{ {
pfm_ovfl_arg_t ovfl_arg; pfm_ovfl_arg_t ovfl_arg;
unsigned long mask; unsigned long mask;
unsigned long old_val, ovfl_val; unsigned long old_val, ovfl_val, new_val;
unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL; unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
unsigned long tstamp; unsigned long tstamp;
pfm_ovfl_ctrl_t ovfl_ctrl; pfm_ovfl_ctrl_t ovfl_ctrl;
unsigned int i, has_smpl; unsigned int i, has_smpl;
...@@ -5156,20 +5144,18 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5156,20 +5144,18 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
tstamp = ia64_get_itc(); tstamp = ia64_get_itc();
mask = pmc0 >> PMU_FIRST_COUNTER; mask = pmc0 >> PMU_FIRST_COUNTER;
ovfl_val = pmu_conf.ovfl_val; ovfl_val = pmu_conf.ovfl_val;
has_smpl = CTX_HAS_SMPL(ctx);
DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
"used_pmds=0x%lx reload_pmcs=0x%lx\n", "used_pmds=0x%lx\n",
pmc0, pmc0,
task ? task->pid: -1, task ? task->pid: -1,
(regs ? regs->cr_iip : 0), (regs ? regs->cr_iip : 0),
CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
ctx->ctx_used_pmds[0], ctx->ctx_used_pmds[0]));
ctx->ctx_reload_pmcs[0]));
has_smpl = CTX_HAS_SMPL(ctx);
/* /*
* first we update the virtual counters * first we update the virtual counters
...@@ -5180,29 +5166,31 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5180,29 +5166,31 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
/* skip pmd which did not overflow */ /* skip pmd which did not overflow */
if ((mask & 0x1) == 0) continue; if ((mask & 0x1) == 0) continue;
DPRINT_ovfl(("pmd[%d] overflowed hw_pmd=0x%lx ctx_pmd=0x%lx\n",
i, ia64_get_pmd(i), ctx->ctx_pmds[i].val));
/* /*
* Note that the pmd is not necessarily 0 at this point as qualified events * Note that the pmd is not necessarily 0 at this point as qualified events
* may have happened before the PMU was frozen. The residual count is not * may have happened before the PMU was frozen. The residual count is not
* taken into consideration here but will be with any read of the pmd via * taken into consideration here but will be with any read of the pmd via
* pfm_read_pmds(). * pfm_read_pmds().
*/ */
old_val = ctx->ctx_pmds[i].val; old_val = new_val = ctx->ctx_pmds[i].val;
ctx->ctx_pmds[i].val += 1 + ovfl_val; new_val += 1 + ovfl_val;
ctx->ctx_pmds[i].val = new_val;
/* /*
* check for overflow condition * check for overflow condition
*/ */
if (likely(old_val > ctx->ctx_pmds[i].val)) { if (likely(old_val > new_val)) {
ovfl_pmds |= 1UL << i; ovfl_pmds |= 1UL << i;
if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
} }
DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx smpl_pmds=0x%lx\n", DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
i, ctx->ctx_pmds[i].val, old_val, i,
ia64_get_pmd(i) & ovfl_val, ovfl_pmds, ovfl_notify, smpl_pmds)); new_val,
old_val,
ia64_get_pmd(i) & ovfl_val,
ovfl_pmds,
ovfl_notify));
} }
/* /*
...@@ -5214,6 +5202,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5214,6 +5202,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
* reset all control bits * reset all control bits
*/ */
ovfl_ctrl.val = 0; ovfl_ctrl.val = 0;
reset_pmds = 0UL;
/* /*
* if a sampling format module exists, then we "cache" the overflow by * if a sampling format module exists, then we "cache" the overflow by
...@@ -5275,7 +5264,10 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5275,7 +5264,10 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
ovfl_ctrl.bits.notify_user |= ovfl_arg.ovfl_ctrl.bits.notify_user; ovfl_ctrl.bits.notify_user |= ovfl_arg.ovfl_ctrl.bits.notify_user;
ovfl_ctrl.bits.block_task |= ovfl_arg.ovfl_ctrl.bits.block_task; ovfl_ctrl.bits.block_task |= ovfl_arg.ovfl_ctrl.bits.block_task;
ovfl_ctrl.bits.mask_monitoring |= ovfl_arg.ovfl_ctrl.bits.mask_monitoring; ovfl_ctrl.bits.mask_monitoring |= ovfl_arg.ovfl_ctrl.bits.mask_monitoring;
ovfl_ctrl.bits.reset_ovfl_pmds |= ovfl_arg.ovfl_ctrl.bits.reset_ovfl_pmds; /* yes or no */ /*
* build the bitmask of pmds to reset now
*/
if (ovfl_arg.ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
} }
...@@ -5287,6 +5279,10 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5287,6 +5279,10 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
current->pid, current->pid,
pmd_mask<<PMU_FIRST_COUNTER)); pmd_mask<<PMU_FIRST_COUNTER));
} }
/*
* remove the pmds we reset now from the set of pmds to reset in pfm_restart()
*/
ovfl_pmds &= ~reset_pmds;
} else { } else {
/* /*
* when no sampling module is used, then the default * when no sampling module is used, then the default
...@@ -5296,14 +5292,21 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str ...@@ -5296,14 +5292,21 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0;
ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
/*
* if needed, we reset all overflowed pmds
*/
if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
} }
DPRINT(("current [%d] ovfl_pmds=0x%lx reset_pmds=0x%lx\n",
current->pid,
ovfl_pmds,
reset_pmds));
/* /*
* if we (still) have some overflowed PMD but no notification is requested * reset the requested PMD registers using the short reset values
* then we use the short reset period.
*/ */
if (ovfl_ctrl.bits.reset_ovfl_pmds) { if (reset_pmds) {
unsigned long bm = ovfl_pmds; unsigned long bm = reset_pmds;
pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
} }
......
...@@ -178,6 +178,7 @@ default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct ...@@ -178,6 +178,7 @@ default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct
ent->tstamp = stamp; ent->tstamp = stamp;
ent->cpu = smp_processor_id(); ent->cpu = smp_processor_id();
ent->set = arg->active_set; ent->set = arg->active_set;
ent->tgid = current->tgid;
/* /*
* selectively store PMDs in increasing index number * selectively store PMDs in increasing index number
......
...@@ -259,10 +259,12 @@ ia64_load_extra (struct task_struct *task) ...@@ -259,10 +259,12 @@ ia64_load_extra (struct task_struct *task)
* *
* We get here through the following call chain: * We get here through the following call chain:
* *
* <clone syscall> * from user-level: from kernel:
* sys_clone *
* do_fork * <clone syscall> <some kernel call frames>
* copy_thread * sys_clone :
* do_fork do_fork
* copy_thread copy_thread
* *
* This means that the stack layout is as follows: * This means that the stack layout is as follows:
* *
...@@ -276,9 +278,6 @@ ia64_load_extra (struct task_struct *task) ...@@ -276,9 +278,6 @@ ia64_load_extra (struct task_struct *task)
* | | <-- sp (lowest addr) * | | <-- sp (lowest addr)
* +---------------------+ * +---------------------+
* *
* Note: if we get called through kernel_thread() then the memory above "(highest addr)"
* is valid kernel stack memory that needs to be copied as well.
*
* Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
* integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
* with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
...@@ -291,9 +290,9 @@ copy_thread (int nr, unsigned long clone_flags, ...@@ -291,9 +290,9 @@ copy_thread (int nr, unsigned long clone_flags,
unsigned long user_stack_base, unsigned long user_stack_size, unsigned long user_stack_base, unsigned long user_stack_size,
struct task_struct *p, struct pt_regs *regs) struct task_struct *p, struct pt_regs *regs)
{ {
unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used;
struct switch_stack *child_stack, *stack;
extern char ia64_ret_from_clone, ia32_ret_from_clone; extern char ia64_ret_from_clone, ia32_ret_from_clone;
struct switch_stack *child_stack, *stack;
unsigned long rbs, child_rbs, rbs_size;
struct pt_regs *child_ptregs; struct pt_regs *child_ptregs;
int retval = 0; int retval = 0;
...@@ -306,16 +305,13 @@ copy_thread (int nr, unsigned long clone_flags, ...@@ -306,16 +305,13 @@ copy_thread (int nr, unsigned long clone_flags,
return 0; return 0;
#endif #endif
stack_top = (unsigned long) current + IA64_STK_OFFSET;
stack = ((struct switch_stack *) regs) - 1; stack = ((struct switch_stack *) regs) - 1;
stack_used = stack_top - (unsigned long) stack;
stack_offset = IA64_STK_OFFSET - stack_used;
child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset); child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
child_ptregs = (struct pt_regs *) (child_stack + 1); child_stack = (struct switch_stack *) child_ptregs - 1;
/* copy parent's switch_stack & pt_regs to child: */ /* copy parent's switch_stack & pt_regs to child: */
memcpy(child_stack, stack, stack_used); memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
rbs = (unsigned long) current + IA64_RBS_OFFSET; rbs = (unsigned long) current + IA64_RBS_OFFSET;
child_rbs = (unsigned long) p + IA64_RBS_OFFSET; child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
...@@ -324,7 +320,7 @@ copy_thread (int nr, unsigned long clone_flags, ...@@ -324,7 +320,7 @@ copy_thread (int nr, unsigned long clone_flags,
/* copy the parent's register backing store to the child: */ /* copy the parent's register backing store to the child: */
memcpy((void *) child_rbs, (void *) rbs, rbs_size); memcpy((void *) child_rbs, (void *) rbs, rbs_size);
if (user_mode(child_ptregs)) { if (likely(user_mode(child_ptregs))) {
if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs)) if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
if (user_stack_base) { if (user_stack_base) {
...@@ -341,14 +337,14 @@ copy_thread (int nr, unsigned long clone_flags, ...@@ -341,14 +337,14 @@ copy_thread (int nr, unsigned long clone_flags,
* been taken care of by the caller of sys_clone() * been taken care of by the caller of sys_clone()
* already. * already.
*/ */
child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */ child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
} }
child_stack->ar_bspstore = child_rbs + rbs_size;
if (IS_IA32_PROCESS(regs)) if (IS_IA32_PROCESS(regs))
child_stack->b0 = (unsigned long) &ia32_ret_from_clone; child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
else else
child_stack->b0 = (unsigned long) &ia64_ret_from_clone; child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
child_stack->ar_bspstore = child_rbs + rbs_size;
/* copy parts of thread_struct: */ /* copy parts of thread_struct: */
p->thread.ksp = (unsigned long) child_stack - 16; p->thread.ksp = (unsigned long) child_stack - 16;
...@@ -578,12 +574,32 @@ ia64_set_personality (struct elf64_hdr *elf_ex, int ibcs2_interpreter) ...@@ -578,12 +574,32 @@ ia64_set_personality (struct elf64_hdr *elf_ex, int ibcs2_interpreter)
pid_t pid_t
kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
{ {
struct task_struct *parent = current; extern void ia64_invoke_kernel_thread_helper (void);
int result; unsigned long *helper_fptr = (unsigned long *) &ia64_invoke_kernel_thread_helper;
pid_t tid; struct {
struct switch_stack sw;
struct pt_regs pt;
} regs;
memset(&regs, 0, sizeof(regs));
regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
regs.pt.r1 = helper_fptr[1]; /* set GP */
regs.pt.r9 = (unsigned long) fn; /* 1st argument */
regs.pt.r11 = (unsigned long) arg; /* 2nd argument */
/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */
regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */
regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);
tid = clone(flags | CLONE_VM | CLONE_UNTRACED, 0); /* This gets called from kernel_thread() via ia64_invoke_kernel_thread_helper(). */
if (parent != current) { int
kernel_thread_helper (int (*fn)(void *), void *arg)
{
#ifdef CONFIG_IA32_SUPPORT #ifdef CONFIG_IA32_SUPPORT
if (IS_IA32_PROCESS(ia64_task_regs(current))) { if (IS_IA32_PROCESS(ia64_task_regs(current))) {
/* A kernel thread is always a 64-bit process. */ /* A kernel thread is always a 64-bit process. */
...@@ -593,12 +609,8 @@ kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) ...@@ -593,12 +609,8 @@ kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
} }
#endif #endif
result = (*fn)(arg); return (*fn)(arg);
_exit(result);
}
return tid;
} }
EXPORT_SYMBOL(kernel_thread);
/* /*
* Flush thread state. This is called when a thread does an execve(). * Flush thread state. This is called when a thread does an execve().
......
/* /*
* IA-64 Huge TLB Page Support for Kernel. * IA-64 Huge TLB Page Support for Kernel.
* *
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
* Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
*
* Sep, 2003: add numa support
* Feb, 2004: dynamic hugetlb page size via boot parameter
*/ */
#include <linux/config.h> #include <linux/config.h>
...@@ -18,11 +22,10 @@ ...@@ -18,11 +22,10 @@
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT)
static long htlbpagemem; static long htlbpagemem;
int htlbpage_max; int htlbpage_max;
static long htlbzone_pages; static long htlbzone_pages;
unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
static struct list_head hugepage_freelists[MAX_NUMNODES]; static struct list_head hugepage_freelists[MAX_NUMNODES];
static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
...@@ -407,7 +410,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u ...@@ -407,7 +410,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
return -EINVAL; return -EINVAL;
/* This code assumes that REGION_HPAGE != 0. */ /* This code assumes that REGION_HPAGE != 0. */
if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1))) if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
addr = TASK_HPAGE_BASE; addr = HPAGE_REGION_BASE;
else else
addr = ALIGN(addr, HPAGE_SIZE); addr = ALIGN(addr, HPAGE_SIZE);
for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
...@@ -520,6 +523,35 @@ static int __init hugetlb_setup(char *s) ...@@ -520,6 +523,35 @@ static int __init hugetlb_setup(char *s)
} }
__setup("hugepages=", hugetlb_setup); __setup("hugepages=", hugetlb_setup);
/*
 * Handler for the "hugepagesz=" boot parameter: selects the huge page size
 * at boot time instead of at kernel configuration time.
 *
 * @str: the text that follows "hugepagesz=" on the kernel command line.
 *
 * The requested size is accepted only if all of the following hold:
 *   - nothing is left in @str after the size has been parsed,
 *   - the size is a power of two,
 *   - the bit corresponding to the size is set in the page-size mask
 *     (tr_pages),
 *   - the size is larger than PAGE_SIZE and smaller than
 *     (1UL << PAGE_SHIFT << MAX_ORDER).
 *
 * On success hpage_shift is updated and the region register for
 * HPAGE_REGION_BASE is rewritten with the new shift.  On failure a warning
 * is printed and hpage_shift keeps its previous value.
 *
 * Always returns 1, for accepted and rejected values alike.
 * NOTE(review): presumably 1 tells the __setup() machinery that the option
 * was consumed -- confirm against the boot-parameter parsing code.
 */
static int __init hugetlb_setup_sz(char *str)
{
u64 tr_pages;
unsigned long long size;
/*
 * Query the page-size mask (presumably the sizes the CPU supports, as
 * reported by PAL -- TODO confirm); a non-zero return is treated as failure.
 */
if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
/*
 * shouldn't happen, but just in case: fall back to a fixed mask with
 * bits 12, 13, 14, 16, 18, 20, 22, 24, 26 and 28 set, i.e. 4KB, 8KB,
 * 16KB, 64KB, 256KB, 1MB, 4MB, 16MB, 64MB and 256MB.
 */
tr_pages = 0x15557000UL;
/*
 * Parse the size; str is passed by reference, so the *str test below
 * rejects trailing characters (assuming memparse() advances str past
 * what it consumed -- TODO confirm).
 */
size = memparse(str, &str);
/*
 * (size & (size-1)) is non-zero unless size is a power of two (or zero);
 * a zero size is then rejected by the !(tr_pages & size) test.
 */
if (*str || (size & (size-1)) || !(tr_pages & size) ||
size <= PAGE_SIZE ||
size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
printk(KERN_WARNING "Invalid huge page size specified\n");
return 1;
}
/* size has exactly one bit set here, so its bit index is log2(size). */
hpage_shift = __ffs(size);
/*
 * The boot cpu has already executed ia64_mmu_init() and programmed the
 * huge page region with HPAGE_SHIFT_DEFAULT; override that setting here
 * with the new page shift.
 */
ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
return 1;
}
__setup("hugepagesz=", hugetlb_setup_sz);
static int __init hugetlb_init(void) static int __init hugetlb_init(void)
{ {
int i; int i;
...@@ -540,7 +572,7 @@ static int __init hugetlb_init(void) ...@@ -540,7 +572,7 @@ static int __init hugetlb_init(void)
printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem); printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem);
return 0; return 0;
} }
module_init(hugetlb_init); __initcall(hugetlb_init);
int hugetlb_report_meminfo(char *buf) int hugetlb_report_meminfo(char *buf)
{ {
......
...@@ -342,6 +342,10 @@ ia64_mmu_init (void *my_cpu_data) ...@@ -342,6 +342,10 @@ ia64_mmu_init (void *my_cpu_data)
ia64_tlb_init(); ia64_tlb_init();
#ifdef CONFIG_HUGETLB_PAGE
ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
#endif
#ifdef CONFIG_IA64_MCA #ifdef CONFIG_IA64_MCA
cpu = smp_processor_id(); cpu = smp_processor_id();
......
...@@ -45,9 +45,9 @@ ...@@ -45,9 +45,9 @@
/* /*
* Mask bit * Mask bit
*/ */
#define IOSAPIC_MASK_SHIFT 16 #define IOSAPIC_MASK_SHIFT 16
#define IOSAPIC_UNMASK 0 #define IOSAPIC_MASK (1<<IOSAPIC_MASK_SHIFT)
#define IOSAPIC_MSAK 1
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -140,8 +140,9 @@ reload_context (mm_context_t context) ...@@ -140,8 +140,9 @@ reload_context (mm_context_t context)
{ {
unsigned long rid; unsigned long rid;
unsigned long rid_incr = 0; unsigned long rid_incr = 0;
unsigned long rr0, rr1, rr2, rr3, rr4; unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
old_rr4 = ia64_get_rr(0x8000000000000000);
rid = context << 3; /* make space for encoding the region number */ rid = context << 3; /* make space for encoding the region number */
rid_incr = 1 << 8; rid_incr = 1 << 8;
...@@ -152,7 +153,7 @@ reload_context (mm_context_t context) ...@@ -152,7 +153,7 @@ reload_context (mm_context_t context)
rr3 = rr0 + 3*rid_incr; rr3 = rr0 + 3*rid_incr;
rr4 = rr0 + 4*rid_incr; rr4 = rr0 + 4*rid_incr;
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
rr4 = (rr4 & (~(0xfcUL))) | (HPAGE_SHIFT << 2); rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc);
#endif #endif
ia64_set_rr(0x0000000000000000, rr0); ia64_set_rr(0x0000000000000000, rr0);
......
...@@ -37,31 +37,14 @@ ...@@ -37,31 +37,14 @@
#define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */ #define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
# define REGION_HPAGE (4UL) /* note: this is hardcoded in reload_context()!*/
# if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
# define HPAGE_SHIFT 32
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1GB)
# define HPAGE_SHIFT 30
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
# define HPAGE_SHIFT 28
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
# define HPAGE_SHIFT 26
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
# define HPAGE_SHIFT 24
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
# define HPAGE_SHIFT 22
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
# define HPAGE_SHIFT 20
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
# define HPAGE_SHIFT 18
# else
# error Unsupported IA-64 HugeTLB Page Size!
# endif
# define REGION_HPAGE (4UL) /* note: this is hardcoded in mmu_context.h:reload_context()!*/
# define REGION_SHIFT 61 # define REGION_SHIFT 61
# define HPAGE_REGION_BASE (REGION_HPAGE << REGION_SHIFT)
# define HPAGE_SHIFT hpage_shift
# define HPAGE_SHIFT_DEFAULT 28 /* check ia64 SDM for architecture supported size */
# define HPAGE_SIZE (__IA64_UL_CONST(1) << HPAGE_SHIFT) # define HPAGE_SIZE (__IA64_UL_CONST(1) << HPAGE_SHIFT)
# define HPAGE_MASK (~(HPAGE_SIZE - 1)) # define HPAGE_MASK (~(HPAGE_SIZE - 1))
# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
# define ARCH_HAS_HUGEPAGE_ONLY_RANGE # define ARCH_HAS_HUGEPAGE_ONLY_RANGE
#endif /* CONFIG_HUGETLB_PAGE */ #endif /* CONFIG_HUGETLB_PAGE */
...@@ -140,6 +123,7 @@ typedef union ia64_va { ...@@ -140,6 +123,7 @@ typedef union ia64_va {
# define is_hugepage_only_range(addr, len) \ # define is_hugepage_only_range(addr, len) \
(REGION_NUMBER(addr) == REGION_HPAGE && \ (REGION_NUMBER(addr) == REGION_HPAGE && \
REGION_NUMBER((addr)+(len)) == REGION_HPAGE) REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
extern unsigned int hpage_shift;
#endif #endif
static __inline__ int static __inline__ int
......
...@@ -59,7 +59,7 @@ typedef struct { ...@@ -59,7 +59,7 @@ typedef struct {
* last_reset_value member indicates the initial value of the overflowed PMD. * last_reset_value member indicates the initial value of the overflowed PMD.
*/ */
typedef struct { typedef struct {
int pid; /* active process at PMU interrupt point */ int pid; /* thread id (for NPTL, this is gettid()) */
unsigned char reserved1[3]; /* reserved for future use */ unsigned char reserved1[3]; /* reserved for future use */
unsigned char ovfl_pmd; /* index of overflowed PMD */ unsigned char ovfl_pmd; /* index of overflowed PMD */
...@@ -69,7 +69,7 @@ typedef struct { ...@@ -69,7 +69,7 @@ typedef struct {
unsigned short cpu; /* cpu on which the overflow occurred */ unsigned short cpu; /* cpu on which the overflow occurred */
unsigned short set; /* event set active when overflow occurred */ unsigned short set; /* event set active when overflow occurred */
unsigned int reserved2; /* for future use */ int tgid; /* thread group id (for NPTL, this is getpid()) */
} pfm_default_smpl_entry_t; } pfm_default_smpl_entry_t;
#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */ #define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define _ASM_IA64_SCATTERLIST_H #define _ASM_IA64_SCATTERLIST_H
/* /*
* Modified 1998-1999, 2001-2002 * Modified 1998-1999, 2001-2002, 2004
* David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
*/ */
...@@ -15,6 +15,14 @@ struct scatterlist { ...@@ -15,6 +15,14 @@ struct scatterlist {
unsigned int dma_length; unsigned int dma_length;
}; };
#define ISA_DMA_THRESHOLD (~0UL) /*
* It used to be that ISA_DMA_THRESHOLD had something to do with the
* DMA-limits of ISA-devices. Nowadays, its only remaining use (apart
* from the aha1542.c driver, which isn't 64-bit clean anyhow) is to
* tell the block-layer (via BLK_BOUNCE_ISA) what the max. physical
* address of a page is that is allocated with GFP_DMA. On IA-64,
* that's 4GB - 1.
*/
#define ISA_DMA_THRESHOLD 0xffffffff
#endif /* _ASM_IA64_SCATTERLIST_H */ #endif /* _ASM_IA64_SCATTERLIST_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment