Commit 3c82eb67 authored by David Mosberger

Merge tiger.hpl.hp.com:/data1/bk/vanilla/linux-2.5

into tiger.hpl.hp.com:/data1/bk/lia64/to-linus-2.5
parents d103aab6 196b60d3
......@@ -102,30 +102,14 @@ compressed: vmlinux
rawboot:
@$(MAKEBOOT) rawboot
#
# My boot writes directly to a specific disk partition, I doubt most
# people will want to do that without changes..
#
msb my-special-boot:
@$(MAKEBOOT) msb
bootimage:
@$(MAKEBOOT) bootimage
srmboot:
@$(MAKEBOOT) srmboot
archclean:
@$(MAKEBOOT) clean
archmrproper:
@$(MAKE) -C arch/$(ARCH)/tools mrproper
bootpfile:
@$(MAKEBOOT) bootpfile
prepare: $(TOPDIR)/include/asm-ia64/offsets.h
$(TOPDIR)/include/asm-ia64/offsets.h: include/asm include/linux/version.h \
include/config/MARKER
@$(MAKE) -C arch/$(ARCH)/tools $@
\ No newline at end of file
@$(MAKE) -C arch/$(ARCH)/tools $@
......@@ -86,6 +86,31 @@ fi
define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore.
define_int CONFIG_FORCE_MAX_ZONEORDER 18
bool 'IA-64 Huge TLB Page Support' CONFIG_HUGETLB_PAGE
if [ "$CONFIG_HUGETLB_PAGE" = "y" ]; then
if [ "$CONFIG_MCKINLEY" = "y" ]; then
choice ' IA-64 Huge TLB Page Size' \
"4GB CONFIG_HUGETLB_PAGE_SIZE_4GB \
256MB CONFIG_HUGETLB_PAGE_SIZE_256MB \
64MB CONFIG_HUGETLB_PAGE_SIZE_64MB \
16MB CONFIG_HUGETLB_PAGE_SIZE_16MB \
4MB CONFIG_HUGETLB_PAGE_SIZE_4MB \
1MB CONFIG_HUGETLB_PAGE_SIZE_1MB \
256KB CONFIG_HUGETLB_PAGE_SIZE_256KB" 16MB
else
choice ' IA-64 Huge TLB Page Size' \
"256MB CONFIG_HUGETLB_PAGE_SIZE_256MB \
64MB CONFIG_HUGETLB_PAGE_SIZE_64MB \
16MB CONFIG_HUGETLB_PAGE_SIZE_16MB \
4MB CONFIG_HUGETLB_PAGE_SIZE_4MB \
1MB CONFIG_HUGETLB_PAGE_SIZE_1MB \
256KB CONFIG_HUGETLB_PAGE_SIZE_256KB" 16MB
fi
fi
bool 'SMP support' CONFIG_SMP
bool 'Support running of Linux/x86 binaries' CONFIG_IA32_SUPPORT
bool 'Performance monitor support' CONFIG_PERFMON
......
......@@ -39,9 +39,7 @@
#define DRIVER_NAME "SBA"
#ifndef CONFIG_IA64_HP_PROTO
#define ALLOW_IOV_BYPASS
#endif
#define ENABLE_MARK_CLEAN
/*
** The number of debug flags is a clue - this code is fragile.
......@@ -1252,10 +1250,6 @@ sba_ioc_init(struct sba_device *sba_dev, struct ioc *ioc, int ioc_num)
** Firmware programs the maximum IOV space size into the imask reg
*/
iova_space_size = ~(READ_REG(ioc->ioc_hpa + IOC_IMASK) & 0xFFFFFFFFUL) + 1;
#ifdef CONFIG_IA64_HP_PROTO
if (!iova_space_size)
iova_space_size = GB(1);
#endif
/*
** iov_order is always based on a 1GB IOVA space since we want to
......@@ -1625,10 +1619,8 @@ void __init sba_init(void)
device->slot_name, hpa);
if ((hw_rev & 0xFF) < 0x20) {
printk(KERN_INFO "%s WARNING rev 2.0 or greater will be required for IO MMU support in the future\n", DRIVER_NAME);
#ifndef CONFIG_IA64_HP_PROTO
panic("%s: CONFIG_IA64_HP_PROTO MUST be enabled to support SBA rev less than 2.0", DRIVER_NAME);
#endif
printk("%s: SBA rev less than 2.0 not supported", DRIVER_NAME);
return;
}
sba_dev = kmalloc(sizeof(struct sba_device), GFP_KERNEL);
......
......@@ -33,60 +33,73 @@ struct fake_pci_dev {
static struct pci_ops *orig_pci_ops;
#define HP_CFG_RD(sz, bits, name) \
static int hp_cfg_read##sz (struct pci_dev *dev, int where, u##bits *value) \
{ \
struct fake_pci_dev *fake_dev; \
if (!(fake_dev = (struct fake_pci_dev *) dev->sysdata)) \
return orig_pci_ops->name(dev, where, value); \
\
if (where == PCI_BASE_ADDRESS_0) { \
if (fake_dev->sizing) \
*value = ~(fake_dev->csr_size - 1); \
else \
*value = (fake_dev->csr_base & \
PCI_BASE_ADDRESS_MEM_MASK) | \
PCI_BASE_ADDRESS_SPACE_MEMORY; \
fake_dev->sizing = 0; \
return PCIBIOS_SUCCESSFUL; \
} \
*value = read##sz(fake_dev->mapped_csrs + where); \
if (where == PCI_COMMAND) \
*value |= PCI_COMMAND_MEMORY; /* SBA omits this */ \
return PCIBIOS_SUCCESSFUL; \
struct fake_pci_dev *
lookup_fake_dev (struct pci_bus *bus, unsigned int devfn)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list)
if (dev->devfn == devfn)
return (struct fake_pci_dev *) dev->sysdata;
return NULL;
}
#define HP_CFG_WR(sz, bits, name) \
static int hp_cfg_write##sz (struct pci_dev *dev, int where, u##bits value) \
{ \
struct fake_pci_dev *fake_dev; \
\
if (!(fake_dev = (struct fake_pci_dev *) dev->sysdata)) \
return orig_pci_ops->name(dev, where, value); \
\
if (where == PCI_BASE_ADDRESS_0) { \
if (value == (u##bits) ~0) \
fake_dev->sizing = 1; \
return PCIBIOS_SUCCESSFUL; \
} else \
write##sz(value, fake_dev->mapped_csrs + where); \
return PCIBIOS_SUCCESSFUL; \
/*
 * Config-space read hook.
 *
 * If (bus, devfn) has no fake device attached, the access is forwarded
 * unchanged to the original pci_ops.  Otherwise the read is emulated on
 * top of the device's memory-mapped CSRs:
 *
 *  - PCI_BASE_ADDRESS_0 returns either the size mask (when a preceding
 *    all-ones write armed "sizing") or the CSR base address tagged as a
 *    memory BAR; the sizing flag is cleared in both cases.
 *  - Any other register is read from mapped_csrs + where with the
 *    requested access width (1, 2 or 4 bytes).
 *  - PCI_COMMAND always reports PCI_COMMAND_MEMORY as set.
 *
 * For an unsupported SIZE a warning is logged and *value is set to all
 * ones, so the caller never sees an unwritten result.  The return code
 * stays PCIBIOS_SUCCESSFUL, as before.
 */
static int
hp_cfg_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
	struct fake_pci_dev *fake_dev = lookup_fake_dev(bus, devfn);

	if (!fake_dev)
		return (*orig_pci_ops->read)(bus, devfn, where, size, value);

	if (where == PCI_BASE_ADDRESS_0) {
		if (fake_dev->sizing)
			*value = ~(fake_dev->csr_size - 1);
		else
			*value = ((fake_dev->csr_base & PCI_BASE_ADDRESS_MEM_MASK)
				  | PCI_BASE_ADDRESS_SPACE_MEMORY);
		fake_dev->sizing = 0;
		return PCIBIOS_SUCCESSFUL;
	}

	switch (size) {
	      case 1: *value = readb(fake_dev->mapped_csrs + where); break;
	      case 2: *value = readw(fake_dev->mapped_csrs + where); break;
	      case 4: *value = readl(fake_dev->mapped_csrs + where); break;
	      default:
		printk(KERN_WARNING "hp_cfg_read: bad size = %d bytes\n", size);
		/* give the caller a defined value instead of leaving *value untouched */
		*value = ~(u32) 0;
		break;
	}
	if (where == PCI_COMMAND)
		*value |= PCI_COMMAND_MEMORY; /* SBA omits this */
	return PCIBIOS_SUCCESSFUL;
}
HP_CFG_RD(b, 8, read_byte)
HP_CFG_RD(w, 16, read_word)
HP_CFG_RD(l, 32, read_dword)
HP_CFG_WR(b, 8, write_byte)
HP_CFG_WR(w, 16, write_word)
HP_CFG_WR(l, 32, write_dword)
/*
 * Config-space write hook.
 *
 * If (bus, devfn) has no fake device attached, the access is forwarded
 * unchanged to the original pci_ops.  Otherwise the write is emulated:
 *
 *  - PCI_BASE_ADDRESS_0 is never written through.  Writing all ones
 *    (for the given access width) only arms the "sizing" flag, which
 *    hp_cfg_read() consumes on the next BAR0 read.
 *  - Any other register is written to mapped_csrs + where with the
 *    requested access width (1, 2 or 4 bytes).
 *
 * An unsupported SIZE is logged and otherwise ignored; the function
 * returns PCIBIOS_SUCCESSFUL in every emulated case.
 */
static int
hp_cfg_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
{
	struct fake_pci_dev *fake_dev = lookup_fake_dev(bus, devfn);

	if (!fake_dev)
		return (*orig_pci_ops->write)(bus, devfn, where, size, value);

	if (where == PCI_BASE_ADDRESS_0) {
		/*
		 * Build the all-ones pattern in a 64-bit type and only for
		 * sane widths, so the shift count never reaches the width
		 * of the shifted operand.
		 */
		if (size >= 1 && size <= 4 && value == ((1ULL << 8*size) - 1))
			fake_dev->sizing = 1;
		return PCIBIOS_SUCCESSFUL;
	}

	switch (size) {
	      case 1: writeb(value, fake_dev->mapped_csrs + where); break;
	      case 2: writew(value, fake_dev->mapped_csrs + where); break;
	      case 4: writel(value, fake_dev->mapped_csrs + where); break;
	      default:
		printk(KERN_WARNING "hp_cfg_write: bad size = %d bytes\n", size);
		break;
	}
	return PCIBIOS_SUCCESSFUL;
}
static struct pci_ops hp_pci_conf = {
hp_cfg_readb,
hp_cfg_readw,
hp_cfg_readl,
hp_cfg_writeb,
hp_cfg_writew,
hp_cfg_writel,
.read = hp_cfg_read,
.write = hp_cfg_write
};
static void
......@@ -309,40 +322,8 @@ hpzx1_acpi_dev_init(void)
* HWP0003: AGP LBA device
*/
acpi_get_devices("HWP0001", hpzx1_sba_probe, "HWP0001", NULL);
#ifdef CONFIG_IA64_HP_PROTO
if (hpzx1_devices) {
#endif
acpi_get_devices("HWP0002", hpzx1_lba_probe, "HWP0002 PCI LBA", NULL);
acpi_get_devices("HWP0003", hpzx1_lba_probe, "HWP0003 AGP LBA", NULL);
#ifdef CONFIG_IA64_HP_PROTO
}
#define ZX1_FUNC_ID_VALUE (PCI_DEVICE_ID_HP_ZX1_SBA << 16) | PCI_VENDOR_ID_HP
/*
* Early protos don't have bridges in the ACPI namespace, so
* if we didn't find anything, add the things we know are
* there.
*/
if (hpzx1_devices == 0) {
u64 hpa, csr_base;
csr_base = 0xfed00000UL;
hpa = (u64) ioremap(csr_base, 0x2000);
if (__raw_readl(hpa) == ZX1_FUNC_ID_VALUE) {
hpzx1_fake_pci_dev("HWP0001 SBA", 0, csr_base, 0x1000);
hpzx1_fake_pci_dev("HWP0001 IOC", 0, csr_base + 0x1000,
0x1000);
csr_base = 0xfed24000UL;
iounmap(hpa);
hpa = (u64) ioremap(csr_base, 0x1000);
hpzx1_fake_pci_dev("HWP0003 AGP LBA", 0x40, csr_base,
0x1000);
}
iounmap(hpa);
}
#endif
}
extern void sba_init(void);
......
......@@ -2111,8 +2111,8 @@ struct shm_info32 {
};
struct ipc_kludge {
struct msgbuf *msgp;
long msgtyp;
u32 msgp;
s32 msgtyp;
};
#define SEMOP 1
......
......@@ -33,6 +33,15 @@
#define EFI_DEBUG 0
#ifdef CONFIG_HUGETLB_PAGE
/* By default, a total of 512MB is reserved for huge pages. */
#define HTLBZONE_SIZE_DEFAULT 0x20000000
unsigned long htlbzone_pages = (HTLBZONE_SIZE_DEFAULT >> HPAGE_SHIFT);
#endif
extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
......@@ -399,6 +408,25 @@ efi_init (void)
++cp;
}
}
#ifdef CONFIG_HUGETLB_PAGE
/* Same command-line scan as above, this time for "lpmem=" (amount of memory for huge pages). */
for (cp = saved_command_line; *cp; ) {
if (memcmp(cp, "lpmem=", 6) == 0) {
cp += 6;
htlbzone_pages = memparse(cp, &end);
htlbzone_pages = (htlbzone_pages >> HPAGE_SHIFT);
if (end != cp)
break;
cp = end;
} else {
while (*cp != ' ' && *cp)
++cp;
while (*cp == ' ')
++cp;
}
}
printk("Total HugeTLB_Page memory pages requested 0x%lx \n", htlbzone_pages);
#endif
if (mem_limit != ~0UL)
printk("Ignoring memory above %luMB\n", mem_limit >> 20);
......
......@@ -90,15 +90,23 @@ ENTRY(ia64_execve)
br.ret.sptk.many rp
END(ia64_execve)
/*
* sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 user_tid, u64 tls)
*/
GLOBAL_ENTRY(sys_clone2)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
alloc r16=ar.pfs,3,2,4,0
alloc r16=ar.pfs,5,2,5,0
DO_SAVE_SWITCH_STACK
adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
mov loc0=rp
mov loc1=r16 // save ar.pfs across do_fork
.body
mov out1=in1
mov out3=in2
tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
mov out4=in3 // valid only w/CLONE_SETTID and/or CLONE_CLEARTID
;;
(p6) st8 [r2]=in4 // store TLS in r13 (tp)
adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
dep out0=0,in0,CLONE_IDLETASK_BIT,1 // out0 = clone_flags & ~CLONE_IDLETASK
br.call.sptk.many rp=do_fork
......@@ -115,15 +123,24 @@ GLOBAL_ENTRY(sys_clone2)
br.ret.sptk.many rp
END(sys_clone2)
/*
* sys_clone(u64 flags, u64 ustack_base, u64 user_tid, u64 tls)
* Deprecated. Use sys_clone2() instead.
*/
GLOBAL_ENTRY(sys_clone)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
alloc r16=ar.pfs,2,2,4,0
alloc r16=ar.pfs,4,2,5,0
DO_SAVE_SWITCH_STACK
adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
mov loc0=rp
mov loc1=r16 // save ar.pfs across do_fork
.body
mov out1=in1
mov out3=16 // stacksize (compensates for 16-byte scratch area)
tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
mov out4=in2 // out4 = user_tid (optional)
;;
(p6) st8 [r2]=in3 // store TLS in r13 (tp)
adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
dep out0=0,in0,CLONE_IDLETASK_BIT,1 // out0 = clone_flags & ~CLONE_IDLETASK
br.call.sptk.many rp=do_fork
......@@ -521,7 +538,7 @@ GLOBAL_ENTRY(ia64_ret_from_clone)
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
/*
* We need to call schedule_tail() to complete the scheduling process.
* Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
* Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the
* address of the previously executing task.
*/
br.call.sptk.many rp=ia64_invoke_schedule_tail
......@@ -872,7 +889,7 @@ END(invoke_schedule)
#endif /* __GNUC__ < 3 */
/*
* Setup stack and call ia64_do_signal. Note that pSys and pNonSys need to
* Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
* be set up by the caller. We declare 8 input registers so the system call
* args get preserved, in case we need to restart a system call.
*/
......@@ -900,7 +917,7 @@ ENTRY(notify_resume_user)
mov ar.unat=r9
mov ar.pfs=loc1
br.ret.sptk.many rp
END(do_notify_resume_user)
END(notify_resume_user)
GLOBAL_ENTRY(sys_rt_sigsuspend)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
......@@ -1224,10 +1241,10 @@ sys_call_table:
data8 sys_futex // 1230
data8 sys_sched_setaffinity
data8 sys_sched_getaffinity
data8 ia64_ni_syscall
data8 ia64_ni_syscall
data8 ia64_ni_syscall // 1235
data8 ia64_ni_syscall
data8 sys_security
data8 sys_alloc_hugepages
data8 sys_free_hugepages // 1235
data8 sys_exit_group
data8 ia64_ni_syscall
data8 sys_io_setup
data8 sys_io_destroy
......
......@@ -2,7 +2,7 @@
* This file contains the code that gets mapped at the upper end of each task's text
* region. For now, it contains the signal trampoline code only.
*
* Copyright (C) 1999-2001 Hewlett-Packard Co
* Copyright (C) 1999-2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
......@@ -135,7 +135,7 @@ back_from_setup_rbs:
;;
ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0
cmp.ne p8,p0=r14,r15 // do we need to restore the rbs?
(p8) br.cond.spnt restore_rbs // yup -> (clobbers r14 and r16)
(p8) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7)
;;
back_from_restore_rbs:
adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
......@@ -189,20 +189,69 @@ setup_rbs:
.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
.body
restore_rbs:
// On input:
// r14 = bsp1 (bsp at the time of return from signal handler)
// r15 = bsp0 (bsp at the time the signal occurred)
//
// Here, we need to calculate bspstore0, the value that ar.bspstore needs
// to be set to, based on bsp0 and the size of the dirty partition on
// the alternate stack (sc_loadrs >> 16). This can be done with the
// following algorithm:
//
// bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
//
// This is what the code below does.
//
alloc r2=ar.pfs,0,0,0,0 // alloc null frame
adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
;;
ld8 r14=[r16]
adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp
ld8 r17=[r16]
ld8 r16=[r18] // get new rnat
extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0)
;;
mov ar.rsc=r14 // put RSE into enforced lazy mode
ld8 r14=[r16] // get new rnat
mov ar.rsc=r17 // put RSE into enforced lazy mode
shr.u r17=r17,16
;;
loadrs // restore dirty partition
sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19)
;;
loadrs // restore dirty partition
extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1)
;;
add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
;;
shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
;;
sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1)
movl r17=0x8208208208208209
;;
add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
setf.sig f7=r17
cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)?
;;
(p7) adds r18=-62,r18 // delta -= 62
;;
setf.sig f6=r18
;;
xmpy.h f6=f6,f7
;;
getf.sig r17=f6
;;
add r17=r17,r18
shr r18=r18,63
;;
shr r17=r17,5
;;
sub r17=r17,r18 // r17 = delta/63
;;
add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
;;
shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
;;
mov ar.bspstore=r15 // switch back to old register backing store area
;;
mov ar.rnat=r14 // restore RNaT
mov ar.rnat=r16 // restore RNaT
mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
// invala not necessary as that will happen when returning to user-mode
br.cond.sptk back_from_restore_rbs
......
......@@ -13,6 +13,8 @@
* Copyright (C) 1999 Intel Corp.
* Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
* Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
* Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
* -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
*/
#include <linux/config.h>
......@@ -260,302 +262,297 @@ GLOBAL_ENTRY(ia64_load_debug_regs)
END(ia64_load_debug_regs)
GLOBAL_ENTRY(__ia64_save_fpu)
alloc r2=ar.pfs,1,0,0,0
adds r3=16,in0
alloc r2=ar.pfs,1,4,0,0
adds loc0=96*16-16,in0
adds loc1=96*16-16-128,in0
;;
stf.spill.nta [in0]=f32,32
stf.spill.nta [ r3]=f33,32
stf.spill.nta [loc0]=f127,-256
stf.spill.nta [loc1]=f119,-256
;;
stf.spill.nta [in0]=f34,32
stf.spill.nta [ r3]=f35,32
stf.spill.nta [loc0]=f111,-256
stf.spill.nta [loc1]=f103,-256
;;
stf.spill.nta [in0]=f36,32
stf.spill.nta [ r3]=f37,32
stf.spill.nta [loc0]=f95,-256
stf.spill.nta [loc1]=f87,-256
;;
stf.spill.nta [in0]=f38,32
stf.spill.nta [ r3]=f39,32
stf.spill.nta [loc0]=f79,-256
stf.spill.nta [loc1]=f71,-256
;;
stf.spill.nta [in0]=f40,32
stf.spill.nta [ r3]=f41,32
stf.spill.nta [loc0]=f63,-256
stf.spill.nta [loc1]=f55,-256
adds loc2=96*16-32,in0
;;
stf.spill.nta [in0]=f42,32
stf.spill.nta [ r3]=f43,32
stf.spill.nta [loc0]=f47,-256
stf.spill.nta [loc1]=f39,-256
adds loc3=96*16-32-128,in0
;;
stf.spill.nta [in0]=f44,32
stf.spill.nta [ r3]=f45,32
stf.spill.nta [loc2]=f126,-256
stf.spill.nta [loc3]=f118,-256
;;
stf.spill.nta [in0]=f46,32
stf.spill.nta [ r3]=f47,32
stf.spill.nta [loc2]=f110,-256
stf.spill.nta [loc3]=f102,-256
;;
stf.spill.nta [in0]=f48,32
stf.spill.nta [ r3]=f49,32
stf.spill.nta [loc2]=f94,-256
stf.spill.nta [loc3]=f86,-256
;;
stf.spill.nta [in0]=f50,32
stf.spill.nta [ r3]=f51,32
stf.spill.nta [loc2]=f78,-256
stf.spill.nta [loc3]=f70,-256
;;
stf.spill.nta [in0]=f52,32
stf.spill.nta [ r3]=f53,32
stf.spill.nta [loc2]=f62,-256
stf.spill.nta [loc3]=f54,-256
adds loc0=96*16-48,in0
;;
stf.spill.nta [in0]=f54,32
stf.spill.nta [ r3]=f55,32
stf.spill.nta [loc2]=f46,-256
stf.spill.nta [loc3]=f38,-256
adds loc1=96*16-48-128,in0
;;
stf.spill.nta [in0]=f56,32
stf.spill.nta [ r3]=f57,32
stf.spill.nta [loc0]=f125,-256
stf.spill.nta [loc1]=f117,-256
;;
stf.spill.nta [in0]=f58,32
stf.spill.nta [ r3]=f59,32
stf.spill.nta [loc0]=f109,-256
stf.spill.nta [loc1]=f101,-256
;;
stf.spill.nta [in0]=f60,32
stf.spill.nta [ r3]=f61,32
stf.spill.nta [loc0]=f93,-256
stf.spill.nta [loc1]=f85,-256
;;
stf.spill.nta [in0]=f62,32
stf.spill.nta [ r3]=f63,32
stf.spill.nta [loc0]=f77,-256
stf.spill.nta [loc1]=f69,-256
;;
stf.spill.nta [in0]=f64,32
stf.spill.nta [ r3]=f65,32
stf.spill.nta [loc0]=f61,-256
stf.spill.nta [loc1]=f53,-256
adds loc2=96*16-64,in0
;;
stf.spill.nta [in0]=f66,32
stf.spill.nta [ r3]=f67,32
stf.spill.nta [loc0]=f45,-256
stf.spill.nta [loc1]=f37,-256
adds loc3=96*16-64-128,in0
;;
stf.spill.nta [in0]=f68,32
stf.spill.nta [ r3]=f69,32
stf.spill.nta [loc2]=f124,-256
stf.spill.nta [loc3]=f116,-256
;;
stf.spill.nta [in0]=f70,32
stf.spill.nta [ r3]=f71,32
stf.spill.nta [loc2]=f108,-256
stf.spill.nta [loc3]=f100,-256
;;
stf.spill.nta [in0]=f72,32
stf.spill.nta [ r3]=f73,32
stf.spill.nta [loc2]=f92,-256
stf.spill.nta [loc3]=f84,-256
;;
stf.spill.nta [in0]=f74,32
stf.spill.nta [ r3]=f75,32
stf.spill.nta [loc2]=f76,-256
stf.spill.nta [loc3]=f68,-256
;;
stf.spill.nta [in0]=f76,32
stf.spill.nta [ r3]=f77,32
stf.spill.nta [loc2]=f60,-256
stf.spill.nta [loc3]=f52,-256
adds loc0=96*16-80,in0
;;
stf.spill.nta [in0]=f78,32
stf.spill.nta [ r3]=f79,32
stf.spill.nta [loc2]=f44,-256
stf.spill.nta [loc3]=f36,-256
adds loc1=96*16-80-128,in0
;;
stf.spill.nta [in0]=f80,32
stf.spill.nta [ r3]=f81,32
stf.spill.nta [loc0]=f123,-256
stf.spill.nta [loc1]=f115,-256
;;
stf.spill.nta [in0]=f82,32
stf.spill.nta [ r3]=f83,32
stf.spill.nta [loc0]=f107,-256
stf.spill.nta [loc1]=f99,-256
;;
stf.spill.nta [in0]=f84,32
stf.spill.nta [ r3]=f85,32
stf.spill.nta [loc0]=f91,-256
stf.spill.nta [loc1]=f83,-256
;;
stf.spill.nta [in0]=f86,32
stf.spill.nta [ r3]=f87,32
stf.spill.nta [loc0]=f75,-256
stf.spill.nta [loc1]=f67,-256
;;
stf.spill.nta [in0]=f88,32
stf.spill.nta [ r3]=f89,32
stf.spill.nta [loc0]=f59,-256
stf.spill.nta [loc1]=f51,-256
adds loc2=96*16-96,in0
;;
stf.spill.nta [in0]=f90,32
stf.spill.nta [ r3]=f91,32
stf.spill.nta [loc0]=f43,-256
stf.spill.nta [loc1]=f35,-256
adds loc3=96*16-96-128,in0
;;
stf.spill.nta [in0]=f92,32
stf.spill.nta [ r3]=f93,32
stf.spill.nta [loc2]=f122,-256
stf.spill.nta [loc3]=f114,-256
;;
stf.spill.nta [in0]=f94,32
stf.spill.nta [ r3]=f95,32
stf.spill.nta [loc2]=f106,-256
stf.spill.nta [loc3]=f98,-256
;;
stf.spill.nta [in0]=f96,32
stf.spill.nta [ r3]=f97,32
stf.spill.nta [loc2]=f90,-256
stf.spill.nta [loc3]=f82,-256
;;
stf.spill.nta [in0]=f98,32
stf.spill.nta [ r3]=f99,32
stf.spill.nta [loc2]=f74,-256
stf.spill.nta [loc3]=f66,-256
;;
stf.spill.nta [in0]=f100,32
stf.spill.nta [ r3]=f101,32
stf.spill.nta [loc2]=f58,-256
stf.spill.nta [loc3]=f50,-256
adds loc0=96*16-112,in0
;;
stf.spill.nta [in0]=f102,32
stf.spill.nta [ r3]=f103,32
stf.spill.nta [loc2]=f42,-256
stf.spill.nta [loc3]=f34,-256
adds loc1=96*16-112-128,in0
;;
stf.spill.nta [in0]=f104,32
stf.spill.nta [ r3]=f105,32
stf.spill.nta [loc0]=f121,-256
stf.spill.nta [loc1]=f113,-256
;;
stf.spill.nta [in0]=f106,32
stf.spill.nta [ r3]=f107,32
stf.spill.nta [loc0]=f105,-256
stf.spill.nta [loc1]=f97,-256
;;
stf.spill.nta [in0]=f108,32
stf.spill.nta [ r3]=f109,32
stf.spill.nta [loc0]=f89,-256
stf.spill.nta [loc1]=f81,-256
;;
stf.spill.nta [in0]=f110,32
stf.spill.nta [ r3]=f111,32
stf.spill.nta [loc0]=f73,-256
stf.spill.nta [loc1]=f65,-256
;;
stf.spill.nta [in0]=f112,32
stf.spill.nta [ r3]=f113,32
stf.spill.nta [loc0]=f57,-256
stf.spill.nta [loc1]=f49,-256
adds loc2=96*16-128,in0
;;
stf.spill.nta [in0]=f114,32
stf.spill.nta [ r3]=f115,32
stf.spill.nta [loc0]=f41,-256
stf.spill.nta [loc1]=f33,-256
adds loc3=96*16-128-128,in0
;;
stf.spill.nta [in0]=f116,32
stf.spill.nta [ r3]=f117,32
stf.spill.nta [loc2]=f120,-256
stf.spill.nta [loc3]=f112,-256
;;
stf.spill.nta [in0]=f118,32
stf.spill.nta [ r3]=f119,32
stf.spill.nta [loc2]=f104,-256
stf.spill.nta [loc3]=f96,-256
;;
stf.spill.nta [in0]=f120,32
stf.spill.nta [ r3]=f121,32
stf.spill.nta [loc2]=f88,-256
stf.spill.nta [loc3]=f80,-256
;;
stf.spill.nta [in0]=f122,32
stf.spill.nta [ r3]=f123,32
stf.spill.nta [loc2]=f72,-256
stf.spill.nta [loc3]=f64,-256
;;
stf.spill.nta [in0]=f124,32
stf.spill.nta [ r3]=f125,32
stf.spill.nta [loc2]=f56,-256
stf.spill.nta [loc3]=f48,-256
;;
stf.spill.nta [in0]=f126,32
stf.spill.nta [ r3]=f127,32
stf.spill.nta [loc2]=f40
stf.spill.nta [loc3]=f32
br.ret.sptk.many rp
END(__ia64_save_fpu)
GLOBAL_ENTRY(__ia64_load_fpu)
alloc r2=ar.pfs,1,0,0,0
adds r3=16,in0
;;
ldf.fill.nta f32=[in0],32
ldf.fill.nta f33=[ r3],32
;;
ldf.fill.nta f34=[in0],32
ldf.fill.nta f35=[ r3],32
;;
ldf.fill.nta f36=[in0],32
ldf.fill.nta f37=[ r3],32
;;
ldf.fill.nta f38=[in0],32
ldf.fill.nta f39=[ r3],32
;;
ldf.fill.nta f40=[in0],32
ldf.fill.nta f41=[ r3],32
;;
ldf.fill.nta f42=[in0],32
ldf.fill.nta f43=[ r3],32
;;
ldf.fill.nta f44=[in0],32
ldf.fill.nta f45=[ r3],32
;;
ldf.fill.nta f46=[in0],32
ldf.fill.nta f47=[ r3],32
;;
ldf.fill.nta f48=[in0],32
ldf.fill.nta f49=[ r3],32
;;
ldf.fill.nta f50=[in0],32
ldf.fill.nta f51=[ r3],32
;;
ldf.fill.nta f52=[in0],32
ldf.fill.nta f53=[ r3],32
;;
ldf.fill.nta f54=[in0],32
ldf.fill.nta f55=[ r3],32
;;
ldf.fill.nta f56=[in0],32
ldf.fill.nta f57=[ r3],32
;;
ldf.fill.nta f58=[in0],32
ldf.fill.nta f59=[ r3],32
;;
ldf.fill.nta f60=[in0],32
ldf.fill.nta f61=[ r3],32
;;
ldf.fill.nta f62=[in0],32
ldf.fill.nta f63=[ r3],32
;;
ldf.fill.nta f64=[in0],32
ldf.fill.nta f65=[ r3],32
;;
ldf.fill.nta f66=[in0],32
ldf.fill.nta f67=[ r3],32
;;
ldf.fill.nta f68=[in0],32
ldf.fill.nta f69=[ r3],32
;;
ldf.fill.nta f70=[in0],32
ldf.fill.nta f71=[ r3],32
;;
ldf.fill.nta f72=[in0],32
ldf.fill.nta f73=[ r3],32
;;
ldf.fill.nta f74=[in0],32
ldf.fill.nta f75=[ r3],32
;;
ldf.fill.nta f76=[in0],32
ldf.fill.nta f77=[ r3],32
;;
ldf.fill.nta f78=[in0],32
ldf.fill.nta f79=[ r3],32
;;
ldf.fill.nta f80=[in0],32
ldf.fill.nta f81=[ r3],32
;;
ldf.fill.nta f82=[in0],32
ldf.fill.nta f83=[ r3],32
;;
ldf.fill.nta f84=[in0],32
ldf.fill.nta f85=[ r3],32
;;
ldf.fill.nta f86=[in0],32
ldf.fill.nta f87=[ r3],32
;;
ldf.fill.nta f88=[in0],32
ldf.fill.nta f89=[ r3],32
;;
ldf.fill.nta f90=[in0],32
ldf.fill.nta f91=[ r3],32
;;
ldf.fill.nta f92=[in0],32
ldf.fill.nta f93=[ r3],32
;;
ldf.fill.nta f94=[in0],32
ldf.fill.nta f95=[ r3],32
;;
ldf.fill.nta f96=[in0],32
ldf.fill.nta f97=[ r3],32
;;
ldf.fill.nta f98=[in0],32
ldf.fill.nta f99=[ r3],32
;;
ldf.fill.nta f100=[in0],32
ldf.fill.nta f101=[ r3],32
;;
ldf.fill.nta f102=[in0],32
ldf.fill.nta f103=[ r3],32
;;
ldf.fill.nta f104=[in0],32
ldf.fill.nta f105=[ r3],32
;;
ldf.fill.nta f106=[in0],32
ldf.fill.nta f107=[ r3],32
;;
ldf.fill.nta f108=[in0],32
ldf.fill.nta f109=[ r3],32
;;
ldf.fill.nta f110=[in0],32
ldf.fill.nta f111=[ r3],32
;;
ldf.fill.nta f112=[in0],32
ldf.fill.nta f113=[ r3],32
;;
ldf.fill.nta f114=[in0],32
ldf.fill.nta f115=[ r3],32
;;
ldf.fill.nta f116=[in0],32
ldf.fill.nta f117=[ r3],32
;;
ldf.fill.nta f118=[in0],32
ldf.fill.nta f119=[ r3],32
;;
ldf.fill.nta f120=[in0],32
ldf.fill.nta f121=[ r3],32
;;
ldf.fill.nta f122=[in0],32
ldf.fill.nta f123=[ r3],32
;;
ldf.fill.nta f124=[in0],32
ldf.fill.nta f125=[ r3],32
;;
ldf.fill.nta f126=[in0],32
ldf.fill.nta f127=[ r3],32
alloc r2=ar.pfs,1,2,0,0
adds r3=128,in0
adds r14=256,in0
adds r15=384,in0
mov loc0=512
mov loc1=-1024+16
;;
ldf.fill.nta f32=[in0],loc0
ldf.fill.nta f40=[ r3],loc0
ldf.fill.nta f48=[r14],loc0
ldf.fill.nta f56=[r15],loc0
;;
ldf.fill.nta f64=[in0],loc0
ldf.fill.nta f72=[ r3],loc0
ldf.fill.nta f80=[r14],loc0
ldf.fill.nta f88=[r15],loc0
;;
ldf.fill.nta f96=[in0],loc1
ldf.fill.nta f104=[ r3],loc1
ldf.fill.nta f112=[r14],loc1
ldf.fill.nta f120=[r15],loc1
;;
ldf.fill.nta f33=[in0],loc0
ldf.fill.nta f41=[ r3],loc0
ldf.fill.nta f49=[r14],loc0
ldf.fill.nta f57=[r15],loc0
;;
ldf.fill.nta f65=[in0],loc0
ldf.fill.nta f73=[ r3],loc0
ldf.fill.nta f81=[r14],loc0
ldf.fill.nta f89=[r15],loc0
;;
ldf.fill.nta f97=[in0],loc1
ldf.fill.nta f105=[ r3],loc1
ldf.fill.nta f113=[r14],loc1
ldf.fill.nta f121=[r15],loc1
;;
ldf.fill.nta f34=[in0],loc0
ldf.fill.nta f42=[ r3],loc0
ldf.fill.nta f50=[r14],loc0
ldf.fill.nta f58=[r15],loc0
;;
ldf.fill.nta f66=[in0],loc0
ldf.fill.nta f74=[ r3],loc0
ldf.fill.nta f82=[r14],loc0
ldf.fill.nta f90=[r15],loc0
;;
ldf.fill.nta f98=[in0],loc1
ldf.fill.nta f106=[ r3],loc1
ldf.fill.nta f114=[r14],loc1
ldf.fill.nta f122=[r15],loc1
;;
ldf.fill.nta f35=[in0],loc0
ldf.fill.nta f43=[ r3],loc0
ldf.fill.nta f51=[r14],loc0
ldf.fill.nta f59=[r15],loc0
;;
ldf.fill.nta f67=[in0],loc0
ldf.fill.nta f75=[ r3],loc0
ldf.fill.nta f83=[r14],loc0
ldf.fill.nta f91=[r15],loc0
;;
ldf.fill.nta f99=[in0],loc1
ldf.fill.nta f107=[ r3],loc1
ldf.fill.nta f115=[r14],loc1
ldf.fill.nta f123=[r15],loc1
;;
ldf.fill.nta f36=[in0],loc0
ldf.fill.nta f44=[ r3],loc0
ldf.fill.nta f52=[r14],loc0
ldf.fill.nta f60=[r15],loc0
;;
ldf.fill.nta f68=[in0],loc0
ldf.fill.nta f76=[ r3],loc0
ldf.fill.nta f84=[r14],loc0
ldf.fill.nta f92=[r15],loc0
;;
ldf.fill.nta f100=[in0],loc1
ldf.fill.nta f108=[ r3],loc1
ldf.fill.nta f116=[r14],loc1
ldf.fill.nta f124=[r15],loc1
;;
ldf.fill.nta f37=[in0],loc0
ldf.fill.nta f45=[ r3],loc0
ldf.fill.nta f53=[r14],loc0
ldf.fill.nta f61=[r15],loc0
;;
ldf.fill.nta f69=[in0],loc0
ldf.fill.nta f77=[ r3],loc0
ldf.fill.nta f85=[r14],loc0
ldf.fill.nta f93=[r15],loc0
;;
ldf.fill.nta f101=[in0],loc1
ldf.fill.nta f109=[ r3],loc1
ldf.fill.nta f117=[r14],loc1
ldf.fill.nta f125=[r15],loc1
;;
ldf.fill.nta f38 =[in0],loc0
ldf.fill.nta f46 =[ r3],loc0
ldf.fill.nta f54 =[r14],loc0
ldf.fill.nta f62 =[r15],loc0
;;
ldf.fill.nta f70 =[in0],loc0
ldf.fill.nta f78 =[ r3],loc0
ldf.fill.nta f86 =[r14],loc0
ldf.fill.nta f94 =[r15],loc0
;;
ldf.fill.nta f102=[in0],loc1
ldf.fill.nta f110=[ r3],loc1
ldf.fill.nta f118=[r14],loc1
ldf.fill.nta f126=[r15],loc1
;;
ldf.fill.nta f39 =[in0],loc0
ldf.fill.nta f47 =[ r3],loc0
ldf.fill.nta f55 =[r14],loc0
ldf.fill.nta f63 =[r15],loc0
;;
ldf.fill.nta f71 =[in0],loc0
ldf.fill.nta f79 =[ r3],loc0
ldf.fill.nta f87 =[r14],loc0
ldf.fill.nta f95 =[r15],loc0
;;
ldf.fill.nta f103=[in0]
ldf.fill.nta f111=[ r3]
ldf.fill.nta f119=[r14]
ldf.fill.nta f127=[r15]
br.ret.sptk.many rp
END(__ia64_load_fpu)
......
......@@ -127,6 +127,8 @@ EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
EXPORT_SYMBOL(ia64_pal_call_phys_static);
EXPORT_SYMBOL(ia64_pal_call_stacked);
EXPORT_SYMBOL(ia64_pal_call_static);
EXPORT_SYMBOL(ia64_load_scratch_fpregs);
EXPORT_SYMBOL(ia64_save_scratch_fpregs);
extern struct efi efi;
EXPORT_SYMBOL(efi);
......
......@@ -16,7 +16,7 @@
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
struct mm_struct init_mm = INIT_MM(init_mm);
/*
......
......@@ -403,8 +403,8 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs)
break;
desc->status &= ~IRQ_PENDING;
}
out:
desc->status &= ~IRQ_INPROGRESS;
out:
/*
* The ->end() handler has to deal with interrupts which got
* disabled while the handler was running.
......@@ -788,7 +788,7 @@ int setup_irq(unsigned int irq, struct irqaction * new)
if (!shared) {
desc->depth = 0;
desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
desc->handler->startup(irq);
}
spin_unlock_irqrestore(&desc->lock,flags);
......
......@@ -70,24 +70,6 @@
mov r19=n;; /* prepare to save predicates */ \
br.sptk.many dispatch_to_fault_handler
/*
* As we don't (hopefully) use the space available, we need to fill it with
* nops. the parameter may be used for debugging and is representing the entry
* number
*/
#define BREAK_BUNDLE(a) break.m (a); \
break.i (a); \
break.i (a)
/*
* 4 breaks bundles all together
*/
#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a)
/*
* 8 bundles all together (too lazy to use only 4 at a time !)
*/
#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a)
.section .text.ivt,"ax"
.align 32768 // align on 32KB boundary
......@@ -115,6 +97,10 @@ ENTRY(vhpt_miss)
* - the faulting virtual address has no L1, L2, or L3 mapping
*/
mov r16=cr.ifa // get address that caused the TLB miss
#ifdef CONFIG_HUGETLB_PAGE
movl r18=PAGE_SHIFT
mov r25=cr.itir
#endif
;;
rsm psr.dt // use physical addressing for data
mov r31=pr // save the predicate registers
......@@ -122,8 +108,18 @@ ENTRY(vhpt_miss)
shl r21=r16,3 // shift bit 60 into sign bit
shr.u r17=r16,61 // get the region number into r17
;;
shr r22=r21,3
#ifdef CONFIG_HUGETLB_PAGE
extr.u r26=r25,2,6
;;
cmp.eq p8,p0=HPAGE_SHIFT,r26
;;
(p8) dep r25=r18,r25,2,6
(p8) shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
;;
#endif
cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5?
shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
srlz.d // ensure "rsm psr.dt" has taken effect
......@@ -134,7 +130,7 @@ ENTRY(vhpt_miss)
(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
shr.u r18=r16,PMD_SHIFT // shift L2 index into position
shr.u r18=r22,PMD_SHIFT // shift L2 index into position
;;
ld8 r17=[r17] // fetch the L1 entry (may be 0)
;;
......@@ -142,7 +138,7 @@ ENTRY(vhpt_miss)
dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
;;
(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
;;
(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
......@@ -161,6 +157,10 @@ ENTRY(vhpt_miss)
(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
mov cr.ifa=r22
#ifdef CONFIG_HUGETLB_PAGE
(p8) mov cr.itir=r25 // change to default page-size for VHPT
#endif
/*
* Now compute and insert the TLB entry for the virtual page table. We never
* execute in a page table page so there is no need to set the exception deferral
......
......@@ -245,3 +245,48 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
br.ret.sptk.many b0
END(ia64_pal_call_phys_stacked)
/*
* Save the scratch fp registers that aren't already saved in pt_regs (f10-f15).
*
* NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
* regs in the fp-low partition.
*
* Inputs:
* in0 Address of stack storage for fp regs
*/
GLOBAL_ENTRY(ia64_save_scratch_fpregs)
// Spills f10-f15 to the buffer at in0 using two store pointers (in0 and
// in0+16), each post-incremented by 32, so the six registers land in
// consecutive 16-byte slots: f10 at +0, f11 at +16, ... f15 at +80.
alloc r3=ar.pfs,1,0,0,0		// frame: 1 input (in0), no locals, no outputs
add r2=16,in0			// r2 = second store pointer (odd slots)
;;
stf.spill [in0] = f10,32	// slot 0
stf.spill [r2] = f11,32		// slot 1
;;
stf.spill [in0] = f12,32	// slot 2
stf.spill [r2] = f13,32		// slot 3
;;
stf.spill [in0] = f14,32	// slot 4
stf.spill [r2] = f15,32		// slot 5
br.ret.sptk.many rp
END(ia64_save_scratch_fpregs)
/*
* Load the scratch fp registers (f10-f15)
*
* Inputs:
* in0 Address of stack storage for fp regs
*/
GLOBAL_ENTRY(ia64_load_scratch_fpregs)
// Fills f10-f15 from the buffer at in0 using two load pointers (in0 and
// in0+16), each post-incremented by 32, i.e. from six consecutive
// 16-byte slots: f10 from +0, f11 from +16, ... f15 from +80.
alloc r3=ar.pfs,1,0,0,0		// frame: 1 input (in0), no locals, no outputs
add r2=16,in0			// r2 = second load pointer (odd slots)
;;
ldf.fill f10 = [in0],32		// slot 0
ldf.fill f11 = [r2],32		// slot 1
;;
ldf.fill f12 = [in0],32		// slot 2
ldf.fill f13 = [r2],32		// slot 3
;;
ldf.fill f14 = [in0],32		// slot 4
ldf.fill f15 = [r2],32		// slot 5
br.ret.sptk.many rp
END(ia64_load_scratch_fpregs)
......@@ -2,6 +2,11 @@
* pci.c - Low-Level PCI Access in IA-64
*
* Derived from bios32.c of i386 tree.
*
* Copyright (C) 2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Note: Above list of copyright holders is incomplete...
*/
#include <linux/config.h>
......@@ -85,15 +90,15 @@ __pci_sal_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
/*
 * Config-space read hook: forwards the request to __pci_sal_read().
 *
 * The first argument passed down is always 0 (presumably the PCI segment,
 * by analogy with __pci_sal_write's `seg` parameter -- confirm against
 * __pci_sal_read's prototype).  The bus number comes from bus->number, and
 * devfn is split into slot and function with PCI_SLOT()/PCI_FUNC().
 *
 * Returns whatever __pci_sal_read() returns; the value read is stored
 * through `value` by the callee.
 */
static int
pci_sal_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
	return __pci_sal_read(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn),
			      where, size, value);
}
/*
 * Config-space write hook: forwards the request to __pci_sal_write().
 *
 * Segment 0 is always used.  The bus number comes from bus->number, and
 * devfn is split into slot and function with PCI_SLOT()/PCI_FUNC().
 *
 * Returns whatever __pci_sal_write() returns.
 */
static int
pci_sal_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
{
	return __pci_sal_write(0, bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn),
			       where, size, value);
}
struct pci_ops pci_sal_ops = {
......@@ -202,8 +207,8 @@ pcibios_fixup_pbus_ranges (struct pci_bus * bus, struct pbus_set_ranges_data * r
{
}
int
pcibios_enable_device (struct pci_dev *dev)
static inline int
pcibios_enable_resources (struct pci_dev *dev, int mask)
{
u16 cmd, old_cmd;
int idx;
......@@ -215,6 +220,10 @@ pcibios_enable_device (struct pci_dev *dev)
pci_read_config_word(dev, PCI_COMMAND, &cmd);
old_cmd = cmd;
for (idx=0; idx<6; idx++) {
/* Only set up the desired resources. */
if (!(mask & (1 << idx)))
continue;
r = &dev->resource[idx];
if (!r->start && r->end) {
printk(KERN_ERR
......@@ -233,9 +242,19 @@ pcibios_enable_device (struct pci_dev *dev)
printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
pci_write_config_word(dev, PCI_COMMAND, cmd);
}
return 0;
}
printk(KERN_INFO "PCI: Found IRQ %d for device %s\n", dev->irq, dev->slot_name);
/*
 * Enable the resources of `dev` selected by `mask` and report its IRQ.
 *
 * A negative result from pcibios_enable_resources() is handed back to the
 * caller unchanged.  Otherwise the device's IRQ is logged and 0 is returned.
 */
int
pcibios_enable_device (struct pci_dev *dev, int mask)
{
	int err = pcibios_enable_resources(dev, mask);

	if (err < 0)
		return err;

	printk(KERN_INFO "PCI: Found IRQ %d for device %s\n", dev->irq, dev->slot_name);
	return 0;
}
......
......@@ -76,7 +76,7 @@
/* XXX: these three assume that register i is implemented */
#define PMD_IS_COUNTING(i) (pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) (pmu_conf.pmc_desc[i].type == PFM_REG_COUNTING)
/*
 * True when PMC i is described as a monitor register.  The parameter must be
 * named `i` to match the expansion: a differently named parameter would leave
 * `[i]` bound to whatever `i` happens to be in scope at the call site.
 */
#define PMC_IS_MONITOR(i)	(pmu_conf.pmc_desc[i].type == PFM_REG_MONITOR)
/* k assume unsigned */
#define IBR_IS_IMPL(k) (k<pmu_conf.num_ibrs)
......@@ -193,10 +193,12 @@ typedef enum {
*/
typedef struct {
u64 val; /* software-held part of the virtual 64bit counter: pfm_write_pmds() stores the bits outside pmu_conf.perf_ovfl_val here, the overflow handler adds 1 + perf_ovfl_val per overflow */
u64 ival; /* initial value from user (the reg_value passed to a PMD write) */
u64 lval; /* last value loaded into the counter: set by a PMD write and by each reset; reported as last_reset_value in samples */
u64 long_reset; /* reset value on sampling overflow (used when a long reset is requested) */
u64 short_reset;/* reset value on overflow (used otherwise) */
u64 reset_pmds[4]; /* bitmask of the other pmds to reset when this counter overflows */
u64 seed; /* seed for random-number generator; replaced after every randomized reset */
u64 mask; /* mask for random-number generator: (seed & mask) is subtracted from the reset value */
int flags; /* PFM_REGFL_* bits, e.g. notify/do not notify on overflow, randomized reset */
} pfm_counter_t;
......@@ -336,7 +338,7 @@ typedef struct {
#define PFM_CMD_PID 0x1 /* command requires pid argument */
#define PFM_CMD_ARG_READ 0x2 /* command must read argument(s) */
#define PFM_CMD_ARG_WRITE 0x4 /* command must write argument(s) */
#define PFM_CMD_ARG_RW 0x4 /* command must read/write argument(s) */
#define PFM_CMD_CTX 0x8 /* command needs a perfmon context */
#define PFM_CMD_NOCHK 0x10 /* command does not need to check task's state */
......@@ -347,7 +349,7 @@ typedef struct {
#define PFM_CMD_USE_PID(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
#define PFM_CMD_READ_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
#define PFM_CMD_WRITE_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_WRITE) != 0)
#define PFM_CMD_RW_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_RW) != 0)
#define PFM_CMD_USE_CTX(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
#define PFM_CMD_CHK(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)
......@@ -366,6 +368,7 @@ typedef struct {
unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
unsigned long pfm_recorded_samples_count;
unsigned long pfm_full_smpl_buffer_count; /* how many times the sampling buffer was full */
char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
} pfm_stats_t;
/*
......@@ -374,7 +377,8 @@ typedef struct {
static pmu_config_t pmu_conf; /* PMU configuration */
static pfm_session_t pfm_sessions; /* global sessions information */
static struct proc_dir_entry *perfmon_dir; /* for debug only */
static pfm_stats_t pfm_stats;
static pfm_stats_t pfm_stats[NR_CPUS];
DEFINE_PER_CPU(int, pfm_syst_wide);
static DEFINE_PER_CPU(int, pfm_dcr_pp);
......@@ -410,7 +414,8 @@ static struct vm_operations_struct pfm_vm_ops={
*/
static struct {
struct task_struct *owner;
} ____cacheline_aligned pmu_owners[NR_CPUS];
char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
} pmu_owners[NR_CPUS];
......@@ -743,15 +748,14 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo
psb = kmalloc(sizeof(*psb), GFP_KERNEL);
if (psb == NULL) {
DBprintk(("Can't allocate sampling buffer descriptor\n"));
pfm_rvfree(smpl_buf, size);
return -ENOMEM;
goto error_kmalloc;
}
/* allocate vma */
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!vma) {
DBprintk(("Cannot allocate vma\n"));
goto error;
goto error_kmem;
}
/*
* partially initialize the vma for the sampling buffer
......@@ -851,8 +855,11 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo
return 0;
error:
pfm_rvfree(smpl_buf, size);
kmem_cache_free(vm_area_cachep, vma);
error_kmem:
kfree(psb);
error_kmalloc:
pfm_rvfree(smpl_buf, size);
return -ENOMEM;
}
......@@ -961,7 +968,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
*/
if (task != current) return -EINVAL;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
ret = pfx_is_sane(task, &tmp);
if (ret < 0) return ret;
......@@ -1017,8 +1024,8 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
if (notify_pid == current->pid) {
ctx->ctx_notify_task = task = current;
current->thread.pfm_context = ctx;
ctx->ctx_notify_task = current;
task->thread.pfm_context = ctx;
} else if (notify_pid!=0) {
struct task_struct *notify_task;
......@@ -1034,7 +1041,10 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
/*
* check if we can send this task a signal
*/
if (pfm_bad_permissions(notify_task)) goto buffer_error;
if (pfm_bad_permissions(notify_task)) {
read_unlock(&tasklist_lock);
goto buffer_error;
}
/*
* make visible
......@@ -1044,7 +1054,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
* okay because child will do the scan for nothing which
* won't hurt.
*/
current->thread.pfm_context = ctx;
task->thread.pfm_context = ctx;
/*
* will cause task to check on exit for monitored
......@@ -1101,7 +1111,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
if (copy_to_user(req, &tmp, sizeof(tmp))) {
if (__copy_to_user(req, &tmp, sizeof(tmp))) {
ret = -EFAULT;
goto buffer_error;
}
......@@ -1147,16 +1157,38 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int
abort:
UNLOCK_PFS();
/* make sure we don't leave anything behind */
task->thread.pfm_context = NULL;
return ret;
}
/*
 * Compute the value a counter is reloaded with after an overflow.
 *
 * reg:           software state of the counter being reset
 * is_long_reset: non-zero selects reg->long_reset, zero selects reg->short_reset
 *
 * When PFM_REGFL_RANDOM is set in reg->flags, the current seed, masked with
 * reg->mask, is subtracted from the base reset value, and the seed is then
 * advanced with carta_random32().  If the mask has bits above bit 31, a
 * second carta_random32() call supplies the upper 32 bits of the new seed.
 *
 * The chosen value is recorded in reg->lval and returned.
 */
static inline unsigned long
pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
{
	extern unsigned long carta_random32 (unsigned long seed);
	unsigned long reload;

	if (is_long_reset)
		reload = reg->long_reset;
	else
		reload = reg->short_reset;

	if (reg->flags & PFM_REGFL_RANDOM) {
		unsigned long cur_seed = reg->seed;
		unsigned long rnd_mask = reg->mask;
		unsigned long next_seed = carta_random32(cur_seed);

		/* counter values are negative numbers, hence the subtraction */
		reload -= (cur_seed & rnd_mask);

		/* a mask wider than 32 bits needs a full 64-bit random value */
		if ((rnd_mask >> 32) != 0)
			next_seed |= carta_random32(cur_seed >> 32) << 32;

		reg->seed = next_seed;
	}

	reg->lval = reload;
	return reload;
}
static void
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
{
unsigned long mask = ovfl_regs[0];
unsigned long reset_others = 0UL;
unsigned long val;
int i;
int i, is_long_reset = (flag & PFM_RELOAD_LONG_RESET);
DBprintk(("masks=0x%lx\n", mask));
......@@ -1166,15 +1198,11 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
mask >>= PMU_FIRST_COUNTER;
for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
if (mask & 0x1) {
val = flag == PFM_RELOAD_LONG_RESET ?
ctx->ctx_soft_pmds[i].long_reset:
ctx->ctx_soft_pmds[i].short_reset;
val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0];
DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n",
current->pid,
flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid,
is_long_reset ? "long" : "short", i, val));
/* upper part is ignored on rval */
pfm_write_soft_counter(ctx, i, val);
......@@ -1188,23 +1216,17 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
if ((reset_others & 0x1) == 0) continue;
val = flag == PFM_RELOAD_LONG_RESET ?
ctx->ctx_soft_pmds[i].long_reset:
ctx->ctx_soft_pmds[i].short_reset;
val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset);
if (PMD_IS_COUNTING(i)) {
pfm_write_soft_counter(ctx, i, val);
} else {
ia64_set_pmd(i, val);
}
DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n",
current->pid,
flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid,
is_long_reset ? "long" : "short", i, val));
}
ia64_srlz_d();
/* just in case ! */
ctx->ctx_ovfl_regs[0] = 0UL;
}
static int
......@@ -1212,9 +1234,10 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
{
struct thread_struct *th = &task->thread;
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned int cnum;
unsigned long value;
unsigned int cnum, reg_flags, flags;
int i;
int ret = 0, reg_retval = 0;
int ret = -EINVAL;
/* we don't quite support this right now */
if (task != current) return -EINVAL;
......@@ -1225,10 +1248,12 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
for (i = 0; i < count; i++, req++) {
if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
cnum = tmp.reg_num;
cnum = tmp.reg_num;
reg_flags = tmp.reg_flags;
value = tmp.reg_value;
flags = 0;
/*
* we reject all non implemented PMC as well
......@@ -1237,8 +1262,7 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
*/
if (!PMC_IS_IMPL(cnum) || cnum < 4) {
DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum));
ret = -EINVAL;
goto abort_mission;
goto error;
}
/*
* A PMC used to configure monitors must be:
......@@ -1247,73 +1271,79 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
* any other configuration is rejected.
*/
if (PMC_IS_MONITOR(cnum) || PMC_IS_COUNTING(cnum)) {
DBprintk(("pmc[%u].pm=%ld\n", cnum, PMC_PM(cnum, tmp.reg_value)));
if (ctx->ctx_fl_system ^ PMC_PM(cnum, tmp.reg_value)) {
DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, tmp.reg_value), ctx->ctx_fl_system));
ret = -EINVAL;
goto abort_mission;
if (ctx->ctx_fl_system ^ PMC_PM(cnum, value)) {
DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, value), ctx->ctx_fl_system));
goto error;
}
}
if (PMC_IS_COUNTING(cnum)) {
pfm_monitor_t *p = (pfm_monitor_t *)&tmp.reg_value;
pfm_monitor_t *p = (pfm_monitor_t *)&value;
/*
* enforce generation of overflow interrupt. Necessary on all
* CPUs.
*/
p->pmc_oi = 1;
if (tmp.reg_flags & PFM_REGFL_OVFL_NOTIFY) {
if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
/*
* must have a target for the signal
*/
* must have a target for the signal
*/
if (ctx->ctx_notify_task == NULL) {
DBprintk(("no notify_task && PFM_REGFL_OVFL_NOTIFY\n"));
ret = -EINVAL;
goto abort_mission;
DBprintk(("cannot set ovfl_notify: no notify_task\n"));
goto error;
}
ctx->ctx_soft_pmds[cnum].flags |= PFM_REGFL_OVFL_NOTIFY;
flags |= PFM_REGFL_OVFL_NOTIFY;
}
/*
* copy reset vector
*/
ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0];
ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1];
ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2];
ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3];
if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
} else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum));
goto error;
}
/*
* execute write checker, if any
*/
if (PMC_WR_FUNC(cnum)) ret = PMC_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
abort_mission:
if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
if (PMC_WR_FUNC(cnum)) {
ret = PMC_WR_FUNC(cnum)(task, cnum, &value, regs);
if (ret) goto error;
ret = -EINVAL;
}
PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval);
/*
* no error on this register
*/
PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
/*
* update register return value, abort all if problem during copy.
* we only modify the reg_flags field. no check mode is fine because
* access has been verified upfront in sys_perfmonctl().
*
* If this fails, then the software state is not modified
*/
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
/*
* if there was something wrong on this register, don't touch
* the hardware at all and abort write request for others.
*
* On error, the user must sequentially scan the table and the first
* entry which has a return flag set is the one that caused the error.
* Now we commit the changes to the software state
*/
if (ret != 0) {
DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
task->pid, cnum, tmp.reg_value, reg_retval));
break;
}
/*
* We can proceed with this register!
* full flag update each time a register is programmed
*/
ctx->ctx_soft_pmds[cnum].flags = flags;
if (PMC_IS_COUNTING(cnum)) {
/*
* copy reset vector
*/
ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0];
ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1];
ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2];
ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3];
}
/*
* Needed in case the user does not initialize the equivalent
......@@ -1325,16 +1355,26 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
/*
* keep copy the pmc, used for register reload
*/
th->pmc[cnum] = tmp.reg_value;
th->pmc[cnum] = value;
ia64_set_pmc(cnum, tmp.reg_value);
ia64_set_pmc(cnum, value);
DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x used_pmds=0x%lx\n",
task->pid, cnum, tmp.reg_value,
task->pid, cnum, value,
ctx->ctx_soft_pmds[cnum].flags,
ctx->ctx_used_pmds[0]));
}
return 0;
error:
PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
DBprintk(("[%d] pmc[%u]=0x%lx error %d\n", task->pid, cnum, value, ret));
return ret;
}
......@@ -1342,9 +1382,10 @@ static int
pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned long value, hw_value;
unsigned int cnum;
int i;
int ret = 0, reg_retval = 0;
int ret;
/* we don't quite support this right now */
if (task != current) return -EINVAL;
......@@ -1354,65 +1395,74 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
*/
if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
/* XXX: ctx locking may be required here */
ret = -EINVAL;
for (i = 0; i < count; i++, req++) {
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
cnum = tmp.reg_num;
value = tmp.reg_value;
cnum = tmp.reg_num;
if (!PMD_IS_IMPL(cnum)) {
ret = -EINVAL;
DBprintk(("pmd[%u] is unimplemented or invalid\n", cnum));
goto abort_mission;
}
/* update virtualized (64bits) counter */
if (PMD_IS_COUNTING(cnum)) {
ctx->ctx_soft_pmds[cnum].ival = tmp.reg_value;
ctx->ctx_soft_pmds[cnum].val = tmp.reg_value & ~pmu_conf.perf_ovfl_val;
ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset;
ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
}
/*
* execute write checker, if any
*/
if (PMD_WR_FUNC(cnum)) ret = PMD_WR_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
abort_mission:
if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval);
if (PMD_WR_FUNC(cnum)) {
unsigned long v = value;
ret = PMD_WR_FUNC(cnum)(task, cnum, &v, regs);
if (ret) goto abort_mission;
value = v;
ret = -EINVAL;
}
hw_value = value;
/*
* no error on this register
*/
PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
if (__put_user(tmp.reg_flags, &req->reg_flags)) return -EFAULT;
/*
* if there was something wrong on this register, don't touch
* the hardware at all and abort write request for others.
*
* On error, the user must sequentially scan the table and the first
* entry which has a return flag set is the one that caused the error.
* now commit changes to software state
*/
if (ret != 0) {
DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
task->pid, cnum, tmp.reg_value, reg_retval));
break;
/* update virtualized (64bits) counter */
if (PMD_IS_COUNTING(cnum)) {
ctx->ctx_soft_pmds[cnum].lval = value;
ctx->ctx_soft_pmds[cnum].val = value & ~pmu_conf.perf_ovfl_val;
hw_value = value & pmu_conf.perf_ovfl_val;
ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset;
ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
ctx->ctx_soft_pmds[cnum].seed = tmp.reg_random_seed;
ctx->ctx_soft_pmds[cnum].mask = tmp.reg_random_mask;
}
/* keep track of what we use */
CTX_USED_PMD(ctx, pmu_conf.pmd_desc[(cnum)].dep_pmd[0]);
/* mark this register as used as well */
CTX_USED_PMD(ctx, RDEP(cnum));
/* writes to unimplemented part is ignored, so this is safe */
ia64_set_pmd(cnum, tmp.reg_value & pmu_conf.perf_ovfl_val);
ia64_set_pmd(cnum, hw_value);
/* to go away */
ia64_srlz_d();
DBprintk(("[%d] pmd[%u]: soft_pmd=0x%lx short_reset=0x%lx "
DBprintk(("[%d] pmd[%u]: value=0x%lx hw_value=0x%lx soft_pmd=0x%lx short_reset=0x%lx "
"long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx\n",
task->pid, cnum,
value, hw_value,
ctx->ctx_soft_pmds[cnum].val,
ctx->ctx_soft_pmds[cnum].short_reset,
ctx->ctx_soft_pmds[cnum].long_reset,
......@@ -1421,17 +1471,36 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun
ctx->ctx_used_pmds[0],
ctx->ctx_soft_pmds[cnum].reset_pmds[0]));
}
return 0;
abort_mission:
/*
* for now, we have only one possibility for error
*/
PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
/*
* we change the return value to EFAULT in case we cannot write register return code.
* The caller first must correct this error, then a resubmission of the request will
* eventually yield the EINVAL.
*/
if (__put_user(tmp.reg_flags, &req->reg_flags)) ret = -EFAULT;
DBprintk(("[%d] pmc[%u]=0x%lx ret %d\n", task->pid, cnum, value, ret));
return ret;
}
static int
pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *th = &task->thread;
unsigned long val=0;
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned int cnum;
int i, ret = 0;
unsigned long val = 0UL;
pfarg_reg_t *req = (pfarg_reg_t *)arg;
unsigned int cnum, reg_flags = 0;
int i, ret = -EINVAL;
if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
......@@ -1447,11 +1516,9 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count
DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), task->pid));
for (i = 0; i < count; i++, req++) {
unsigned long ctx_val = ~0UL;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
cnum = tmp.reg_num;
if (__get_user(cnum, &req->reg_num)) return -EFAULT;
if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT;
if (!PMD_IS_IMPL(cnum)) goto abort_mission;
/*
......@@ -1501,34 +1568,40 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count
*/
val &= pmu_conf.perf_ovfl_val;
val += ctx_val = ctx->ctx_soft_pmds[cnum].val;
val += ctx->ctx_soft_pmds[cnum].val;
}
tmp.reg_value = val;
/*
* execute read checker, if any
*/
if (PMD_RD_FUNC(cnum)) {
ret = PMD_RD_FUNC(cnum)(task, cnum, &tmp.reg_value, regs);
unsigned long v = val;
ret = PMD_RD_FUNC(cnum)(task, cnum, &v, regs);
val = v;
}
PFM_REG_RETFLAG_SET(tmp.reg_flags, ret);
PFM_REG_RETFLAG_SET(reg_flags, 0);
DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n",
cnum, ret, val, ia64_get_pmc(cnum)));
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
cnum, ret, val, ia64_get_pmc(cnum)));
/*
* update register return value, abort all if problem during copy.
* we only modify the reg_flags field. no check mode is fine because
* access has been verified upfront in sys_perfmonctl().
*/
if (__put_user(cnum, &req->reg_num)) return -EFAULT;
if (__put_user(val, &req->reg_value)) return -EFAULT;
if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT;
}
return 0;
abort_mission:
PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
/*
* XXX: if this fails, we stick with the original failure, flag not updated!
*/
copy_to_user(req, &tmp, sizeof(tmp));
return -EINVAL;
PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL);
if (__put_user(reg_flags, &req->reg_flags)) ret = -EFAULT;
return ret;
}
#ifdef PFM_PMU_USES_DBR
......@@ -1697,44 +1770,6 @@ pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
return 0;
}
#ifndef CONFIG_SMP
/*
* On UP kernels, we do not need to constantly set the psr.pp bit
* when a task is scheduled. The psr.pp bit can only be changed in
* the kernel because of a user request. Given we are on a UP non-preemptive
* kernel we know that no other task is running, so we can simply update their
* psr.pp from their saved state. There is thus no impact on the context switch
* code compared to the SMP case.
*/
/*
 * Set the saved psr.pp bit of every task on the task list to `val`.
 *
 * The walk is done with tasklist_lock held for reading.  For each task, the
 * pt_regs that is updated is the one located immediately below
 * (task address + IA64_STK_OFFSET), i.e. the frame saved on the first entry
 * into the kernel.
 */
static void
pfm_tasklist_toggle_pp(unsigned int val)
{
	struct task_struct *t;

	DBprintk(("invoked by [%d] pp=%u\n", current->pid, val));

	read_lock(&tasklist_lock);
	for_each_task(t) {
		/* step back one pt_regs from the top of the task's stack area */
		struct pt_regs *entry_regs =
			(struct pt_regs *)((unsigned long) t + IA64_STK_OFFSET) - 1;

		/* update psr.pp in the saved state */
		ia64_psr(entry_regs)->pp = val;
	}
	read_unlock(&tasklist_lock);
}
#endif
static int
pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
struct pt_regs *regs)
......@@ -1763,11 +1798,8 @@ pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
ia64_srlz_i();
#ifdef CONFIG_SMP
__get_cpu_var(pfm_dcr_pp) = 0;
#else
pfm_tasklist_toggle_pp(0);
#endif
ia64_psr(regs)->pp = 0;
} else {
......@@ -2013,7 +2045,7 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
for (i = 0; i < count; i++, req++) {
if (copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission;
if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission;
rnum = tmp.dbreg_num;
dbreg.val = tmp.dbreg_value;
......@@ -2046,7 +2078,7 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0);
if (copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission;
if (__copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission;
/*
* Debug registers, just like PMC, can only be modified
......@@ -2101,7 +2133,7 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru
* XXX: for now we can only come here on EINVAL
*/
PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL);
copy_to_user(req, &tmp, sizeof(tmp));
__put_user(tmp.dbreg_flags, &req->dbreg_flags);
}
return ret;
}
......@@ -2142,7 +2174,7 @@ pfm_get_features(struct task_struct *task, pfm_context_t *ctx, void *arg, int co
tmp.ft_version = PFM_VERSION;
tmp.ft_smpl_version = PFM_SMPL_VERSION;
if (copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT;
if (__copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT;
return 0;
}
......@@ -2171,11 +2203,8 @@ pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
if (ctx->ctx_fl_system) {
#ifdef CONFIG_SMP
__get_cpu_var(pfm_dcr_pp) = 1;
#else
pfm_tasklist_toggle_pp(1);
#endif
/* set user level psr.pp */
ia64_psr(regs)->pp = 1;
......@@ -2226,10 +2255,8 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
ia64_srlz_i();
#ifdef CONFIG_SMP
__get_cpu_var(pfm_syst_wide) = 1;
__get_cpu_var(pfm_dcr_pp) = 0;
#endif
__get_cpu_var(pfm_syst_wide) = 1;
} else {
/*
* needed in case the task was a passive task during
......@@ -2270,11 +2297,11 @@ pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int c
{
pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
unsigned int cnum;
int i;
int i, ret = -EINVAL;
for (i = 0; i < count; i++, req++) {
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
cnum = tmp.reg_num;
......@@ -2286,16 +2313,13 @@ pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int c
DBprintk(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, tmp.reg_value));
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
if (__copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
}
return 0;
abort_mission:
PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
/*
* XXX: if this fails, we stick with the original failure, flag not updated!
*/
copy_to_user(req, &tmp, sizeof(tmp));
return -EINVAL;
if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT;
return ret;
}
/*
......@@ -2303,21 +2327,21 @@ pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int c
*/
static pfm_cmd_desc_t pfm_cmd_tab[]={
/* 0 */{ NULL, 0, 0, 0}, /* not used */
/* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 3 */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 3 */{ pfm_read_pmds,PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 4 */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 5 */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 6 */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 7 */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, 1, sizeof(pfarg_context_t)},
/* 8 */{ pfm_context_create, PFM_CMD_PID|PFM_CMD_ARG_RW, 1, sizeof(pfarg_context_t)},
/* 9 */{ pfm_context_destroy, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
/* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 12 */{ pfm_get_features, PFM_CMD_ARG_WRITE, 0, 0},
/* 12 */{ pfm_get_features, PFM_CMD_ARG_RW, 0, 0},
/* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
/* 14 */{ pfm_context_unprotect, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 15 */{ pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
/* 16 */{ NULL, 0, 0, 0}, /* not used */
/* 17 */{ NULL, 0, 0, 0}, /* not used */
/* 18 */{ NULL, 0, 0, 0}, /* not used */
......@@ -2335,8 +2359,8 @@ static pfm_cmd_desc_t pfm_cmd_tab[]={
/* 30 */{ NULL, 0, 0, 0}, /* not used */
/* 31 */{ NULL, 0, 0, 0}, /* not used */
#ifdef PFM_PMU_USES_DBR
/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
#endif
};
#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
......@@ -2345,19 +2369,22 @@ static int
check_task_state(struct task_struct *task)
{
int ret = 0;
#ifdef CONFIG_SMP
/* We must wait until the state has been completely
* saved. There can be situations where the reader arrives
* after the task is marked as STOPPED but before pfm_save_regs()
* is completed.
*/
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
DBprintk(("before wait_task_inactive [%d] state %ld\n", task->pid, task->state));
wait_task_inactive(task);
DBprintk(("after wait_task_inactive [%d] state %ld\n", task->pid, task->state));
#else
if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
ret = -EBUSY;
}
DBprintk(("before wait_task_inactive [%d] state %ld\n", task->pid, task->state));
wait_task_inactive(task);
DBprintk(("after wait_task_inactive [%d] state %ld\n", task->pid, task->state));
#endif
return ret;
}
......@@ -2389,7 +2416,7 @@ sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6,
if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT;
if (PFM_CMD_WRITE_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
if (PFM_CMD_RW_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
if (PFM_CMD_USE_PID(cmd)) {
/*
......@@ -2551,9 +2578,16 @@ pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ov
*/
h->pid = current->pid;
h->cpu = smp_processor_id();
h->rate = 0; /* XXX: add the sampling rate used here */
h->ip = regs ? regs->cr_iip : 0x0; /* where did the fault happened */
h->regs = ovfl_mask; /* which registers overflowed */
h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL;
/*
* where did the fault happen
*/
h->ip = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL;
/*
* which registers overflowed
*/
h->regs = ovfl_mask;
/* guaranteed to monotonically increase on each cpu */
h->stamp = pfm_get_stamp();
......@@ -2572,15 +2606,13 @@ pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ov
if (PMD_IS_COUNTING(j)) {
*e = pfm_read_soft_counter(ctx, j);
/* check if this pmd overflowed as well */
*e += ovfl_mask & (1UL<<j) ? 1 + pmu_conf.perf_ovfl_val : 0;
} else {
*e = ia64_get_pmd(j); /* slow */
}
DBprintk_ovfl(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
e++;
}
pfm_stats.pfm_recorded_samples_count++;
pfm_stats[smp_processor_id()].pfm_recorded_samples_count++;
/*
* make the new entry visible to user, needs to be atomic
......@@ -2597,7 +2629,7 @@ pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ov
/*
* XXX: must reset buffer in blocking mode and lost notified
*/
pfm_stats.pfm_full_smpl_buffer_count++;
pfm_stats[smp_processor_id()].pfm_full_smpl_buffer_count++;
return 1;
}
return 0;
......@@ -2674,23 +2706,13 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
/*
* Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use
* val to count the number of times we overflowed. Otherwise we would lose the
* current value in the PMD (which can be >0). So to make sure we don't lose
* the residual counts we set val to contain the full 64-bit value of the counter.
* Note that the pmd is not necessarily 0 at this point as qualified events
* may have happened before the PMU was frozen. The residual count is not
* taken into consideration here but will be with any read of the pmd via
* pfm_read_pmds().
*/
old_val = ctx->ctx_soft_pmds[i].val;
ctx->ctx_soft_pmds[i].val = 1 + pmu_conf.perf_ovfl_val + pfm_read_soft_counter(ctx, i);
DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx\n",
i, ctx->ctx_soft_pmds[i].val, old_val,
ia64_get_pmd(i) & pmu_conf.perf_ovfl_val));
/*
* now that we have extracted the hardware counter, we can clear it to ensure
* that a subsequent PFM_READ_PMDS will not include it again.
*/
ia64_set_pmd(i, 0UL);
ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val;
/*
* check for overflow condition
......@@ -2699,12 +2721,15 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
ovfl_pmds |= 1UL << i;
DBprintk_ovfl(("soft_pmd[%d] overflowed flags=0x%x, ovfl=0x%lx\n", i, ctx->ctx_soft_pmds[i].flags, ovfl_pmds));
if (PMC_OVFL_NOTIFY(ctx, i)) {
ovfl_notify |= 1UL << i;
}
}
DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
i, ctx->ctx_soft_pmds[i].val, old_val,
ia64_get_pmd(i) & pmu_conf.perf_ovfl_val, ovfl_pmds, ovfl_notify));
}
/*
......@@ -2893,7 +2918,7 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
struct task_struct *task;
pfm_context_t *ctx;
pfm_stats.pfm_ovfl_intr_count++;
pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++;
/*
* srlz.d done before arriving here
......@@ -2951,10 +2976,7 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
atomic_set(&ctx->ctx_is_busy, 0);
#endif
} else {
pfm_stats.pfm_spurious_ovfl_intr_count++;
printk("perfmon: Spurious PMU overflow interrupt on CPU%d: pmc0=0x%lx owner=%p\n",
smp_processor_id(), pmc0, (void *)PMU_OWNER());
pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++;
}
}
......@@ -2965,28 +2987,24 @@ perfmon_proc_info(char *page)
char *p = page;
int i;
p += sprintf(p, "enabled : %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes");
p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.perf_ovfl_val);
p += sprintf(p, "overflow intrs : %lu\n", pfm_stats.pfm_ovfl_intr_count);
p += sprintf(p, "spurious intrs : %lu\n", pfm_stats.pfm_spurious_ovfl_intr_count);
p += sprintf(p, "recorded samples : %lu\n", pfm_stats.pfm_recorded_samples_count);
p += sprintf(p, "smpl buffer full : %lu\n", pfm_stats.pfm_full_smpl_buffer_count);
p += sprintf(p, "enabled : %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes");
p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No");
p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.perf_ovfl_val);
#ifdef CONFIG_SMP
p += sprintf(p, "CPU%d syst_wide : %d\n"
"CPU%d dcr_pp : %d\n",
smp_processor_id(),
__get_cpu_var(pfm_syst_wide),
smp_processor_id(),
__get_cpu_var(pfm_dcr_pp));
#endif
for(i=0; i < NR_CPUS; i++) {
if (cpu_is_online(i) == 0) continue;
p += sprintf(p, "CPU%-2d overflow intrs : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_count);
p += sprintf(p, "CPU%-2d spurious intrs : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count);
p += sprintf(p, "CPU%-2d recorded samples : %lu\n", i, pfm_stats[i].pfm_recorded_samples_count);
p += sprintf(p, "CPU%-2d smpl buffer full : %lu\n", i, pfm_stats[i].pfm_full_smpl_buffer_count);
p += sprintf(p, "CPU%-2d owner : %d\n", i, pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
}
LOCK_PFS();
p += sprintf(p, "proc_sessions : %lu\n"
"sys_sessions : %lu\n"
"sys_use_dbregs : %lu\n"
"ptrace_use_dbregs: %lu\n",
p += sprintf(p, "proc_sessions : %lu\n"
"sys_sessions : %lu\n"
"sys_use_dbregs : %lu\n"
"ptrace_use_dbregs : %lu\n",
pfm_sessions.pfs_task_sessions,
pfm_sessions.pfs_sys_sessions,
pfm_sessions.pfs_sys_use_dbregs,
......@@ -2994,30 +3012,6 @@ perfmon_proc_info(char *page)
UNLOCK_PFS();
for(i=0; i < NR_CPUS; i++) {
if (cpu_is_online(i)) {
p += sprintf(p, "CPU%d owner : %-6d\n",
i,
pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
}
}
for(i=0; pmd_desc[i].type != PFM_REG_NONE; i++) {
p += sprintf(p, "PMD%-2d: %d 0x%lx 0x%lx\n",
i,
pmd_desc[i].type,
pmd_desc[i].dep_pmd[0],
pmd_desc[i].dep_pmc[0]);
}
for(i=0; pmc_desc[i].type != PFM_REG_NONE; i++) {
p += sprintf(p, "PMC%-2d: %d 0x%lx 0x%lx\n",
i,
pmc_desc[i].type,
pmc_desc[i].dep_pmd[0],
pmc_desc[i].dep_pmc[0]);
}
return p - page;
}
......@@ -3038,7 +3032,6 @@ perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, voi
return len;
}
#ifdef CONFIG_SMP
void
pfm_syst_wide_update_task(struct task_struct *task, int mode)
{
......@@ -3051,8 +3044,6 @@ pfm_syst_wide_update_task(struct task_struct *task, int mode)
*/
ia64_psr(regs)->pp = mode ? __get_cpu_var(pfm_dcr_pp) : 0;
}
#endif
void
pfm_save_regs (struct task_struct *task)
......@@ -3292,6 +3283,30 @@ pfm_load_regs (struct task_struct *task)
owner = PMU_OWNER();
ctx = task->thread.pfm_context;
t = &task->thread;
/*
* we restore ALL the debug registers to avoid picking up
* stale state.
*
* This must be done even when the task is still the owner
* as the registers may have been modified via ptrace()
* (not perfmon) by the previous task.
*
* XXX: dealing with this in a lazy fashion requires modifications
* to the way the debug registers are managed. This will be done
* in the next version of perfmon.
*/
if (ctx->ctx_fl_using_dbreg) {
for (i=0; i < pmu_conf.num_ibrs; i++) {
ia64_set_ibr(i, t->ibr[i]);
}
ia64_srlz_i();
for (i=0; i < pmu_conf.num_dbrs; i++) {
ia64_set_dbr(i, t->dbr[i]);
}
ia64_srlz_d();
}
/*
* if we were the last user, then nothing to do except restore psr
......@@ -3327,7 +3342,6 @@ pfm_load_regs (struct task_struct *task)
pfm_fetch_regs(cpu, task, ctx);
}
#endif
t = &task->thread;
/*
* To avoid leaking information to the user level when psr.sp=0,
......@@ -3357,21 +3371,6 @@ pfm_load_regs (struct task_struct *task)
if (mask & 0x1) ia64_set_pmc(i, t->pmc[i]);
}
/*
* we restore ALL the debug registers to avoid picking up
* stale state.
*/
if (ctx->ctx_fl_using_dbreg) {
for (i=0; i < pmu_conf.num_ibrs; i++) {
ia64_set_ibr(i, t->ibr[i]);
}
ia64_srlz_i();
for (i=0; i < pmu_conf.num_dbrs; i++) {
ia64_set_dbr(i, t->dbr[i]);
}
}
ia64_srlz_d();
if (t->pmc[0] & ~0x1) {
pfm_overflow_handler(task, ctx, t->pmc[0], NULL);
}
......@@ -3542,12 +3541,8 @@ pfm_flush_regs (struct task_struct *task)
ia64_srlz_i();
#ifdef CONFIG_SMP
__get_cpu_var(pfm_syst_wide) = 0;
__get_cpu_var(pfm_dcr_pp) = 0;
#else
pfm_tasklist_toggle_pp(0);
#endif
} else {
/* stop monitoring */
......@@ -3766,18 +3761,12 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) {
nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].ival & ~pmu_conf.perf_ovfl_val;
thread->pmd[i] = nctx->ctx_soft_pmds[i].ival & pmu_conf.perf_ovfl_val;
nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.perf_ovfl_val;
thread->pmd[i] = nctx->ctx_soft_pmds[i].lval & pmu_conf.perf_ovfl_val;
} else {
thread->pmd[i] = 0UL; /* reset to initial state */
}
/* what about the other pmds? zero or keep as is */
}
/*
* clear BTB index register
* XXX: CPU-model specific knowledge!
*/
thread->pmd[16] = 0;
nctx->ctx_fl_frozen = 0;
nctx->ctx_ovfl_regs[0] = 0UL;
......@@ -3947,7 +3936,8 @@ pfm_context_exit(struct task_struct *task)
pfm_sessions.pfs_sys_session[ctx->ctx_cpu] = NULL;
pfm_sessions.pfs_sys_sessions--;
DBprintk(("freeing syswide session on CPU%ld\n", ctx->ctx_cpu));
/* update perfmon debug register counter */
/* update perfmon debug register usage counter */
if (ctx->ctx_fl_using_dbreg) {
if (pfm_sessions.pfs_sys_use_dbregs == 0) {
printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid);
......@@ -3990,7 +3980,8 @@ pfm_cleanup_smpl_buf(struct task_struct *task)
* Walk through the list and free the sampling buffer and psb
*/
while (psb) {
DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size));
DBprintk(("[%d] freeing smpl @%p size %ld\n",
current->pid, psb->psb_hdr, psb->psb_size));
pfm_rvfree(psb->psb_hdr, psb->psb_size);
tmp = psb->psb_next;
......@@ -4011,14 +4002,14 @@ pfm_cleanup_smpl_buf(struct task_struct *task)
void
pfm_cleanup_owners(struct task_struct *task)
{
struct task_struct *p;
struct task_struct *g, *p;
pfm_context_t *ctx;
DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
read_lock(&tasklist_lock);
for_each_task(p) {
do_each_thread(g, p) {
/*
* It is safe to do the 2-step test here, because thread.ctx
* is cleaned up only in release_thread() and at that point
......@@ -4056,7 +4047,8 @@ pfm_cleanup_owners(struct task_struct *task)
DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid));
}
}
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
atomic_set(&task->thread.pfm_owners_check, 0);
......@@ -4070,23 +4062,21 @@ pfm_cleanup_owners(struct task_struct *task)
void
pfm_cleanup_notifiers(struct task_struct *task)
{
struct task_struct *p;
struct task_struct *g, *p;
pfm_context_t *ctx;
DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
read_lock(&tasklist_lock);
for_each_task(p) {
do_each_thread(g, p) {
/*
* It is safe to do the 2-step test here, because thread.ctx
* is cleaned up only in release_thread() and at that point
* the task has been detached from the tasklist which is an
* operation which uses the write_lock() on the tasklist_lock
* so it cannot run concurrently to this loop. So we have the
* guarantee that if we find p and it has a perfmon ctx then
* it is going to stay like this for the entire execution of this
* loop.
* It is safe to do the 2-step test here, because thread.ctx is cleaned up
* only in release_thread() and at that point the task has been detached
* from the tasklist which is an operation which uses the write_lock() on
* the tasklist_lock so it cannot run concurrently to this loop. So we
* have the guarantee that if we find p and it has a perfmon ctx then it
* is going to stay like this for the entire execution of this loop.
*/
ctx = p->thread.pfm_context;
......@@ -4095,16 +4085,16 @@ pfm_cleanup_notifiers(struct task_struct *task)
if (ctx && ctx->ctx_notify_task == task) {
DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid));
/*
* the spinlock is required to take care of a race condition
* with the send_sig_info() call. We must make sure that
* either the send_sig_info() completes using a valid task,
* or the notify_task is cleared before the send_sig_info()
* can pick up a stale value. Note that by the time this
* function is executed the 'task' is already detached from the
* tasklist. The problem is that the notifiers have a direct
* pointer to it. It is okay to send a signal to a task in this
* stage, it simply will have no effect. But it is better than sending
* to a completely destroyed task or worse to a new task using the same
* the spinlock is required to take care of a race condition with
* the send_sig_info() call. We must make sure that either the
* send_sig_info() completes using a valid task, or the
* notify_task is cleared before the send_sig_info() can pick up a
* stale value. Note that by the time this function is executed
* the 'task' is already detached from the tasklist. The problem
* is that the notifiers have a direct pointer to it. It is okay
* to send a signal to a task in this stage, it simply will have
* no effect. But it is better than sending to a completely
* destroyed task or worse to a new task using the same
* task_struct address.
*/
LOCK_CTX(ctx);
......@@ -4115,7 +4105,8 @@ pfm_cleanup_notifiers(struct task_struct *task)
DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid));
}
}
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
atomic_set(&task->thread.pfm_notifiers_check, 0);
......
......@@ -80,6 +80,12 @@ show_stack (struct task_struct *task)
}
}
/*
 * Print a stack trace by calling show_stack() with a NULL task pointer.
 * NOTE(review): NULL presumably selects the calling context -- confirm against show_stack().
 */
void
dump_stack (void)
{
show_stack(NULL);
}
void
show_regs (struct pt_regs *regs)
{
......@@ -248,18 +254,15 @@ ia64_load_extra (struct task_struct *task)
* | | <-- sp (lowest addr)
* +---------------------+
*
* Note: if we get called through kernel_thread() then the memory
* above "(highest addr)" is valid kernel stack memory that needs to
* be copied as well.
* Note: if we get called through kernel_thread() then the memory above "(highest addr)"
* is valid kernel stack memory that needs to be copied as well.
*
* Observe that we copy the unat values that are in pt_regs and
* switch_stack. Spilling an integer to address X causes bit N in
* ar.unat to be set to the NaT bit of the register, with N=(X &
* 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY
* if the pt_regs structure in the parent is congruent to that of the
* child, modulo 512. Since the stack is page aligned and the page
* size is at least 4KB, this is always the case, so there is nothing
* to worry about.
* Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
* integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
* with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
* pt_regs structure in the parent is congruent to that of the child, modulo 512. Since
* the stack is page aligned and the page size is at least 4KB, this is always the case,
* so there is nothing to worry about.
*/
int
copy_thread (int nr, unsigned long clone_flags,
......@@ -300,6 +303,8 @@ copy_thread (int nr, unsigned long clone_flags,
memcpy((void *) child_rbs, (void *) rbs, rbs_size);
if (user_mode(child_ptregs)) {
if (clone_flags & CLONE_SETTLS)
child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
if (user_stack_base) {
child_ptregs->r12 = user_stack_base + user_stack_size - 16;
child_ptregs->ar_bspstore = user_stack_base;
......
......@@ -474,7 +474,7 @@ threads_sync_user_rbs (struct task_struct *child, unsigned long child_urbs_end,
{
struct switch_stack *sw;
unsigned long urbs_end;
struct task_struct *p;
struct task_struct *g, *p;
struct mm_struct *mm;
struct pt_regs *pt;
long multi_threaded;
......@@ -495,7 +495,7 @@ threads_sync_user_rbs (struct task_struct *child, unsigned long child_urbs_end,
} else {
read_lock(&tasklist_lock);
{
for_each_task(p) {
do_each_thread(g, p) {
if (p->mm == mm && p->state != TASK_RUNNING) {
sw = (struct switch_stack *) (p->thread.ksp + 16);
pt = ia64_task_regs(p);
......@@ -504,7 +504,7 @@ threads_sync_user_rbs (struct task_struct *child, unsigned long child_urbs_end,
if (make_writable)
user_flushrs(p, pt);
}
}
} while_each_thread(g, p);
}
read_unlock(&tasklist_lock);
}
......
......@@ -15,8 +15,8 @@
* test if they need to do any extra work (up needs to do something
* only if count was negative before the increment operation).
*
* "sleepers" and the contention routine ordering is protected by the
* semaphore spinlock.
* "sleeping" and the contention routine ordering is protected
* by the spinlock in the semaphore's waitqueue head.
*
* Note that these functions are only called when there is contention
* on the lock, and as such all this is the "non-critical" part of the
......@@ -44,40 +44,42 @@ __up (struct semaphore *sem)
wake_up(&sem->wait);
}
static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
void
__down (struct semaphore *sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
unsigned long flags;
tsk->state = TASK_UNINTERRUPTIBLE;
add_wait_queue_exclusive(&sem->wait, &wait);
spin_lock_irqsave(&sem->wait.lock, flags);
add_wait_queue_exclusive_locked(&sem->wait, &wait);
spin_lock_irq(&semaphore_lock);
sem->sleepers++;
for (;;) {
int sleepers = sem->sleepers;
/*
* Add "everybody else" into it. They aren't
* playing, because we own the spinlock.
* playing, because we own the spinlock in
* the wait_queue_head.
*/
if (!atomic_add_negative(sleepers - 1, &sem->count)) {
sem->sleepers = 0;
break;
}
sem->sleepers = 1; /* us - see -1 above */
spin_unlock_irq(&semaphore_lock);
spin_unlock_irqrestore(&sem->wait.lock, flags);
schedule();
spin_lock_irqsave(&sem->wait.lock, flags);
tsk->state = TASK_UNINTERRUPTIBLE;
spin_lock_irq(&semaphore_lock);
}
spin_unlock_irq(&semaphore_lock);
remove_wait_queue(&sem->wait, &wait);
remove_wait_queue_locked(&sem->wait, &wait);
wake_up_locked(&sem->wait);
spin_unlock_irqrestore(&sem->wait.lock, flags);
tsk->state = TASK_RUNNING;
wake_up(&sem->wait);
}
int
......@@ -86,10 +88,12 @@ __down_interruptible (struct semaphore * sem)
int retval = 0;
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
unsigned long flags;
tsk->state = TASK_INTERRUPTIBLE;
add_wait_queue_exclusive(&sem->wait, &wait);
spin_lock_irqsave(&sem->wait.lock, flags);
add_wait_queue_exclusive_locked(&sem->wait, &wait);
spin_lock_irq(&semaphore_lock);
sem->sleepers ++;
for (;;) {
int sleepers = sem->sleepers;
......@@ -110,25 +114,27 @@ __down_interruptible (struct semaphore * sem)
/*
* Add "everybody else" into it. They aren't
* playing, because we own the spinlock. The
* "-1" is because we're still hoping to get
* the lock.
* playing, because we own the spinlock in
* wait_queue_head. The "-1" is because we're
* still hoping to get the semaphore.
*/
if (!atomic_add_negative(sleepers - 1, &sem->count)) {
sem->sleepers = 0;
break;
}
sem->sleepers = 1; /* us - see -1 above */
spin_unlock_irq(&semaphore_lock);
spin_unlock_irqrestore(&sem->wait.lock, flags);
schedule();
spin_lock_irqsave(&sem->wait.lock, flags);
tsk->state = TASK_INTERRUPTIBLE;
spin_lock_irq(&semaphore_lock);
}
spin_unlock_irq(&semaphore_lock);
remove_wait_queue_locked(&sem->wait, &wait);
wake_up_locked(&sem->wait);
spin_unlock_irqrestore(&sem->wait.lock, flags);
tsk->state = TASK_RUNNING;
remove_wait_queue(&sem->wait, &wait);
wake_up(&sem->wait);
return retval;
}
......@@ -142,17 +148,19 @@ __down_trylock (struct semaphore *sem)
unsigned long flags;
int sleepers;
spin_lock_irqsave(&semaphore_lock, flags);
spin_lock_irqsave(&sem->wait.lock, flags);
sleepers = sem->sleepers + 1;
sem->sleepers = 0;
/*
* Add "everybody else" and us into it. They aren't
* playing, because we own the spinlock.
* playing, because we own the spinlock in the
* wait_queue_head.
*/
if (!atomic_add_negative(sleepers, &sem->count))
wake_up(&sem->wait);
if (!atomic_add_negative(sleepers, &sem->count)) {
wake_up_locked(&sem->wait);
}
spin_unlock_irqrestore(&semaphore_lock, flags);
spin_unlock_irqrestore(&sem->wait.lock, flags);
return 1;
}
......@@ -354,6 +354,15 @@ setup_sigcontext (struct sigcontext *sc, sigset_t *mask, struct sigscratch *scr)
return err;
}
/*
 * Tell whether the register backing store pointer @bsp already lies inside the
 * alternate signal stack, i.e. within sas_ss_size bytes starting at sas_ss_sp.
 * The subtraction is unsigned, so an address below sas_ss_sp wraps to a huge
 * offset and is reported as "not on the signal stack".
 */
static inline int
rbs_on_sig_stack (unsigned long bsp)
{
	unsigned long offset = bsp - current->sas_ss_sp;

	return offset < current->sas_ss_size;
}
static long
setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
struct sigscratch *scr)
......@@ -366,10 +375,17 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
frame = (void *) scr->pt.r12;
tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section);
if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) {
new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
frame = (void *) ((current->sas_ss_sp + current->sas_ss_size)
& ~(STACK_ALIGN - 1));
if (ka->sa.sa_flags & SA_ONSTACK) {
/*
* We need to check the memory and register stacks separately, because
* they're switched separately (memory stack is switched in the kernel,
* register stack is switched in the signal trampoline).
*/
if (!on_sig_stack((unsigned long) frame))
frame = (void *) ((current->sas_ss_sp + current->sas_ss_size)
& ~(STACK_ALIGN - 1));
if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
}
frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1));
......@@ -460,7 +476,6 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
long
ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
{
struct signal_struct *sig;
struct k_sigaction *ka;
siginfo_t info;
long restart = in_syscall;
......@@ -487,7 +502,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
}
} else
#endif
if (scr->pt.r10 != -1) {
if (scr->pt.r10 != -1)
/*
* A system call has to be restarted only if one of the error codes
* ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10
......@@ -495,101 +510,14 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
* restart the syscall, so we can clear the "restart" flag here.
*/
restart = 0;
}
for (;;) {
unsigned long signr;
spin_lock_irq(&current->sigmask_lock);
signr = dequeue_signal(&current->blocked, &info);
spin_unlock_irq(&current->sigmask_lock);
while (1) {
int signr = get_signal_to_deliver(&info, &scr->pt);
if (!signr)
if (signr <= 0)
break;
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
/* Let the debugger run. */
current->exit_code = signr;
current->thread.siginfo = &info;
current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD);
schedule();
signr = current->exit_code;
current->thread.siginfo = 0;
/* We're back. Did the debugger cancel the sig? */
if (!signr)
continue;
current->exit_code = 0;
/* The debugger continued. Ignore SIGSTOP. */
if (signr == SIGSTOP)
continue;
/* Update the siginfo structure. Is this good? */
if (signr != info.si_signo) {
info.si_signo = signr;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = current->parent->pid;
info.si_uid = current->parent->uid;
}
/* If the (new) signal is now blocked, requeue it. */
if (sigismember(&current->blocked, signr)) {
send_sig_info(signr, &info, current);
continue;
}
}
ka = &current->sig->action[signr - 1];
if (ka->sa.sa_handler == SIG_IGN) {
if (signr != SIGCHLD)
continue;
/* Check for SIGCHLD: it's special. */
while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
/* nothing */;
continue;
}
if (ka->sa.sa_handler == SIG_DFL) {
int exit_code = signr;
/* Init gets no signals it doesn't want. */
if (current->pid == 1)
continue;
switch (signr) {
case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG:
continue;
case SIGTSTP: case SIGTTIN: case SIGTTOU:
if (is_orphaned_pgrp(current->pgrp))
continue;
/* FALLTHRU */
case SIGSTOP:
current->state = TASK_STOPPED;
current->exit_code = signr;
sig = current->parent->sig;
if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
notify_parent(current, SIGCHLD);
schedule();
continue;
case SIGQUIT: case SIGILL: case SIGTRAP:
case SIGABRT: case SIGFPE: case SIGSEGV:
case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
if (do_coredump(signr, &scr->pt))
exit_code |= 0x80;
/* FALLTHRU */
default:
sig_exit(signr, exit_code, &info);
/* NOTREACHED */
}
}
if (restart) {
switch (errno) {
......@@ -601,7 +529,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
scr->pt.r8 = -EINTR;
else
#endif
scr->pt.r8 = EINTR;
scr->pt.r8 = EINTR;
/* note: scr->pt.r10 is already -1 */
break;
}
......@@ -612,13 +540,14 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
scr->pt.cr_iip -= 2;
} else
#endif
ia64_decrement_ip(&scr->pt);
ia64_decrement_ip(&scr->pt);
}
}
/* Whee! Actually deliver the signal. If the
delivery failed, we need to continue to iterate in
this loop so we can deliver the SIGSEGV... */
/*
* Whee! Actually deliver the signal. If the delivery failed, we need to
* continue to iterate in this loop so we can deliver the SIGSEGV...
*/
if (handle_signal(signr, ka, &info, oldset, scr))
return 1;
}
......@@ -634,9 +563,8 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
} else
#endif
/*
* Note: the syscall number is in r15 which is
* saved in pt_regs so all we need to do here
* is adjust ip so that the "break"
* Note: the syscall number is in r15 which is saved in pt_regs so
* all we need to do here is adjust ip so that the "break"
* instruction gets re-executed.
*/
ia64_decrement_ip(&scr->pt);
......
......@@ -257,31 +257,6 @@ smp_setup_percpu_timer (void)
local_cpu_data->prof_multiplier = 1;
}
/*
* Architecture specific routine called by the kernel just before init is
* fired off. This allows the BP to have everything in order [we hope].
* At the end of this all the APs will hit the system scheduling and off
* we go. Each AP will jump through the kernel
* init into idle(). At this point the scheduler will one day take over
* and give them jobs to do. smp_callin is a standard routine
* we use to track CPUs as they power up.
*/
static volatile atomic_t smp_commenced = ATOMIC_INIT(0);
static void __init
smp_commence (void)
{
/*
* Lets the callins below out of their loop.
*/
Dprintk("Setting commenced=1, go go go\n");
wmb();
atomic_set(&smp_commenced, 1);
}
static void __init
smp_callin (void)
{
......@@ -361,7 +336,7 @@ fork_by_hand (void)
* don't care about the eip and regs settings since we'll never reschedule the
* forked task.
*/
return do_fork(CLONE_VM|CLONE_IDLETASK, 0, 0, 0);
return do_fork(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL);
}
static int __init
......
......@@ -19,6 +19,12 @@
#include <asm/shmparam.h>
#include <asm/uaccess.h>
#ifdef CONFIG_HUGETLB_PAGE
# define SHMLBA_HPAGE HPAGE_SIZE
# define COLOR_HALIGN(addr) (((addr) + SHMLBA_HPAGE - 1) & ~(SHMLBA_HPAGE - 1))
# define TASK_HPAGE_BASE ((REGION_HPAGE << REGION_SHIFT) | HPAGE_SIZE)
#endif
unsigned long
arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
......@@ -56,16 +62,14 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
}
asmlinkage long
ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6,
long arg7, long stack)
ia64_getpriority (int which, int who)
{
struct pt_regs *regs = (struct pt_regs *) &stack;
extern long sys_getpriority (int, int);
long prio;
prio = sys_getpriority(which, who);
if (prio >= 0) {
regs->r8 = 0; /* ensure negative priority is not mistaken as error code */
force_successful_syscall_return();
prio = 20 - prio;
}
return prio;
......@@ -79,10 +83,8 @@ sys_getpagesize (void)
}
asmlinkage unsigned long
ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6,
long arg7, long stack)
ia64_shmat (int shmid, void *shmaddr, int shmflg)
{
struct pt_regs *regs = (struct pt_regs *) &stack;
unsigned long raddr;
int retval;
......@@ -90,16 +92,14 @@ ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg
if (retval < 0)
return retval;
regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */
force_successful_syscall_return();
return raddr;
}
asmlinkage unsigned long
ia64_brk (unsigned long brk, long arg1, long arg2, long arg3,
long arg4, long arg5, long arg6, long arg7, long stack)
ia64_brk (unsigned long brk)
{
extern int vm_enough_memory (long pages);
struct pt_regs *regs = (struct pt_regs *) &stack;
unsigned long rlim, retval, newbrk, oldbrk;
struct mm_struct *mm = current->mm;
......@@ -145,7 +145,7 @@ ia64_brk (unsigned long brk, long arg1, long arg2, long arg3,
out:
retval = mm->brk;
up_write(&mm->mmap_sem);
regs->r8 = 0; /* ensure large retval isn't mistaken as error code */
force_successful_syscall_return();
return retval;
}
......@@ -222,32 +222,98 @@ out: if (file)
* of) files that are larger than the address space of the CPU.
*/
asmlinkage unsigned long
sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff,
long arg6, long arg7, long stack)
sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
{
struct pt_regs *regs = (struct pt_regs *) &stack;
addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
if (!IS_ERR((void *) addr))
regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
force_successful_syscall_return();
return addr;
}
asmlinkage unsigned long
sys_mmap (unsigned long addr, unsigned long len, int prot, int flags,
int fd, long off, long arg6, long arg7, long stack)
sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
{
struct pt_regs *regs = (struct pt_regs *) &stack;
if ((off & ~PAGE_MASK) != 0)
return -EINVAL;
addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
if (!IS_ERR((void *) addr))
regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
force_successful_syscall_return();
return addr;
}
#ifdef CONFIG_HUGETLB_PAGE
/*
 * Allocate a huge-page backed region of @len bytes for the calling process.
 *
 * @key must be non-negative and @len a multiple of HPAGE_SIZE, otherwise
 * -EINVAL is returned.  A zero @addr, an @addr outside REGION_HPAGE, or one
 * that is not HPAGE_SIZE aligned is replaced by TASK_HPAGE_BASE.  The free
 * range lookup and the allocation both run with mmap_sem held for writing.
 * On success the syscall is flagged as successful so that a large address is
 * not mistaken for an error code.
 */
asmlinkage unsigned long
sys_alloc_hugepages (int key, unsigned long addr, size_t len, int prot, int flag)
{
	extern int alloc_hugetlb_pages (int, unsigned long, unsigned long, int, int);
	struct mm_struct *mm = current->mm;
	long retval;

	if (key < 0 || (len & (HPAGE_SIZE - 1)) != 0)
		return -EINVAL;

	/* fall back to the default base for a missing or unusable hint */
	if (!addr || REGION_NUMBER(addr) != REGION_HPAGE || (addr & (HPAGE_SIZE - 1)) != 0)
		addr = TASK_HPAGE_BASE;

	down_write(&mm->mmap_sem);
	retval = arch_get_unmapped_area(NULL, COLOR_HALIGN(addr), len, 0, 0);
	if (retval != -ENOMEM)
		retval = alloc_hugetlb_pages(key, retval, len, prot, flag);
	up_write(&mm->mmap_sem);

	if (!IS_ERR((void *) retval))
		force_successful_syscall_return();
	return retval;
}
/*
 * Release the huge-page region that starts exactly at @addr.
 *
 * Returns -EINVAL when no VMA is found for @addr, when the VMA is not a
 * hugetlb mapping, or when @addr is not the VMA's start address; otherwise
 * returns the result of free_hugepages().
 *
 * The VMA lookup and validation are done with mmap_sem held for writing, so
 * the VMA cannot be modified or freed by another thread between the lookup
 * and the call to free_hugepages().
 */
asmlinkage int
sys_free_hugepages (unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	extern int free_hugepages(struct vm_area_struct *);
	int retval = -EINVAL;

	down_write(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (vma && is_vm_hugetlb_page(vma) && vma->vm_start == addr) {
		/* page tables are torn down under page_table_lock */
		spin_lock(&mm->page_table_lock);
		retval = free_hugepages(vma);
		spin_unlock(&mm->page_table_lock);
	}
	up_write(&mm->mmap_sem);
	return retval;
}
#else /* !CONFIG_HUGETLB_PAGE */
/*
 * Stub used when CONFIG_HUGETLB_PAGE is not set: huge pages are unsupported,
 * so the call always fails with -ENOSYS.  The parameter types mirror the
 * CONFIG_HUGETLB_PAGE implementation (addr is an address, len is a size).
 */
asmlinkage unsigned long
sys_alloc_hugepages (int key, unsigned long addr, size_t len, int prot, int flag)
{
	return -ENOSYS;
}
/*
 * Stub used when CONFIG_HUGETLB_PAGE is not set: always fails with -ENOSYS.
 * NOTE(review): declared as returning unsigned long here, while the
 * CONFIG_HUGETLB_PAGE variant returns int -- consider aligning the two.
 */
asmlinkage unsigned long
sys_free_hugepages (unsigned long addr)
{
return -ENOSYS;
}
#endif /* !CONFIG_HUGETLB_PAGE */
asmlinkage long
sys_vm86 (long arg0, long arg1, long arg2, long arg3)
{
......@@ -256,16 +322,14 @@ sys_vm86 (long arg0, long arg1, long arg2, long arg3)
}
asmlinkage unsigned long
ia64_create_module (const char *name_user, size_t size, long arg2, long arg3,
long arg4, long arg5, long arg6, long arg7, long stack)
ia64_create_module (const char *name_user, size_t size)
{
extern unsigned long sys_create_module (const char *, size_t);
struct pt_regs *regs = (struct pt_regs *) &stack;
unsigned long addr;
addr = sys_create_module (name_user, size);
if (!IS_ERR((void *) addr))
regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
force_successful_syscall_return();
return addr;
}
......
......@@ -104,7 +104,8 @@ do_settimeofday (struct timeval *tv)
tv->tv_sec--;
}
xtime = *tv;
xtime.tv_sec = tv->tv_sec;
xtime.tv_nsec = 1000 * tv->tv_usec;
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
......@@ -135,7 +136,7 @@ do_gettimeofday (struct timeval *tv)
} while (cmpxchg(&last_time_offset, old, usec) != old);
sec = xtime.tv_sec;
usec += xtime.tv_usec;
usec += xtime.tv_nsec / 1000;
}
read_unlock_irqrestore(&xtime_lock, flags);
......
......@@ -15,6 +15,7 @@ obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
obj-$(CONFIG_PERFMON) += carta_random.o
IGNORE_FLAGS_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o
......
/*
* Fast, simple, yet decent quality random number generator based on
* a paper by David G. Carta ("Two Fast Implementations of the
* `Minimal Standard' Random Number Generator," Communications of the
* ACM, January, 1990).
*
* Copyright (C) 2002 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <asm/asmmacro.h>
#define a r2
#define m r3
#define lo r8
#define hi r9
#define t0 r16
#define t1 r17
#define seed r32
// carta_random32: one step of the generator described in the file header.
// Reads its input from "seed" (r32); the final value is left in "lo" (r8)
// when the routine returns.
// NOTE(review): the lane-level descriptions of pmpyshr2.u/unpack2.l below are
// my reading of the IA-64 ISA -- confirm against the architecture manual.
GLOBAL_ENTRY(carta_random32)
// a = 16807 replicated in bits 0..15 and bits 16..31
movl a = (16807 << 16) | 16807
;;
// presumably: t0 = low 16 bits, t1 = high 16 bits of each 16-bit lane product a * seed
pmpyshr2.u t0 = a, seed, 0
pmpyshr2.u t1 = a, seed, 16
;;
// presumably re-interleaves the halves so t0 holds two full 32-bit products
unpack2.l t0 = t1, t0
// m = 0x7fffffff: 31 one-bits deposited at bit 0 of zero (r0)
dep m = -1, r0, 0, 31
;;
// lo = bits 0..31 of t0, hi = bits 32..63 of t0
zxt4 lo = t0
shr.u hi = t0, 32
;;
dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff)
;;
shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16
shr t1 = hi, 15 // t1 = (hi >> 15)
;;
add lo = lo, t0
;;
// if lo > m then lo = (lo & m) + 1
cmp.gtu p6, p0 = lo, m
;;
(p6) and lo = lo, m
;;
(p6) add lo = 1, lo
;;
add lo = lo, t1
;;
// same conditional fold again after adding t1
cmp.gtu p6, p0 = lo, m
;;
(p6) and lo = lo, m
;;
(p6) add lo = 1, lo
br.ret.sptk.many rp
END(carta_random32)
......@@ -10,5 +10,6 @@
O_TARGET := mm.o
obj-y := init.o fault.o tlb.o extable.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
include $(TOPDIR)/Rules.make
/*
* IA-64 Huge TLB Page Support for Kernel.
*
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
static struct vm_operations_struct hugetlb_vm_ops;
struct list_head htlbpage_freelist;
spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
extern long htlbpagemem;
static void zap_hugetlb_resources (struct vm_area_struct *);
/* Number of slots in the key -> inode table htlbpagek[]. */
#define MAX_ID 32
/*
 * Table associating the key of a shared huge-page segment with the inode
 * that backs it.  set_new_inode() claims the first slot whose key is 0, so
 * a key of 0 marks a free slot.
 */
struct htlbpagekey {
struct inode *in; /* inode recorded for this key */
int key; /* segment key; 0 means the slot is unused */
} htlbpagek[MAX_ID];
static struct inode *
find_key_inode(int key)
{
int i;
for (i = 0; i < MAX_ID; i++) {
if (htlbpagek[i].key == key)
return (htlbpagek[i].in);
}
return NULL;
}
/*
 * Take one huge page off htlbpage_freelist.
 *
 * Returns NULL when the free list is empty.  Otherwise the first entry is
 * unlinked and htlbpagemem is decremented under htlbpage_lock; after the
 * lock is dropped the page's reference count is set to 1 and its
 * HPAGE_SIZE bytes are cleared.
 */
static struct page *
alloc_hugetlb_page (void)
{
	struct list_head *first;
	struct page *hpage = NULL;

	spin_lock(&htlbpage_lock);
	first = htlbpage_freelist.next;
	if (first != &htlbpage_freelist) {
		/* List is not empty: detach its first element. */
		hpage = list_entry(first, struct page, list);
		list_del(first);
		htlbpagemem--;
	}
	spin_unlock(&htlbpage_lock);

	if (hpage == NULL)
		return NULL;

	set_page_count(hpage, 1);
	memset(page_address(hpage), 0, HPAGE_SIZE);
	return hpage;
}
/*
 * Drop one reference to a huge page, returning it to htlbpage_freelist
 * when the count reaches zero.  Everything runs under htlbpage_lock.
 *
 * If the page belongs to a mapping and its count is exactly 2, it is first
 * removed from the page cache and its count is reset to 1.  The owning
 * inode's i_size shrinks by HPAGE_SIZE; when it reaches 0 the inode's slot
 * in htlbpagek[] is cleared and the inode is freed.
 */
static void
free_hugetlb_page (struct page *page)
{
	spin_lock(&htlbpage_lock);

	if (page->mapping != NULL && page_count(page) == 2) {
		/* Fetch the owner before the page leaves the page cache. */
		struct inode *owner = page->mapping->host;

		ClearPageDirty(page);
		remove_from_page_cache(page);
		set_page_count(page, 1);

		owner->i_size -= HPAGE_SIZE;
		if (owner->i_size == 0) {
			int slot = 0;

			/* Release the key slot that refers to this inode. */
			while (slot < MAX_ID && !(htlbpagek[slot].key == owner->i_ino))
				slot++;
			if (slot < MAX_ID) {
				htlbpagek[slot].key = 0;
				htlbpagek[slot].in = NULL;
			}
			kfree(owner);
		}
	}

	if (put_page_testzero(page)) {
		list_add(&page->list, &htlbpage_freelist);
		htlbpagemem++;
	}

	spin_unlock(&htlbpage_lock);
}
/*
 * Return the PTE slot used for the huge page containing ADDR in MM,
 * allocating the intermediate page-table levels as needed.
 *
 * The address is first passed through htlbpage_to_page() and the result is
 * what indexes the page tables.  Returns NULL when pmd_alloc() fails;
 * otherwise returns whatever pte_alloc_map() returns.
 */
static pte_t *
huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
{
	unsigned long scaled = htlbpage_to_page(addr);
	pgd_t *pgd = pgd_offset(mm, scaled);
	pmd_t *pmd = pmd_alloc(mm, pgd, scaled);

	if (!pmd)
		return NULL;
	return pte_alloc_map(mm, pmd, scaled);
}
/*
 * Return the PTE slot for the huge page containing ADDR in MM without
 * allocating anything.
 *
 * The walk uses htlbpage_to_page(addr) as the lookup address.  No level of
 * the walk is checked for absence here, so callers must only use this on
 * ranges whose page tables already exist.
 */
static pte_t *
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
{
	unsigned long scaled = htlbpage_to_page(addr);
	pmd_t *pmd = pmd_offset(pgd_offset(mm, scaled), scaled);

	return pte_offset_map(pmd, scaled);
}
/*
 * Set _PAGE_P in ENTRY in place.  The expansion is a braced block rather
 * than an expression, so the macro can only be used as a statement.
 */
#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
/*
 * Install PAGE as a huge-page mapping at PAGE_TABLE and charge it to MM.
 *
 * mm->rss grows by the number of base pages in one huge page.  The PTE is
 * built from vma->vm_page_prot; a non-zero WRITE_ACCESS makes it dirty and
 * writable, zero makes it write-protected.  The entry is then marked
 * young, passed through mk_pte_huge() and stored with set_pte().
 */
static void
set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
	      struct page *page, pte_t * page_table, int write_access)
{
	pte_t new_pte;

	mm->rss += (HPAGE_SIZE / PAGE_SIZE);

	new_pte = mk_pte(page, vma->vm_page_prot);
	if (write_access)
		new_pte = pte_mkwrite(pte_mkdirty(new_pte));
	else
		new_pte = pte_wrprotect(new_pte);

	new_pte = pte_mkyoung(new_pte);
	mk_pte_huge(new_pte);
	set_pte(page_table, new_pte);
}
/*
 * Allocate a huge page from the free list and map it at PAGE_TABLE.
 *
 * Returns 1 on success, or -1 when alloc_hugetlb_page() has no page to
 * hand out (in which case nothing is mapped).
 */
static int
anon_get_hugetlb_page (struct mm_struct *mm, struct vm_area_struct *vma,
		       int write_access, pte_t * page_table)
{
	struct page *hpage = alloc_hugetlb_page();

	if (!hpage)
		return -1;

	set_huge_pte(mm, vma, hpage, page_table, write_access);
	return 1;
}
/*
 * Populate [ADDR, END) of the current process with private huge pages.
 *
 * The VMA found for ADDR must have a size that is a multiple of
 * HPAGE_SIZE.  Every huge-page slot in the range must be empty; each one
 * gets a freshly allocated page, mapped writable only when the VMA has
 * VM_WRITE set.  On success the VMA is flagged VM_HUGETLB | VM_RESERVED
 * (plus VM_DONTCOPY when FLAGS contains MAP_PRIVATE) and its vm_ops are
 * pointed at hugetlb_vm_ops.
 *
 * Returns 0 on success and -1 on any failure.  On failure the pages that
 * were already mapped are released again and the VMA's vm_end is left at
 * the value it had on entry.
 */
static int
make_hugetlb_pages_present (unsigned long addr, unsigned long end, int flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long saved_end;
	int write;
	pte_t *pte;

	vma = find_vma(mm, addr);
	if (!vma)
		return -1;
	if ((vma->vm_end - vma->vm_start) & (HPAGE_SIZE - 1))
		return -1;

	/*
	 * set_huge_pte() only tests its write_access argument for being
	 * non-zero, so pass a plain 0/1 here.  Passing VM_READ for the
	 * read-only case would be non-zero as well and would map read-only
	 * areas writable.
	 */
	write = (vma->vm_flags & VM_WRITE) != 0;

	spin_lock(&mm->page_table_lock);
	do {
		pte = huge_pte_alloc(mm, addr);
		if (!pte || !pte_none(*pte))
			goto out_error;
		if (anon_get_hugetlb_page(mm, vma, write, pte) == -1)
			goto out_error;
		addr += HPAGE_SIZE;
	} while (addr < end);
	spin_unlock(&mm->page_table_lock);

	vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
	if (flags & MAP_PRIVATE)
		vma->vm_flags |= VM_DONTCOPY;
	vma->vm_ops = &hugetlb_vm_ops;
	return 0;

out_error:
	if (addr > vma->vm_start) {
		/*
		 * Temporarily shrink the VMA so that only the part that was
		 * populated gets zapped, then put back the real vm_end (not
		 * the caller-supplied END, which need not match it).
		 */
		saved_end = vma->vm_end;
		vma->vm_end = addr;
		zap_hugetlb_resources(vma);
		vma->vm_end = saved_end;
	}
	spin_unlock(&mm->page_table_lock);
	return -1;
}
/*
 * Duplicate the huge-page mappings of VMA from address space SRC into DST.
 *
 * For every huge page in [vma->vm_start, vma->vm_end) the source PTE is
 * copied verbatim into DST, the underlying page gains a reference and
 * dst->rss is increased by the number of base pages per huge page.
 *
 * Returns 0 on success or -ENOMEM when a destination PTE cannot be
 * allocated; entries copied before the failure are left in place.
 */
int
copy_hugetlb_page_range (struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma)
{
	const unsigned long limit = vma->vm_end;
	unsigned long va;

	for (va = vma->vm_start; va < limit; va += HPAGE_SIZE) {
		pte_t *to, *from;
		pte_t val;
		struct page *shared;

		to = huge_pte_alloc(dst, va);
		if (!to)
			return -ENOMEM;

		from = huge_pte_offset(src, va);
		val = *from;
		shared = pte_page(val);
		get_page(shared);
		set_pte(to, val);
		dst->rss += (HPAGE_SIZE / PAGE_SIZE);
	}
	return 0;
}
int
follow_hugetlb_page (struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *st, int *length, int i)
{
pte_t *ptep, pte;
unsigned long start = *st;
unsigned long pstart;
int len = *length;
struct page *page;
do {
pstart = start & HPAGE_MASK;
ptep = huge_pte_offset(mm, start);
pte = *ptep;
back1:
page = pte_page(pte);
if (pages) {
page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
pages[i] = page;
}
if (vmas)
vmas[i] = vma;
i++;
len--;
start += PAGE_SIZE;
if (((start & HPAGE_MASK) == pstart) && len
&& (start < vma->vm_end))
goto back1;
} while (len && start < vma->vm_end);
*length = len;
*st = start;
return i;
}
/*
 * Tear down every huge-page mapping in MPNT.
 *
 * Each huge PTE in [vm_start, vm_end) is cleared and its page handed to
 * free_hugetlb_page().  At least one PTE is processed even if the range is
 * empty.  Afterwards the owning mm's rss is reduced by the size of the
 * range in base pages, the VMA's vm_ops are reset to NULL and the TLB is
 * flushed for the range.
 */
static void
zap_hugetlb_resources (struct vm_area_struct *mpnt)
{
	struct mm_struct *mm = mpnt->vm_mm;
	const unsigned long first = mpnt->vm_start;
	const unsigned long last = mpnt->vm_end;
	unsigned long va = first;

	do {
		pte_t *ptep = huge_pte_offset(mm, va);
		struct page *hpage = pte_page(*ptep);

		pte_clear(ptep);
		free_hugetlb_page(hpage);
		va += HPAGE_SIZE;
	} while (va < last);

	mm->rss -= ((last - first) >> PAGE_SHIFT);
	mpnt->vm_ops = NULL;
	flush_tlb_range(mpnt, first, last);
}
/*
 * Remove MPNT from the current process's VMA bookkeeping: the singly
 * linked mm->mmap list and the mm->mm_rb tree.  The mmap_cache is
 * invalidated and map_count is decremented.
 *
 * MPNT must be on current->mm's list; the list walk does not check for
 * running off the end.
 */
static void
unlink_vma (struct vm_area_struct *mpnt)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct **link = &mm->mmap;

	/* Find the pointer that refers to mpnt and bypass it. */
	while (*link != mpnt)
		link = &(*link)->vm_next;
	*link = mpnt->vm_next;

	rb_erase(&mpnt->vm_rb, &mm->mm_rb);
	mm->mmap_cache = NULL;
	mm->map_count--;
}
/*
 * Release a huge-page VMA: unlink it from the current mm, tear down its
 * mappings and free the vm_area_struct itself.  Always returns 1.
 */
int
free_hugepages (struct vm_area_struct *mpnt)
{
/* Unlink first, then zap the mappings, then free the VMA. */
unlink_vma(mpnt);
zap_hugetlb_resources(mpnt);
kmem_cache_free(vm_area_cachep, mpnt);
return 1;
}
/*
 * Create the inode that backs a new shared huge-page segment and register
 * it under KEY in the first free slot of htlbpagek[].
 *
 * The inode is obtained with kmalloc(GFP_ATOMIC).  Its i_writecount is
 * incremented, which alloc_shared_hugetlb_pages() treats as "segment still
 * being set up" until it resets the count to 0.  Owner ids come from the
 * current task, i_mode is PROT and i_size is LEN.  FLAG is not used.
 *
 * Returns the new inode, or NULL when the table is full or the allocation
 * fails.
 */
static struct inode *
set_new_inode (unsigned long len, int prot, int flag, int key)
{
	struct inode *inode;
	int slot = 0;

	/* A key of 0 marks an unused table entry. */
	while (slot < MAX_ID && htlbpagek[slot].key != 0)
		slot++;
	if (slot == MAX_ID)
		return NULL;

	inode = kmalloc(sizeof (struct inode), GFP_ATOMIC);
	if (!inode)
		return NULL;

	inode_init_once(inode);
	atomic_inc(&inode->i_writecount);
	inode->i_mapping = &inode->i_data;
	inode->i_mapping->host = inode;
	inode->i_ino = (unsigned long) key;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_mode = prot;
	inode->i_size = len;

	htlbpagek[slot].key = key;
	htlbpagek[slot].in = inode;
	return inode;
}
/*
 * Check whether the current task may attach to an existing segment.
 *
 * Returns 0 when the inode's uid and gid equal the task's fsuid and fsgid
 * and the inode's size equals LEN; returns -1 otherwise.  PROT and FLAG
 * are accepted but not examined.
 */
static int
check_size_prot (struct inode *inode, unsigned long len, int prot, int flag)
{
	if (inode->i_uid != current->fsuid
	    || inode->i_gid != current->fsgid
	    || inode->i_size != len)
		return -1;
	return 0;
}
/*
 * Map the shared huge-page segment identified by KEY at ADDR in the
 * current process, creating the segment if it does not exist yet.
 *
 * Returns 0 on success.  Failure codes visible in this function:
 *   -EPERM   segment missing and caller neither has CAP_SYS_ADMIN nor is
 *            in group 0
 *   -ENOENT  segment missing and IPC_CREAT not set in FLAG
 *   -EINVAL  existing segment fails check_size_prot(), or no VMA is found
 *            after the mmap
 *   -ENOMEM  default for every other failure (inode allocation, mmap
 *            failure, page or page-table allocation failure)
 */
int
alloc_shared_hugetlb_pages (int key, unsigned long addr, unsigned long len, int prot, int flag)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct inode *inode;
struct address_space *mapping;
struct page *page;
int idx;
int retval = -ENOMEM;
int newalloc = 0; /* set once this call has created the inode */
try_again:
spin_lock(&htlbpage_lock);
inode = find_key_inode(key);
if (inode == NULL) {
/* No segment for this key yet: only privileged callers may create one. */
if (!capable(CAP_SYS_ADMIN)) {
if (!in_group_p(0)) {
retval = -EPERM;
goto out_err;
}
}
if (!(flag & IPC_CREAT)) {
retval = -ENOENT;
goto out_err;
}
inode = set_new_inode(len, prot, flag, key);
if (inode == NULL)
goto out_err;
newalloc = 1;
} else {
if (check_size_prot(inode, len, prot, flag) < 0) {
retval = -EINVAL;
goto out_err;
}
else if (atomic_read(&inode->i_writecount)) {
/*
 * set_new_inode() leaves i_writecount non-zero and the success path
 * below resets it to 0, so a non-zero count means the segment is still
 * being set up.  Drop the lock and retry (busy-wait).
 */
spin_unlock(&htlbpage_lock);
goto try_again;
}
}
spin_unlock(&htlbpage_lock);
mapping = inode->i_mapping;
addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot,
MAP_NORESERVE|MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0);
/* On mmap failure retval still holds its initial -ENOMEM. */
if (IS_ERR((void *) addr))
goto freeinode;
vma = find_vma(mm, addr);
if (!vma) {
retval = -EINVAL;
goto freeinode;
}
spin_lock(&mm->page_table_lock);
do {
pte_t *pte = huge_pte_alloc(mm, addr);
if ((pte) && (pte_none(*pte))) {
/* Index of this huge page within the segment. */
idx = (addr - vma->vm_start) >> HPAGE_SHIFT;
page = find_get_page(mapping, idx);
if (page == NULL) {
/* Not in the page cache yet: allocate it and add it. */
page = alloc_hugetlb_page();
if (page == NULL)
goto out;
add_to_page_cache(page, mapping, idx);
}
set_huge_pte(mm, vma, page, pte,
(vma->vm_flags & VM_WRITE));
} else
goto out;
addr += HPAGE_SIZE;
} while (addr < vma->vm_end);
retval = 0;
vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
vma->vm_ops = &hugetlb_vm_ops;
spin_unlock(&mm->page_table_lock);
/* Mark the segment as fully set up so that waiters in try_again proceed. */
spin_lock(&htlbpage_lock);
atomic_set(&inode->i_writecount, 0);
spin_unlock(&htlbpage_lock);
return retval;
out:
/*
 * Populating failed part-way: zap only the part of the VMA that was
 * populated (by temporarily shrinking vm_end), then unmap the range.
 */
if (addr > vma->vm_start) {
unsigned long raddr = vma->vm_end;
vma->vm_end = addr;
zap_hugetlb_resources(vma);
vma->vm_end = raddr;
}
spin_unlock(&mm->page_table_lock);
do_munmap(mm, vma->vm_start, len);
if (newalloc)
goto freeinode;
return retval;
out_err:
spin_unlock(&htlbpage_lock);
freeinode:
/*
 * Undo the inode creation done by this call, if any.
 * NOTE(review): htlbpagek[] is modified here after htlbpage_lock has been
 * released on every path that reaches this label - confirm that is safe.
 */
if (newalloc) {
for (idx = 0; idx < MAX_ID; idx++)
if (htlbpagek[idx].key == inode->i_ino) {
htlbpagek[idx].key = 0;
htlbpagek[idx].in = NULL;
break;
}
kfree(inode);
}
return retval;
}
/*
 * Create a private, anonymous huge-page mapping of LEN bytes at ADDR for
 * the current process and populate it immediately.
 *
 * Only callers with CAP_SYS_ADMIN or membership in group 0 are allowed;
 * others get -EPERM.  A failing mmap, or a failure to populate the range
 * (after which the range is unmapped again), yields -ENOMEM.  Returns 0 on
 * success.  KEY is not used.
 */
static int
alloc_private_hugetlb_pages (int key, unsigned long addr, unsigned long len, int prot, int flag)
{
	if (!capable(CAP_SYS_ADMIN) && !in_group_p(0))
		return -EPERM;

	addr = do_mmap_pgoff(NULL, addr, len, prot,
			     MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, 0);
	if (IS_ERR((void *) addr))
		return -ENOMEM;

	if (make_hugetlb_pages_present(addr, (addr + len), flag) >= 0)
		return 0;

	do_munmap(current->mm, addr, len);
	return -ENOMEM;
}
/*
 * Entry point for huge-page allocation: a positive KEY selects a shared
 * segment, any other value a private mapping.  Returns whatever the
 * selected helper returns.
 */
int
alloc_hugetlb_pages (int key, unsigned long addr, unsigned long len, int prot, int flag)
{
	if (key <= 0)
		return alloc_private_hugetlb_pages(key, addr, len, prot, flag);
	return alloc_shared_hugetlb_pages(key, addr, len, prot, flag);
}
/*
 * Grow or shrink the pool of huge pages.
 *
 * A non-negative COUNT is the desired pool size; the change applied is
 * COUNT minus the current htlbzone_pages.  A negative COUNT is itself the
 * change, i.e. the pool shrinks by -COUNT pages.
 *
 * Growing allocates huge pages with alloc_pages(), marks every base page
 * reserved and puts the huge page on htlbpage_freelist.  Shrinking takes
 * pages from the free list, clears the listed page flags on every base
 * page and returns the memory with __free_pages().  Either direction stops
 * early when no further page can be obtained.
 *
 * Returns the resulting htlbzone_pages.
 */
int
set_hugetlb_mem_size (int count)
{
	extern long htlbzone_pages;
	extern struct list_head htlbpage_freelist;
	struct page *page, *sub;
	int delta, j;

	if (count >= 0)
		delta = count - htlbzone_pages;
	else
		delta = count;

	if (delta > 0) {
		/* Increase the pool. */
		for (; delta != 0; delta--) {
			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
			if (page == NULL)
				break;
			for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
				sub = page + j;
				SetPageReserved(sub);
			}
			spin_lock(&htlbpage_lock);
			list_add(&page->list, &htlbpage_freelist);
			htlbpagemem++;
			htlbzone_pages++;
			spin_unlock(&htlbpage_lock);
		}
		return (int) htlbzone_pages;
	}

	/* Shrink the pool (no iterations when delta is 0). */
	for (; delta != 0; delta++) {
		page = alloc_hugetlb_page();
		if (page == NULL)
			break;
		spin_lock(&htlbpage_lock);
		htlbzone_pages--;
		spin_unlock(&htlbpage_lock);
		for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
			sub = page + j;
			sub->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
					1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
					1 << PG_private | 1<< PG_writeback);
		}
		set_page_count(page, 1);
		__free_pages(page, HUGETLB_PAGE_ORDER);
	}
	return (int) htlbzone_pages;
}
/*
 * VMA operations installed on huge-page mappings by this file.  Only the
 * close hook is set; it points at zap_hugetlb_resources().
 */
static struct vm_operations_struct hugetlb_vm_ops = {
.close = zap_hugetlb_resources
};
......@@ -78,7 +78,7 @@ ia64_init_addr_space (void)
vma->vm_mm = current->mm;
vma->vm_start = IA64_RBS_BOT;
vma->vm_end = vma->vm_start + PAGE_SIZE;
vma->vm_page_prot = PAGE_COPY;
vma->vm_page_prot = protection_map[VM_READ | VM_WRITE];
vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
vma->vm_ops = NULL;
vma->vm_pgoff = 0;
......@@ -339,6 +339,14 @@ ia64_mmu_init (void *my_cpu_data)
/*
* Set up the page tables.
*/
#ifdef CONFIG_HUGETLB_PAGE
long htlbpagemem;
int htlbpage_max;
extern long htlbzone_pages;
extern struct list_head htlbpage_freelist;
#endif
void
paging_init (void)
{
......@@ -439,4 +447,29 @@ mem_init (void)
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
#endif
#ifdef CONFIG_HUGETLB_PAGE
{
long i;
int j;
struct page *page, *map;
if ((htlbzone_pages << (HPAGE_SHIFT - PAGE_SHIFT)) >= max_low_pfn)
htlbzone_pages = (max_low_pfn >> ((HPAGE_SHIFT - PAGE_SHIFT) + 1));
INIT_LIST_HEAD(&htlbpage_freelist);
for (i = 0; i < htlbzone_pages; i++) {
page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
if (!page)
break;
map = page;
for (j = 0; j < (HPAGE_SIZE/PAGE_SIZE); j++) {
SetPageReserved(map);
map++;
}
list_add(&page->list, &htlbpage_freelist);
}
printk("Total Huge_TLB_Page memory pages allocated %ld \n", i);
htlbzone_pages = htlbpagemem = i;
htlbpage_max = (int)i;
}
#endif
}
......@@ -63,7 +63,7 @@ wrap_mmu_context (struct mm_struct *mm)
read_lock(&tasklist_lock);
repeat:
for_each_task(tsk) {
for_each_process(tsk) {
if (!tsk->mm)
continue;
tsk_context = tsk->mm->context;
......
......@@ -9,6 +9,7 @@ BEGIN {
print " */"
print ""
print "#define CLONE_IDLETASK_BIT 12"
print "#define CLONE_SETTLS_BIT 19"
}
# look for .tab:
......
......@@ -202,6 +202,7 @@ main (int argc, char **argv)
}
printf ("\n#define CLONE_IDLETASK_BIT %ld\n", ia64_fls (CLONE_IDLETASK));
printf ("\n#define CLONE_SETTLS_BIT %ld\n", ia64_fls (CLONE_SETTLS));
printf ("\n#endif /* _ASM_IA64_OFFSETS_H */\n");
return 0;
......
......@@ -83,6 +83,7 @@
#define hardirq_trylock() (!in_interrupt())
#define hardirq_endlock() do { } while (0)
#define in_atomic() (preempt_count() != 0)
#define irq_enter() (preempt_count() += HARDIRQ_OFFSET)
#if CONFIG_PREEMPT
......
......@@ -90,20 +90,6 @@ ide_init_default_hwifs (void)
#endif
}
#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id))
#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id))
#define ide_check_region(from,extent) check_region((from), (extent))
#define ide_request_region(from,extent,name) request_region((from), (extent), (name))
#define ide_release_region(from,extent) release_region((from), (extent))
/*
* The following are not needed for the non-m68k ports
*/
#define ide_ack_intr(hwif) (1)
#define ide_fix_driveid(id) do {} while (0)
#define ide_release_lock(lock) do {} while (0)
#define ide_get_lock(lock, hdlr, data) do {} while (0)
#endif /* __KERNEL__ */
#endif /* __ASM_IA64_IDE_H */
......@@ -110,6 +110,10 @@ reload_context (struct mm_struct *mm)
rr2 = rr0 + 2*rid_incr;
rr3 = rr0 + 3*rid_incr;
rr4 = rr0 + 4*rid_incr;
#ifdef CONFIG_HUGETLB_PAGE
rr4 = (rr4 & (~(0xfcUL))) | (HPAGE_SHIFT << 2);
#endif
ia64_set_rr(0x0000000000000000, rr0);
ia64_set_rr(0x2000000000000000, rr1);
ia64_set_rr(0x4000000000000000, rr2);
......
......@@ -30,6 +30,32 @@
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
#ifdef CONFIG_HUGETLB_PAGE
# if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
# define HPAGE_SHIFT 32
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
# define HPAGE_SHIFT 28
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
# define HPAGE_SHIFT 26
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
# define HPAGE_SHIFT 24
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
# define HPAGE_SHIFT 22
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
# define HPAGE_SHIFT 20
# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
# define HPAGE_SHIFT 18
# else
# error Unsupported IA-64 HugeTLB Page Size!
# endif
# define REGION_HPAGE (4UL) /* note: this is hardcoded in mmu_context.h:reload_context()!*/
# define REGION_SHIFT 61
# define HPAGE_SIZE (__IA64_UL_CONST(1) << HPAGE_SHIFT)
# define HPAGE_MASK (~(HPAGE_SIZE - 1))
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __ASSEMBLY__
# define __pa(x) ((x) - PAGE_OFFSET)
# define __va(x) ((x) + PAGE_OFFSET)
......@@ -87,6 +113,12 @@ typedef union ia64_va {
#define REGION_SIZE REGION_NUMBER(1)
#define REGION_KERNEL 7
#ifdef CONFIG_HUGETLB_PAGE
# define htlbpage_to_page(x) ((REGION_NUMBER(x) << 61) \
| (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#endif
#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
# define ia64_abort() __builtin_trap()
#else
......
......@@ -78,6 +78,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/fpu.h>
/*
* Data types needed to pass information into PAL procedures and
......@@ -649,12 +650,43 @@ extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64, u64);
extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64);
extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64);
extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64);
#define PAL_CALL(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_static(a0, a1, a2, a3, 0)
#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_static(a0, a1, a2, a3, 1)
#define PAL_CALL_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_stacked(a0, a1, a2, a3)
#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_static(a0, a1, a2, a3)
#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3)
extern void ia64_save_scratch_fpregs (struct ia64_fpreg *);
extern void ia64_load_scratch_fpregs (struct ia64_fpreg *);
#define PAL_CALL(iprv,a0,a1,a2,a3) do { \
struct ia64_fpreg fr[6]; \
ia64_save_scratch_fpregs(fr); \
iprv = ia64_pal_call_static(a0, a1, a2, a3, 0); \
ia64_load_scratch_fpregs(fr); \
} while (0)
#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) do { \
struct ia64_fpreg fr[6]; \
ia64_save_scratch_fpregs(fr); \
iprv = ia64_pal_call_static(a0, a1, a2, a3, 1); \
ia64_load_scratch_fpregs(fr); \
} while (0)
#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do { \
struct ia64_fpreg fr[6]; \
ia64_save_scratch_fpregs(fr); \
iprv = ia64_pal_call_stacked(a0, a1, a2, a3); \
ia64_load_scratch_fpregs(fr); \
} while (0)
#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do { \
struct ia64_fpreg fr[6]; \
ia64_save_scratch_fpregs(fr); \
iprv = ia64_pal_call_phys_static(a0, a1, a2, a3); \
ia64_load_scratch_fpregs(fr); \
} while (0)
#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do { \
struct ia64_fpreg fr[6]; \
ia64_save_scratch_fpregs(fr); \
iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3); \
ia64_load_scratch_fpregs(fr); \
} while (0)
typedef int (*ia64_pal_handler) (u64, ...);
extern ia64_pal_handler ia64_pal;
......
......@@ -45,6 +45,7 @@
* PMC flags
*/
#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */
#define PFM_REGFL_RANDOM 0x2 /* randomize sampling periods */
/*
* PMD/PMC/IBR/DBR return flags (ignored on input)
......@@ -86,8 +87,10 @@ typedef struct {
unsigned long reg_short_reset;/* reset after counter overflow (small) */
unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */
unsigned long reg_random_seed; /* seed value when randomization is used */
unsigned long reg_random_mask; /* bitmask used to limit random value */
unsigned long reserved[16]; /* for future use */
unsigned long reserved[14]; /* for future use */
} pfarg_reg_t;
typedef struct {
......@@ -132,28 +135,28 @@ typedef struct {
#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
/*
* Entry header in the sampling buffer.
* The header is directly followed with the PMDS saved in increasing index
* order: PMD4, PMD5, .... How many PMDs are present is determined by the
* user program during context creation.
* Entry header in the sampling buffer. The header is directly followed
* with the PMDs saved in increasing index order: PMD4, PMD5, .... How
* many PMDs are present is determined by the user program during
* context creation.
*
* XXX: in this version of the entry, only up to 64 registers can be recorded
* This should be enough for quite some time. Always check sampling format
* before parsing entries!
* XXX: in this version of the entry, only up to 64 registers can be
* recorded. This should be enough for quite some time. Always check
* sampling format before parsing entries!
*
* Inn the case where multiple counters have overflowed at the same time, the
* rate field indicate the initial value of the first PMD, based on the index.
* For instance, if PMD2 and PMD5 have ovewrflowed for this entry, the rate field
* will show the initial value of PMD2.
* In the case where multiple counters overflow at the same time, the
* last_reset_value member indicates the initial value of the PMD with
* the smallest index. For instance, if PMD2 and PMD5 have overflowed,
* the last_reset_value member contains the initial value of PMD2.
*/
typedef struct {
int pid; /* identification of process */
int cpu; /* which cpu was used */
unsigned long rate; /* initial value of overflowed counter */
unsigned long stamp; /* timestamp */
unsigned long ip; /* where did the overflow interrupt happened */
unsigned long regs; /* bitmask of which registers overflowed */
unsigned long period; /* sampling period used by overflowed counter (smallest pmd index) */
int pid; /* identification of process */
int cpu; /* which cpu was used */
unsigned long last_reset_value; /* initial value of counter that overflowed */
unsigned long stamp; /* timestamp */
unsigned long ip; /* where did the overflow interrupt happened */
unsigned long regs; /* bitmask of which registers overflowed */
unsigned long period; /* unused */
} perfmon_smpl_entry_t;
extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg);
......
......@@ -236,7 +236,15 @@ struct thread_struct {
__u64 ssd; /* IA32 stack selector descriptor */
__u64 old_k1; /* old value of ar.k1 */
__u64 old_iob; /* old IOBase value */
# define INIT_THREAD_IA32 0, 0, 0x17800000037fULL, 0, 0, 0, 0, 0, 0,
# define INIT_THREAD_IA32 .eflag = 0, \
.fsr = 0, \
.fcr = 0x17800000037fULL, \
.fir = 0, \
.fdr = 0, \
.csd = 0, \
.ssd = 0, \
.old_k1 = 0, \
.old_iob = 0,
#else
# define INIT_THREAD_IA32
#endif /* CONFIG_IA32_SUPPORT */
......@@ -248,7 +256,13 @@ struct thread_struct {
atomic_t pfm_notifiers_check; /* when >0, will cleanup ctx_notify_task in tasklist */
atomic_t pfm_owners_check; /* when >0, will cleanup ctx_owner in tasklist */
void *pfm_smpl_buf_list; /* list of sampling buffers to vfree */
# define INIT_THREAD_PM {0, }, {0, }, 0, NULL, {0}, {0}, NULL,
# define INIT_THREAD_PM .pmc = {0, }, \
.pmd = {0, }, \
.pfm_ovfl_block_reset = 0, \
.pfm_context = NULL, \
.pfm_notifiers_check = { 0 }, \
.pfm_owners_check = { 0 }, \
.pfm_smpl_buf_list = NULL,
#else
# define INIT_THREAD_PM
#endif
......@@ -258,16 +272,17 @@ struct thread_struct {
};
#define INIT_THREAD { \
flags: 0, \
ksp: 0, \
map_base: DEFAULT_MAP_BASE, \
task_size: DEFAULT_TASK_SIZE, \
siginfo: 0, \
.flags = 0, \
.ksp = 0, \
.map_base = DEFAULT_MAP_BASE, \
.task_size = DEFAULT_TASK_SIZE, \
.siginfo = 0, \
.last_fph_cpu = 0, \
INIT_THREAD_IA32 \
INIT_THREAD_PM \
dbr: {0, }, \
ibr: {0, }, \
fph: {{{{0}}}, } \
.dbr = {0, }, \
.ibr = {0, }, \
.fph = {{{{0}}}, } \
}
#define start_thread(regs,new_ip,new_sp) do { \
......
......@@ -37,9 +37,9 @@ ia64_rse_rnat_addr (unsigned long *slot_addr)
}
/*
* Calcuate the number of registers in the dirty partition starting at
* BSPSTORE with a size of DIRTY bytes. This isn't simply DIRTY
* divided by eight because the 64th slot is used to store ar.rnat.
* Calculate the number of registers in the dirty partition starting at BSPSTORE and
* ending at BSP. This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores
* ar.rnat.
*/
static __inline__ unsigned long
ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp)
......
......@@ -38,9 +38,12 @@ extern spinlock_t sal_lock;
# define SAL_CALL(result,args...) do { \
unsigned long flags; \
struct ia64_fpreg fr[6]; \
ia64_save_scratch_fpregs(fr); \
spin_lock_irqsave(&sal_lock, flags); \
__SAL_CALL(result,args); \
spin_unlock_irqrestore(&sal_lock, flags); \
ia64_load_scratch_fpregs(fr); \
} while (0)
#define SAL_SET_VECTORS 0x01000000
......
......@@ -66,6 +66,7 @@ typedef struct siginfo {
long _band; /* POLL_IN, POLL_OUT, POLL_MSG (XPG requires a "long") */
int _fd;
} _sigpoll;
/* SIGPROF */
struct {
pid_t _pid; /* which child */
......
......@@ -166,7 +166,6 @@ struct k_sigaction {
# include <asm/sigcontext.h>
#define HAVE_ARCH_GET_SIGNAL_TO_DELIVER
#define HAVE_ARCH_SYS_PAUSE
#endif /* __KERNEL__ */
......
......@@ -108,7 +108,8 @@ typedef struct {
} rwlock_t;
#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
#define rwlock_is_locked(x) (*(volatile int *) (x) != 0)
#define _raw_read_lock(rw) \
do { \
......
/* dummy (must be non-empty to prevent prejudicial removal...) */
......@@ -148,7 +148,7 @@ do { \
"cmp.ne p6,p7=%1,r0;;" \
"(p6) ssm psr.i;" \
"(p7) rsm psr.i;;" \
"srlz.d" \
"(p6) srlz.d" \
: "=&r" (old_psr) : "r"((psr) & IA64_PSR_I) \
: "p6", "p7", "memory"); \
if ((old_psr & IA64_PSR_I) && !(psr & IA64_PSR_I)) { \
......@@ -174,6 +174,13 @@ do { \
#define local_irq_enable() __asm__ __volatile__ (";; ssm psr.i;; srlz.d" ::: "memory")
#define local_save_flags(flags) __asm__ __volatile__ ("mov %0=psr" : "=r" (flags) :: "memory")
#define irqs_disabled() \
({ \
unsigned long flags; \
local_save_flags(flags); \
(flags & IA64_PSR_I) == 0; \
})
/*
* Force an unresolved reference if someone tries to use
* ia64_fetch_and_add() with a bad value.
......@@ -367,14 +374,14 @@ struct task_struct;
extern void ia64_save_extra (struct task_struct *task);
extern void ia64_load_extra (struct task_struct *task);
#if defined(CONFIG_SMP) && defined(CONFIG_PERFMON)
#ifdef CONFIG_PERFMON
DECLARE_PER_CPU(int, pfm_syst_wide);
# define PERFMON_IS_SYSWIDE() (get_cpu_var(pfm_syst_wide) != 0)
#else
# define PERFMON_IS_SYSWIDE() (0)
#endif
#define __switch_to(prev,next,last) do { \
#define __switch_to(prev,next,last) do { \
if (((prev)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) \
|| IS_IA32_PROCESS(ia64_task_regs(prev)) || PERFMON_IS_SYSWIDE()) \
ia64_save_extra(prev); \
......
......@@ -3,15 +3,19 @@
/*
* Copyright (C) 1998-2001 Hewlett-Packard Co
* Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
/*
* 2001/01/18 davidm Removed CLOCK_TICK_RATE. It makes no sense on IA-64.
* Also removed cacheflush_time as it's entirely unused.
*/
#include <asm/processor.h>
typedef unsigned long cycles_t;
#define CLOCK_TICK_RATE 100000000
static inline cycles_t
get_cycles (void)
{
......
......@@ -199,4 +199,8 @@ tlb_remove_page (mmu_gather_t *tlb, struct page *page)
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define tlb_remove_tlb_entry(tlb, ptep, addr) __tlb_remove_tlb_entry(tlb, ptep, addr)
#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep)
#define pmd_free_tlb(tlb, ptep) __pmd_free_tlb(tlb, ptep)
#endif /* _ASM_IA64_TLB_H */
......@@ -225,7 +225,7 @@
#define __NR_security 1233
#define __NR_alloc_hugepages 1234
#define __NR_free_hugepages 1235
/* 1236 currently unused */
#define __NR_exit_group 1236
/* 1237 currently unused */
#define __NR_io_setup 1238
#define __NR_io_destroy 1239
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment