Commit 8b49c803 authored by Andi Kleen's avatar Andi Kleen Committed by Linus Torvalds

[PATCH] x86-64 merge for 2.6.0test3

Without these changes an x86-64 NUMA kernel won't boot in many
configurations.

The main change is the improved IOMMU code which supports merging of
mappings and has various bugfixes.

 - Update defconfig
 - Use argument ptregs in 32bit elf_core_copy_task_fpregs
 - Harden aperture fixup code: read aperture from the AGP bridge if needed,
   better error checking.
 - Support nmi_watchdog=panic to panic on watchdog trigger
 - IOMMU: Support panic on IOMMU overflow (iommu=panic)
 - IOMMU: Force SAC for mappings >40bits when iommu=force is active
   (this can potentially give better performance)
 - IOMMU: Cache northbridges for faster TLB flush
 - IOMMU: Fix SMP race in TLB flush
 - IOMMU: Merge pci_alloc_consistent and pci_map_single
 - IOMMU: Clean up leak tracing
 - IOMMU: Rewrite pci_map_sg, support merging of mappings
   On overflow fall back to piece-by-piece mapping.
 - IOMMU: Tell block layer to assume merging when iommu force is active
   (this gives better performance with MTP fusion, drawback is that the
   overflow/fragmentation handling of the IOMMU area is still a bit
   dubious with that)
 - Fix/clean up per cpu data
 - Add 64bit clean time(2)
 - Export cpu_callout_map for IPv6
 - Handle nodes with no own memory in NUMA discovery.
   This fixes boot on various newer Opteron motherboards where the memory
   is only connected to a single CPU.
 - Fix fallback path for failed NUMA discovery. numnodes has to be reset.
 - Check for enabled nodes in NUMA discovery (Eric Biederman)
 - Remove NUMA emunodes support. Has badly bitrotted.
 - Add __clear_bit_string for IOMMU code
 - Add new 32bit system calls to ia32_unistd.h
 - Remove duplicate default_do_nmi prototype
 - Make PCI_DMA_BUS_IS_PHYS dependent on no_iommu
 - Fix padding length of siginfo_t to match glibc
 - More pci direct access functions.
parent 7d701696
...@@ -22,7 +22,7 @@ CONFIG_SWAP=y ...@@ -22,7 +22,7 @@ CONFIG_SWAP=y
CONFIG_SYSVIPC=y CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y CONFIG_SYSCTL=y
CONFIG_LOG_BUF_SHIFT=16 CONFIG_LOG_BUF_SHIFT=18
# CONFIG_EMBEDDED is not set # CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y CONFIG_KALLSYMS=y
CONFIG_FUTEX=y CONFIG_FUTEX=y
...@@ -149,17 +149,12 @@ CONFIG_LBD=y ...@@ -149,17 +149,12 @@ CONFIG_LBD=y
# ATA/ATAPI/MFM/RLL support # ATA/ATAPI/MFM/RLL support
# #
CONFIG_IDE=y CONFIG_IDE=y
#
# IDE, ATA and ATAPI Block devices
#
CONFIG_BLK_DEV_IDE=y CONFIG_BLK_DEV_IDE=y
# #
# Please see Documentation/ide.txt for help/info on IDE drives # Please see Documentation/ide.txt for help/info on IDE drives
# #
# CONFIG_BLK_DEV_HD_IDE is not set # CONFIG_BLK_DEV_HD_IDE is not set
# CONFIG_BLK_DEV_HD is not set
CONFIG_BLK_DEV_IDEDISK=y CONFIG_BLK_DEV_IDEDISK=y
CONFIG_IDEDISK_MULTI_MODE=y CONFIG_IDEDISK_MULTI_MODE=y
# CONFIG_IDEDISK_STROKE is not set # CONFIG_IDEDISK_STROKE is not set
...@@ -174,15 +169,16 @@ CONFIG_BLK_DEV_IDECD=y ...@@ -174,15 +169,16 @@ CONFIG_BLK_DEV_IDECD=y
# #
# CONFIG_BLK_DEV_CMD640 is not set # CONFIG_BLK_DEV_CMD640 is not set
CONFIG_BLK_DEV_IDEPCI=y CONFIG_BLK_DEV_IDEPCI=y
# CONFIG_BLK_DEV_GENERIC is not set
# CONFIG_IDEPCI_SHARE_IRQ is not set # CONFIG_IDEPCI_SHARE_IRQ is not set
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_GENERIC is not set
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDE_TCQ is not set # CONFIG_BLK_DEV_IDE_TCQ is not set
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set # CONFIG_BLK_DEV_IDEDMA_FORCED is not set
CONFIG_IDEDMA_PCI_AUTO=y CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set # CONFIG_IDEDMA_ONLYDISK is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_PCI_WIP is not set # CONFIG_IDEDMA_PCI_WIP is not set
CONFIG_BLK_DEV_ADMA=y CONFIG_BLK_DEV_ADMA=y
# CONFIG_BLK_DEV_AEC62XX is not set # CONFIG_BLK_DEV_AEC62XX is not set
...@@ -192,23 +188,25 @@ CONFIG_BLK_DEV_AMD74XX=y ...@@ -192,23 +188,25 @@ CONFIG_BLK_DEV_AMD74XX=y
# CONFIG_BLK_DEV_TRIFLEX is not set # CONFIG_BLK_DEV_TRIFLEX is not set
# CONFIG_BLK_DEV_CY82C693 is not set # CONFIG_BLK_DEV_CY82C693 is not set
# CONFIG_BLK_DEV_CS5520 is not set # CONFIG_BLK_DEV_CS5520 is not set
# CONFIG_BLK_DEV_CS5530 is not set
# CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set # CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set # CONFIG_BLK_DEV_SC1200 is not set
# CONFIG_BLK_DEV_PIIX is not set # CONFIG_BLK_DEV_PIIX is not set
# CONFIG_BLK_DEV_NS87415 is not set # CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set # CONFIG_BLK_DEV_PDC202XX_OLD is not set
# CONFIG_BLK_DEV_PDC202XX_NEW is not set # CONFIG_BLK_DEV_PDC202XX_NEW is not set
# CONFIG_BLK_DEV_RZ1000 is not set
# CONFIG_BLK_DEV_SVWKS is not set # CONFIG_BLK_DEV_SVWKS is not set
# CONFIG_BLK_DEV_SIIMAGE is not set # CONFIG_BLK_DEV_SIIMAGE is not set
# CONFIG_BLK_DEV_SIS5513 is not set # CONFIG_BLK_DEV_SIS5513 is not set
# CONFIG_BLK_DEV_SLC90E66 is not set # CONFIG_BLK_DEV_SLC90E66 is not set
# CONFIG_BLK_DEV_TRM290 is not set # CONFIG_BLK_DEV_TRM290 is not set
# CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_BLK_DEV_VIA82CXXX is not set
CONFIG_IDEDMA_AUTO=y CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set # CONFIG_IDEDMA_IVB is not set
CONFIG_IDEDMA_AUTO=y
# CONFIG_DMA_NONPCI is not set
# CONFIG_BLK_DEV_HD is not set
# #
# SCSI device support # SCSI device support
...@@ -251,7 +249,7 @@ CONFIG_BLK_DEV_SD=y ...@@ -251,7 +249,7 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_EATA_PIO is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set CONFIG_SCSI_IPS=m
# CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_SYM53C8XX_2 is not set
...@@ -301,7 +299,6 @@ CONFIG_NET=y ...@@ -301,7 +299,6 @@ CONFIG_NET=y
CONFIG_PACKET=y CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set # CONFIG_PACKET_MMAP is not set
# CONFIG_NETLINK_DEV is not set # CONFIG_NETLINK_DEV is not set
# CONFIG_NETFILTER is not set
CONFIG_UNIX=y CONFIG_UNIX=y
# CONFIG_NET_KEY is not set # CONFIG_NET_KEY is not set
CONFIG_INET=y CONFIG_INET=y
...@@ -317,12 +314,10 @@ CONFIG_IP_MULTICAST=y ...@@ -317,12 +314,10 @@ CONFIG_IP_MULTICAST=y
# CONFIG_INET_AH is not set # CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set # CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set # CONFIG_INET_IPCOMP is not set
CONFIG_IPV6=y # CONFIG_IPV6 is not set
CONFIG_IPV6_PRIVACY=y # CONFIG_DECNET is not set
# CONFIG_INET6_AH is not set # CONFIG_BRIDGE is not set
# CONFIG_INET6_ESP is not set # CONFIG_NETFILTER is not set
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_XFRM_USER is not set # CONFIG_XFRM_USER is not set
# #
...@@ -333,8 +328,6 @@ CONFIG_IPV6_SCTP__=y ...@@ -333,8 +328,6 @@ CONFIG_IPV6_SCTP__=y
# CONFIG_ATM is not set # CONFIG_ATM is not set
# CONFIG_VLAN_8021Q is not set # CONFIG_VLAN_8021Q is not set
# CONFIG_LLC is not set # CONFIG_LLC is not set
# CONFIG_DECNET is not set
# CONFIG_BRIDGE is not set
# CONFIG_X25 is not set # CONFIG_X25 is not set
# CONFIG_LAPB is not set # CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set # CONFIG_NET_DIVERT is not set
...@@ -546,11 +539,7 @@ CONFIG_UNIX98_PTY_COUNT=256 ...@@ -546,11 +539,7 @@ CONFIG_UNIX98_PTY_COUNT=256
# #
# IPMI # IPMI
# #
CONFIG_IPMI_HANDLER=y # CONFIG_IPMI_HANDLER is not set
CONFIG_IPMI_PANIC_EVENT=y
CONFIG_IPMI_DEVICE_INTERFACE=y
CONFIG_IPMI_KCS=y
CONFIG_IPMI_WATCHDOG=y
# #
# Watchdog Cards # Watchdog Cards
...@@ -570,12 +559,7 @@ CONFIG_RTC=y ...@@ -570,12 +559,7 @@ CONFIG_RTC=y
# CONFIG_FTAPE is not set # CONFIG_FTAPE is not set
CONFIG_AGP=y CONFIG_AGP=y
CONFIG_AGP_AMD_8151=y CONFIG_AGP_AMD_8151=y
CONFIG_DRM=y # CONFIG_DRM is not set
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_GAMMA is not set
# CONFIG_DRM_R128 is not set
CONFIG_DRM_RADEON=y
# CONFIG_DRM_MGA is not set
# CONFIG_MWAVE is not set # CONFIG_MWAVE is not set
CONFIG_RAW_DRIVER=y CONFIG_RAW_DRIVER=y
CONFIG_HANGCHECK_TIMER=y CONFIG_HANGCHECK_TIMER=y
...@@ -598,19 +582,25 @@ CONFIG_HANGCHECK_TIMER=y ...@@ -598,19 +582,25 @@ CONFIG_HANGCHECK_TIMER=y
# File systems # File systems
# #
CONFIG_EXT2_FS=y CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
# CONFIG_EXT2_FS_SECURITY is not set
CONFIG_EXT3_FS=y CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
# CONFIG_EXT3_FS_SECURITY is not set
CONFIG_JBD=y CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set # CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
CONFIG_REISERFS_FS=y CONFIG_REISERFS_FS=y
# CONFIG_REISERFS_CHECK is not set # CONFIG_REISERFS_CHECK is not set
# CONFIG_REISERFS_PROC_INFO is not set # CONFIG_REISERFS_PROC_INFO is not set
# CONFIG_JFS_FS is not set CONFIG_JFS_FS=y
CONFIG_XFS_FS=m CONFIG_JFS_POSIX_ACL=y
# CONFIG_XFS_RT is not set # CONFIG_JFS_DEBUG is not set
# CONFIG_XFS_QUOTA is not set # CONFIG_JFS_STATISTICS is not set
# CONFIG_XFS_POSIX_ACL is not set CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set # CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set # CONFIG_ROMFS_FS is not set
# CONFIG_QUOTA is not set # CONFIG_QUOTA is not set
...@@ -684,6 +674,49 @@ CONFIG_SUNRPC=y ...@@ -684,6 +674,49 @@ CONFIG_SUNRPC=y
# #
# CONFIG_PARTITION_ADVANCED is not set # CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y CONFIG_MSDOS_PARTITION=y
CONFIG_NLS=y
#
# Native Language Support
#
CONFIG_NLS_DEFAULT="iso8859-1"
# CONFIG_NLS_CODEPAGE_437 is not set
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_UTF8 is not set
# #
# Graphics support # Graphics support
...@@ -759,8 +792,10 @@ CONFIG_DEBUG_KERNEL=y ...@@ -759,8 +792,10 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ=y
# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_INIT_DEBUG is not set # CONFIG_INIT_DEBUG is not set
# CONFIG_DEBUG_INFO is not set
# CONFIG_FRAME_POINTER is not set # CONFIG_FRAME_POINTER is not set
# CONFIG_IOMMU_DEBUG is not set CONFIG_IOMMU_DEBUG=y
CONFIG_IOMMU_LEAK=y
CONFIG_MCE_DEBUG=y CONFIG_MCE_DEBUG=y
# #
...@@ -771,21 +806,7 @@ CONFIG_MCE_DEBUG=y ...@@ -771,21 +806,7 @@ CONFIG_MCE_DEBUG=y
# #
# Cryptographic options # Cryptographic options
# #
CONFIG_CRYPTO=y # CONFIG_CRYPTO is not set
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_SHA1 is not set
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_DES is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES is not set
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_TEST is not set
# #
# Library routines # Library routines
......
...@@ -204,10 +204,9 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* ...@@ -204,10 +204,9 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t*
} }
static inline int static inline int
elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *xregs, elf_fpregset_t *fpu) elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
{ {
struct _fpstate_ia32 *fpstate = (void*)fpu; struct _fpstate_ia32 *fpstate = (void*)fpu;
struct pt_regs *regs = (struct pt_regs *)(tsk->thread.rsp0);
mm_segment_t oldfs = get_fs(); mm_segment_t oldfs = get_fs();
if (!tsk->used_math) if (!tsk->used_math)
......
...@@ -6,7 +6,7 @@ extra-y := head.o head64.o init_task.o ...@@ -6,7 +6,7 @@ extra-y := head.o head64.o init_task.o
EXTRA_AFLAGS := -traditional EXTRA_AFLAGS := -traditional
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bluesmoke.o bootflag.o e820.o reboot.o warmreboot.o setup64.o bluesmoke.o bootflag.o e820.o reboot.o warmreboot.o
obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MTRR) += mtrr/
...@@ -19,7 +19,8 @@ obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o ...@@ -19,7 +19,8 @@ obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULES) += module.o
$(obj)/bootflag.c: $(obj)/bootflag.c:
......
/* /*
* Firmware replacement code. * Firmware replacement code.
* *
* Work around broken BIOSes that don't set an aperture. * Work around broken BIOSes that don't set an aperture or only set the
* The IOMMU code needs an aperture even who no AGP is present in the system. * aperture in the AGP bridge.
* Map the aperture over some low memory. This is cheaper than doing bounce * If all fails map the aperture over some low memory. This is cheaper than
* buffering. The memory is lost. This is done at early boot because only * doing bounce buffering. The memory is lost. This is done at early boot
* the bootmem allocator can allocate 32+MB. * because only the bootmem allocator can allocate 32+MB.
* *
* Copyright 2002 Andi Kleen, SuSE Labs. * Copyright 2002 Andi Kleen, SuSE Labs.
* $Id: aperture.c,v 1.2 2002/09/19 19:25:32 ak Exp $ * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
*/ */
#include <linux/config.h> #include <linux/config.h>
#include <linux/kernel.h> #include <linux/kernel.h>
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/mmzone.h> #include <linux/mmzone.h>
#include <linux/pci_ids.h> #include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/proto.h> #include <asm/proto.h>
...@@ -45,10 +47,10 @@ static u32 __init allocate_aperture(void) ...@@ -45,10 +47,10 @@ static u32 __init allocate_aperture(void)
aper_size = (32 * 1024 * 1024) << fallback_aper_order; aper_size = (32 * 1024 * 1024) << fallback_aper_order;
/* /*
* Aperture has to be naturally aligned it seems. This means an * Aperture has to be naturally aligned. This means an 2GB aperture won't
* 2GB aperture won't have much changes to succeed in the lower 4GB of * have much chances to find a place in the lower 4GB of memory.
* memory. Unfortunately we cannot move it up because that would make * Unfortunately we cannot move it up because that would make the
* the IOMMU useless. * IOMMU useless.
*/ */
p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0); p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0);
if (!p || __pa(p)+aper_size > 0xffffffff) { if (!p || __pa(p)+aper_size > 0xffffffff) {
...@@ -63,21 +65,136 @@ static u32 __init allocate_aperture(void) ...@@ -63,21 +65,136 @@ static u32 __init allocate_aperture(void)
return (u32)__pa(p); return (u32)__pa(p);
} }
/*
 * Sanity-check an aperture description.
 *
 * name      - label of the aperture's origin, used only in messages
 * aper_base - base address of the aperture
 * aper_size - aperture size in bytes
 *
 * Returns 1 when the aperture is usable and 0 otherwise.  A zero base is
 * rejected silently; every other rejection is logged.  The checks are
 * applied in order: minimum size of 64MB, end of the aperture below
 * 0xffffffff, and no overlap with e820 RAM.
 */
static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
{
	u64 aper_end = aper_base + aper_size;
	int usable = 0;

	if (aper_base == 0) {
		/* nothing programmed at all - reject without a message */
	} else if (aper_size < 64*1024*1024) {
		printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20);
	} else if (aper_end >= 0xffffffff) {
		printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
	} else if (e820_mapped(aper_base, aper_end, E820_RAM)) {
		printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
	} else {
		usable = 1;
	}

	return usable;
}
/*
 * Walk the capability list of device num:slot.func looking for
 * capability id `cap`.
 *
 * Returns the config-space offset of the matching capability, or 0 when
 * the device reports no capability list or the id is not found.  The walk
 * gives up after 48 entries, when an offset below 0x40 is reached, or
 * when an id of 0xff is read.
 */
static __u32 __init find_cap(int num, int slot, int func, int cap)
{
	u8 offset, cap_id;
	int hops = 0;

	if ((read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST) == 0)
		return 0;

	offset = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST);
	while (hops < 48 && offset >= 0x40) {
		/* the two low bits of a capability pointer are masked off */
		offset &= ~3;
		cap_id = read_pci_config_byte(num,slot,func,offset+PCI_CAP_LIST_ID);
		if (cap_id == 0xff)
			return 0;
		if (cap_id == cap)
			return offset;
		offset = read_pci_config_byte(num,slot,func,offset+PCI_CAP_LIST_NEXT);
		hops++;
	}
	return 0;
}
/*
 * Read a standard AGPv3 bridge header.
 *
 * num/slot/func - location of the AGP bridge
 * cap           - config-space offset of its AGP capability
 * order         - out: aperture order, i.e. size is 32MB << *order
 *
 * Returns the low 32 bits of the aperture base taken from the bridge, or
 * 0 when the size register cannot be read or the aperture fails
 * aperture_valid().  *order is written whenever the size register was
 * readable, even if the aperture is then rejected.
 */
static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
{
	u32 apsizereg, apsize;
	u32 aper_low, aper_hi;
	u64 aper;
	int nbits;

	printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func);

	apsizereg = read_pci_config_16(num,slot,func, cap + 0x14);
	/*
	 * The register is fetched with a 16-bit access, so only the low
	 * 16 bits can signal an all-ones (unreadable) result.  Comparing
	 * against 0xffffffff presumably never matched, which let a dead
	 * register be decoded as order 0.
	 * NOTE(review): assumes read_pci_config_16 returns a 16-bit value -
	 * confirm against its definition.
	 */
	if ((apsizereg & 0xffff) == 0xffff) {
		printk("APSIZE in AGP bridge unreadable\n");
		return 0;
	}

	apsize = apsizereg & 0xfff;
	/* Some BIOS use weird encodings not in the AGPv3 table. */
	if (apsize & 0xff)
		apsize |= 0xf00;
	nbits = hweight16(apsize);
	*order = 7 - nbits;
	if ((int)*order < 0) /* < 32MB */
		*order = 0;

	aper_low = read_pci_config(num,slot,func, 0x10);
	aper_hi = read_pci_config(num,slot,func,0x14);
	/* drop the low 22 bits of the lower dword, then add the upper dword */
	aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);

	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
	       aper, 32 << *order, apsizereg);

	if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order))
		return 0;
	return (u32)aper;
}
/*
 * Look for an AGP bridge and take the aperture from it.
 *
 * Windows only expects the aperture in the AGP bridge, and some BIOSes
 * forget to initialize the northbridge as well; this works around that.
 * The scan is done by hand because it runs before the PCI subsystem is
 * available.  All K8 AGP bridges are AGPv3 compliant, so the scan can be
 * generic.  Scanning every slot is probably overkill, because the AGP
 * bridge should always sit on its own bus in the HT hierarchy, but it is
 * done anyway for future safety.
 *
 * order     - out: aperture order as filled in by read_agp()
 * valid_agp - out: set to 1 as soon as a bridge with an AGP capability
 *             is found, whether or not its aperture is usable
 *
 * Returns whatever read_agp() returns for the first AGP bridge found, or
 * 0 when no bridge exists.
 */
static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
{
	int bus, dev, fn;

	/* Poor man's PCI discovery */
	for (bus = 0; bus < 32; bus++) {
		for (dev = 0; dev < 32; dev++) {
			for (fn = 0; fn < 8; fn++) {
				u32 class_rev, class_code, agp_cap;
				u8 hdr_type;

				class_rev = read_pci_config(bus,dev,fn,
							    PCI_CLASS_REVISION);
				if (class_rev == 0xffffffff)
					break;

				class_code = class_rev >> 16;
				if (class_code == PCI_CLASS_BRIDGE_HOST ||
				    class_code == PCI_CLASS_BRIDGE_OTHER /* needed? */) {
					/* AGP bridge? */
					agp_cap = find_cap(bus,dev,fn,PCI_CAP_ID_AGP);
					if (agp_cap) {
						*valid_agp = 1;
						return read_agp(bus,dev,fn,agp_cap,order);
					}
				}

				/* No multi-function device? */
				hdr_type = read_pci_config_byte(bus,dev,fn,
								PCI_HEADER_TYPE);
				if ((hdr_type & 0x80) == 0)
					break;
			}
		}
	}
	printk("No AGP bridge found\n");
	return 0;
}
void __init iommu_hole_init(void) void __init iommu_hole_init(void)
{ {
int fix, num; int fix, num;
u32 aper_size, aper_alloc, aper_order; u32 aper_size, aper_alloc = 0, aper_order;
u64 aper_base; u64 aper_base;
int valid_agp = 0;
if (no_iommu)
return;
if (end_pfn < (0xffffffff>>PAGE_SHIFT) && !force_mmu)
return;
printk("Checking aperture...\n"); printk("Checking aperture...\n");
fix = 0; fix = 0;
for (num = 24; num < 32; num++) { for (num = 24; num < 32; num++) {
char name[30];
if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
continue; continue;
...@@ -86,15 +203,12 @@ void __init iommu_hole_init(void) ...@@ -86,15 +203,12 @@ void __init iommu_hole_init(void)
aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
aper_base <<= 25; aper_base <<= 25;
printk("CPU %d: aperture @ %Lx size %u KB\n", num-24, printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
aper_base, aper_size>>10); aper_base, aper_size>>20);
if (!aper_base || aper_base + aper_size >= 0xffffffff) {
fix = 1;
break;
}
if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) { sprintf(name, "northbridge cpu %d", num-24);
printk("Aperture pointing to e820 RAM. Ignoring.\n");
if (!aperture_valid(name, aper_base, aper_size)) {
fix = 1; fix = 1;
break; break;
} }
...@@ -103,12 +217,40 @@ void __init iommu_hole_init(void) ...@@ -103,12 +217,40 @@ void __init iommu_hole_init(void)
if (!fix && !fallback_aper_force) if (!fix && !fallback_aper_force)
return; return;
if (!fallback_aper_force)
aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
} else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) ||
force_iommu ||
valid_agp ||
fallback_aper_force) {
/* When there is a AGP bridge in the system assume the
user wants to use the AGP driver too and needs an
aperture. However this case (AGP but no good
aperture) should only happen with a more broken than
usual BIOS, because it would even break Windows. */
printk("Your BIOS doesn't leave a aperture memory hole\n"); printk("Your BIOS doesn't leave a aperture memory hole\n");
printk("Please enable the IOMMU option in the BIOS setup\n"); printk("Please enable the IOMMU option in the BIOS setup\n");
printk("This costs you %d MB of RAM\n", 32 << fallback_aper_order);
aper_order = fallback_aper_order;
aper_alloc = allocate_aperture(); aper_alloc = allocate_aperture();
if (!aper_alloc) if (!aper_alloc) {
/* Could disable AGP and IOMMU here, but it's probably
not worth it. But the later users cannot deal with
bad apertures and turning on the aperture over memory
causes very strange problems, so it's better to
panic early. */
panic("Not enough memory for aperture");
}
} else {
return; return;
}
/* Fix up the north bridges */
for (num = 24; num < 32; num++) { for (num = 24; num < 32; num++) {
if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
continue; continue;
...@@ -116,7 +258,7 @@ void __init iommu_hole_init(void) ...@@ -116,7 +258,7 @@ void __init iommu_hole_init(void)
/* Don't enable translation yet. That is done later. /* Don't enable translation yet. That is done later.
Assume this BIOS didn't initialise the GART so Assume this BIOS didn't initialise the GART so
just overwrite all previous bits */ just overwrite all previous bits */
write_pci_config(0, num, 3, 0x90, fallback_aper_order<<1); write_pci_config(0, num, 3, 0x90, aper_order<<1);
write_pci_config(0, num, 3, 0x94, aper_alloc>>25); write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
} }
} }
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
* -1: the lapic NMI watchdog is disabled, but can be enabled * -1: the lapic NMI watchdog is disabled, but can be enabled
*/ */
static int nmi_active; static int nmi_active;
static int panic_on_timeout;
unsigned int nmi_watchdog = NMI_IO_APIC; unsigned int nmi_watchdog = NMI_IO_APIC;
static unsigned int nmi_hz = HZ; static unsigned int nmi_hz = HZ;
...@@ -115,6 +116,14 @@ static int __init setup_nmi_watchdog(char *str) ...@@ -115,6 +116,14 @@ static int __init setup_nmi_watchdog(char *str)
{ {
int nmi; int nmi;
if (!strncmp(str,"panic",5)) {
panic_on_timeout = 1;
str = strchr(str, ',');
if (!str)
return 1;
++str;
}
get_option(&str, &nmi); get_option(&str, &nmi);
if (nmi >= NMI_INVALID) if (nmi >= NMI_INVALID)
...@@ -327,6 +336,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) ...@@ -327,6 +336,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
bust_spinlocks(1); bust_spinlocks(1);
printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
show_registers(regs); show_registers(regs);
if (panic_on_timeout)
panic("nmi watchdog");
printk("console shuts up ...\n"); printk("console shuts up ...\n");
console_silent(); console_silent();
spin_unlock(&nmi_print_lock); spin_unlock(&nmi_print_lock);
...@@ -374,3 +385,4 @@ EXPORT_SYMBOL(disable_lapic_nmi_watchdog); ...@@ -374,3 +385,4 @@ EXPORT_SYMBOL(disable_lapic_nmi_watchdog);
EXPORT_SYMBOL(enable_lapic_nmi_watchdog); EXPORT_SYMBOL(enable_lapic_nmi_watchdog);
EXPORT_SYMBOL(disable_timer_nmi_watchdog); EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog); EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);
...@@ -9,8 +9,6 @@ ...@@ -9,8 +9,6 @@
#include <linux/module.h> #include <linux/module.h>
#include <asm/io.h> #include <asm/io.h>
dma_addr_t bad_dma_address = -1UL;
/* Map a set of buffers described by scatterlist in streaming /* Map a set of buffers described by scatterlist in streaming
* mode for DMA. This is the scatter-gather version of the * mode for DMA. This is the scatter-gather version of the
* above pci_map_single interface. Here the scatter gather list * above pci_map_single interface. Here the scatter gather list
...@@ -34,16 +32,9 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, ...@@ -34,16 +32,9 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
BUG_ON(direction == PCI_DMA_NONE); BUG_ON(direction == PCI_DMA_NONE);
for (i = 0; i < nents; i++ ) { for (i = 0; i < nents; i++ ) {
struct scatterlist *s = &sg[i]; struct scatterlist *s = &sg[i];
BUG_ON(!s->page); BUG_ON(!s->page);
s->dma_address = pci_map_page(hwdev, s->page, s->offset, s->dma_address = pci_map_page(hwdev, s->page, s->offset,
s->length, direction); s->length, direction);
if (unlikely(s->dma_address == bad_dma_address)) {
pci_unmap_sg(hwdev, sg, i, direction);
return 0;
}
} }
return nents; return nents;
} }
......
...@@ -8,20 +8,8 @@ ...@@ -8,20 +8,8 @@
* See Documentation/DMA-mapping.txt for the interface specification. * See Documentation/DMA-mapping.txt for the interface specification.
* *
* Copyright 2002 Andi Kleen, SuSE Labs. * Copyright 2002 Andi Kleen, SuSE Labs.
* $Id: pci-gart.c,v 1.20 2003/03/12 08:23:29 ak Exp $
*/ */
/*
* Notebook:
possible future tuning:
fast path for sg streaming mappings - only take the locks once.
more intelligent flush strategy - flush only the NB of the CPU directly
connected to the device?
move boundary between IOMMU and AGP in GART dynamically
*/
#include <linux/config.h> #include <linux/config.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/ctype.h> #include <linux/ctype.h>
...@@ -32,6 +20,8 @@ possible future tuning: ...@@ -32,6 +20,8 @@ possible future tuning:
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/topology.h>
#include <asm/atomic.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/mtrr.h> #include <asm/mtrr.h>
#include <asm/bitops.h> #include <asm/bitops.h>
...@@ -41,6 +31,8 @@ possible future tuning: ...@@ -41,6 +31,8 @@ possible future tuning:
#include <asm/kdebug.h> #include <asm/kdebug.h>
#include <asm/proto.h> #include <asm/proto.h>
dma_addr_t bad_dma_address;
unsigned long iommu_bus_base; /* GART remapping area (physical) */ unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area bytes */ static unsigned long iommu_size; /* size of remapping area bytes */
static unsigned long iommu_pages; /* .. and in pages */ static unsigned long iommu_pages; /* .. and in pages */
...@@ -50,9 +42,13 @@ u32 *iommu_gatt_base; /* Remapping table */ ...@@ -50,9 +42,13 @@ u32 *iommu_gatt_base; /* Remapping table */
int no_iommu; int no_iommu;
static int no_agp; static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG #ifdef CONFIG_IOMMU_DEBUG
int force_mmu = 1; int panic_on_overflow = 1;
int force_iommu = 1;
int sac_force_size = 0;
#else #else
int force_mmu = 0; int panic_on_overflow = 1; /* for testing */
int force_iommu = 0;
int sac_force_size = 256*1024*1024;
#endif #endif
/* Allocation bitmap for the remapping area */ /* Allocation bitmap for the remapping area */
...@@ -65,12 +61,18 @@ static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ ...@@ -65,12 +61,18 @@ static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
/*
 * Page count for a buffer of `size` bytes starting at `addr`: the bits of
 * addr selected by ~PAGE_MASK are added to size, the sum is rounded up to
 * a multiple of PAGE_SIZE and then shifted down by PAGE_SHIFT.
 * Each argument is expanded exactly once.
 * NOTE(review): presumably ~PAGE_MASK selects the offset within a page -
 * confirm against the PAGE_MASK definition.
 */
#define to_pages(addr,size) \
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
#define for_all_nb(dev) \ #define for_all_nb(dev) \
dev=NULL; \ dev = NULL; \
while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) \ while ((dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
if (dev->bus->number == 0 && PCI_FUNC(dev->devfn) == 3 && \ if (dev->bus->number == 0 && \
(PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31)) (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
static struct pci_dev *northbridges[NR_CPUS + 1];
static u32 northbridge_flush_word[NR_CPUS + 1];
#define EMERGENCY_PAGES 32 /* = 128KB */ #define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP #ifdef CONFIG_AGP
...@@ -85,15 +87,16 @@ AGPEXTERN int agp_memory_reserved; ...@@ -85,15 +87,16 @@ AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table; AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static unsigned long next_bit; /* protected by iommu_bitmap_lock */
static int need_flush; /* global flush state. set for each gart wrap */
static unsigned long alloc_iommu(int size, int *flush) static unsigned long alloc_iommu(int size)
{ {
unsigned long offset, flags; unsigned long offset, flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags); spin_lock_irqsave(&iommu_bitmap_lock, flags);
offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
if (offset == -1) { if (offset == -1) {
*flush = 1; need_flush = 1;
offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
} }
if (offset != -1) { if (offset != -1) {
...@@ -101,7 +104,7 @@ static unsigned long alloc_iommu(int size, int *flush) ...@@ -101,7 +104,7 @@ static unsigned long alloc_iommu(int size, int *flush)
next_bit = offset+size; next_bit = offset+size;
if (next_bit >= iommu_pages) { if (next_bit >= iommu_pages) {
next_bit = 0; next_bit = 0;
*flush = 1; need_flush = 1;
} }
} }
spin_unlock_irqrestore(&iommu_bitmap_lock, flags); spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
...@@ -110,32 +113,59 @@ static unsigned long alloc_iommu(int size, int *flush) ...@@ -110,32 +113,59 @@ static unsigned long alloc_iommu(int size, int *flush)
static void free_iommu(unsigned long offset, int size) static void free_iommu(unsigned long offset, int size)
{ {
if (size == 1) {
clear_bit(offset, iommu_gart_bitmap);
return;
}
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags); spin_lock_irqsave(&iommu_bitmap_lock, flags);
clear_bit_string(iommu_gart_bitmap, offset, size); __clear_bit_string(iommu_gart_bitmap, offset, size);
spin_unlock_irqrestore(&iommu_bitmap_lock, flags); spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
} }
static inline void flush_gart(void) /*
* Use global flush state to avoid races with multiple flushers.
*/
static void __flush_gart(struct pci_dev *dev)
{ {
struct pci_dev *nb; unsigned long flags;
for_all_nb(nb) { int bus = dev ? dev->bus->number : -1;
u32 flag; int flushed = 0;
pci_read_config_dword(nb, 0x9c, &flag); /* could cache this */ int i;
/* could complain for PTE walk errors here (bit 1 of flag) */
flag |= 1; spin_lock_irqsave(&iommu_bitmap_lock, flags);
pci_write_config_dword(nb, 0x9c, flag); /* recheck flush count inside lock */
if (need_flush) {
for (i = 0; northbridges[i]; i++) {
if (bus >= 0 && !(pcibus_to_cpumask(bus) & (1UL << i)))
continue;
pci_write_config_dword(northbridges[i], 0x9c,
northbridge_flush_word[i] | 1);
flushed++;
}
if (!flushed)
printk("nothing to flush? %d\n", bus);
need_flush = 0;
} }
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
} }
/*
 * Flush the GART for dev if a flush has been requested.
 *
 * The need_flush test here is done without the lock and only serves as a
 * cheap early-out; __flush_gart() rechecks the flag under
 * iommu_bitmap_lock before touching the northbridges.
 */
static inline void flush_gart(struct pci_dev *dev)
{
	if (!need_flush)
		return;
	__flush_gart(dev);
}
/*
* Allocate memory for a consistent mapping.
* All mappings are consistent here, so this is just a wrapper around
* pci_map_single.
*/
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
dma_addr_t *dma_handle) dma_addr_t *dma_handle)
{ {
void *memory; void *memory;
int gfp = GFP_ATOMIC; int gfp = GFP_ATOMIC;
int i;
int flush = 0;
unsigned long iommu_page;
unsigned long dma_mask; unsigned long dma_mask;
if (hwdev == NULL) { if (hwdev == NULL) {
...@@ -149,20 +179,14 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, ...@@ -149,20 +179,14 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
if (dma_mask < 0xffffffff || no_iommu) if (dma_mask < 0xffffffff || no_iommu)
gfp |= GFP_DMA; gfp |= GFP_DMA;
/*
* First try to allocate continuous and use directly if already
* in lowmem.
*/
size = round_up(size, PAGE_SIZE);
memory = (void *)__get_free_pages(gfp, get_order(size)); memory = (void *)__get_free_pages(gfp, get_order(size));
if (memory == NULL) { if (memory == NULL) {
return NULL; return NULL;
} else { } else {
int high = 0, mmu; int high, mmu;
if (((unsigned long)virt_to_bus(memory) + size) > dma_mask) high = ((unsigned long)virt_to_bus(memory) + size) >= dma_mask;
high = 1; mmu = high;
mmu = 1; if (force_iommu && !(gfp & GFP_DMA))
if (force_mmu && !(gfp & GFP_DMA))
mmu = 1; mmu = 1;
if (no_iommu) { if (no_iommu) {
if (high) goto error; if (high) goto error;
...@@ -175,27 +199,15 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, ...@@ -175,27 +199,15 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
} }
} }
size >>= PAGE_SHIFT; *dma_handle = pci_map_single(hwdev, memory, size, 0);
if (*dma_handle == bad_dma_address)
iommu_page = alloc_iommu(size, &flush);
if (iommu_page == -1)
goto error; goto error;
/* Fill in the GATT */
for (i = 0; i < size; i++) {
unsigned long phys_mem;
void *mem = memory + i*PAGE_SIZE;
phys_mem = virt_to_phys(mem);
BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
}
if (flush)
flush_gart();
*dma_handle = iommu_bus_base + (iommu_page << PAGE_SHIFT);
return memory; return memory;
error: error:
if (panic_on_overflow)
panic("pci_map_single: overflow %lu bytes\n", size);
free_pages((unsigned long)memory, get_order(size)); free_pages((unsigned long)memory, get_order(size));
return NULL; return NULL;
} }
...@@ -207,25 +219,17 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, ...@@ -207,25 +219,17 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
void pci_free_consistent(struct pci_dev *hwdev, size_t size, void pci_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t bus) void *vaddr, dma_addr_t bus)
{ {
unsigned long iommu_page; pci_unmap_single(hwdev, bus, size, 0);
size = round_up(size, PAGE_SIZE);
if (bus >= iommu_bus_base && bus <= iommu_bus_base + iommu_size) {
unsigned pages = size >> PAGE_SHIFT;
int i;
iommu_page = (bus - iommu_bus_base) >> PAGE_SHIFT;
vaddr = __va(GPTE_DECODE(iommu_gatt_base[iommu_page]));
for (i = 0; i < pages; i++) {
u64 pte = iommu_gatt_base[iommu_page + i];
BUG_ON((pte & GPTE_VALID) == 0);
iommu_gatt_base[iommu_page + i] = 0;
}
free_iommu(iommu_page, pages);
}
free_pages((unsigned long)vaddr, get_order(size)); free_pages((unsigned long)vaddr, get_order(size));
} }
#ifdef CONFIG_IOMMU_LEAK #ifdef CONFIG_IOMMU_LEAK
/*
 * Leak tracing helpers: record (SET_LEAK) or clear (CLEAR_LEAK) the owner
 * of IOMMU page x in iommu_leak_tab, if the table exists.
 *
 * Wrapped in do { } while (0) so that each macro expands to exactly one
 * statement: a bare "if" would capture a following "else" of the caller,
 * and a trailing ';' inside the macro would break "if (c) M(x); else ...".
 * Callers must terminate the invocation with ';'.
 */
#define SET_LEAK(x) do { \
		if (iommu_leak_tab) \
			iommu_leak_tab[(x)] = __builtin_return_address(0); \
	} while (0)
#define CLEAR_LEAK(x) do { \
		if (iommu_leak_tab) \
			iommu_leak_tab[(x)] = 0; \
	} while (0)
/* Debugging aid for drivers that don't free their IOMMU tables */ /* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab; static void **iommu_leak_tab;
static int leak_trace; static int leak_trace;
...@@ -246,9 +250,12 @@ void dump_leak(void) ...@@ -246,9 +250,12 @@ void dump_leak(void)
} }
printk("\n"); printk("\n");
} }
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif #endif
static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir) static void iommu_full(struct pci_dev *dev, size_t size, int dir)
{ {
/* /*
* Ran out of IOMMU space for this operation. This is very bad. * Ran out of IOMMU space for this operation. This is very bad.
...@@ -261,8 +268,8 @@ static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir) ...@@ -261,8 +268,8 @@ static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir)
*/ */
printk(KERN_ERR printk(KERN_ERR
"PCI-DMA: Out of IOMMU space for %p size %lu at device %s[%s]\n", "PCI-DMA: Out of IOMMU space for %lu bytes at device %s[%s]\n",
addr,size, dev ? dev->dev.name : "?", dev ? pci_name(dev) : "?"); size, dev ? dev->dev.name : "?", dev ? dev->slot_name : "?");
if (size > PAGE_SIZE*EMERGENCY_PAGES) { if (size > PAGE_SIZE*EMERGENCY_PAGES) {
if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
...@@ -279,24 +286,61 @@ static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir) ...@@ -279,24 +286,61 @@ static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir)
static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size) static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{ {
u64 mask = dev ? dev->dma_mask : 0xffffffff; u64 mask = dev ? dev->dma_mask : 0xffffffff;
int high = (~mask & (unsigned long)(addr + size)) != 0; int high = addr + size >= mask;
int mmu = high; int mmu = high;
if (force_mmu) if (force_iommu)
mmu = 1; mmu = 1;
if (no_iommu) { if (no_iommu) {
if (high) if (high)
panic("pci_map_single: high address but no IOMMU.\n"); panic("PCI-DMA: high address but no IOMMU.\n");
mmu = 0;
}
return mmu;
}
static inline int nonforced_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
u64 mask = dev ? dev->dma_mask : 0xffffffff;
int high = addr + size >= mask;
int mmu = high;
if (no_iommu) {
if (high)
panic("PCI-DMA: high address but no IOMMU.\n");
mmu = 0; mmu = 0;
} }
return mmu; return mmu;
} }
/*
 * Map one physically continuous area into the IOMMU.
 *
 * The caller is responsible for deciding that the IOMMU is needed and for
 * flushing the GART afterwards.
 *
 * Returns the bus address inside the IOMMU window (including the offset of
 * phys_mem within its page). If no IOMMU space is left, phys_mem itself is
 * returned when nonforced_iommu() says the device does not need remapping;
 * otherwise the overflow is reported (or panics when panic_on_overflow is
 * set) and bad_dma_address is returned.
 */
static dma_addr_t pci_map_area(struct pci_dev *dev, unsigned long phys_mem,
				size_t size, int dir)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long first = alloc_iommu(npages);
	unsigned long slot;
	int n;

	if (first == -1) {
		/* No room: hand back the raw address if the device can cope. */
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("pci_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir);
		return bad_dma_address;
	}

	/* Fill one GATT entry per page, advancing phys_mem page by page. */
	slot = first;
	for (n = 0; n < npages; n++, slot++, phys_mem += PAGE_SIZE) {
		iommu_gatt_base[slot] = GPTE_ENCODE(phys_mem);
		SET_LEAK(slot);
	}

	/* Adding whole pages above left the in-page offset of phys_mem intact. */
	return iommu_bus_base + first*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
/* Map a single area into the IOMMU */
dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir) dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir)
{ {
unsigned long iommu_page;
unsigned long phys_mem, bus; unsigned long phys_mem, bus;
int i, npages;
int flush = 0;
BUG_ON(dir == PCI_DMA_NONE); BUG_ON(dir == PCI_DMA_NONE);
...@@ -304,39 +348,158 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir) ...@@ -304,39 +348,158 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir)
if (!need_iommu(dev, phys_mem, size)) if (!need_iommu(dev, phys_mem, size))
return phys_mem; return phys_mem;
npages = round_up(size + ((u64)addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT; bus = pci_map_area(dev, phys_mem, size, dir);
flush_gart(dev);
return bus;
}
/* Fallback for pci_map_sg in case of overflow */
static int pci_map_sg_nonforce(struct pci_dev *dev, struct scatterlist *sg,
int nents, int dir)
{
int i;
iommu_page = alloc_iommu(npages, &flush); #ifdef CONFIG_IOMMU_DEBUG
if (iommu_page == -1) { printk(KERN_DEBUG "pci_map_sg overflow\n");
iommu_full(dev, addr, size, dir); #endif
return iommu_bus_base;
for (i = 0; i < nents; i++ ) {
struct scatterlist *s = &sg[i];
unsigned long addr = page_to_phys(s->page) + s->offset;
if (nonforced_iommu(dev, addr, s->length)) {
addr = pci_map_area(dev, addr, s->length, dir);
if (addr == bad_dma_address) {
if (i > 0)
pci_unmap_sg(dev, sg, i, dir);
nents = 0;
break;
}
}
s->dma_address = addr;
} }
flush_gart(dev);
return nents;
}
phys_mem &= PAGE_MASK; /* Map multiple scatterlist entries continuous into the first. */
for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) { static int __pci_map_cont(struct scatterlist *sg, int start, int stopat,
BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK); struct scatterlist *sout, unsigned long pages)
{
unsigned long iommu_start = alloc_iommu(pages);
if (iommu_start == -1)
return -1;
/* unsigned long iommu_page = iommu_start;
* Set coherent mapping here to avoid needing to flush int i;
* the caches on mapping.
for (i = start; i < stopat; i++) {
struct scatterlist *s = &sg[i];
unsigned long start_addr = s->dma_address;
BUG_ON(i > 0 && s->offset);
if (i == start) {
*sout = *s;
sout->dma_address = iommu_bus_base;
sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
} else {
sout->length += s->length;
}
unsigned long addr = start_addr;
while (addr < start_addr + s->length) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
addr += PAGE_SIZE;
iommu_page++;
}
BUG_ON(i > 0 && addr % PAGE_SIZE);
}
BUG_ON(iommu_page - iommu_start != pages);
return 0;
}
static inline int pci_map_cont(struct scatterlist *sg, int start, int stopat,
struct scatterlist *sout,
unsigned long pages, int need)
{
if (!need) {
BUG_ON(stopat - start != 1);
*sout = sg[start];
return 0;
}
return __pci_map_cont(sg, start, stopat, sout, pages);
}
#define PCI_NO_MERGE 0
/*
* DMA map all entries in a scatterlist.
* Merge chunks that have page aligned sizes into a continuous mapping.
*/ */
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); int pci_map_sg(struct pci_dev *dev, struct scatterlist *sg, int nents, int dir)
{
int i;
int out;
int start;
unsigned long pages = 0;
int need = 0;
#ifdef CONFIG_IOMMU_LEAK unsigned long size = 0;
/* XXX need eventually caller of pci_map_sg */
if (iommu_leak_tab) BUG_ON(dir == PCI_DMA_NONE);
iommu_leak_tab[iommu_page + i] = __builtin_return_address(0); if (nents == 0)
#endif return 0;
out = 0;
start = 0;
for (i = 0; i < nents; i++) {
struct scatterlist *s = &sg[i];
dma_addr_t addr = page_to_phys(s->page) + s->offset;
s->dma_address = addr;
BUG_ON(s->length == 0);
size += s->length;
/* Handle the previous not yet processed entries */
if (i > start) {
struct scatterlist *ps = &sg[i-1];
/* Can only merge when the last chunk ends on a page
boundary. */
if (PCI_NO_MERGE || !need || (i-1 > start && ps->offset) ||
(ps->offset + ps->length) % PAGE_SIZE) {
if (pci_map_cont(sg, start, i, sg+out, pages,
need) < 0)
goto error;
out++;
pages = 0;
start = i;
}
} }
if (flush)
flush_gart();
bus = iommu_bus_base + iommu_page*PAGE_SIZE; need = need_iommu(dev, addr, s->length);
return bus + ((unsigned long)addr & ~PAGE_MASK); pages += to_pages(s->offset, s->length);
}
if (pci_map_cont(sg, start, i, sg+out, pages, need) < 0)
goto error;
out++;
flush_gart(dev);
if (out < nents)
sg[out].length = 0;
return out;
error:
flush_gart(NULL);
pci_unmap_sg(dev, sg, nents, dir);
/* When it was forced try again unforced */
if (force_iommu)
return pci_map_sg_nonforce(dev, sg, nents, dir);
if (panic_on_overflow)
panic("pci_map_sg: overflow on %lu pages\n", pages);
iommu_full(dev, pages << PAGE_SHIFT, dir);
for (i = 0; i < nents; i++)
sg[i].dma_address = bad_dma_address;
return 0;
} }
/* /*
* Free a temporary PCI mapping. * Free a PCI mapping.
*/ */
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
size_t size, int direction) size_t size, int direction)
...@@ -347,20 +510,68 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, ...@@ -347,20 +510,68 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
dma_addr > iommu_bus_base + iommu_size) dma_addr > iommu_bus_base + iommu_size)
return; return;
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
npages = round_up(size + (dma_addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT; npages = to_pages(dma_addr, size);
int i; int i;
for (i = 0; i < npages; i++) { for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = 0; iommu_gatt_base[iommu_page + i] = 0;
#ifdef CONFIG_IOMMU_LEAK CLEAR_LEAK(iommu_page + i);
if (iommu_leak_tab)
iommu_leak_tab[iommu_page + i] = 0;
#endif
} }
free_iommu(iommu_page, npages); free_iommu(iommu_page, npages);
} }
/*
 * Unmap a scatterlist by calling pci_unmap_single() on each entry.
 *
 * Walks at most nents entries and stops early at the first entry whose
 * length is zero (pci_map_sg() writes such a terminator after the last
 * merged entry when it produced fewer entries than it was given).
 */
void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg, int nents,
		  int dir)
{
	int idx = 0;

	while (idx < nents && sg[idx].length) {
		pci_unmap_single(dev, sg[idx].dma_address, sg[idx].length, dir);
		idx++;
	}
}
/*
 * Report whether DMA with the address mask `mask` is supported for dev.
 *
 * Returns 1 if the mask is accepted and 0 if it is rejected.
 * dev may be NULL; it is only used to name the device in the log message.
 */
int pci_dma_supported(struct pci_dev *dev, u64 mask)
{
	/* Copied from i386. Doesn't make much sense, because it will
	   only work for pci_alloc_consistent.
	   The caller just has to use GFP_DMA in this case. */
	if (mask < 0x00ffffff)
		return 0;

	/* Tell the device to use SAC when IOMMU force is on.
	   This allows the driver to use cheaper accesses in some cases.

	   The problem with this is that if we overflow the IOMMU area
	   and return a DAC address as fallback, the device may not handle
	   it correctly. As a compromise we only do this if the IOMMU area
	   is at least sac_force_size (>= 256MB according to the original
	   note), which should make overflow unlikely enough.

	   As a special case some controllers have a 39bit address mode
	   that is as efficient as 32bit (aic79xx). Don't force SAC for these.
	   Assume all masks <= 40 bits are of this type. Normally this doesn't
	   make any difference, but gives more gentle handling of IOMMU
	   overflow. */
	if (force_iommu && (mask > 0xffffffffffULL) && (iommu_size >= sac_force_size)){
		/* dev can be NULL elsewhere in this file, so don't dereference blindly. */
		printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
		       dev ? dev->slot_name : "?", mask);
		return 0;
	}

	/* Without an IOMMU the mask has to cover all of memory up to end_pfn. */
	if (no_iommu && (mask < (end_pfn << PAGE_SHIFT)))
		return 0;

	return 1;
}
EXPORT_SYMBOL(pci_unmap_sg);
EXPORT_SYMBOL(pci_map_sg);
EXPORT_SYMBOL(pci_map_single); EXPORT_SYMBOL(pci_map_single);
EXPORT_SYMBOL(pci_unmap_single); EXPORT_SYMBOL(pci_unmap_single);
EXPORT_SYMBOL(pci_dma_supported);
EXPORT_SYMBOL(no_iommu);
EXPORT_SYMBOL(force_iommu);
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{ {
...@@ -452,13 +663,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) ...@@ -452,13 +663,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
pci_write_config_dword(dev, 0x90, ctl); pci_write_config_dword(dev, 0x90, ctl);
} }
flush_gart(); flush_gart(NULL);
printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
return 0; return 0;
nommu: nommu:
/* XXX: reject 0xffffffff mask now in pci mapping functions */
printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction."); KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.");
return -1; return -1;
...@@ -466,11 +676,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) ...@@ -466,11 +676,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
extern int agp_amdk8_init(void); extern int agp_amdk8_init(void);
int __init pci_iommu_init(void) static int __init pci_iommu_init(void)
{ {
struct agp_kern_info info; struct agp_kern_info info;
unsigned long aper_size; unsigned long aper_size;
unsigned long iommu_start; unsigned long iommu_start;
struct pci_dev *dev;
#ifndef CONFIG_AGP_AMD_8151 #ifndef CONFIG_AGP_AMD_8151
no_agp = 1; no_agp = 1;
...@@ -482,7 +693,7 @@ int __init pci_iommu_init(void) ...@@ -482,7 +693,7 @@ int __init pci_iommu_init(void)
(agp_copy_info(&info) < 0); (agp_copy_info(&info) < 0);
#endif #endif
if (no_iommu || (!force_mmu && end_pfn < 0xffffffff>>PAGE_SHIFT)) { if (no_iommu || (!force_iommu && end_pfn < 0xffffffff>>PAGE_SHIFT)) {
printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
no_iommu = 1; no_iommu = 1;
return -1; return -1;
...@@ -492,7 +703,7 @@ int __init pci_iommu_init(void) ...@@ -492,7 +703,7 @@ int __init pci_iommu_init(void)
int err = -1; int err = -1;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
no_agp = 1; no_agp = 1;
if (force_mmu || end_pfn >= 0xffffffff>>PAGE_SHIFT) if (force_iommu || end_pfn >= 0xffffffff>>PAGE_SHIFT)
err = init_k8_gatt(&info); err = init_k8_gatt(&info);
if (err < 0) { if (err < 0) {
printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
...@@ -529,25 +740,38 @@ int __init pci_iommu_init(void) ...@@ -529,25 +740,38 @@ int __init pci_iommu_init(void)
set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
agp_memory_reserved = iommu_size; agp_memory_reserved = iommu_size;
printk(KERN_INFO"PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", printk(KERN_INFO
"PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
iommu_size>>20); iommu_size>>20);
iommu_start = aper_size - iommu_size; iommu_start = aper_size - iommu_size;
iommu_bus_base = info.aper_base + iommu_start; iommu_bus_base = info.aper_base + iommu_start;
iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
bad_dma_address = iommu_bus_base; bad_dma_address = iommu_bus_base;
iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
/* /*
* Unmap the IOMMU part of the GART. The alias of the page is always mapped * Unmap the IOMMU part of the GART. The alias of the page is
* with cache enabled and there is no full cache coherency across the GART * always mapped with cache enabled and there is no full cache
* remapping. The unmapping avoids automatic prefetches from the CPU * coherency across the GART remapping. The unmapping avoids
* allocating cache lines in there. All CPU accesses are done via the * automatic prefetches from the CPU allocating cache lines in
* direct mapping to the backing memory. The GART address is only used by PCI * there. All CPU accesses are done via the direct mapping to
* the backing memory. The GART address is only used by PCI
* devices. * devices.
*/ */
clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
flush_gart(); for_all_nb(dev) {
u32 flag;
int cpu = PCI_SLOT(dev->devfn) - 24;
if (cpu >= NR_CPUS)
continue;
northbridges[cpu] = dev;
pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
northbridge_flush_word[cpu] = flag;
}
flush_gart(NULL);
return 0; return 0;
} }
...@@ -561,8 +785,8 @@ fs_initcall(pci_iommu_init); ...@@ -561,8 +785,8 @@ fs_initcall(pci_iommu_init);
off don't use the IOMMU off don't use the IOMMU
leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
memaper[=order] allocate an own aperture over RAM with size 32MB^order. memaper[=order] allocate an own aperture over RAM with size 32MB^order.
noforce don't force IOMMU usage. Should be fastest. noforce don't force IOMMU usage. Default.
force Force IOMMU and turn on unmap debugging. force Force IOMMU.
*/ */
__init int iommu_setup(char *opt) __init int iommu_setup(char *opt)
{ {
...@@ -575,15 +799,19 @@ __init int iommu_setup(char *opt) ...@@ -575,15 +799,19 @@ __init int iommu_setup(char *opt)
if (!memcmp(p,"off", 3)) if (!memcmp(p,"off", 3))
no_iommu = 1; no_iommu = 1;
if (!memcmp(p,"force", 5)) if (!memcmp(p,"force", 5))
force_mmu = 1; force_iommu = 1;
if (!memcmp(p,"noforce", 7)) if (!memcmp(p,"noforce", 7))
force_mmu = 0; force_iommu = 0;
if (!memcmp(p, "memaper", 7)) { if (!memcmp(p, "memaper", 7)) {
fallback_aper_force = 1; fallback_aper_force = 1;
p += 7; p += 7;
if (*p == '=' && get_option(&p, &arg)) if (*p == '=' && get_option(&p, &arg))
fallback_aper_order = arg; fallback_aper_order = arg;
} }
if (!memcmp(p, "panic", 5))
panic_on_overflow = 1;
if (!memcmp(p, "nopanic", 7))
panic_on_overflow = 0;
#ifdef CONFIG_IOMMU_LEAK #ifdef CONFIG_IOMMU_LEAK
if (!memcmp(p,"leak", 4)) { if (!memcmp(p,"leak", 4)) {
leak_trace = 1; leak_trace = 1;
......
...@@ -33,15 +33,30 @@ void pci_free_consistent(struct pci_dev *hwdev, size_t size, ...@@ -33,15 +33,30 @@ void pci_free_consistent(struct pci_dev *hwdev, size_t size,
free_pages((unsigned long)vaddr, get_order(size)); free_pages((unsigned long)vaddr, get_order(size));
} }
/*
 * Report whether DMA with the address mask `mask` is supported.
 * Returns 1 for masks of at least 0x00ffffff and 0 for anything tighter;
 * hwdev is not consulted.
 */
int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
{
	/*
	 * We fall back to GFP_DMA when the mask isn't all 1s, so we
	 * can't guarantee allocations that must be within a tighter
	 * range than GFP_DMA.
	 * RED-PEN this won't work for pci_map_single. Caller has to
	 * use GFP_DMA in the first place.
	 */
	return mask >= 0x00ffffff;
}
static void __init check_ram(void) EXPORT_SYMBOL(pci_dma_supported);
static int __init check_ram(void)
{ {
if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { if (end_pfn >= 0xffffffff>>PAGE_SHIFT) {
printk(KERN_ERR "WARNING more than 4GB of memory but no IOMMU.\n" printk(KERN_ERR "WARNING more than 4GB of memory but no IOMMU.\n"
KERN_ERR "WARNING 32bit PCI may malfunction.\n"); KERN_ERR "WARNING 32bit PCI may malfunction.\n");
/* Could play with highmem_start_page here to trick some subsystems
into bounce buffers. Unfortunately that would require setting
CONFIG_HIGHMEM too.
*/
} }
return 0;
} }
__initcall(check_ram);
...@@ -131,14 +131,16 @@ void __init setup_per_cpu_areas(void) ...@@ -131,14 +131,16 @@ void __init setup_per_cpu_areas(void)
size = PERCPU_ENOUGH_ROOM; size = PERCPU_ENOUGH_ROOM;
#endif #endif
/* We don't support CPU hotplug, so only allocate as much as needed here */ for (i = 0; i < NR_CPUS; i++) {
unsigned char *ptr;
int maxi = max_t(unsigned, numnodes, num_online_cpus());
for (i = 0; i < maxi; i++) {
/* If possible allocate on the node of the CPU. /* If possible allocate on the node of the CPU.
In case it doesn't exist round-robin nodes. */ In case it doesn't exist round-robin nodes. */
unsigned char *ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size); if (!NODE_DATA(i % numnodes)) {
printk("cpu with no node %d, numnodes %d\n", i, numnodes);
ptr = alloc_bootmem(size);
} else {
ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size);
}
if (!ptr) if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i); panic("Cannot allocate cpu data for CPU %d\n", i);
cpu_pda[i].data_offset = ptr - __per_cpu_start; cpu_pda[i].data_offset = ptr - __per_cpu_start;
...@@ -158,7 +160,6 @@ void pda_init(int cpu) ...@@ -158,7 +160,6 @@ void pda_init(int cpu)
pda->me = pda; pda->me = pda;
pda->cpunumber = cpu; pda->cpunumber = cpu;
pda->irqcount = -1; pda->irqcount = -1;
pda->data_offset = 0;
pda->kernelstack = pda->kernelstack =
(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
pda->active_mm = &init_mm; pda->active_mm = &init_mm;
...@@ -170,14 +171,14 @@ void pda_init(int cpu) ...@@ -170,14 +171,14 @@ void pda_init(int cpu)
pda->irqstackptr = boot_cpu_stack; pda->irqstackptr = boot_cpu_stack;
level4 = init_level4_pgt; level4 = init_level4_pgt;
} else { } else {
level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
if (!level4)
panic("Cannot allocate top level page for cpu %d", cpu);
pda->irqstackptr = (char *) pda->irqstackptr = (char *)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!pda->irqstackptr) if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d\n", cpu); panic("cannot allocate irqstack for cpu %d", cpu);
level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
} }
if (!level4)
panic("Cannot allocate top level page for cpu %d", cpu);
pda->level4_pgt = (unsigned long *)level4; pda->level4_pgt = (unsigned long *)level4;
if (level4 != init_level4_pgt) if (level4 != init_level4_pgt)
......
...@@ -122,3 +122,17 @@ asmlinkage long wrap_sys_shmat(int shmid, char *shmaddr, int shmflg) ...@@ -122,3 +122,17 @@ asmlinkage long wrap_sys_shmat(int shmid, char *shmaddr, int shmflg)
unsigned long raddr; unsigned long raddr;
return sys_shmat(shmid,shmaddr,shmflg,&raddr) ?: (long)raddr; return sys_shmat(shmid,shmaddr,shmflg,&raddr) ?: (long)raddr;
} }
/*
 * 64bit clean time(2).
 *
 * Returns the current time in seconds as reported by do_gettimeofday().
 * If tloc is non-NULL the same value is also stored to user space through
 * it; a failing store makes the call return -EFAULT instead.
 */
asmlinkage long sys_time64(long * tloc)
{
	struct timeval now;
	long secs;

	do_gettimeofday(&now);
	/*
	 * Keep the value in a long: both *tloc and the return type are long,
	 * and passing it through an int would cut it down to 32 bits.
	 */
	secs = now.tv_sec;
	if (tloc) {
		if (put_user(secs, tloc))
			secs = -EFAULT;
	}
	return secs;
}
...@@ -121,6 +121,7 @@ EXPORT_SYMBOL_NOVERS(__read_lock_failed); ...@@ -121,6 +121,7 @@ EXPORT_SYMBOL_NOVERS(__read_lock_failed);
EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(synchronize_irq);
EXPORT_SYMBOL(smp_call_function); EXPORT_SYMBOL(smp_call_function);
EXPORT_SYMBOL(cpu_callout_map);
#endif #endif
#ifdef CONFIG_VT #ifdef CONFIG_VT
......
...@@ -47,6 +47,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -47,6 +47,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
struct node nodes[MAXNODE]; struct node nodes[MAXNODE];
int nodeid, i, nb; int nodeid, i, nb;
int found = 0; int found = 0;
int nmax;
nb = find_northbridge(); nb = find_northbridge();
if (nb < 0) if (nb < 0)
...@@ -54,22 +55,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -54,22 +55,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
numnodes = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3)); nmax = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3));
numnodes = nmax;
printk(KERN_INFO "Assuming %d nodes\n", numnodes - 1);
memset(&nodes,0,sizeof(nodes)); memset(&nodes,0,sizeof(nodes));
prevbase = 0; prevbase = 0;
for (i = 0; i < numnodes; i++) { for (i = 0; i < 8; i++) {
unsigned long base,limit; unsigned long base,limit;
base = read_pci_config(0, nb, 1, 0x40 + i*8); base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 3; nodeid = limit & 3;
if ((base & 3) == 0) {
if (i < nmax)
printk("Skipping disabled node %d\n", i);
continue;
}
if (!limit) { if (!limit) {
printk(KERN_ERR "Skipping node entry %d (base %lx)\n", i, base); printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,
return -1; base);
continue;
} }
if ((base >> 8) & 3 || (limit >> 8) & 3) { if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
...@@ -77,7 +84,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -77,7 +84,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
return -1; return -1;
} }
if ((1UL << nodeid) & nodes_present) { if ((1UL << nodeid) & nodes_present) {
printk(KERN_INFO "Node %d already present. Skipping\n", nodeid); printk(KERN_INFO "Node %d already present. Skipping\n",
nodeid);
continue; continue;
} }
...@@ -104,7 +112,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -104,7 +112,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (limit < base) { if (limit < base) {
printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit); nodeid, base, limit);
return -1; continue;
} }
/* Could sort here, but pun for now. Should not happen anyroads. */ /* Could sort here, but pun for now. Should not happen anyroads. */
...@@ -135,11 +143,26 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) ...@@ -135,11 +143,26 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
} }
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
for (i = 0; i < numnodes; i++) { for (i = 0; i < MAXNODE; i++) {
if (nodes[i].start != nodes[i].end) if (nodes[i].start != nodes[i].end)
setup_node_bootmem(i, nodes[i].start, nodes[i].end); setup_node_bootmem(i, nodes[i].start, nodes[i].end);
} }
/* There are unfortunately some poorly designed mainboards around
that only connect memory to a single CPU. This breaks the 1:1 cpu->node
mapping. To avoid this fill in the mapping for all possible
CPUs, as the number of CPUs is not known yet.
We round robin the existing nodes. */
int rr = 0;
for (i = 0; i < MAXNODE; i++) {
if (nodes_present & (1UL<<i))
continue;
if ((nodes_present >> rr) == 0)
rr = 0;
rr = ffz(~nodes_present >> rr);
node_data[i] = node_data[rr];
rr++;
}
return 0; return 0;
} }
...@@ -26,8 +26,6 @@ static int numa_off __initdata; ...@@ -26,8 +26,6 @@ static int numa_off __initdata;
unsigned long nodes_present; unsigned long nodes_present;
static int emunodes __initdata;
int __init compute_hash_shift(struct node *nodes) int __init compute_hash_shift(struct node *nodes)
{ {
int i; int i;
...@@ -103,11 +101,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en ...@@ -103,11 +101,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
if (nodeid + 1 > numnodes) { if (nodeid + 1 > numnodes)
numnodes = nodeid + 1; numnodes = nodeid + 1;
printk(KERN_INFO
"setup_node_bootmem: enlarging numnodes to %d\n", numnodes);
}
nodes_present |= (1UL << nodeid); nodes_present |= (1UL << nodeid);
} }
...@@ -149,26 +144,6 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -149,26 +144,6 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
printk(KERN_INFO "%s\n", printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found"); numa_off ? "NUMA turned off" : "No NUMA configuration found");
if (!numa_off && emunodes > 0) {
struct node nodes[MAXNODE];
unsigned long nodesize = (end_pfn << PAGE_SHIFT) / emunodes;
int i;
if (emunodes > MAXNODE)
emunodes = MAXNODE;
memset(&nodes, 0, sizeof(nodes));
printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20);
for (i = 0; i < emunodes; i++) {
unsigned long end = (i+1)*nodesize;
if (i == emunodes-1)
end = end_pfn << PAGE_SHIFT;
nodes[i].start = i * nodesize;
nodes[i].end = end;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
memnode_shift = compute_hash_shift(nodes);
return 0;
}
printk(KERN_INFO "Faking a node at %016lx-%016lx\n", printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
start_pfn << PAGE_SHIFT, start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT); end_pfn << PAGE_SHIFT);
...@@ -176,6 +151,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -176,6 +151,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
fake_node = 1; fake_node = 1;
memnode_shift = 63; memnode_shift = 63;
memnodemap[0] = 0; memnodemap[0] = 0;
numnodes = 1;
setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return -1; return -1;
} }
...@@ -199,13 +175,10 @@ void __init paging_init(void) ...@@ -199,13 +175,10 @@ void __init paging_init(void)
} }
/* [numa=off] */ /* [numa=off] */
/* [numa=emunodes] */
__init int numa_setup(char *opt) __init int numa_setup(char *opt)
{ {
if (!strncmp(opt,"off",3)) if (!strncmp(opt,"off",3))
numa_off = 1; numa_off = 1;
if (isdigit(opt[0]))
emunodes = simple_strtoul(opt, NULL, 10);
return 1; return 1;
} }
......
...@@ -402,12 +402,12 @@ static inline void set_bit_string(unsigned long *bitmap, unsigned long i, ...@@ -402,12 +402,12 @@ static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
} }
} }
static inline void clear_bit_string(unsigned long *bitmap, unsigned long i, static inline void __clear_bit_string(unsigned long *bitmap, unsigned long i,
int len) int len)
{ {
unsigned long end = i + len; unsigned long end = i + len;
while (i < end) { while (i < end) {
clear_bit(i, bitmap); __clear_bit(i, bitmap);
i++; i++;
} }
} }
......
...@@ -264,7 +264,20 @@ ...@@ -264,7 +264,20 @@
#define __NR_ia32_sys_epoll_wait 256 #define __NR_ia32_sys_epoll_wait 256
#define __NR_ia32_remap_file_pages 257 #define __NR_ia32_remap_file_pages 257
#define __NR_ia32_set_tid_address 258 #define __NR_ia32_set_tid_address 258
#define __NR_ia32_timer_create 259
#define __NR_ia32_timer_settime (__NR_ia32_timer_create+1)
#define __NR_ia32_timer_gettime (__NR_ia32_timer_create+2)
#define __NR_ia32_timer_getoverrun (__NR_ia32_timer_create+3)
#define __NR_ia32_timer_delete (__NR_ia32_timer_create+4)
#define __NR_ia32_clock_settime (__NR_ia32_timer_create+5)
#define __NR_ia32_clock_gettime (__NR_ia32_timer_create+6)
#define __NR_ia32_clock_getres (__NR_ia32_timer_create+7)
#define __NR_ia32_clock_nanosleep (__NR_ia32_timer_create+8)
#define __NR_ia32_statfs64 268
#define __NR_ia32_fstatfs64 269
#define __NR_ia32_tgkill 270
#define __NR_ia32_utimes 271
#define IA32_NR_syscalls 265 /* must be > than biggest syscall! */ #define IA32_NR_syscalls 275 /* must be > than biggest syscall! */
#endif /* _ASM_X86_64_IA32_UNISTD_H_ */ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */
...@@ -301,6 +301,12 @@ static inline int isa_check_signature(unsigned long io_addr, ...@@ -301,6 +301,12 @@ static inline int isa_check_signature(unsigned long io_addr,
#define flush_write_buffers() #define flush_write_buffers()
/* Virtual merge boundary reported to the block layer.
   vmerge stays disabled (0) by default: the block layer code still needs
   fixing to check for non-IOMMU addresses first.
   When the IOMMU is forced it is safe to enable, so 4096 is reported.
   The block layer's override hook is spelled BIO_VMERGE_BOUNDARY; the
   earlier misspelling BIO_VERMGE_BOUNDARY is kept only as an alias for
   anything that picked up the old name. */
extern int force_iommu;
#define BIO_VMERGE_BOUNDARY (force_iommu ? 4096 : 0)
#define BIO_VERMGE_BOUNDARY BIO_VMERGE_BOUNDARY
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif #endif
...@@ -48,6 +48,4 @@ static inline void unset_nmi_pm_callback(struct pm_dev * dev) ...@@ -48,6 +48,4 @@ static inline void unset_nmi_pm_callback(struct pm_dev * dev)
extern void default_do_nmi(struct pt_regs *); extern void default_do_nmi(struct pt_regs *);
extern void default_do_nmi(struct pt_regs *);
#endif /* ASM_NMI_H */ #endif /* ASM_NMI_H */
...@@ -14,7 +14,26 @@ static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) ...@@ -14,7 +14,26 @@ static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
u32 v; u32 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inl(0xcfc); v = inl(0xcfc);
PDprintk("%x reading from %x: %x\n", slot, offset, v); if (v != 0xffffffff)
PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
return v;
}
/* Read a single byte from PCI configuration space via direct port I/O.
   The bus/slot/func/offset selector is written to port 0xcf8, then one
   byte is read from port 0xcfc plus the low two bits of offset.
   Returns the byte that was read. */
static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
{
	u32 sel = 0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset;
	u8 val;

	outl(sel, 0xcf8);
	val = inb(0xcfc + (offset & 3));
	PDprintk("%x reading 1 from %x: %x\n", slot, offset, val);
	return val;
}
static inline u8 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
{
u16 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inw(0xcfc + (offset&2));
PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
return v; return v;
} }
......
...@@ -8,9 +8,6 @@ ...@@ -8,9 +8,6 @@
#include <linux/mm.h> /* for struct page */ #include <linux/mm.h> /* for struct page */
extern dma_addr_t bad_dma_address;
/* Can be used to override the logic in pci_scan_bus for skipping /* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes already-configured bus numbers - to be used for buggy BIOSes
or architectures with incomplete PCI setup by the loader */ or architectures with incomplete PCI setup by the loader */
...@@ -21,6 +18,8 @@ extern unsigned int pcibios_assign_all_busses(void); ...@@ -21,6 +18,8 @@ extern unsigned int pcibios_assign_all_busses(void);
#define pcibios_assign_all_busses() 0 #define pcibios_assign_all_busses() 0
#endif #endif
extern int no_iommu, force_iommu;
extern unsigned long pci_mem_start; extern unsigned long pci_mem_start;
#define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM (pci_mem_start) #define PCIBIOS_MIN_MEM (pci_mem_start)
...@@ -46,6 +45,9 @@ struct pci_dev; ...@@ -46,6 +45,9 @@ struct pci_dev;
extern int iommu_setup(char *opt); extern int iommu_setup(char *opt);
extern dma_addr_t bad_dma_address;
#define pci_dma_error(x) ((x) == bad_dma_address)
/* Allocate and map kernel buffer using consistent mode DMA for a device. /* Allocate and map kernel buffer using consistent mode DMA for a device.
* hwdev should be valid struct pci_dev pointer for PCI devices, * hwdev should be valid struct pci_dev pointer for PCI devices,
* NULL for PCI-like buses (ISA, EISA). * NULL for PCI-like buses (ISA, EISA).
...@@ -119,10 +121,16 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, ...@@ -119,10 +121,16 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
/* The PCI address space does equal the physical memory /* The PCI address space does equal the physical memory
* address space. The networking and block device layers use * address space. The networking and block device layers use
* this boolean for bounce buffer decisions. * this boolean for bounce buffer decisions
*
* On AMD64 it mostly equals, but we set it to zero to tell some subsystems
* that an IOMMU is available.
*/ */
#define PCI_DMA_BUS_IS_PHYS (0) #define PCI_DMA_BUS_IS_PHYS (no_iommu ? 1 : 0)
/* When the IOMMU is forced we lie slightly and report DAC as unsupported,
so that the device uses SAC instead of DAC. */
#define pci_dac_dma_supported(pci_dev, mask) (force_iommu ? 0 : 1)
#else #else
static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
...@@ -206,6 +214,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, ...@@ -206,6 +214,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
#define PCI_DMA_BUS_IS_PHYS 1 #define PCI_DMA_BUS_IS_PHYS 1
#define pci_dac_dma_supported(pci_dev, mask) 1
#endif #endif
extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
...@@ -220,21 +229,7 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, ...@@ -220,21 +229,7 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
* only drive the low 24-bits during PCI bus mastering, then * only drive the low 24-bits during PCI bus mastering, then
* you would pass 0x00ffffff as the mask to this function. * you would pass 0x00ffffff as the mask to this function.
*/ */
static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask);
{
/*
* we fall back to GFP_DMA when the mask isn't all 1s,
* so we can't guarantee allocations that must be
* within a tighter range than GFP_DMA..
*/
if(mask < 0x00ffffff)
return 0;
return 1;
}
/* This is always fine. */
#define pci_dac_dma_supported(pci_dev, mask) (1)
static __inline__ dma64_addr_t static __inline__ dma64_addr_t
pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
......
#ifndef _ASM_X8664_PERCPU_H_ #ifndef _ASM_X8664_PERCPU_H_
#define _ASM_X8664_PERCPU_H_ #define _ASM_X8664_PERCPU_H_
#include <linux/compiler.h>
#include <asm/pda.h> /* Same as asm-generic/percpu.h, except that we store the per cpu offset
in the PDA. Longer term the PDA and every per cpu variable
should be just put into a single section and referenced directly
from %gs */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* Same as the generic code except that we cache the per cpu offset
in the pda. This gives an 3 instruction reference for per cpu data */
#include <linux/compiler.h>
#include <asm/pda.h> #include <asm/pda.h>
#define __my_cpu_offset() read_pda(data_offset)
#define __per_cpu_offset(cpu) (cpu_pda[cpu].data_offset) #define __per_cpu_offset(cpu) (cpu_pda[cpu].data_offset)
#define __my_cpu_offset() read_pda(data_offset)
/* Separate out the type, so (int[3], foo) works. */ /* Separate out the type, so (int[3], foo) works. */
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
/* var is in discarded region: offset to particular copy we want */ /* var is in discarded region: offset to particular copy we want */
#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset(cpu))) #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
#define __get_cpu_var(var) \ #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
(*RELOC_HIDE(&var##__per_cpu, __my_cpu_offset()))
/* A macro to avoid #include hell... */
static inline void percpu_modcopy(void *pcpudst, const void *src, #define percpu_modcopy(pcpudst, src, size) \
unsigned long size) do { \
{ unsigned int __i; \
unsigned int i; for (__i = 0; __i < NR_CPUS; __i++) \
for (i = 0; i < NR_CPUS; i++) if (cpu_possible(__i)) \
if (cpu_possible(i)) memcpy((pcpudst)+__per_cpu_offset(__i), \
memcpy(pcpudst + __per_cpu_offset(i), src, size); (src), (size)); \
} } while (0)
extern void setup_per_cpu_areas(void);
#else /* ! SMP */ #else /* ! SMP */
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
__typeof__(type) name##__per_cpu __typeof__(type) per_cpu__##name
#define per_cpu(var, cpu) ((void)cpu, var##__per_cpu) #define per_cpu(var, cpu) ((void)cpu, per_cpu__##var)
#define __get_cpu_var(var) var##__per_cpu #define __get_cpu_var(var) per_cpu__##var
#endif /* SMP */ #endif /* SMP */
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
DECLARE_PER_CPU(struct x8664_pda, per_cpu_pda); extern void setup_per_cpu_areas(void);
#endif #endif /* _ASM_X8664_PERCPU_H_ */
...@@ -77,7 +77,7 @@ extern unsigned long end_pfn; ...@@ -77,7 +77,7 @@ extern unsigned long end_pfn;
extern unsigned long table_start, table_end; extern unsigned long table_start, table_end;
extern int exception_trace; extern int exception_trace;
extern int no_iommu, force_mmu; extern int force_iommu, no_iommu;
extern int using_apic_timer; extern int using_apic_timer;
extern int disable_apic; extern int disable_apic;
extern unsigned cpu_khz; extern unsigned cpu_khz;
......
#ifndef _X8664_SIGINFO_H #ifndef _X8664_SIGINFO_H
#define _X8664_SIGINFO_H #define _X8664_SIGINFO_H
#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
#include <asm-generic/siginfo.h> #include <asm-generic/siginfo.h>
#endif #endif
...@@ -461,7 +461,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr) ...@@ -461,7 +461,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr)
#define __NR_tkill 200 #define __NR_tkill 200
__SYSCALL(__NR_tkill, sys_tkill) __SYSCALL(__NR_tkill, sys_tkill)
#define __NR_time 201 #define __NR_time 201
__SYSCALL(__NR_time, sys_time) __SYSCALL(__NR_time, sys_time64)
#define __NR_futex 202 #define __NR_futex 202
__SYSCALL(__NR_futex, sys_futex) __SYSCALL(__NR_futex, sys_futex)
#define __NR_sched_setaffinity 203 #define __NR_sched_setaffinity 203
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment