Commit 0b6ca82a authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (32 commits)
  x86: cpa, strict range check in try_preserve_large_page()
  x86: cpa, enable CONFIG_DEBUG_PAGEALLOC on 64-bit
  x86: cpa, use page pool
  x86: introduce page pool in cpa
  x86: DEBUG_PAGEALLOC: enable after mem_init()
  brk: help text typo fix
  lguest: accept guest _PAGE_PWT page table entries
  x86 PM: update stale comments
  x86 PM: consolidate suspend and hibernation code
  x86 PM: rename 32-bit files in arch/x86/power
  x86 PM: move 64-bit hibernation files to arch/x86/power
  x86: trivial printk optimizations
  x86: fix early_ioremap pagetable ops
  x86: construct 32-bit boot time page tables in native format.
  x86, core: remove CONFIG_FORCED_INLINING
  x86: avoid unused variable warning in mm/init_64.c
  x86: fixup more paravirt fallout
  brk: document randomize_va_space and CONFIG_COMPAT_BRK (was Re:
  x86: fix sparse warnings in acpi/bus.c
  x86: fix sparse warning in topology.c
  ...
parents bfc1de0c fac84939
......@@ -111,15 +111,6 @@ Who: Christoph Hellwig <hch@lst.de>
---------------------------
What: CONFIG_FORCED_INLINING
When: June 2006
Why: Config option is there to see if gcc is good enough. (in january
2006). If it is, the behavior should just be the default. If it's not,
the option should just go away entirely.
Who: Arjan van de Ven
---------------------------
What: eepro100 network driver
When: January 2007
Why: replaced by the e100 driver
......
......@@ -41,6 +41,7 @@ show up in /proc/sys/kernel:
- pid_max
- powersave-nap [ PPC only ]
- printk
- randomize_va_space
- real-root-dev ==> Documentation/initrd.txt
- reboot-cmd [ SPARC only ]
- rtsig-max
......@@ -280,6 +281,34 @@ send before ratelimiting kicks in.
==============================================================
randomize-va-space:
This option can be used to select the type of process address
space randomization that is used in the system, for architectures
that support this feature.
0 - Turn the process address space randomization off by default.
1 - Make the addresses of mmap base, stack and VDSO page randomized.
This, among other things, implies that shared libraries will be
loaded to random addresses. Also for PIE-linked binaries, the location
of code start is randomized.
With heap randomization, the situation is a little bit more
complicated.
There a few legacy applications out there (such as some ancient
versions of libc.so.5 from 1996) that assume that brk area starts
just after the end of the code+bss. These applications break when
start of the brk area is randomized. There are however no known
non-legacy applications that would be broken this way, so for most
systems it is safe to choose full randomization. However there is
a CONFIG_COMPAT_BRK option for systems with ancient and/or broken
binaries, that makes heap non-randomized, but keeps all other
parts of process address space randomized if randomize_va_space
sysctl is turned on.
==============================================================
reboot-cmd: (Sparc only)
??? This seems to be a way to give an argument to the Sparc
......
......@@ -34,13 +34,9 @@ config DEBUG_STACK_USAGE
This option will slow down process creation somewhat.
comment "Page alloc debug is incompatible with Software Suspend on i386"
depends on DEBUG_KERNEL && HIBERNATION
depends on X86_32
config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
depends on DEBUG_KERNEL && X86_32
depends on DEBUG_KERNEL
help
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
......
......@@ -191,8 +191,10 @@ drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
ifeq ($(CONFIG_X86_32),y)
# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/x86/power/
ifeq ($(CONFIG_X86_32),y)
drivers-$(CONFIG_FB) += arch/x86/video/
endif
......
......@@ -33,8 +33,8 @@ static int skip_atoi(const char **s)
#define PLUS 4 /* show plus */
#define SPACE 8 /* space if plus */
#define LEFT 16 /* left justified */
#define SPECIAL 32 /* 0x */
#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
#define SMALL 32 /* Must be 32 == 0x20 */
#define SPECIAL 64 /* 0x */
#define do_div(n,base) ({ \
int __res; \
......@@ -45,12 +45,16 @@ __res; })
static char *number(char *str, long num, int base, int size, int precision,
int type)
{
char c, sign, tmp[66];
const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
char tmp[66];
char c, sign, locase;
int i;
if (type & LARGE)
digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* locase = 0 or 0x20. ORing digits or letters with 'locase'
* produces same digits or (maybe lowercased) letters */
locase = (type & SMALL);
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 36)
......@@ -81,7 +85,7 @@ static char *number(char *str, long num, int base, int size, int precision,
tmp[i++] = '0';
else
while (num != 0)
tmp[i++] = digits[do_div(num, base)];
tmp[i++] = (digits[do_div(num, base)] | locase);
if (i > precision)
precision = i;
size -= precision;
......@@ -95,7 +99,7 @@ static char *number(char *str, long num, int base, int size, int precision,
*str++ = '0';
else if (base == 16) {
*str++ = '0';
*str++ = digits[33];
*str++ = ('X' | locase);
}
}
if (!(type & LEFT))
......@@ -244,9 +248,9 @@ int vsprintf(char *buf, const char *fmt, va_list args)
base = 8;
break;
case 'X':
flags |= LARGE;
case 'x':
flags |= SMALL;
case 'X':
base = 16;
break;
......
......@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
......
......@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
......
......@@ -84,8 +84,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_PM) += suspend_64.o
obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
......
......@@ -118,7 +118,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
{
return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
}
/* Mutex protecting device creation against CPU hotplug */
......
......@@ -409,7 +409,8 @@ restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
1: INTERRUPT_RETURN
ENTRY(irq_return)
INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
pushl $0 # no error code
......@@ -418,7 +419,7 @@ iret_exc:
.previous
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.long irq_return,iret_exc
.previous
CFI_RESTORE_STATE
......@@ -865,20 +866,16 @@ nmi_espfix_stack:
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
1: INTERRUPT_RETURN
jmp irq_return
CFI_ENDPROC
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.previous
KPROBE_END(nmi)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
1: iret
iret
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.long native_iret, iret_exc
.previous
END(native_iret)
......
......@@ -581,16 +581,24 @@ retint_restore_args: /* return to kernel space */
*/
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 0,8,0
#ifdef CONFIG_PARAVIRT
RESTORE_ARGS 0,8,0
ENTRY(irq_return)
INTERRUPT_RETURN
#endif
.section __ex_table, "a"
.quad irq_return, bad_iret
.previous
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
#endif
.section .fixup,"ax"
bad_iret:
/*
......@@ -804,7 +812,7 @@ paranoid_swapgs\trace:
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
INTERRUPT_RETURN
jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
......@@ -919,7 +927,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
leaq native_iret(%rip),%rbp
leaq irq_return(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
......
......@@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
static int __init geode_southbridge_init(void)
{
int timers;
if (!is_geode())
return -ENODEV;
init_lbars();
timers = geode_mfgpt_detect();
printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers);
(void) mfgpt_timer_setup();
return 0;
}
......
......@@ -19,6 +19,10 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
/*
* References to members of the new_cpu_data structure.
......@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
*/
.section .text.head,"ax",@progbits
ENTRY(startup_32)
/* check to see if KEEP_SEGMENTS flag is meaningful */
cmpw $0x207, BP_version(%esi)
jb 1f
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
testb $(1<<6), BP_loadflags(%esi)
......@@ -92,7 +92,7 @@ ENTRY(startup_32)
/*
* Set segments to known values.
*/
1: lgdt boot_gdt_descr - __PAGE_OFFSET
lgdt pa(boot_gdt_descr)
movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
......@@ -105,8 +105,8 @@ ENTRY(startup_32)
*/
cld
xorl %eax,%eax
movl $__bss_start - __PAGE_OFFSET,%edi
movl $__bss_stop - __PAGE_OFFSET,%ecx
movl $pa(__bss_start),%edi
movl $pa(__bss_stop),%ecx
subl %edi,%ecx
shrl $2,%ecx
rep ; stosl
......@@ -118,31 +118,32 @@ ENTRY(startup_32)
* (kexec on panic case). Hence copy out the parameters before initializing
* page tables.
*/
movl $(boot_params - __PAGE_OFFSET),%edi
movl $pa(boot_params),%edi
movl $(PARAM_SIZE/4),%ecx
cld
rep
movsl
movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
movl pa(boot_params) + NEW_CL_POINTER,%esi
andl %esi,%esi
jz 1f # No comand line
movl $(boot_command_line - __PAGE_OFFSET),%edi
movl $pa(boot_command_line),%edi
movl $(COMMAND_LINE_SIZE/4),%ecx
rep
movsl
1:
#ifdef CONFIG_PARAVIRT
cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
/* This is can only trip for a broken bootloader... */
cmpw $0x207, pa(boot_params + BP_version)
jb default_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
jae bad_subarch
movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax
jmp *%eax
......@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
*
* Warning: don't use %esi or the stack in this code. However, %esp
* can be used as a GPR if you really need it...
* Note that the stack is not yet set up!
*/
page_pde_offset = (__PAGE_OFFSET >> 20);
#define PTE_ATTR 0x007 /* PRESENT+RW+USER */
#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */
default_entry:
movl $(pg0 - __PAGE_OFFSET), %edi
movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
#ifdef CONFIG_X86_PAE
/*
* In PAE mode swapper_pg_dir is statically defined to contain enough
* entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD
* entries to the first kernel PMD.
*
* Note the upper half of each PMD or PTE are always zero at
* this stage.
*/
#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(pg0), %edi
movl $pa(swapper_pg_pmd), %edx
movl $PTE_ATTR, %eax
10:
leal PDE_ATTR(%edi),%ecx /* Create PMD entry */
movl %ecx,(%edx) /* Store PMD entry */
/* Upper half already zero */
addl $8,%edx
movl $512,%ecx
11:
stosl
xchgl %eax,%ebx
stosl
xchgl %eax,%ebx
addl $0x1000,%eax
loop 11b
/*
* End condition: we must map up to and including INIT_MAP_BEYOND_END
* bytes beyond the end of our own page tables.
*/
leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
1:
movl %edi,pa(init_pg_tables_end)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
#else /* Not PAE */
page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(pg0), %edi
movl $pa(swapper_pg_dir), %edx
movl $PTE_ATTR, %eax
10:
leal 0x007(%edi),%ecx /* Create PDE entry */
leal PDE_ATTR(%edi),%ecx /* Create PDE entry */
movl %ecx,(%edx) /* Store identity PDE entry */
movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
addl $4,%edx
......@@ -189,19 +241,20 @@ default_entry:
stosl
addl $0x1000,%eax
loop 11b
/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
/*
* End condition: we must map up to and including INIT_MAP_BEYOND_END
* bytes beyond the end of our own page tables; the +0x007 is
* the attribute bits
*/
leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
/* Do an early initialization of the fixmap area */
movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
addl $0x67, %eax /* 0x67 == _PAGE_TABLE */
movl %eax, 4092(%edx)
movl %edi,pa(init_pg_tables_end)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
movl %eax,pa(swapper_pg_dir+0xffc)
#endif
jmp 3f
/*
* Non-boot CPU entry point; entered from trampoline.S
......@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
* NOTE! We have to correct for the fact that we're
* not yet offset PAGE_OFFSET..
*/
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
#define cr4_bits pa(mmu_cr4_features)
movl cr4_bits,%edx
andl %edx,%edx
jz 6f
......@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
movl $swapper_pg_dir-__PAGE_OFFSET,%eax
movl $pa(swapper_pg_dir),%eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $0x80000000,%eax
orl $X86_CR0_PG,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
......@@ -552,16 +605,44 @@ ENTRY(_stext)
*/
.section ".bss.page_aligned","wa"
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
ENTRY(swapper_pg_pmd)
.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
.fill 1024,4,0
ENTRY(swapper_pg_pmd)
#endif
ENTRY(swapper_pg_fixmap)
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
/*
* This starts the data section.
*/
#ifdef CONFIG_X86_PAE
.section ".data.page_aligned","wa"
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
.long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */
# if KPMDS == 3
.long pa(swapper_pg_pmd+PGD_ATTR),0
.long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
.long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
# elif KPMDS == 2
.long 0,0
.long pa(swapper_pg_pmd+PGD_ATTR),0
.long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
# elif KPMDS == 1
.long 0,0
.long 0,0
.long pa(swapper_pg_pmd+PGD_ATTR),0
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
.align PAGE_SIZE_asm /* needs to be page-sized too */
#endif
.data
ENTRY(stack_start)
.long init_thread_union+THREAD_SIZE
......
......@@ -12,48 +12,37 @@
*/
/*
* We are using the 32Khz input clock - its the only one that has the
* We are using the 32.768kHz input clock - it's the only one that has the
* ranges we find desirable. The following table lists the suitable
* divisors and the associated hz, minimum interval
* and the maximum interval:
* divisors and the associated Hz, minimum interval and the maximum interval:
*
* Divisor Hz Min Delta (S) Max Delta (S)
* 1 32000 .0005 2.048
* 2 16000 .001 4.096
* 4 8000 .002 8.192
* 8 4000 .004 16.384
* 16 2000 .008 32.768
* 32 1000 .016 65.536
* 64 500 .032 131.072
* 128 250 .064 262.144
* 256 125 .128 524.288
* Divisor Hz Min Delta (s) Max Delta (s)
* 1 32768 .00048828125 2.000
* 2 16384 .0009765625 4.000
* 4 8192 .001953125 8.000
* 8 4096 .00390625 16.000
* 16 2048 .0078125 32.000
* 32 1024 .015625 64.000
* 64 512 .03125 128.000
* 128 256 .0625 256.000
* 256 128 .125 512.000
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <asm/geode.h>
#define F_AVAIL 0x01
static struct mfgpt_timer_t {
int flags;
struct module *owner;
unsigned int avail:1;
} mfgpt_timers[MFGPT_MAX_TIMERS];
/* Selected from the table above */
#define MFGPT_DIVISOR 16
#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
#define MFGPT_HZ (32000 / MFGPT_DIVISOR)
#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
#ifdef CONFIG_GEODE_MFGPT_TIMER
static int __init mfgpt_timer_setup(void);
#else
#define mfgpt_timer_setup() (0)
#endif
/* Allow for disabling of MFGPTs */
static int disable;
static int __init mfgpt_disable(char *s)
......@@ -85,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix);
* In other cases (such as with VSAless OpenFirmware), the system firmware
* leaves timers available for us to use.
*/
int __init geode_mfgpt_detect(void)
static int timers = -1;
static void geode_mfgpt_detect(void)
{
int count = 0, i;
int i;
u16 val;
timers = 0;
if (disable) {
printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n");
return 0;
printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
goto done;
}
if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
goto done;
}
for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
if (!(val & MFGPT_SETUP_SETUP)) {
mfgpt_timers[i].flags = F_AVAIL;
count++;
mfgpt_timers[i].avail = 1;
timers++;
}
}
/* set up clock event device, if desired */
i = mfgpt_timer_setup();
return count;
done:
printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
}
int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
......@@ -183,36 +181,41 @@ int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
return 0;
}
static int mfgpt_get(int timer, struct module *owner)
static int mfgpt_get(int timer)
{
mfgpt_timers[timer].flags &= ~F_AVAIL;
mfgpt_timers[timer].owner = owner;
mfgpt_timers[timer].avail = 0;
printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
return timer;
}
int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner)
int geode_mfgpt_alloc_timer(int timer, int domain)
{
int i;
if (!geode_get_dev_base(GEODE_DEV_MFGPT))
return -ENODEV;
if (timers == -1) {
/* timers haven't been detected yet */
geode_mfgpt_detect();
}
if (!timers)
return -1;
if (timer >= MFGPT_MAX_TIMERS)
return -EIO;
return -1;
if (timer < 0) {
/* Try to find an available timer */
for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
if (mfgpt_timers[i].flags & F_AVAIL)
return mfgpt_get(i, owner);
if (mfgpt_timers[i].avail)
return mfgpt_get(i);
if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
break;
}
} else {
/* If they requested a specific timer, try to honor that */
if (mfgpt_timers[timer].flags & F_AVAIL)
return mfgpt_get(timer, owner);
if (mfgpt_timers[timer].avail)
return mfgpt_get(timer);
}
/* No timers available - too bad */
......@@ -244,10 +247,11 @@ static int __init mfgpt_setup(char *str)
}
__setup("mfgpt_irq=", mfgpt_setup);
static inline void mfgpt_disable_timer(u16 clock)
static void mfgpt_disable_timer(u16 clock)
{
u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP);
geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN);
/* avoid races by clearing CMP1 and CMP2 unconditionally */
geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
}
static int mfgpt_next_event(unsigned long, struct clock_event_device *);
......@@ -263,7 +267,7 @@ static struct clock_event_device mfgpt_clockevent = {
.shift = 32
};
static inline void mfgpt_start_timer(u16 clock, u16 delta)
static void mfgpt_start_timer(u16 delta)
{
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
......@@ -278,21 +282,25 @@ static void mfgpt_set_mode(enum clock_event_mode mode,
mfgpt_disable_timer(mfgpt_event_clock);
if (mode == CLOCK_EVT_MODE_PERIODIC)
mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC);
mfgpt_start_timer(MFGPT_PERIODIC);
mfgpt_tick_mode = mode;
}
static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
{
mfgpt_start_timer(mfgpt_event_clock, delta);
mfgpt_start_timer(delta);
return 0;
}
/* Assume (foolishly?), that this interrupt was due to our tick */
static irqreturn_t mfgpt_tick(int irq, void *dev_id)
{
u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
/* See if the interrupt was for us */
if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
return IRQ_NONE;
/* Turn off the clock (and clear the event) */
mfgpt_disable_timer(mfgpt_event_clock);
......@@ -320,13 +328,12 @@ static struct irqaction mfgptirq = {
.name = "mfgpt-timer"
};
static int __init mfgpt_timer_setup(void)
int __init mfgpt_timer_setup(void)
{
int timer, ret;
u16 val;
timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING,
THIS_MODULE);
timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
if (timer < 0) {
printk(KERN_ERR
"mfgpt-timer: Could not allocate a MFPGT timer\n");
......@@ -363,7 +370,7 @@ static int __init mfgpt_timer_setup(void)
&mfgpt_clockevent);
printk(KERN_INFO
"mfgpt-timer: registering the MFGT timer as a clock event.\n");
"mfgpt-timer: registering the MFGPT timer as a clock event.\n");
clockevents_register_device(&mfgpt_clockevent);
return 0;
......
......@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);
#ifndef CONFIG_X86_PAE
unsigned long mmu_cr4_features;
#else
unsigned long mmu_cr4_features = X86_CR4_PAE;
#endif
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
......
......@@ -53,7 +53,7 @@ EXPORT_SYMBOL(arch_register_cpu);
void arch_unregister_cpu(int num)
{
return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
unregister_cpu(&per_cpu(cpu_devices, num).cpu);
}
EXPORT_SYMBOL(arch_unregister_cpu);
#else
......
......@@ -46,6 +46,7 @@
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
......@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
void __init native_pagetable_setup_start(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
int i;
unsigned long pfn, va;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
/*
* Init entries of the first-level page table to the
* zero page, if they haven't already been set up.
*
* In a normal native boot, we'll be running on a
* pagetable rooted in swapper_pg_dir, but not in PAE
* mode, so this will end up clobbering the mappings
* for the lower 24Mbytes of the address space,
* without affecting the kernel address space.
* Remove any mappings which extend past the end of physical
* memory from the boot time page table:
*/
for (i = 0; i < USER_PTRS_PER_PGD; i++)
set_pgd(&base[i],
__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
/* Make sure kernel address space is empty so that a pagetable
will be allocated for it. */
memset(&base[USER_PTRS_PER_PGD], 0,
KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
pgd = base + pgd_index(va);
if (!pgd_present(*pgd))
break;
pud = pud_offset(pgd, va);
pmd = pmd_offset(pud, va);
if (!pmd_present(*pmd))
break;
pte = pte_offset_kernel(pmd, va);
if (!pte_present(*pte))
break;
pte_clear(NULL, va, pte);
}
paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
#endif
}
void __init native_pagetable_setup_done(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
/*
* Add low memory identity-mappings - SMP needs it when
* starting up on an AP from real-mode. In the non-PAE
* case we already have these mappings through head.S.
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
#endif
}
/*
......@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
* the boot process.
*
* If we're booting on native hardware, this will be a pagetable
* constructed in arch/i386/kernel/head.S, and not running in PAE mode
* (even if we'll end up running in PAE). The root of the pagetable
* will be swapper_pg_dir.
* constructed in arch/x86/kernel/head_32.S. The root of the
* pagetable will be swapper_pg_dir.
*
* If we're booting paravirtualized under a hypervisor, then there are
* more options: we may already be running PAE, and the pagetable may
......@@ -537,14 +531,6 @@ void __init paging_init(void)
load_cr3(swapper_pg_dir);
#ifdef CONFIG_X86_PAE
/*
* We will bail out later - printk doesn't work right now so
* the user would just see a hanging kernel.
*/
if (cpu_has_pae)
set_in_cr4(X86_CR4_PAE);
#endif
__flush_tlb_all();
kmap_init();
......@@ -675,13 +661,11 @@ void __init mem_init(void)
BUG_ON((unsigned long)high_memory > VMALLOC_START);
#endif /* double-sanity-check paranoia */
#ifdef CONFIG_X86_PAE
if (!cpu_has_pae)
panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
cpa_init();
/*
* Subtle. SMP is doing it's boot stuff late (because it has to
* fork idle threads) - but it also needs low mappings for the
......
......@@ -528,13 +528,15 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
cpa_init();
}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
unsigned long addr;
unsigned long addr = begin;
if (begin >= end)
if (addr >= end)
return;
/*
......@@ -549,7 +551,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
#else
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
for (addr = begin; addr < end; addr += PAGE_SIZE) {
for (; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)),
......
......@@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str)
early_param("early_ioremap_debug", early_ioremap_debug_setup);
static __initdata int after_paging_init;
static __initdata unsigned long bm_pte[1024]
static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
__attribute__((aligned(PAGE_SIZE)));
static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
pud_t *pud = pud_offset(pgd, addr);
pmd_t *pmd = pmd_offset(pud, addr);
return pmd;
}
static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
return &bm_pte[pte_index(addr)];
}
void __init early_ioremap_init(void)
{
unsigned long *pgd;
pmd_t *pmd;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_init()\n");
pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
*pgd = __pa(bm_pte) | _PAGE_TABLE;
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
pmd_populate_kernel(&init_mm, pmd, bm_pte);
/*
* The boot-ioremap range spans multiple pgds, for which
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
printk(KERN_WARNING "pgd %p != %p\n",
pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
printk(KERN_WARNING "pmd %p != %p\n",
pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
fix_to_virt(FIX_BTMAP_BEGIN));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
fix_to_virt(FIX_BTMAP_END));
fix_to_virt(FIX_BTMAP_END));
printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
......@@ -304,28 +309,29 @@ void __init early_ioremap_init(void)
void __init early_ioremap_clear(void)
{
unsigned long *pgd;
pmd_t *pmd;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_clear()\n");
pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
*pgd = 0;
paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
pmd_clear(pmd);
paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
__flush_tlb_all();
}
void __init early_ioremap_reset(void)
{
enum fixed_addresses idx;
unsigned long *pte, phys, addr;
unsigned long addr, phys;
pte_t *pte;
after_paging_init = 1;
for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
addr = fix_to_virt(idx);
pte = early_ioremap_pte(addr);
if (*pte & _PAGE_PRESENT) {
phys = *pte & PAGE_MASK;
if (pte_present(*pte)) {
phys = pte_val(*pte) & PAGE_MASK;
set_fixmap(idx, phys);
}
}
......@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void)
static void __init __early_set_fixmap(enum fixed_addresses idx,
unsigned long phys, pgprot_t flags)
{
unsigned long *pte, addr = __fix_to_virt(idx);
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
if (idx >= __end_of_fixed_addresses) {
BUG();
......@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
}
pte = early_ioremap_pte(addr);
if (pgprot_val(flags))
*pte = (phys & PAGE_MASK) | pgprot_val(flags);
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
*pte = 0;
pte_clear(NULL, addr, pte);
__flush_tlb_one(addr);
}
......
......@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <asm/e820.h>
#include <asm/processor.h>
......@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
* or when the present bit is not set. Otherwise we would return a
* pointer to a nonexisting mapping.
*/
pte_t *lookup_address(unsigned long address, int *level)
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
......@@ -252,10 +253,11 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
unsigned long nextpage_addr, numpages, pmask, psize, flags;
unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot;
int level, do_split = 1;
int i, do_split = 1;
unsigned int level;
spin_lock_irqsave(&pgd_lock, flags);
/*
......@@ -301,6 +303,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
new_prot = static_protections(new_prot, address);
/*
* We need to check the full range, whether
* static_protection() requires a different pgprot for one of
* the pages in the range we try to preserve:
*/
addr = address + PAGE_SIZE;
for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
pgprot_t chk_prot = static_protections(new_prot, addr);
if (pgprot_val(chk_prot) != pgprot_val(new_prot))
goto out_unlock;
}
/*
* If there are no changes, return. maxpages has been updated
* above:
......@@ -335,23 +350,103 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
return do_split;
}
static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low;
static unsigned long pool_used, pool_failed, pool_refill;
static void cpa_fill_pool(void)
{
struct page *p;
gfp_t gfp = GFP_KERNEL;
/* Do not allocate from interrupt context */
if (in_irq() || irqs_disabled())
return;
/*
* Check unlocked. I does not matter when we have one more
* page in the pool. The bit lock avoids recursive pool
* allocations:
*/
if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
return;
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
* We could do:
* gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
* but this fails on !PREEMPT kernels
*/
gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
#endif
while (pool_pages < pool_size) {
p = alloc_pages(gfp, 0);
if (!p) {
pool_failed++;
break;
}
spin_lock_irq(&pgd_lock);
list_add(&p->lru, &page_pool);
pool_pages++;
spin_unlock_irq(&pgd_lock);
}
clear_bit_unlock(0, &pool_refill);
}
#define SHIFT_MB (20 - PAGE_SHIFT)
#define ROUND_MB_GB ((1 << 10) - 1)
#define SHIFT_MB_GB 10
#define POOL_PAGES_PER_GB 16
void __init cpa_init(void)
{
struct sysinfo si;
unsigned long gb;
si_meminfo(&si);
/*
* Calculate the number of pool pages:
*
* Convert totalram (nr of pages) to MiB and round to the next
* GiB. Shift MiB to Gib and multiply the result by
* POOL_PAGES_PER_GB:
*/
gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
pool_size = POOL_PAGES_PER_GB * gb;
pool_low = pool_size;
cpa_fill_pool();
printk(KERN_DEBUG
"CPA: page pool initialized %lu of %lu pages preallocated\n",
pool_pages, pool_size);
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
unsigned long flags, pfn, pfninc = 1;
gfp_t gfp_flags = GFP_KERNEL;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
struct page *base;
#ifdef CONFIG_DEBUG_PAGEALLOC
gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
#endif
base = alloc_pages(gfp_flags, 0);
if (!base)
/*
* Get a page from the pool. The pool list is protected by the
* pgd_lock, which we have to take anyway for the split
* operation:
*/
spin_lock_irqsave(&pgd_lock, flags);
if (list_empty(&page_pool)) {
spin_unlock_irqrestore(&pgd_lock, flags);
return -ENOMEM;
}
base = list_first_entry(&page_pool, struct page, lru);
list_del(&base->lru);
pool_pages--;
if (pool_pages < pool_low)
pool_low = pool_pages;
spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page
* up for us already:
......@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
base = NULL;
out_unlock:
/*
* If we dropped out via the lookup_address check under
* pgd_lock then stick the page back into the pool:
*/
if (base) {
list_add(&base->lru, &page_pool);
pool_pages++;
} else
pool_used++;
spin_unlock_irqrestore(&pgd_lock, flags);
if (base)
__free_pages(base, 0);
return 0;
}
static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
int level, do_split, err;
int do_split, err;
unsigned int level;
struct page *kpte_page;
pte_t *kpte;
......@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* Check whether we really changed something:
*/
if (!cpa.flushtlb)
return ret;
goto out;
/*
* No need to flush, when we did not set any of the caching
......@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
else
cpa_flush_all(cache);
out:
cpa_fill_pool();
return ret;
}
......@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* but that can deadlock->flush only current cpu:
*/
__flush_tlb_all();
/*
* Try to refill the page pool here. We can do this only after
* the tlb flush.
*/
cpa_fill_pool();
}
#endif
......
obj-$(CONFIG_PM) += cpu.o
obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o
obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o
......@@ -40,7 +40,7 @@ static void __save_processor_state(struct saved_context *ctxt)
savesegment(ss, ctxt->ss);
/*
* control registers
* control registers
*/
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
......
/*
* Suspend support specific for i386.
* Suspend and hibernation support for x86-64
*
* Distribute under GPLv2
*
* Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
......@@ -14,9 +15,6 @@
#include <asm/pgtable.h>
#include <asm/mtrr.h>
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
static void fix_processor_context(void);
struct saved_context saved_context;
......@@ -63,7 +61,7 @@ static void __save_processor_state(struct saved_context *ctxt)
mtrr_save_fixed_ranges(NULL);
/*
* control registers
* control registers
*/
rdmsrl(MSR_EFER, ctxt->efer);
ctxt->cr0 = read_cr0();
......@@ -166,155 +164,3 @@ static void fix_processor_context(void)
loaddebug(&current->thread, 7);
}
}
#ifdef CONFIG_HIBERNATION
/* Defined in arch/x86_64/kernel/suspend_asm.S */
extern int restore_image(void);
/*
* Address to jump to in the last phase of restore in order to get to the image
* kernel's text (this value is passed in the image header).
*/
unsigned long restore_jump_address;
/*
* Value of the cr3 register from before the hibernation (this value is passed
* in the image header).
*/
unsigned long restore_cr3;
pgd_t *temp_level4_pgt;
void *relocated_restore_code;
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i, j;
i = pud_index(address);
pud = pud + i;
for (; i < PTRS_PER_PUD; pud++, i++) {
unsigned long paddr;
pmd_t *pmd;
paddr = address + i*PUD_SIZE;
if (paddr >= end)
break;
pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
if (!pmd)
return -ENOMEM;
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
unsigned long pe;
if (paddr >= end)
break;
pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
pe &= __supported_pte_mask;
set_pmd(pmd, __pmd(pe));
}
}
return 0;
}
static int set_up_temporary_mappings(void)
{
unsigned long start, end, next;
int error;
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!temp_level4_pgt)
return -ENOMEM;
/* It is safe to reuse the original kernel mapping */
set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
init_level4_pgt[pgd_index(__START_KERNEL_map)]);
/* Set up the direct mapping from scratch */
start = (unsigned long)pfn_to_kaddr(0);
end = (unsigned long)pfn_to_kaddr(end_pfn);
for (; start < end; start = next) {
pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
next = start + PGDIR_SIZE;
if (next > end)
next = end;
if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
return error;
set_pgd(temp_level4_pgt + pgd_index(start),
mk_kernel_pgd(__pa(pud)));
}
return 0;
}
int swsusp_arch_resume(void)
{
int error;
/* We have got enough memory and from now on we cannot recover */
if ((error = set_up_temporary_mappings()))
return error;
relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
if (!relocated_restore_code)
return -ENOMEM;
memcpy(relocated_restore_code, &core_restore_code,
&restore_registers - &core_restore_code);
restore_image();
return 0;
}
/*
* pfn_is_nosave - check if given pfn is in the 'nosave' section
*/
int pfn_is_nosave(unsigned long pfn)
{
unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
struct restore_data_record {
unsigned long jump_address;
unsigned long cr3;
unsigned long magic;
};
#define RESTORE_MAGIC 0x0123456789ABCDEFUL
/**
* arch_hibernation_header_save - populate the architecture specific part
* of a hibernation image header
* @addr: address to save the data at
*/
int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
struct restore_data_record *rdr = addr;
if (max_size < sizeof(struct restore_data_record))
return -EOVERFLOW;
rdr->jump_address = restore_jump_address;
rdr->cr3 = restore_cr3;
rdr->magic = RESTORE_MAGIC;
return 0;
}
/**
* arch_hibernation_header_restore - read the architecture specific data
* from the hibernation image header
* @addr: address to read the data from
*/
int arch_hibernation_header_restore(void *addr)
{
struct restore_data_record *rdr = addr;
restore_jump_address = rdr->jump_address;
restore_cr3 = rdr->cr3;
return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
}
#endif /* CONFIG_HIBERNATION */
/*
* Suspend support specific for i386 - temporary page tables
* Hibernation support specific for i386 - temporary page tables
*
* Distribute under GPLv2
*
......@@ -13,7 +13,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
/* Defined in arch/i386/power/swsusp.S */
/* Defined in hibernate_asm_32.S */
extern int restore_image(void);
/* References to section boundaries */
......@@ -23,7 +23,7 @@ extern const void __nosave_begin, __nosave_end;
pgd_t *resume_pg_dir;
/* The following three functions are based on the analogous code in
* arch/i386/mm/init.c
* arch/x86/mm/init_32.c
*/
/*
......
/*
* Hibernation support for x86-64
*
* Distribute under GPLv2
*
* Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
#include <linux/smp.h>
#include <linux/suspend.h>
#include <asm/proto.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mtrr.h>
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
/* Defined in hibernate_asm_64.S */
extern int restore_image(void);
/*
* Address to jump to in the last phase of restore in order to get to the image
* kernel's text (this value is passed in the image header).
*/
unsigned long restore_jump_address;
/*
* Value of the cr3 register from before the hibernation (this value is passed
* in the image header).
*/
unsigned long restore_cr3;
pgd_t *temp_level4_pgt;
void *relocated_restore_code;
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i, j;
i = pud_index(address);
pud = pud + i;
for (; i < PTRS_PER_PUD; pud++, i++) {
unsigned long paddr;
pmd_t *pmd;
paddr = address + i*PUD_SIZE;
if (paddr >= end)
break;
pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
if (!pmd)
return -ENOMEM;
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
unsigned long pe;
if (paddr >= end)
break;
pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
pe &= __supported_pte_mask;
set_pmd(pmd, __pmd(pe));
}
}
return 0;
}
static int set_up_temporary_mappings(void)
{
unsigned long start, end, next;
int error;
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!temp_level4_pgt)
return -ENOMEM;
/* It is safe to reuse the original kernel mapping */
set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
init_level4_pgt[pgd_index(__START_KERNEL_map)]);
/* Set up the direct mapping from scratch */
start = (unsigned long)pfn_to_kaddr(0);
end = (unsigned long)pfn_to_kaddr(end_pfn);
for (; start < end; start = next) {
pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
next = start + PGDIR_SIZE;
if (next > end)
next = end;
if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
return error;
set_pgd(temp_level4_pgt + pgd_index(start),
mk_kernel_pgd(__pa(pud)));
}
return 0;
}
int swsusp_arch_resume(void)
{
int error;
/* We have got enough memory and from now on we cannot recover */
if ((error = set_up_temporary_mappings()))
return error;
relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
if (!relocated_restore_code)
return -ENOMEM;
memcpy(relocated_restore_code, &core_restore_code,
&restore_registers - &core_restore_code);
restore_image();
return 0;
}
/*
* pfn_is_nosave - check if given pfn is in the 'nosave' section
*/
int pfn_is_nosave(unsigned long pfn)
{
unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
struct restore_data_record {
unsigned long jump_address;
unsigned long cr3;
unsigned long magic;
};
#define RESTORE_MAGIC 0x0123456789ABCDEFUL
/**
* arch_hibernation_header_save - populate the architecture specific part
* of a hibernation image header
* @addr: address to save the data at
*/
int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
struct restore_data_record *rdr = addr;
if (max_size < sizeof(struct restore_data_record))
return -EOVERFLOW;
rdr->jump_address = restore_jump_address;
rdr->cr3 = restore_cr3;
rdr->magic = RESTORE_MAGIC;
return 0;
}
/**
* arch_hibernation_header_restore - read the architecture specific data
* from the hibernation image header
* @addr: address to read the data from
*/
int arch_hibernation_header_restore(void *addr)
{
struct restore_data_record *rdr = addr;
restore_jump_address = rdr->jump_address;
restore_cr3 = rdr->cr3;
return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
}
.text
/* Originally gcc generated, modified by hand
*
/*
* This may not use any stack, nor any variable that is not "NoSave":
*
* Its rewriting one kernel image with another. What is stack in "old"
......
/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl>
/*
* Hibernation support for x86-64
*
* Distribute under GPLv2.
*
* Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright 2005 Andi Kleen <ak@suse.de>
* Copyright 2004 Pavel Machek <pavel@suse.cz>
*
* swsusp_arch_resume must not use any stack or any nonlocal variables while
* copying pages:
*
......@@ -9,7 +14,7 @@
* image could very well be data page in "new" image, and overwriting
* your own stack under you is bad idea.
*/
.text
#include <linux/linkage.h>
#include <asm/segment.h>
......
......@@ -58,7 +58,7 @@
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
int level;
unsigned int level;
pte_t *pte = lookup_address(address, &level);
unsigned offset = address & PAGE_MASK;
......@@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
int level;
unsigned int level;
pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
......@@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
int level;
unsigned int level;
pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
......
......@@ -217,17 +217,17 @@ unsigned long long xen_sched_clock(void)
/* Get the CPU speed from Xen */
unsigned long xen_cpu_khz(void)
{
u64 cpu_khz = 1000000ULL << 32;
u64 xen_khz = 1000000ULL << 32;
const struct vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
do_div(cpu_khz, info->tsc_to_system_mul);
do_div(xen_khz, info->tsc_to_system_mul);
if (info->tsc_shift < 0)
cpu_khz <<= -info->tsc_shift;
xen_khz <<= -info->tsc_shift;
else
cpu_khz >>= info->tsc_shift;
xen_khz >>= info->tsc_shift;
return cpu_khz;
return xen_khz;
}
/*
......
......@@ -31,6 +31,7 @@
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/proc_fs.h>
#include <linux/acpi.h>
#ifdef CONFIG_X86
#include <asm/mpspec.h>
#endif
......@@ -39,9 +40,6 @@
#define _COMPONENT ACPI_BUS_COMPONENT
ACPI_MODULE_NAME("bus");
#ifdef CONFIG_X86
extern void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger);
#endif
struct acpi_device *acpi_root;
struct proc_dir_entry *acpi_root_dir;
......@@ -653,8 +651,6 @@ void __init acpi_early_init(void)
#ifdef CONFIG_X86
if (!acpi_ioapic) {
extern u8 acpi_sci_flags;
/* compatible (0) means level (3) */
if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
......@@ -664,7 +660,6 @@ void __init acpi_early_init(void)
acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
} else {
extern int acpi_sci_override_gsi;
/*
* now that acpi_gbl_FADT is initialized,
* update it with result from INT_SRC_OVR parsing
......
......@@ -178,8 +178,8 @@ static void release_pte(pte_t pte)
static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
{
if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE))
|| pte_pfn(gpte) >= cpu->lg->pfn_limit)
if ((pte_flags(gpte) & _PAGE_PSE) ||
pte_pfn(gpte) >= cpu->lg->pfn_limit)
kill_guest(cpu, "bad page table entry");
}
......
......@@ -89,6 +89,10 @@ extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
static inline void disable_acpi(void)
{
acpi_disabled = 1;
......
......@@ -44,6 +44,8 @@ int set_memory_np(unsigned long addr, int numpages);
void clflush_cache_range(void *addr, unsigned int size);
void cpa_init(void);
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
......
......@@ -206,12 +206,17 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg)
return inw(base + reg + (timer * 8));
}
extern int __init geode_mfgpt_detect(void);
extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable);
extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable);
extern int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner);
extern int geode_mfgpt_alloc_timer(int timer, int domain);
#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1)
#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0)
#ifdef CONFIG_GEODE_MFGPT_TIMER
extern int __init mfgpt_timer_setup(void);
#else
static inline int mfgpt_timer_setup(void) { return 0; }
#endif
#endif
......@@ -48,7 +48,6 @@ typedef unsigned long pgprotval_t;
typedef unsigned long phys_addr_t;
typedef union { pteval_t pte, pte_low; } pte_t;
typedef pte_t boot_pte_t;
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_X86_PAE */
......
......@@ -255,7 +255,7 @@ enum {
* NOTE: the return type is pte_t but if the pmd is PSE then we return it
* as a pte too.
*/
extern pte_t *lookup_address(unsigned long address, int *level);
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
......
......@@ -52,10 +52,6 @@ void paging_init(void);
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
#define TWOLEVEL_PGDIR_SHIFT 22
#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
/* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be a 8MB "hole" after the
* physical memory until the kernel virtual memory starts. That means that
......
......@@ -5,15 +5,6 @@
/* These definitions are for GCC v4.x. */
#include <linux/compiler-gcc.h>
#ifdef CONFIG_FORCED_INLINING
# undef inline
# undef __inline__
# undef __inline
# define inline inline __attribute__((always_inline))
# define __inline__ __inline__ __attribute__((always_inline))
# define __inline __inline __attribute__((always_inline))
#endif
#define __used __attribute__((__used__))
#define __must_check __attribute__((warn_unused_result))
#define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
......
......@@ -587,7 +587,7 @@ config COMPAT_BRK
disabled, and can be overriden runtime by setting
/proc/sys/kernel/randomize_va_space to 2.
On non-ancient distros (post-2000 ones) Y is usually a safe choice.
On non-ancient distros (post-2000 ones) N is usually a safe choice.
config BASE_FULL
default y
......
......@@ -558,7 +558,6 @@ asmlinkage void __init start_kernel(void)
preempt_disable();
build_all_zonelists();
page_alloc_init();
enable_debug_pagealloc();
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
parse_early_param();
parse_args("Booting kernel", static_command_line, __start___param,
......@@ -614,6 +613,7 @@ asmlinkage void __init start_kernel(void)
vfs_caches_init_early();
cpuset_init_early();
mem_init();
enable_debug_pagealloc();
cpu_hotplug_init();
kmem_cache_init();
setup_per_cpu_pageset();
......
......@@ -465,20 +465,6 @@ config FRAME_POINTER
some architectures or if you use external debuggers.
If you don't debug the kernel, you can say N.
config FORCED_INLINING
bool "Force gcc to inline functions marked 'inline'"
depends on DEBUG_KERNEL
default y
help
This option determines if the kernel forces gcc to inline the functions
developers have marked 'inline'. Doing so takes away freedom from gcc to
do what it thinks is best, which is desirable for the gcc 3.x series of
compilers. The gcc 4.x series have a rewritten inlining algorithm and
disabling this option will generate a smaller kernel there. Hopefully
this algorithm is so good that allowing gcc4 to make the decision can
become the default in the future, until then this option is there to
test gcc for this.
config BOOT_PRINTK_DELAY
bool "Delay each boot printk message by N milliseconds"
depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
......
......@@ -26,6 +26,9 @@
#include <asm/page.h> /* for PAGE_SIZE */
#include <asm/div64.h>
/* Works only for digits and letters, but small and fast */
#define TOLOWER(x) ((x) | 0x20)
/**
* simple_strtoul - convert a string to an unsigned long
* @cp: The start of the string
......@@ -41,17 +44,17 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
if (*cp == '0') {
base = 8;
cp++;
if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
cp++;
base = 16;
}
}
} else if (base == 16) {
if (cp[0] == '0' && toupper(cp[1]) == 'X')
if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
cp += 2;
}
while (isxdigit(*cp) &&
(value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
(value = isdigit(*cp) ? *cp-'0' : TOLOWER(*cp)-'a'+10) < base) {
result = result*base + value;
cp++;
}
......@@ -92,17 +95,17 @@ unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
if (*cp == '0') {
base = 8;
cp++;
if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
cp++;
base = 16;
}
}
} else if (base == 16) {
if (cp[0] == '0' && toupper(cp[1]) == 'X')
if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
cp += 2;
}
while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
? toupper(*cp) : *cp)-'A'+10) < base) {
while (isxdigit(*cp)
&& (value = isdigit(*cp) ? *cp-'0' : TOLOWER(*cp)-'a'+10) < base) {
result = result*base + value;
cp++;
}
......@@ -360,24 +363,25 @@ static noinline char* put_dec(char *buf, unsigned long long num)
#define PLUS 4 /* show plus */
#define SPACE 8 /* space if plus */
#define LEFT 16 /* left justified */
#define SPECIAL 32 /* 0x */
#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
#define SMALL 32 /* Must be 32 == 0x20 */
#define SPECIAL 64 /* 0x */
static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type)
{
char sign,tmp[66];
const char *digits;
/* we are called with base 8, 10 or 16, only, thus don't need "g..." */
static const char small_digits[] = "0123456789abcdefx"; /* "ghijklmnopqrstuvwxyz"; */
static const char large_digits[] = "0123456789ABCDEFX"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
char tmp[66];
char sign;
char locase;
int need_pfx = ((type & SPECIAL) && base != 10);
int i;
digits = (type & LARGE) ? large_digits : small_digits;
/* locase = 0 or 0x20. ORing digits or letters with 'locase'
* produces same digits or (maybe lowercased) letters */
locase = (type & SMALL);
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 36)
return NULL;
sign = 0;
if (type & SIGN) {
if ((signed long long) num < 0) {
......@@ -404,7 +408,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
tmp[i++] = '0';
/* Generic code, for any base:
else do {
tmp[i++] = digits[do_div(num,base)];
tmp[i++] = (digits[do_div(num,base)] | locase);
} while (num != 0);
*/
else if (base != 10) { /* 8 or 16 */
......@@ -412,7 +416,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
int shift = 3;
if (base == 16) shift = 4;
do {
tmp[i++] = digits[((unsigned char)num) & mask];
tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
num >>= shift;
} while (num);
} else { /* base 10 */
......@@ -444,7 +448,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
++buf;
if (base == 16) {
if (buf < end)
*buf = digits[16]; /* for arbitrary base: digits[33]; */
*buf = ('X' | locase);
++buf;
}
}
......@@ -644,6 +648,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
continue;
case 'p':
flags |= SMALL;
if (field_width == -1) {
field_width = 2*sizeof(void *);
flags |= ZEROPAD;
......@@ -680,9 +685,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
base = 8;
break;
case 'X':
flags |= LARGE;
case 'x':
flags |= SMALL;
case 'X':
base = 16;
break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment