Commit 9014493a authored by Patrick Mochel's avatar Patrick Mochel

Merge bk://linux.bkbits.net/linux-2.5

into kernel.bkbits.net:/home/mochel/linux-2.6-power
parents f9464393 f38f89a2
obj-$(CONFIG_PM) += cpu.o
obj-$(CONFIG_PM_DISK) += pmdisk.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
/* Originally gcc generated, modified by hand */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
.text
/*
 * pmdisk_arch_suspend - save CPU context and snapshot, or restore an image.
 *
 * The dword at 4(%esp) selects the path.  When it is zero, %esp, %ebx,
 * %ebp, %esi, %edi and EFLAGS are stored in the saved_context_* variables
 * and pmdisk_suspend is called.  When it is non-zero, %cr3 is switched to
 * swsusp_pg_dir, every entry of the table at pm_pagedir_nosave is copied
 * back, the saved context is reloaded and pmdisk_resume is called.
 */
ENTRY(pmdisk_arch_suspend)
cmpl $0,4(%esp)
jne .L1450
/* Suspend path: record the registers that must survive the snapshot. */
movl %esp, saved_context_esp
movl %ebx, saved_context_ebx
movl %ebp, saved_context_ebp
movl %esi, saved_context_esi
movl %edi, saved_context_edi
pushfl ; popl saved_context_eflags
call pmdisk_suspend
jmp .L1449
.p2align 4,,7
.L1450:
/* Restore path: load %cr3 with swsusp_pg_dir minus __PAGE_OFFSET. */
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3
/* %ebx = table base, %eax = entries copied so far, %edx = byte offset. */
movl pm_pagedir_nosave,%ebx
xorl %eax, %eax
xorl %edx, %edx
.p2align 4,,7
.L1455:
/*
 * Each table entry is 16 bytes: the dword at offset 0 is the copy source
 * (%esi) and the dword at offset 4 is the destination (%edi).
 * 1024 dwords = 4096 bytes are moved per entry.
 * NOTE(review): the count is tested after the copy, so one entry is
 * copied even when pmdisk_pages is 0 -- confirm callers never get here
 * with an empty image.
 */
movl 4(%ebx,%edx),%edi
movl (%ebx,%edx),%esi
movl $1024, %ecx
rep
movsl
movl %cr3, %ecx;
movl %ecx, %cr3; # flush TLB
incl %eax
addl $16, %edx
cmpl pmdisk_pages,%eax
jb .L1455
.p2align 4,,7
.L1453:
/*
 * Reload the context stored on the suspend path.  %esp was saved before
 * anything was pushed, so the final ret uses the return address that was
 * on the stack when the suspend path was entered.
 */
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
movl saved_context_esi, %esi
movl saved_context_edi, %edi
pushl saved_context_eflags ; popfl
call pmdisk_resume
.L1449:
ret
......@@ -15,83 +15,47 @@
.text
ENTRY(do_magic)
pushl %ebx
cmpl $0,8(%esp)
jne resume
call do_magic_suspend_1
call save_processor_state
ENTRY(swsusp_arch_suspend)
movl %esp, saved_context_esp
movl %eax, saved_context_eax
movl %ebx, saved_context_ebx
movl %ecx, saved_context_ecx
movl %edx, saved_context_edx
movl %ebp, saved_context_ebp
movl %esi, saved_context_esi
movl %edi, saved_context_edi
pushfl ; popl saved_context_eflags
call do_magic_suspend_2
popl %ebx
call swsusp_save
ret
resume:
ENTRY(swsusp_arch_resume)
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3
call do_magic_resume_1
movl $0,loop
cmpl $0,nr_copy_pages
je copy_done
copy_loop:
movl $0,loop2
movl pagedir_nosave,%ebx
xorl %eax, %eax
xorl %edx, %edx
.p2align 4,,7
copy_one_page:
movl pagedir_nosave,%ecx
movl loop,%eax
movl loop2,%edx
sall $4,%eax
movl 4(%ecx,%eax),%ebx
movl (%ecx,%eax),%eax
movb (%edx,%eax),%al
movb %al,(%edx,%ebx)
movl loop2,%eax
leal 1(%eax),%edx
movl %edx,loop2
movl %edx,%eax
cmpl $4095,%eax
jbe copy_one_page
movl loop,%eax
leal 1(%eax),%edx
movl %edx,loop
movl %edx,%eax
cmpl nr_copy_pages,%eax
jb copy_loop
copy_loop:
movl 4(%ebx,%edx),%edi
movl (%ebx,%edx),%esi
movl $1024, %ecx
rep
movsl
copy_done:
movl $__USER_DS,%eax
incl %eax
addl $16, %edx
cmpl nr_copy_pages,%eax
jb copy_loop
.p2align 4,,7
movw %ax, %ds
movw %ax, %es
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_eax, %eax
movl saved_context_ebx, %ebx
movl saved_context_ecx, %ecx
movl saved_context_edx, %edx
movl saved_context_esi, %esi
movl saved_context_edi, %edi
call restore_processor_state
pushl saved_context_eflags ; popfl
call do_magic_resume_2
popl %ebx
call swsusp_restore
ret
.section .data.nosave
loop:
.quad 0
loop2:
.quad 0
.previous
/* originally gcc generated, but now changed. don't overwrite. */
/* Originally gcc generated, modified by hand
*
* This may not use any stack, nor any variable that is not "NoSave":
*
* It's rewriting one kernel image with another. What is stack in the "old"
* image could very well be a data page in the "new" image, and overwriting
* your own stack under you is a bad idea.
*/
.text
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
/* Input:
* rdi resume flag
*/
ENTRY(do_magic)
.LFB5:
subq $8, %rsp
.LCFI2:
testl %edi, %edi
jne .L90
call do_magic_suspend_1
call save_processor_state
ENTRY(swsusp_arch_suspend)
movq %rsp, saved_context_esp(%rip)
movq %rax, saved_context_eax(%rip)
......@@ -36,9 +32,10 @@ ENTRY(do_magic)
movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip)
addq $8, %rsp
jmp do_magic_suspend_2
.L90:
call swsusp_save
ret
ENTRY(swsusp_arch_resume)
/* set up cr3 */
leaq init_level4_pgt(%rip),%rax
subq $__START_KERNEL_map,%rax
......@@ -53,7 +50,6 @@ ENTRY(do_magic)
movq %rcx, %cr3;
movq %rax, %cr4; # turn PGE back on
call do_magic_resume_1
movl nr_copy_pages(%rip), %eax
xorl %ecx, %ecx
movq $0, loop(%rip)
......@@ -113,9 +109,8 @@ ENTRY(do_magic)
movq saved_context_r14(%rip), %r14
movq saved_context_r15(%rip), %r15
pushq saved_context_eflags(%rip) ; popfq
call restore_processor_state
addq $8, %rsp
jmp do_magic_resume_2
call swsusp_restore
ret
.section .data.nosave
loop:
......
......@@ -23,16 +23,6 @@ typedef struct pbe {
#define SWAP_FILENAME_MAXLENGTH 32
struct suspend_header {
u32 version_code;
unsigned long num_physpages;
char machine[8];
char version[20];
int num_cpus;
int page_size;
suspend_pagedir_t *suspend_pagedir;
unsigned int num_pbes;
};
#define SUSPEND_PD_PAGES(x) (((x)*sizeof(struct pbe))/PAGE_SIZE+1)
......@@ -45,16 +35,12 @@ extern void drain_local_pages(void);
/* kernel/power/swsusp.c */
extern int software_suspend(void);
extern unsigned int nr_copy_pages __nosavedata;
extern suspend_pagedir_t *pagedir_nosave __nosavedata;
#else /* CONFIG_SOFTWARE_SUSPEND */
static inline int software_suspend(void)
{
printk("Warning: fake suspend called\n");
return -EPERM;
}
#define software_resume() do { } while(0)
#endif /* CONFIG_SOFTWARE_SUSPEND */
......@@ -78,12 +64,6 @@ static inline void disable_nonboot_cpus(void) {}
static inline void enable_nonboot_cpus(void) {}
#endif
asmlinkage void do_magic(int is_resume);
asmlinkage void do_magic_resume_1(void);
asmlinkage void do_magic_resume_2(void);
asmlinkage void do_magic_suspend_1(void);
asmlinkage void do_magic_suspend_2(void);
void save_processor_state(void);
void restore_processor_state(void);
struct saved_context;
......
......@@ -18,6 +18,13 @@ config PM
will issue the hlt instruction if nothing is to be done, thereby
sending the processor to sleep and saving power.
config PM_DEBUG
bool "Power Management Debug Support"
---help---
This option enables verbose debugging support in the Power Management
code. This is helpful when debugging and reporting various PM bugs,
like suspend support.
config SOFTWARE_SUSPEND
bool "Software Suspend (EXPERIMENTAL)"
depends on EXPERIMENTAL && PM && SWAP
......@@ -42,33 +49,12 @@ config SOFTWARE_SUSPEND
For more information take a look at Documentation/power/swsusp.txt.
config PM_DISK
bool "Suspend-to-Disk Support"
depends on PM && SWAP && X86 && !X86_64
---help---
Suspend-to-disk is a power management state in which the contents
of memory are stored on disk and the entire system is shut down or
put into a low-power state (e.g. ACPI S4). When the computer is
turned back on, the stored image is loaded from disk and execution
resumes from where it left off before suspending.
This config option enables the core infrastructure necessary to
perform the suspend and resume transition.
Currently, this suspend-to-disk implementation is based on a forked
version of the swsusp code base. As such, it's still experimental,
and still relies on CONFIG_SWAP.
More information can be found in Documentation/power/.
If unsure, Say N.
config PM_DISK_PARTITION
config PM_STD_PARTITION
string "Default resume partition"
depends on PM_DISK
depends on SOFTWARE_SUSPEND
default ""
---help---
The default resume partition is the partition that the pmdisk suspend-
The default resume partition is the partition that the suspend-
to-disk implementation will look for a suspended disk image.
The partition specified here will be different for almost every user.
......@@ -77,16 +63,10 @@ config PM_DISK_PARTITION
The partition specified can be overridden by specifying:
pmdisk=/dev/<other device>
resume=/dev/<other device>
which will set the resume partition to the device specified.
One may also do:
pmdisk=off
to inform the kernel not to perform a resume transition.
Note there is currently not a way to specify which device to save the
suspended image to. It will simply pick the first available swap
device.
......
ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
swsusp-smp-$(CONFIG_SMP) += smp.o
obj-y := main.o process.o console.o pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y)
obj-$(CONFIG_PM_DISK) += disk.o pmdisk.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
......@@ -8,13 +8,11 @@
*
*/
#define DEBUG
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include "power.h"
......@@ -23,13 +21,16 @@
extern u32 pm_disk_mode;
extern struct pm_ops * pm_ops;
extern int pmdisk_save(void);
extern int pmdisk_write(void);
extern int pmdisk_read(void);
extern int pmdisk_restore(void);
extern int pmdisk_free(void);
extern int swsusp_suspend(void);
extern int swsusp_write(void);
extern int swsusp_read(void);
extern int swsusp_resume(void);
extern int swsusp_free(void);
static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
/**
* power_down - Shut machine down for hibernate.
* @mode: Suspend-to-disk mode
......@@ -46,16 +47,18 @@ static int power_down(u32 mode)
int error = 0;
local_irq_save(flags);
device_power_down(PM_SUSPEND_DISK);
switch(mode) {
case PM_DISK_PLATFORM:
device_power_down(PM_SUSPEND_DISK);
error = pm_ops->enter(PM_SUSPEND_DISK);
break;
case PM_DISK_SHUTDOWN:
printk("Powering off system\n");
device_shutdown();
machine_power_off();
break;
case PM_DISK_REBOOT:
device_shutdown();
machine_restart(NULL);
break;
}
......@@ -99,6 +102,7 @@ static void finish(void)
{
device_resume();
platform_finish();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}
......@@ -126,6 +130,7 @@ static int prepare(void)
/* Free memory before shutting down devices. */
free_some_memory();
disable_nonboot_cpus();
if ((error = device_suspend(PM_SUSPEND_DISK)))
goto Finish;
......@@ -133,6 +138,7 @@ static int prepare(void)
Finish:
platform_finish();
Thaw:
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
return error;
......@@ -161,7 +167,7 @@ int pm_suspend_disk(void)
pr_debug("PM: snapshotting memory.\n");
in_suspend = 1;
if ((error = pmdisk_save()))
if ((error = swsusp_suspend()))
goto Done;
if (in_suspend) {
......@@ -173,14 +179,14 @@ int pm_suspend_disk(void)
mb();
barrier();
error = pmdisk_write();
error = swsusp_write();
if (!error) {
error = power_down(pm_disk_mode);
pr_debug("PM: Power down failed.\n");
}
} else
pr_debug("PM: Image restored successfully.\n");
pmdisk_free();
swsusp_free();
Done:
finish();
return error;
......@@ -188,7 +194,7 @@ int pm_suspend_disk(void)
/**
* pm_resume - Resume from a saved image.
* software_resume - Resume from a saved image.
*
* Called as a late_initcall (so all devices are discovered and
* initialized), we call pmdisk to see if we have a saved image or not.
......@@ -199,13 +205,21 @@ int pm_suspend_disk(void)
*
*/
static int pm_resume(void)
static int software_resume(void)
{
int error;
if (noresume) {
/**
* FIXME: If noresume is specified, we need to find the partition
* and reset it back to normal swap space.
*/
return 0;
}
pr_debug("PM: Reading pmdisk image.\n");
if ((error = pmdisk_read()))
if ((error = swsusp_read()))
goto Done;
pr_debug("PM: Preparing system for restore.\n");
......@@ -216,28 +230,18 @@ static int pm_resume(void)
barrier();
mb();
/* FIXME: The following (comment and mdelay()) are from swsusp.
* Are they really necessary?
*
* We do not want some readahead with DMA to corrupt our memory, right?
* Do it with disabled interrupts for best effect. That way, if some
* driver scheduled DMA, we have good chance for DMA to finish ;-).
*/
pr_debug("PM: Waiting for DMAs to settle down.\n");
mdelay(1000);
pr_debug("PM: Restoring saved image.\n");
pmdisk_restore();
swsusp_resume();
pr_debug("PM: Restore failed, recovering.n");
finish();
Free:
pmdisk_free();
swsusp_free();
Done:
pr_debug("PM: Resume from disk failed.\n");
return 0;
}
late_initcall(pm_resume);
late_initcall(software_resume);
static char * pm_disk_modes[] = {
......@@ -336,3 +340,22 @@ static int __init pm_disk_init(void)
}
core_initcall(pm_disk_init);
/*
 * resume_setup - handle the "resume=" boot parameter.
 * @str:	text following "resume=".
 *
 * Copies @str into resume_file unless "noresume" has already been seen,
 * in which case the parameter is ignored.  Always returns 1.
 */
static int __init resume_setup(char *str)
{
	if (noresume)
		return 1;

	/*
	 * strncpy() does not terminate on truncation: bound the copy by the
	 * real buffer size and terminate explicitly instead of relying on
	 * the last byte of the static buffer still being zero.
	 */
	strncpy(resume_file, str, sizeof(resume_file) - 1);
	resume_file[sizeof(resume_file) - 1] = '\0';
	return 1;
}
/*
 * noresume_setup - handle the "noresume" boot parameter.
 * @str:	unused.
 *
 * Sets the noresume flag, which makes software_resume() return early and
 * makes resume_setup() ignore any "resume=" parameter seen afterwards.
 * Always returns 1.
 */
static int __init noresume_setup(char *str)
{
noresume = 1;
return 1;
}
__setup("noresume", noresume_setup);
__setup("resume=", resume_setup);
......@@ -8,8 +8,6 @@
*
*/
#define DEBUG
#include <linux/suspend.h>
#include <linux/kobject.h>
#include <linux/string.h>
......@@ -169,6 +167,15 @@ static int enter_state(u32 state)
return error;
}
/*
 * software_suspend - suspend the system to disk.
 *
 * This is the main interface to the outside world. It needs to be
 * called from process context.
 *
 * Returns whatever enter_state(PM_SUSPEND_DISK) returns.
 */
int software_suspend(void)
{
return enter_state(PM_SUSPEND_DISK);
}
/**
* pm_suspend - Externally visible function for suspending system.
......
/*
* kernel/power/pmdisk.c - Suspend-to-disk implementation
*
* This STD implementation is initially derived from swsusp (suspend-to-swap).
* The original copyright on that was:
*
* Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
* Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
*
* The additional parts are:
*
* Copyright (C) 2003 Patrick Mochel
* Copyright (C) 2003 Open Source Development Lab
*
* This file is released under the GPLv2.
*
* For more information, please see the text files in Documentation/power/
*
*/
#undef DEBUG
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/suspend.h>
#include <linux/version.h>
#include <linux/reboot.h>
#include <linux/device.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/utsname.h>
#include <asm/mmu_context.h>
#include "power.h"
extern asmlinkage int pmdisk_arch_suspend(int resume);
#define __ADDRESS(x) ((unsigned long) phys_to_virt(x))
#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT)
#define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;
extern int is_head_of_free_region(struct page *);
/* Variables to be preserved over suspend */
static int pagedir_order_check;
static int nr_copy_pages_check;
/* For resume= kernel option */
static char resume_file[256] = CONFIG_PM_DISK_PARTITION;
static dev_t resume_device;
/* Local variables that should not be affected by save */
unsigned int pmdisk_pages __nosavedata = 0;
/* Suspend pagedir is allocated before final copy, therefore it
must be freed after resume
Warning: this is evil. There are actually two pagedirs at time of
resume. One is "pagedir_save", which is empty frame allocated at
time of suspend, that must be freed. Second is "pagedir_nosave",
allocated at time of resume, that travels through memory not to
collide with anything.
*/
suspend_pagedir_t *pm_pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
static int pagedir_order __nosavedata = 0;
/*
 * Image header, written to swap by write_header() and read back by
 * check_header().  init_header() zeroes it and fills in everything except
 * the page directory fields, which write_pagedir() sets.
 */
struct pmdisk_info {
struct new_utsname uts; /* copy of system_utsname; compared in sanity_check() */
u32 version_code; /* LINUX_VERSION_CODE of the kernel that wrote the image */
unsigned long num_physpages; /* num_physpages at suspend time */
int cpus; /* num_online_cpus() at suspend time */
unsigned long image_pages; /* pmdisk_pages: number of data pages in the image */
unsigned long pagedir_pages; /* number of valid entries in pagedir[] */
swp_entry_t pagedir[768]; /* swap location of each page of the page directory */
} __attribute__((aligned(PAGE_SIZE))) pmdisk_info;
#define PMDISK_SIG "pmdisk-swap1"
/*
 * In-memory copy of page 0 of the swap device.  The struct is packed and
 * reserved[] is sized so the members add up to exactly PAGE_SIZE, which
 * puts the two 10-byte signatures at the very end of the page.
 */
struct pmdisk_header {
char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; /* padding: 20 = orig_sig + sig */
swp_entry_t pmdisk_info; /* swap entry of the struct pmdisk_info page */
char orig_sig[10]; /* swap signature saved by mark_swapfiles(), restored by check_sig() */
char sig[10]; /* current signature; first 10 bytes of PMDISK_SIG while an image exists */
} __attribute__((packed, aligned(PAGE_SIZE))) pmdisk_header;
/*
* XXX: We try to keep some more pages free so that I/O operations succeed
* without paging. Might this be more?
*/
#define PAGES_FOR_IO 512
/*
* Saving part...
*/
/* We memorize in swapfile_used what swap devices are used for suspension */
#define SWAPFILE_UNUSED 0
#define SWAPFILE_SUSPEND 1 /* This is the suspending device */
#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
static unsigned short swapfile_used[MAX_SWAPFILES];
static unsigned short root_swap;
/*
 * mark_swapfiles - stamp the suspend device's first page with our signature.
 * @prev:	swap entry of the image header, recorded in the swap header.
 *
 * Reads page 0 of swap device root_swap into pmdisk_header.  If it carries
 * one of the two regular swap signatures, the signature is saved in
 * orig_sig, replaced by the first 10 bytes of PMDISK_SIG, and the page is
 * written back together with @prev.
 *
 * Returns the result of the write, or -ENODEV if the page does not look
 * like swap space.  The result of the initial read is not checked.
 */
static int mark_swapfiles(swp_entry_t prev)
{
	rw_swap_page_sync(READ,
			  swp_entry(root_swap, 0),
			  virt_to_page((unsigned long)&pmdisk_header));

	if (memcmp("SWAP-SPACE", pmdisk_header.sig, 10) != 0 &&
	    memcmp("SWAPSPACE2", pmdisk_header.sig, 10) != 0) {
		pr_debug("pmdisk: Partition is not swap space.\n");
		return -ENODEV;
	}

	/* Remember the real signature so check_sig() can put it back. */
	memcpy(pmdisk_header.orig_sig, pmdisk_header.sig, 10);
	memcpy(pmdisk_header.sig, PMDISK_SIG, 10);
	pmdisk_header.pmdisk_info = prev;

	return rw_swap_page_sync(WRITE,
				 swp_entry(root_swap, 0),
				 virt_to_page((unsigned long)&pmdisk_header));
}
/*
 * read_swapfiles - classify the swap devices before the image is saved.
 *
 * Fills swapfile_used[] and picks root_swap:
 *  - slots whose flags are 0 are SWAPFILE_UNUSED;
 *  - with an empty resume_file, the first active device becomes the
 *    suspend device and every later active one is SWAPFILE_IGNORED;
 *  - with a non-empty resume_file, every active device is currently marked
 *    SWAPFILE_SUSPEND and the last one ends up in root_swap.
 *
 * Returns 0 if a suspend device was found, -ENODEV otherwise.
 */
static int read_swapfiles(void)
{
	const int have_default = strlen(resume_file) != 0;
	int type;

	root_swap = 0xFFFF;

	swap_list_lock();
	for (type = 0; type < MAX_SWAPFILES; type++) {
		if (swap_info[type].flags == 0) {
			swapfile_used[type] = SWAPFILE_UNUSED;
			continue;
		}

		if (!have_default) {
			pr_debug("pmdisk: Default resume partition not set.\n");
			if (root_swap != 0xFFFF) {
				swapfile_used[type] = SWAPFILE_IGNORED;
				continue;
			}
		}

		/*
		 * FIXME: when resume_file is set we should ignore all swap
		 * devices that are not the resume device, i.e. only take this
		 * path if (resume_device == swap_info[type].swap_device) and
		 * mark the others SWAPFILE_IGNORED.
		 */
		swapfile_used[type] = SWAPFILE_SUSPEND;
		root_swap = type;
	}
	swap_list_unlock();

	if (root_swap == 0xFFFF)
		return -ENODEV;
	return 0;
}
/*
 * lock_swapdevices - toggle usability of the ignored swap devices.
 *
 * XORs the low eight flag bits of every device marked SWAPFILE_IGNORED,
 * which makes the device unusable; calling the function a second time
 * undoes the change.  This is called after the image has been snapshotted,
 * so the modification is lost after resume, which is what we want.
 */
static void lock_swapdevices(void)
{
	int type = 0;

	swap_list_lock();
	while (type < MAX_SWAPFILES) {
		if (swapfile_used[type] == SWAPFILE_IGNORED)
			swap_info[type].flags ^= 0xFF;
		type++;
	}
	swap_list_unlock();
}
/**
 *	write_swap_page - Write one page to a fresh swap location.
 *	@addr:	Address we're writing.
 *	@loc:	Place to store the entry we used.
 *
 *	Allocate a new swap entry and 'sync' the page at @addr to it.
 *	@loc is only written on success.
 *
 *	Note we discard -EIO errors. That is an artifact left over from
 *	swsusp, which did not check the return of rw_swap_page_sync() at
 *	all, since most pages written back to swap would return -EIO.
 *
 *	Returns 0 on success, -ENOSPC if no entry on the suspend device
 *	could be allocated, or the error from rw_swap_page_sync().  On
 *	every failure the entry that was allocated is released again, so
 *	it is not leaked (the caller never learns about it).
 */
static int write_swap_page(unsigned long addr, swp_entry_t * loc)
{
	swp_entry_t entry = get_swap_page();
	int error;

	if (!swp_offset(entry) ||
	    swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) {
		/* Allocated, but not on the suspend device: hand it back. */
		if (swp_offset(entry))
			swap_free(entry);
		return -ENOSPC;
	}

	error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
	if (error == -EIO)
		error = 0;
	if (error) {
		swap_free(entry);
		return error;
	}

	*loc = entry;
	return 0;
}
/**
 *	free_data - Release the swap entries holding image data.
 *
 *	Goes through the page directory in order, freeing each recorded
 *	swap entry and clearing it.  The walk stops at the first entry
 *	that has no swap location, or after pmdisk_pages entries.
 */
static void free_data(void)
{
	int idx = 0;

	while (idx < pmdisk_pages) {
		swp_entry_t used = pm_pagedir_nosave[idx].swap_address;

		if (!used.val)
			break;
		swap_free(used);
		pm_pagedir_nosave[idx].swap_address = (swp_entry_t){0};
		idx++;
	}
}
/**
* write_data - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
static int write_data(void)
{
int error = 0;
int i;
printk( "Writing data to swap (%d pages): ", pmdisk_pages );
for (i = 0; i < pmdisk_pages && !error; i++) {
if (!(i%100))
printk( "." );
error = write_swap_page((pm_pagedir_nosave+i)->address,
&((pm_pagedir_nosave+i)->swap_address));
}
printk(" %d Pages done.\n",i);
return error;
}
/**
 *	free_pagedir_entries - Release the swap entries of the page directory.
 *
 *	Frees the first pmdisk_info.pagedir_pages entries recorded in
 *	pmdisk_info.pagedir[].
 */
static void free_pagedir_entries(void)
{
	int count = pmdisk_info.pagedir_pages;
	int idx = 0;

	while (idx < count) {
		swap_free(pmdisk_info.pagedir[idx]);
		idx++;
	}
}
/**
* write_pagedir - Write the array of pages holding the page directory.
* @last: Last swap entry we write (needed for header).
*/
static int write_pagedir(void)
{
unsigned long addr = (unsigned long)pm_pagedir_nosave;
int error = 0;
int n = SUSPEND_PD_PAGES(pmdisk_pages);
int i;
pmdisk_info.pagedir_pages = n;
printk( "Writing pagedir (%d pages)\n", n);
for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
error = write_swap_page(addr,&pmdisk_info.pagedir[i]);
return error;
}
#ifdef DEBUG
/*
 * dump_pmdisk_info - print every field of pmdisk_info (DEBUG builds only).
 *
 * The unsigned long fields are printed with %lu to match their type.
 */
static void dump_pmdisk_info(void)
{
	printk(" pmdisk: Version: %u\n",pmdisk_info.version_code);
	printk(" pmdisk: Num Pages: %lu\n",pmdisk_info.num_physpages);
	printk(" pmdisk: UTS Sys: %s\n",pmdisk_info.uts.sysname);
	printk(" pmdisk: UTS Node: %s\n",pmdisk_info.uts.nodename);
	printk(" pmdisk: UTS Release: %s\n",pmdisk_info.uts.release);
	printk(" pmdisk: UTS Version: %s\n",pmdisk_info.uts.version);
	printk(" pmdisk: UTS Machine: %s\n",pmdisk_info.uts.machine);
	printk(" pmdisk: UTS Domain: %s\n",pmdisk_info.uts.domainname);
	printk(" pmdisk: CPUs: %d\n",pmdisk_info.cpus);
	printk(" pmdisk: Image: %lu Pages\n",pmdisk_info.image_pages);
	printk(" pmdisk: Pagedir: %lu Pages\n",pmdisk_info.pagedir_pages);
}
#else
/* Without DEBUG the dump is a no-op. */
static void dump_pmdisk_info(void)
{
}
#endif
/*
 * init_header - reset pmdisk_info and describe the running system in it.
 *
 * Everything is zeroed first, so the page directory fields start out
 * empty; the kernel version, memory size, utsname, CPU count and current
 * pmdisk_pages are then filled in.
 */
static void init_header(void)
{
	memset(&pmdisk_info, 0, sizeof(pmdisk_info));

	memcpy(&pmdisk_info.uts, &system_utsname, sizeof(system_utsname));
	pmdisk_info.version_code = LINUX_VERSION_CODE;
	pmdisk_info.num_physpages = num_physpages;
	pmdisk_info.cpus = num_online_cpus();
	pmdisk_info.image_pages = pmdisk_pages;
}
/**
 *	write_header - Write the image header to swap.
 *	@entry:	On success, receives the swap location of the header.
 *
 *	Dumps pmdisk_info (DEBUG builds only) and writes it to a freshly
 *	allocated swap page.  Returns the result of write_swap_page().
 */
static int write_header(swp_entry_t * entry)
{
	int error;

	dump_pmdisk_info();
	error = write_swap_page((unsigned long)&pmdisk_info, entry);
	return error;
}
/**
* write_suspend_image - Write entire image and metadata.
*
*/
static int write_suspend_image(void)
{
int error;
swp_entry_t prev = { 0 };
init_header();
if ((error = write_data()))
goto FreeData;
if ((error = write_pagedir()))
goto FreePagedir;
if ((error = write_header(&prev)))
goto FreePagedir;
error = mark_swapfiles(prev);
Done:
return error;
FreePagedir:
free_pagedir_entries();
FreeData:
free_data();
goto Done;
}
/**
 *	saveable - Determine whether a page should be cloned or not.
 *	@pfn:	The page frame number; may be advanced, see below.
 *
 *	Returns 0 for pages that must not be saved:
 *	 - pages flagged Nosave;
 *	 - reserved pages inside [__nosave_begin, __nosave_end);
 *	 - non-reserved pages at the head of a free region, in which case
 *	   @pfn is moved to the last page of that region so the caller's
 *	   loop skips the whole chunk.
 *	Returns 1 for every other page.
 */
static int saveable(unsigned long * pfn)
{
	struct page * page = pfn_to_page(*pfn);
	unsigned long vaddr;
	int chunk_size;

	if (PageNosave(page))
		return 0;

	if (PageReserved(page)) {
		/*
		 * Reserved pages are copied wholesale (hopefully the code
		 * segment is not that big), except for the nosave section.
		 * Perhaps copying all reserved pages is not too healthy, as
		 * they may contain critical BIOS data?
		 */
		vaddr = ADDRESS(*pfn);
		if (vaddr >= ADDRESS2(&__nosave_begin) &&
		    vaddr < ADDRESS2(&__nosave_end)) {
			pr_debug("[nosave %lx]\n", vaddr);
			return 0;
		}
		return 1;
	}

	chunk_size = is_head_of_free_region(page);
	if (chunk_size) {
		*pfn += chunk_size - 1;
		return 0;
	}
	return 1;
}
/**
 *	count_pages - Count the pages that need to be saved.
 *
 *	Walks every page frame below max_pfn and stores the number of
 *	saveable ones in pmdisk_pages.  saveable() may advance the frame
 *	number past a free region.
 */
static void count_pages(void)
{
	unsigned long pfn = 0;
	int total = 0;

	while (pfn < max_pfn) {
		if (saveable(&pfn))
			total++;
		pfn++;
	}
	pmdisk_pages = total;
}
/**
 *	copy_pages - Atomically snapshot memory.
 *
 *	Walks every page frame below max_pfn; each saveable page is copied
 *	into the page already allocated for the next page directory entry,
 *	and its original address is recorded there.
 *
 *	The number of pages copied must equal pmdisk_pages, i.e. nothing
 *	may have changed since the pages were counted; otherwise BUG().
 */
static void copy_pages(void)
{
	struct pbe * slot = pagedir_save;
	unsigned long pfn;
	int copied = 0;

	for (pfn = 0; pfn < max_pfn; pfn++) {
		if (!saveable(&pfn))
			continue;
		slot->orig_address = ADDRESS(pfn);
		copy_page((void *) slot->address,
			  (void *) slot->orig_address);
		slot++;
		copied++;
	}
	BUG_ON(copied != pmdisk_pages);
}
/**
 *	free_image_pages - Free each page allocated for snapshot.
 *
 *	Clears the Nosave flag of, and frees, every page referenced by the
 *	page directory.  Entries without a page (address == 0) are skipped:
 *	they exist after a partial alloc_image_pages() failure, and
 *	virt_to_page(0) must not be touched.  Freed entries are zeroed so
 *	a repeated call is harmless.
 */
static void free_image_pages(void)
{
	struct pbe * p;
	int i;

	for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) {
		if (!p->address)
			continue;
		ClearPageNosave(virt_to_page(p->address));
		free_page(p->address);
		p->address = 0;
	}
}
/**
 *	free_pagedir - Free the snapshot pages and the page directory.
 *
 *	The image pages go first because they are found through the
 *	directory; the directory's own pagedir_order block is freed last.
 */
static void free_pagedir(void)
{
	free_image_pages();

	free_pages((unsigned long)pagedir_save, pagedir_order);
}
/*
 * calc_order - settle the allocation order of the page directory.
 *
 * Starts from the order needed for the current pmdisk_pages, then adds
 * 1 << order to pmdisk_pages.  If that growth pushes the required order
 * up, the order and pmdisk_pages are bumped again, until the order no
 * longer changes.  The result is stored in pagedir_order.
 * NOTE(review): the increments presumably account for the pages of the
 * directory itself -- confirm.
 */
static void calc_order(void)
{
	int order = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages));
	int grow;

	pmdisk_pages += 1 << order;
	for (;;) {
		grow = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages)) - order;
		if (!grow)
			break;
		order += grow;
		pmdisk_pages += 1 << grow;
	}
	pagedir_order = order;
}
/**
 *	alloc_pagedir - Allocate the page directory.
 *
 *	Works out the required order with calc_order(), allocates that
 *	many contiguous pages atomically, zeroes them and publishes the
 *	block as both pagedir_save and pm_pagedir_nosave.
 *
 *	Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int alloc_pagedir(void)
{
	calc_order();

	pagedir_save = (suspend_pagedir_t *)
		__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
	if (pagedir_save) {
		memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
		pm_pagedir_nosave = pagedir_save;
		return 0;
	}
	return -ENOMEM;
}
/**
 *	alloc_image_pages - Allocate pages for the snapshot.
 *
 *	Allocates one zeroed page per page directory entry and flags it
 *	Nosave so that it is not itself included in the image.
 *
 *	Returns 0 on success.  On allocation failure every page obtained
 *	so far is unflagged and freed, all entries are left with
 *	address == 0, and -ENOMEM is returned.
 */
static int alloc_image_pages(void)
{
	struct pbe * p;
	int i;

	for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) {
		p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
		if (!p->address)
			goto Error;
		SetPageNosave(virt_to_page(p->address));
	}
	return 0;

 Error:
	/*
	 * p is the entry whose allocation failed (its address is already
	 * 0).  Unwind the earlier ones, clearing the Nosave flag before
	 * the page goes back to the allocator, and never step the pointer
	 * below the start of the directory.
	 */
	while (p > pagedir_save) {
		p--;
		ClearPageNosave(virt_to_page(p->address));
		free_page(p->address);
		p->address = 0;
	}
	return -ENOMEM;
}
/**
 *	enough_free_mem - Make sure we have enough free memory to snapshot.
 *
 *	Returns 1 if the number of free pages covers pmdisk_pages plus
 *	PAGES_FOR_IO, 0 otherwise.
 */
static int enough_free_mem(void)
{
	if (nr_free_pages() >= (pmdisk_pages + PAGES_FOR_IO))
		return 1;

	pr_debug("pmdisk: Not enough free pages: Have %d\n",
		 nr_free_pages());
	return 0;
}
/**
* enough_swap - Make sure we have enough swap to save the image.
*
* Returns TRUE or FALSE after checking the total amount of swap
* space avaiable.
*
* FIXME: si_swapinfo(&i) returns all swap devices information.
* We should only consider resume_device.
*/
static int enough_swap(void)
{
struct sysinfo i;
si_swapinfo(&i);
if (i.freeswap < (pmdisk_pages + PAGES_FOR_IO)) {
pr_debug("pmdisk: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
return 1;
}
/**
* pmdisk_suspend - Atomically snapshot the system.
*
* This must be called with interrupts disabled, to prevent the
* system changing at all from underneath us.
*
* To do this, we count the number of pages in the system that we
* need to save; make sure we have enough memory and swap to clone
* the pages and save them in swap, allocate the space to hold them,
* and then snapshot them all.
*/
int pmdisk_suspend(void)
{
int error = 0;
if ((error = read_swapfiles()))
return error;
drain_local_pages();
pm_pagedir_nosave = NULL;
pr_debug("pmdisk: Counting pages to copy.\n" );
count_pages();
pr_debug("pmdisk: (pages needed: %d + %d free: %d)\n",
pmdisk_pages,PAGES_FOR_IO,nr_free_pages());
if (!enough_free_mem())
return -ENOMEM;
if (!enough_swap())
return -ENOSPC;
if ((error = alloc_pagedir())) {
pr_debug("pmdisk: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
pr_debug("pmdisk: Allocating image pages failed.\n");
free_pagedir();
return error;
}
nr_copy_pages_check = pmdisk_pages;
pagedir_order_check = pagedir_order;
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them
*/
drain_local_pages();
/* copy */
copy_pages();
/*
* End of critical section. From now on, we can write to memory,
* but we should not touch disk. This specially means we must _not_
* touch swap space! Except we must write out our image of course.
*/
pr_debug("pmdisk: %d pages copied\n", pmdisk_pages );
return 0;
}
/**
 *	suspend_save_image - Prepare and write saved image to swap.
 *
 *	Devices are resumed first so the swap device can be written.  The
 *	ignored swap devices are made unusable for the duration of the
 *	write; the second lock_swapdevices() call makes them usable again.
 *
 *	It is important _NOT_ to umount filesystems at this point. We want
 *	them synced (in case something goes wrong) but we DO not want to
 *	mark the filesystem clean: it is not. (And it does not matter, if
 *	we resume correctly, we'll mark the system clean, anyway.)
 *
 *	Returns the result of write_suspend_image().
 */
static int suspend_save_image(void)
{
	int status;

	device_resume();

	lock_swapdevices();
	status = write_suspend_image();
	lock_swapdevices();

	return status;
}
/*
 * pmdisk_resume - final checks after the image has been copied back.
 *
 * Verifies that the page count and directory order match the values
 * recorded by pmdisk_suspend() (BUG otherwise), then flushes the TLB
 * including global entries.  Always returns 0.
 */
int pmdisk_resume(void)
{
	BUG_ON (nr_copy_pages_check != pmdisk_pages);
	BUG_ON (pagedir_order_check != pagedir_order);

	/* Even mappings of "global" things (vmalloc) need to be fixed */
	__flush_tlb_global();

	return 0;
}
/* pmdisk_arch_suspend() is implemented in arch/?/power/pmdisk.S,
and basically does:
if (!resume) {
save_processor_state();
SAVE_REGISTERS
return pmdisk_suspend();
}
GO_TO_SWAPPER_PAGE_TABLES
COPY_PAGES_BACK
RESTORE_REGISTERS
restore_processor_state();
return pmdisk_resume();
*/
/* More restore stuff */
#define does_collide(addr) does_collide_order(pm_pagedir_nosave, addr, 0)
/*
 * does_collide_order - test a block against the pages to be restored.
 * @pagedir:	page directory to check against.
 * @addr:	start address of the block.
 * @order:	the block spans PAGE_SIZE << @order bytes.
 *
 * Returns 1 if any of the first pmdisk_pages entries of @pagedir has an
 * orig_address inside [@addr, @addr + (PAGE_SIZE << @order)), else 0.
 */
static int __init does_collide_order(suspend_pagedir_t *pagedir,
				     unsigned long addr, int order)
{
	const unsigned long end = addr + (PAGE_SIZE<<order);
	int idx;

	for (idx = 0; idx < pmdisk_pages; idx++) {
		if (pagedir[idx].orig_address < addr)
			continue;
		if (pagedir[idx].orig_address >= end)
			continue;
		return 1;
	}
	return 0;
}
/*
 * check_pagedir - give every directory entry a page that is safe to use.
 *
 * For each entry, pages are allocated until one is found that does not
 * collide with any address the image will be restored to; that page
 * becomes the entry's address.  Colliding pages are not freed here.
 *
 * Returns 0 on success or -ENOMEM when the allocator runs dry.
 */
static int __init check_pagedir(void)
{
	int idx;

	for (idx = 0; idx < pmdisk_pages; idx++) {
		unsigned long candidate;

		for (;;) {
			candidate = get_zeroed_page(GFP_ATOMIC);
			if (!candidate)
				return -ENOMEM;
			if (!does_collide(candidate))
				break;
		}
		pm_pagedir_nosave[idx].address = candidate;
	}
	return 0;
}
/*
 * relocate_pagedir - move the page directory out of the way of the restore.
 *
 * If the block currently holding the directory overlaps an address that
 * will be restored, blocks of the same order are allocated until one is
 * found that does not collide; the directory is copied there and
 * pm_pagedir_nosave is updated.  Colliding blocks are kept on a list until
 * the search ends so the allocator cannot hand them out again, then freed.
 *
 * Returns 0 if no relocation was needed or it succeeded, -ENOMEM if the
 * allocator ran out before a usable block was found.
 */
static int __init relocate_pagedir(void)
{
/*
* We have to avoid recursion (not to overflow kernel stack),
* and that's why code looks pretty cryptic
*/
suspend_pagedir_t *old_pagedir = pm_pagedir_nosave;
void **eaten_memory = NULL;
void **c = eaten_memory, *m, *f;
int err;
pr_debug("pmdisk: Relocating pagedir\n");
if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
pr_debug("pmdisk: Relocation not necessary\n");
return 0;
}
err = -ENOMEM;
while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
if (!does_collide_order(old_pagedir, (unsigned long)m,
pagedir_order)) {
pm_pagedir_nosave =
memcpy(m, old_pagedir,
PAGE_SIZE << pagedir_order);
err = 0;
break;
}
/*
* Colliding block: push it on the list.  Its first word stores the
* previous list head (c), and it becomes the new head.
*/
eaten_memory = m;
printk( "." );
*eaten_memory = c;
c = eaten_memory;
}
/* Walk the list from its head and release every block that was set aside. */
c = eaten_memory;
while(c) {
printk(":");
f = c;
c = *c; /* read the link before the block is freed */
free_pages((unsigned long)f, pagedir_order);
}
printk("|\n");
return err;
}
static struct block_device * resume_bdev;
/**
* Using bio to read from swap.
* This code requires a bit more work than just using buffer heads
* but, it is the recommended way for 2.5/2.6.
* The following are to signal the beginning and end of I/O. Bios
* finish asynchronously, while we want them to happen synchronously.
* A simple atomic_t, and a wait loop take care of this problem.
*/
static atomic_t io_done = ATOMIC_INIT(0);
/* start_io - mark an I/O as in flight by setting io_done to 1. */
static void start_io(void)
{
atomic_set(&io_done,1);
}
/*
 * end_io - bio completion callback installed by submit().
 *
 * Clears io_done so wait_io() stops waiting.  @bio, @num and @err are not
 * looked at, so a failed transfer is not reported to the submitter.
 * Always returns 0.
 */
static int end_io(struct bio * bio, unsigned int num, int err)
{
atomic_set(&io_done,0);
return 0;
}
/* wait_io - call io_schedule() repeatedly until end_io() has cleared io_done. */
static void wait_io(void)
{
while(atomic_read(&io_done))
io_schedule();
}
/**
* submit - submit BIO request.
* @rw: READ or WRITE.
* @off physical offset of page.
* @page: page we're reading or writing.
*
* Straight from the textbook - allocate and initialize the bio.
* If we're writing, make sure the page is marked as dirty.
* Then submit it and wait.
*/
/*
 * Returns 0 once the request has completed, -ENOMEM if no bio could be
 * allocated, or -EFAULT if the page could not be added to the bio.  The
 * completion status of the transfer itself is not returned (end_io()
 * ignores it).
 */
static int submit(int rw, pgoff_t page_off, void * page)
{
int error = 0;
struct bio * bio;
bio = bio_alloc(GFP_ATOMIC,1);
if (!bio)
return -ENOMEM;
/* Convert the page offset into 512-byte sectors. */
bio->bi_sector = page_off * (PAGE_SIZE >> 9);
/*
* NOTE(review): this takes a second reference, but only one bio_put()
* follows (at Done:) and end_io() does not drop one either -- looks
* like the bio is never released; confirm against the bio refcount rules.
*/
bio_get(bio);
bio->bi_bdev = resume_bdev;
bio->bi_end_io = end_io;
if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
printk("pmdisk: ERROR: adding page to bio at %ld\n",page_off);
error = -EFAULT;
goto Done;
}
if (rw == WRITE)
bio_set_pages_dirty(bio);
/* io_done must be set before the bio is queued, as end_io() clears it. */
start_io();
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
wait_io();
Done:
bio_put(bio);
return error;
}
/*
 * read_page - synchronously read one page through submit().
 * page_off and page are passed to submit() unchanged with rw == READ;
 * the return value is submit()'s.
 */
static int
read_page(pgoff_t page_off, void * page)
{
return submit(READ,page_off,page);
}
/*
 * write_page - synchronously write one page through submit().
 * page_off and page are passed to submit() unchanged with rw == WRITE;
 * the return value is submit()'s.
 */
static int
write_page(pgoff_t page_off, void * page)
{
return submit(WRITE,page_off,page);
}
extern dev_t __init name_to_dev_t(const char *line);
/*
 * check_sig - look for the pmdisk signature in page 0 of the resume device.
 *
 * Reads page 0 into pmdisk_header.  If its signature matches PMDISK_SIG,
 * the saved original signature is put back and the page is rewritten, so
 * the signature is reset before the image is used.
 *
 * Returns 0 if the signature was found and reset, -EINVAL if the page does
 * not carry the pmdisk signature, or the error from read_page()/write_page().
 */
static int __init check_sig(void)
{
	int error;

	memset(&pmdisk_header,0,sizeof(pmdisk_header));
	error = read_page(0,&pmdisk_header);
	if (error)
		return error;

	if (memcmp(PMDISK_SIG,pmdisk_header.sig,10)) {
		/*
		 * pr_debug() supplies its own log level; passing KERN_ERR
		 * as well would embed a second level prefix in the message.
		 */
		pr_debug("pmdisk: Invalid partition type.\n");
		return -EINVAL;
	}

	/*
	 * Reset swap signature now.
	 */
	memcpy(pmdisk_header.sig,pmdisk_header.orig_sig,10);
	error = write_page(0,&pmdisk_header);
	if (!error)
		pr_debug("pmdisk: Signature found, resuming\n");
	return error;
}
/*
 * Compare the image header in pmdisk_info against the running kernel.
 * This is not foolproof, but it catches the obvious mismatches.
 *
 * Returns NULL when everything matches, otherwise a short string naming
 * the first field that differs.
 */
static const char * __init sanity_check(void)
{
	const char * mismatch = NULL;

	dump_pmdisk_info();

	if (pmdisk_info.version_code != LINUX_VERSION_CODE)
		mismatch = "kernel version";
	else if (pmdisk_info.num_physpages != num_physpages)
		mismatch = "memory size";
	else if (strcmp(pmdisk_info.uts.sysname,system_utsname.sysname) != 0)
		mismatch = "system type";
	else if (strcmp(pmdisk_info.uts.release,system_utsname.release) != 0)
		mismatch = "kernel release";
	else if (strcmp(pmdisk_info.uts.version,system_utsname.version) != 0)
		mismatch = "version";
	else if (strcmp(pmdisk_info.uts.machine,system_utsname.machine) != 0)
		mismatch = "machine";
	else if (pmdisk_info.cpus != num_online_cpus())
		mismatch = "number of cpus";

	return mismatch;
}
/*
 * check_header - load the image header and validate it.
 *
 * Reads the page referenced by pmdisk_header.pmdisk_info into pmdisk_info,
 * runs sanity_check() on it and, if it matches, records the image size in
 * pmdisk_pages.
 *
 * Returns 0 on success, -EPERM on a mismatch, or the read_page() error.
 */
static int __init check_header(void)
{
	const char * why;
	int error;

	init_header();
	error = read_page(swp_offset(pmdisk_header.pmdisk_info), &pmdisk_info);
	if (error)
		return error;

	/* Is this same machine? */
	why = sanity_check();
	if (why != NULL) {
		printk(KERN_ERR "pmdisk: Resume mismatch: %s\n",why);
		return -EPERM;
	}

	pmdisk_pages = pmdisk_info.image_pages;
	return 0;
}
/*
 * read_pagedir - allocate the page directory and read it from the image.
 *
 * The number of pagedir pages comes from the on-disk header, so it is
 * validated before being used as a loop bound into pmdisk_info.pagedir[].
 * On success pm_pagedir_nosave points at the freshly read directory and
 * pagedir_order holds its allocation order.
 *
 * Returns 0 on success, -EINVAL if the header's page count is unusable,
 * -ENOMEM if the allocation fails, -EFAULT if a pagedir slot has a zero
 * swap offset, or the error from read_page().  On a read failure the
 * allocation is freed again.
 */
static int __init read_pagedir(void)
{
	unsigned long addr;
	int i, n = pmdisk_info.pagedir_pages;
	int error = 0;

	/*
	 * Refuse counts that would index past the pagedir[] table (or that
	 * are zero/negative): the value was read from disk and cannot be
	 * trusted.  Assumes pmdisk_info.pagedir is an in-struct array.
	 */
	if (n <= 0 ||
	    n > (int)(sizeof(pmdisk_info.pagedir) / sizeof(pmdisk_info.pagedir[0])))
		return -EINVAL;

	pagedir_order = get_bitmask_order(n);

	addr =__get_free_pages(GFP_ATOMIC, pagedir_order);
	if (!addr)
		return -ENOMEM;
	pm_pagedir_nosave = (struct pbe *)addr;

	pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);

	for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
		unsigned long offset = swp_offset(pmdisk_info.pagedir[i]);
		if (offset)
			error = read_page(offset, (void *)addr);
		else
			error = -EFAULT;
	}
	if (error)
		free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);
	return error;
}
/**
 *	read_image_data - Read image pages from swap.
 *
 *	Walks the first pmdisk_pages entries of pm_pagedir_nosave and reads
 *	each page from its swap location into the address recorded in the
 *	entry, stopping at the first error.
 *
 *	You do not need to check for overlaps, check_pagedir()
 *	already did that.
 */
static int __init read_image_data(void)
{
	struct pbe * entry = pm_pagedir_nosave;
	int error = 0;
	int i = 0;

	printk( "Reading image data (%d pages): ", pmdisk_pages );
	while (i < pmdisk_pages && !error) {
		/* progress dot every 100 pages */
		if (i % 100 == 0)
			printk( "." );
		error = read_page(swp_offset(entry->swap_address),
				  (void *)entry->address);
		i++;
		entry++;
	}
	printk(" %d done.\n",i);
	return error;
}
/*
 * read_suspend_image - run every step needed to load the saved image.
 *
 * Steps run in order: signature, header, page directory, relocation,
 * pagedir check, image data.  Failures before the pagedir has been read
 * return directly; any later failure frees the page directory first.
 */
static int __init read_suspend_image(void)
{
	int error;

	error = check_sig();
	if (error)
		return error;

	error = check_header();
	if (error)
		return error;

	error = read_pagedir();
	if (error)
		return error;

	/* From here on the pagedir is allocated and must be freed on failure. */
	error = relocate_pagedir();
	if (!error)
		error = check_pagedir();
	if (!error)
		error = read_image_data();
	if (error)
		free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);

	return error;
}
/**
 *	pmdisk_save - Snapshot memory
 *
 *	Refuses to run (-EPERM) when built with high- or discontig-mem.
 *	Otherwise, once arch_prepare_suspend() succeeds, calls
 *	pmdisk_arch_suspend(0) with interrupts disabled and the processor
 *	state saved, and returns its result.
 */
int pmdisk_save(void)
{
	int error;

#if defined (CONFIG_HIGHMEM) || defined (CONFIG_DISCONTIGMEM)
	pr_debug("pmdisk: not supported with high- or discontig-mem.\n");
	return -EPERM;
#endif
	error = arch_prepare_suspend();
	if (!error) {
		local_irq_disable();
		save_processor_state();
		error = pmdisk_arch_suspend(0);
		restore_processor_state();
		local_irq_enable();
	}
	return error;
}
/**
* pmdisk_write - Write saved memory image to swap.
*
* pmdisk_arch_suspend(0) returns after system is resumed.
*
* pmdisk_arch_suspend() copies all "used" memory to "free" memory,
* then unsuspends all device drivers, and writes memory to disk
* using normal kernel mechanism.
*
* This function itself only forwards to suspend_save_image() and
* returns its result.
*/
int pmdisk_write(void)
{
return suspend_save_image();
}
/**
 *	pmdisk_read - Read saved image from swap.
 *
 *	Resolves resume_file to a device, opens it, reads the suspend
 *	image from it and closes the device again.
 *
 *	Returns -ENOENT when no resume file is configured, the error from
 *	opening the device, or the result of read_suspend_image().
 */
int __init pmdisk_read(void)
{
	int error;

	if (resume_file[0] == '\0')
		return -ENOENT;

	resume_device = name_to_dev_t(resume_file);
	pr_debug("pmdisk: Resume From Partition: %s\n", resume_file);

	resume_bdev = open_by_devnum(resume_device, FMODE_READ);
	if (IS_ERR(resume_bdev)) {
		error = PTR_ERR(resume_bdev);
	} else {
		set_blocksize(resume_bdev, PAGE_SIZE);
		error = read_suspend_image();
		blkdev_put(resume_bdev);
	}

	if (error)
		pr_debug("pmdisk: Error %d resuming\n", error);
	else
		pr_debug("Reading resume file was successful\n");
	return error;
}
/**
* pmdisk_restore - Replace running kernel with saved image.
*
* Calls pmdisk_arch_suspend() with a non-zero argument, which selects
* its restore path, while interrupts are disabled and the processor
* state is saved. Returns the value pmdisk_arch_suspend() returns.
*/
int __init pmdisk_restore(void)
{
int error;
/* Interrupts stay off for the whole pmdisk_arch_suspend() call. */
local_irq_disable();
save_processor_state();
error = pmdisk_arch_suspend(1);
restore_processor_state();
local_irq_enable();
return error;
}
/**
* pmdisk_free - Free memory allocated to hold snapshot.
*
* Logs a debug message and calls free_pagedir(). Always returns 0.
*/
int pmdisk_free(void)
{
pr_debug( "Freeing prev allocated pagedir\n" );
free_pagedir();
return 0;
}
/*
 * pmdisk_setup - handler for the "pmdisk=" kernel command line option.
 * @str: text following "pmdisk=".
 *
 * An empty value or "off" clears resume_file; anything else is copied
 * into it (at most 255 bytes).
 * NOTE(review): strncpy() does not terminate on truncation; this appears
 * to rely on resume_file being at least 256 bytes with a zero last byte -
 * confirm against its definition.
 *
 * Always returns 1.
 */
static int __init pmdisk_setup(char *str)
{
	if (str[0] == '\0' || strcmp(str,"off") == 0)
		resume_file[0] = '\0';
	else
		strncpy(resume_file, str, 255);
	return 1;
}
__setup("pmdisk=", pmdisk_setup);
#include <linux/suspend.h>
#include <linux/utsname.h>
/* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but
we probably do not take enough locks for switching consoles, etc,
......@@ -9,7 +10,20 @@
#endif
#ifdef CONFIG_PM_DISK
/*
 * Header describing a suspend image. The structure is aligned to
 * PAGE_SIZE.
 */
struct swsusp_info {
struct new_utsname uts; /* utsname data (sysname, release, version, machine, ...) */
u32 version_code; /* kernel version code */
unsigned long num_physpages; /* number of physical pages */
int cpus; /* number of CPUs */
unsigned long image_pages; /* number of pages in the image */
unsigned long pagedir_pages; /* number of valid entries in pagedir[] */
suspend_pagedir_t * suspend_pagedir; /* page directory pointer */
swp_entry_t pagedir[768]; /* swap entries, presumably one per page directory page */
} __attribute__((aligned(PAGE_SIZE)));
#ifdef CONFIG_SOFTWARE_SUSPEND
extern int pm_suspend_disk(void);
#else
......@@ -18,7 +32,6 @@ static inline int pm_suspend_disk(void)
return -EPERM;
}
#endif
extern struct semaphore pm_sem;
#define power_attr(_name) \
static struct subsys_attribute _name##_attr = { \
......
......@@ -62,6 +62,7 @@
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/bio.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
......@@ -70,25 +71,16 @@
#include "power.h"
unsigned char software_suspend_enabled = 0;
#define NORESUME 1
#define RESUME_SPECIFIED 2
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;
extern int is_head_of_free_region(struct page *);
/* Locks */
spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
/* Variables to be preserved over suspend */
static int pagedir_order_check;
static int nr_copy_pages_check;
int pagedir_order_check;
int nr_copy_pages_check;
static int resume_status;
static char resume_file[256] = ""; /* For resume= kernel option */
extern char resume_file[];
static dev_t resume_device;
/* Local variables that should not be affected by save */
unsigned int nr_copy_pages __nosavedata = 0;
......@@ -107,19 +99,19 @@ unsigned int nr_copy_pages __nosavedata = 0;
MMU hardware.
*/
suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
static int pagedir_order __nosavedata = 0;
suspend_pagedir_t *pagedir_save;
int pagedir_order __nosavedata = 0;
struct link {
char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
swp_entry_t next;
};
#define SWSUSP_SIG "S1SUSPEND"
union diskpage {
union swap_header swh;
struct link link;
struct suspend_header sh;
};
/*
 * Page-sized, packed structure whose last 20 bytes are the two 10-byte
 * signatures; `reserved` pads everything in front of the swsusp_info
 * entry so the total comes to PAGE_SIZE.
 */
struct swsusp_header {
char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; /* padding: PAGE_SIZE minus the fields below */
swp_entry_t swsusp_info; /* swap entry; presumably locates the struct swsusp_info page */
char orig_sig[10]; /* copy of the signature that was in `sig` before it was replaced */
char sig[10]; /* current signature */
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
struct swsusp_info swsusp_info;
/*
* XXX: We try to keep some more pages free so that I/O operations succeed
......@@ -130,51 +122,10 @@ union diskpage {
static const char name_suspend[] = "Suspend Machine: ";
static const char name_resume[] = "Resume Machine: ";
/*
* Debug
*/
#define DEBUG_DEFAULT
#undef DEBUG_PROCESS
#undef DEBUG_SLOW
#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */
#ifdef DEBUG_DEFAULT
# define PRINTK(f, a...) printk(f, ## a)
#else
# define PRINTK(f, a...) do { } while(0)
#endif
#ifdef DEBUG_SLOW
#define MDELAY(a) mdelay(a)
#else
#define MDELAY(a) do { } while(0)
#endif
/*
* Saving part...
*/
static __inline__ int fill_suspend_header(struct suspend_header *sh)
{
memset((char *)sh, 0, sizeof(*sh));
sh->version_code = LINUX_VERSION_CODE;
sh->num_physpages = num_physpages;
strncpy(sh->machine, system_utsname.machine, 8);
strncpy(sh->version, system_utsname.version, 20);
/* FIXME: Is this bogus? --RR */
sh->num_cpus = num_online_cpus();
sh->page_size = PAGE_SIZE;
sh->suspend_pagedir = pagedir_nosave;
BUG_ON (pagedir_save != pagedir_nosave);
sh->num_pbes = nr_copy_pages;
/* TODO: needed? mounted fs' last mounted date comparison
* [so they haven't been mounted since last suspend.
* Maybe it isn't.] [we'd need to do this for _all_ fs-es]
*/
return 0;
}
/* We memorize in swapfile_used what swap devices are used for suspension */
#define SWAPFILE_UNUSED 0
#define SWAPFILE_SUSPEND 1 /* This is the suspending device */
......@@ -182,47 +133,30 @@ static __inline__ int fill_suspend_header(struct suspend_header *sh)
static unsigned short swapfile_used[MAX_SWAPFILES];
static unsigned short root_swap;
#define MARK_SWAP_SUSPEND 0
#define MARK_SWAP_RESUME 2
static void mark_swapfiles(swp_entry_t prev, int mode)
static int mark_swapfiles(swp_entry_t prev)
{
swp_entry_t entry;
union diskpage *cur;
struct page *page;
int error;
if (root_swap == 0xFFFF) /* ignored */
return;
page = alloc_page(GFP_ATOMIC);
if (!page)
panic("Out of memory in mark_swapfiles");
cur = page_address(page);
/* XXX: this is dirty hack to get first page of swap file */
entry = swp_entry(root_swap, 0);
rw_swap_page_sync(READ, entry, page);
if (mode == MARK_SWAP_RESUME) {
if (!memcmp("S1",cur->swh.magic.magic,2))
memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
else if (!memcmp("S2",cur->swh.magic.magic,2))
memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
name_resume, cur->swh.magic.magic);
rw_swap_page_sync(READ,
swp_entry(root_swap, 0),
virt_to_page((unsigned long)&swsusp_header));
if (!memcmp("SWAP-SPACE",swsusp_header.sig,10) ||
!memcmp("SWAPSPACE2",swsusp_header.sig,10)) {
memcpy(swsusp_header.orig_sig,swsusp_header.sig,10);
memcpy(swsusp_header.sig,SWSUSP_SIG,10);
swsusp_header.swsusp_info = prev;
error = rw_swap_page_sync(WRITE,
swp_entry(root_swap, 0),
virt_to_page((unsigned long)
&swsusp_header));
} else {
if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
memcpy(cur->swh.magic.magic,"S1SUSP....",10);
else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
memcpy(cur->swh.magic.magic,"S2SUSP....",10);
else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
cur->link.next = prev; /* prev is the first/last swap page of the resume area */
/* link.next lies *no more* in last 4/8 bytes of magic */
pr_debug("swsusp: Partition is not swap space.\n");
error = -ENODEV;
}
rw_swap_page_sync(WRITE, entry, page);
__free_page(page);
return error;
}
/*
* Check whether the swap device is the specified resume
* device, irrespective of whether they are specified by
......@@ -243,7 +177,7 @@ static int is_resume_device(const struct swap_info_struct *swap_info)
resume_device == MKDEV(imajor(inode), iminor(inode));
}
static void read_swapfiles(void) /* This is called before saving image */
int swsusp_swap_check(void) /* This is called before saving image */
{
int i, len;
......@@ -274,114 +208,209 @@ static void read_swapfiles(void) /* This is called before saving image */
}
}
swap_list_unlock();
return (root_swap != 0xffff) ? 0 : -ENODEV;
}
static void lock_swapdevices(void) /* This is called after saving image so modification
will be lost after resume... and that's what we want. */
/**
* This is called after saving image so modification
* will be lost after resume... and that's what we want.
* we make the device unusable. A new call to
* lock_swapdevices can unlock the devices.
*/
static void lock_swapdevices(void)
{
int i;
swap_list_lock();
for(i = 0; i< MAX_SWAPFILES; i++)
if(swapfile_used[i] == SWAPFILE_IGNORED) {
swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
lock_swapdevices can unlock the devices. */
swap_info[i].flags ^= 0xFF;
}
swap_list_unlock();
}
/**
* write_suspend_image - Write entire image to disk.
* write_swap_page - Write one page to a fresh swap location.
* @addr: Address we're writing.
* @loc: Place to store the entry we used.
*
* After writing suspend signature to the disk, suspend may no
* longer fail: we have ready-to-run image in swap, and rollback
* would happen on next reboot -- corrupting data.
* Allocate a new swap entry and 'sync' it. Note we discard -EIO
* errors. That is an artifact left over from swsusp. It did not
* check the return of rw_swap_page_sync() at all, since most pages
* written back to swap would return -EIO.
* This is a partial improvement, since we will at least return other
* errors, though we need to eventually fix the damn code.
*/
static int write_page(unsigned long addr, swp_entry_t * loc)
{
	swp_entry_t entry = get_swap_page();
	int error;

	/* No entry, or an entry that is not on the suspend device. */
	if (!swp_offset(entry) ||
	    swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
		return -ENOSPC;

	error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
	/* -EIO from the sync write is deliberately treated as success. */
	if (error == -EIO)
		error = 0;
	/* *loc is only written when the page went out. */
	if (!error)
		*loc = entry;
	return error;
}
/**
* data_free - Free the swap entries used by the saved image.
*
* Note: The buffer we allocate to use to write the suspend header is
* not freed; its not needed since the system is going down anyway
* (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
* Walk the list of used swap entries and free each one.
*/
static int write_suspend_image(void)
static void data_free(void)
{
swp_entry_t entry;
int i;
swp_entry_t entry, prev = { 0 };
int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
unsigned long address;
struct page *page;
if (!buffer)
return -ENOMEM;
for (i = 0; i < nr_copy_pages; i++) {
entry = (pagedir_nosave + i)->swap_address;
if (entry.val)
swap_free(entry);
else
break;
(pagedir_nosave + i)->swap_address = (swp_entry_t){0};
}
}
/**
* data_write - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
static int data_write(void)
{
int error = 0;
int i;
printk( "Writing data to swap (%d pages): ", nr_copy_pages );
for (i=0; i<nr_copy_pages; i++) {
for (i = 0; i < nr_copy_pages && !error; i++) {
if (!(i%100))
printk( "." );
entry = get_swap_page();
if (!entry.val)
panic("\nNot enough swapspace when writing data" );
if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nPage %d: not enough swapspace on suspend device", i );
address = (pagedir_nosave+i)->address;
page = virt_to_page(address);
rw_swap_page_sync(WRITE, entry, page);
(pagedir_nosave+i)->swap_address = entry;
error = write_page((pagedir_nosave+i)->address,
&((pagedir_nosave+i)->swap_address));
}
printk( "|\n" );
printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
for (i=0; i<nr_pgdir_pages; i++) {
cur = (union diskpage *)((char *) pagedir_nosave)+i;
BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
printk( "." );
entry = get_swap_page();
if (!entry.val) {
printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
panic("Don't know how to recover");
free_page((unsigned long) buffer);
return -ENOSPC;
}
printk(" %d Pages done.\n",i);
return error;
}
if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nNot enough swapspace for pagedir on suspend device" );
/*
 * dump_info - print every field of swsusp_info except the pagedir table
 * and pointer through pr_debug(). Purely diagnostic; modifies nothing.
 */
static void dump_info(void)
{
pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages);
}
BUG_ON (sizeof(swp_entry_t) != sizeof(long));
BUG_ON (PAGE_SIZE % sizeof(struct pbe));
/*
 * init_header - fill swsusp_info from the running system.
 *
 * Zeroes the structure, then records the kernel version code, the
 * utsname data, the physical page count, the number of online CPUs,
 * the page directory pointer and the image size, and finally dumps the
 * result with dump_info().
 */
static void init_header(void)
{
	memset(&swsusp_info,0,sizeof(swsusp_info));

	/* identity of the running kernel and machine */
	memcpy(&swsusp_info.uts,&system_utsname,sizeof(system_utsname));
	swsusp_info.version_code = LINUX_VERSION_CODE;
	swsusp_info.cpus = num_online_cpus();
	swsusp_info.num_physpages = num_physpages;

	/* description of the image itself */
	swsusp_info.image_pages = nr_copy_pages;
	swsusp_info.suspend_pagedir = pagedir_nosave;

	dump_info();
}
cur->link.next = prev;
page = virt_to_page((unsigned long)cur);
rw_swap_page_sync(WRITE, entry, page);
prev = entry;
static int close_swap(void)
{
swp_entry_t entry;
int error;
error = write_page((unsigned long)&swsusp_info,&entry);
if (!error) {
printk( "S" );
error = mark_swapfiles(entry);
printk( "|\n" );
}
printk("H");
BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
BUG_ON (sizeof(struct link) != PAGE_SIZE);
entry = get_swap_page();
if (!entry.val)
panic( "\nNot enough swapspace when writing header" );
if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nNot enough swapspace for header on suspend device" );
cur = (void *) buffer;
if (fill_suspend_header(&cur->sh))
BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */
cur->link.next = prev;
page = virt_to_page((unsigned long)cur);
rw_swap_page_sync(WRITE, entry, page);
prev = entry;
printk( "S" );
mark_swapfiles(prev, MARK_SWAP_SUSPEND);
printk( "|\n" );
MDELAY(1000);
return 0;
return error;
}
/**
 *	free_pagedir_entries - Release the swap entries of the page directory.
 *
 *	Calls swap_free() on the first swsusp_info.pagedir_pages entries of
 *	swsusp_info.pagedir[].
 */
static void free_pagedir_entries(void)
{
	int count = swsusp_info.pagedir_pages;
	int idx = 0;

	while (idx < count) {
		swap_free(swsusp_info.pagedir[idx]);
		idx++;
	}
}
/**
* write_pagedir - Write the array of pages holding the page directory.
* @last: Last swap entry we write (needed for header).
*/
static int write_pagedir(void)
{
unsigned long addr = (unsigned long)pagedir_nosave;
int error = 0;
int n = SUSPEND_PD_PAGES(nr_copy_pages);
int i;
swsusp_info.pagedir_pages = n;
printk( "Writing pagedir (%d pages)\n", n);
for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
error = write_page(addr,&swsusp_info.pagedir[i]);
return error;
}
/**
 *	write_suspend_image - Write entire image and metadata.
 *
 *	Initializes the header, then writes the data pages, the page
 *	directory and finally the header/signature.  If the data write
 *	fails the data swap entries are released; if a later step fails
 *	the pagedir swap entries are released as well.
 */
static int write_suspend_image(void)
{
	int error;

	init_header();

	error = data_write();
	if (!error) {
		error = write_pagedir();
		if (!error)
			error = close_swap();
		/* Either of the last two steps failed: drop pagedir entries. */
		if (error)
			free_pagedir_entries();
	}
	if (error)
		data_free();

	return error;
}
#ifdef CONFIG_HIGHMEM
struct highmem_page {
char *data;
......@@ -438,22 +467,30 @@ static int save_highmem_zone(struct zone *zone)
}
return 0;
}
#endif /* CONFIG_HIGHMEM */
/*
 * save_highmem - save every highmem zone via save_highmem_zone().
 *
 * Without CONFIG_HIGHMEM this is a no-op.  Returns 0 on success or the
 * first non-zero result of save_highmem_zone().
 */
static int save_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct zone *zone;

	pr_debug("swsusp: Saving Highmem\n");
	for_each_zone(zone) {
		int res;

		if (!is_highmem(zone))
			continue;
		res = save_highmem_zone(zone);
		if (res)
			return res;
	}
#endif
	return 0;
}
static int restore_highmem(void)
{
#ifdef CONFIG_HIGHMEM
printk("swsusp: Restoring Highmem\n");
while (highmem_copy) {
struct highmem_page *save = highmem_copy;
void *kaddr;
......@@ -465,9 +502,10 @@ static int restore_highmem(void)
free_page((long) save->data);
kfree(save);
}
#endif
return 0;
}
#endif
static int pfn_is_nosave(unsigned long pfn)
{
......@@ -476,57 +514,86 @@ static int pfn_is_nosave(unsigned long pfn)
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
/* if *pagedir_p != NULL it also copies the counted pages */
static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p)
/**
* saveable - Determine whether a page should be cloned or not.
* @pfn: The page
*
* We save a page if it's Reserved, and not in the range of pages
* statically defined as 'unsaveable', or if it isn't reserved, and
* isn't part of a free chunk of pages.
* If it is part of a free chunk, we update @pfn to point to the last
* page of the chunk.
*/
static int saveable(struct zone * zone, unsigned long * zone_pfn)
{
unsigned long zone_pfn, chunk_size, nr_copy_pages = 0;
struct pbe *pbe = *pagedir_p;
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
struct page *page;
unsigned long pfn = zone_pfn + zone->zone_start_pfn;
unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
unsigned long chunk_size;
struct page * page;
if (!(pfn%1000))
printk(".");
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
continue;
if (PageReserved(page) && pfn_is_nosave(pfn)) {
PRINTK("[nosave pfn 0x%lx]", pfn);
continue;
}
if ((chunk_size = is_head_of_free_region(page))) {
pfn += chunk_size - 1;
zone_pfn += chunk_size - 1;
continue;
if (!pfn_valid(pfn))
return 0;
if (!(pfn%1000))
printk(".");
page = pfn_to_page(pfn);
BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
return 0;
if (PageReserved(page) && pfn_is_nosave(pfn)) {
pr_debug("[nosave pfn 0x%lx]", pfn);
return 0;
}
if ((chunk_size = is_head_of_free_region(page))) {
*zone_pfn += chunk_size - 1;
return 0;
}
return 1;
}
static void count_data_pages(void)
{
struct zone *zone;
unsigned long zone_pfn;
nr_copy_pages = 0;
for_each_zone(zone) {
if (!is_highmem(zone)) {
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
}
nr_copy_pages++;
if (!pbe)
continue;
pbe->orig_address = (long) page_address(page);
/* Copy page is dangerous: it likes to mess with
preempt count on specific cpus. Wrong preempt count is then copied,
oops. */
copy_page((void *)pbe->address, (void *)pbe->orig_address);
pbe++;
}
*pagedir_p = pbe;
return nr_copy_pages;
}
static int count_and_copy_data_pages(struct pbe *pagedir_p)
static void copy_data_pages(void)
{
int nr_copy_pages = 0;
struct zone *zone;
unsigned long zone_pfn;
struct pbe * pbe = pagedir_nosave;
for_each_zone(zone) {
if (!is_highmem(zone))
nr_copy_pages += count_and_copy_zone(zone, &pagedir_p);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
pbe->orig_address = (long) page_address(page);
/* Copy page is dangerous: it likes to mess with
preempt count on specific cpus. Wrong preempt
count is then copied, oops.
*/
copy_page((void *)pbe->address,
(void *)pbe->orig_address);
pbe++;
}
}
}
return nr_copy_pages;
}
static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
{
unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
......@@ -547,119 +614,199 @@ static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
}
}
static void free_suspend_pagedir(unsigned long this_pagedir)
void swsusp_free(void)
{
unsigned long p = (unsigned long)pagedir_save;
struct zone *zone;
for_each_zone(zone) {
if (!is_highmem(zone))
free_suspend_pagedir_zone(zone, this_pagedir);
free_suspend_pagedir_zone(zone, p);
}
free_pages(this_pagedir, pagedir_order);
free_pages(p, pagedir_order);
}
static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
/**
* calc_order - Determine the order of allocation needed for pagedir_save.
*
* This looks tricky, but is just subtle. Please fix it some time.
* Since there are %nr_copy_pages worth of pages in the snapshot, we need
* to allocate enough contiguous space to hold
* (%nr_copy_pages * sizeof(struct pbe)),
* which has the saved/orig locations of the page..
*
* SUSPEND_PD_PAGES() tells us how many pages we need to hold those
* structures, then we call get_bitmask_order(), which will tell us the
* last bit set in the number, starting with 1. (If we need 30 pages, that
* is 0x0000001e in hex. The last bit is the 5th, which is the order we
* would use to allocate 32 contiguous pages).
*
* Since we also need to save those pages, we add the number of pages that
* we need to nr_copy_pages, and in case of an overflow, do the
* calculation again to update the number of pages needed.
*
* With this model, we will tend to waste a lot of memory if we just cross
* an order boundary. Plus, the higher the order of allocation that we try
* to do, the more likely we are to fail in a low-memory situation
* (though we're unlikely to get this far in such a case, since swsusp
* requires half of memory to be free anyway).
*/
static void calc_order(void)
{
int i;
suspend_pagedir_t *pagedir;
struct pbe *p;
struct page *page;
int diff = 0;
int order = 0;
do {
diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
if (diff) {
order += diff;
nr_copy_pages += 1 << diff;
}
} while(diff);
pagedir_order = order;
}
/**
* alloc_pagedir - Allocate the page directory.
*
* First, determine exactly how many contiguous pages we need,
* allocate them, then mark each 'unsavable'.
*/
static int alloc_pagedir(void)
{
calc_order();
pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
pagedir_order);
if(!pagedir_save)
return -ENOMEM;
memset(pagedir_save,0,(1 << pagedir_order) * PAGE_SIZE);
pagedir_nosave = pagedir_save;
return 0;
}
pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
/**
* alloc_image_pages - Allocate pages for the snapshot.
*
*/
p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
if (!pagedir)
return NULL;
static int alloc_image_pages(void)
{
struct pbe * p;
int i;
page = virt_to_page(pagedir);
for(i=0; i < 1<<pagedir_order; i++)
SetPageNosave(page++);
while(nr_copy_pages--) {
for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
if (!p->address) {
free_suspend_pagedir((unsigned long) pagedir);
return NULL;
}
if(!p->address)
goto Error;
SetPageNosave(virt_to_page(p->address));
p->orig_address = 0;
p++;
}
return pagedir;
return 0;
Error:
do {
if (p->address)
free_page(p->address);
p->address = 0;
} while (p-- > pagedir_save);
return -ENOMEM;
}
static int prepare_suspend_processes(void)
/**
* enough_free_mem - Make sure we have enough free memory to snapshot.
*
* Returns TRUE or FALSE after checking the number of available
* free pages.
*/
static int enough_free_mem(void)
{
sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */
if (freeze_processes()) {
printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
thaw_processes();
return 1;
if(nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough free pages: Have %d\n",
nr_free_pages());
return 0;
}
return 0;
return 1;
}
/*
* Try to free as much memory as possible, but do not OOM-kill anyone
/**
* enough_swap - Make sure we have enough swap to save the image.
*
* Returns TRUE or FALSE after checking the total amount of swap
* space available.
*
* Notice: all userland should be stopped at this point, or livelock is possible.
* FIXME: si_swapinfo(&i) returns all swap devices information.
* We should only consider resume_device.
*/
static void free_some_memory(void)
static int enough_swap(void)
{
printk("Freeing memory: ");
while (shrink_all_memory(10000))
printk(".");
printk("|\n");
struct sysinfo i;
si_swapinfo(&i);
if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
return 1;
}
static int suspend_prepare_image(void)
static int swsusp_alloc(void)
{
struct sysinfo i;
unsigned int nr_needed_pages = 0;
int error;
pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
nr_copy_pages,PAGES_FOR_IO,nr_free_pages());
pagedir_nosave = NULL;
printk( "/critical section: ");
#ifdef CONFIG_HIGHMEM
printk( "handling highmem" );
if (save_highmem()) {
printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
if (!enough_free_mem())
return -ENOMEM;
}
printk(", ");
#endif
printk("counting pages to copy" );
drain_local_pages();
nr_copy_pages = count_and_copy_data_pages(NULL);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
if(nr_free_pages() < nr_needed_pages) {
printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
name_suspend, nr_needed_pages-nr_free_pages());
root_swap = 0xFFFF;
return -ENOMEM;
}
si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information.
We should only consider resume_device. */
if (i.freeswap < nr_needed_pages) {
printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
name_suspend, nr_needed_pages-i.freeswap);
if (!enough_swap())
return -ENOSPC;
}
PRINTK( "Alloc pagedir\n" );
pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
if (!pagedir_nosave) {
/* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */
printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend);
return -ENOMEM;
if ((error = alloc_pagedir())) {
pr_debug("suspend: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
pr_debug("suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
nr_copy_pages_check = nr_copy_pages;
pagedir_order_check = pagedir_order;
return 0;
}
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages = 0;
drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */
if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
BUG();
pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
return -ENOMEM;
}
drain_local_pages();
count_data_pages();
printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
swsusp_alloc();
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
drain_local_pages();
copy_data_pages();
/*
* End of critical section. From now on, we can write to memory,
......@@ -667,205 +814,79 @@ static int suspend_prepare_image(void)
* touch swap space! Except we must write out our image of course.
*/
printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages );
return 0;
}
static void suspend_save_image(void)
/* It is important _NOT_ to umount filesystems at this point. We want
* them synced (in case something goes wrong) but we DO not want to mark
* filesystem clean: it is not. (And it does not matter, if we resume
* correctly, we'll mark system clean, anyway.)
*/
int swsusp_write(void)
{
int error;
device_resume();
lock_swapdevices();
write_suspend_image();
lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */
error = write_suspend_image();
/* This will unlock ignored swap devices since writing is finished */
lock_swapdevices();
return error;
/* It is important _NOT_ to umount filesystems at this point. We want
* them synced (in case something goes wrong) but we DO not want to mark
* filesystem clean: it is not. (And it does not matter, if we resume
* correctly, we'll mark system clean, anyway.)
*/
}
/*
 * suspend_power_down - try to power the machine off (or restart it).
 *
 * Takes no arguments. Every path ends in a restart, a power-off, or a
 * halt followed by an endless loop, so the function never returns
 * normally to its caller.
 */
static void suspend_power_down(void)
{
extern int C_A_D;
/* NOTE(review): C_A_D presumably governs Ctrl-Alt-Del handling; it is cleared here - confirm the intent. */
C_A_D = 0;
printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
#ifdef CONFIG_VT
PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
/* Busy-wait one second before shift_state is sampled below. */
mdelay(1000);
/* Restart when exactly one of TEST_SWSUSP and the KG_CTRL bit of shift_state is set. */
if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
machine_restart(NULL);
else
#endif
/*
 * With CONFIG_VT this block is the else-branch of the test above;
 * without CONFIG_VT it runs unconditionally.
 */
{
device_suspend(3);
device_shutdown();
machine_power_off();
}
/* Only reached if the restart / power-off call above came back. */
printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
machine_halt();
/* Spin forever in case machine_halt() returns as well. */
while (1);
/* NOTREACHED */
}
/*
* Magic happens here
*/
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
/*
 * do_magic_resume_1 - quiesce the machine on the resume path.
 *
 * Called from the architecture resume code after it has switched to the
 * swapper page tables and before it copies the saved pages back.
 * Takes suspend_pagedir_lock with interrupts disabled and does NOT
 * release it; the lock is still held when this function returns.
 */
asmlinkage void do_magic_resume_1(void)
{
/* Compiler barrier followed by a memory barrier before touching the lock. */
barrier();
mb();
spin_lock_irq(&suspend_pagedir_lock); /* Taken only to disable interrupts */
device_power_down(3);
PRINTK( "Waiting for DMAs to settle down...\n");
/*
 * Busy-wait one second with interrupts disabled so that readahead or
 * other DMA a driver may have scheduled has a good chance to finish
 * instead of corrupting memory later.
 */
mdelay(1000);
}
asmlinkage void do_magic_resume_2(void)
asmlinkage int swsusp_save(void)
{
BUG_ON (nr_copy_pages_check != nr_copy_pages);
BUG_ON (pagedir_order_check != pagedir_order);
__flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */
PRINTK( "Freeing prev allocated pagedir\n" );
free_suspend_pagedir((unsigned long) pagedir_save);
#ifdef CONFIG_HIGHMEM
printk( "Restoring highmem\n" );
restore_highmem();
#endif
printk("done, devices\n");
device_power_up();
spin_unlock_irq(&suspend_pagedir_lock);
device_resume();
int error = 0;
/* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */
PRINTK( "Fixing swap signatures... " );
mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
PRINTK( "ok\n" );
#ifdef SUSPEND_CONSOLE
acquire_console_sem();
update_screen(fg_console);
release_console_sem();
#endif
if ((error = swsusp_swap_check()))
return error;
return suspend_prepare_image();
}
/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does:
if (!resume) {
do_magic_suspend_1();
save_processor_state();
SAVE_REGISTERS
do_magic_suspend_2();
return;
}
GO_TO_SWAPPER_PAGE_TABLES
do_magic_resume_1();
COPY_PAGES_BACK
RESTORE_REGISTERS
int swsusp_suspend(void)
{
int error;
if ((error = arch_prepare_suspend()))
return error;
local_irq_disable();
save_processor_state();
error = swsusp_arch_suspend();
restore_processor_state();
do_magic_resume_2();
local_irq_enable();
return error;
}
*/
asmlinkage void do_magic_suspend_1(void)
asmlinkage int swsusp_restore(void)
{
mb();
barrier();
BUG_ON(in_atomic());
spin_lock_irq(&suspend_pagedir_lock);
BUG_ON (nr_copy_pages_check != nr_copy_pages);
BUG_ON (pagedir_order_check != pagedir_order);
/* Even mappings of "global" things (vmalloc) need to be fixed */
__flush_tlb_global();
return 0;
}
asmlinkage void do_magic_suspend_2(void)
int swsusp_resume(void)
{
int is_problem;
read_swapfiles();
device_power_down(3);
is_problem = suspend_prepare_image();
device_power_up();
spin_unlock_irq(&suspend_pagedir_lock);
if (!is_problem) {
kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
BUG_ON(in_atomic());
suspend_save_image();
suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
}
printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
barrier();
mb();
spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
free_pages((unsigned long) pagedir_nosave, pagedir_order);
spin_unlock_irq(&suspend_pagedir_lock);
device_resume();
PRINTK( "Fixing swap signatures... " );
mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
PRINTK( "ok\n" );
int error;
local_irq_disable();
save_processor_state();
error = swsusp_arch_resume();
restore_processor_state();
restore_highmem();
local_irq_enable();
return error;
}
/*
 * software_suspend - main interface to the outside world.
 *
 * Must be called from process context (the function may sleep).
 *
 * Returns:
 *   -EAGAIN  if software_suspend_enabled is clear (suspend disabled or
 *            another suspend already running),
 *   -EPERM   if arch_prepare_suspend() fails,
 *   -EBUSY   if prepare_suspend_processes() fails,
 *   otherwise the result of device_suspend(3) (0 once the system has
 *   been resumed).
 */
int software_suspend(void)
{
	int res;

	if (!software_suspend_enabled)
		return -EAGAIN;

	/* Block re-entry for the duration of this suspend attempt. */
	software_suspend_enabled = 0;
	might_sleep();

	if (arch_prepare_suspend()) {
		printk("%sArchitecture failed to prepare\n", name_suspend);
		/*
		 * Re-arm the entry point before bailing out.  Without this
		 * the flag stays cleared forever and every later call fails
		 * with a misleading -EAGAIN instead of -EPERM.
		 */
		software_suspend_enabled = 1;
		return -EPERM;
	}
	if (pm_prepare_console())
		printk( "%sCan't allocate a console... proceeding\n", name_suspend);
	if (!prepare_suspend_processes()) {
		/* At this point, all user processes and "dangerous"
		   kernel threads are stopped. Free some memory, as we
		   need half of memory free. */
		free_some_memory();
		disable_nonboot_cpus();
		/* Save state of all device drivers, and stop them. */
		printk("Suspending devices... ");
		if ((res = device_suspend(3))==0) {
			/* If stopping device drivers worked, we proceed basically into
			 * suspend_save_image.
			 *
			 * do_magic(0) returns after system is resumed.
			 *
			 * do_magic() copies all "used" memory to "free" memory, then
			 * unsuspends all device drivers, and writes memory to disk
			 * using normal kernel mechanism.
			 */
			do_magic(0);
		}
		thaw_processes();
		enable_nonboot_cpus();
	} else
		res = -EBUSY;

	/* Suspend attempt is over (resumed or failed): allow new attempts. */
	software_suspend_enabled = 1;
	MDELAY(1000);
	pm_restore_console();
	return res;
}
/* More restore stuff */
......@@ -874,7 +895,7 @@ int software_suspend(void)
/*
* Returns true if given address/order collides with any orig_address
*/
static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
int order)
{
int i;
......@@ -892,7 +913,7 @@ static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
* We check here that pagedir & pages it points to won't collide with pages
* where we're going to restore from the loaded pages later
*/
static int check_pagedir(void)
static int __init check_pagedir(void)
{
int i;
......@@ -910,7 +931,7 @@ static int check_pagedir(void)
return 0;
}
static int relocate_pagedir(void)
static int __init swsusp_pagedir_relocate(void)
{
/*
* We have to avoid recursion (not to overflow kernel stack),
......@@ -953,283 +974,263 @@ static int relocate_pagedir(void)
free_pages((unsigned long)f, pagedir_order);
}
printk("|\n");
return ret;
return check_pagedir();
}
/*
* Sanity check if this image makes sense with this kernel/swap context
* I really don't think that it's foolproof but more than nothing..
/**
* Using bio to read from swap.
* This code requires a bit more work than just using buffer heads
* but, it is the recommended way for 2.5/2.6.
* The following are to signal the beginning and end of I/O. Bios
* finish asynchronously, while we want them to happen synchronously.
* A simple atomic_t, and a wait loop take care of this problem.
*/
static int sanity_check_failed(char *reason)
{
printk(KERN_ERR "%s%s\n", name_resume, reason);
return -EPERM;
}
static atomic_t io_done = ATOMIC_INIT(0);
static int sanity_check(struct suspend_header *sh)
static void start_io(void)
{
if (sh->version_code != LINUX_VERSION_CODE)
return sanity_check_failed("Incorrect kernel version");
if (sh->num_physpages != num_physpages)
return sanity_check_failed("Incorrect memory size");
if (strncmp(sh->machine, system_utsname.machine, 8))
return sanity_check_failed("Incorrect machine type");
if (strncmp(sh->version, system_utsname.version, 20))
return sanity_check_failed("Incorrect version");
if (sh->num_cpus != num_online_cpus())
return sanity_check_failed("Incorrect number of cpus");
if (sh->page_size != PAGE_SIZE)
return sanity_check_failed("Incorrect PAGE_SIZE");
return 0;
atomic_set(&io_done,1);
}
static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
static int end_io(struct bio * bio, unsigned int num, int err)
{
struct buffer_head *bh;
BUG_ON (pos%PAGE_SIZE);
bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
if (!bh || (!bh->b_data)) {
return -1;
}
memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */
BUG_ON(!buffer_uptodate(bh));
brelse(bh);
atomic_set(&io_done,0);
return 0;
}
}
static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
static void wait_io(void)
{
#if 0
struct buffer_head *bh;
BUG_ON (pos%PAGE_SIZE);
bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
if (!bh || (!bh->b_data)) {
return -1;
}
memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */
BUG_ON(!buffer_uptodate(bh));
generic_make_request(WRITE, bh);
if (!buffer_uptodate(bh))
printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
wait_on_buffer(bh);
brelse(bh);
return 0;
#endif
printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
return 0;
while(atomic_read(&io_done))
io_schedule();
}
extern dev_t __init name_to_dev_t(const char *line);
static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
{
swp_entry_t next;
int i, nr_pgdir_pages;
static struct block_device * resume_bdev;
#define PREPARENEXT \
{ next = cur->link.next; \
next.val = swp_offset(next) * PAGE_SIZE; \
}
/**
* submit - submit BIO request.
* @rw: READ or WRITE.
* @page_off: physical offset of page, in PAGE_SIZE units.
* @page: page we're reading or writing.
*
* Straight from the textbook - allocate and initialize the bio.
* If we're writing, make sure the page is marked as dirty.
* Then submit it and wait.
*/
if (bdev_read_page(bdev, 0, cur)) return -EIO;
static int submit(int rw, pgoff_t page_off, void * page)
{
int error = 0;
struct bio * bio;
if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
(!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
return -EINVAL;
bio = bio_alloc(GFP_ATOMIC,1);
if (!bio)
return -ENOMEM;
bio->bi_sector = page_off * (PAGE_SIZE >> 9);
bio_get(bio);
bio->bi_bdev = resume_bdev;
bio->bi_end_io = end_io;
if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
error = -EFAULT;
goto Done;
}
PREPARENEXT; /* We have to read next position before we overwrite it */
if (!memcmp("S1",cur->swh.magic.magic,2))
memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
else if (!memcmp("S2",cur->swh.magic.magic,2))
memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
else {
if (noresume)
return -EINVAL;
panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
name_resume, cur->swh.magic.magic);
}
if (noresume) {
/* We don't do a sanity check here: we want to restore the swap
whatever version of kernel made the suspend image;
We need to write swap, but swap is *not* enabled so
we must write the device directly */
printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
bdev_write_page(bdev, 0, cur);
}
if (rw == WRITE)
bio_set_pages_dirty(bio);
start_io();
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
wait_io();
Done:
bio_put(bio);
return error;
}
printk( "%sSignature found, resuming\n", name_resume );
MDELAY(1000);
/*
 * bio_read_page - read one page through submit().
 * @page_off: offset handed to submit() unchanged.
 * @page: buffer handed to submit() unchanged.
 *
 * Returns whatever submit(READ, ...) returns.
 */
int bio_read_page(pgoff_t page_off, void * page)
{
	int status = submit(READ, page_off, page);

	return status;
}
if (bdev_read_page(bdev, next.val, cur)) return -EIO;
if (sanity_check(&cur->sh)) /* Is this same machine? */
return -EPERM;
PREPARENEXT;
/*
 * bio_write_page - write one page through submit().
 * @page_off: offset handed to submit() unchanged.
 * @page: buffer handed to submit() unchanged.
 *
 * Returns whatever submit(WRITE, ...) returns.
 */
int bio_write_page(pgoff_t page_off, void * page)
{
	int status = submit(WRITE, page_off, page);

	return status;
}
pagedir_save = cur->sh.suspend_pagedir;
nr_copy_pages = cur->sh.num_pbes;
nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
pagedir_order = get_bitmask_order(nr_pgdir_pages);
/*
 * Sanity check: does this image make sense with this kernel/swap context?
 * This is far from foolproof, but it is better than nothing.
 */
pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
if (!pagedir_nosave)
return -ENOMEM;
/*
 * sanity_check - compare the header in swsusp_info with the running kernel.
 *
 * Calls dump_info() first, then checks the fields in a fixed order and
 * stops at the first one that differs.
 *
 * Returns a short string naming the first mismatching property, or NULL
 * when every checked field matches.
 */
static const char * __init sanity_check(void)
{
	const char *mismatch = NULL;

	dump_info();

	if (swsusp_info.version_code != LINUX_VERSION_CODE)
		mismatch = "kernel version";
	else if (swsusp_info.num_physpages != num_physpages)
		mismatch = "memory size";
	else if (strcmp(swsusp_info.uts.sysname, system_utsname.sysname) != 0)
		mismatch = "system type";
	else if (strcmp(swsusp_info.uts.release, system_utsname.release) != 0)
		mismatch = "kernel release";
	else if (strcmp(swsusp_info.uts.version, system_utsname.version) != 0)
		mismatch = "version";
	else if (strcmp(swsusp_info.uts.machine, system_utsname.machine) != 0)
		mismatch = "machine";
	else if (swsusp_info.cpus != num_online_cpus())
		mismatch = "number of cpus";

	return mismatch;
}
PRINTK( "%sReading pagedir, ", name_resume );
/* We get pages in reverse order of saving! */
for (i=nr_pgdir_pages-1; i>=0; i--) {
BUG_ON (!next.val);
cur = (union diskpage *)((char *) pagedir_nosave)+i;
if (bdev_read_page(bdev, next.val, cur)) return -EIO;
PREPARENEXT;
}
BUG_ON (next.val);
static int __init check_header(void)
{
const char * reason = NULL;
int error;
if (relocate_pagedir())
return -ENOMEM;
if (check_pagedir())
return -ENOMEM;
if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info)))
return error;
printk( "Reading image data (%d pages): ", nr_copy_pages );
for(i=0; i < nr_copy_pages; i++) {
swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
if (!(i%100))
printk( "." );
/* You do not need to check for overlaps...
... check_pagedir already did this work */
if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
return -EIO;
/* Is this same machine? */
if ((reason = sanity_check())) {
printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason);
return -EPERM;
}
printk( "|\n" );
return 0;
nr_copy_pages = swsusp_info.image_pages;
return error;
}
static int __init read_suspend_image(const char * specialfile, int noresume)
static int __init check_sig(void)
{
union diskpage *cur;
unsigned long scratch_page = 0;
int error;
char b[BDEVNAME_SIZE];
resume_device = name_to_dev_t(specialfile);
scratch_page = get_zeroed_page(GFP_ATOMIC);
cur = (void *) scratch_page;
if (cur) {
struct block_device *bdev;
printk("Resuming from device %s\n",
__bdevname(resume_device, b));
bdev = open_by_devnum(resume_device, FMODE_READ);
if (IS_ERR(bdev)) {
error = PTR_ERR(bdev);
} else {
set_blocksize(bdev, PAGE_SIZE);
error = __read_suspend_image(bdev, cur, noresume);
blkdev_put(bdev);
}
} else error = -ENOMEM;
if (scratch_page)
free_page(scratch_page);
switch (error) {
case 0:
PRINTK("Reading resume file was successful\n");
break;
case -EINVAL:
break;
case -EIO:
printk( "%sI/O error\n", name_resume);
break;
case -ENOENT:
printk( "%s%s: No such file or directory\n", name_resume, specialfile);
break;
case -ENOMEM:
printk( "%sNot enough memory\n", name_resume);
break;
default:
printk( "%sError %d resuming\n", name_resume, error );
memset(&swsusp_header,0,sizeof(swsusp_header));
if ((error = bio_read_page(0,&swsusp_header)))
return error;
if (!memcmp(SWSUSP_SIG,swsusp_header.sig,10)) {
memcpy(swsusp_header.sig,swsusp_header.orig_sig,10);
/*
* Reset swap signature now.
*/
error = bio_write_page(0,&swsusp_header);
} else {
pr_debug(KERN_ERR "swsusp: Invalid partition type.\n");
return -EINVAL;
}
MDELAY(1000);
if (!error)
pr_debug("swsusp: Signature found, resuming\n");
return error;
}
/*
 * verify - run check_sig() and, if it succeeds, check_header().
 *
 * Returns the first non-zero result of the two checks, or 0 when both
 * succeed.  check_header() is not called when check_sig() fails.
 */
int __init verify(void)
{
	int error = check_sig();

	if (error)
		return error;

	return check_header();
}
/**
* software_resume - Resume from a saved image.
*
* Called as a late_initcall (so all devices are discovered and
* initialized), we call swsusp to see if we have a saved image or not.
* If so, we quiesce devices, then restore the saved image. We will
* return above (in pm_suspend_disk() ) if everything goes well.
* Otherwise, we fail gracefully and return to the normally
* scheduled program.
* data_read - Read image pages from swap.
*
* You do not need to check for overlaps, check_pagedir()
* already did that.
*/
static int __init software_resume(void)
{
if (num_online_cpus() > 1) {
printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");
return -EINVAL;
}
/* We enable the possibility of machine suspend */
software_suspend_enabled = 1;
if (!resume_status)
return 0;
printk( "%s", name_resume );
if (resume_status == NORESUME) {
if(resume_file[0])
read_suspend_image(resume_file, 1);
printk( "disabled\n" );
return 0;
}
MDELAY(1000);
static int __init data_read(void)
{
struct pbe * p;
int error;
int i;
if (pm_prepare_console())
printk("swsusp: Can't allocate a console... proceeding\n");
if ((error = swsusp_pagedir_relocate()))
return error;
if (!resume_file[0] && resume_status == RESUME_SPECIFIED) {
printk( "suspension device unspecified\n" );
return -EINVAL;
printk( "Reading image data (%d pages): ", nr_copy_pages );
for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
if (!(i%100))
printk( "." );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
}
printk(" %d done.\n",i);
return error;
printk( "resuming from %s\n", resume_file);
if (read_suspend_image(resume_file, 0))
goto read_failure;
/* FIXME: Should we stop processes here, just to be safer? */
disable_nonboot_cpus();
device_suspend(3);
do_magic(1);
panic("This never returns");
read_failure:
pm_restore_console();
return 0;
}
late_initcall(software_resume);
extern dev_t __init name_to_dev_t(const char *line);
static int __init resume_setup(char *str)
static int __init read_pagedir(void)
{
if (resume_status == NORESUME)
return 1;
unsigned long addr;
int i, n = swsusp_info.pagedir_pages;
int error = 0;
strncpy( resume_file, str, 255 );
resume_status = RESUME_SPECIFIED;
pagedir_order = get_bitmask_order(n);
return 1;
addr =__get_free_pages(GFP_ATOMIC, pagedir_order);
if (!addr)
return -ENOMEM;
pagedir_nosave = (struct pbe *)addr;
pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
if (offset)
error = bio_read_page(offset, (void *)addr);
else
error = -EFAULT;
}
if (error)
free_pages((unsigned long)pagedir_nosave,pagedir_order);
return error;
}
static int __init noresume_setup(char *str)
static int __init read_suspend_image(void)
{
resume_status = NORESUME;
return 1;
int error = 0;
if ((error = verify()))
return error;
if ((error = read_pagedir()))
return error;
if ((error = data_read())) {
free_pages((unsigned long)pagedir_nosave,pagedir_order);
}
return error;
}
__setup("noresume", noresume_setup);
__setup("resume=", resume_setup);
/**
* swsusp_read - Read saved image from swap.
*/
int __init swsusp_read(void)
{
int error;
if (!strlen(resume_file))
return -ENOENT;
EXPORT_SYMBOL(software_suspend);
EXPORT_SYMBOL(software_suspend_enabled);
resume_device = name_to_dev_t(resume_file);
pr_debug("swsusp: Resume From Partition: %s\n", resume_file);
resume_bdev = open_by_devnum(resume_device, FMODE_READ);
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
blkdev_put(resume_bdev);
} else
error = PTR_ERR(resume_bdev);
if (!error)
pr_debug("Reading resume file was successful\n");
else
pr_debug("pmdisk: Error %d resuming\n", error);
return error;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment