Commit ce640f19 authored by Linus Torvalds's avatar Linus Torvalds

Merge bk://kernel.bkbits.net//home/mochel/linux-2.6-power

into ppc970.osdl.org:/home/torvalds/v2.6/linux
parents 6c4cd043 d5df4e65
...@@ -20,26 +20,6 @@ From kernel/suspend.c: ...@@ -20,26 +20,6 @@ From kernel/suspend.c:
You need to append resume=/dev/your_swap_partition to kernel command You need to append resume=/dev/your_swap_partition to kernel command
line. Then you suspend by echo 4 > /proc/acpi/sleep. line. Then you suspend by echo 4 > /proc/acpi/sleep.
Pavel's unreliable guide to swsusp mess
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are currently two versions of swap suspend in the kernel, the old
"Pavel's" version in kernel/power/swsusp.c and the new "Patrick's"
version in kernel/power/pmdisk.c. They provide the same functionality;
the old version looks ugly but was tested, while the new version looks
nicer but did not receive so much testing. echo 4 > /proc/acpi/sleep
calls the old version, echo disk > /sys/power/state calls the new one.
[In the future, when the new version is stable enough, two things can
happen:
* the new version is moved into swsusp.c, and swsusp is renamed to swap
suspend (Pavel prefers this)
* pmdisk is kept as is and swsusp.c is removed from the kernel]
Article about goals and implementation of Software Suspend for Linux Article about goals and implementation of Software Suspend for Linux
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Author: Gábor Kuti Author: Gábor Kuti
...@@ -75,10 +55,6 @@ hardware! ...@@ -75,10 +55,6 @@ hardware!
About the code About the code
Things to implement Things to implement
- SMP support. I've done an SMP support but since I don't have access to a kind
of this one I cannot test it. Please SMP people test it. .. Tested it,
doesn't work. Had no time to figure out why. There is some mess with
interrupts AFAIK..
- We should only make a copy of data related to kernel segment, since any - We should only make a copy of data related to kernel segment, since any
process data won't be changed. process data won't be changed.
- Should make more sanity checks. Or are these enough? - Should make more sanity checks. Or are these enough?
...@@ -90,11 +66,6 @@ Not so important ideas for implementing ...@@ -90,11 +66,6 @@ Not so important ideas for implementing
- We should not free pages at the beginning so aggressively, most of them - We should not free pages at the beginning so aggressively, most of them
go there anyway.. go there anyway..
Drivers that need support
- pc_keyb -- perhaps we can wait for vojtech's input patches
- do IDE cdroms need some kind of support?
- IDE CD-RW -- how to deal with that?
Sleep states summary (thanx, Ducrot) Sleep states summary (thanx, Ducrot)
==================================== ====================================
...@@ -109,7 +80,8 @@ and perhaps ...@@ -109,7 +80,8 @@ and perhaps
echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios
FAQ: Frequently Asked Questions
==========================
Q: well, suspending a server is IMHO a really stupid thing, Q: well, suspending a server is IMHO a really stupid thing,
but... (Diego Zuccato): but... (Diego Zuccato):
......
obj-$(CONFIG_PM) += cpu.o obj-$(CONFIG_PM) += cpu.o
obj-$(CONFIG_PM_DISK) += pmdisk.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
/* Originally gcc generated, modified by hand */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
.text
/*
 * pmdisk_arch_suspend - save the CPU context, or copy the image back and
 * restore it.
 *
 * The single int argument is read from 4(%esp). Zero selects the suspend
 * path, which records the registers and calls pmdisk_suspend. Non-zero
 * jumps to the resume path at .L1450, which copies every page listed in
 * pm_pagedir_nosave, reloads the recorded registers and calls
 * pmdisk_resume.
 */
ENTRY(pmdisk_arch_suspend)
cmpl $0,4(%esp)
jne .L1450
/* Suspend path: record %esp, the registers below and eflags. */
movl %esp, saved_context_esp
movl %ebx, saved_context_ebx
movl %ebp, saved_context_ebp
movl %esi, saved_context_esi
movl %edi, saved_context_edi
pushfl ; popl saved_context_eflags
call pmdisk_suspend
jmp .L1449
.p2align 4,,7
.L1450:
/*
 * Resume path: load %cr3 with swsusp_pg_dir minus __PAGE_OFFSET
 * (presumably the physical address of that page directory -- confirm).
 */
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3
/*
 * %ebx = base of the pagedir, %eax = number of pages copied so far,
 * %edx = byte offset of the current pagedir entry.
 */
movl pm_pagedir_nosave,%ebx
xorl %eax, %eax
xorl %edx, %edx
.p2align 4,,7
.L1455:
/* Entry layout used here: copy source at offset 0, destination at offset 4. */
movl 4(%ebx,%edx),%edi
movl (%ebx,%edx),%esi
/* Copy 1024 dwords from (%esi) to (%edi). */
movl $1024, %ecx
rep
movsl
movl %cr3, %ecx;
movl %ecx, %cr3; # flush TLB
incl %eax
/* Entries are 16 bytes apart (presumably sizeof(struct pbe) -- confirm). */
addl $16, %edx
/* Bottom-tested loop: one page is copied before pmdisk_pages is first compared. */
cmpl pmdisk_pages,%eax
jb .L1455
.p2align 4,,7
.L1453:
/* Reload the recorded stack pointer, registers and eflags, then call pmdisk_resume. */
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
movl saved_context_ebx, %ebx
movl saved_context_esi, %esi
movl saved_context_edi, %edi
pushl saved_context_eflags ; popfl
call pmdisk_resume
.L1449:
ret
...@@ -15,83 +15,47 @@ ...@@ -15,83 +15,47 @@
.text .text
ENTRY(do_magic) ENTRY(swsusp_arch_suspend)
pushl %ebx
cmpl $0,8(%esp)
jne resume
call do_magic_suspend_1
call save_processor_state
movl %esp, saved_context_esp movl %esp, saved_context_esp
movl %eax, saved_context_eax
movl %ebx, saved_context_ebx movl %ebx, saved_context_ebx
movl %ecx, saved_context_ecx
movl %edx, saved_context_edx
movl %ebp, saved_context_ebp movl %ebp, saved_context_ebp
movl %esi, saved_context_esi movl %esi, saved_context_esi
movl %edi, saved_context_edi movl %edi, saved_context_edi
pushfl ; popl saved_context_eflags pushfl ; popl saved_context_eflags
call do_magic_suspend_2 call swsusp_save
popl %ebx
ret ret
resume: ENTRY(swsusp_arch_resume)
movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx
movl %ecx,%cr3 movl %ecx,%cr3
call do_magic_resume_1 movl pagedir_nosave, %ebx
movl $0,loop xorl %eax, %eax
cmpl $0,nr_copy_pages xorl %edx, %edx
je copy_done
copy_loop:
movl $0,loop2
.p2align 4,,7 .p2align 4,,7
copy_one_page:
movl pagedir_nosave,%ecx
movl loop,%eax
movl loop2,%edx
sall $4,%eax
movl 4(%ecx,%eax),%ebx
movl (%ecx,%eax),%eax
movb (%edx,%eax),%al
movb %al,(%edx,%ebx)
movl loop2,%eax copy_loop:
leal 1(%eax),%edx movl 4(%ebx,%edx),%edi
movl %edx,loop2 movl (%ebx,%edx),%esi
movl %edx,%eax
cmpl $4095,%eax movl $1024, %ecx
jbe copy_one_page rep
movl loop,%eax movsl
leal 1(%eax),%edx
movl %edx,loop incl %eax
movl %edx,%eax addl $16, %edx
cmpl nr_copy_pages,%eax cmpl nr_copy_pages,%eax
jb copy_loop jb copy_loop
.p2align 4,,7
copy_done:
movl $__USER_DS,%eax
movw %ax, %ds
movw %ax, %es
movl saved_context_esp, %esp movl saved_context_esp, %esp
movl saved_context_ebp, %ebp movl saved_context_ebp, %ebp
movl saved_context_eax, %eax
movl saved_context_ebx, %ebx movl saved_context_ebx, %ebx
movl saved_context_ecx, %ecx
movl saved_context_edx, %edx
movl saved_context_esi, %esi movl saved_context_esi, %esi
movl saved_context_edi, %edi movl saved_context_edi, %edi
call restore_processor_state
pushl saved_context_eflags ; popfl pushl saved_context_eflags ; popfl
call do_magic_resume_2 call swsusp_restore
popl %ebx
ret ret
.section .data.nosave
loop:
.quad 0
loop2:
.quad 0
.previous
/* originally gcc generated, but now changed. don't overwrite. */ /* Originally gcc generated, modified by hand
*
* This may not use any stack, nor any variable that is not "NoSave":
*
* It's rewriting one kernel image with another. What is stack in the "old"
* image could very well be a data page in the "new" image, and overwriting
* your own stack under you is a bad idea.
*/
.text .text
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/page.h> #include <asm/page.h>
/* Input: ENTRY(swsusp_arch_suspend)
* rdi resume flag
*/
ENTRY(do_magic)
.LFB5:
subq $8, %rsp
.LCFI2:
testl %edi, %edi
jne .L90
call do_magic_suspend_1
call save_processor_state
movq %rsp, saved_context_esp(%rip) movq %rsp, saved_context_esp(%rip)
movq %rax, saved_context_eax(%rip) movq %rax, saved_context_eax(%rip)
...@@ -36,9 +32,10 @@ ENTRY(do_magic) ...@@ -36,9 +32,10 @@ ENTRY(do_magic)
movq %r15, saved_context_r15(%rip) movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip) pushfq ; popq saved_context_eflags(%rip)
addq $8, %rsp call swsusp_save
jmp do_magic_suspend_2 ret
.L90:
ENTRY(swsusp_arch_resume)
/* set up cr3 */ /* set up cr3 */
leaq init_level4_pgt(%rip),%rax leaq init_level4_pgt(%rip),%rax
subq $__START_KERNEL_map,%rax subq $__START_KERNEL_map,%rax
...@@ -53,7 +50,6 @@ ENTRY(do_magic) ...@@ -53,7 +50,6 @@ ENTRY(do_magic)
movq %rcx, %cr3; movq %rcx, %cr3;
movq %rax, %cr4; # turn PGE back on movq %rax, %cr4; # turn PGE back on
call do_magic_resume_1
movl nr_copy_pages(%rip), %eax movl nr_copy_pages(%rip), %eax
xorl %ecx, %ecx xorl %ecx, %ecx
movq $0, loop(%rip) movq $0, loop(%rip)
...@@ -113,9 +109,8 @@ ENTRY(do_magic) ...@@ -113,9 +109,8 @@ ENTRY(do_magic)
movq saved_context_r14(%rip), %r14 movq saved_context_r14(%rip), %r14
movq saved_context_r15(%rip), %r15 movq saved_context_r15(%rip), %r15
pushq saved_context_eflags(%rip) ; popfq pushq saved_context_eflags(%rip) ; popfq
call restore_processor_state call swsusp_restore
addq $8, %rsp ret
jmp do_magic_resume_2
.section .data.nosave .section .data.nosave
loop: loop:
......
...@@ -217,7 +217,8 @@ static int __init acpi_sleep_init(void) ...@@ -217,7 +217,8 @@ static int __init acpi_sleep_init(void)
sleep_states[i] = 1; sleep_states[i] = 1;
printk(" S4bios"); printk(" S4bios");
acpi_pm_ops.pm_disk_mode = PM_DISK_FIRMWARE; acpi_pm_ops.pm_disk_mode = PM_DISK_FIRMWARE;
} else if (sleep_states[i]) }
if (sleep_states[i])
acpi_pm_ops.pm_disk_mode = PM_DISK_PLATFORM; acpi_pm_ops.pm_disk_mode = PM_DISK_PLATFORM;
} }
} }
......
...@@ -194,11 +194,12 @@ extern void (*pm_idle)(void); ...@@ -194,11 +194,12 @@ extern void (*pm_idle)(void);
extern void (*pm_power_off)(void); extern void (*pm_power_off)(void);
enum { enum {
PM_SUSPEND_ON, PM_SUSPEND_ON = 0,
PM_SUSPEND_STANDBY, PM_SUSPEND_STANDBY = 1,
PM_SUSPEND_MEM, /* NOTE: PM_SUSPEND_MEM == PCI_D3hot */
PM_SUSPEND_DISK, PM_SUSPEND_MEM = 3,
PM_SUSPEND_MAX, PM_SUSPEND_DISK = 4,
PM_SUSPEND_MAX = 5,
}; };
enum { enum {
......
...@@ -23,16 +23,6 @@ typedef struct pbe { ...@@ -23,16 +23,6 @@ typedef struct pbe {
#define SWAP_FILENAME_MAXLENGTH 32 #define SWAP_FILENAME_MAXLENGTH 32
struct suspend_header {
u32 version_code;
unsigned long num_physpages;
char machine[8];
char version[20];
int num_cpus;
int page_size;
suspend_pagedir_t *suspend_pagedir;
unsigned int num_pbes;
};
#define SUSPEND_PD_PAGES(x) (((x)*sizeof(struct pbe))/PAGE_SIZE+1) #define SUSPEND_PD_PAGES(x) (((x)*sizeof(struct pbe))/PAGE_SIZE+1)
...@@ -45,16 +35,12 @@ extern void drain_local_pages(void); ...@@ -45,16 +35,12 @@ extern void drain_local_pages(void);
/* kernel/power/swsusp.c */ /* kernel/power/swsusp.c */
extern int software_suspend(void); extern int software_suspend(void);
extern unsigned int nr_copy_pages __nosavedata;
extern suspend_pagedir_t *pagedir_nosave __nosavedata;
#else /* CONFIG_SOFTWARE_SUSPEND */ #else /* CONFIG_SOFTWARE_SUSPEND */
static inline int software_suspend(void) static inline int software_suspend(void)
{ {
printk("Warning: fake suspend called\n"); printk("Warning: fake suspend called\n");
return -EPERM; return -EPERM;
} }
#define software_resume() do { } while(0)
#endif /* CONFIG_SOFTWARE_SUSPEND */ #endif /* CONFIG_SOFTWARE_SUSPEND */
...@@ -78,12 +64,6 @@ static inline void disable_nonboot_cpus(void) {} ...@@ -78,12 +64,6 @@ static inline void disable_nonboot_cpus(void) {}
static inline void enable_nonboot_cpus(void) {} static inline void enable_nonboot_cpus(void) {}
#endif #endif
asmlinkage void do_magic(int is_resume);
asmlinkage void do_magic_resume_1(void);
asmlinkage void do_magic_resume_2(void);
asmlinkage void do_magic_suspend_1(void);
asmlinkage void do_magic_suspend_2(void);
void save_processor_state(void); void save_processor_state(void);
void restore_processor_state(void); void restore_processor_state(void);
struct saved_context; struct saved_context;
......
...@@ -18,6 +18,13 @@ config PM ...@@ -18,6 +18,13 @@ config PM
will issue the hlt instruction if nothing is to be done, thereby will issue the hlt instruction if nothing is to be done, thereby
sending the processor to sleep and saving power. sending the processor to sleep and saving power.
config PM_DEBUG
bool "Power Management Debug Support"
---help---
This option enables verbose debugging support in the Power Management
code. This is helpful when debugging and reporting various PM bugs,
like suspend support.
config SOFTWARE_SUSPEND config SOFTWARE_SUSPEND
bool "Software Suspend (EXPERIMENTAL)" bool "Software Suspend (EXPERIMENTAL)"
depends on EXPERIMENTAL && PM && SWAP depends on EXPERIMENTAL && PM && SWAP
...@@ -42,33 +49,12 @@ config SOFTWARE_SUSPEND ...@@ -42,33 +49,12 @@ config SOFTWARE_SUSPEND
For more information take a look at Documentation/power/swsusp.txt. For more information take a look at Documentation/power/swsusp.txt.
config PM_DISK config PM_STD_PARTITION
bool "Suspend-to-Disk Support"
depends on PM && SWAP && X86 && !X86_64
---help---
Suspend-to-disk is a power management state in which the contents
of memory are stored on disk and the entire system is shut down or
put into a low-power state (e.g. ACPI S4). When the computer is
turned back on, the stored image is loaded from disk and execution
resumes from where it left off before suspending.
This config option enables the core infrastructure necessary to
perform the suspend and resume transition.
Currently, this suspend-to-disk implementation is based on a forked
version of the swsusp code base. As such, it's still experimental,
and still relies on CONFIG_SWAP.
More information can be found in Documentation/power/.
If unsure, Say N.
config PM_DISK_PARTITION
string "Default resume partition" string "Default resume partition"
depends on PM_DISK depends on SOFTWARE_SUSPEND
default "" default ""
---help--- ---help---
The default resume partition is the partition that the pmdisk suspend- The default resume partition is the partition that the suspend-
to-disk implementation will look for a suspended disk image. to-disk implementation will look for a suspended disk image.
The partition specified here will be different for almost every user. The partition specified here will be different for almost every user.
...@@ -77,16 +63,10 @@ config PM_DISK_PARTITION ...@@ -77,16 +63,10 @@ config PM_DISK_PARTITION
The partition specified can be overridden by specifying: The partition specified can be overridden by specifying:
pmdisk=/dev/<other device> resume=/dev/<other device>
which will set the resume partition to the device specified. which will set the resume partition to the device specified.
One may also do:
pmdisk=off
to inform the kernel not to perform a resume transition.
Note there is currently not a way to specify which device to save the Note there is currently not a way to specify which device to save the
suspended image to. It will simply pick the first available swap suspended image to. It will simply pick the first available swap
device. device.
......
ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
swsusp-smp-$(CONFIG_SMP) += smp.o swsusp-smp-$(CONFIG_SMP) += smp.o
obj-y := main.o process.o console.o pm.o obj-y := main.o process.o console.o pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
obj-$(CONFIG_PM_DISK) += disk.o pmdisk.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
...@@ -8,13 +8,11 @@ ...@@ -8,13 +8,11 @@
* *
*/ */
#define DEBUG
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/reboot.h> #include <linux/reboot.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/device.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/fs.h> #include <linux/fs.h>
#include "power.h" #include "power.h"
...@@ -23,12 +21,15 @@ ...@@ -23,12 +21,15 @@
extern u32 pm_disk_mode; extern u32 pm_disk_mode;
extern struct pm_ops * pm_ops; extern struct pm_ops * pm_ops;
extern int pmdisk_save(void); extern int swsusp_suspend(void);
extern int pmdisk_write(void); extern int swsusp_write(void);
extern int pmdisk_read(void); extern int swsusp_read(void);
extern int pmdisk_restore(void); extern int swsusp_resume(void);
extern int pmdisk_free(void); extern int swsusp_free(void);
static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
/** /**
* power_down - Shut machine down for hibernate. * power_down - Shut machine down for hibernate.
...@@ -46,22 +47,26 @@ static int power_down(u32 mode) ...@@ -46,22 +47,26 @@ static int power_down(u32 mode)
int error = 0; int error = 0;
local_irq_save(flags); local_irq_save(flags);
device_power_down(PM_SUSPEND_DISK);
switch(mode) { switch(mode) {
case PM_DISK_PLATFORM: case PM_DISK_PLATFORM:
device_power_down(PM_SUSPEND_DISK);
error = pm_ops->enter(PM_SUSPEND_DISK); error = pm_ops->enter(PM_SUSPEND_DISK);
break; break;
case PM_DISK_SHUTDOWN: case PM_DISK_SHUTDOWN:
printk("Powering off system\n"); printk("Powering off system\n");
device_shutdown();
machine_power_off(); machine_power_off();
break; break;
case PM_DISK_REBOOT: case PM_DISK_REBOOT:
device_shutdown();
machine_restart(NULL); machine_restart(NULL);
break; break;
} }
machine_halt(); machine_halt();
device_power_up(); /* Valid image is on the disk, if we continue we risk serious data corruption
local_irq_restore(flags); after resume. */
printk(KERN_CRIT "Please power me down manually\n");
while(1);
return 0; return 0;
} }
...@@ -99,6 +104,7 @@ static void finish(void) ...@@ -99,6 +104,7 @@ static void finish(void)
{ {
device_resume(); device_resume();
platform_finish(); platform_finish();
enable_nonboot_cpus();
thaw_processes(); thaw_processes();
pm_restore_console(); pm_restore_console();
} }
...@@ -126,6 +132,7 @@ static int prepare(void) ...@@ -126,6 +132,7 @@ static int prepare(void)
/* Free memory before shutting down devices. */ /* Free memory before shutting down devices. */
free_some_memory(); free_some_memory();
disable_nonboot_cpus();
if ((error = device_suspend(PM_SUSPEND_DISK))) if ((error = device_suspend(PM_SUSPEND_DISK)))
goto Finish; goto Finish;
...@@ -133,6 +140,7 @@ static int prepare(void) ...@@ -133,6 +140,7 @@ static int prepare(void)
Finish: Finish:
platform_finish(); platform_finish();
Thaw: Thaw:
enable_nonboot_cpus();
thaw_processes(); thaw_processes();
pm_restore_console(); pm_restore_console();
return error; return error;
...@@ -161,7 +169,7 @@ int pm_suspend_disk(void) ...@@ -161,7 +169,7 @@ int pm_suspend_disk(void)
pr_debug("PM: snapshotting memory.\n"); pr_debug("PM: snapshotting memory.\n");
in_suspend = 1; in_suspend = 1;
if ((error = pmdisk_save())) if ((error = swsusp_suspend()))
goto Done; goto Done;
if (in_suspend) { if (in_suspend) {
...@@ -173,14 +181,14 @@ int pm_suspend_disk(void) ...@@ -173,14 +181,14 @@ int pm_suspend_disk(void)
mb(); mb();
barrier(); barrier();
error = pmdisk_write(); error = swsusp_write();
if (!error) { if (!error) {
error = power_down(pm_disk_mode); error = power_down(pm_disk_mode);
pr_debug("PM: Power down failed.\n"); pr_debug("PM: Power down failed.\n");
} }
} else } else
pr_debug("PM: Image restored successfully.\n"); pr_debug("PM: Image restored successfully.\n");
pmdisk_free(); swsusp_free();
Done: Done:
finish(); finish();
return error; return error;
...@@ -188,7 +196,7 @@ int pm_suspend_disk(void) ...@@ -188,7 +196,7 @@ int pm_suspend_disk(void)
/** /**
* pm_resume - Resume from a saved image. * software_resume - Resume from a saved image.
* *
* Called as a late_initcall (so all devices are discovered and * Called as a late_initcall (so all devices are discovered and
* initialized), we call pmdisk to see if we have a saved image or not. * initialized), we call pmdisk to see if we have a saved image or not.
...@@ -199,13 +207,21 @@ int pm_suspend_disk(void) ...@@ -199,13 +207,21 @@ int pm_suspend_disk(void)
* *
*/ */
static int pm_resume(void) static int software_resume(void)
{ {
int error; int error;
if (noresume) {
/**
* FIXME: If noresume is specified, we need to find the partition
* and reset it back to normal swap space.
*/
return 0;
}
pr_debug("PM: Reading pmdisk image.\n"); pr_debug("PM: Reading pmdisk image.\n");
if ((error = pmdisk_read())) if ((error = swsusp_read()))
goto Done; goto Done;
pr_debug("PM: Preparing system for restore.\n"); pr_debug("PM: Preparing system for restore.\n");
...@@ -216,28 +232,18 @@ static int pm_resume(void) ...@@ -216,28 +232,18 @@ static int pm_resume(void)
barrier(); barrier();
mb(); mb();
/* FIXME: The following (comment and mdelay()) are from swsusp.
* Are they really necessary?
*
* We do not want some readahead with DMA to corrupt our memory, right?
* Do it with disabled interrupts for best effect. That way, if some
* driver scheduled DMA, we have good chance for DMA to finish ;-).
*/
pr_debug("PM: Waiting for DMAs to settle down.\n");
mdelay(1000);
pr_debug("PM: Restoring saved image.\n"); pr_debug("PM: Restoring saved image.\n");
pmdisk_restore(); swsusp_resume();
pr_debug("PM: Restore failed, recovering.n"); pr_debug("PM: Restore failed, recovering.n");
finish(); finish();
Free: Free:
pmdisk_free(); swsusp_free();
Done: Done:
pr_debug("PM: Resume from disk failed.\n"); pr_debug("PM: Resume from disk failed.\n");
return 0; return 0;
} }
late_initcall(pm_resume); late_initcall(software_resume);
static char * pm_disk_modes[] = { static char * pm_disk_modes[] = {
...@@ -336,3 +342,22 @@ static int __init pm_disk_init(void) ...@@ -336,3 +342,22 @@ static int __init pm_disk_init(void)
} }
core_initcall(pm_disk_init); core_initcall(pm_disk_init);
/*
 * resume_setup - handler for the "resume=" boot parameter.
 * @str: text following "resume=" on the command line.
 *
 * Copies at most 255 bytes of @str into resume_file, unless noresume has
 * already been set, in which case the argument is ignored.
 * Always returns 1.
 */
static int __init resume_setup(char *str)
{
	if (!noresume)
		strncpy( resume_file, str, 255 );
	return 1;
}
/*
 * noresume_setup - handler for the "noresume" boot parameter.
 * @str: unused.
 *
 * Sets the noresume flag. software_resume() returns early when the flag
 * is set, and resume_setup() ignores its argument.
 * Always returns 1.
 */
static int __init noresume_setup(char *str)
{
noresume = 1;
return 1;
}
__setup("noresume", noresume_setup);
__setup("resume=", resume_setup);
...@@ -8,8 +8,6 @@ ...@@ -8,8 +8,6 @@
* *
*/ */
#define DEBUG
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/string.h> #include <linux/string.h>
...@@ -35,8 +33,6 @@ void pm_set_ops(struct pm_ops * ops) ...@@ -35,8 +33,6 @@ void pm_set_ops(struct pm_ops * ops)
{ {
down(&pm_sem); down(&pm_sem);
pm_ops = ops; pm_ops = ops;
if (ops->pm_disk_mode && ops->pm_disk_mode < PM_DISK_MAX)
pm_disk_mode = ops->pm_disk_mode;
up(&pm_sem); up(&pm_sem);
} }
...@@ -169,6 +165,15 @@ static int enter_state(u32 state) ...@@ -169,6 +165,15 @@ static int enter_state(u32 state)
return error; return error;
} }
/*
 * This is the main interface to the outside world. It needs to be
 * called from process context.
 *
 * Thin wrapper: requests the PM_SUSPEND_DISK state through enter_state()
 * and returns whatever enter_state() returns.
 */
int software_suspend(void)
{
return enter_state(PM_SUSPEND_DISK);
}
/** /**
* pm_suspend - Externally visible function for suspending system. * pm_suspend - Externally visible function for suspending system.
...@@ -225,8 +230,8 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n ...@@ -225,8 +230,8 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
p = memchr(buf, '\n', n); p = memchr(buf, '\n', n);
len = p ? p - buf : n; len = p ? p - buf : n;
for (s = &pm_states[state]; *s; s++, state++) { for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
if (!strncmp(buf, *s, len)) if (*s && !strncmp(buf, *s, len))
break; break;
} }
if (*s) if (*s)
......
/*
* kernel/power/pmdisk.c - Suspend-to-disk implementation
*
* This STD implementation is initially derived from swsusp (suspend-to-swap).
* The original copyright on that was:
*
* Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
* Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
*
* The additional parts are:
*
* Copyright (C) 2003 Patrick Mochel
* Copyright (C) 2003 Open Source Development Lab
*
* This file is released under the GPLv2.
*
* For more information, please see the text files in Documentation/power/
*
*/
#undef DEBUG
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/suspend.h>
#include <linux/version.h>
#include <linux/reboot.h>
#include <linux/device.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/utsname.h>
#include <asm/mmu_context.h>
#include "power.h"
extern asmlinkage int pmdisk_arch_suspend(int resume);
#define __ADDRESS(x) ((unsigned long) phys_to_virt(x))
#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT)
#define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */
/* References to section boundaries */
extern char __nosave_begin, __nosave_end;
extern int is_head_of_free_region(struct page *);
/* Variables to be preserved over suspend */
static int pagedir_order_check;
static int nr_copy_pages_check;
/* For resume= kernel option */
static char resume_file[256] = CONFIG_PM_DISK_PARTITION;
static dev_t resume_device;
/* Local variables that should not be affected by save */
unsigned int pmdisk_pages __nosavedata = 0;
/* Suspend pagedir is allocated before final copy, therefore it
must be freed after resume
Warning: this is evil. There are actually two pagedirs at time of
resume. One is "pagedir_save", which is empty frame allocated at
time of suspend, that must be freed. Second is "pagedir_nosave",
allocated at time of resume, that travels through memory not to
collide with anything.
*/
suspend_pagedir_t *pm_pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
static int pagedir_order __nosavedata = 0;
/*
 * Header describing a saved image. init_header() zeroes it and fills in
 * the identification fields, write_pagedir() fills in the pagedir
 * bookkeeping, and write_header() writes the whole structure to swap.
 */
struct pmdisk_info {
/* Copy of system_utsname taken by init_header(). */
struct new_utsname uts;
/* LINUX_VERSION_CODE, as stored by init_header(). */
u32 version_code;
/* Value of num_physpages when the header was initialised. */
unsigned long num_physpages;
/* num_online_cpus() when the header was initialised. */
int cpus;
/* Number of pages in the image (pmdisk_pages). */
unsigned long image_pages;
/* Number of valid entries in pagedir[], set by write_pagedir(). */
unsigned long pagedir_pages;
/* Swap entries of the pages that hold the page directory. */
swp_entry_t pagedir[768];
} __attribute__((aligned(PAGE_SIZE))) pmdisk_info;
/*
 * Signature written into the header's sig field once an image is on disk.
 * Note that mark_swapfiles() copies only the first 10 bytes of this string.
 */
#define PMDISK_SIG "pmdisk-swap1"
/*
 * In-memory copy of the page at offset 0 of the suspend swap device, as
 * read and rewritten by mark_swapfiles().
 */
struct pmdisk_header {
/* Padding that pushes the three fields below to the end of the page. */
char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
/* Swap entry passed to mark_swapfiles(): where the pmdisk_info page was written. */
swp_entry_t pmdisk_info;
/* Signature found in sig before mark_swapfiles() overwrote it. */
char orig_sig[10];
/* "SWAP-SPACE"/"SWAPSPACE2" on plain swap, or the first 10 bytes of PMDISK_SIG. */
char sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) pmdisk_header;
/*
* XXX: We try to keep some more pages free so that I/O operations succeed
* without paging. Might this be more?
*/
#define PAGES_FOR_IO 512
/*
* Saving part...
*/
/* We memorize in swapfile_used what swap devices are used for suspension */
#define SWAPFILE_UNUSED 0
#define SWAPFILE_SUSPEND 1 /* This is the suspending device */
#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
static unsigned short swapfile_used[MAX_SWAPFILES];
static unsigned short root_swap;
/**
 *	mark_swapfiles - Stamp the suspend swap device with the pmdisk signature.
 *	@prev:	Swap entry to record in the header's pmdisk_info field.
 *
 *	Reads the page at offset 0 of the root_swap device into
 *	pmdisk_header. If it carries one of the two swap signatures, the
 *	signature is preserved in orig_sig, replaced by the first 10 bytes
 *	of PMDISK_SIG, @prev is stored, and the page is written back.
 *
 *	Returns the result of the write, or -ENODEV if the page carries
 *	neither swap signature. The result of the initial read is not
 *	checked.
 */
static int mark_swapfiles(swp_entry_t prev)
{
	swp_entry_t hdr_slot = swp_entry(root_swap, 0);
	struct page *hdr_page = virt_to_page((unsigned long)&pmdisk_header);

	rw_swap_page_sync(READ, hdr_slot, hdr_page);

	if (memcmp("SWAP-SPACE", pmdisk_header.sig, 10) &&
	    memcmp("SWAPSPACE2", pmdisk_header.sig, 10)) {
		pr_debug("pmdisk: Partition is not swap space.\n");
		return -ENODEV;
	}

	memcpy(pmdisk_header.orig_sig, pmdisk_header.sig, 10);
	memcpy(pmdisk_header.sig, PMDISK_SIG, 10);
	pmdisk_header.pmdisk_info = prev;

	return rw_swap_page_sync(WRITE, hdr_slot, hdr_page);
}
static int read_swapfiles(void) /* This is called before saving image */
{
int i, len;
len=strlen(resume_file);
root_swap = 0xFFFF;
swap_list_lock();
for(i=0; i<MAX_SWAPFILES; i++) {
if (swap_info[i].flags == 0) {
swapfile_used[i]=SWAPFILE_UNUSED;
} else {
if(!len) {
pr_debug("pmdisk: Default resume partition not set.\n");
if(root_swap == 0xFFFF) {
swapfile_used[i] = SWAPFILE_SUSPEND;
root_swap = i;
} else
swapfile_used[i] = SWAPFILE_IGNORED;
} else {
/* we ignore all swap devices that are not the resume_file */
if (1) {
// FIXME if(resume_device == swap_info[i].swap_device) {
swapfile_used[i] = SWAPFILE_SUSPEND;
root_swap = i;
} else
swapfile_used[i] = SWAPFILE_IGNORED;
}
}
}
swap_list_unlock();
return (root_swap != 0xffff) ? 0 : -ENODEV;
}
/**
 *	lock_swapdevices - Toggle the swap devices that are not used for suspend.
 *
 *	Inverts the low eight flag bits of every device marked
 *	SWAPFILE_IGNORED, which makes it unusable. Calling the function a
 *	second time inverts them back and unlocks the devices.
 *
 *	This is called after the image has been saved, so the modification
 *	is lost after resume, which is what we want.
 */
static void lock_swapdevices(void)
{
	int idx;

	swap_list_lock();
	for (idx = 0; idx < MAX_SWAPFILES; idx++) {
		if (swapfile_used[idx] != SWAPFILE_IGNORED)
			continue;
		swap_info[idx].flags ^= 0xFF;
	}
	swap_list_unlock();
}
/**
 *	write_swap_page - Write one page to a fresh swap location.
 *	@addr:	Kernel virtual address of the page to write.
 *	@loc:	Where to store the swap entry that was used.
 *
 *	Grabs a new swap entry and syncs the page to it. The entry must
 *	have a non-zero offset and live on the device marked
 *	SWAPFILE_SUSPEND; otherwise -ENOSPC is returned.
 *
 *	-EIO from rw_swap_page_sync() is deliberately treated as success.
 *	That is inherited from swsusp, which ignored the return value
 *	altogether because most pages written back to swap report -EIO.
 *	Other errors are passed up, and @loc is only written on success.
 */
static int write_swap_page(unsigned long addr, swp_entry_t * loc)
{
	swp_entry_t slot = get_swap_page();
	int rc;

	if (!swp_offset(slot) ||
	    swapfile_used[swp_type(slot)] != SWAPFILE_SUSPEND)
		return -ENOSPC;

	rc = rw_swap_page_sync(WRITE, slot, virt_to_page(addr));
	if (rc && rc != -EIO)
		return rc;

	*loc = slot;
	return 0;
}
/**
* free_data - Free the swap entries used by the saved image.
*
* Walk the list of used swap entries and free each one.
*/
static void free_data(void)
{
swp_entry_t entry;
int i;
for (i = 0; i < pmdisk_pages; i++) {
entry = (pm_pagedir_nosave + i)->swap_address;
if (entry.val)
swap_free(entry);
else
break;
(pm_pagedir_nosave + i)->swap_address = (swp_entry_t){0};
}
}
/**
* write_data - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
static int write_data(void)
{
int error = 0;
int i;
printk( "Writing data to swap (%d pages): ", pmdisk_pages );
for (i = 0; i < pmdisk_pages && !error; i++) {
if (!(i%100))
printk( "." );
error = write_swap_page((pm_pagedir_nosave+i)->address,
&((pm_pagedir_nosave+i)->swap_address));
}
printk(" %d Pages done.\n",i);
return error;
}
/**
* free_pagedir - Free pages used by the page directory.
*/
static void free_pagedir_entries(void)
{
int num = pmdisk_info.pagedir_pages;
int i;
for (i = 0; i < num; i++)
swap_free(pmdisk_info.pagedir[i]);
}
/**
* write_pagedir - Write the array of pages holding the page directory.
* @last: Last swap entry we write (needed for header).
*/
static int write_pagedir(void)
{
unsigned long addr = (unsigned long)pm_pagedir_nosave;
int error = 0;
int n = SUSPEND_PD_PAGES(pmdisk_pages);
int i;
pmdisk_info.pagedir_pages = n;
printk( "Writing pagedir (%d pages)\n", n);
for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
error = write_swap_page(addr,&pmdisk_info.pagedir[i]);
return error;
}
/*
 * dump_pmdisk_info - print the contents of pmdisk_info.
 *
 * Compiles to an empty function unless DEBUG is defined.
 */
#ifndef DEBUG
static void dump_pmdisk_info(void)
{
}
#else
static void dump_pmdisk_info(void)
{
	const struct pmdisk_info *info = &pmdisk_info;

	printk(" pmdisk: Version: %u\n", info->version_code);
	printk(" pmdisk: Num Pages: %ld\n", info->num_physpages);
	printk(" pmdisk: UTS Sys: %s\n", info->uts.sysname);
	printk(" pmdisk: UTS Node: %s\n", info->uts.nodename);
	printk(" pmdisk: UTS Release: %s\n", info->uts.release);
	printk(" pmdisk: UTS Version: %s\n", info->uts.version);
	printk(" pmdisk: UTS Machine: %s\n", info->uts.machine);
	printk(" pmdisk: UTS Domain: %s\n", info->uts.domainname);
	printk(" pmdisk: CPUs: %d\n", info->cpus);
	printk(" pmdisk: Image: %ld Pages\n", info->image_pages);
	printk(" pmdisk: Pagedir: %ld Pages\n", info->pagedir_pages);
}
#endif
static void init_header(void)
{
memset(&pmdisk_info,0,sizeof(pmdisk_info));
pmdisk_info.version_code = LINUX_VERSION_CODE;
pmdisk_info.num_physpages = num_physpages;
memcpy(&pmdisk_info.uts,&system_utsname,sizeof(system_utsname));
pmdisk_info.cpus = num_online_cpus();
pmdisk_info.image_pages = pmdisk_pages;
}
/**
 *	write_header - Write the suspend header to swap.
 *	@entry:	Receives the swap entry the header was written to.
 *
 *	Dumps pmdisk_info (a no-op unless DEBUG is defined) and writes the
 *	page that holds it to a fresh swap location. The header is expected
 *	to have been filled in already.
 *
 *	Returns the result of write_swap_page().
 */
static int write_header(swp_entry_t * entry)
{
	unsigned long hdr_addr = (unsigned long)&pmdisk_info;

	dump_pmdisk_info();
	return write_swap_page(hdr_addr, entry);
}
/**
 *	write_suspend_image - Write the entire image and its metadata.
 *
 *	Order: initialise the header, write the data pages, write the page
 *	directory, write the header, then stamp the swap device via
 *	mark_swapfiles().
 *
 *	If writing the data fails, the data swap entries are freed. If
 *	writing the page directory or the header fails, the pagedir entries
 *	are freed as well. A failure in mark_swapfiles() is returned
 *	without freeing anything.
 */
static int write_suspend_image(void)
{
	swp_entry_t hdr_entry = { 0 };
	int rc;

	init_header();

	rc = write_data();
	if (rc)
		goto FreeData;

	rc = write_pagedir();
	if (rc)
		goto FreePagedir;

	rc = write_header(&hdr_entry);
	if (rc)
		goto FreePagedir;

	return mark_swapfiles(hdr_entry);

 FreePagedir:
	free_pagedir_entries();
 FreeData:
	free_data();
	return rc;
}
/**
 *	saveable - Decide whether a page has to go into the snapshot.
 *	@pfn:	Page frame number to examine; may be advanced (see below).
 *
 *	Returns 0 (do not save) when
 *	 - the page is flagged Nosave, or
 *	 - the page is Reserved and its address lies between __nosave_begin
 *	   and __nosave_end, or
 *	 - the page is not Reserved and is the head of a free region; in
 *	   that case @pfn is moved to the last page of the region so the
 *	   caller's loop skips the whole chunk.
 *
 *	Returns 1 (save it) in every other case.
 */
static int saveable(unsigned long * pfn)
{
	struct page * page = pfn_to_page(*pfn);

	if (PageNosave(page))
		return 0;

	if (PageReserved(page)) {
		/*
		 * Reserved pages are copied wholesale, except for the
		 * statically defined nosave range.
		 * Hmm, perhaps copying all reserved pages is not too
		 * healthy as they may contain critical bios data?
		 */
		if ((ADDRESS(*pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) &&
		    (ADDRESS(*pfn) < (unsigned long) ADDRESS2(&__nosave_end))) {
			pr_debug("[nosave %lx]\n", ADDRESS(*pfn));
			return 0;
		}
	} else {
		int chunk_size = is_head_of_free_region(page);

		if (chunk_size) {
			/* Skip to the last page of the free chunk. */
			*pfn += chunk_size - 1;
			return 0;
		}
	}
	return 1;
}
/**
 *	count_pages - Count the pages that must be snapshotted.
 *
 *	Walks every pfn below max_pfn, asks saveable() about each one and
 *	stores the number of saveable pages in pmdisk_pages. saveable() may
 *	advance the pfn to skip over free regions.
 */
static void count_pages(void)
{
	unsigned long pfn = 0;
	int total = 0;

	while (pfn < max_pfn) {
		if (saveable(&pfn))
			total++;
		pfn++;
	}
	pmdisk_pages = total;
}
/**
 *	copy_pages - Copy every saveable page into the snapshot.
 *
 *	Walks all pfns below max_pfn in the same way count_pages() does.
 *	For each saveable page the next pagedir entry records the page's
 *	original address and the page contents are copied to the entry's
 *	preallocated ->address.
 *
 *	The number of pages copied must equal pmdisk_pages; anything else
 *	trips BUG_ON().
 */
static void copy_pages(void)
{
	struct pbe * p = pagedir_save;
	unsigned long pfn;
	int copied = 0;

	for (pfn = 0; pfn < max_pfn; pfn++) {
		if (!saveable(&pfn))
			continue;

		p->orig_address = ADDRESS(pfn);
		copy_page((void *) p->address, (void *) p->orig_address);
		p++;
		copied++;
	}
	BUG_ON(copied != pmdisk_pages);
}
/**
 *	free_image_pages - Free each page allocated for snapshot.
 *
 *	Walks the first pmdisk_pages entries of pagedir_save. For every
 *	entry that actually owns a page, the Nosave flag is cleared and the
 *	page is freed. Entries whose ->address is 0 (never allocated, or
 *	already released) are skipped so that virt_to_page() is never
 *	applied to address 0.
 *
 *	->address is reset to 0 after freeing, which makes a repeated call
 *	harmless.
 */
static void free_image_pages(void)
{
	struct pbe * p;
	int i;

	for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) {
		if (!p->address)
			continue;
		ClearPageNosave(virt_to_page(p->address));
		free_page(p->address);
		p->address = 0;
	}
}
/**
 *	free_pagedir - Release the snapshot pages and the page directory.
 *
 *	The image pages referenced by the directory are freed first, then
 *	the 2^pagedir_order pages that hold the directory itself.
 */
static void free_pagedir(void)
{
	unsigned long base;

	free_image_pages();

	base = (unsigned long)pagedir_save;
	free_pages(base, pagedir_order);
}
/*
 * calc_order - Work out the allocation order of the page directory.
 *
 * Starts from the order needed for the current pmdisk_pages, adds
 * 1 << order to pmdisk_pages, and then repeats: whenever the enlarged
 * page count needs a higher order, raise the order by the difference and
 * add 1 << difference more pages. Stops once the order no longer changes.
 *
 * Side effects: pmdisk_pages is increased and pagedir_order is set.
 */
static void calc_order(void)
{
	int order = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages));

	pmdisk_pages += 1 << order;

	for (;;) {
		int grow = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages)) - order;

		if (!grow)
			break;
		order += grow;
		pmdisk_pages += 1 << grow;
	}
	pagedir_order = order;
}
/**
* alloc_pagedir - Allocate the page directory.
*
* First, determine exactly how many contiguous pages we need,
* allocate them, then mark each 'unsavable'.
*/
static int alloc_pagedir(void)
{
calc_order();
pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
pagedir_order);
if(!pagedir_save)
return -ENOMEM;
memset(pagedir_save,0,(1 << pagedir_order) * PAGE_SIZE);
pm_pagedir_nosave = pagedir_save;
return 0;
}
/**
 *	alloc_image_pages - Allocate pages for the snapshot.
 *
 *	Allocates one zeroed page for each of the first pmdisk_pages
 *	entries of pagedir_save, stores it in ->address and flags the page
 *	Nosave.
 *
 *	On allocation failure every page obtained so far is unwound: its
 *	Nosave flag is cleared before it is freed, so no page goes back to
 *	the allocator still marked Nosave, and ->address is reset to 0.
 *
 *	Returns 0 on success, -ENOMEM on failure.
 */
static int alloc_image_pages(void)
{
	struct pbe * p;
	int i;

	for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) {
		p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
		if (!p->address)
			goto Error;
		SetPageNosave(virt_to_page(p->address));
	}
	return 0;
 Error:
	/*
	 * Entry i is the one that failed and already holds 0. Walk back
	 * over entries i-1 .. 0, never stepping below pagedir_save.
	 */
	while (i-- > 0) {
		p--;
		ClearPageNosave(virt_to_page(p->address));
		free_page(p->address);
		p->address = 0;
	}
	return -ENOMEM;
}
/**
 *	enough_free_mem - Make sure we have enough free memory to snapshot.
 *
 *	Returns 1 when the number of free pages is at least
 *	pmdisk_pages + PAGES_FOR_IO, 0 otherwise.
 */
static int enough_free_mem(void)
{
	if (nr_free_pages() >= (pmdisk_pages + PAGES_FOR_IO))
		return 1;

	pr_debug("pmdisk: Not enough free pages: Have %d\n",
		 nr_free_pages());
	return 0;
}
/**
 *	enough_swap - Make sure we have enough swap to save the image.
 *
 *	Returns 1 when the free swap reported by si_swapinfo() is at least
 *	pmdisk_pages + PAGES_FOR_IO, 0 otherwise.
 *
 *	FIXME: si_swapinfo(&i) returns all swap devices information.
 *	We should only consider resume_device.
 */
static int enough_swap(void)
{
	struct sysinfo i;

	si_swapinfo(&i);
	if (i.freeswap < (pmdisk_pages + PAGES_FOR_IO)) {
		/* Report both what is required and what is available. */
		pr_debug("pmdisk: Not enough swap. Need %d, have %lu\n",
			 pmdisk_pages + PAGES_FOR_IO, i.freeswap);
		return 0;
	}
	return 1;
}
/**
* pmdisk_suspend - Atomically snapshot the system.
*
* This must be called with interrupts disabled, to prevent the
* system changing at all from underneath us.
*
* To do this, we count the number of pages in the system that we
* need to save; make sure we have enough memory and swap to clone
* the pages and save them in swap, allocate the space to hold them,
* and then snapshot them all.
*/
int pmdisk_suspend(void)
{
int error = 0;
if ((error = read_swapfiles()))
return error;
drain_local_pages();
pm_pagedir_nosave = NULL;
pr_debug("pmdisk: Counting pages to copy.\n" );
count_pages();
pr_debug("pmdisk: (pages needed: %d + %d free: %d)\n",
pmdisk_pages,PAGES_FOR_IO,nr_free_pages());
if (!enough_free_mem())
return -ENOMEM;
if (!enough_swap())
return -ENOSPC;
if ((error = alloc_pagedir())) {
pr_debug("pmdisk: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
pr_debug("pmdisk: Allocating image pages failed.\n");
free_pagedir();
return error;
}
nr_copy_pages_check = pmdisk_pages;
pagedir_order_check = pagedir_order;
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them
*/
drain_local_pages();
/* copy */
copy_pages();
/*
* End of critical section. From now on, we can write to memory,
* but we should not touch disk. This specially means we must _not_
* touch swap space! Except we must write out our image of course.
*/
pr_debug("pmdisk: %d pages copied\n", pmdisk_pages );
return 0;
}
/**
 *	suspend_save_image - Prepare and write saved image to swap.
 *
 *	Devices are resumed first so the swap device can be written. The
 *	image is written between two lock_swapdevices() calls: the first
 *	locks the ignored swap devices, the second unlocks them again once
 *	writing is finished.
 *
 *	It is important _NOT_ to umount filesystems at this point. We want
 *	them synced (in case something goes wrong) but we DO not want to
 *	mark filesystem clean: it is not. (And it does not matter, if we
 *	resume correctly, we'll mark system clean, anyway.)
 *
 *	Returns the result of write_suspend_image().
 */
static int suspend_save_image(void)
{
	int error;

	device_resume();

	lock_swapdevices();		/* lock */
	error = write_suspend_image();
	lock_swapdevices();		/* second call unlocks */

	return error;
}
/*
 * pmdisk_resume - Consistency check and TLB flush.
 *
 * The page count and pagedir order recorded by pmdisk_suspend() must still
 * match the current values; a mismatch is fatal (BUG_ON). Afterwards the
 * global TLB entries are flushed. Always returns 0.
 */
int pmdisk_resume(void)
{
	BUG_ON(nr_copy_pages_check != pmdisk_pages);
	BUG_ON(pagedir_order_check != pagedir_order);

	/* Even mappings of "global" things (vmalloc) need to be fixed */
	__flush_tlb_global();

	return 0;
}
/* pmdisk_arch_suspend() is implemented in arch/?/power/pmdisk.S,
and basically does:
if (!resume) {
save_processor_state();
SAVE_REGISTERS
return pmdisk_suspend();
}
GO_TO_SWAPPER_PAGE_TABLES
COPY_PAGES_BACK
RESTORE_REGISTERS
restore_processor_state();
return pmdisk_resume();
*/
/* More restore stuff */
/* Single-page collision test against the current page directory. */
#define does_collide(addr) does_collide_order(pm_pagedir_nosave, addr, 0)

/*
 * does_collide_order - Test a block of pages against all original addresses.
 * @pagedir: Page directory to scan (first pmdisk_pages entries).
 * @addr:    Start address of the block.
 * @order:   The block covers PAGE_SIZE << order bytes.
 *
 * Returns 1 if any entry's orig_address lies in [addr, addr + size),
 * 0 otherwise.
 */
static int __init does_collide_order(suspend_pagedir_t *pagedir,
				     unsigned long addr, int order)
{
	unsigned long end = addr + (PAGE_SIZE << order);
	int i;

	for (i = 0; i < pmdisk_pages; i++) {
		if (pagedir[i].orig_address < addr)
			continue;
		if (pagedir[i].orig_address < end)
			return 1;
	}
	return 0;
}
/*
 * check_pagedir - Give every pagedir entry a non-colliding destination page.
 *
 * For each of the pmdisk_pages entries, zeroed pages are allocated until one
 * is found whose address does not collide with any orig_address in the
 * directory; that page becomes the entry's ->address.
 *
 * Pages that do collide are not freed here.
 * NOTE(review): presumably deliberate, so the allocator cannot hand the same
 * colliding page back again; confirm before changing.
 *
 * Returns 0 on success, -ENOMEM when an allocation fails.
 */
static int __init check_pagedir(void)
{
	int i;

	for (i = 0; i < pmdisk_pages; i++) {
		unsigned long addr;

		for (;;) {
			addr = get_zeroed_page(GFP_ATOMIC);
			if (!addr)
				return -ENOMEM;
			if (!does_collide(addr))
				break;
		}
		pm_pagedir_nosave[i].address = addr;
	}
	return 0;
}
/*
 * relocate_pagedir - Move the page directory if it overlaps restored memory.
 *
 * If the directory's own pages do not collide with any orig_address it
 * describes, nothing is done. Otherwise blocks of 2^pagedir_order pages are
 * allocated until one is found that does not collide; the directory is
 * copied there and pm_pagedir_nosave is redirected to the copy.
 *
 * Colliding candidate blocks are kept on a singly linked list (the link is
 * stored in the first word of each block) and are all freed before
 * returning. This is done iteratively to avoid recursion (not to overflow
 * the kernel stack).
 *
 * Returns 0 on success or when no relocation is needed, -ENOMEM when no
 * suitable block could be allocated.
 */
static int __init relocate_pagedir(void)
{
	suspend_pagedir_t *old_pagedir = pm_pagedir_nosave;
	void **rejected = NULL;		/* head of the list of colliding blocks */
	void *candidate;
	int err = -ENOMEM;

	pr_debug("pmdisk: Relocating pagedir\n");

	if (!does_collide_order(old_pagedir, (unsigned long)old_pagedir,
				pagedir_order)) {
		pr_debug("pmdisk: Relocation not necessary\n");
		return 0;
	}

	for (;;) {
		candidate = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order);
		if (candidate == NULL)
			break;

		if (!does_collide_order(old_pagedir, (unsigned long)candidate,
					pagedir_order)) {
			pm_pagedir_nosave = memcpy(candidate, old_pagedir,
						   PAGE_SIZE << pagedir_order);
			err = 0;
			break;
		}

		/* Collides: push the block onto the rejected list. */
		printk( "." );
		*(void **)candidate = rejected;
		rejected = candidate;
	}

	/* Give back every block that was only held to keep it out of the way. */
	while (rejected) {
		void *victim = rejected;

		printk(":");
		rejected = *rejected;
		free_pages((unsigned long)victim, pagedir_order);
	}
	printk("|\n");
	return err;
}
/* Block device the image is read from; set up by pmdisk_read(). */
static struct block_device * resume_bdev;

/**
 *	Using bio to read from swap.
 *	This code requires a bit more work than just using buffer heads
 *	but, it is the recommended way for 2.5/2.6.
 *
 *	Bios finish asynchronously, while we want them to happen
 *	synchronously. io_done is the flag used for that: start_io() sets
 *	it to 1 before a request is submitted, the completion callback
 *	end_io() sets it back to 0, and wait_io() keeps calling
 *	io_schedule() for as long as it is non-zero.
 */
static atomic_t io_done = ATOMIC_INIT(0);

/* Mark a request as in flight. */
static void start_io(void)
{
	atomic_set(&io_done, 1);
}

/* bio completion callback: mark the request as finished. */
static int end_io(struct bio * bio, unsigned int num, int err)
{
	atomic_set(&io_done, 0);
	return 0;
}

/* Block until end_io() has run for the request in flight. */
static void wait_io(void)
{
	while (atomic_read(&io_done) != 0)
		io_schedule();
}
/**
 *	submit - submit BIO request and wait for it.
 *	@rw:		READ or WRITE.
 *	@page_off:	offset of the page on the resume device, in pages.
 *	@page:		kernel virtual address of the page to read or write.
 *
 *	Allocate and initialize a one-page bio aimed at resume_bdev.
 *	If we're writing, make sure the page is marked as dirty.
 *	Then submit it and wait for end_io() to signal completion.
 *
 *	Returns 0 on success, -ENOMEM if no bio could be allocated,
 *	-EFAULT if the page could not be added to the bio, or -EIO if the
 *	request completed without BIO_UPTODATE set, i.e. the block layer
 *	reported an I/O error.
 */
static int submit(int rw, pgoff_t page_off, void * page)
{
	int error = 0;
	struct bio * bio;

	bio = bio_alloc(GFP_ATOMIC,1);
	if (!bio)
		return -ENOMEM;
	/* Convert the page offset into 512-byte sectors. */
	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
	bio_get(bio);
	bio->bi_bdev = resume_bdev;
	bio->bi_end_io = end_io;

	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
		printk("pmdisk: ERROR: adding page to bio at %ld\n",page_off);
		error = -EFAULT;
		goto Done;
	}

	if (rw == WRITE)
		bio_set_pages_dirty(bio);
	start_io();
	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
	wait_io();

	/*
	 * end_io() ignores its error argument, so look at the bio itself:
	 * a request that did not complete successfully no longer has
	 * BIO_UPTODATE set. Without this a failed read or write of the
	 * image would be reported as success.
	 */
	if (!bio_flagged(bio, BIO_UPTODATE))
		error = -EIO;
 Done:
	bio_put(bio);
	return error;
}
/* Synchronously read one page at page offset @page_off into @page. */
static int read_page(pgoff_t page_off, void * page)
{
	return submit(READ, page_off, page);
}
/* Synchronously write @page to page offset @page_off. */
static int write_page(pgoff_t page_off, void * page)
{
	return submit(WRITE, page_off, page);
}
extern dev_t __init name_to_dev_t(const char *line);
/*
 * check_sig - Look for the pmdisk signature on the resume device.
 *
 * Reads page 0 of the device into pmdisk_header. If its signature matches
 * PMDISK_SIG, the original signature saved in orig_sig is put back and the
 * page is written out again, so the signature is reset before resuming.
 *
 * Returns 0 when the signature was found and reset, -EINVAL when the
 * signature does not match, or the error of the failing read/write.
 */
static int __init check_sig(void)
{
	int error;

	memset(&pmdisk_header,0,sizeof(pmdisk_header));
	if ((error = read_page(0,&pmdisk_header)))
		return error;
	if (!memcmp(PMDISK_SIG,pmdisk_header.sig,10)) {
		memcpy(pmdisk_header.sig,pmdisk_header.orig_sig,10);

		/*
		 * Reset swap signature now.
		 */
		error = write_page(0,&pmdisk_header);
	} else {
		/*
		 * pr_debug() supplies its own log level; passing KERN_ERR as
		 * well would put a second level prefix into the message text.
		 */
		pr_debug("pmdisk: Invalid partition type.\n");
		return -EINVAL;
	}
	if (!error)
		pr_debug("pmdisk: Signature found, resuming\n");
	return error;
}
/*
 * sanity_check - Compare the loaded header with the running kernel.
 *
 * Checks, in this order: kernel version code, number of physical pages,
 * utsname sysname, release, version and machine, and the number of online
 * CPUs. Returns a short string naming the first field that differs, or NULL
 * when everything matches.
 *
 * I really don't think that it's foolproof but more than nothing..
 */
static const char * __init sanity_check(void)
{
	dump_pmdisk_info();

	if (pmdisk_info.version_code != LINUX_VERSION_CODE)
		return "kernel version";

	if (pmdisk_info.num_physpages != num_physpages)
		return "memory size";

	if (strcmp(pmdisk_info.uts.sysname, system_utsname.sysname) != 0)
		return "system type";

	if (strcmp(pmdisk_info.uts.release, system_utsname.release) != 0)
		return "kernel release";

	if (strcmp(pmdisk_info.uts.version, system_utsname.version) != 0)
		return "version";

	if (strcmp(pmdisk_info.uts.machine, system_utsname.machine) != 0)
		return "machine";

	if (pmdisk_info.cpus != num_online_cpus())
		return "number of cpus";

	return NULL;
}
/*
 * check_header - Load the suspend header and validate it.
 *
 * pmdisk_info is initialised, then overwritten with the page found at the
 * swap offset stored in pmdisk_header.pmdisk_info. If sanity_check() reports
 * a mismatch the reason is logged and -EPERM is returned. On success
 * pmdisk_pages is taken from the header's image_pages.
 *
 * Returns 0 on success, -EPERM on mismatch, or the read error.
 */
static int __init check_header(void)
{
	const char * reason;
	int error;

	init_header();

	error = read_page(swp_offset(pmdisk_header.pmdisk_info), &pmdisk_info);
	if (error)
		return error;

	/* Is this same machine? */
	reason = sanity_check();
	if (reason) {
		printk(KERN_ERR "pmdisk: Resume mismatch: %s\n",reason);
		return -EPERM;
	}

	pmdisk_pages = pmdisk_info.image_pages;
	return 0;
}
/*
 * read_pagedir - Load the page directory from swap.
 *
 * The number of directory pages comes from the on-disk header
 * (pmdisk_info.pagedir_pages). It is validated against the capacity of
 * pmdisk_info.pagedir[] before use, since a corrupt header must not make us
 * index past that array. A contiguous block large enough for the directory
 * is then allocated and each page is read from the swap offset recorded in
 * pmdisk_info.pagedir[i]; an offset of 0 is treated as an error.
 *
 * On success pm_pagedir_nosave points at the directory and pagedir_order is
 * set. On a read failure the block is freed again.
 *
 * Returns 0 on success, -EINVAL for an impossible page count, -ENOMEM if the
 * allocation fails, -EFAULT for a zero offset, or the read error.
 */
static int __init read_pagedir(void)
{
	unsigned long addr;
	int i, n;
	int error = 0;

	if (pmdisk_info.pagedir_pages >
	    sizeof(pmdisk_info.pagedir) / sizeof(pmdisk_info.pagedir[0])) {
		pr_debug("pmdisk: Invalid pagedir size in header\n");
		return -EINVAL;
	}
	n = pmdisk_info.pagedir_pages;

	pagedir_order = get_bitmask_order(n);

	addr =__get_free_pages(GFP_ATOMIC, pagedir_order);
	if (!addr)
		return -ENOMEM;
	pm_pagedir_nosave = (struct pbe *)addr;

	pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);

	/* Stop at the first page that cannot be read. */
	for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
		unsigned long offset = swp_offset(pmdisk_info.pagedir[i]);

		if (offset)
			error = read_page(offset, (void *)addr);
		else
			error = -EFAULT;
	}
	if (error)
		free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);
	return error;
}
/**
 *	read_image_data - Read image pages from swap.
 *
 *	For each of the pmdisk_pages directory entries, the page stored at
 *	the entry's swap_address is read into the entry's ->address.
 *	Reading stops at the first error. A progress dot is printed every
 *	100 pages.
 *
 *	You do not need to check for overlaps, check_pagedir()
 *	already did that.
 *
 *	Returns 0 on success or the first read error.
 */
static int __init read_image_data(void)
{
	struct pbe * p = pm_pagedir_nosave;
	int error = 0;
	int i = 0;

	printk( "Reading image data (%d pages): ", pmdisk_pages );

	while (i < pmdisk_pages && !error) {
		if (i % 100 == 0)
			printk( "." );
		error = read_page(swp_offset(p->swap_address),
				  (void *)p->address);
		i++;
		p++;
	}

	printk(" %d done.\n",i);
	return error;
}
/*
 * read_suspend_image - Load the complete image from the resume device.
 *
 * Runs, in order: check_sig(), check_header(), read_pagedir(),
 * relocate_pagedir(), check_pagedir() and read_image_data(). A failure in
 * one of the first three returns immediately; a failure in one of the last
 * three frees the page directory before returning.
 *
 * Returns 0 on success or the error of the first failing step.
 */
static int __init read_suspend_image(void)
{
	int error;

	error = check_sig();
	if (error)
		return error;

	error = check_header();
	if (error)
		return error;

	error = read_pagedir();
	if (error)
		return error;

	/* From here on the page directory is allocated. */
	error = relocate_pagedir();
	if (!error)
		error = check_pagedir();
	if (!error)
		error = read_image_data();

	if (error)
		free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);
	return error;
}
/**
 *	pmdisk_save - Snapshot memory
 *
 *	Refuses with -EPERM on kernels built with CONFIG_HIGHMEM or
 *	CONFIG_DISCONTIGMEM. Otherwise, after arch_prepare_suspend()
 *	succeeds, the processor state is saved and pmdisk_arch_suspend(0)
 *	is called with local interrupts disabled; processor state and
 *	interrupts are restored afterwards.
 *
 *	Returns the result of pmdisk_arch_suspend(0), or the error from
 *	arch_prepare_suspend().
 */
int pmdisk_save(void)
{
	int error;

#if defined (CONFIG_HIGHMEM) || defined (CONFIG_DISCONTIGMEM)
	pr_debug("pmdisk: not supported with high- or discontig-mem.\n");
	return -EPERM;
#endif
	error = arch_prepare_suspend();
	if (error)
		return error;

	local_irq_disable();
	save_processor_state();

	error = pmdisk_arch_suspend(0);

	restore_processor_state();
	local_irq_enable();
	return error;
}
/**
 *	pmdisk_write - Write saved memory image to swap.
 *
 *	Public entry point; all the work is done by suspend_save_image(),
 *	which resumes the devices and writes the snapshot to swap.
 *
 *	Returns the result of suspend_save_image().
 */
int pmdisk_write(void)
{
	int error = suspend_save_image();

	return error;
}
/**
 *	pmdisk_read - Read saved image from swap.
 *
 *	Does nothing but return -ENOENT when resume_file is empty.
 *	Otherwise the name is translated to a device number, the device is
 *	opened for reading, its block size is set to PAGE_SIZE, and the
 *	image is loaded with read_suspend_image(). The device is released
 *	again afterwards.
 *
 *	Returns 0 on success, the PTR_ERR() of a failed open, or the error
 *	from read_suspend_image().
 */
int __init pmdisk_read(void)
{
	int error;

	if (!strlen(resume_file))
		return -ENOENT;

	resume_device = name_to_dev_t(resume_file);
	pr_debug("pmdisk: Resume From Partition: %s\n", resume_file);

	resume_bdev = open_by_devnum(resume_device, FMODE_READ);
	if (IS_ERR(resume_bdev)) {
		error = PTR_ERR(resume_bdev);
	} else {
		set_blocksize(resume_bdev, PAGE_SIZE);
		error = read_suspend_image();
		blkdev_put(resume_bdev);
	}

	if (error)
		pr_debug("pmdisk: Error %d resuming\n", error);
	else
		pr_debug("Reading resume file was successful\n");
	return error;
}
/**
 *	pmdisk_restore - Replace running kernel with saved image.
 *
 *	With local interrupts disabled and the processor state saved,
 *	calls pmdisk_arch_suspend(1). Processor state and interrupts are
 *	restored afterwards.
 *
 *	Returns the result of pmdisk_arch_suspend(1).
 */
int __init pmdisk_restore(void)
{
	int error;

	local_irq_disable();
	save_processor_state();

	error = pmdisk_arch_suspend(1);

	restore_processor_state();
	local_irq_enable();

	return error;
}
/**
 *	pmdisk_free - Free memory allocated to hold snapshot.
 *
 *	Releases the image pages and the page directory through
 *	free_pagedir(). Always returns 0.
 */
int pmdisk_free(void)
{
	pr_debug( "Freeing prev allocated pagedir\n" );

	free_pagedir();

	return 0;
}
/*
 * pmdisk_setup - Handle the "pmdisk=" kernel command line option.
 * @str: Text following "pmdisk=".
 *
 * An empty value or the literal "off" clears resume_file. Any other value
 * is copied into resume_file (at most 255 bytes). Always returns 1.
 */
static int __init pmdisk_setup(char *str)
{
	if (!strlen(str) || !strcmp(str,"off"))
		resume_file[0] = '\0';
	else
		strncpy(resume_file, str, 255);

	return 1;
}

__setup("pmdisk=", pmdisk_setup);
#include <linux/suspend.h>
#include <linux/utsname.h>
/* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but /* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but
we probably do not take enough locks for switching consoles, etc, we probably do not take enough locks for switching consoles, etc,
...@@ -9,7 +10,20 @@ ...@@ -9,7 +10,20 @@
#endif #endif
#ifdef CONFIG_PM_DISK struct swsusp_info {
struct new_utsname uts;
u32 version_code;
unsigned long num_physpages;
int cpus;
unsigned long image_pages;
unsigned long pagedir_pages;
suspend_pagedir_t * suspend_pagedir;
swp_entry_t pagedir[768];
} __attribute__((aligned(PAGE_SIZE)));
#ifdef CONFIG_SOFTWARE_SUSPEND
extern int pm_suspend_disk(void); extern int pm_suspend_disk(void);
#else #else
...@@ -18,7 +32,6 @@ static inline int pm_suspend_disk(void) ...@@ -18,7 +32,6 @@ static inline int pm_suspend_disk(void)
return -EPERM; return -EPERM;
} }
#endif #endif
extern struct semaphore pm_sem; extern struct semaphore pm_sem;
#define power_attr(_name) \ #define power_attr(_name) \
static struct subsys_attribute _name##_attr = { \ static struct subsys_attribute _name##_attr = { \
......
...@@ -62,6 +62,7 @@ ...@@ -62,6 +62,7 @@
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/console.h> #include <linux/console.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/bio.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
...@@ -70,25 +71,16 @@ ...@@ -70,25 +71,16 @@
#include "power.h" #include "power.h"
unsigned char software_suspend_enabled = 0;
#define NORESUME 1
#define RESUME_SPECIFIED 2
/* References to section boundaries */ /* References to section boundaries */
extern char __nosave_begin, __nosave_end; extern char __nosave_begin, __nosave_end;
extern int is_head_of_free_region(struct page *); extern int is_head_of_free_region(struct page *);
/* Locks */
spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
/* Variables to be preserved over suspend */ /* Variables to be preserved over suspend */
static int pagedir_order_check; int pagedir_order_check;
static int nr_copy_pages_check; int nr_copy_pages_check;
static int resume_status; extern char resume_file[];
static char resume_file[256] = ""; /* For resume= kernel option */
static dev_t resume_device; static dev_t resume_device;
/* Local variables that should not be affected by save */ /* Local variables that should not be affected by save */
unsigned int nr_copy_pages __nosavedata = 0; unsigned int nr_copy_pages __nosavedata = 0;
...@@ -110,16 +102,16 @@ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; ...@@ -110,16 +102,16 @@ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save; static suspend_pagedir_t *pagedir_save;
static int pagedir_order __nosavedata = 0; static int pagedir_order __nosavedata = 0;
struct link { #define SWSUSP_SIG "S1SUSPEND"
char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
swp_entry_t next;
};
union diskpage { struct swsusp_header {
union swap_header swh; char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
struct link link; swp_entry_t swsusp_info;
struct suspend_header sh; char orig_sig[10];
}; char sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
struct swsusp_info swsusp_info;
/* /*
* XXX: We try to keep some more pages free so that I/O operations succeed * XXX: We try to keep some more pages free so that I/O operations succeed
...@@ -127,54 +119,10 @@ union diskpage { ...@@ -127,54 +119,10 @@ union diskpage {
*/ */
#define PAGES_FOR_IO 512 #define PAGES_FOR_IO 512
static const char name_suspend[] = "Suspend Machine: ";
static const char name_resume[] = "Resume Machine: ";
/*
* Debug
*/
#define DEBUG_DEFAULT
#undef DEBUG_PROCESS
#undef DEBUG_SLOW
#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */
#ifdef DEBUG_DEFAULT
# define PRINTK(f, a...) printk(f, ## a)
#else
# define PRINTK(f, a...) do { } while(0)
#endif
#ifdef DEBUG_SLOW
#define MDELAY(a) mdelay(a)
#else
#define MDELAY(a) do { } while(0)
#endif
/* /*
* Saving part... * Saving part...
*/ */
static __inline__ int fill_suspend_header(struct suspend_header *sh)
{
memset((char *)sh, 0, sizeof(*sh));
sh->version_code = LINUX_VERSION_CODE;
sh->num_physpages = num_physpages;
strncpy(sh->machine, system_utsname.machine, 8);
strncpy(sh->version, system_utsname.version, 20);
/* FIXME: Is this bogus? --RR */
sh->num_cpus = num_online_cpus();
sh->page_size = PAGE_SIZE;
sh->suspend_pagedir = pagedir_nosave;
BUG_ON (pagedir_save != pagedir_nosave);
sh->num_pbes = nr_copy_pages;
/* TODO: needed? mounted fs' last mounted date comparison
* [so they haven't been mounted since last suspend.
* Maybe it isn't.] [we'd need to do this for _all_ fs-es]
*/
return 0;
}
/* We memorize in swapfile_used what swap devices are used for suspension */ /* We memorize in swapfile_used what swap devices are used for suspension */
#define SWAPFILE_UNUSED 0 #define SWAPFILE_UNUSED 0
#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
...@@ -182,47 +130,30 @@ static __inline__ int fill_suspend_header(struct suspend_header *sh) ...@@ -182,47 +130,30 @@ static __inline__ int fill_suspend_header(struct suspend_header *sh)
static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short swapfile_used[MAX_SWAPFILES];
static unsigned short root_swap; static unsigned short root_swap;
#define MARK_SWAP_SUSPEND 0
#define MARK_SWAP_RESUME 2
static void mark_swapfiles(swp_entry_t prev, int mode) static int mark_swapfiles(swp_entry_t prev)
{ {
swp_entry_t entry; int error;
union diskpage *cur;
struct page *page;
if (root_swap == 0xFFFF) /* ignored */ rw_swap_page_sync(READ,
return; swp_entry(root_swap, 0),
virt_to_page((unsigned long)&swsusp_header));
page = alloc_page(GFP_ATOMIC); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
if (!page) !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
panic("Out of memory in mark_swapfiles"); memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
cur = page_address(page); memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
/* XXX: this is dirty hack to get first page of swap file */ swsusp_header.swsusp_info = prev;
entry = swp_entry(root_swap, 0); error = rw_swap_page_sync(WRITE,
rw_swap_page_sync(READ, entry, page); swp_entry(root_swap, 0),
virt_to_page((unsigned long)
if (mode == MARK_SWAP_RESUME) { &swsusp_header));
if (!memcmp("S1",cur->swh.magic.magic,2))
memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
else if (!memcmp("S2",cur->swh.magic.magic,2))
memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
name_resume, cur->swh.magic.magic);
} else { } else {
if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10))) pr_debug("swsusp: Partition is not swap space.\n");
memcpy(cur->swh.magic.magic,"S1SUSP....",10); error = -ENODEV;
else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
memcpy(cur->swh.magic.magic,"S2SUSP....",10);
else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
cur->link.next = prev; /* prev is the first/last swap page of the resume area */
/* link.next lies *no more* in last 4/8 bytes of magic */
} }
rw_swap_page_sync(WRITE, entry, page); return error;
__free_page(page);
} }
/* /*
* Check whether the swap device is the specified resume * Check whether the swap device is the specified resume
* device, irrespective of whether they are specified by * device, irrespective of whether they are specified by
...@@ -243,7 +174,7 @@ static int is_resume_device(const struct swap_info_struct *swap_info) ...@@ -243,7 +174,7 @@ static int is_resume_device(const struct swap_info_struct *swap_info)
resume_device == MKDEV(imajor(inode), iminor(inode)); resume_device == MKDEV(imajor(inode), iminor(inode));
} }
static void read_swapfiles(void) /* This is called before saving image */ int swsusp_swap_check(void) /* This is called before saving image */
{ {
int i, len; int i, len;
...@@ -274,114 +205,211 @@ static void read_swapfiles(void) /* This is called before saving image */ ...@@ -274,114 +205,211 @@ static void read_swapfiles(void) /* This is called before saving image */
} }
} }
swap_list_unlock(); swap_list_unlock();
return (root_swap != 0xffff) ? 0 : -ENODEV;
} }
static void lock_swapdevices(void) /* This is called after saving image so modification /**
will be lost after resume... and that's what we want. */ * This is called after saving image so modification
* will be lost after resume... and that's what we want.
* we make the device unusable. A new call to
* lock_swapdevices can unlock the devices.
*/
static void lock_swapdevices(void)
{ {
int i; int i;
swap_list_lock(); swap_list_lock();
for(i = 0; i< MAX_SWAPFILES; i++) for(i = 0; i< MAX_SWAPFILES; i++)
if(swapfile_used[i] == SWAPFILE_IGNORED) { if(swapfile_used[i] == SWAPFILE_IGNORED) {
swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to swap_info[i].flags ^= 0xFF;
lock_swapdevices can unlock the devices. */
} }
swap_list_unlock(); swap_list_unlock();
} }
/** /**
* write_suspend_image - Write entire image to disk. * write_swap_page - Write one page to a fresh swap location.
* @addr: Address we're writing.
* @loc: Place to store the entry we used.
* *
* After writing suspend signature to the disk, suspend may no * Allocate a new swap entry and 'sync' it. Note we discard -EIO
* longer fail: we have ready-to-run image in swap, and rollback * errors. That is an artifact left over from swsusp. It did not
* would happen on next reboot -- corrupting data. * check the return of rw_swap_page_sync() at all, since most pages
* written back to swap would return -EIO.
* This is a partial improvement, since we will at least return other
* errors, though we need to eventually fix the damn code.
*/
static int write_page(unsigned long addr, swp_entry_t * loc)
{
swp_entry_t entry;
int error = 0;
entry = get_swap_page();
if (swp_offset(entry) &&
swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
error = rw_swap_page_sync(WRITE, entry,
virt_to_page(addr));
if (error == -EIO)
error = 0;
if (!error)
*loc = entry;
} else
error = -ENOSPC;
return error;
}
/**
* data_free - Free the swap entries used by the saved image.
* *
* Note: The buffer we allocate to use to write the suspend header is * Walk the list of used swap entries and free each one.
* not freed; its not needed since the system is going down anyway * This is only used for cleanup when suspend fails.
* (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
*/ */
static int write_suspend_image(void)
static void data_free(void)
{ {
swp_entry_t entry;
int i; int i;
swp_entry_t entry, prev = { 0 };
int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
unsigned long address;
struct page *page;
if (!buffer) for (i = 0; i < nr_copy_pages; i++) {
return -ENOMEM; entry = (pagedir_nosave + i)->swap_address;
if (entry.val)
swap_free(entry);
else
break;
(pagedir_nosave + i)->swap_address = (swp_entry_t){0};
}
}
/**
* data_write - Write saved image to swap.
*
* Walk the list of pages in the image and sync each one to swap.
*/
static int data_write(void)
{
int error = 0;
int i;
printk( "Writing data to swap (%d pages): ", nr_copy_pages ); printk( "Writing data to swap (%d pages): ", nr_copy_pages );
for (i=0; i<nr_copy_pages; i++) { for (i = 0; i < nr_copy_pages && !error; i++) {
if (!(i%100)) if (!(i%100))
printk( "." ); printk( "." );
entry = get_swap_page(); error = write_page((pagedir_nosave+i)->address,
if (!entry.val) &((pagedir_nosave+i)->swap_address));
panic("\nNot enough swapspace when writing data" ); }
printk(" %d Pages done.\n",i);
return error;
}
if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) static void dump_info(void)
panic("\nPage %d: not enough swapspace on suspend device", i ); {
pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages);
}
address = (pagedir_nosave+i)->address; static void init_header(void)
page = virt_to_page(address); {
rw_swap_page_sync(WRITE, entry, page); memset(&swsusp_info,0,sizeof(swsusp_info));
(pagedir_nosave+i)->swap_address = entry; swsusp_info.version_code = LINUX_VERSION_CODE;
} swsusp_info.num_physpages = num_physpages;
memcpy(&swsusp_info.uts,&system_utsname,sizeof(system_utsname));
swsusp_info.suspend_pagedir = pagedir_nosave;
swsusp_info.cpus = num_online_cpus();
swsusp_info.image_pages = nr_copy_pages;
dump_info();
}
static int close_swap(void)
{
swp_entry_t entry;
int error;
error = write_page((unsigned long)&swsusp_info,&entry);
if (!error) {
printk( "S" );
error = mark_swapfiles(entry);
printk( "|\n" ); printk( "|\n" );
printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
for (i=0; i<nr_pgdir_pages; i++) {
cur = (union diskpage *)((char *) pagedir_nosave)+i;
BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
printk( "." );
entry = get_swap_page();
if (!entry.val) {
printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
panic("Don't know how to recover");
free_page((unsigned long) buffer);
return -ENOSPC;
} }
return error;
}
/**
* free_pagedir_entries - Free pages used by the page directory.
*
* This is used during suspend for error recovery.
*/
if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) static void free_pagedir_entries(void)
panic("\nNot enough swapspace for pagedir on suspend device" ); {
int i;
BUG_ON (sizeof(swp_entry_t) != sizeof(long)); for (i = 0; i < swsusp_info.pagedir_pages; i++)
BUG_ON (PAGE_SIZE % sizeof(struct pbe)); swap_free(swsusp_info.pagedir[i]);
}
cur->link.next = prev;
page = virt_to_page((unsigned long)cur);
rw_swap_page_sync(WRITE, entry, page);
prev = entry;
}
printk("H");
BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
BUG_ON (sizeof(struct link) != PAGE_SIZE);
entry = get_swap_page();
if (!entry.val)
panic( "\nNot enough swapspace when writing header" );
if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nNot enough swapspace for header on suspend device" );
cur = (void *) buffer; /**
if (fill_suspend_header(&cur->sh)) * write_pagedir - Write the array of pages holding the page directory.
BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */ * @last: Last swap entry we write (needed for header).
*/
cur->link.next = prev; static int write_pagedir(void)
{
unsigned long addr = (unsigned long)pagedir_nosave;
int error = 0;
int n = SUSPEND_PD_PAGES(nr_copy_pages);
int i;
page = virt_to_page((unsigned long)cur); swsusp_info.pagedir_pages = n;
rw_swap_page_sync(WRITE, entry, page); printk( "Writing pagedir (%d pages)\n", n);
prev = entry; for (i = 0; i < n && !error; i++, addr += PAGE_SIZE)
error = write_page(addr, &swsusp_info.pagedir[i]);
return error;
}
printk( "S" ); /**
mark_swapfiles(prev, MARK_SWAP_SUSPEND); * write_suspend_image - Write entire image and metadata.
printk( "|\n" ); *
*/
MDELAY(1000); static int write_suspend_image(void)
return 0; {
int error;
init_header();
if ((error = data_write()))
goto FreeData;
if ((error = write_pagedir()))
goto FreePagedir;
if ((error = close_swap()))
goto FreePagedir;
Done:
return error;
FreePagedir:
free_pagedir_entries();
FreeData:
data_free();
goto Done;
} }
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
struct highmem_page { struct highmem_page {
char *data; char *data;
...@@ -438,22 +466,30 @@ static int save_highmem_zone(struct zone *zone) ...@@ -438,22 +466,30 @@ static int save_highmem_zone(struct zone *zone)
} }
return 0; return 0;
} }
#endif /* CONFIG_HIGHMEM */
static int save_highmem(void) static int save_highmem(void)
{ {
#ifdef CONFIG_HIGHMEM
struct zone *zone; struct zone *zone;
int res = 0; int res = 0;
pr_debug("swsusp: Saving Highmem\n");
for_each_zone(zone) { for_each_zone(zone) {
if (is_highmem(zone)) if (is_highmem(zone))
res = save_highmem_zone(zone); res = save_highmem_zone(zone);
if (res) if (res)
return res; return res;
} }
#endif
return 0; return 0;
} }
static int restore_highmem(void) static int restore_highmem(void)
{ {
#ifdef CONFIG_HIGHMEM
printk("swsusp: Restoring Highmem\n");
while (highmem_copy) { while (highmem_copy) {
struct highmem_page *save = highmem_copy; struct highmem_page *save = highmem_copy;
void *kaddr; void *kaddr;
...@@ -465,9 +501,10 @@ static int restore_highmem(void) ...@@ -465,9 +501,10 @@ static int restore_highmem(void)
free_page((long) save->data); free_page((long) save->data);
kfree(save); kfree(save);
} }
#endif
return 0; return 0;
} }
#endif
static int pfn_is_nosave(unsigned long pfn) static int pfn_is_nosave(unsigned long pfn)
{ {
...@@ -476,57 +513,82 @@ static int pfn_is_nosave(unsigned long pfn) ...@@ -476,57 +513,82 @@ static int pfn_is_nosave(unsigned long pfn)
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
} }
/* if *pagedir_p != NULL it also copies the counted pages */ /**
static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p) * saveable - Determine whether a page should be cloned or not.
* @zone: The zone the page belongs to.
* @zone_pfn: Offset of the page within @zone, in pages.
*
* We save a page if it's Reserved, and not in the range of pages
* statically defined as 'unsaveable', or if it isn't reserved, and
* isn't part of a free chunk of pages.
* If it is the head of a free chunk, we update @zone_pfn to point to the
* last page of the chunk.
*/
static int saveable(struct zone * zone, unsigned long * zone_pfn)
{ {
unsigned long zone_pfn, chunk_size, nr_copy_pages = 0; unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
struct pbe *pbe = *pagedir_p; unsigned long chunk_size;
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { struct page * page;
struct page *page;
unsigned long pfn = zone_pfn + zone->zone_start_pfn; if (!pfn_valid(pfn))
return 0;
if (!(pfn%1000)) if (!(pfn%1000))
printk("."); printk(".");
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
BUG_ON(PageReserved(page) && PageNosave(page)); BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page)) if (PageNosave(page))
continue; return 0;
if (PageReserved(page) && pfn_is_nosave(pfn)) { if (PageReserved(page) && pfn_is_nosave(pfn)) {
PRINTK("[nosave pfn 0x%lx]", pfn); pr_debug("[nosave pfn 0x%lx]", pfn);
continue; return 0;
} }
if ((chunk_size = is_head_of_free_region(page))) { if ((chunk_size = is_head_of_free_region(page))) {
pfn += chunk_size - 1; *zone_pfn += chunk_size - 1;
zone_pfn += chunk_size - 1; return 0;
continue; }
return 1;
}
static void count_data_pages(void)
{
struct zone *zone;
unsigned long zone_pfn;
nr_copy_pages = 0;
for_each_zone(zone) {
if (!is_highmem(zone)) {
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
} }
nr_copy_pages++;
if (!pbe)
continue;
pbe->orig_address = (long) page_address(page);
/* Copy page is dangerous: it likes to mess with
preempt count on specific cpus. Wrong preempt count is then copied,
oops. */
copy_page((void *)pbe->address, (void *)pbe->orig_address);
pbe++;
} }
*pagedir_p = pbe;
return nr_copy_pages;
} }
static int count_and_copy_data_pages(struct pbe *pagedir_p)
static void copy_data_pages(void)
{ {
int nr_copy_pages = 0;
struct zone *zone; struct zone *zone;
unsigned long zone_pfn;
struct pbe * pbe = pagedir_nosave;
for_each_zone(zone) { for_each_zone(zone) {
if (!is_highmem(zone)) if (!is_highmem(zone))
nr_copy_pages += count_and_copy_zone(zone, &pagedir_p); for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
pbe->orig_address = (long) page_address(page);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
pbe++;
}
}
} }
return nr_copy_pages;
} }
static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir) static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
{ {
unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn; unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
...@@ -547,119 +609,202 @@ static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir) ...@@ -547,119 +609,202 @@ static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
} }
} }
static void free_suspend_pagedir(unsigned long this_pagedir) void swsusp_free(void)
{ {
unsigned long p = (unsigned long)pagedir_save;
struct zone *zone; struct zone *zone;
for_each_zone(zone) { for_each_zone(zone) {
if (!is_highmem(zone)) if (!is_highmem(zone))
free_suspend_pagedir_zone(zone, this_pagedir); free_suspend_pagedir_zone(zone, p);
}
free_pages(p, pagedir_order);
}
/**
* calc_order - Determine the order of allocation needed for pagedir_save.
*
* This looks tricky, but is just subtle. Please fix it some time.
* Since there are %nr_copy_pages worth of pages in the snapshot, we need
* to allocate enough contiguous space to hold
* (%nr_copy_pages * sizeof(struct pbe)),
* which has the saved/orig locations of the page..
*
* SUSPEND_PD_PAGES() tells us how many pages we need to hold those
* structures, then we call get_bitmask_order(), which will tell us the
* last bit set in the number, starting with 1. (If we need 30 pages, that
* is 0x0000001e in hex. The last bit is the 5th, which is the order we
* would use to allocate 32 contiguous pages).
*
* Since we also need to save those pages, we add the number of pages that
* we need to nr_copy_pages, and in case of an overflow, do the
* calculation again to update the number of pages needed.
*
* With this model, we will tend to waste a lot of memory if we just cross
* an order boundary. Plus, the higher the order of allocation that we try
* to do, the more likely we are to fail in a low-memory situation
* (though we're unlikely to get this far in such a case, since swsusp
* requires half of memory to be free anyway).
*/
static void calc_order(void)
{
int diff = 0;
int order = 0;
do {
diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order;
if (diff) {
order += diff;
nr_copy_pages += 1 << diff;
} }
free_pages(this_pagedir, pagedir_order); } while(diff);
pagedir_order = order;
} }
static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
/**
* alloc_pagedir - Allocate the page directory.
*
* First, determine exactly how many contiguous pages we need and
* allocate them.
*/
static int alloc_pagedir(void)
{ {
int i; calc_order();
suspend_pagedir_t *pagedir; pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD,
struct pbe *p; pagedir_order);
struct page *page; if (!pagedir_save)
return -ENOMEM;
memset(pagedir_save, 0, (1 << pagedir_order) * PAGE_SIZE);
pagedir_nosave = pagedir_save;
return 0;
}
pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order); /**
if (!pagedir) * alloc_image_pages - Allocate pages for the snapshot.
return NULL; *
*/
page = virt_to_page(pagedir); static int alloc_image_pages(void)
for(i=0; i < 1<<pagedir_order; i++) {
SetPageNosave(page++); struct pbe * p;
int i;
while(nr_copy_pages--) { for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) {
p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
if (!p->address) { if(!p->address)
free_suspend_pagedir((unsigned long) pagedir); goto Error;
return NULL;
}
SetPageNosave(virt_to_page(p->address)); SetPageNosave(virt_to_page(p->address));
p->orig_address = 0;
p++;
} }
return pagedir; return 0;
Error:
do {
if (p->address)
free_page(p->address);
p->address = 0;
} while (p-- > pagedir_save);
return -ENOMEM;
} }
static int prepare_suspend_processes(void)
/**
* enough_free_mem - Make sure we have enough free memory to snapshot.
*
* Returns TRUE or FALSE after checking the number of available
* free pages.
*/
static int enough_free_mem(void)
{ {
sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */ if (nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) {
if (freeze_processes()) { pr_debug("swsusp: Not enough free pages: Have %d\n",
printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" ); nr_free_pages());
thaw_processes();
return 1;
}
return 0; return 0;
}
return 1;
} }
/*
* Try to free as much memory as possible, but do not OOM-kill anyone /**
* enough_swap - Make sure we have enough swap to save the image.
* *
* Notice: all userland should be stopped at this point, or livelock is possible. * Returns TRUE or FALSE after checking the total amount of swap
* space available.
*
* FIXME: si_swapinfo(&i) returns all swap devices information.
* We should only consider resume_device.
*/ */
static void free_some_memory(void)
static int enough_swap(void)
{ {
printk("Freeing memory: "); struct sysinfo i;
while (shrink_all_memory(10000))
printk("."); si_swapinfo(&i);
printk("|\n"); if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
return 1;
} }
static int suspend_prepare_image(void) static int swsusp_alloc(void)
{ {
struct sysinfo i; int error;
unsigned int nr_needed_pages = 0;
pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
pagedir_nosave = NULL; pagedir_nosave = NULL;
printk( "/critical section: "); if (!enough_free_mem())
#ifdef CONFIG_HIGHMEM
printk( "handling highmem" );
if (save_highmem()) {
printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
return -ENOMEM; return -ENOMEM;
}
printk(", ");
#endif
printk("counting pages to copy" ); if (!enough_swap())
drain_local_pages(); return -ENOSPC;
nr_copy_pages = count_and_copy_data_pages(NULL);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages()); if ((error = alloc_pagedir())) {
if(nr_free_pages() < nr_needed_pages) { pr_debug("suspend: Allocating pagedir failed.\n");
printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", return error;
name_suspend, nr_needed_pages-nr_free_pages());
root_swap = 0xFFFF;
return -ENOMEM;
} }
si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information. if ((error = alloc_image_pages())) {
We should only consider resume_device. */ pr_debug("suspend: Allocating image pages failed.\n");
if (i.freeswap < nr_needed_pages) { swsusp_free();
printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n", return error;
name_suspend, nr_needed_pages-i.freeswap);
return -ENOSPC;
} }
PRINTK( "Alloc pagedir\n" );
pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
if (!pagedir_nosave) {
/* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */
printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend);
return -ENOMEM;
}
nr_copy_pages_check = nr_copy_pages; nr_copy_pages_check = nr_copy_pages;
pagedir_order_check = pagedir_order; pagedir_order_check = pagedir_order;
return 0;
}
int suspend_prepare_image(void)
{
unsigned int nr_needed_pages = 0;
int error;
pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n");
return -ENOMEM;
}
drain_local_pages();
count_data_pages();
printk("swsusp: Need to copy %u pages\n",nr_copy_pages);
nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */ error = swsusp_alloc();
if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */ if (error)
BUG(); return error;
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
drain_local_pages();
copy_data_pages();
/* /*
* End of critical section. From now on, we can write to memory, * End of critical section. From now on, we can write to memory,
...@@ -667,205 +812,79 @@ static int suspend_prepare_image(void) ...@@ -667,205 +812,79 @@ static int suspend_prepare_image(void)
* touch swap space! Except we must write out our image of course. * touch swap space! Except we must write out our image of course.
*/ */
printk( "critical section/: done (%d pages copied)\n", nr_copy_pages ); printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages );
return 0; return 0;
} }
static void suspend_save_image(void)
{
device_resume();
lock_swapdevices();
write_suspend_image();
lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */
/* It is important _NOT_ to umount filesystems at this point. We want /* It is important _NOT_ to umount filesystems at this point. We want
* them synced (in case something goes wrong) but we DO not want to mark * them synced (in case something goes wrong) but we DO not want to mark
* filesystem clean: it is not. (And it does not matter, if we resume * filesystem clean: it is not. (And it does not matter, if we resume
* correctly, we'll mark system clean, anyway.) * correctly, we'll mark system clean, anyway.)
*/ */
} int swsusp_write(void)
static void suspend_power_down(void)
{ {
extern int C_A_D; int error;
C_A_D = 0; device_resume();
printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": ""); lock_swapdevices();
#ifdef CONFIG_VT error = write_suspend_image();
PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state); /* This will unlock ignored swap devices since writing is finished */
mdelay(1000); lock_swapdevices();
if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL)))) return error;
machine_restart(NULL);
else
#endif
{
device_suspend(3);
device_shutdown();
machine_power_off();
}
printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
machine_halt();
while (1);
/* NOTREACHED */
}
/*
* Magic happens here
*/
asmlinkage void do_magic_resume_1(void)
{
barrier();
mb();
spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
device_power_down(3);
PRINTK( "Waiting for DMAs to settle down...\n");
mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right?
Do it with disabled interrupts for best effect. That way, if some
driver scheduled DMA, we have good chance for DMA to finish ;-). */
} }
asmlinkage void do_magic_resume_2(void)
{
BUG_ON (nr_copy_pages_check != nr_copy_pages);
BUG_ON (pagedir_order_check != pagedir_order);
__flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */
PRINTK( "Freeing prev allocated pagedir\n" );
free_suspend_pagedir((unsigned long) pagedir_save);
#ifdef CONFIG_HIGHMEM extern asmlinkage int swsusp_arch_suspend(void);
printk( "Restoring highmem\n" ); extern asmlinkage int swsusp_arch_resume(void);
restore_highmem();
#endif
printk("done, devices\n");
device_power_up();
spin_unlock_irq(&suspend_pagedir_lock);
device_resume();
/* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */ asmlinkage int swsusp_save(void)
PRINTK( "Fixing swap signatures... " ); {
mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); int error = 0;
PRINTK( "ok\n" );
#ifdef SUSPEND_CONSOLE if ((error = swsusp_swap_check()))
acquire_console_sem(); return error;
update_screen(fg_console); return suspend_prepare_image();
release_console_sem();
#endif
} }
/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does: int swsusp_suspend(void)
{
if (!resume) { int error;
do_magic_suspend_1(); if ((error = arch_prepare_suspend()))
return error;
local_irq_disable();
save_processor_state(); save_processor_state();
SAVE_REGISTERS error = swsusp_arch_suspend();
do_magic_suspend_2();
return;
}
GO_TO_SWAPPER_PAGE_TABLES
do_magic_resume_1();
COPY_PAGES_BACK
RESTORE_REGISTERS
restore_processor_state(); restore_processor_state();
do_magic_resume_2(); local_irq_enable();
return error;
*/
asmlinkage void do_magic_suspend_1(void)
{
mb();
barrier();
BUG_ON(in_atomic());
spin_lock_irq(&suspend_pagedir_lock);
} }
asmlinkage void do_magic_suspend_2(void)
{
int is_problem;
read_swapfiles();
device_power_down(3);
is_problem = suspend_prepare_image();
device_power_up();
spin_unlock_irq(&suspend_pagedir_lock);
if (!is_problem) {
kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
BUG_ON(in_atomic());
suspend_save_image();
suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
}
printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
barrier(); asmlinkage int swsusp_restore(void)
mb(); {
spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ BUG_ON (nr_copy_pages_check != nr_copy_pages);
BUG_ON (pagedir_order_check != pagedir_order);
free_pages((unsigned long) pagedir_nosave, pagedir_order);
spin_unlock_irq(&suspend_pagedir_lock);
device_resume(); /* Even mappings of "global" things (vmalloc) need to be fixed */
PRINTK( "Fixing swap signatures... " ); __flush_tlb_global();
mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); return 0;
PRINTK( "ok\n" );
} }
/* int swsusp_resume(void)
* This is main interface to the outside world. It needs to be
* called from process context.
*/
int software_suspend(void)
{ {
int res; int error;
if (!software_suspend_enabled) local_irq_disable();
return -EAGAIN; save_processor_state();
error = swsusp_arch_resume();
restore_processor_state();
restore_highmem();
local_irq_enable();
return error;
}
software_suspend_enabled = 0;
might_sleep();
if (arch_prepare_suspend()) {
printk("%sArchitecture failed to prepare\n", name_suspend);
return -EPERM;
}
if (pm_prepare_console())
printk( "%sCan't allocate a console... proceeding\n", name_suspend);
if (!prepare_suspend_processes()) {
/* At this point, all user processes and "dangerous"
kernel threads are stopped. Free some memory, as we
need half of memory free. */
free_some_memory();
disable_nonboot_cpus();
/* Save state of all device drivers, and stop them. */
printk("Suspending devices... ");
if ((res = device_suspend(3))==0) {
/* If stopping device drivers worked, we proceed basically into
* suspend_save_image.
*
* do_magic(0) returns after system is resumed.
*
* do_magic() copies all "used" memory to "free" memory, then
* unsuspends all device drivers, and writes memory to disk
* using normal kernel mechanism.
*/
do_magic(0);
}
thaw_processes();
enable_nonboot_cpus();
} else
res = -EBUSY;
software_suspend_enabled = 1;
MDELAY(1000);
pm_restore_console();
return res;
}
/* More restore stuff */ /* More restore stuff */
...@@ -874,14 +893,14 @@ int software_suspend(void) ...@@ -874,14 +893,14 @@ int software_suspend(void)
/* /*
* Returns true if given address/order collides with any orig_address * Returns true if given address/order collides with any orig_address
*/ */
static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
int order) int order)
{ {
int i; int i;
unsigned long addre = addr + (PAGE_SIZE<<order); unsigned long addre = addr + (PAGE_SIZE<<order);
for(i=0; i < nr_copy_pages; i++) for (i=0; i < nr_copy_pages; i++)
if((pagedir+i)->orig_address >= addr && if ((pagedir+i)->orig_address >= addr &&
(pagedir+i)->orig_address < addre) (pagedir+i)->orig_address < addre)
return 1; return 1;
...@@ -892,7 +911,7 @@ static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, ...@@ -892,7 +911,7 @@ static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
* We check here that pagedir & pages it points to won't collide with pages * We check here that pagedir & pages it points to won't collide with pages
* where we're going to restore from the loaded pages later * where we're going to restore from the loaded pages later
*/ */
static int check_pagedir(void) static int __init check_pagedir(void)
{ {
int i; int i;
...@@ -910,7 +929,7 @@ static int check_pagedir(void) ...@@ -910,7 +929,7 @@ static int check_pagedir(void)
return 0; return 0;
} }
static int relocate_pagedir(void) static int __init swsusp_pagedir_relocate(void)
{ {
/* /*
* We have to avoid recursion (not to overflow kernel stack), * We have to avoid recursion (not to overflow kernel stack),
...@@ -923,9 +942,9 @@ static int relocate_pagedir(void) ...@@ -923,9 +942,9 @@ static int relocate_pagedir(void)
printk("Relocating pagedir "); printk("Relocating pagedir ");
if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) { if (!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
printk("not necessary\n"); printk("not necessary\n");
return 0; return check_pagedir();
} }
while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) { while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
...@@ -953,283 +972,253 @@ static int relocate_pagedir(void) ...@@ -953,283 +972,253 @@ static int relocate_pagedir(void)
free_pages((unsigned long)f, pagedir_order); free_pages((unsigned long)f, pagedir_order);
} }
printk("|\n"); printk("|\n");
return ret; return check_pagedir();
} }
/* /**
* Sanity check if this image makes sense with this kernel/swap context * Using bio to read from swap.
* I really don't think that it's foolproof but more than nothing.. * This code requires a bit more work than just using buffer heads
* but, it is the recommended way for 2.5/2.6.
* The following are to signal the beginning and end of I/O. Bios
* finish asynchronously, while we want them to happen synchronously.
* A simple atomic_t, and a wait loop take care of this problem.
*/ */
static int sanity_check_failed(char *reason) static atomic_t io_done = ATOMIC_INIT(0);
{
printk(KERN_ERR "%s%s\n", name_resume, reason);
return -EPERM;
}
static int sanity_check(struct suspend_header *sh) static void start_io(void)
{ {
if (sh->version_code != LINUX_VERSION_CODE) atomic_set(&io_done,1);
return sanity_check_failed("Incorrect kernel version");
if (sh->num_physpages != num_physpages)
return sanity_check_failed("Incorrect memory size");
if (strncmp(sh->machine, system_utsname.machine, 8))
return sanity_check_failed("Incorrect machine type");
if (strncmp(sh->version, system_utsname.version, 20))
return sanity_check_failed("Incorrect version");
if (sh->num_cpus != num_online_cpus())
return sanity_check_failed("Incorrect number of cpus");
if (sh->page_size != PAGE_SIZE)
return sanity_check_failed("Incorrect PAGE_SIZE");
return 0;
} }
static int bdev_read_page(struct block_device *bdev, long pos, void *buf) static int end_io(struct bio * bio, unsigned int num, int err)
{ {
struct buffer_head *bh; atomic_set(&io_done,0);
BUG_ON (pos%PAGE_SIZE);
bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
if (!bh || (!bh->b_data)) {
return -1;
}
memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */
BUG_ON(!buffer_uptodate(bh));
brelse(bh);
return 0; return 0;
} }
static int bdev_write_page(struct block_device *bdev, long pos, void *buf) static void wait_io(void)
{ {
#if 0 while(atomic_read(&io_done))
struct buffer_head *bh; io_schedule();
BUG_ON (pos%PAGE_SIZE);
bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
if (!bh || (!bh->b_data)) {
return -1;
}
memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */
BUG_ON(!buffer_uptodate(bh));
generic_make_request(WRITE, bh);
if (!buffer_uptodate(bh))
printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
wait_on_buffer(bh);
brelse(bh);
return 0;
#endif
printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
return 0;
} }
extern dev_t __init name_to_dev_t(const char *line);
static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume) static struct block_device * resume_bdev;
{
swp_entry_t next;
int i, nr_pgdir_pages;
#define PREPARENEXT \ /**
{ next = cur->link.next; \ * submit - submit BIO request.
next.val = swp_offset(next) * PAGE_SIZE; \ * @rw: READ or WRITE.
} * @off physical offset of page.
* @page: page we're reading or writing.
*
* Straight from the textbook - allocate and initialize the bio.
* If we're writing, make sure the page is marked as dirty.
* Then submit it and wait.
*/
if (bdev_read_page(bdev, 0, cur)) return -EIO; static int submit(int rw, pgoff_t page_off, void * page)
{
int error = 0;
struct bio * bio;
if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || bio = bio_alloc(GFP_ATOMIC, 1);
(!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { if (!bio)
printk(KERN_ERR "%sThis is normal swap space\n", name_resume ); return -ENOMEM;
return -EINVAL; bio->bi_sector = page_off * (PAGE_SIZE >> 9);
bio_get(bio);
bio->bi_bdev = resume_bdev;
bio->bi_end_io = end_io;
if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
error = -EFAULT;
goto Done;
} }
PREPARENEXT; /* We have to read next position before we overwrite it */ if (rw == WRITE)
bio_set_pages_dirty(bio);
if (!memcmp("S1",cur->swh.magic.magic,2)) start_io();
memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); submit_bio(rw | (1 << BIO_RW_SYNC), bio);
else if (!memcmp("S2",cur->swh.magic.magic,2)) wait_io();
memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); Done:
else { bio_put(bio);
if (noresume) return error;
return -EINVAL; }
panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
name_resume, cur->swh.magic.magic);
}
if (noresume) {
/* We don't do a sanity check here: we want to restore the swap
whatever version of kernel made the suspend image;
We need to write swap, but swap is *not* enabled so
we must write the device directly */
printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
bdev_write_page(bdev, 0, cur);
}
printk( "%sSignature found, resuming\n", name_resume ); int bio_read_page(pgoff_t page_off, void * page)
MDELAY(1000); {
return submit(READ, page_off, page);
}
if (bdev_read_page(bdev, next.val, cur)) return -EIO; int bio_write_page(pgoff_t page_off, void * page)
if (sanity_check(&cur->sh)) /* Is this same machine? */ {
return -EPERM; return submit(WRITE, page_off, page);
PREPARENEXT; }
pagedir_save = cur->sh.suspend_pagedir; /*
nr_copy_pages = cur->sh.num_pbes; * Sanity check if this image makes sense with this kernel/swap context
nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); * I really don't think that it's foolproof but more than nothing..
pagedir_order = get_bitmask_order(nr_pgdir_pages); */
pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); static const char * __init sanity_check(void)
if (!pagedir_nosave) {
return -ENOMEM; dump_info();
if(swsusp_info.version_code != LINUX_VERSION_CODE)
return "kernel version";
if(swsusp_info.num_physpages != num_physpages)
return "memory size";
if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
return "system type";
if (strcmp(swsusp_info.uts.release,system_utsname.release))
return "kernel release";
if (strcmp(swsusp_info.uts.version,system_utsname.version))
return "version";
if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
return "machine";
if(swsusp_info.cpus != num_online_cpus())
return "number of cpus";
return NULL;
}
PRINTK( "%sReading pagedir, ", name_resume );
/* We get pages in reverse order of saving! */ static int __init check_header(void)
for (i=nr_pgdir_pages-1; i>=0; i--) { {
BUG_ON (!next.val); const char * reason = NULL;
cur = (union diskpage *)((char *) pagedir_nosave)+i; int error;
if (bdev_read_page(bdev, next.val, cur)) return -EIO;
PREPARENEXT;
}
BUG_ON (next.val);
if (relocate_pagedir()) if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info)))
return -ENOMEM; return error;
if (check_pagedir())
return -ENOMEM;
printk( "Reading image data (%d pages): ", nr_copy_pages ); /* Is this same machine? */
for(i=0; i < nr_copy_pages; i++) { if ((reason = sanity_check())) {
swp_entry_t swap_address = (pagedir_nosave+i)->swap_address; printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason);
if (!(i%100)) return -EPERM;
printk( "." );
/* You do not need to check for overlaps...
... check_pagedir already did this work */
if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
return -EIO;
} }
printk( "|\n" ); nr_copy_pages = swsusp_info.image_pages;
return 0; return error;
} }
static int __init read_suspend_image(const char * specialfile, int noresume) static int __init check_sig(void)
{ {
union diskpage *cur;
unsigned long scratch_page = 0;
int error; int error;
char b[BDEVNAME_SIZE];
resume_device = name_to_dev_t(specialfile);
scratch_page = get_zeroed_page(GFP_ATOMIC);
cur = (void *) scratch_page;
if (cur) {
struct block_device *bdev;
printk("Resuming from device %s\n",
__bdevname(resume_device, b));
bdev = open_by_devnum(resume_device, FMODE_READ);
if (IS_ERR(bdev)) {
error = PTR_ERR(bdev);
} else {
set_blocksize(bdev, PAGE_SIZE);
error = __read_suspend_image(bdev, cur, noresume);
blkdev_put(bdev);
}
} else error = -ENOMEM;
if (scratch_page) memset(&swsusp_header, 0, sizeof(swsusp_header));
free_page(scratch_page); if ((error = bio_read_page(0, &swsusp_header)))
switch (error) { return error;
case 0: if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
PRINTK("Reading resume file was successful\n"); memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
break;
case -EINVAL: /*
break; * Reset swap signature now.
case -EIO: */
printk( "%sI/O error\n", name_resume); error = bio_write_page(0, &swsusp_header);
break; } else {
case -ENOENT: pr_debug(KERN_ERR "swsusp: Invalid partition type.\n");
printk( "%s%s: No such file or directory\n", name_resume, specialfile); return -EINVAL;
break;
case -ENOMEM:
printk( "%sNot enough memory\n", name_resume);
break;
default:
printk( "%sError %d resuming\n", name_resume, error );
} }
MDELAY(1000); if (!error)
pr_debug("swsusp: Signature found, resuming\n");
return error; return error;
} }
/** /**
* software_resume - Resume from a saved image. * swsusp_read_data - Read image pages from swap.
*
* Called as a late_initcall (so all devices are discovered and
* initialized), we call swsusp to see if we have a saved image or not.
* If so, we quiesce devices, then restore the saved image. We will
* return above (in pm_suspend_disk() ) if everything goes well.
* Otherwise, we fail gracefully and return to the normally
* scheduled program.
* *
* You do not need to check for overlaps, check_pagedir()
* already did that.
*/ */
static int __init software_resume(void)
{
if (num_online_cpus() > 1) {
printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");
return -EINVAL;
}
/* We enable the possibility of machine suspend */
software_suspend_enabled = 1;
if (!resume_status)
return 0;
printk( "%s", name_resume ); static int __init data_read(void)
if (resume_status == NORESUME) { {
if(resume_file[0]) struct pbe * p;
read_suspend_image(resume_file, 1); int error;
printk( "disabled\n" ); int i;
return 0;
}
MDELAY(1000);
if (pm_prepare_console()) if ((error = swsusp_pagedir_relocate()))
printk("swsusp: Can't allocate a console... proceeding\n"); return error;
if (!resume_file[0] && resume_status == RESUME_SPECIFIED) { printk( "Reading image data (%d pages): ", nr_copy_pages );
printk( "suspension device unspecified\n" ); for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) {
return -EINVAL; if (!(i%100))
printk( "." );
error = bio_read_page(swp_offset(p->swap_address),
(void *)p->address);
} }
printk(" %d done.\n",i);
return error;
printk( "resuming from %s\n", resume_file);
if (read_suspend_image(resume_file, 0))
goto read_failure;
/* FIXME: Should we stop processes here, just to be safer? */
disable_nonboot_cpus();
device_suspend(3);
do_magic(1);
panic("This never returns");
read_failure:
pm_restore_console();
return 0;
} }
late_initcall(software_resume); extern dev_t __init name_to_dev_t(const char *line);
static int __init resume_setup(char *str) static int __init read_pagedir(void)
{ {
if (resume_status == NORESUME) unsigned long addr;
return 1; int i, n = swsusp_info.pagedir_pages;
int error = 0;
strncpy( resume_file, str, 255 ); pagedir_order = get_bitmask_order(n);
resume_status = RESUME_SPECIFIED;
return 1; addr =__get_free_pages(GFP_ATOMIC, pagedir_order);
if (!addr)
return -ENOMEM;
pagedir_nosave = (struct pbe *)addr;
pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
if (offset)
error = bio_read_page(offset, (void *)addr);
else
error = -EFAULT;
}
if (error)
free_pages((unsigned long)pagedir_nosave, pagedir_order);
return error;
} }
static int __init noresume_setup(char *str) static int __init read_suspend_image(void)
{ {
resume_status = NORESUME; int error = 0;
return 1;
if ((error = check_sig()))
return error;
if ((error = check_header()))
return error;
if ((error = read_pagedir()))
return error;
if ((error = data_read()))
free_pages((unsigned long)pagedir_nosave, pagedir_order);
return error;
} }
__setup("noresume", noresume_setup); /**
__setup("resume=", resume_setup); * pmdisk_read - Read saved image from swap.
*/
int __init swsusp_read(void)
{
int error;
if (!strlen(resume_file))
return -ENOENT;
EXPORT_SYMBOL(software_suspend); resume_device = name_to_dev_t(resume_file);
EXPORT_SYMBOL(software_suspend_enabled); pr_debug("swsusp: Resume From Partition: %s\n", resume_file);
resume_bdev = open_by_devnum(resume_device, FMODE_READ);
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
error = read_suspend_image();
blkdev_put(resume_bdev);
} else
error = PTR_ERR(resume_bdev);
if (!error)
pr_debug("Reading resume file was successful\n");
else
pr_debug("pmdisk: Error %d resuming\n", error);
return error;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment