diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile index 2e1c9ab34d4cb313b82b34482a4b420ae00a88bc..8cfa4e8a719d6ea2dd9cf140862cb2c6a64b669a 100644 --- a/arch/i386/power/Makefile +++ b/arch/i386/power/Makefile @@ -1,3 +1,2 @@ obj-$(CONFIG_PM) += cpu.o -obj-$(CONFIG_PM_DISK) += pmdisk.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o diff --git a/arch/i386/power/pmdisk.S b/arch/i386/power/pmdisk.S deleted file mode 100644 index b8106ae2339712d1352b69bf9020749b71176fd9..0000000000000000000000000000000000000000 --- a/arch/i386/power/pmdisk.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Originally gcc generated, modified by hand */ - -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/page.h> - - .text - -ENTRY(pmdisk_arch_suspend) - cmpl $0,4(%esp) - jne .L1450 - - movl %esp, saved_context_esp - movl %ebx, saved_context_ebx - movl %ebp, saved_context_ebp - movl %esi, saved_context_esi - movl %edi, saved_context_edi - pushfl ; popl saved_context_eflags - - call pmdisk_suspend - jmp .L1449 - .p2align 4,,7 -.L1450: - movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx - movl %ecx,%cr3 - - movl pm_pagedir_nosave,%ebx - xorl %eax, %eax - xorl %edx, %edx - .p2align 4,,7 -.L1455: - movl 4(%ebx,%edx),%edi - movl (%ebx,%edx),%esi - - movl $1024, %ecx - rep - movsl - - movl %cr3, %ecx; - movl %ecx, %cr3; # flush TLB - - incl %eax - addl $16, %edx - cmpl pmdisk_pages,%eax - jb .L1455 - .p2align 4,,7 -.L1453: - movl saved_context_esp, %esp - movl saved_context_ebp, %ebp - movl saved_context_ebx, %ebx - movl saved_context_esi, %esi - movl saved_context_edi, %edi - pushl saved_context_eflags ; popfl - call pmdisk_resume -.L1449: - ret diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S index 8e4a7bacaadf1f34162da2eb95b711db99bf22f3..221ac27a204f03c3978edfea38785e4c4a3bff71 100644 --- a/arch/i386/power/swsusp.S +++ b/arch/i386/power/swsusp.S @@ -15,83 +15,47 @@ .text -ENTRY(do_magic) - pushl %ebx - cmpl $0,8(%esp) - jne resume - call do_magic_suspend_1 - call 
save_processor_state +ENTRY(swsusp_arch_suspend) movl %esp, saved_context_esp - movl %eax, saved_context_eax movl %ebx, saved_context_ebx - movl %ecx, saved_context_ecx - movl %edx, saved_context_edx movl %ebp, saved_context_ebp movl %esi, saved_context_esi movl %edi, saved_context_edi pushfl ; popl saved_context_eflags - call do_magic_suspend_2 - popl %ebx + call swsusp_save ret -resume: +ENTRY(swsusp_arch_resume) movl $swsusp_pg_dir-__PAGE_OFFSET,%ecx movl %ecx,%cr3 - call do_magic_resume_1 - movl $0,loop - cmpl $0,nr_copy_pages - je copy_done -copy_loop: - movl $0,loop2 + movl pagedir_nosave,%ebx + xorl %eax, %eax + xorl %edx, %edx .p2align 4,,7 -copy_one_page: - movl pagedir_nosave,%ecx - movl loop,%eax - movl loop2,%edx - sall $4,%eax - movl 4(%ecx,%eax),%ebx - movl (%ecx,%eax),%eax - movb (%edx,%eax),%al - movb %al,(%edx,%ebx) - movl loop2,%eax - leal 1(%eax),%edx - movl %edx,loop2 - movl %edx,%eax - cmpl $4095,%eax - jbe copy_one_page - movl loop,%eax - leal 1(%eax),%edx - movl %edx,loop - movl %edx,%eax - cmpl nr_copy_pages,%eax - jb copy_loop +copy_loop: + movl 4(%ebx,%edx),%edi + movl (%ebx,%edx),%esi + + movl $1024, %ecx + rep + movsl -copy_done: - movl $__USER_DS,%eax + incl %eax + addl $16, %edx + cmpl nr_copy_pages,%eax + jb copy_loop + .p2align 4,,7 - movw %ax, %ds - movw %ax, %es movl saved_context_esp, %esp movl saved_context_ebp, %ebp - movl saved_context_eax, %eax movl saved_context_ebx, %ebx - movl saved_context_ecx, %ecx - movl saved_context_edx, %edx movl saved_context_esi, %esi movl saved_context_edi, %edi - call restore_processor_state + pushl saved_context_eflags ; popfl - call do_magic_resume_2 - popl %ebx + call swsusp_restore ret - - .section .data.nosave -loop: - .quad 0 -loop2: - .quad 0 - .previous diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 99a1adf868a748bb74658e0d8cf22a1d9a555182..48ad06289d49570bd1d10c3e567bd3cee8214049 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ 
b/arch/x86_64/kernel/suspend_asm.S @@ -1,22 +1,18 @@ -/* originally gcc generated, but now changed. don't overwrite. */ +/* Originally gcc generated, modified by hand + * + * This may not use any stack, nor any variable that is not "NoSave": + * + * It's rewriting one kernel image with another. What is stack in the "old" + * image could very well be a data page in the "new" image, and overwriting + * your own stack under you is a bad idea. + */ .text #include <linux/linkage.h> #include <asm/segment.h> #include <asm/page.h> -/* Input: - * rdi resume flag - */ - -ENTRY(do_magic) -.LFB5: - subq $8, %rsp -.LCFI2: - testl %edi, %edi - jne .L90 - call do_magic_suspend_1 - call save_processor_state +ENTRY(swsusp_arch_suspend) movq %rsp, saved_context_esp(%rip) movq %rax, saved_context_eax(%rip) @@ -36,9 +32,10 @@ ENTRY(do_magic) movq %r15, saved_context_r15(%rip) pushfq ; popq saved_context_eflags(%rip) - addq $8, %rsp - jmp do_magic_suspend_2 -.L90: + call swsusp_save + ret + +ENTRY(swsusp_arch_resume) /* set up cr3 */ leaq init_level4_pgt(%rip),%rax subq $__START_KERNEL_map,%rax @@ -53,7 +50,6 @@ ENTRY(do_magic) movq %rcx, %cr3; movq %rax, %cr4; # turn PGE back on - call do_magic_resume_1 movl nr_copy_pages(%rip), %eax xorl %ecx, %ecx movq $0, loop(%rip) @@ -113,9 +109,8 @@ ENTRY(do_magic) movq saved_context_r14(%rip), %r14 movq saved_context_r15(%rip), %r15 pushq saved_context_eflags(%rip) ; popfq - call restore_processor_state - addq $8, %rsp - jmp do_magic_resume_2 + call swsusp_restore + ret .section .data.nosave loop: diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d0955f06c9b4aba49ef8ee92d9437b73147c42c3..932dc2ca41527fb06fdef7a7c4e4710362ae4e8b 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -23,16 +23,6 @@ typedef struct pbe { #define SWAP_FILENAME_MAXLENGTH 32 -struct suspend_header { - u32 version_code; - unsigned long num_physpages; - char machine[8]; - char version[20]; - int num_cpus; - int page_size; - suspend_pagedir_t
*suspend_pagedir; - unsigned int num_pbes; -}; #define SUSPEND_PD_PAGES(x) (((x)*sizeof(struct pbe))/PAGE_SIZE+1) @@ -45,16 +35,12 @@ extern void drain_local_pages(void); /* kernel/power/swsusp.c */ extern int software_suspend(void); -extern unsigned int nr_copy_pages __nosavedata; -extern suspend_pagedir_t *pagedir_nosave __nosavedata; - #else /* CONFIG_SOFTWARE_SUSPEND */ static inline int software_suspend(void) { printk("Warning: fake suspend called\n"); return -EPERM; } -#define software_resume() do { } while(0) #endif /* CONFIG_SOFTWARE_SUSPEND */ @@ -78,12 +64,6 @@ static inline void disable_nonboot_cpus(void) {} static inline void enable_nonboot_cpus(void) {} #endif -asmlinkage void do_magic(int is_resume); -asmlinkage void do_magic_resume_1(void); -asmlinkage void do_magic_resume_2(void); -asmlinkage void do_magic_suspend_1(void); -asmlinkage void do_magic_suspend_2(void); - void save_processor_state(void); void restore_processor_state(void); struct saved_context; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6bb62269f3eb8cd6a2913eac6c03c06e6dd20177..67f955286b6c9598b776bb8d56a8951c1dd9c4eb 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -18,6 +18,13 @@ config PM will issue the hlt instruction if nothing is to be done, thereby sending the processor to sleep and saving power. +config PM_DEBUG + bool "Power Management Debug Support" + ---help--- + This option enables verbose debugging support in the Power Management + code. This is helpful when debugging and reporting various PM bugs, + like suspend support. + config SOFTWARE_SUSPEND bool "Software Suspend (EXPERIMENTAL)" depends on EXPERIMENTAL && PM && SWAP @@ -42,33 +49,12 @@ config SOFTWARE_SUSPEND For more information take a look at Documentation/power/swsusp.txt. 
-config PM_DISK - bool "Suspend-to-Disk Support" - depends on PM && SWAP && X86 && !X86_64 - ---help--- - Suspend-to-disk is a power management state in which the contents - of memory are stored on disk and the entire system is shut down or - put into a low-power state (e.g. ACPI S4). When the computer is - turned back on, the stored image is loaded from disk and execution - resumes from where it left off before suspending. - - This config option enables the core infrastructure necessary to - perform the suspend and resume transition. - - Currently, this suspend-to-disk implementation is based on a forked - version of the swsusp code base. As such, it's still experimental, - and still relies on CONFIG_SWAP. - - More information can be found in Documentation/power/. - - If unsure, Say N. - -config PM_DISK_PARTITION +config PM_STD_PARTITION string "Default resume partition" - depends on PM_DISK + depends on SOFTWARE_SUSPEND default "" ---help--- - The default resume partition is the partition that the pmdisk suspend- + The default resume partition is the partition that the suspend- to-disk implementation will look for a suspended disk image. The partition specified here will be different for almost every user. @@ -77,16 +63,10 @@ config PM_DISK_PARTITION The partition specified can be overridden by specifying: - pmdisk=/dev/<other device> + resume=/dev/<other device> which will set the resume partition to the device specified. - One may also do: - - pmdisk=off - - to inform the kernel not to perform a resume transition. - Note there is currently not a way to specify which device to save the suspended image to. It will simply pick the first available swap device. 
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 2509213f50f27e0d180b10236a0bfcca773db2d0..fbdc634135a765a90b04eb224c2c38126747340c 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,8 +1,11 @@ +ifeq ($(CONFIG_PM_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif + swsusp-smp-$(CONFIG_SMP) += smp.o obj-y := main.o process.o console.o pm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) -obj-$(CONFIG_PM_DISK) += disk.o pmdisk.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 6abcf99b7ada01e3450c5f260d97149daa7b3f25..f09de7ad0eff5d739e24248a3bc0e7669f3e0d94 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -8,13 +8,11 @@ * */ -#define DEBUG - - #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/reboot.h> #include <linux/string.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/fs.h> #include "power.h" @@ -23,13 +21,16 @@ extern u32 pm_disk_mode; extern struct pm_ops * pm_ops; -extern int pmdisk_save(void); -extern int pmdisk_write(void); -extern int pmdisk_read(void); -extern int pmdisk_restore(void); -extern int pmdisk_free(void); +extern int swsusp_suspend(void); +extern int swsusp_write(void); +extern int swsusp_read(void); +extern int swsusp_resume(void); +extern int swsusp_free(void); +static int noresume = 0; +char resume_file[256] = CONFIG_PM_STD_PARTITION; + /** * power_down - Shut machine down for hibernate. 
* @mode: Suspend-to-disk mode @@ -46,16 +47,18 @@ static int power_down(u32 mode) int error = 0; local_irq_save(flags); - device_power_down(PM_SUSPEND_DISK); switch(mode) { case PM_DISK_PLATFORM: + device_power_down(PM_SUSPEND_DISK); error = pm_ops->enter(PM_SUSPEND_DISK); break; case PM_DISK_SHUTDOWN: printk("Powering off system\n"); + device_shutdown(); machine_power_off(); break; case PM_DISK_REBOOT: + device_shutdown(); machine_restart(NULL); break; } @@ -99,6 +102,7 @@ static void finish(void) { device_resume(); platform_finish(); + enable_nonboot_cpus(); thaw_processes(); pm_restore_console(); } @@ -126,6 +130,7 @@ static int prepare(void) /* Free memory before shutting down devices. */ free_some_memory(); + disable_nonboot_cpus(); if ((error = device_suspend(PM_SUSPEND_DISK))) goto Finish; @@ -133,6 +138,7 @@ static int prepare(void) Finish: platform_finish(); Thaw: + enable_nonboot_cpus(); thaw_processes(); pm_restore_console(); return error; @@ -161,7 +167,7 @@ int pm_suspend_disk(void) pr_debug("PM: snapshotting memory.\n"); in_suspend = 1; - if ((error = pmdisk_save())) + if ((error = swsusp_suspend())) goto Done; if (in_suspend) { @@ -173,14 +179,14 @@ int pm_suspend_disk(void) mb(); barrier(); - error = pmdisk_write(); + error = swsusp_write(); if (!error) { error = power_down(pm_disk_mode); pr_debug("PM: Power down failed.\n"); } } else pr_debug("PM: Image restored successfully.\n"); - pmdisk_free(); + swsusp_free(); Done: finish(); return error; @@ -188,7 +194,7 @@ int pm_suspend_disk(void) /** - * pm_resume - Resume from a saved image. + * software_resume - Resume from a saved image. * * Called as a late_initcall (so all devices are discovered and * initialized), we call pmdisk to see if we have a saved image or not. 
@@ -199,13 +205,21 @@ int pm_suspend_disk(void) * */ -static int pm_resume(void) +static int software_resume(void) { int error; + if (noresume) { + /** + * FIXME: If noresume is specified, we need to find the partition + * and reset it back to normal swap space. + */ + return 0; + } + pr_debug("PM: Reading pmdisk image.\n"); - if ((error = pmdisk_read())) + if ((error = swsusp_read())) goto Done; pr_debug("PM: Preparing system for restore.\n"); @@ -216,28 +230,18 @@ static int pm_resume(void) barrier(); mb(); - /* FIXME: The following (comment and mdelay()) are from swsusp. - * Are they really necessary? - * - * We do not want some readahead with DMA to corrupt our memory, right? - * Do it with disabled interrupts for best effect. That way, if some - * driver scheduled DMA, we have good chance for DMA to finish ;-). - */ - pr_debug("PM: Waiting for DMAs to settle down.\n"); - mdelay(1000); - pr_debug("PM: Restoring saved image.\n"); - pmdisk_restore(); + swsusp_resume(); pr_debug("PM: Restore failed, recovering.n"); finish(); Free: - pmdisk_free(); + swsusp_free(); Done: pr_debug("PM: Resume from disk failed.\n"); return 0; } -late_initcall(pm_resume); +late_initcall(software_resume); static char * pm_disk_modes[] = { @@ -336,3 +340,22 @@ static int __init pm_disk_init(void) } core_initcall(pm_disk_init); + + +static int __init resume_setup(char *str) +{ + if (noresume) + return 1; + + strncpy( resume_file, str, 255 ); + return 1; +} + +static int __init noresume_setup(char *str) +{ + noresume = 1; + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume=", resume_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c index d582906fecc619ebb19fed35dc07a5303c039f28..3461308043b9a674fef65a8d28c0a2635ccfb2f1 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,8 +8,6 @@ * */ -#define DEBUG - #include <linux/suspend.h> #include <linux/kobject.h> #include <linux/string.h> @@ -169,6 +167,15 @@ static int enter_state(u32 state) return 
error; } +/* + * This is the main interface to the outside world. It needs to be + * called from process context. + */ +int software_suspend(void) +{ + return enter_state(PM_SUSPEND_DISK); +} + /** * pm_suspend - Externally visible function for suspending system. diff --git a/kernel/power/pmdisk.c b/kernel/power/pmdisk.c deleted file mode 100644 index 318bfb9fa5f8b846e99a63051df7326f2182b80a..0000000000000000000000000000000000000000 --- a/kernel/power/pmdisk.c +++ /dev/null @@ -1,1166 +0,0 @@ -/* - * kernel/power/pmdisk.c - Suspend-to-disk implmentation - * - * This STD implementation is initially derived from swsusp (suspend-to-swap). - * The original copyright on that was: - * - * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> - * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> - * - * The additional parts are: - * - * Copyright (C) 2003 Patrick Mochel - * Copyright (C) 2003 Open Source Development Lab - * - * This file is released under the GPLv2. - * - * For more information, please see the text files in Documentation/power/ - * - */ - -#undef DEBUG - -#include <linux/mm.h> -#include <linux/bio.h> -#include <linux/suspend.h> -#include <linux/version.h> -#include <linux/reboot.h> -#include <linux/device.h> -#include <linux/swapops.h> -#include <linux/bootmem.h> -#include <linux/utsname.h> - -#include <asm/mmu_context.h> - -#include "power.h" - - -extern asmlinkage int pmdisk_arch_suspend(int resume); - -#define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) -#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) -#define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */ - -/* References to section boundaries */ -extern char __nosave_begin, __nosave_end; - -extern int is_head_of_free_region(struct page *); - -/* Variables to be preserved over suspend */ -static int pagedir_order_check; -static int nr_copy_pages_check; - -/* For resume= kernel option */ -static char resume_file[256] = CONFIG_PM_DISK_PARTITION; -
-static dev_t resume_device; -/* Local variables that should not be affected by save */ -unsigned int pmdisk_pages __nosavedata = 0; - -/* Suspend pagedir is allocated before final copy, therefore it - must be freed after resume - - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - */ -suspend_pagedir_t *pm_pagedir_nosave __nosavedata = NULL; -static suspend_pagedir_t *pagedir_save; -static int pagedir_order __nosavedata = 0; - - -struct pmdisk_info { - struct new_utsname uts; - u32 version_code; - unsigned long num_physpages; - int cpus; - unsigned long image_pages; - unsigned long pagedir_pages; - swp_entry_t pagedir[768]; -} __attribute__((aligned(PAGE_SIZE))) pmdisk_info; - - - -#define PMDISK_SIG "pmdisk-swap1" - -struct pmdisk_header { - char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; - swp_entry_t pmdisk_info; - char orig_sig[10]; - char sig[10]; -} __attribute__((packed, aligned(PAGE_SIZE))) pmdisk_header; - -/* - * XXX: We try to keep some more pages free so that I/O operations succeed - * without paging. Might this be more? - */ -#define PAGES_FOR_IO 512 - - -/* - * Saving part... 
- */ - - -/* We memorize in swapfile_used what swap devices are used for suspension */ -#define SWAPFILE_UNUSED 0 -#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ -#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ - -static unsigned short swapfile_used[MAX_SWAPFILES]; -static unsigned short root_swap; - - -static int mark_swapfiles(swp_entry_t prev) -{ - int error; - - rw_swap_page_sync(READ, - swp_entry(root_swap, 0), - virt_to_page((unsigned long)&pmdisk_header)); - if (!memcmp("SWAP-SPACE",pmdisk_header.sig,10) || - !memcmp("SWAPSPACE2",pmdisk_header.sig,10)) { - memcpy(pmdisk_header.orig_sig,pmdisk_header.sig,10); - memcpy(pmdisk_header.sig,PMDISK_SIG,10); - pmdisk_header.pmdisk_info = prev; - error = rw_swap_page_sync(WRITE, - swp_entry(root_swap, 0), - virt_to_page((unsigned long) - &pmdisk_header)); - } else { - pr_debug("pmdisk: Partition is not swap space.\n"); - error = -ENODEV; - } - return error; -} - -static int read_swapfiles(void) /* This is called before saving image */ -{ - int i, len; - - len=strlen(resume_file); - root_swap = 0xFFFF; - - swap_list_lock(); - for(i=0; i<MAX_SWAPFILES; i++) { - if (swap_info[i].flags == 0) { - swapfile_used[i]=SWAPFILE_UNUSED; - } else { - if(!len) { - pr_debug("pmdisk: Default resume partition not set.\n"); - if(root_swap == 0xFFFF) { - swapfile_used[i] = SWAPFILE_SUSPEND; - root_swap = i; - } else - swapfile_used[i] = SWAPFILE_IGNORED; - } else { - /* we ignore all swap devices that are not the resume_file */ - if (1) { -// FIXME if(resume_device == swap_info[i].swap_device) { - swapfile_used[i] = SWAPFILE_SUSPEND; - root_swap = i; - } else - swapfile_used[i] = SWAPFILE_IGNORED; - } - } - } - swap_list_unlock(); - return (root_swap != 0xffff) ? 0 : -ENODEV; -} - - -/* This is called after saving image so modification - will be lost after resume... and that's what we want. 
*/ -static void lock_swapdevices(void) -{ - int i; - - swap_list_lock(); - for(i = 0; i< MAX_SWAPFILES; i++) - if(swapfile_used[i] == SWAPFILE_IGNORED) { - swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to - lock_swapdevices can unlock the devices. */ - } - swap_list_unlock(); -} - - - -/** - * write_swap_page - Write one page to a fresh swap location. - * @addr: Address we're writing. - * @loc: Place to store the entry we used. - * - * Allocate a new swap entry and 'sync' it. Note we discard -EIO - * errors. That is an artifact left over from swsusp. It did not - * check the return of rw_swap_page_sync() at all, since most pages - * written back to swap would return -EIO. - * This is a partial improvement, since we will at least return other - * errors, though we need to eventually fix the damn code. - */ - -static int write_swap_page(unsigned long addr, swp_entry_t * loc) -{ - swp_entry_t entry; - int error = 0; - - entry = get_swap_page(); - if (swp_offset(entry) && - swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { - error = rw_swap_page_sync(WRITE, entry, - virt_to_page(addr)); - if (error == -EIO) - error = 0; - if (!error) - *loc = entry; - } else - error = -ENOSPC; - return error; -} - - -/** - * free_data - Free the swap entries used by the saved image. - * - * Walk the list of used swap entries and free each one. - */ - -static void free_data(void) -{ - swp_entry_t entry; - int i; - - for (i = 0; i < pmdisk_pages; i++) { - entry = (pm_pagedir_nosave + i)->swap_address; - if (entry.val) - swap_free(entry); - else - break; - (pm_pagedir_nosave + i)->swap_address = (swp_entry_t){0}; - } -} - - -/** - * write_data - Write saved image to swap. - * - * Walk the list of pages in the image and sync each one to swap. - */ - -static int write_data(void) -{ - int error = 0; - int i; - - printk( "Writing data to swap (%d pages): ", pmdisk_pages ); - for (i = 0; i < pmdisk_pages && !error; i++) { - if (!(i%100)) - printk( "." 
); - error = write_swap_page((pm_pagedir_nosave+i)->address, - &((pm_pagedir_nosave+i)->swap_address)); - } - printk(" %d Pages done.\n",i); - return error; -} - - -/** - * free_pagedir - Free pages used by the page directory. - */ - -static void free_pagedir_entries(void) -{ - int num = pmdisk_info.pagedir_pages; - int i; - - for (i = 0; i < num; i++) - swap_free(pmdisk_info.pagedir[i]); -} - - -/** - * write_pagedir - Write the array of pages holding the page directory. - * @last: Last swap entry we write (needed for header). - */ - -static int write_pagedir(void) -{ - unsigned long addr = (unsigned long)pm_pagedir_nosave; - int error = 0; - int n = SUSPEND_PD_PAGES(pmdisk_pages); - int i; - - pmdisk_info.pagedir_pages = n; - printk( "Writing pagedir (%d pages)\n", n); - for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) - error = write_swap_page(addr,&pmdisk_info.pagedir[i]); - return error; -} - - -#ifdef DEBUG -static void dump_pmdisk_info(void) -{ - printk(" pmdisk: Version: %u\n",pmdisk_info.version_code); - printk(" pmdisk: Num Pages: %ld\n",pmdisk_info.num_physpages); - printk(" pmdisk: UTS Sys: %s\n",pmdisk_info.uts.sysname); - printk(" pmdisk: UTS Node: %s\n",pmdisk_info.uts.nodename); - printk(" pmdisk: UTS Release: %s\n",pmdisk_info.uts.release); - printk(" pmdisk: UTS Version: %s\n",pmdisk_info.uts.version); - printk(" pmdisk: UTS Machine: %s\n",pmdisk_info.uts.machine); - printk(" pmdisk: UTS Domain: %s\n",pmdisk_info.uts.domainname); - printk(" pmdisk: CPUs: %d\n",pmdisk_info.cpus); - printk(" pmdisk: Image: %ld Pages\n",pmdisk_info.image_pages); - printk(" pmdisk: Pagedir: %ld Pages\n",pmdisk_info.pagedir_pages); -} -#else -static void dump_pmdisk_info(void) -{ - -} -#endif - -static void init_header(void) -{ - memset(&pmdisk_info,0,sizeof(pmdisk_info)); - pmdisk_info.version_code = LINUX_VERSION_CODE; - pmdisk_info.num_physpages = num_physpages; - memcpy(&pmdisk_info.uts,&system_utsname,sizeof(system_utsname)); - - pmdisk_info.cpus = 
num_online_cpus(); - pmdisk_info.image_pages = pmdisk_pages; -} - -/** - * write_header - Fill and write the suspend header. - * @entry: Location of the last swap entry used. - * - * Allocate a page, fill header, write header. - * - * @entry is the location of the last pagedir entry written on - * entrance. On exit, it contains the location of the header. - */ - -static int write_header(swp_entry_t * entry) -{ - dump_pmdisk_info(); - return write_swap_page((unsigned long)&pmdisk_info,entry); -} - - - -/** - * write_suspend_image - Write entire image and metadata. - * - */ - -static int write_suspend_image(void) -{ - int error; - swp_entry_t prev = { 0 }; - - init_header(); - - if ((error = write_data())) - goto FreeData; - - if ((error = write_pagedir())) - goto FreePagedir; - - if ((error = write_header(&prev))) - goto FreePagedir; - - error = mark_swapfiles(prev); - Done: - return error; - FreePagedir: - free_pagedir_entries(); - FreeData: - free_data(); - goto Done; -} - - - -/** - * saveable - Determine whether a page should be cloned or not. - * @pfn: The page - * - * We save a page if it's Reserved, and not in the range of pages - * statically defined as 'unsaveable', or if it isn't reserved, and - * isn't part of a free chunk of pages. - * If it is part of a free chunk, we update @pfn to point to the last - * page of the chunk. - */ - -static int saveable(unsigned long * pfn) -{ - struct page * page = pfn_to_page(*pfn); - - if (PageNosave(page)) - return 0; - - if (!PageReserved(page)) { - int chunk_size; - - if ((chunk_size = is_head_of_free_region(page))) { - *pfn += chunk_size - 1; - return 0; - } - } else if (PageReserved(page)) { - /* Just copy whole code segment. - * Hopefully it is not that big. 
- */ - if ((ADDRESS(*pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) && - (ADDRESS(*pfn) < (unsigned long) ADDRESS2(&__nosave_end))) { - pr_debug("[nosave %lx]\n", ADDRESS(*pfn)); - return 0; - } - /* Hmm, perhaps copying all reserved pages is not - * too healthy as they may contain - * critical bios data? - */ - } - return 1; -} - - - -/** - * count_pages - Determine size of page directory. - * - * Iterate over all the pages in the system and tally the number - * we need to clone. - */ - -static void count_pages(void) -{ - unsigned long pfn; - int n = 0; - - for (pfn = 0; pfn < max_pfn; pfn++) { - if (saveable(&pfn)) - n++; - } - pmdisk_pages = n; -} - - -/** - * copy_pages - Atomically snapshot memory. - * - * Iterate over all the pages in the system and copy each one - * into its corresponding location in the pagedir. - * We rely on the fact that the number of pages that we're snap- - * shotting hasn't changed since we counted them. - */ - -static void copy_pages(void) -{ - struct pbe * p = pagedir_save; - unsigned long pfn; - int n = 0; - - for (pfn = 0; pfn < max_pfn; pfn++) { - if (saveable(&pfn)) { - n++; - p->orig_address = ADDRESS(pfn); - copy_page((void *) p->address, - (void *) p->orig_address); - p++; - } - } - BUG_ON(n != pmdisk_pages); -} - - -/** - * free_image_pages - Free each page allocated for snapshot. - */ - -static void free_image_pages(void) -{ - struct pbe * p; - int i; - - for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) { - ClearPageNosave(virt_to_page(p->address)); - free_page(p->address); - } -} - - -/** - * free_pagedir - Free the page directory. 
- */ - -static void free_pagedir(void) -{ - free_image_pages(); - free_pages((unsigned long)pagedir_save, pagedir_order); -} - - -static void calc_order(void) -{ - int diff; - int order; - - order = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages)); - pmdisk_pages += 1 << order; - do { - diff = get_bitmask_order(SUSPEND_PD_PAGES(pmdisk_pages)) - order; - if (diff) { - order += diff; - pmdisk_pages += 1 << diff; - } - } while(diff); - pagedir_order = order; -} - - -/** - * alloc_pagedir - Allocate the page directory. - * - * First, determine exactly how many contiguous pages we need, - * allocate them, then mark each 'unsavable'. - */ - -static int alloc_pagedir(void) -{ - calc_order(); - pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, - pagedir_order); - if(!pagedir_save) - return -ENOMEM; - memset(pagedir_save,0,(1 << pagedir_order) * PAGE_SIZE); - pm_pagedir_nosave = pagedir_save; - return 0; -} - - -/** - * alloc_image_pages - Allocate pages for the snapshot. - * - */ - -static int alloc_image_pages(void) -{ - struct pbe * p; - int i; - - for (i = 0, p = pagedir_save; i < pmdisk_pages; i++, p++) { - p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); - if(!p->address) - goto Error; - SetPageNosave(virt_to_page(p->address)); - } - return 0; - Error: - do { - if (p->address) - free_page(p->address); - p->address = 0; - } while (p-- > pagedir_save); - return -ENOMEM; -} - - -/** - * enough_free_mem - Make sure we enough free memory to snapshot. - * - * Returns TRUE or FALSE after checking the number of available - * free pages. - */ - -static int enough_free_mem(void) -{ - if(nr_free_pages() < (pmdisk_pages + PAGES_FOR_IO)) { - pr_debug("pmdisk: Not enough free pages: Have %d\n", - nr_free_pages()); - return 0; - } - return 1; -} - - -/** - * enough_swap - Make sure we have enough swap to save the image. - * - * Returns TRUE or FALSE after checking the total amount of swap - * space avaiable. 
- * - * FIXME: si_swapinfo(&i) returns all swap devices information. - * We should only consider resume_device. - */ - -static int enough_swap(void) -{ - struct sysinfo i; - - si_swapinfo(&i); - if (i.freeswap < (pmdisk_pages + PAGES_FOR_IO)) { - pr_debug("pmdisk: Not enough swap. Need %ld\n",i.freeswap); - return 0; - } - return 1; -} - - -/** - * pmdisk_suspend - Atomically snapshot the system. - * - * This must be called with interrupts disabled, to prevent the - * system changing at all from underneath us. - * - * To do this, we count the number of pages in the system that we - * need to save; make sure we have enough memory and swap to clone - * the pages and save them in swap, allocate the space to hold them, - * and then snapshot them all. - */ - -int pmdisk_suspend(void) -{ - int error = 0; - - if ((error = read_swapfiles())) - return error; - - drain_local_pages(); - - pm_pagedir_nosave = NULL; - pr_debug("pmdisk: Counting pages to copy.\n" ); - count_pages(); - - pr_debug("pmdisk: (pages needed: %d + %d free: %d)\n", - pmdisk_pages,PAGES_FOR_IO,nr_free_pages()); - - if (!enough_free_mem()) - return -ENOMEM; - - if (!enough_swap()) - return -ENOSPC; - - if ((error = alloc_pagedir())) { - pr_debug("pmdisk: Allocating pagedir failed.\n"); - return error; - } - if ((error = alloc_image_pages())) { - pr_debug("pmdisk: Allocating image pages failed.\n"); - free_pagedir(); - return error; - } - - nr_copy_pages_check = pmdisk_pages; - pagedir_order_check = pagedir_order; - - /* During allocating of suspend pagedir, new cold pages may appear. - * Kill them - */ - drain_local_pages(); - - /* copy */ - copy_pages(); - - /* - * End of critical section. From now on, we can write to memory, - * but we should not touch disk. This specially means we must _not_ - * touch swap space! Except we must write out our image of course. 
- */ - - pr_debug("pmdisk: %d pages copied\n", pmdisk_pages ); - return 0; -} - - -/** - * suspend_save_image - Prepare and write saved image to swap. - * - * IRQs are re-enabled here so we can resume devices and safely write - * to the swap devices. We disable them again before we leave. - * - * The second lock_swapdevices() will unlock ignored swap devices since - * writing is finished. - * It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) - */ - -static int suspend_save_image(void) -{ - int error; - device_resume(); - lock_swapdevices(); - error = write_suspend_image(); - lock_swapdevices(); - return error; -} - -/* - * Magic happens here - */ - -int pmdisk_resume(void) -{ - BUG_ON (nr_copy_pages_check != pmdisk_pages); - BUG_ON (pagedir_order_check != pagedir_order); - - /* Even mappings of "global" things (vmalloc) need to be fixed */ - __flush_tlb_global(); - return 0; -} - -/* pmdisk_arch_suspend() is implemented in arch/?/power/pmdisk.S, - and basically does: - - if (!resume) { - save_processor_state(); - SAVE_REGISTERS - return pmdisk_suspend(); - } - GO_TO_SWAPPER_PAGE_TABLES - COPY_PAGES_BACK - RESTORE_REGISTERS - restore_processor_state(); - return pmdisk_resume(); - - */ - - -/* More restore stuff */ - -#define does_collide(addr) does_collide_order(pm_pagedir_nosave, addr, 0) - -/* - * Returns true if given address/order collides with any orig_address - */ -static int __init does_collide_order(suspend_pagedir_t *pagedir, - unsigned long addr, int order) -{ - int i; - unsigned long addre = addr + (PAGE_SIZE<<order); - - for(i=0; i < pmdisk_pages; i++) - if((pagedir+i)->orig_address >= addr && - (pagedir+i)->orig_address < addre) - return 1; - - return 0; -} - -/* - * We check here that pagedir & pages it points to won't collide with pages - * 
where we're going to restore from the loaded pages later - */ -static int __init check_pagedir(void) -{ - int i; - - for(i=0; i < pmdisk_pages; i++) { - unsigned long addr; - - do { - addr = get_zeroed_page(GFP_ATOMIC); - if(!addr) - return -ENOMEM; - } while (does_collide(addr)); - - (pm_pagedir_nosave+i)->address = addr; - } - return 0; -} - -static int __init relocate_pagedir(void) -{ - /* - * We have to avoid recursion (not to overflow kernel stack), - * and that's why code looks pretty cryptic - */ - suspend_pagedir_t *old_pagedir = pm_pagedir_nosave; - void **eaten_memory = NULL; - void **c = eaten_memory, *m, *f; - int err; - - pr_debug("pmdisk: Relocating pagedir\n"); - - if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) { - pr_debug("pmdisk: Relocation not necessary\n"); - return 0; - } - - err = -ENOMEM; - while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) { - if (!does_collide_order(old_pagedir, (unsigned long)m, - pagedir_order)) { - pm_pagedir_nosave = - memcpy(m, old_pagedir, - PAGE_SIZE << pagedir_order); - err = 0; - break; - } - eaten_memory = m; - printk( "." ); - *eaten_memory = c; - c = eaten_memory; - } - - c = eaten_memory; - while(c) { - printk(":"); - f = c; - c = *c; - free_pages((unsigned long)f, pagedir_order); - } - printk("|\n"); - return err; -} - - -static struct block_device * resume_bdev; - - -/** - * Using bio to read from swap. - * This code requires a bit more work than just using buffer heads - * but, it is the recommended way for 2.5/2.6. - * The following are to signal the beginning and end of I/O. Bios - * finish asynchronously, while we want them to happen synchronously. - * A simple atomic_t, and a wait loop take care of this problem. 
- */ - -static atomic_t io_done = ATOMIC_INIT(0); - -static void start_io(void) -{ - atomic_set(&io_done,1); -} - -static int end_io(struct bio * bio, unsigned int num, int err) -{ - atomic_set(&io_done,0); - return 0; -} - -static void wait_io(void) -{ - while(atomic_read(&io_done)) - io_schedule(); -} - - -/** - * submit - submit BIO request. - * @rw: READ or WRITE. - * @off physical offset of page. - * @page: page we're reading or writing. - * - * Straight from the textbook - allocate and initialize the bio. - * If we're writing, make sure the page is marked as dirty. - * Then submit it and wait. - */ - -static int submit(int rw, pgoff_t page_off, void * page) -{ - int error = 0; - struct bio * bio; - - bio = bio_alloc(GFP_ATOMIC,1); - if (!bio) - return -ENOMEM; - bio->bi_sector = page_off * (PAGE_SIZE >> 9); - bio_get(bio); - bio->bi_bdev = resume_bdev; - bio->bi_end_io = end_io; - - if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { - printk("pmdisk: ERROR: adding page to bio at %ld\n",page_off); - error = -EFAULT; - goto Done; - } - - if (rw == WRITE) - bio_set_pages_dirty(bio); - start_io(); - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - wait_io(); - Done: - bio_put(bio); - return error; -} - -static int -read_page(pgoff_t page_off, void * page) -{ - return submit(READ,page_off,page); -} - -static int -write_page(pgoff_t page_off, void * page) -{ - return submit(WRITE,page_off,page); -} - - -extern dev_t __init name_to_dev_t(const char *line); - - -static int __init check_sig(void) -{ - int error; - - memset(&pmdisk_header,0,sizeof(pmdisk_header)); - if ((error = read_page(0,&pmdisk_header))) - return error; - if (!memcmp(PMDISK_SIG,pmdisk_header.sig,10)) { - memcpy(pmdisk_header.sig,pmdisk_header.orig_sig,10); - - /* - * Reset swap signature now. 
- */ - error = write_page(0,&pmdisk_header); - } else { - pr_debug(KERN_ERR "pmdisk: Invalid partition type.\n"); - return -EINVAL; - } - if (!error) - pr_debug("pmdisk: Signature found, resuming\n"); - return error; -} - - -/* - * Sanity check if this image makes sense with this kernel/swap context - * I really don't think that it's foolproof but more than nothing.. - */ - -static const char * __init sanity_check(void) -{ - dump_pmdisk_info(); - if(pmdisk_info.version_code != LINUX_VERSION_CODE) - return "kernel version"; - if(pmdisk_info.num_physpages != num_physpages) - return "memory size"; - if (strcmp(pmdisk_info.uts.sysname,system_utsname.sysname)) - return "system type"; - if (strcmp(pmdisk_info.uts.release,system_utsname.release)) - return "kernel release"; - if (strcmp(pmdisk_info.uts.version,system_utsname.version)) - return "version"; - if (strcmp(pmdisk_info.uts.machine,system_utsname.machine)) - return "machine"; - if(pmdisk_info.cpus != num_online_cpus()) - return "number of cpus"; - return NULL; -} - - -static int __init check_header(void) -{ - const char * reason = NULL; - int error; - - init_header(); - - if ((error = read_page(swp_offset(pmdisk_header.pmdisk_info), - &pmdisk_info))) - return error; - - /* Is this same machine? 
*/ - if ((reason = sanity_check())) { - printk(KERN_ERR "pmdisk: Resume mismatch: %s\n",reason); - return -EPERM; - } - pmdisk_pages = pmdisk_info.image_pages; - return error; -} - - -static int __init read_pagedir(void) -{ - unsigned long addr; - int i, n = pmdisk_info.pagedir_pages; - int error = 0; - - pagedir_order = get_bitmask_order(n); - - addr =__get_free_pages(GFP_ATOMIC, pagedir_order); - if (!addr) - return -ENOMEM; - pm_pagedir_nosave = (struct pbe *)addr; - - pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n); - - for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) { - unsigned long offset = swp_offset(pmdisk_info.pagedir[i]); - if (offset) - error = read_page(offset, (void *)addr); - else - error = -EFAULT; - } - if (error) - free_pages((unsigned long)pm_pagedir_nosave,pagedir_order); - return error; -} - - -/** - * read_image_data - Read image pages from swap. - * - * You do not need to check for overlaps, check_pagedir() - * already did that. - */ - -static int __init read_image_data(void) -{ - struct pbe * p; - int error = 0; - int i; - - printk( "Reading image data (%d pages): ", pmdisk_pages ); - for(i = 0, p = pm_pagedir_nosave; i < pmdisk_pages && !error; i++, p++) { - if (!(i%100)) - printk( "." 
); - error = read_page(swp_offset(p->swap_address), - (void *)p->address); - } - printk(" %d done.\n",i); - return error; -} - - -static int __init read_suspend_image(void) -{ - int error = 0; - - if ((error = check_sig())) - return error; - if ((error = check_header())) - return error; - if ((error = read_pagedir())) - return error; - if ((error = relocate_pagedir())) - goto FreePagedir; - if ((error = check_pagedir())) - goto FreePagedir; - if ((error = read_image_data())) - goto FreePagedir; - Done: - return error; - FreePagedir: - free_pages((unsigned long)pm_pagedir_nosave,pagedir_order); - goto Done; -} - -/** - * pmdisk_save - Snapshot memory - */ - -int pmdisk_save(void) -{ - int error; - -#if defined (CONFIG_HIGHMEM) || defined (CONFIG_DISCONTIGMEM) - pr_debug("pmdisk: not supported with high- or discontig-mem.\n"); - return -EPERM; -#endif - if ((error = arch_prepare_suspend())) - return error; - local_irq_disable(); - save_processor_state(); - error = pmdisk_arch_suspend(0); - restore_processor_state(); - local_irq_enable(); - return error; -} - - -/** - * pmdisk_write - Write saved memory image to swap. - * - * pmdisk_arch_suspend(0) returns after system is resumed. - * - * pmdisk_arch_suspend() copies all "used" memory to "free" memory, - * then unsuspends all device drivers, and writes memory to disk - * using normal kernel mechanism. - */ - -int pmdisk_write(void) -{ - return suspend_save_image(); -} - - -/** - * pmdisk_read - Read saved image from swap. 
- */ - -int __init pmdisk_read(void) -{ - int error; - - if (!strlen(resume_file)) - return -ENOENT; - - resume_device = name_to_dev_t(resume_file); - pr_debug("pmdisk: Resume From Partition: %s\n", resume_file); - - resume_bdev = open_by_devnum(resume_device, FMODE_READ); - if (!IS_ERR(resume_bdev)) { - set_blocksize(resume_bdev, PAGE_SIZE); - error = read_suspend_image(); - blkdev_put(resume_bdev); - } else - error = PTR_ERR(resume_bdev); - - if (!error) - pr_debug("Reading resume file was successful\n"); - else - pr_debug("pmdisk: Error %d resuming\n", error); - return error; -} - - -/** - * pmdisk_restore - Replace running kernel with saved image. - */ - -int __init pmdisk_restore(void) -{ - int error; - local_irq_disable(); - save_processor_state(); - error = pmdisk_arch_suspend(1); - restore_processor_state(); - local_irq_enable(); - return error; -} - - -/** - * pmdisk_free - Free memory allocated to hold snapshot. - */ - -int pmdisk_free(void) -{ - pr_debug( "Freeing prev allocated pagedir\n" ); - free_pagedir(); - return 0; -} - -static int __init pmdisk_setup(char *str) -{ - if (strlen(str)) { - if (!strcmp(str,"off")) - resume_file[0] = '\0'; - else - strncpy(resume_file, str, 255); - } else - resume_file[0] = '\0'; - return 1; -} - -__setup("pmdisk=", pmdisk_setup); - diff --git a/kernel/power/power.h b/kernel/power/power.h index d180b0a192cdbff36001ef4877ff31c74176666e..cd6a3493cc0dc693928a907f68631b166954dfc6 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -1,4 +1,5 @@ - +#include <linux/suspend.h> +#include <linux/utsname.h> /* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but we probably do not take enough locks for switching consoles, etc, @@ -9,7 +10,20 @@ #endif -#ifdef CONFIG_PM_DISK +struct swsusp_info { + struct new_utsname uts; + u32 version_code; + unsigned long num_physpages; + int cpus; + unsigned long image_pages; + unsigned long pagedir_pages; + suspend_pagedir_t * suspend_pagedir; + swp_entry_t 
pagedir[768]; +} __attribute__((aligned(PAGE_SIZE))); + + + +#ifdef CONFIG_SOFTWARE_SUSPEND extern int pm_suspend_disk(void); #else @@ -18,7 +32,6 @@ static inline int pm_suspend_disk(void) return -EPERM; } #endif - extern struct semaphore pm_sem; #define power_attr(_name) \ static struct subsys_attribute _name##_attr = { \ diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 081b65103abd3205a885499a01659701744732d4..8971c8aaf4bced71c3ad7298cc2f4d4578b18424 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -62,6 +62,7 @@ #include <linux/syscalls.h> #include <linux/console.h> #include <linux/highmem.h> +#include <linux/bio.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -70,25 +71,16 @@ #include "power.h" -unsigned char software_suspend_enabled = 0; - -#define NORESUME 1 -#define RESUME_SPECIFIED 2 - /* References to section boundaries */ extern char __nosave_begin, __nosave_end; extern int is_head_of_free_region(struct page *); -/* Locks */ -spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED; - /* Variables to be preserved over suspend */ -static int pagedir_order_check; -static int nr_copy_pages_check; +int pagedir_order_check; +int nr_copy_pages_check; -static int resume_status; -static char resume_file[256] = ""; /* For resume= kernel option */ +extern char resume_file[]; static dev_t resume_device; /* Local variables that should not be affected by save */ unsigned int nr_copy_pages __nosavedata = 0; @@ -107,19 +99,19 @@ unsigned int nr_copy_pages __nosavedata = 0; MMU hardware. 
*/ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -static suspend_pagedir_t *pagedir_save; -static int pagedir_order __nosavedata = 0; +suspend_pagedir_t *pagedir_save; +int pagedir_order __nosavedata = 0; -struct link { - char dummy[PAGE_SIZE - sizeof(swp_entry_t)]; - swp_entry_t next; -}; +#define SWSUSP_SIG "S1SUSPEND" -union diskpage { - union swap_header swh; - struct link link; - struct suspend_header sh; -}; +struct swsusp_header { + char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; + swp_entry_t swsusp_info; + char orig_sig[10]; + char sig[10]; +} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; + +struct swsusp_info swsusp_info; /* * XXX: We try to keep some more pages free so that I/O operations succeed @@ -130,51 +122,10 @@ union diskpage { static const char name_suspend[] = "Suspend Machine: "; static const char name_resume[] = "Resume Machine: "; -/* - * Debug - */ -#define DEBUG_DEFAULT -#undef DEBUG_PROCESS -#undef DEBUG_SLOW -#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */ - -#ifdef DEBUG_DEFAULT -# define PRINTK(f, a...) printk(f, ## a) -#else -# define PRINTK(f, a...) do { } while(0) -#endif - -#ifdef DEBUG_SLOW -#define MDELAY(a) mdelay(a) -#else -#define MDELAY(a) do { } while(0) -#endif - /* * Saving part... */ -static __inline__ int fill_suspend_header(struct suspend_header *sh) -{ - memset((char *)sh, 0, sizeof(*sh)); - - sh->version_code = LINUX_VERSION_CODE; - sh->num_physpages = num_physpages; - strncpy(sh->machine, system_utsname.machine, 8); - strncpy(sh->version, system_utsname.version, 20); - /* FIXME: Is this bogus? --RR */ - sh->num_cpus = num_online_cpus(); - sh->page_size = PAGE_SIZE; - sh->suspend_pagedir = pagedir_nosave; - BUG_ON (pagedir_save != pagedir_nosave); - sh->num_pbes = nr_copy_pages; - /* TODO: needed? mounted fs' last mounted date comparison - * [so they haven't been mounted since last suspend. - * Maybe it isn't.] 
[we'd need to do this for _all_ fs-es] - */ - return 0; -} - /* We memorize in swapfile_used what swap devices are used for suspension */ #define SWAPFILE_UNUSED 0 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */ @@ -182,47 +133,30 @@ static __inline__ int fill_suspend_header(struct suspend_header *sh) static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; -#define MARK_SWAP_SUSPEND 0 -#define MARK_SWAP_RESUME 2 -static void mark_swapfiles(swp_entry_t prev, int mode) +static int mark_swapfiles(swp_entry_t prev) { - swp_entry_t entry; - union diskpage *cur; - struct page *page; + int error; - if (root_swap == 0xFFFF) /* ignored */ - return; - - page = alloc_page(GFP_ATOMIC); - if (!page) - panic("Out of memory in mark_swapfiles"); - cur = page_address(page); - /* XXX: this is dirty hack to get first page of swap file */ - entry = swp_entry(root_swap, 0); - rw_swap_page_sync(READ, entry, page); - - if (mode == MARK_SWAP_RESUME) { - if (!memcmp("S1",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); - else if (!memcmp("S2",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); - else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", - name_resume, cur->swh.magic.magic); + rw_swap_page_sync(READ, + swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header)); + if (!memcmp("SWAP-SPACE",swsusp_header.sig,10) || + !memcmp("SWAPSPACE2",swsusp_header.sig,10)) { + memcpy(swsusp_header.orig_sig,swsusp_header.sig,10); + memcpy(swsusp_header.sig,SWSUSP_SIG,10); + swsusp_header.swsusp_info = prev; + error = rw_swap_page_sync(WRITE, + swp_entry(root_swap, 0), + virt_to_page((unsigned long) + &swsusp_header)); } else { - if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10))) - memcpy(cur->swh.magic.magic,"S1SUSP....",10); - else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) - memcpy(cur->swh.magic.magic,"S2SUSP....",10); - else panic("\nSwapspace is not 
swapspace (%.10s)\n", cur->swh.magic.magic); - cur->link.next = prev; /* prev is the first/last swap page of the resume area */ - /* link.next lies *no more* in last 4/8 bytes of magic */ + pr_debug("swsusp: Partition is not swap space.\n"); + error = -ENODEV; } - rw_swap_page_sync(WRITE, entry, page); - __free_page(page); + return error; } - /* * Check whether the swap device is the specified resume * device, irrespective of whether they are specified by @@ -243,7 +177,7 @@ static int is_resume_device(const struct swap_info_struct *swap_info) resume_device == MKDEV(imajor(inode), iminor(inode)); } -static void read_swapfiles(void) /* This is called before saving image */ +int swsusp_swap_check(void) /* This is called before saving image */ { int i, len; @@ -274,114 +208,209 @@ static void read_swapfiles(void) /* This is called before saving image */ } } swap_list_unlock(); + return (root_swap != 0xffff) ? 0 : -ENODEV; } -static void lock_swapdevices(void) /* This is called after saving image so modification - will be lost after resume... and that's what we want. */ +/** + * This is called after saving image so modification + * will be lost after resume... and that's what we want. + * we make the device unusable. A new call to + * lock_swapdevices can unlock the devices. + */ +static void lock_swapdevices(void) { int i; swap_list_lock(); for(i = 0; i< MAX_SWAPFILES; i++) if(swapfile_used[i] == SWAPFILE_IGNORED) { - swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to - lock_swapdevices can unlock the devices. */ + swap_info[i].flags ^= 0xFF; } swap_list_unlock(); } + + /** - * write_suspend_image - Write entire image to disk. + * write_swap_page - Write one page to a fresh swap location. + * @addr: Address we're writing. + * @loc: Place to store the entry we used. * - * After writing suspend signature to the disk, suspend may no - * longer fail: we have ready-to-run image in swap, and rollback - * would happen on next reboot -- corrupting data. 
+ * Allocate a new swap entry and 'sync' it. Note we discard -EIO + * errors. That is an artifact left over from swsusp. It did not + * check the return of rw_swap_page_sync() at all, since most pages + * written back to swap would return -EIO. + * This is a partial improvement, since we will at least return other + * errors, though we need to eventually fix the damn code. + */ + +static int write_page(unsigned long addr, swp_entry_t * loc) +{ + swp_entry_t entry; + int error = 0; + + entry = get_swap_page(); + if (swp_offset(entry) && + swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { + error = rw_swap_page_sync(WRITE, entry, + virt_to_page(addr)); + if (error == -EIO) + error = 0; + if (!error) + *loc = entry; + } else + error = -ENOSPC; + return error; +} + + +/** + * data_free - Free the swap entries used by the saved image. * - * Note: The buffer we allocate to use to write the suspend header is - * not freed; its not needed since the system is going down anyway - * (plus it causes an oops and I'm lazy^H^H^H^Htoo busy). + * Walk the list of used swap entries and free each one. */ -static int write_suspend_image(void) + +static void data_free(void) { + swp_entry_t entry; int i; - swp_entry_t entry, prev = { 0 }; - int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); - union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC); - unsigned long address; - struct page *page; - if (!buffer) - return -ENOMEM; + for (i = 0; i < nr_copy_pages; i++) { + entry = (pagedir_nosave + i)->swap_address; + if (entry.val) + swap_free(entry); + else + break; + (pagedir_nosave + i)->swap_address = (swp_entry_t){0}; + } +} + + +/** + * data_write - Write saved image to swap. + * + * Walk the list of pages in the image and sync each one to swap. 
+ */ + +static int data_write(void) +{ + int error = 0; + int i; printk( "Writing data to swap (%d pages): ", nr_copy_pages ); - for (i=0; i<nr_copy_pages; i++) { + for (i = 0; i < nr_copy_pages && !error; i++) { if (!(i%100)) printk( "." ); - entry = get_swap_page(); - if (!entry.val) - panic("\nNot enough swapspace when writing data" ); - - if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) - panic("\nPage %d: not enough swapspace on suspend device", i ); - - address = (pagedir_nosave+i)->address; - page = virt_to_page(address); - rw_swap_page_sync(WRITE, entry, page); - (pagedir_nosave+i)->swap_address = entry; + error = write_page((pagedir_nosave+i)->address, + &((pagedir_nosave+i)->swap_address)); } - printk( "|\n" ); - printk( "Writing pagedir (%d pages): ", nr_pgdir_pages); - for (i=0; i<nr_pgdir_pages; i++) { - cur = (union diskpage *)((char *) pagedir_nosave)+i; - BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE)); - printk( "." ); - entry = get_swap_page(); - if (!entry.val) { - printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" ); - panic("Don't know how to recover"); - free_page((unsigned long) buffer); - return -ENOSPC; - } + printk(" %d Pages done.\n",i); + return error; +} - if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) - panic("\nNot enough swapspace for pagedir on suspend device" ); +static void dump_info(void) +{ + pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code); + pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages); + pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname); + pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename); + pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release); + pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version); + pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine); + pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); + pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); + pr_debug(" swsusp: Image: %ld 
Pages\n",swsusp_info.image_pages); + pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); +} - BUG_ON (sizeof(swp_entry_t) != sizeof(long)); - BUG_ON (PAGE_SIZE % sizeof(struct pbe)); +static void init_header(void) +{ + memset(&swsusp_info,0,sizeof(swsusp_info)); + swsusp_info.version_code = LINUX_VERSION_CODE; + swsusp_info.num_physpages = num_physpages; + memcpy(&swsusp_info.uts,&system_utsname,sizeof(system_utsname)); + + swsusp_info.suspend_pagedir = pagedir_nosave; + swsusp_info.cpus = num_online_cpus(); + swsusp_info.image_pages = nr_copy_pages; + dump_info(); +} - cur->link.next = prev; - page = virt_to_page((unsigned long)cur); - rw_swap_page_sync(WRITE, entry, page); - prev = entry; +static int close_swap(void) +{ + swp_entry_t entry; + int error; + + error = write_page((unsigned long)&swsusp_info,&entry); + if (!error) { + printk( "S" ); + error = mark_swapfiles(entry); + printk( "|\n" ); } - printk("H"); - BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t)); - BUG_ON (sizeof(union diskpage) != PAGE_SIZE); - BUG_ON (sizeof(struct link) != PAGE_SIZE); - entry = get_swap_page(); - if (!entry.val) - panic( "\nNot enough swapspace when writing header" ); - if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) - panic("\nNot enough swapspace for header on suspend device" ); - - cur = (void *) buffer; - if (fill_suspend_header(&cur->sh)) - BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */ - - cur->link.next = prev; - - page = virt_to_page((unsigned long)cur); - rw_swap_page_sync(WRITE, entry, page); - prev = entry; - - printk( "S" ); - mark_swapfiles(prev, MARK_SWAP_SUSPEND); - printk( "|\n" ); - - MDELAY(1000); - return 0; + return error; +} + +/** + * free_pagedir - Free pages used by the page directory. 
+ */ + +static void free_pagedir_entries(void) +{ + int num = swsusp_info.pagedir_pages; + int i; + + for (i = 0; i < num; i++) + swap_free(swsusp_info.pagedir[i]); } + +/** + * write_pagedir - Write the array of pages holding the page directory. + * @last: Last swap entry we write (needed for header). + */ + +static int write_pagedir(void) +{ + unsigned long addr = (unsigned long)pagedir_nosave; + int error = 0; + int n = SUSPEND_PD_PAGES(nr_copy_pages); + int i; + + swsusp_info.pagedir_pages = n; + printk( "Writing pagedir (%d pages)\n", n); + for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) + error = write_page(addr,&swsusp_info.pagedir[i]); + return error; +} + +/** + * write_suspend_image - Write entire image and metadata. + * + */ + +static int write_suspend_image(void) +{ + int error; + + init_header(); + if ((error = data_write())) + goto FreeData; + + if ((error = write_pagedir())) + goto FreePagedir; + + if ((error = close_swap())) + goto FreePagedir; + Done: + return error; + FreePagedir: + free_pagedir_entries(); + FreeData: + data_free(); + goto Done; +} + + #ifdef CONFIG_HIGHMEM struct highmem_page { char *data; @@ -438,22 +467,30 @@ static int save_highmem_zone(struct zone *zone) } return 0; } +#endif /* CONFIG_HIGHMEM */ + static int save_highmem(void) { +#ifdef CONFIG_HIGHMEM struct zone *zone; int res = 0; + + pr_debug("swsusp: Saving Highmem\n"); for_each_zone(zone) { if (is_highmem(zone)) res = save_highmem_zone(zone); if (res) return res; } +#endif return 0; } static int restore_highmem(void) { +#ifdef CONFIG_HIGHMEM + printk("swsusp: Restoring Highmem\n"); while (highmem_copy) { struct highmem_page *save = highmem_copy; void *kaddr; @@ -465,9 +502,10 @@ static int restore_highmem(void) free_page((long) save->data); kfree(save); } +#endif return 0; } -#endif + static int pfn_is_nosave(unsigned long pfn) { @@ -476,57 +514,86 @@ static int pfn_is_nosave(unsigned long pfn) return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); } -/* if 
*pagedir_p != NULL it also copies the counted pages */ -static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p) +/** + * saveable - Determine whether a page should be cloned or not. + * @pfn: The page + * + * We save a page if it's Reserved, and not in the range of pages + * statically defined as 'unsaveable', or if it isn't reserved, and + * isn't part of a free chunk of pages. + * If it is part of a free chunk, we update @pfn to point to the last + * page of the chunk. + */ + +static int saveable(struct zone * zone, unsigned long * zone_pfn) { - unsigned long zone_pfn, chunk_size, nr_copy_pages = 0; - struct pbe *pbe = *pagedir_p; - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; + unsigned long pfn = *zone_pfn + zone->zone_start_pfn; + unsigned long chunk_size; + struct page * page; - if (!(pfn%1000)) - printk("."); - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - BUG_ON(PageReserved(page) && PageNosave(page)); - if (PageNosave(page)) - continue; - if (PageReserved(page) && pfn_is_nosave(pfn)) { - PRINTK("[nosave pfn 0x%lx]", pfn); - continue; - } - if ((chunk_size = is_head_of_free_region(page))) { - pfn += chunk_size - 1; - zone_pfn += chunk_size - 1; - continue; + if (!pfn_valid(pfn)) + return 0; + + if (!(pfn%1000)) + printk("."); + page = pfn_to_page(pfn); + BUG_ON(PageReserved(page) && PageNosave(page)); + if (PageNosave(page)) + return 0; + if (PageReserved(page) && pfn_is_nosave(pfn)) { + pr_debug("[nosave pfn 0x%lx]", pfn); + return 0; + } + if ((chunk_size = is_head_of_free_region(page))) { + *zone_pfn += chunk_size - 1; + return 0; + } + + return 1; +} + +static void count_data_pages(void) +{ + struct zone *zone; + unsigned long zone_pfn; + + nr_copy_pages = 0; + + for_each_zone(zone) { + if (!is_highmem(zone)) { + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) + nr_copy_pages += saveable(zone, &zone_pfn); } - 
nr_copy_pages++; - if (!pbe) - continue; - pbe->orig_address = (long) page_address(page); - /* Copy page is dangerous: it likes to mess with - preempt count on specific cpus. Wrong preempt count is then copied, - oops. */ - copy_page((void *)pbe->address, (void *)pbe->orig_address); - pbe++; } - *pagedir_p = pbe; - return nr_copy_pages; } -static int count_and_copy_data_pages(struct pbe *pagedir_p) + +static void copy_data_pages(void) { - int nr_copy_pages = 0; struct zone *zone; + unsigned long zone_pfn; + struct pbe * pbe = pagedir_nosave; + for_each_zone(zone) { if (!is_highmem(zone)) - nr_copy_pages += count_and_copy_zone(zone, &pagedir_p); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { + if (saveable(zone, &zone_pfn)) { + struct page * page; + page = pfn_to_page(zone_pfn + zone->zone_start_pfn); + pbe->orig_address = (long) page_address(page); + /* Copy page is dangerous: it likes to mess with + preempt count on specific cpus. Wrong preempt + count is then copied, oops. + */ + copy_page((void *)pbe->address, + (void *)pbe->orig_address); + pbe++; + } + } } - return nr_copy_pages; } + static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir) { unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn; @@ -547,119 +614,199 @@ static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir) } } -static void free_suspend_pagedir(unsigned long this_pagedir) +void swsusp_free(void) { + unsigned long p = (unsigned long)pagedir_save; struct zone *zone; for_each_zone(zone) { if (!is_highmem(zone)) - free_suspend_pagedir_zone(zone, this_pagedir); + free_suspend_pagedir_zone(zone, p); } - free_pages(this_pagedir, pagedir_order); + free_pages(p, pagedir_order); } -static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) + +/** + * calc_order - Determine the order of allocation needed for pagedir_save. + * + * This looks tricky, but is just subtle. Please fix it some time. 
+ * Since there are %nr_copy_pages worth of pages in the snapshot, we need + * to allocate enough contiguous space to hold + * (%nr_copy_pages * sizeof(struct pbe)), + * which has the saved/orig locations of the page. + * + * SUSPEND_PD_PAGES() tells us how many pages we need to hold those + * structures, then we call get_bitmask_order(), which will tell us the + * last bit set in the number, starting with 1. (If we need 30 pages, that + * is 0x0000001e in hex. The last bit is the 5th, which is the order we + * would use to allocate 32 contiguous pages). + * + * Since we also need to save those pages, we add the number of pages that + * we need to nr_copy_pages, and in case of an overflow, do the + * calculation again to update the number of pages needed. + * + * With this model, we will tend to waste a lot of memory if we just cross + * an order boundary. Plus, the higher the order of allocation that we try + * to do, the more likely we are to fail in a low-memory situation + * (though we're unlikely to get this far in such a case, since swsusp + * requires half of memory to be free anyway). + */ + + +static void calc_order(void) { - int i; - suspend_pagedir_t *pagedir; - struct pbe *p; - struct page *page; + int diff = 0; + int order = 0; + + do { + diff = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)) - order; + if (diff) { + order += diff; + nr_copy_pages += 1 << diff; + } + } while(diff); + pagedir_order = order; +} + + +/** + * alloc_pagedir - Allocate the page directory. + * + * First, determine exactly how many contiguous pages we need, + * allocate them, then mark each 'unsavable'. 
+ */ + +static int alloc_pagedir(void) +{ + calc_order(); + pagedir_save = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, + pagedir_order); + if(!pagedir_save) + return -ENOMEM; + memset(pagedir_save,0,(1 << pagedir_order) * PAGE_SIZE); + pagedir_nosave = pagedir_save; + return 0; +} + - pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)); +/** + * alloc_image_pages - Allocate pages for the snapshot. + * + */ - p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order); - if (!pagedir) - return NULL; +static int alloc_image_pages(void) +{ + struct pbe * p; + int i; - page = virt_to_page(pagedir); - for(i=0; i < 1<<pagedir_order; i++) - SetPageNosave(page++); - - while(nr_copy_pages--) { + for (i = 0, p = pagedir_save; i < nr_copy_pages; i++, p++) { p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); - if (!p->address) { - free_suspend_pagedir((unsigned long) pagedir); - return NULL; - } + if(!p->address) + goto Error; SetPageNosave(virt_to_page(p->address)); - p->orig_address = 0; - p++; } - return pagedir; + return 0; + Error: + do { + if (p->address) + free_page(p->address); + p->address = 0; + } while (p-- > pagedir_save); + return -ENOMEM; } -static int prepare_suspend_processes(void) + +/** + * enough_free_mem - Make sure we have enough free memory to snapshot. + * + * Returns TRUE or FALSE after checking the number of available + * free pages. 
+ */ + +static int enough_free_mem(void) +{ - sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */ - if (freeze_processes()) { - printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" ); - thaw_processes(); - return 1; + if(nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) { + pr_debug("swsusp: Not enough free pages: Have %d\n", + nr_free_pages()); + return 0; } - return 0; + return 1; } -/* - * Try to free as much memory as possible, but do not OOM-kill anyone + +/** + * enough_swap - Make sure we have enough swap to save the image. + * + * Returns TRUE or FALSE after checking the total amount of swap + * space available. * - * Notice: all userland should be stopped at this point, or livelock is possible. + * FIXME: si_swapinfo(&i) returns all swap devices information. + * We should only consider resume_device. */ -static void free_some_memory(void) + +static int enough_swap(void) { - printk("Freeing memory: "); - while (shrink_all_memory(10000)) - printk("."); - printk("|\n"); + struct sysinfo i; + + si_swapinfo(&i); + if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) { + pr_debug("swsusp: Not enough swap. 
Need %ld\n",i.freeswap); + return 0; + } + return 1; } -static int suspend_prepare_image(void) +static int swsusp_alloc(void) { - struct sysinfo i; - unsigned int nr_needed_pages = 0; + int error; + + pr_debug("suspend: (pages needed: %d + %d free: %d)\n", + nr_copy_pages,PAGES_FOR_IO,nr_free_pages()); pagedir_nosave = NULL; - printk( "/critical section: "); -#ifdef CONFIG_HIGHMEM - printk( "handling highmem" ); - if (save_highmem()) { - printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend); + if (!enough_free_mem()) return -ENOMEM; - } - printk(", "); -#endif - printk("counting pages to copy" ); - drain_local_pages(); - nr_copy_pages = count_and_copy_data_pages(NULL); - nr_needed_pages = nr_copy_pages + PAGES_FOR_IO; - - printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages()); - if(nr_free_pages() < nr_needed_pages) { - printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", - name_suspend, nr_needed_pages-nr_free_pages()); - root_swap = 0xFFFF; - return -ENOMEM; - } - si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information. - We should only consider resume_device. */ - if (i.freeswap < nr_needed_pages) { - printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n", - name_suspend, nr_needed_pages-i.freeswap); + if (!enough_swap()) return -ENOSPC; - } - PRINTK( "Alloc pagedir\n" ); - pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages); - if (!pagedir_nosave) { - /* Pagedir is big, one-chunk allocation. 
It is easily possible for this allocation to fail */ - printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend); - return -ENOMEM; + if ((error = alloc_pagedir())) { + pr_debug("suspend: Allocating pagedir failed.\n"); + return error; + } + if ((error = alloc_image_pages())) { + pr_debug("suspend: Allocating image pages failed.\n"); + swsusp_free(); + return error; } + nr_copy_pages_check = nr_copy_pages; pagedir_order_check = pagedir_order; + return 0; +} + +int suspend_prepare_image(void) +{ + unsigned int nr_needed_pages = 0; - drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */ - if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */ - BUG(); + pr_debug("swsusp: critical section: \n"); + if (save_highmem()) { + printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend); + return -ENOMEM; + } + + drain_local_pages(); + count_data_pages(); + printk("swsusp: Need to copy %u pages\n",nr_copy_pages); + nr_needed_pages = nr_copy_pages + PAGES_FOR_IO; + + swsusp_alloc(); + + /* During allocating of suspend pagedir, new cold pages may appear. + * Kill them. + */ + drain_local_pages(); + copy_data_pages(); /* * End of critical section. From now on, we can write to memory, @@ -667,205 +814,79 @@ static int suspend_prepare_image(void) * touch swap space! Except we must write out our image of course. */ - printk( "critical section/: done (%d pages copied)\n", nr_copy_pages ); + printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages ); return 0; } -static void suspend_save_image(void) + +/* It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) 
+ */ +int swsusp_write(void) { + int error; device_resume(); - lock_swapdevices(); - write_suspend_image(); - lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */ + error = write_suspend_image(); + /* This will unlock ignored swap devices since writing is finished */ + lock_swapdevices(); + return error; - /* It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) - */ } -static void suspend_power_down(void) -{ - extern int C_A_D; - C_A_D = 0; - printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": ""); -#ifdef CONFIG_VT - PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state); - mdelay(1000); - if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL)))) - machine_restart(NULL); - else -#endif - { - device_suspend(3); - device_shutdown(); - machine_power_off(); - } - - printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend); - machine_halt(); - while (1); - /* NOTREACHED */ -} -/* - * Magic happens here - */ +extern asmlinkage int swsusp_arch_suspend(void); +extern asmlinkage int swsusp_arch_resume(void); -asmlinkage void do_magic_resume_1(void) -{ - barrier(); - mb(); - spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ - - device_power_down(3); - PRINTK( "Waiting for DMAs to settle down...\n"); - mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right? - Do it with disabled interrupts for best effect. That way, if some - driver scheduled DMA, we have good chance for DMA to finish ;-). 
*/ -} -asmlinkage void do_magic_resume_2(void) +asmlinkage int swsusp_save(void) { - BUG_ON (nr_copy_pages_check != nr_copy_pages); - BUG_ON (pagedir_order_check != pagedir_order); - - __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */ - - PRINTK( "Freeing prev allocated pagedir\n" ); - free_suspend_pagedir((unsigned long) pagedir_save); - -#ifdef CONFIG_HIGHMEM - printk( "Restoring highmem\n" ); - restore_highmem(); -#endif - printk("done, devices\n"); - - device_power_up(); - spin_unlock_irq(&suspend_pagedir_lock); - device_resume(); + int error = 0; - /* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */ - PRINTK( "Fixing swap signatures... " ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); - -#ifdef SUSPEND_CONSOLE - acquire_console_sem(); - update_screen(fg_console); - release_console_sem(); -#endif + if ((error = swsusp_swap_check())) + return error; + return suspend_prepare_image(); } -/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does: - - if (!resume) { - do_magic_suspend_1(); - save_processor_state(); - SAVE_REGISTERS - do_magic_suspend_2(); - return; - } - GO_TO_SWAPPER_PAGE_TABLES - do_magic_resume_1(); - COPY_PAGES_BACK - RESTORE_REGISTERS +int swsusp_suspend(void) +{ + int error; + if ((error = arch_prepare_suspend())) + return error; + local_irq_disable(); + save_processor_state(); + error = swsusp_arch_suspend(); restore_processor_state(); - do_magic_resume_2(); + local_irq_enable(); + return error; +} - */ -asmlinkage void do_magic_suspend_1(void) +asmlinkage int swsusp_restore(void) { - mb(); - barrier(); - BUG_ON(in_atomic()); - spin_lock_irq(&suspend_pagedir_lock); + BUG_ON (nr_copy_pages_check != nr_copy_pages); + BUG_ON (pagedir_order_check != pagedir_order); + + /* Even mappings of "global" things (vmalloc) need to be fixed */ + __flush_tlb_global(); + return 0; } -asmlinkage void do_magic_suspend_2(void) +int 
swsusp_resume(void) { - int is_problem; - read_swapfiles(); - device_power_down(3); - is_problem = suspend_prepare_image(); - device_power_up(); - spin_unlock_irq(&suspend_pagedir_lock); - if (!is_problem) { - kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */ - BUG_ON(in_atomic()); - suspend_save_image(); - suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */ - } - - printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend); - MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */ - - barrier(); - mb(); - spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ - - free_pages((unsigned long) pagedir_nosave, pagedir_order); - spin_unlock_irq(&suspend_pagedir_lock); - - device_resume(); - PRINTK( "Fixing swap signatures... " ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); + int error; + local_irq_disable(); + save_processor_state(); + error = swsusp_arch_resume(); + restore_processor_state(); + restore_highmem(); + local_irq_enable(); + return error; } -/* - * This is main interface to the outside world. It needs to be - * called from process context. - */ -int software_suspend(void) -{ - int res; - if (!software_suspend_enabled) - return -EAGAIN; - - software_suspend_enabled = 0; - might_sleep(); - if (arch_prepare_suspend()) { - printk("%sArchitecture failed to prepare\n", name_suspend); - return -EPERM; - } - if (pm_prepare_console()) - printk( "%sCan't allocate a console... proceeding\n", name_suspend); - if (!prepare_suspend_processes()) { - - /* At this point, all user processes and "dangerous" - kernel threads are stopped. Free some memory, as we - need half of memory free. */ - - free_some_memory(); - disable_nonboot_cpus(); - /* Save state of all device drivers, and stop them. */ - printk("Suspending devices... 
"); - if ((res = device_suspend(3))==0) { - /* If stopping device drivers worked, we proceed basically into - * suspend_save_image. - * - * do_magic(0) returns after system is resumed. - * - * do_magic() copies all "used" memory to "free" memory, then - * unsuspends all device drivers, and writes memory to disk - * using normal kernel mechanism. - */ - do_magic(0); - } - thaw_processes(); - enable_nonboot_cpus(); - } else - res = -EBUSY; - software_suspend_enabled = 1; - MDELAY(1000); - pm_restore_console(); - return res; -} /* More restore stuff */ @@ -874,7 +895,7 @@ int software_suspend(void) /* * Returns true if given address/order collides with any orig_address */ -static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, +static int __init does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, int order) { int i; @@ -892,7 +913,7 @@ static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, * We check here that pagedir & pages it points to won't collide with pages * where we're going to restore from the loaded pages later */ -static int check_pagedir(void) +static int __init check_pagedir(void) { int i; @@ -910,7 +931,7 @@ static int check_pagedir(void) return 0; } -static int relocate_pagedir(void) +static int __init swsusp_pagedir_relocate(void) { /* * We have to avoid recursion (not to overflow kernel stack), @@ -953,283 +974,263 @@ static int relocate_pagedir(void) free_pages((unsigned long)f, pagedir_order); } printk("|\n"); - return ret; + return check_pagedir(); } -/* - * Sanity check if this image makes sense with this kernel/swap context - * I really don't think that it's foolproof but more than nothing.. +/** + * Using bio to read from swap. + * This code requires a bit more work than just using buffer heads + * but, it is the recommended way for 2.5/2.6. + * The following are to signal the beginning and end of I/O. Bios + * finish asynchronously, while we want them to happen synchronously. 
+ * A simple atomic_t, and a wait loop take care of this problem. */ -static int sanity_check_failed(char *reason) -{ - printk(KERN_ERR "%s%s\n", name_resume, reason); - return -EPERM; -} +static atomic_t io_done = ATOMIC_INIT(0); -static int sanity_check(struct suspend_header *sh) +static void start_io(void) { - if (sh->version_code != LINUX_VERSION_CODE) - return sanity_check_failed("Incorrect kernel version"); - if (sh->num_physpages != num_physpages) - return sanity_check_failed("Incorrect memory size"); - if (strncmp(sh->machine, system_utsname.machine, 8)) - return sanity_check_failed("Incorrect machine type"); - if (strncmp(sh->version, system_utsname.version, 20)) - return sanity_check_failed("Incorrect version"); - if (sh->num_cpus != num_online_cpus()) - return sanity_check_failed("Incorrect number of cpus"); - if (sh->page_size != PAGE_SIZE) - return sanity_check_failed("Incorrect PAGE_SIZE"); - return 0; + atomic_set(&io_done,1); } -static int bdev_read_page(struct block_device *bdev, long pos, void *buf) +static int end_io(struct bio * bio, unsigned int num, int err) { - struct buffer_head *bh; - BUG_ON (pos%PAGE_SIZE); - bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); - if (!bh || (!bh->b_data)) { - return -1; - } - memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */ - BUG_ON(!buffer_uptodate(bh)); - brelse(bh); + atomic_set(&io_done,0); return 0; -} +} -static int bdev_write_page(struct block_device *bdev, long pos, void *buf) +static void wait_io(void) { -#if 0 - struct buffer_head *bh; - BUG_ON (pos%PAGE_SIZE); - bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); - if (!bh || (!bh->b_data)) { - return -1; - } - memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */ - BUG_ON(!buffer_uptodate(bh)); - generic_make_request(WRITE, bh); - if (!buffer_uptodate(bh)) - printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file); - wait_on_buffer(bh); - brelse(bh); - return 0; -#endif - 
printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file); - return 0; + while(atomic_read(&io_done)) + io_schedule(); } -extern dev_t __init name_to_dev_t(const char *line); -static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume) -{ - swp_entry_t next; - int i, nr_pgdir_pages; +static struct block_device * resume_bdev; -#define PREPARENEXT \ - { next = cur->link.next; \ - next.val = swp_offset(next) * PAGE_SIZE; \ - } +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @page_off: physical offset of page. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ - if (bdev_read_page(bdev, 0, cur)) return -EIO; +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; - if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || - (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { - printk(KERN_ERR "%sThis is normal swap space\n", name_resume ); - return -EINVAL; + bio = bio_alloc(GFP_ATOMIC,1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; } - PREPARENEXT; /* We have to read next position before we overwrite it */ - - if (!memcmp("S1",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); - else if (!memcmp("S2",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); - else { - if (noresume) - return -EINVAL; - panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n", - name_resume, cur->swh.magic.magic); - } - if (noresume) { - /* We don't do a sanity check here: we 
want to restore the swap - whatever version of kernel made the suspend image; - We need to write swap, but swap is *not* enabled so - we must write the device directly */ - printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file); - bdev_write_page(bdev, 0, cur); - } + if (rw == WRITE) + bio_set_pages_dirty(bio); + start_io(); + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + wait_io(); + Done: + bio_put(bio); + return error; +} - printk( "%sSignature found, resuming\n", name_resume ); - MDELAY(1000); +int bio_read_page(pgoff_t page_off, void * page) +{ + return submit(READ,page_off,page); +} - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - if (sanity_check(&cur->sh)) /* Is this same machine? */ - return -EPERM; - PREPARENEXT; +int bio_write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE,page_off,page); +} - pagedir_save = cur->sh.suspend_pagedir; - nr_copy_pages = cur->sh.num_pbes; - nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); - pagedir_order = get_bitmask_order(nr_pgdir_pages); +/* + * Sanity check if this image makes sense with this kernel/swap context + * I really don't think that it's foolproof but more than nothing.. 
+ */ - pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); - if (!pagedir_nosave) - return -ENOMEM; +static const char * __init sanity_check(void) +{ + dump_info(); + if(swsusp_info.version_code != LINUX_VERSION_CODE) + return "kernel version"; + if(swsusp_info.num_physpages != num_physpages) + return "memory size"; + if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) + return "system type"; + if (strcmp(swsusp_info.uts.release,system_utsname.release)) + return "kernel release"; + if (strcmp(swsusp_info.uts.version,system_utsname.version)) + return "version"; + if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) + return "machine"; + if(swsusp_info.cpus != num_online_cpus()) + return "number of cpus"; + return NULL; +} - PRINTK( "%sReading pagedir, ", name_resume ); - /* We get pages in reverse order of saving! */ - for (i=nr_pgdir_pages-1; i>=0; i--) { - BUG_ON (!next.val); - cur = (union diskpage *)((char *) pagedir_nosave)+i; - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - PREPARENEXT; - } - BUG_ON (next.val); +static int __init check_header(void) +{ + const char * reason = NULL; + int error; - if (relocate_pagedir()) - return -ENOMEM; - if (check_pagedir()) - return -ENOMEM; + if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) + return error; - printk( "Reading image data (%d pages): ", nr_copy_pages ); - for(i=0; i < nr_copy_pages; i++) { - swp_entry_t swap_address = (pagedir_nosave+i)->swap_address; - if (!(i%100)) - printk( "." ); - /* You do not need to check for overlaps... - ... check_pagedir already did this work */ - if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address))) - return -EIO; + /* Is this same machine? 
*/ + if ((reason = sanity_check())) { + printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); + return -EPERM; } - printk( "|\n" ); - return 0; + nr_copy_pages = swsusp_info.image_pages; + return error; } -static int __init read_suspend_image(const char * specialfile, int noresume) +static int __init check_sig(void) { - union diskpage *cur; - unsigned long scratch_page = 0; int error; - char b[BDEVNAME_SIZE]; - - resume_device = name_to_dev_t(specialfile); - scratch_page = get_zeroed_page(GFP_ATOMIC); - cur = (void *) scratch_page; - if (cur) { - struct block_device *bdev; - printk("Resuming from device %s\n", - __bdevname(resume_device, b)); - bdev = open_by_devnum(resume_device, FMODE_READ); - if (IS_ERR(bdev)) { - error = PTR_ERR(bdev); - } else { - set_blocksize(bdev, PAGE_SIZE); - error = __read_suspend_image(bdev, cur, noresume); - blkdev_put(bdev); - } - } else error = -ENOMEM; - if (scratch_page) - free_page(scratch_page); - switch (error) { - case 0: - PRINTK("Reading resume file was successful\n"); - break; - case -EINVAL: - break; - case -EIO: - printk( "%sI/O error\n", name_resume); - break; - case -ENOENT: - printk( "%s%s: No such file or directory\n", name_resume, specialfile); - break; - case -ENOMEM: - printk( "%sNot enough memory\n", name_resume); - break; - default: - printk( "%sError %d resuming\n", name_resume, error ); + memset(&swsusp_header,0,sizeof(swsusp_header)); + if ((error = bio_read_page(0,&swsusp_header))) + return error; + if (!memcmp(SWSUSP_SIG,swsusp_header.sig,10)) { + memcpy(swsusp_header.sig,swsusp_header.orig_sig,10); + + /* + * Reset swap signature now. 
+ */ + error = bio_write_page(0,&swsusp_header); + } else { + pr_debug(KERN_ERR "swsusp: Invalid partition type.\n"); + return -EINVAL; } - MDELAY(1000); + if (!error) + pr_debug("swsusp: Signature found, resuming\n"); return error; } + +int __init verify(void) +{ + int error; + + if (!(error = check_sig())) + error = check_header(); + return error; +} + + /** - * software_resume - Resume from a saved image. - * - * Called as a late_initcall (so all devices are discovered and - * initialized), we call swsusp to see if we have a saved image or not. - * If so, we quiesce devices, then restore the saved image. We will - * return above (in pm_suspend_disk() ) if everything goes well. - * Otherwise, we fail gracefully and return to the normally - * scheduled program. + * data_read - Read image pages from swap. * + * You do not need to check for overlaps, check_pagedir() + * already did that. */ -static int __init software_resume(void) -{ - if (num_online_cpus() > 1) { - printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n"); - return -EINVAL; - } - /* We enable the possibility of machine suspend */ - software_suspend_enabled = 1; - if (!resume_status) - return 0; - printk( "%s", name_resume ); - if (resume_status == NORESUME) { - if(resume_file[0]) - read_suspend_image(resume_file, 1); - printk( "disabled\n" ); - return 0; - } - MDELAY(1000); +static int __init data_read(void) +{ + struct pbe * p; + int error; + int i; - if (pm_prepare_console()) - printk("swsusp: Can't allocate a console... proceeding\n"); + if ((error = swsusp_pagedir_relocate())) + return error; - if (!resume_file[0] && resume_status == RESUME_SPECIFIED) { - printk( "suspension device unspecified\n" ); - return -EINVAL; + printk( "Reading image data (%d pages): ", nr_copy_pages ); + for(i = 0, p = pagedir_nosave; i < nr_copy_pages && !error; i++, p++) { + if (!(i%100)) + printk( "." 
); + error = bio_read_page(swp_offset(p->swap_address), + (void *)p->address); } + printk(" %d done.\n",i); + return error; - printk( "resuming from %s\n", resume_file); - if (read_suspend_image(resume_file, 0)) - goto read_failure; - /* FIXME: Should we stop processes here, just to be safer? */ - disable_nonboot_cpus(); - device_suspend(3); - do_magic(1); - panic("This never returns"); - -read_failure: - pm_restore_console(); - return 0; } -late_initcall(software_resume); +extern dev_t __init name_to_dev_t(const char *line); -static int __init resume_setup(char *str) +static int __init read_pagedir(void) { - if (resume_status == NORESUME) - return 1; + unsigned long addr; + int i, n = swsusp_info.pagedir_pages; + int error = 0; - strncpy( resume_file, str, 255 ); - resume_status = RESUME_SPECIFIED; + pagedir_order = get_bitmask_order(n); - return 1; + addr =__get_free_pages(GFP_ATOMIC, pagedir_order); + if (!addr) + return -ENOMEM; + pagedir_nosave = (struct pbe *)addr; + + pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n); + + for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) { + unsigned long offset = swp_offset(swsusp_info.pagedir[i]); + if (offset) + error = bio_read_page(offset, (void *)addr); + else + error = -EFAULT; + } + if (error) + free_pages((unsigned long)pagedir_nosave,pagedir_order); + return error; } -static int __init noresume_setup(char *str) +static int __init read_suspend_image(void) { - resume_status = NORESUME; - return 1; + int error = 0; + + if ((error = verify())) + return error; + if ((error = read_pagedir())) + return error; + if ((error = data_read())) { + free_pages((unsigned long)pagedir_nosave,pagedir_order); + } + return error; } -__setup("noresume", noresume_setup); -__setup("resume=", resume_setup); +/** + * swsusp_read - Read saved image from swap. 
+ */ + +int __init swsusp_read(void) +{ + int error; + + if (!strlen(resume_file)) + return -ENOENT; -EXPORT_SYMBOL(software_suspend); -EXPORT_SYMBOL(software_suspend_enabled); + resume_device = name_to_dev_t(resume_file); + pr_debug("swsusp: Resume From Partition: %s\n", resume_file); + + resume_bdev = open_by_devnum(resume_device, FMODE_READ); + if (!IS_ERR(resume_bdev)) { + set_blocksize(resume_bdev, PAGE_SIZE); + error = read_suspend_image(); + blkdev_put(resume_bdev); + } else + error = PTR_ERR(resume_bdev); + + if (!error) + pr_debug("Reading resume file was successful\n"); + else + pr_debug("pmdisk: Error %d resuming\n", error); + return error; +}