From 49c3df6aaa6a51071fc135273d1a2515d019099f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86: Move swsusp __pa() dependent code to arch portion o __pa() should be used only on kernel linearly mapped virtual addresses and not on kernel text and data addresses. o Hibernation code needs to determine the physical address associated with kernel symbol to mark a section boundary which contains pages which don't have to be saved and restored during hibernate/resume operation. o Move this piece of code in arch dependent section. So that architectures which don't have kernel text/data mapped into kernel linearly mapped region can come up with their own ways of determining physical addresses associated with a kernel text. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- kernel/power/power.h | 5 ++--- kernel/power/snapshot.c | 11 ----------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816bf4..1c6eef8df4ad 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -23,6 +23,8 @@ static inline int pm_suspend_disk(void) } #endif +extern int pfn_is_nosave(unsigned long); + extern struct mutex pm_mutex; #define power_attr(_name) \ @@ -37,9 +39,6 @@ static struct subsys_attribute _name##_attr = { \ extern struct subsystem power_subsys; -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; extern int in_suspend; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index fc53ad068128..704c25a3ffec 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -650,17 +650,6 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } static inline unsigned int count_highmem_pages(void) { return 0; } #endif /* CONFIG_HIGHMEM */ -/** - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -static inline int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - /** * saveable - Determine whether a non-highmem page should be included in * the suspend image. -- cgit 1.4.1 From 1b29c1643c0d82512477ccd97dc290198fe23e22 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: do not use virt_to_page on kernel data address o virt_to_page() call should be used on kernel linear addresses and not on kernel text and data addresses. Swsusp code uses it on kernel data (statically allocated swsusp_header). o Allocate swsusp_header dynamically so that virt_to_page() can be used safely. o I am changing this because in next few patches, __pa() on x86_64 will no longer support kernel text and data addresses and hibernation breaks. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- kernel/power/swap.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 3581f8f86acd..b18c155cbb60 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,12 +33,14 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" -static struct swsusp_header { +struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; sector_t image; char orig_sig[10]; char sig[10]; -} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; +} __attribute__((packed)); + +static struct swsusp_header *swsusp_header; /* * General things @@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start) { int error; - bio_read_page(swsusp_resume_block, &swsusp_header, NULL); - if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || - !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { - memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); - memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - swsusp_header.image = start; + bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { + memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); + memcpy(swsusp_header->sig,SWSUSP_SIG, 10); + swsusp_header->image = start; error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { printk(KERN_ERR "swsusp: Swap header not found!\n"); error = -ENODEV; @@ -564,7 +566,7 @@ int swsusp_read(void) if (error < PAGE_SIZE) return error < 0 ? error : -EFAULT; header = (struct swsusp_info *)data_of(snapshot); - error = get_swap_reader(&handle, swsusp_header.image); + error = get_swap_reader(&handle, swsusp_header->image); if (!error) error = swap_read_page(&handle, header, NULL); if (!error) @@ -591,17 +593,17 @@ int swsusp_check(void) resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); - memset(&swsusp_header, 0, sizeof(swsusp_header)); + memset(swsusp_header, 0, sizeof(PAGE_SIZE)); error = bio_read_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); if (error) return error; - if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { - memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { return -EINVAL; } @@ -632,3 +634,13 @@ void swsusp_close(void) blkdev_put(resume_bdev); } + +static int swsusp_header_init(void) +{ + swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); + if (!swsusp_header) + panic("Could not allocate memory for swsusp_header\n"); + return 0; +} + +core_initcall(swsusp_header_init); -- cgit 1.4.1 From b00742d399513a4100c24cc2accefdc1bb1e0b15 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86-64: Account for module percpu space separately from kernel percpu Rather than using a single constant PERCPU_ENOUGH_ROOM, compute it as the sum of kernel_percpu + PERCPU_MODULE_RESERVE. This is now common to all architectures; if an architecture wants to set PERCPU_ENOUGH_ROOM to something special, then it may do so (ia64 is the only one which does). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Eric W. Biederman Cc: Andi Kleen --- include/asm-alpha/percpu.h | 14 -------------- include/asm-sparc64/percpu.h | 10 ---------- include/asm-x86_64/percpu.h | 10 ---------- include/linux/percpu.h | 9 ++++++++- kernel/module.c | 2 +- 5 files changed, 9 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h index 651ebb141b24..48348fe34c19 100644 --- a/include/asm-alpha/percpu.h +++ b/include/asm-alpha/percpu.h @@ -1,20 +1,6 @@ #ifndef __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H -/* - * Increase the per cpu area for Alpha so that - * modules using percpu area can load. - */ -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #include #endif /* __ALPHA_PERCPU_H */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index 0d3df76aa47f..ced8cbde046d 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -5,16 +5,6 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - extern void setup_per_cpu_areas(void); extern unsigned long __per_cpu_base; diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 5ed0ef340842..c6fbb67eac90 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -11,16 +11,6 @@ #include -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset() read_pda(data_offset) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 600e3d387ffc..b72be2f79e6a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -11,9 +11,16 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#ifdef CONFIG_MODULES +#define PERCPU_MODULE_RESERVE 8192 +#else +#define PERCPU_MODULE_RESERVE 0 #endif +#define PERCPU_ENOUGH_ROOM \ + (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +#endif /* PERCPU_ENOUGH_ROOM */ + /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. diff --git a/kernel/module.c b/kernel/module.c index 9da5af668a20..cf49ca25fcce 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -430,7 +430,7 @@ static int percpu_modinit(void) pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, GFP_KERNEL); /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES); + pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); /* Free room. */ pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; if (pcpu_size[1] < 0) { -- cgit 1.4.1 From b6e3590f8145c77b8fcef3247e2412335221412f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Allow percpu variables to be page-aligned Let's allow page-alignment in general for per-cpu data (wanted by Xen, and Ingo suggested KVM as well). Because larger alignments can use more room, we increase the max per-cpu memory to 64k rather than 32k: it's getting a little tight. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/alpha/kernel/vmlinux.lds.S | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/cris/arch-v32/vmlinux.lds.S | 1 + arch/frv/kernel/vmlinux.lds.S | 1 + arch/i386/kernel/vmlinux.lds.S | 2 +- arch/m32r/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 2 +- arch/parisc/kernel/vmlinux.lds.S | 2 +- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/kernel/vmlinux.lds.S | 6 +----- arch/ppc/kernel/vmlinux.lds.S | 2 +- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/sh/kernel/vmlinux.lds.S | 2 +- arch/sh64/kernel/vmlinux.lds.S | 2 +- arch/sparc/kernel/vmlinux.lds.S | 2 +- arch/sparc64/kernel/smp.c | 6 +++--- arch/x86_64/kernel/setup64.c | 4 ++-- arch/x86_64/kernel/vmlinux.lds.S | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 2 +- init/main.c | 8 ++------ kernel/module.c | 8 ++++---- 21 files changed, 29 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 4cc44bd33d33..cf1e6fc6c686 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -69,7 +69,7 @@ SECTIONS . = ALIGN(8); SECURITY_INIT - . = ALIGN(64); + . = ALIGN(8192); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index ddbdad48f5b2..d1a6a597ed9a 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -59,7 +59,7 @@ SECTIONS usr/built-in.o(.init.ramfs) __initramfs_end = .; #endif - . = ALIGN(64); + . = ALIGN(4096); __per_cpu_start = .; *(.data.percpu) __per_cpu_end = .; diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S index e124fcd766d5..dfa25e1542b9 100644 --- a/arch/cris/arch-v32/vmlinux.lds.S +++ b/arch/cris/arch-v32/vmlinux.lds.S @@ -91,6 +91,7 @@ SECTIONS } SECURITY_INIT + . = ALIGN (8192); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 97910e016825..28eae9735ad6 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -57,6 +57,7 @@ SECTIONS __alt_instructions_end = .; .altinstr_replacement : { *(.altinstr_replacement) } + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index f4ec72231835..97fe6eac47c9 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -194,7 +194,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(L1_CACHE_BYTES); + . = ALIGN(4096); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; *(.data.percpu) diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 439cc257cd1d..6c73bca3f478 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -110,7 +110,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index c76b793310c2..043f637e3d10 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -119,7 +119,7 @@ SECTIONS .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(_PAGE_SIZE); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 2a8253358c6c..c74585990598 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -181,7 +181,7 @@ SECTIONS .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(ASM_PAGE_SIZE); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22083ce3cc30..6018178708a5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -582,14 +582,14 @@ void __init setup_per_cpu_areas(void) char *ptr; /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); + size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); #ifdef CONFIG_MODULES if (size < PERCPU_ENOUGH_ROOM) size = PERCPU_ENOUGH_ROOM; #endif for_each_possible_cpu(i) { - ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); + ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7eefeb4a30e7..132067313147 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -139,11 +139,7 @@ SECTIONS __initramfs_end = .; } #endif -#ifdef CONFIG_PPC32 - . = ALIGN(32); -#else - . = ALIGN(128); -#endif + . = ALIGN(PAGE_SIZE); .data.percpu : { __per_cpu_start = .; *(.data.percpu) diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index a0625562a44b..44cd128fb719 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -130,7 +130,7 @@ SECTIONS __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; - . = ALIGN(32); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 418f6426a949..e9d3432aba60 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -107,7 +107,7 @@ SECTIONS . = ALIGN(2); __initramfs_end = .; #endif - . = ALIGN(256); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 78a6c09875b2..2f606d0ce1f6 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -54,7 +54,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.page_aligned : { *(.data.page_aligned) } - . = ALIGN(L1_CACHE_BYTES); + . = ALIGN(PAGE_SIZE); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S index a59c5e998131..4f9616f39830 100644 --- a/arch/sh64/kernel/vmlinux.lds.S +++ b/arch/sh64/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) } - . = ALIGN(L1_CACHE_BYTES); + . = ALIGN(PAGE_SIZE); __per_cpu_start = .; .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) } __per_cpu_end = . ; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index e5c24e0521de..f0bb6e60e620 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -65,7 +65,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(32); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index d4f0a70f4845..1fac215252e4 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1343,11 +1343,11 @@ void __init setup_per_cpu_areas(void) /* Copy section for each CPU (we discard the original) */ goal = PERCPU_ENOUGH_ROOM; - __per_cpu_shift = 0; - for (size = 1UL; size < goal; size <<= 1UL) + __per_cpu_shift = PAGE_SHIFT; + for (size = PAGE_SIZE; size < goal; size <<= 1UL) __per_cpu_shift++; - ptr = alloc_bootmem(size * NR_CPUS); + ptr = alloc_bootmem_pages(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 53064a9a365f..64379a80d763 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void) if (!NODE_DATA(cpu_to_node(i))) { printk("cpu with no node %d, num_online_nodes %d\n", i, num_online_nodes()); - ptr = alloc_bootmem(size); + ptr = alloc_bootmem_pages(size); } else { - ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); + ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); } if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 3bdeb88d28f4..7ef0b8820cd2 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -195,7 +195,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ab6370054cee..4fbd66a52a88 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -198,7 +198,7 @@ SECTIONS __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; - . = ALIGN(32); + . = ALIGN(4096); __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; diff --git a/init/main.c b/init/main.c index a92989e7836a..80f09f31bfab 100644 --- a/init/main.c +++ b/init/main.c @@ -369,12 +369,8 @@ static void __init setup_per_cpu_areas(void) unsigned long nr_possible_cpus = num_possible_cpus(); /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); -#ifdef CONFIG_MODULES - if (size < PERCPU_ENOUGH_ROOM) - size = PERCPU_ENOUGH_ROOM; -#endif - ptr = alloc_bootmem(size * nr_possible_cpus); + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); for_each_possible_cpu(i) { __per_cpu_offset[i] = ptr - __per_cpu_start; diff --git a/kernel/module.c b/kernel/module.c index cf49ca25fcce..4dc4a257545c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -346,10 +346,10 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, unsigned int i; void *ptr; - if (align > SMP_CACHE_BYTES) { - printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n", - name, align, SMP_CACHE_BYTES); - align = SMP_CACHE_BYTES; + if (align > PAGE_SIZE) { + printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", + name, align, PAGE_SIZE); + align = PAGE_SIZE; } ptr = __per_cpu_start; -- cgit 1.4.1 From d6dd61c831226f9cd7750885da04d360d6455101 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] x86: PARAVIRT: add hooks to intercept mm creation and destruction Add hooks to allow a paravirt implementation to track the lifetime of an mm. Paravirtualization requires three hooks, but only two are needed in common code. They are: arch_dup_mmap, which is called when a new mmap is created at fork arch_exit_mmap, which is called when the last process reference to an mm is dropped, which typically happens on exit and exec. The third hook is activate_mm, which is called from the arch-specific activate_mm() macro/function, and so doesn't need stub versions for other architectures. It's called when an mm is first used. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: linux-arch@vger.kernel.org Cc: James Bottomley Acked-by: Ingo Molnar --- arch/i386/kernel/paravirt.c | 4 ++++ include/asm-alpha/mmu_context.h | 1 + include/asm-arm/mmu_context.h | 1 + include/asm-arm26/mmu_context.h | 2 ++ include/asm-avr32/mmu_context.h | 1 + include/asm-cris/mmu_context.h | 2 ++ include/asm-frv/mmu_context.h | 1 + include/asm-generic/mm_hooks.h | 18 ++++++++++++++++++ include/asm-h8300/mmu_context.h | 1 + include/asm-i386/mmu_context.h | 17 +++++++++++++++-- include/asm-i386/paravirt.h | 23 +++++++++++++++++++++++ include/asm-ia64/mmu_context.h | 1 + include/asm-m32r/mmu_context.h | 1 + include/asm-m68k/mmu_context.h | 1 + include/asm-m68knommu/mmu_context.h | 1 + include/asm-mips/mmu_context.h | 1 + include/asm-parisc/mmu_context.h | 1 + include/asm-powerpc/mmu_context.h | 1 + include/asm-ppc/mmu_context.h | 1 + include/asm-s390/mmu_context.h | 2 ++ include/asm-sh/mmu_context.h | 1 + include/asm-sh64/mmu_context.h | 2 +- include/asm-sparc/mmu_context.h | 2 ++ include/asm-sparc64/mmu_context.h | 1 + include/asm-um/mmu_context.h | 2 ++ include/asm-v850/mmu_context.h | 2 ++ include/asm-x86_64/mmu_context.h | 1 + include/asm-xtensa/mmu_context.h | 1 + kernel/fork.c | 2 ++ mm/mmap.c | 4 ++++ 30 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 include/asm-generic/mm_hooks.h (limited to 'kernel') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 2040a831d5b3..54cc14d99740 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -237,6 +237,10 @@ struct paravirt_ops paravirt_ops = { .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, + .dup_mmap = paravirt_nop, + .exit_mmap = paravirt_nop, + .activate_mm = paravirt_nop, + .startup_ipi_hook = paravirt_nop, }; diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index fe249e9d3360..0bd7bd2ccb90 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Force a context reload. This is needed when we change the page diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h index d1a65b1edcaa..f8755c818b54 100644 --- a/include/asm-arm/mmu_context.h +++ b/include/asm-arm/mmu_context.h @@ -16,6 +16,7 @@ #include #include #include +#include void __check_kvm_seq(struct mm_struct *mm); diff --git a/include/asm-arm26/mmu_context.h b/include/asm-arm26/mmu_context.h index 1a929bfe5c3a..16c821f81b8d 100644 --- a/include/asm-arm26/mmu_context.h +++ b/include/asm-arm26/mmu_context.h @@ -13,6 +13,8 @@ #ifndef __ASM_ARM_MMU_CONTEXT_H #define __ASM_ARM_MMU_CONTEXT_H +#include + #define init_new_context(tsk,mm) 0 #define destroy_context(mm) do { } while(0) diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h index 31add1ae8089..c37c391faef6 100644 --- a/include/asm-avr32/mmu_context.h +++ b/include/asm-avr32/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * The MMU "context" consists of two things: diff --git a/include/asm-cris/mmu_context.h b/include/asm-cris/mmu_context.h index e6e659dc757b..72ba08dcfd18 100644 --- a/include/asm-cris/mmu_context.h +++ b/include/asm-cris/mmu_context.h @@ -1,6 +1,8 @@ #ifndef __CRIS_MMU_CONTEXT_H #define __CRIS_MMU_CONTEXT_H +#include + extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void get_mmu_context(struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); diff --git a/include/asm-frv/mmu_context.h b/include/asm-frv/mmu_context.h index 72edcaaccd5d..c7daa395156a 100644 --- a/include/asm-frv/mmu_context.h +++ b/include/asm-frv/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h new file mode 100644 index 000000000000..67dea8123683 --- /dev/null +++ b/include/asm-generic/mm_hooks.h @@ -0,0 +1,18 @@ +/* + * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap, to + * be included in asm-FOO/mmu_context.h for any arch FOO which doesn't + * need to hook these. + */ +#ifndef _ASM_GENERIC_MM_HOOKS_H +#define _ASM_GENERIC_MM_HOOKS_H + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +#endif /* _ASM_GENERIC_MM_HOOKS_H */ diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h index 5c165f7bee0e..f44b730da54d 100644 --- a/include/asm-h8300/mmu_context.h +++ b/include/asm-h8300/mmu_context.h @@ -4,6 +4,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index e6aa30f8de5b..8198d1cca1f3 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -5,6 +5,16 @@ #include #include #include +#include +#ifndef CONFIG_PARAVIRT +#include + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT */ + /* * Used for LDT copy/destruction. @@ -65,7 +75,10 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL) +#define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm(prev, next); \ + switch_mm((prev),(next),NULL); \ + } while(0); #endif diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f93599dc7756..61c03f1e0c29 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -119,6 +119,12 @@ struct paravirt_ops void (*io_delay)(void); + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + #ifdef CONFIG_X86_LOCAL_APIC void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); @@ -395,6 +401,23 @@ static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + paravirt_ops.activate_mm(prev, next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + paravirt_ops.dup_mmap(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_ops.exit_mmap(mm); +} + #define __flush_tlb() paravirt_ops.flush_tlb_user() #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index b5c65081a3aa..cef2400983fa 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -29,6 +29,7 @@ #include #include +#include struct ia64_ctx { spinlock_t lock; diff --git a/include/asm-m32r/mmu_context.h b/include/asm-m32r/mmu_context.h index 1f40d4a0acf1..91909e5dd9d0 100644 --- a/include/asm-m32r/mmu_context.h +++ b/include/asm-m32r/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Cache of MMU context last used. diff --git a/include/asm-m68k/mmu_context.h b/include/asm-m68k/mmu_context.h index 231d11bd8e32..894dacbcee14 100644 --- a/include/asm-m68k/mmu_context.h +++ b/include/asm-m68k/mmu_context.h @@ -1,6 +1,7 @@ #ifndef __M68K_MMU_CONTEXT_H #define __M68K_MMU_CONTEXT_H +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-m68knommu/mmu_context.h b/include/asm-m68knommu/mmu_context.h index 6c077d3a2572..9ccee4278c97 100644 --- a/include/asm-m68knommu/mmu_context.h +++ b/include/asm-m68knommu/mmu_context.h @@ -4,6 +4,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-mips/mmu_context.h b/include/asm-mips/mmu_context.h index fe065d6070ca..65024ffd7879 100644 --- a/include/asm-mips/mmu_context.h +++ b/include/asm-mips/mmu_context.h @@ -20,6 +20,7 @@ #include #include #endif /* SMTC */ +#include /* * For the fast tlb miss handlers, we keep a per cpu array of pointers diff --git a/include/asm-parisc/mmu_context.h b/include/asm-parisc/mmu_context.h index 9c05836239a2..bad690298f0c 100644 --- a/include/asm-parisc/mmu_context.h +++ b/include/asm-parisc/mmu_context.h @@ -5,6 +5,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h index 083ac917bd29..c0d7795e3d25 100644 --- a/include/asm-powerpc/mmu_context.h +++ b/include/asm-powerpc/mmu_context.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Copyright (C) 2001 PPC 64 Team, IBM Corp diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h index 2bc8589cc451..a6441a063e5d 100644 --- a/include/asm-ppc/mmu_context.h +++ b/include/asm-ppc/mmu_context.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index 1d21da220d49..501cb9b06314 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -10,6 +10,8 @@ #define __S390_MMU_CONTEXT_H #include +#include + /* * get a new mmu context.. S390 don't know about contexts. */ diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h index 342024425b7d..01acaaae9751 100644 --- a/include/asm-sh/mmu_context.h +++ b/include/asm-sh/mmu_context.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * The MMU "context" consists of two things: diff --git a/include/asm-sh64/mmu_context.h b/include/asm-sh64/mmu_context.h index 8c860dab2d0e..507bf72bb8e1 100644 --- a/include/asm-sh64/mmu_context.h +++ b/include/asm-sh64/mmu_context.h @@ -27,7 +27,7 @@ extern unsigned long mmu_context_cache; #include - +#include /* Current mm's pgd */ extern pgd_t *mmu_pdtp_cache; diff --git a/include/asm-sparc/mmu_context.h b/include/asm-sparc/mmu_context.h index ed1e01d04d21..671a997b9e69 100644 --- a/include/asm-sparc/mmu_context.h +++ b/include/asm-sparc/mmu_context.h @@ -5,6 +5,8 @@ #ifndef __ASSEMBLY__ +#include + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 2337eb487719..8d129032013e 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -9,6 +9,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index f709c784bf12..9aa4b44e8cc1 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -6,6 +6,8 @@ #ifndef __UM_MMU_CONTEXT_H #define __UM_MMU_CONTEXT_H +#include + #include "linux/sched.h" #include "choose-mode.h" #include "um_mmu.h" diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h index f521c8050d3c..01daacd5474e 100644 --- a/include/asm-v850/mmu_context.h +++ b/include/asm-v850/mmu_context.h @@ -1,6 +1,8 @@ #ifndef __V850_MMU_CONTEXT_H__ #define __V850_MMU_CONTEXT_H__ +#include + #define destroy_context(mm) ((void)0) #define init_new_context(tsk,mm) 0 #define switch_mm(prev,next,tsk) ((void)0) diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h index af03b9f852d6..0cce83a78378 100644 --- a/include/asm-x86_64/mmu_context.h +++ b/include/asm-x86_64/mmu_context.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * possibly do the LDT unload here? diff --git a/include/asm-xtensa/mmu_context.h b/include/asm-xtensa/mmu_context.h index f14851f086c3..92f948392ebc 100644 --- a/include/asm-xtensa/mmu_context.h +++ b/include/asm-xtensa/mmu_context.h @@ -18,6 +18,7 @@ #include #include #include +#include #define XCHAL_MMU_ASID_BITS 8 diff --git a/kernel/fork.c b/kernel/fork.c index 6af959c034d8..ffccefb28b6a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -286,6 +286,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; } + /* a new mm has just been created */ + arch_dup_mmap(oldmm, mm); retval = 0; out: up_write(&mm->mmap_sem); diff --git a/mm/mmap.c b/mm/mmap.c index 84f997da78d7..88da687bde89 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) @@ -1979,6 +1980,9 @@ void exit_mmap(struct mm_struct *mm) unsigned long nr_accounted = 0; unsigned long end; + /* mm's last user has gone, and its about to be pulled down */ + arch_exit_mmap(mm); + lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); -- cgit 1.4.1 From 823bccfc4002296ba88c3ad0f049e1abd8108d30 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 13 Apr 2007 13:15:19 -0700 Subject: remove "struct subsystem" as it is no longer needed We need to work on cleaning up the relationship between kobjects, ksets and ktypes. The removal of 'struct subsystem' is the first step of this, especially as it is not really needed at all. Thanks to Kay for fixing the bugs in this patch. Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap1/pm.c | 6 +-- arch/powerpc/kernel/vio.c | 4 +- arch/powerpc/platforms/pseries/power.c | 8 +-- arch/s390/kernel/ipl.c | 32 ++++++------ block/genhd.c | 12 ++--- drivers/base/base.h | 2 + drivers/base/bus.c | 16 +++--- drivers/base/class.c | 18 +++---- drivers/base/core.c | 22 ++++---- drivers/base/firmware.c | 6 +-- drivers/base/power/shutdown.c | 4 +- drivers/base/sys.c | 14 ++--- drivers/firmware/efivars.c | 12 ++--- drivers/input/evdev.c | 4 +- drivers/input/joydev.c | 4 +- drivers/input/mousedev.c | 4 +- drivers/input/tsdev.c | 4 +- drivers/parisc/pdc_stable.c | 94 +++++++++++++++++----------------- drivers/pci/hotplug/acpiphp_ibm.c | 4 +- drivers/pci/hotplug/pci_hotplug_core.c | 4 +- fs/configfs/mount.c | 2 +- fs/debugfs/inode.c | 2 +- fs/dlm/lockspace.c | 2 +- fs/ecryptfs/main.c | 12 ++--- fs/fuse/inode.c | 4 +- fs/gfs2/locking/dlm/sysfs.c | 2 +- fs/gfs2/sys.c | 2 +- fs/ocfs2/cluster/masklog.c | 4 +- fs/ocfs2/cluster/masklog.h | 2 +- fs/ocfs2/cluster/sys.c | 7 ++- fs/partitions/check.c | 6 +-- fs/sysfs/file.c | 11 ++-- include/acpi/acpi_bus.h | 2 +- include/linux/device.h | 8 +-- include/linux/fs.h | 2 +- include/linux/kobject.h | 58 +++++++++------------ include/linux/module.h | 2 +- include/linux/pci_hotplug.h | 2 +- kernel/ksysfs.c | 12 ++--- kernel/module.c | 8 +-- kernel/params.c | 2 + kernel/power/disk.c | 14 ++--- kernel/power/main.c | 10 ++-- kernel/power/power.h | 2 +- lib/kobject.c | 69 +++---------------------- security/inode.c | 2 +- 46 files changed, 231 insertions(+), 292 deletions(-) (limited to 'kernel') diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 0383ab334270..6f4ea4bda5e0 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -72,12 +72,12 @@ static unsigned int mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_SIZE]; static unsigned short enable_dyn_sleep = 1; -static ssize_t omap_pm_sleep_while_idle_show(struct subsystem * subsys, char *buf) +static ssize_t omap_pm_sleep_while_idle_show(struct kset *kset, char *buf) { return sprintf(buf, "%hu\n", enable_dyn_sleep); } -static ssize_t omap_pm_sleep_while_idle_store(struct subsystem * subsys, +static ssize_t omap_pm_sleep_while_idle_store(struct kset *kset, const char * buf, size_t n) { @@ -100,7 +100,7 @@ static struct subsys_attribute sleep_while_idle_attr = { .store = omap_pm_sleep_while_idle_store, }; -extern struct subsystem power_subsys; +extern struct kset power_subsys; static void (*omap_sram_idle)(void) = NULL; static void (*omap_sram_suspend)(unsigned long r0, unsigned long r1) = NULL; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 9eaefac5053f..b2c1b67a10a7 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -37,7 +37,7 @@ #include #include -extern struct subsystem devices_subsys; /* needed for vio_find_name() */ +extern struct kset devices_subsys; /* needed for vio_find_name() */ static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = vio_bus_device.dev.bus_id, @@ -427,7 +427,7 @@ static struct vio_dev *vio_find_name(const char *kobj_name) { struct kobject *found; - found = kset_find_obj(&devices_subsys.kset, kobj_name); + found = kset_find_obj(&devices_subsys, kobj_name); if (!found) return NULL; diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index 2624b71df73d..73e69023d90a 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -28,13 +28,13 @@ unsigned long rtas_poweron_auto; /* default and normal state is 0 */ -static ssize_t auto_poweron_show(struct subsystem *subsys, char *buf) +static ssize_t auto_poweron_show(struct kset *kset, char *buf) { return sprintf(buf, "%lu\n", rtas_poweron_auto); } static ssize_t -auto_poweron_store(struct subsystem *subsys, const char *buf, size_t n) +auto_poweron_store(struct kset *kset, const char *buf, size_t n) { int ret; unsigned long ups_restart; @@ -72,12 +72,12 @@ static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) - error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + error = sysfs_create_group(&power_subsys.kobj, &attr_group); return error; } core_initcall(pm_init); #else -extern struct subsystem power_subsys; +extern struct kset power_subsys; static int __init apo_pm_init(void) { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 06833ac2b115..0ea048d350d8 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(diag308); /* SYSFS */ #define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ -static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys, \ +static ssize_t sys_##_prefix##_##_name##_show(struct kset *kset, \ char *page) \ { \ return sprintf(page, _format, _value); \ @@ -173,13 +173,13 @@ static struct subsys_attribute sys_##_prefix##_##_name##_attr = \ __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL); #define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ -static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys, \ +static ssize_t sys_##_prefix##_##_name##_show(struct kset *kset, \ char *page) \ { \ return sprintf(page, _fmt_out, \ (unsigned long long) _value); \ } \ -static ssize_t sys_##_prefix##_##_name##_store(struct subsystem *subsys,\ +static ssize_t sys_##_prefix##_##_name##_store(struct kset *kset, \ const char *buf, size_t len) \ { \ unsigned long long value; \ @@ -194,12 +194,12 @@ static struct subsys_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_store); #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ -static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys, \ +static ssize_t sys_##_prefix##_##_name##_show(struct kset *kset, \ char *page) \ { \ return sprintf(page, _fmt_out, _value); \ } \ -static ssize_t sys_##_prefix##_##_name##_store(struct subsystem *subsys,\ +static ssize_t sys_##_prefix##_##_name##_store(struct kset *kset, \ const char *buf, size_t len) \ { \ if (sscanf(buf, _fmt_in, _value) != 1) \ @@ -272,14 +272,14 @@ void __init setup_ipl_info(void) struct ipl_info ipl_info; EXPORT_SYMBOL_GPL(ipl_info); -static ssize_t ipl_type_show(struct subsystem *subsys, char *page) +static ssize_t ipl_type_show(struct kset *kset, char *page) { return sprintf(page, "%s\n", ipl_type_str(ipl_info.type)); } static struct subsys_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); -static ssize_t sys_ipl_device_show(struct subsystem *subsys, char *page) +static ssize_t sys_ipl_device_show(struct kset *kset, char *page) { struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; @@ -371,7 +371,7 @@ static struct attribute_group ipl_fcp_attr_group = { /* CCW ipl device attributes */ -static ssize_t ipl_ccw_loadparm_show(struct subsystem *subsys, char *page) +static ssize_t ipl_ccw_loadparm_show(struct kset *kset, char *page) { char loadparm[LOADPARM_LEN + 1] = {}; @@ -469,7 +469,7 @@ static void reipl_get_ascii_loadparm(char *loadparm) strstrip(loadparm); } -static ssize_t reipl_ccw_loadparm_show(struct subsystem *subsys, char *page) +static ssize_t reipl_ccw_loadparm_show(struct kset *kset, char *page) { char buf[LOADPARM_LEN + 1]; @@ -477,7 +477,7 @@ static ssize_t reipl_ccw_loadparm_show(struct subsystem *subsys, char *page) return sprintf(page, "%s\n", buf); } -static ssize_t reipl_ccw_loadparm_store(struct subsystem *subsys, +static ssize_t reipl_ccw_loadparm_store(struct kset *kset, const char *buf, size_t len) { int i, lp_len; @@ -572,12 +572,12 @@ static int reipl_set_type(enum ipl_type type) return 0; } -static ssize_t reipl_type_show(struct subsystem *subsys, char *page) +static ssize_t reipl_type_show(struct kset *kset, char *page) { return sprintf(page, "%s\n", ipl_type_str(reipl_type)); } -static ssize_t reipl_type_store(struct subsystem *subsys, const char *buf, +static ssize_t reipl_type_store(struct kset *kset, const char *buf, size_t len) { int rc = -EINVAL; @@ -665,12 +665,12 @@ static int dump_set_type(enum dump_type type) return 0; } -static ssize_t dump_type_show(struct subsystem *subsys, char *page) +static ssize_t dump_type_show(struct kset *kset, char *page) { return sprintf(page, "%s\n", dump_type_str(dump_type)); } -static ssize_t dump_type_store(struct subsystem *subsys, const char *buf, +static ssize_t dump_type_store(struct kset *kset, const char *buf, size_t len) { int rc = -EINVAL; @@ -697,12 +697,12 @@ static decl_subsys(shutdown_actions, NULL, NULL); /* on panic */ -static ssize_t on_panic_show(struct subsystem *subsys, char *page) +static ssize_t on_panic_show(struct kset *kset, char *page) { return sprintf(page, "%s\n", shutdown_action_str(on_panic_action)); } -static ssize_t on_panic_store(struct subsystem *subsys, const char *buf, +static ssize_t on_panic_store(struct kset *kset, const char *buf, size_t len) { if (strncmp(buf, SHUTDOWN_REIPL_STR, strlen(SHUTDOWN_REIPL_STR)) == 0) diff --git a/block/genhd.c b/block/genhd.c index 441432a142f2..b5664440896c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -17,7 +17,7 @@ #include #include -struct subsystem block_subsys; +struct kset block_subsys; static DEFINE_MUTEX(block_subsys_lock); /* @@ -221,7 +221,7 @@ static void *part_start(struct seq_file *part, loff_t *pos) loff_t l = *pos; mutex_lock(&block_subsys_lock); - list_for_each(p, &block_subsys.kset.list) + list_for_each(p, &block_subsys.list) if (!l--) return list_entry(p, struct gendisk, kobj.entry); return NULL; @@ -231,7 +231,7 @@ static void *part_next(struct seq_file *part, void *v, loff_t *pos) { struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; ++*pos; - return p==&block_subsys.kset.list ? NULL : + return p==&block_subsys.list ? NULL : list_entry(p, struct gendisk, kobj.entry); } @@ -246,7 +246,7 @@ static int show_partition(struct seq_file *part, void *v) int n; char buf[BDEVNAME_SIZE]; - if (&sgp->kobj.entry == block_subsys.kset.list.next) + if (&sgp->kobj.entry == block_subsys.list.next) seq_puts(part, "major minor #blocks name\n\n"); /* Don't show non-partitionable removeable devices or empty devices */ @@ -565,7 +565,7 @@ static void *diskstats_start(struct seq_file *part, loff_t *pos) struct list_head *p; mutex_lock(&block_subsys_lock); - list_for_each(p, &block_subsys.kset.list) + list_for_each(p, &block_subsys.list) if (!k--) return list_entry(p, struct gendisk, kobj.entry); return NULL; @@ -575,7 +575,7 @@ static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) { struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; ++*pos; - return p==&block_subsys.kset.list ? NULL : + return p==&block_subsys.list ? NULL : list_entry(p, struct gendisk, kobj.entry); } diff --git a/drivers/base/base.h b/drivers/base/base.h index d597f2659b23..5512d84452f2 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -45,3 +45,5 @@ struct class_device_attribute *to_class_dev_attr(struct attribute *_attr) extern char *make_class_name(const char *name, struct kobject *kobj); extern void devres_release_all(struct device *dev); + +extern struct kset devices_subsys; diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 1d76e2349654..dca734819e50 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -17,7 +17,7 @@ #include "power/power.h" #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr) -#define to_bus(obj) container_of(obj, struct bus_type, subsys.kset.kobj) +#define to_bus(obj) container_of(obj, struct bus_type, subsys.kobj) /* * sysfs bindings for drivers @@ -123,7 +123,7 @@ int bus_create_file(struct bus_type * bus, struct bus_attribute * attr) { int error; if (get_bus(bus)) { - error = sysfs_create_file(&bus->subsys.kset.kobj, &attr->attr); + error = sysfs_create_file(&bus->subsys.kobj, &attr->attr); put_bus(bus); } else error = -EINVAL; @@ -133,7 +133,7 @@ int bus_create_file(struct bus_type * bus, struct bus_attribute * attr) void bus_remove_file(struct bus_type * bus, struct bus_attribute * attr) { if (get_bus(bus)) { - sysfs_remove_file(&bus->subsys.kset.kobj, &attr->attr); + sysfs_remove_file(&bus->subsys.kobj, &attr->attr); put_bus(bus); } } @@ -397,7 +397,7 @@ static void device_remove_attrs(struct bus_type * bus, struct device * dev) static int make_deprecated_bus_links(struct device *dev) { return sysfs_create_link(&dev->kobj, - &dev->bus->subsys.kset.kobj, "bus"); + &dev->bus->subsys.kobj, "bus"); } static void remove_deprecated_bus_links(struct device *dev) @@ -431,7 +431,7 @@ int bus_add_device(struct device * dev) if (error) goto out_id; error = sysfs_create_link(&dev->kobj, - &dev->bus->subsys.kset.kobj, "subsystem"); + &dev->bus->subsys.kobj, "subsystem"); if (error) goto out_subsys; error = make_deprecated_bus_links(dev); @@ -810,7 +810,7 @@ int bus_register(struct bus_type * bus) BLOCKING_INIT_NOTIFIER_HEAD(&bus->bus_notifier); - retval = kobject_set_name(&bus->subsys.kset.kobj, "%s", bus->name); + retval = kobject_set_name(&bus->subsys.kobj, "%s", bus->name); if (retval) goto out; @@ -820,13 +820,13 @@ int bus_register(struct bus_type * bus) goto out; kobject_set_name(&bus->devices.kobj, "devices"); - bus->devices.subsys = &bus->subsys; + bus->devices.kobj.parent = &bus->subsys.kobj; retval = kset_register(&bus->devices); if (retval) goto bus_devices_fail; kobject_set_name(&bus->drivers.kobj, "drivers"); - bus->drivers.subsys = &bus->subsys; + bus->drivers.kobj.parent = &bus->subsys.kobj; bus->drivers.ktype = &ktype_driver; retval = kset_register(&bus->drivers); if (retval) diff --git a/drivers/base/class.c b/drivers/base/class.c index 80bbb2074636..20c4ea6eb50d 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -19,10 +19,8 @@ #include #include "base.h" -extern struct subsystem devices_subsys; - #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) -#define to_class(obj) container_of(obj, struct class, subsys.kset.kobj) +#define to_class(obj) container_of(obj, struct class, subsys.kobj) static ssize_t class_attr_show(struct kobject * kobj, struct attribute * attr, char * buf) @@ -80,7 +78,7 @@ int class_create_file(struct class * cls, const struct class_attribute * attr) { int error; if (cls) { - error = sysfs_create_file(&cls->subsys.kset.kobj, &attr->attr); + error = sysfs_create_file(&cls->subsys.kobj, &attr->attr); } else error = -EINVAL; return error; @@ -89,7 +87,7 @@ int class_create_file(struct class * cls, const struct class_attribute * attr) void class_remove_file(struct class * cls, const struct class_attribute * attr) { if (cls) - sysfs_remove_file(&cls->subsys.kset.kobj, &attr->attr); + sysfs_remove_file(&cls->subsys.kobj, &attr->attr); } static struct class *class_get(struct class *cls) @@ -147,7 +145,7 @@ int class_register(struct class * cls) INIT_LIST_HEAD(&cls->interfaces); kset_init(&cls->class_dirs); init_MUTEX(&cls->sem); - error = kobject_set_name(&cls->subsys.kset.kobj, "%s", cls->name); + error = kobject_set_name(&cls->subsys.kobj, "%s", cls->name); if (error) return error; @@ -611,7 +609,7 @@ int class_device_add(struct class_device *class_dev) if (parent_class_dev) class_dev->kobj.parent = &parent_class_dev->kobj; else - class_dev->kobj.parent = &parent_class->subsys.kset.kobj; + class_dev->kobj.parent = &parent_class->subsys.kobj; error = kobject_add(&class_dev->kobj); if (error) @@ -619,7 +617,7 @@ int class_device_add(struct class_device *class_dev) /* add the needed attributes to this device */ error = sysfs_create_link(&class_dev->kobj, - &parent_class->subsys.kset.kobj, "subsystem"); + &parent_class->subsys.kobj, "subsystem"); if (error) goto out3; class_dev->uevent_attr.attr.name = "uevent"; @@ -917,8 +915,8 @@ int __init classes_init(void) /* ick, this is ugly, the things we go through to keep from showing up * in sysfs... */ subsystem_init(&class_obj_subsys); - if (!class_obj_subsys.kset.subsys) - class_obj_subsys.kset.subsys = &class_obj_subsys; + if (!class_obj_subsys.kobj.parent) + class_obj_subsys.kobj.parent = &class_obj_subsys.kobj; return 0; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 59d9816c332e..b78fc1e68264 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -565,7 +565,7 @@ static struct kobject * get_device_parent(struct device *dev, /* Set the parent to the class, not the parent device */ /* this keeps sysfs from having a symlink to make old udevs happy */ if (dev->class) - return &dev->class->subsys.kset.kobj; + return &dev->class->subsys.kobj; else if (parent) return &parent->kobj; @@ -577,7 +577,7 @@ static struct kobject *virtual_device_parent(struct device *dev) static struct kobject *virtual_dir = NULL; if (!virtual_dir) - virtual_dir = kobject_add_dir(&devices_subsys.kset.kobj, "virtual"); + virtual_dir = kobject_add_dir(&devices_subsys.kobj, "virtual"); return virtual_dir; } @@ -711,12 +711,12 @@ int device_add(struct device *dev) } if (dev->class) { - sysfs_create_link(&dev->kobj, &dev->class->subsys.kset.kobj, + sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, "subsystem"); /* If this is not a "fake" compatible device, then create the * symlink from the class to the device. */ - if (dev->kobj.parent != &dev->class->subsys.kset.kobj) - sysfs_create_link(&dev->class->subsys.kset.kobj, + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, dev->bus_id); if (parent) { sysfs_create_link(&dev->kobj, &dev->parent->kobj, @@ -774,8 +774,8 @@ int device_add(struct device *dev) sysfs_remove_link(&dev->kobj, "subsystem"); /* If this is not a "fake" compatible device, remove the * symlink from the class to the device. */ - if (dev->kobj.parent != &dev->class->subsys.kset.kobj) - sysfs_remove_link(&dev->class->subsys.kset.kobj, + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); if (parent) { #ifdef CONFIG_SYSFS_DEPRECATED @@ -875,8 +875,8 @@ void device_del(struct device * dev) sysfs_remove_link(&dev->kobj, "subsystem"); /* If this is not a "fake" compatible device, remove the * symlink from the class to the device. */ - if (dev->kobj.parent != &dev->class->subsys.kset.kobj) - sysfs_remove_link(&dev->class->subsys.kset.kobj, + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); if (parent) { #ifdef CONFIG_SYSFS_DEPRECATED @@ -1192,9 +1192,9 @@ int device_rename(struct device *dev, char *new_name) #endif if (dev->class) { - sysfs_remove_link(&dev->class->subsys.kset.kobj, + sysfs_remove_link(&dev->class->subsys.kobj, old_symlink_name); - sysfs_create_link(&dev->class->subsys.kset.kobj, &dev->kobj, + sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, dev->bus_id); } put_device(dev); diff --git a/drivers/base/firmware.c b/drivers/base/firmware.c index cb1b98ae0d58..90c862932169 100644 --- a/drivers/base/firmware.c +++ b/drivers/base/firmware.c @@ -17,13 +17,13 @@ static decl_subsys(firmware, NULL, NULL); -int firmware_register(struct subsystem * s) +int firmware_register(struct kset *s) { - kset_set_kset_s(s, firmware_subsys); + kobj_set_kset_s(s, firmware_subsys); return subsystem_register(s); } -void firmware_unregister(struct subsystem * s) +void firmware_unregister(struct kset *s) { subsystem_unregister(s); } diff --git a/drivers/base/power/shutdown.c b/drivers/base/power/shutdown.c index 58b6f77a1b34..a47ee1b70d20 100644 --- a/drivers/base/power/shutdown.c +++ b/drivers/base/power/shutdown.c @@ -16,8 +16,6 @@ #define to_dev(node) container_of(node, struct device, kobj.entry) -extern struct subsystem devices_subsys; - /** * We handle system devices differently - we suspend and shut them @@ -36,7 +34,7 @@ void device_shutdown(void) { struct device * dev, *devn; - list_for_each_entry_safe_reverse(dev, devn, &devices_subsys.kset.list, + list_for_each_entry_safe_reverse(dev, devn, &devices_subsys.list, kobj.entry) { if (dev->bus && dev->bus->shutdown) { dev_dbg(dev, "shutdown\n"); diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 04e5db445c74..29f1291966c1 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -25,7 +25,7 @@ #include "base.h" -extern struct subsystem devices_subsys; +extern struct kset devices_subsys; #define to_sysdev(k) container_of(k, struct sys_device, kobj) #define to_sysdev_attr(a) container_of(a, struct sysdev_attribute, attr) @@ -138,7 +138,7 @@ int sysdev_class_register(struct sysdev_class * cls) pr_debug("Registering sysdev class '%s'\n", kobject_name(&cls->kset.kobj)); INIT_LIST_HEAD(&cls->drivers); - cls->kset.subsys = &system_subsys; + cls->kset.kobj.parent = &system_subsys.kobj; kset_set_kset_s(cls, system_subsys); return kset_register(&cls->kset); } @@ -309,7 +309,7 @@ void sysdev_shutdown(void) pr_debug("Shutting Down System Devices\n"); down(&sysdev_drivers_lock); - list_for_each_entry_reverse(cls, &system_subsys.kset.list, + list_for_each_entry_reverse(cls, &system_subsys.list, kset.kobj.entry) { struct sys_device * sysdev; @@ -384,7 +384,7 @@ int sysdev_suspend(pm_message_t state) pr_debug("Suspending System Devices\n"); - list_for_each_entry_reverse(cls, &system_subsys.kset.list, + list_for_each_entry_reverse(cls, &system_subsys.list, kset.kobj.entry) { pr_debug("Suspending type '%s':\n", @@ -457,7 +457,7 @@ gbl_driver: } /* resume other classes */ - list_for_each_entry_continue(cls, &system_subsys.kset.list, + list_for_each_entry_continue(cls, &system_subsys.list, kset.kobj.entry) { list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) { pr_debug(" %s\n", kobject_name(&err_dev->kobj)); @@ -483,7 +483,7 @@ int sysdev_resume(void) pr_debug("Resuming System Devices\n"); - list_for_each_entry(cls, &system_subsys.kset.list, kset.kobj.entry) { + list_for_each_entry(cls, &system_subsys.list, kset.kobj.entry) { struct sys_device * sysdev; pr_debug("Resuming type '%s':\n", @@ -501,7 +501,7 @@ int sysdev_resume(void) int __init system_bus_init(void) { - system_subsys.kset.kobj.parent = &devices_subsys.kset.kobj; + system_subsys.kobj.parent = &devices_subsys.kobj; return subsystem_register(&system_subsys); } diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index c6281ccd4fe7..1324984a4c35 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -409,7 +409,7 @@ static struct kobj_type ktype_efivar = { }; static ssize_t -dummy(struct subsystem *sub, char *buf) +dummy(struct kset *kset, char *buf) { return -ENODEV; } @@ -422,7 +422,7 @@ efivar_unregister(struct efivar_entry *var) static ssize_t -efivar_create(struct subsystem *sub, const char *buf, size_t count) +efivar_create(struct kset *kset, const char *buf, size_t count) { struct efi_variable *new_var = (struct efi_variable *)buf; struct efivar_entry *search_efivar, *n; @@ -480,7 +480,7 @@ efivar_create(struct subsystem *sub, const char *buf, size_t count) } static ssize_t -efivar_delete(struct subsystem *sub, const char *buf, size_t count) +efivar_delete(struct kset *kset, const char *buf, size_t count) { struct efi_variable *del_var = (struct efi_variable *)buf; struct efivar_entry *search_efivar, *n; @@ -551,11 +551,11 @@ static struct subsys_attribute *var_subsys_attrs[] = { * the efivars driver */ static ssize_t -systab_read(struct subsystem *entry, char *buf) +systab_read(struct kset *kset, char *buf) { char *str = buf; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; if (efi.mps != EFI_INVALID_TABLE_ADDR) @@ -687,7 +687,7 @@ efivars_init(void) goto out_free; } - kset_set_kset_s(&vars_subsys, efi_subsys); + kobj_set_kset_s(&vars_subsys, efi_subsys); error = subsystem_register(&vars_subsys); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 6439f378f6cc..6e55b2c5874e 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -650,7 +650,7 @@ static struct input_handle *evdev_connect(struct input_handler *handler, struct dev->cdev.dev, evdev->name); /* temporary symlink to keep userspace happy */ - sysfs_create_link(&input_class.subsys.kset.kobj, &cdev->kobj, + sysfs_create_link(&input_class.subsys.kobj, &cdev->kobj, evdev->name); return &evdev->handle; @@ -661,7 +661,7 @@ static void evdev_disconnect(struct input_handle *handle) struct evdev *evdev = handle->private; struct evdev_list *list; - sysfs_remove_link(&input_class.subsys.kset.kobj, evdev->name); + sysfs_remove_link(&input_class.subsys.kobj, evdev->name); class_device_destroy(&input_class, MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + evdev->minor)); evdev->exist = 0; diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 9f3529ad3fda..d2482e4f71be 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -539,7 +539,7 @@ static struct input_handle *joydev_connect(struct input_handler *handler, struct dev->cdev.dev, joydev->name); /* temporary symlink to keep userspace happy */ - sysfs_create_link(&input_class.subsys.kset.kobj, &cdev->kobj, + sysfs_create_link(&input_class.subsys.kobj, &cdev->kobj, joydev->name); return &joydev->handle; @@ -550,7 +550,7 @@ static void joydev_disconnect(struct input_handle *handle) struct joydev *joydev = handle->private; struct joydev_list *list; - sysfs_remove_link(&input_class.subsys.kset.kobj, joydev->name); + sysfs_remove_link(&input_class.subsys.kobj, joydev->name); class_device_destroy(&input_class, MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + joydev->minor)); joydev->exist = 0; diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index 664bcc8116fc..074fee429d1b 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -661,7 +661,7 @@ static struct input_handle *mousedev_connect(struct input_handler *handler, stru dev->cdev.dev, mousedev->name); /* temporary symlink to keep userspace happy */ - sysfs_create_link(&input_class.subsys.kset.kobj, &cdev->kobj, + sysfs_create_link(&input_class.subsys.kobj, &cdev->kobj, mousedev->name); return &mousedev->handle; @@ -672,7 +672,7 @@ static void mousedev_disconnect(struct input_handle *handle) struct mousedev *mousedev = handle->private; struct mousedev_list *list; - sysfs_remove_link(&input_class.subsys.kset.kobj, mousedev->name); + sysfs_remove_link(&input_class.subsys.kobj, mousedev->name); class_device_destroy(&input_class, MKDEV(INPUT_MAJOR, MOUSEDEV_MINOR_BASE + mousedev->minor)); mousedev->exist = 0; diff --git a/drivers/input/tsdev.c b/drivers/input/tsdev.c index 0300dca8591d..05dfc10bc079 100644 --- a/drivers/input/tsdev.c +++ b/drivers/input/tsdev.c @@ -420,7 +420,7 @@ static struct input_handle *tsdev_connect(struct input_handler *handler, dev->cdev.dev, tsdev->name); /* temporary symlink to keep userspace happy */ - sysfs_create_link(&input_class.subsys.kset.kobj, &cdev->kobj, + sysfs_create_link(&input_class.subsys.kobj, &cdev->kobj, tsdev->name); return &tsdev->handle; @@ -431,7 +431,7 @@ static void tsdev_disconnect(struct input_handle *handle) struct tsdev *tsdev = handle->private; struct tsdev_list *list; - sysfs_remove_link(&input_class.subsys.kset.kobj, tsdev->name); + sysfs_remove_link(&input_class.subsys.kobj, tsdev->name); class_device_destroy(&input_class, MKDEV(INPUT_MAJOR, TSDEV_MINOR_BASE + tsdev->minor)); tsdev->exist = 0; diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index ea1b7a63598e..815e445c3125 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -520,17 +520,17 @@ static struct pdcspath_entry *pdcspath_entries[] = { /** * pdcs_size_read - Stable Storage size output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. */ static ssize_t -pdcs_size_read(struct subsystem *entry, char *buf) +pdcs_size_read(struct kset *kset, char *buf) { char *out = buf; - - if (!entry || !buf) + + if (!kset || !buf) return -EINVAL; - + /* show the size of the stable storage */ out += sprintf(out, "%ld\n", pdcs_size); @@ -539,17 +539,17 @@ pdcs_size_read(struct subsystem *entry, char *buf) /** * pdcs_auto_read - Stable Storage autoboot/search flag output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. * @knob: The PF_AUTOBOOT or PF_AUTOSEARCH flag */ static ssize_t -pdcs_auto_read(struct subsystem *entry, char *buf, int knob) +pdcs_auto_read(struct kset *kset, char *buf, int knob) { char *out = buf; struct pdcspath_entry *pathentry; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; /* Current flags are stored in primary boot path entry */ @@ -565,40 +565,40 @@ pdcs_auto_read(struct subsystem *entry, char *buf, int knob) /** * pdcs_autoboot_read - Stable Storage autoboot flag output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. */ static inline ssize_t -pdcs_autoboot_read(struct subsystem *entry, char *buf) +pdcs_autoboot_read(struct kset *kset, char *buf) { - return pdcs_auto_read(entry, buf, PF_AUTOBOOT); + return pdcs_auto_read(kset, buf, PF_AUTOBOOT); } /** * pdcs_autosearch_read - Stable Storage autoboot flag output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. */ static inline ssize_t -pdcs_autosearch_read(struct subsystem *entry, char *buf) +pdcs_autosearch_read(struct kset *kset, char *buf) { - return pdcs_auto_read(entry, buf, PF_AUTOSEARCH); + return pdcs_auto_read(kset, buf, PF_AUTOSEARCH); } /** * pdcs_timer_read - Stable Storage timer count output (in seconds). - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. * * The value of the timer field correponds to a number of seconds in powers of 2. */ static ssize_t -pdcs_timer_read(struct subsystem *entry, char *buf) +pdcs_timer_read(struct kset *kset, char *buf) { char *out = buf; struct pdcspath_entry *pathentry; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; /* Current flags are stored in primary boot path entry */ @@ -615,15 +615,15 @@ pdcs_timer_read(struct subsystem *entry, char *buf) /** * pdcs_osid_read - Stable Storage OS ID register output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. */ static ssize_t -pdcs_osid_read(struct subsystem *entry, char *buf) +pdcs_osid_read(struct kset *kset, char *buf) { char *out = buf; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; out += sprintf(out, "%s dependent data (0x%.4x)\n", @@ -634,18 +634,18 @@ pdcs_osid_read(struct subsystem *entry, char *buf) /** * pdcs_osdep1_read - Stable Storage OS-Dependent data area 1 output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. * * This can hold 16 bytes of OS-Dependent data. */ static ssize_t -pdcs_osdep1_read(struct subsystem *entry, char *buf) +pdcs_osdep1_read(struct kset *kset, char *buf) { char *out = buf; u32 result[4]; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; if (pdc_stable_read(PDCS_ADDR_OSD1, &result, sizeof(result)) != PDC_OK) @@ -661,18 +661,18 @@ pdcs_osdep1_read(struct subsystem *entry, char *buf) /** * pdcs_diagnostic_read - Stable Storage Diagnostic register output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. * * I have NFC how to interpret the content of that register ;-). */ static ssize_t -pdcs_diagnostic_read(struct subsystem *entry, char *buf) +pdcs_diagnostic_read(struct kset *kset, char *buf) { char *out = buf; u32 result; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; /* get diagnostic */ @@ -686,18 +686,18 @@ pdcs_diagnostic_read(struct subsystem *entry, char *buf) /** * pdcs_fastsize_read - Stable Storage FastSize register output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. * * This register holds the amount of system RAM to be tested during boot sequence. */ static ssize_t -pdcs_fastsize_read(struct subsystem *entry, char *buf) +pdcs_fastsize_read(struct kset *kset, char *buf) { char *out = buf; u32 result; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; /* get fast-size */ @@ -715,13 +715,13 @@ pdcs_fastsize_read(struct subsystem *entry, char *buf) /** * pdcs_osdep2_read - Stable Storage OS-Dependent data area 2 output. - * @entry: An allocated and populated subsytem struct. We don't use it tho. + * @kset: An allocated and populated struct kset. We don't use it tho. * @buf: The output buffer to write to. * * This can hold pdcs_size - 224 bytes of OS-Dependent data, when available. */ static ssize_t -pdcs_osdep2_read(struct subsystem *entry, char *buf) +pdcs_osdep2_read(struct kset *kset, char *buf) { char *out = buf; unsigned long size; @@ -733,7 +733,7 @@ pdcs_osdep2_read(struct subsystem *entry, char *buf) size = pdcs_size - 224; - if (!entry || !buf) + if (!kset || !buf) return -EINVAL; for (i=0; i #include -int mlog_sys_init(struct subsystem *o2cb_subsys); +int mlog_sys_init(struct kset *o2cb_subsys); void mlog_sys_shutdown(void); #endif /* O2CLUSTER_MASKLOG_H */ diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 1d9f6acafa2e..64f6f378fd09 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c @@ -42,7 +42,6 @@ struct o2cb_attribute { #define O2CB_ATTR(_name, _mode, _show, _store) \ struct o2cb_attribute o2cb_attr_##_name = __ATTR(_name, _mode, _show, _store) -#define to_o2cb_subsys(k) container_of(to_kset(k), struct subsystem, kset) #define to_o2cb_attr(_attr) container_of(_attr, struct o2cb_attribute, attr) static ssize_t o2cb_interface_revision_show(char *buf) @@ -79,7 +78,7 @@ static ssize_t o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer) { struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); - struct subsystem *sbs = to_o2cb_subsys(kobj); + struct kset *sbs = to_kset(kobj); BUG_ON(sbs != &o2cb_subsys); @@ -93,7 +92,7 @@ o2cb_store(struct kobject * kobj, struct attribute * attr, const char * buffer, size_t count) { struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); - struct subsystem *sbs = to_o2cb_subsys(kobj); + struct kset *sbs = to_kset(kobj); BUG_ON(sbs != &o2cb_subsys); @@ -112,7 +111,7 @@ int o2cb_sys_init(void) { int ret; - o2cb_subsys.kset.kobj.ktype = &o2cb_subsys_type; + o2cb_subsys.kobj.ktype = &o2cb_subsys_type; ret = subsystem_register(&o2cb_subsys); if (ret) return ret; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 8a7d0035ad7a..f01572fca02f 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -312,7 +312,7 @@ static struct attribute * default_attrs[] = { NULL, }; -extern struct subsystem block_subsys; +extern struct kset block_subsys; static void part_release(struct kobject *kobj) { @@ -388,7 +388,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, kobject_add(&p->kobj); if (!disk->part_uevent_suppress) kobject_uevent(&p->kobj, KOBJ_ADD); - sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem"); + sysfs_create_link(&p->kobj, &block_subsys.kobj, "subsystem"); if (flags & ADDPART_FLAG_WHOLEDISK) { static struct attribute addpartattr = { .name = "whole_disk", @@ -444,7 +444,7 @@ static int disk_sysfs_symlinks(struct gendisk *disk) goto err_out_dev_link; } - err = sysfs_create_link(&disk->kobj, &block_subsys.kset.kobj, + err = sysfs_create_link(&disk->kobj, &block_subsys.kobj, "subsystem"); if (err) goto err_out_disk_name_lnk; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index db0413a411d6..0e637adc2b87 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -13,8 +13,7 @@ #include "sysfs.h" -#define to_subsys(k) container_of(k,struct subsystem,kset.kobj) -#define to_sattr(a) container_of(a,struct subsys_attribute,attr) +#define to_sattr(a) container_of(a,struct subsys_attribute, attr) /* * Subsystem file operations. @@ -24,12 +23,12 @@ static ssize_t subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page) { - struct subsystem * s = to_subsys(kobj); + struct kset *kset = to_kset(kobj); struct subsys_attribute * sattr = to_sattr(attr); ssize_t ret = -EIO; if (sattr->show) - ret = sattr->show(s,page); + ret = sattr->show(kset, page); return ret; } @@ -37,12 +36,12 @@ static ssize_t subsys_attr_store(struct kobject * kobj, struct attribute * attr, const char * page, size_t count) { - struct subsystem * s = to_subsys(kobj); + struct kset *kset = to_kset(kobj); struct subsys_attribute * sattr = to_sattr(attr); ssize_t ret = -EIO; if (sattr->store) - ret = sattr->store(s,page,count); + ret = sattr->store(kset, page, count); return ret; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0d9f984a60a1..16c3c441256e 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -316,7 +316,7 @@ struct acpi_bus_event { u32 data; }; -extern struct subsystem acpi_subsys; +extern struct kset acpi_subsys; /* * External Functions diff --git a/include/linux/device.h b/include/linux/device.h index a0cd2ced31a9..ee292fe87cb2 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -53,7 +53,7 @@ struct bus_type { const char * name; struct module * owner; - struct subsystem subsys; + struct kset subsys; struct kset drivers; struct kset devices; struct klist klist_devices; @@ -179,7 +179,7 @@ struct class { const char * name; struct module * owner; - struct subsystem subsys; + struct kset subsys; struct list_head children; struct list_head devices; struct list_head interfaces; @@ -559,8 +559,8 @@ extern void device_shutdown(void); /* drivers/base/firmware.c */ -extern int __must_check firmware_register(struct subsystem *); -extern void firmware_unregister(struct subsystem *); +extern int __must_check firmware_register(struct kset *); +extern void firmware_unregister(struct kset *); /* debugging and troubleshooting/diagnostic helpers. */ extern const char *dev_driver_string(struct device *dev); diff --git a/include/linux/fs.h b/include/linux/fs.h index 095a9c9a64fb..7c0077f06e24 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1416,7 +1416,7 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, extern int vfs_statfs(struct dentry *, struct kstatfs *); /* /sys/fs */ -extern struct subsystem fs_subsys; +extern struct kset fs_subsys; #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 diff --git a/include/linux/kobject.h b/include/linux/kobject.h index eb0e63ef297f..c288e41ba331 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -124,7 +124,6 @@ struct kset_uevent_ops { }; struct kset { - struct subsystem * subsys; struct kobj_type * ktype; struct list_head list; spinlock_t list_lock; @@ -171,32 +170,23 @@ extern struct kobject * kset_find_obj(struct kset *, const char *); #define set_kset_name(str) .kset = { .kobj = { .name = str } } - -struct subsystem { - struct kset kset; -}; - #define decl_subsys(_name,_type,_uevent_ops) \ -struct subsystem _name##_subsys = { \ - .kset = { \ - .kobj = { .name = __stringify(_name) }, \ - .ktype = _type, \ - .uevent_ops =_uevent_ops, \ - } \ +struct kset _name##_subsys = { \ + .kobj = { .name = __stringify(_name) }, \ + .ktype = _type, \ + .uevent_ops =_uevent_ops, \ } #define decl_subsys_name(_varname,_name,_type,_uevent_ops) \ -struct subsystem _varname##_subsys = { \ - .kset = { \ - .kobj = { .name = __stringify(_name) }, \ - .ktype = _type, \ - .uevent_ops =_uevent_ops, \ - } \ +struct kset _varname##_subsys = { \ + .kobj = { .name = __stringify(_name) }, \ + .ktype = _type, \ + .uevent_ops =_uevent_ops, \ } /* The global /sys/kernel/ subsystem for people to chain off of */ -extern struct subsystem kernel_subsys; +extern struct kset kernel_subsys; /* The global /sys/hypervisor/ subsystem */ -extern struct subsystem hypervisor_subsys; +extern struct kset hypervisor_subsys; /** * Helpers for setting the kset of registered objects. @@ -214,7 +204,7 @@ extern struct subsystem hypervisor_subsys; */ #define kobj_set_kset_s(obj,subsys) \ - (obj)->kobj.kset = &(subsys).kset + (obj)->kobj.kset = &(subsys) /** * kset_set_kset_s(obj,subsys) - set kset for embedded kset. @@ -228,7 +218,7 @@ extern struct subsystem hypervisor_subsys; */ #define kset_set_kset_s(obj,subsys) \ - (obj)->kset.kobj.kset = &(subsys).kset + (obj)->kset.kobj.kset = &(subsys) /** * subsys_set_kset(obj,subsys) - set kset for subsystem @@ -241,29 +231,31 @@ extern struct subsystem hypervisor_subsys; */ #define subsys_set_kset(obj,_subsys) \ - (obj)->subsys.kset.kobj.kset = &(_subsys).kset + (obj)->subsys.kobj.kset = &(_subsys) -extern void subsystem_init(struct subsystem *); -extern int __must_check subsystem_register(struct subsystem *); -extern void subsystem_unregister(struct subsystem *); +extern void subsystem_init(struct kset *); +extern int __must_check subsystem_register(struct kset *); +extern void subsystem_unregister(struct kset *); -static inline struct subsystem * subsys_get(struct subsystem * s) +static inline struct kset *subsys_get(struct kset *s) { - return s ? container_of(kset_get(&s->kset),struct subsystem,kset) : NULL; + if (s) + return kset_get(s); + return NULL; } -static inline void subsys_put(struct subsystem * s) +static inline void subsys_put(struct kset *s) { - kset_put(&s->kset); + kset_put(s); } struct subsys_attribute { struct attribute attr; - ssize_t (*show)(struct subsystem *, char *); - ssize_t (*store)(struct subsystem *, const char *, size_t); + ssize_t (*show)(struct kset *, char *); + ssize_t (*store)(struct kset *, const char *, size_t); }; -extern int __must_check subsys_create_file(struct subsystem * , +extern int __must_check subsys_create_file(struct kset *, struct subsys_attribute *); #if defined(CONFIG_HOTPLUG) diff --git a/include/linux/module.h b/include/linux/module.h index 95679eb8571e..f0b0faf42d5d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -568,7 +568,7 @@ struct device_driver; #ifdef CONFIG_SYSFS struct module; -extern struct subsystem module_subsys; +extern struct kset module_subsys; int mod_sysfs_init(struct module *mod); int mod_sysfs_setup(struct module *mod, diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a675a05c4091..ab4cb6ecd47c 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -174,7 +174,7 @@ extern int pci_hp_register (struct hotplug_slot *slot); extern int pci_hp_deregister (struct hotplug_slot *slot); extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, struct hotplug_slot_info *info); -extern struct subsystem pci_hotplug_slots_subsys; +extern struct kset pci_hotplug_slots_subsys; /* PCI Setting Record (Type 0) */ struct hpp_type0 { diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index e0ffe4ab0917..559deca5ed15 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -24,18 +24,18 @@ static struct subsys_attribute _name##_attr = \ #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) /* current uevent sequence number */ -static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page) +static ssize_t uevent_seqnum_show(struct kset *kset, char *page) { return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum); } KERNEL_ATTR_RO(uevent_seqnum); /* uevent helper program, used during early boo */ -static ssize_t uevent_helper_show(struct subsystem *subsys, char *page) +static ssize_t uevent_helper_show(struct kset *kset, char *page) { return sprintf(page, "%s\n", uevent_helper); } -static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count) +static ssize_t uevent_helper_store(struct kset *kset, const char *page, size_t count) { if (count+1 > UEVENT_HELPER_PATH_LEN) return -ENOENT; @@ -49,13 +49,13 @@ KERNEL_ATTR_RW(uevent_helper); #endif #ifdef CONFIG_KEXEC -static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page) +static ssize_t kexec_loaded_show(struct kset *kset, char *page) { return sprintf(page, "%d\n", !!kexec_image); } KERNEL_ATTR_RO(kexec_loaded); -static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page) +static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page) { return sprintf(page, "%d\n", !!kexec_crash_image); } @@ -85,7 +85,7 @@ static int __init ksysfs_init(void) { int error = subsystem_register(&kernel_subsys); if (!error) - error = sysfs_create_group(&kernel_subsys.kset.kobj, + error = sysfs_create_group(&kernel_subsys.kobj, &kernel_attr_group); return error; diff --git a/kernel/module.c b/kernel/module.c index 9da5af668a20..ff982ec435fc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -45,6 +45,8 @@ #include #include +extern int module_sysfs_initialized; + #if 0 #define DEBUGP printk #else @@ -1117,8 +1119,8 @@ int mod_sysfs_init(struct module *mod) { int err; - if (!module_subsys.kset.subsys) { - printk(KERN_ERR "%s: module_subsys not initialized\n", + if (!module_sysfs_initialized) { + printk(KERN_ERR "%s: module sysfs not initialized\n", mod->name); err = -EINVAL; goto out; @@ -2385,7 +2387,7 @@ void module_add_driver(struct module *mod, struct device_driver *drv) struct kobject *mkobj; /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name); + mkobj = kset_find_obj(&module_subsys, drv->mod_name); if (mkobj) { mk = container_of(mkobj, struct module_kobject, kobj); /* remember our module structure */ diff --git a/kernel/params.c b/kernel/params.c index 1fc4ac746cd8..312172320b4c 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -691,6 +691,7 @@ static struct kset_uevent_ops module_uevent_ops = { }; decl_subsys(module, &module_ktype, &module_uevent_ops); +int module_sysfs_initialized; static struct kobj_type module_ktype = { .sysfs_ops = &module_sysfs_ops, @@ -709,6 +710,7 @@ static int __init param_sysfs_init(void) __FILE__, __LINE__, ret); return ret; } + module_sysfs_initialized = 1; param_sysfs_builtin(); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 02e4fb69111a..8df51c23bba4 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -322,13 +322,13 @@ static const char * const pm_disk_modes[] = { * supports it (as determined from pm_ops->pm_disk_mode). */ -static ssize_t disk_show(struct subsystem * subsys, char * buf) +static ssize_t disk_show(struct kset *kset, char *buf) { return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); } -static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +static ssize_t disk_store(struct kset *kset, const char *buf, size_t n) { int error = 0; int i; @@ -373,13 +373,13 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) power_attr(disk); -static ssize_t resume_show(struct subsystem * subsys, char *buf) +static ssize_t resume_show(struct kset *kset, char *buf) { return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); } -static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) +static ssize_t resume_store(struct kset *kset, const char *buf, size_t n) { unsigned int maj, min; dev_t res; @@ -405,12 +405,12 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) power_attr(resume); -static ssize_t image_size_show(struct subsystem * subsys, char *buf) +static ssize_t image_size_show(struct kset *kset, char *buf) { return sprintf(buf, "%lu\n", image_size); } -static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) +static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n) { unsigned long size; @@ -439,7 +439,7 @@ static struct attribute_group attr_group = { static int __init pm_disk_init(void) { - return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + return sysfs_create_group(&power_subsys.kobj, &attr_group); } core_initcall(pm_disk_init); diff --git a/kernel/power/main.c b/kernel/power/main.c index 72419a3b1beb..b21c2a56f960 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -285,7 +285,7 @@ decl_subsys(power,NULL,NULL); * proper enumerated value, and initiates a suspend transition. */ -static ssize_t state_show(struct subsystem * subsys, char * buf) +static ssize_t state_show(struct kset *kset, char *buf) { int i; char * s = buf; @@ -298,7 +298,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf) return (s - buf); } -static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) +static ssize_t state_store(struct kset *kset, const char *buf, size_t n) { suspend_state_t state = PM_SUSPEND_STANDBY; const char * const *s; @@ -325,13 +325,13 @@ power_attr(state); #ifdef CONFIG_PM_TRACE int pm_trace_enabled; -static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +static ssize_t pm_trace_show(struct kset *kset, char *buf) { return sprintf(buf, "%d\n", pm_trace_enabled); } static ssize_t -pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +pm_trace_store(struct kset *kset, const char *buf, size_t n) { int val; @@ -365,7 +365,7 @@ static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) - error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + error = sysfs_create_group(&power_subsys.kobj,&attr_group); return error; } diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816bf4..5f842c3efc4b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -35,7 +35,7 @@ static struct subsys_attribute _name##_attr = { \ .store = _name##_store, \ } -extern struct subsystem power_subsys; +extern struct kset power_subsys; /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; diff --git a/lib/kobject.c b/lib/kobject.c index cecf2fbede3e..fc5f3f6e7329 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -582,22 +582,10 @@ void kset_init(struct kset * k) /** * kset_add - add a kset object to the hierarchy. * @k: kset. - * - * Simply, this adds the kset's embedded kobject to the - * hierarchy. - * We also try to make sure that the kset's embedded kobject - * has a parent before it is added. We only care if the embedded - * kobject is not part of a kset itself, since kobject_add() - * assigns a parent in that case. - * If that is the case, and the kset has a controlling subsystem, - * then we set the kset's parent to be said subsystem. */ int kset_add(struct kset * k) { - if (!k->kobj.parent && !k->kobj.kset && k->subsys) - k->kobj.parent = &k->subsys->kset.kobj; - return kobject_add(&k->kobj); } @@ -656,53 +644,28 @@ struct kobject * kset_find_obj(struct kset * kset, const char * name) return ret; } - -void subsystem_init(struct subsystem * s) +void subsystem_init(struct kset *s) { - kset_init(&s->kset); + kset_init(s); } -/** - * subsystem_register - register a subsystem. - * @s: the subsystem we're registering. - * - * Once we register the subsystem, we want to make sure that - * the kset points back to this subsystem. - */ - -int subsystem_register(struct subsystem * s) +int subsystem_register(struct kset *s) { - int error; - - if (!s) - return -EINVAL; - - subsystem_init(s); - pr_debug("subsystem %s: registering\n",s->kset.kobj.name); - - if (!(error = kset_add(&s->kset))) { - if (!s->kset.subsys) - s->kset.subsys = s; - } - return error; + return kset_register(s); } -void subsystem_unregister(struct subsystem * s) +void subsystem_unregister(struct kset *s) { - if (!s) - return; - pr_debug("subsystem %s: unregistering\n",s->kset.kobj.name); - kset_unregister(&s->kset); + kset_unregister(s); } - /** * subsystem_create_file - export sysfs attribute file. * @s: subsystem. * @a: subsystem attribute descriptor. */ -int subsys_create_file(struct subsystem * s, struct subsys_attribute * a) +int subsys_create_file(struct kset *s, struct subsys_attribute *a) { int error = 0; @@ -710,28 +673,12 @@ int subsys_create_file(struct subsystem * s, struct subsys_attribute * a) return -EINVAL; if (subsys_get(s)) { - error = sysfs_create_file(&s->kset.kobj,&a->attr); + error = sysfs_create_file(&s->kobj, &a->attr); subsys_put(s); } return error; } - -/** - * subsystem_remove_file - remove sysfs attribute file. - * @s: subsystem. - * @a: attribute desciptor. - */ -#if 0 -void subsys_remove_file(struct subsystem * s, struct subsys_attribute * a) -{ - if (subsys_get(s)) { - sysfs_remove_file(&s->kset.kobj,&a->attr); - subsys_put(s); - } -} -#endif /* 0 */ - EXPORT_SYMBOL(kobject_init); EXPORT_SYMBOL(kobject_register); EXPORT_SYMBOL(kobject_unregister); diff --git a/security/inode.c b/security/inode.c index d7ecf89fbc74..307211ac7346 100644 --- a/security/inode.c +++ b/security/inode.c @@ -321,7 +321,7 @@ static int __init securityfs_init(void) { int retval; - kset_set_kset_s(&security_subsys, kernel_subsys); + kobj_set_kset_s(&security_subsys, kernel_subsys); retval = subsystem_register(&security_subsys); if (retval) return retval; -- cgit 1.4.1 From 7fe3730de729b758e9f69b862b9255d998671b5f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 18 Apr 2007 19:39:21 +1000 Subject: MSI: arch must connect the irq and the msi_desc set_irq_msi() currently connects an irq_desc to an msi_desc. The archs call it at some point in their setup routine, and then the generic code sets up the reverse mapping from the msi_desc back to the irq. set_irq_msi() should do both connections, making it the one and only call required to connect an irq with it's MSI desc and vice versa. The arch code MUST call set_irq_msi(), and it must do so only once it's sure it's not going to fail the irq allocation. Given that there's no need for the arch to return the irq anymore, the return value from the arch setup routine just becomes 0 for success and anything else for failure. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/io_apic.c | 4 ++-- arch/ia64/sn/kernel/msi_sn.c | 4 ++-- arch/sparc64/kernel/pci.c | 4 ++-- arch/sparc64/kernel/pci_sun4v.c | 4 ++-- arch/x86_64/kernel/io_apic.c | 4 ++-- drivers/pci/msi.c | 23 +++++++++-------------- kernel/irq/chip.c | 3 +++ 7 files changed, 22 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index b3ab8ffebd27..89d85d244926 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2611,19 +2611,19 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) if (irq < 0) return irq; - set_irq_msi(irq, desc); ret = msi_compose_msg(dev, irq, &msg); if (ret < 0) { destroy_irq(irq); return ret; } + set_irq_msi(irq, desc); write_msi_msg(irq, &msg); set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - return irq; + return 0; } void arch_teardown_msi_irq(unsigned int irq) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 49873aa4a37d..83f190ffe350 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -87,7 +87,6 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) if (irq < 0) return irq; - set_irq_msi(irq, entry); /* * Set up the vector plumbing. Let the prom (via sn_intr_alloc) * decide which cpu to direct this msi at by default. @@ -144,10 +143,11 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) */ msg.data = 0x100 + irq; + set_irq_msi(irq, entry); write_msi_msg(irq, &msg); set_irq_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); - return irq; + return 0; } #ifdef CONFIG_SMP diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 023af41ad68d..9a549547cb2b 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -1092,10 +1092,10 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) return -EINVAL; err = p->setup_msi_irq(&virt_irq, pdev, desc); - if (err < 0) + if (err) return err; - return virt_irq; + return 0; } void arch_teardown_msi_irq(unsigned int virt_irq) diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index 94295c219329..1ccf4c9a9a43 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -1169,8 +1169,6 @@ static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p, if (!devino) goto out_err; - set_irq_msi(*virt_irq_p, entry); - msiqid = ((devino - pbm->msiq_first_devino) + pbm->msiq_first); @@ -1204,6 +1202,8 @@ static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p, msg.address_lo = pbm->msi32_start; } msg.data = msi_num; + + set_irq_msi(*virt_irq_p, entry); write_msi_msg(*virt_irq_p, &msg); irq_install_pre_handler(*virt_irq_p, diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c6a5bc7e8118..b7d2b76b92d4 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1983,18 +1983,18 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) if (irq < 0) return irq; - set_irq_msi(irq, desc); ret = msi_compose_msg(dev, irq, &msg); if (ret < 0) { destroy_irq(irq); return ret; } + set_irq_msi(irq, desc); write_msi_msg(irq, &msg); set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - return irq; + return 0; } void arch_teardown_msi_irq(unsigned int irq) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 7a44ba467481..88362f1bd9cf 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -297,7 +297,7 @@ void pci_restore_msi_state(struct pci_dev *dev) static int msi_capability_init(struct pci_dev *dev) { struct msi_desc *entry; - int pos, irq; + int pos, ret; u16 control; msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ @@ -335,21 +335,19 @@ static int msi_capability_init(struct pci_dev *dev) maskbits); } /* Configure MSI capability structure */ - irq = arch_setup_msi_irq(dev, entry); - if (irq < 0) { + ret = arch_setup_msi_irq(dev, entry); + if (ret) { kfree(entry); - return irq; + return ret; } - entry->irq = irq; list_add(&entry->list, &dev->msi_list); - set_irq_msi(irq, entry); /* Set MSI enabled bits */ pci_intx(dev, 0); /* disable intx */ msi_set_enable(dev, 1); dev->msi_enabled = 1; - dev->irq = irq; + dev->irq = entry->irq; return 0; } @@ -367,7 +365,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec) { struct msi_desc *entry; - int irq, pos, i, j, nr_entries; + int irq, pos, i, j, nr_entries, ret; unsigned long phys_addr; u32 table_offset; u16 control; @@ -407,16 +405,13 @@ static int msix_capability_init(struct pci_dev *dev, entry->mask_base = base; /* Configure MSI-X capability structure */ - irq = arch_setup_msi_irq(dev, entry); - if (irq < 0) { + ret = arch_setup_msi_irq(dev, entry); + if (ret) { kfree(entry); break; } - entry->irq = irq; - entries[i].vector = irq; + entries[i].vector = entry->irq; list_add(&entry->list, &dev->msi_list); - - set_irq_msi(irq, entry); } if (i != nvec) { int avail = i - 1; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 0133f4f9e9f0..615ce97c6cfd 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -185,6 +186,8 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry) desc = irq_desc + irq; spin_lock_irqsave(&desc->lock, flags); desc->msi_desc = entry; + if (entry) + entry->irq = irq; spin_unlock_irqrestore(&desc->lock, flags); return 0; } -- cgit 1.4.1 From 476f35348eb8d2a827765992899fea78b7dcc46f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 6 May 2007 14:48:58 -0700 Subject: Safer nr_node_ids and nr_node_ids determination and initial values The nr_cpu_ids value is currently only calculated in smp_init. However, it may be needed before (SLUB needs it on kmem_cache_init!) and other kernel components may also want to allocate dynamically sized per cpu array before smp_init. So move the determination of possible cpus into sched_init() where we already loop over all possible cpus early in boot. Also initialize both nr_node_ids and nr_cpu_ids with the highest value they could take. If we have accidental users before these values are determined then the current valud of 0 may cause too small per cpu and per node arrays to be allocated. If it is set to the maximum possible then we only waste some memory for early boot users. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 5 ----- kernel/sched.c | 8 ++++++++ lib/cpumask.c | 3 --- mm/page_alloc.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/init/main.c b/init/main.c index df982ff5d2b0..0e22f40487bb 100644 --- a/init/main.c +++ b/init/main.c @@ -384,11 +384,6 @@ static void __init setup_per_cpu_areas(void) static void __init smp_init(void) { unsigned int cpu; - unsigned highest = 0; - - for_each_cpu_mask(cpu, cpu_possible_map) - highest = cpu; - nr_cpu_ids = highest + 1; /* FIXME: This should be done in userspace --RR */ for_each_present_cpu(cpu) { diff --git a/kernel/sched.c b/kernel/sched.c index 960d7c5fca39..0227f1625a75 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5244,6 +5244,11 @@ int __init migration_init(void) #endif #ifdef CONFIG_SMP + +/* Number of possible processor ids */ +int nr_cpu_ids __read_mostly = NR_CPUS; +EXPORT_SYMBOL(nr_cpu_ids); + #undef SCHED_DOMAIN_DEBUG #ifdef SCHED_DOMAIN_DEBUG static void sched_domain_debug(struct sched_domain *sd, int cpu) @@ -6726,6 +6731,7 @@ int in_sched_functions(unsigned long addr) void __init sched_init(void) { int i, j, k; + int highest_cpu = 0; for_each_possible_cpu(i) { struct prio_array *array; @@ -6760,11 +6766,13 @@ void __init sched_init(void) // delimiter for bitsearch __set_bit(MAX_PRIO, array->bitmap); } + highest_cpu = i; } set_load_weight(&init_task); #ifdef CONFIG_SMP + nr_cpu_ids = highest_cpu + 1; open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); #endif diff --git a/lib/cpumask.c b/lib/cpumask.c index 1ea2c184315d..bb4f76d3c3e7 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -15,9 +15,6 @@ int __next_cpu(int n, const cpumask_t *srcp) } EXPORT_SYMBOL(__next_cpu); -int nr_cpu_ids; -EXPORT_SYMBOL(nr_cpu_ids); - int __any_online_cpu(const cpumask_t *mask) { int cpu; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 353ce9039a86..019ceda6a8b6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -665,7 +665,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, } #if MAX_NUMNODES > 1 -int nr_node_ids __read_mostly; +int nr_node_ids __read_mostly = MAX_NUMNODES; EXPORT_SYMBOL(nr_node_ids); /* -- cgit 1.4.1 From c596d9f320aaf30d28c1d793ff3a976dee1db8f5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sun, 6 May 2007 14:49:32 -0700 Subject: cpusets: allow TIF_MEMDIE threads to allocate anywhere OOM killed tasks have access to memory reserves as specified by the TIF_MEMDIE flag in the hopes that it will quickly exit. If such a task has memory allocations constrained by cpusets, we may encounter a deadlock if a blocking task cannot exit because it cannot allocate the necessary memory. We allow tasks that have the TIF_MEMDIE flag to allocate memory anywhere, including outside its cpuset restriction, so that it can quickly die regardless of whether it is __GFP_HARDWALL. Cc: Andi Kleen Cc: Paul Jackson Cc: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f382b0f775e1..d240349cbf0f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2351,6 +2351,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * z's node is in our tasks mems_allowed, yes. If it's not a * __GFP_HARDWALL request and this zone's nodes is in the nearest * mem_exclusive cpuset ancestor to this tasks cpuset, yes. + * If the task has been OOM killed and has access to memory reserves + * as specified by the TIF_MEMDIE flag, yes. * Otherwise, no. * * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() @@ -2368,7 +2370,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * calls get to this routine, we should just shut up and say 'yes'. * * GFP_USER allocations are marked with the __GFP_HARDWALL bit, - * and do not allow allocations outside the current tasks cpuset. + * and do not allow allocations outside the current tasks cpuset + * unless the task has been OOM killed as is marked TIF_MEMDIE. * GFP_KERNEL allocations are not so marked, so can escape to the * nearest enclosing mem_exclusive ancestor cpuset. * @@ -2392,6 +2395,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * affect that: * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok + * TIF_MEMDIE - any node ok * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. * @@ -2413,6 +2417,12 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; + /* + * Allow tasks that have access to memory reserves because they have + * been OOM killed to get memory anywhere. + */ + if (unlikely(test_thread_flag(TIF_MEMDIE))) + return 1; if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ return 0; @@ -2438,7 +2448,9 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) * * If we're in interrupt, yes, we can always allocate. * If __GFP_THISNODE is set, yes, we can always allocate. If zone - * z's node is in our tasks mems_allowed, yes. Otherwise, no. + * z's node is in our tasks mems_allowed, yes. If the task has been + * OOM killed and has access to memory reserves as specified by the + * TIF_MEMDIE flag, yes. Otherwise, no. * * The __GFP_THISNODE placement logic is really handled elsewhere, * by forcibly using a zonelist starting at a specified node, and by @@ -2462,6 +2474,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) node = zone_to_nid(z); if (node_isset(node, current->mems_allowed)) return 1; + /* + * Allow tasks that have access to memory reserves because they have + * been OOM killed to get memory anywhere. + */ + if (unlikely(test_thread_flag(TIF_MEMDIE))) + return 1; return 0; } -- cgit 1.4.1 From 0a31bd5f2bbb6473ef9d24f0063ca91cfa678b64 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 6 May 2007 14:49:57 -0700 Subject: KMEM_CACHE(): simplify slab cache creation This patch provides a new macro KMEM_CACHE(, ) to simplify slab creation. KMEM_CACHE creates a slab with the name of the struct, with the size of the struct and with the alignment of the struct. Additional slab flags may be specified if necessary. Example struct test_slab { int a,b,c; struct list_head; } __cacheline_aligned_in_smp; test_slab_cache = KMEM_CACHE(test_slab, SLAB_PANIC) will create a new slab named "test_slab" of the size sizeof(struct test_slab) and aligned to the alignment of test slab. If it fails then we panic. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 6 ++---- fs/aio.c | 6 ++---- fs/bio.c | 3 +-- fs/dcache.c | 8 ++------ include/linux/slab.h | 12 ++++++++++++ kernel/delayacct.c | 6 +----- kernel/pid.c | 4 +--- kernel/signal.c | 6 +----- kernel/taskstats.c | 4 +--- 9 files changed, 23 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 64df3fa303b0..baef5fc7cff8 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2090,13 +2090,11 @@ static void cfq_slab_kill(void) static int __init cfq_slab_setup(void) { - cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, - NULL, NULL); + cfq_pool = KMEM_CACHE(cfq_queue, 0); if (!cfq_pool) goto fail; - cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", - sizeof(struct cfq_io_context), 0, 0, NULL, NULL); + cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); if (!cfq_ioc_pool) goto fail; diff --git a/fs/aio.c b/fs/aio.c index e4598d6d49dd..b97ab8028b6d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -68,10 +68,8 @@ static void aio_queue_work(struct kioctx *); */ static int __init aio_setup(void) { - kiocb_cachep = kmem_cache_create("kiocb", sizeof(struct kiocb), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - kioctx_cachep = kmem_cache_create("kioctx", sizeof(struct kioctx), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); aio_wq = create_workqueue("aio"); diff --git a/fs/bio.c b/fs/bio.c index 693940da4090..093345f00128 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1193,8 +1193,7 @@ static void __init biovec_init_slabs(void) static int __init init_bio(void) { - bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); biovec_init_slabs(); diff --git a/fs/dcache.c b/fs/dcache.c index d68631f18df1..d1bf5d8aeb5a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2052,12 +2052,8 @@ static void __init dcache_init(unsigned long mempages) * but it is probably not worth it because of the cache nature * of the dcache. */ - dentry_cache = kmem_cache_create("dentry_cache", - sizeof(struct dentry), - 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| - SLAB_MEM_SPREAD), - NULL, NULL); + dentry_cache = KMEM_CACHE(dentry, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); diff --git a/include/linux/slab.h b/include/linux/slab.h index a9befa50d3e3..e14b4c338b89 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -57,6 +57,18 @@ unsigned int kmem_cache_size(struct kmem_cache *); const char *kmem_cache_name(struct kmem_cache *); int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); +/* + * Please use this macro to create slab caches. Simply specify the + * name of the structure and maybe some flags that are listed above. + * + * The alignment of the struct determines object alignment. If you + * f.e. add ____cacheline_aligned_in_smp to the struct declaration + * then the objects will be properly aligned in SMP configurations. + */ +#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ + sizeof(struct __struct), __alignof__(struct __struct),\ + (__flags), NULL, NULL) + #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); #else diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 766d5912b26a..c0148ae992c4 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -31,11 +31,7 @@ __setup("nodelayacct", delayacct_setup_disable); void delayacct_init(void) { - delayacct_cache = kmem_cache_create("delayacct_cache", - sizeof(struct task_delay_info), - 0, - SLAB_PANIC, - NULL, NULL); + delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC); delayacct_tsk_init(&init_task); } diff --git a/kernel/pid.c b/kernel/pid.c index 78f2aee90f54..9c80bc23d6b8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -412,7 +412,5 @@ void __init pidmap_init(void) set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); - pid_cachep = kmem_cache_create("pid", sizeof(struct pid), - __alignof__(struct pid), - SLAB_PANIC, NULL, NULL); + pid_cachep = KMEM_CACHE(pid, SLAB_PANIC); } diff --git a/kernel/signal.c b/kernel/signal.c index 3670225ecbc0..2b4087d545a3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2636,9 +2636,5 @@ __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) void __init signals_init(void) { - sigqueue_cachep = - kmem_cache_create("sigqueue", - sizeof(struct sigqueue), - __alignof__(struct sigqueue), - SLAB_PANIC, NULL, NULL); + sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index ad7d2392cb0e..906cae771585 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -524,9 +524,7 @@ void __init taskstats_init_early(void) { unsigned int i; - taskstats_cache = kmem_cache_create("taskstats_cache", - sizeof(struct taskstats), - 0, SLAB_PANIC, NULL, NULL); + taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); for_each_possible_cpu(i) { INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); init_rwsem(&(per_cpu(listener_array, i).sem)); -- cgit 1.4.1 From 50953fe9e00ebbeffa032a565ab2f08312d51a87 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 6 May 2007 14:50:16 -0700 Subject: slab allocators: Remove SLAB_DEBUG_INITIAL flag I have never seen a use of SLAB_DEBUG_INITIAL. It is only supported by SLAB. I think its purpose was to have a callback after an object has been freed to verify that the state is the constructor state again? The callback is performed before each freeing of an object. I would think that it is much easier to check the object state manually before the free. That also places the check near the code object manipulation of the object. Also the SLAB_DEBUG_INITIAL callback is only performed if the kernel was compiled with SLAB debugging on. If there would be code in a constructor handling SLAB_DEBUG_INITIAL then it would have to be conditional on SLAB_DEBUG otherwise it would just be dead code. But there is no such code in the kernel. I think SLUB_DEBUG_INITIAL is too problematic to make real use of, difficult to understand and there are easier ways to accomplish the same effect (i.e. add debug code before kfree). There is a related flag SLAB_CTOR_VERIFY that is frequently checked to be clear in fs inode caches. Remove the pointless checks (they would even be pointless without removeal of SLAB_DEBUG_INITIAL) from the fs constructors. This is the last slab flag that SLUB did not support. Remove the check for unimplemented flags from SLUB. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 3 +-- drivers/mtd/ubi/eba.c | 3 +-- fs/adfs/super.c | 3 +-- fs/affs/super.c | 3 +-- fs/afs/super.c | 3 +-- fs/befs/linuxvfs.c | 3 +-- fs/bfs/inode.c | 3 +-- fs/block_dev.c | 4 +--- fs/buffer.c | 3 +-- fs/cifs/cifsfs.c | 3 +-- fs/coda/inode.c | 3 +-- fs/ecryptfs/main.c | 3 +-- fs/efs/super.c | 3 +-- fs/ext2/super.c | 3 +-- fs/ext3/super.c | 3 +-- fs/ext4/super.c | 3 +-- fs/fat/cache.c | 3 +-- fs/fat/inode.c | 3 +-- fs/fuse/inode.c | 3 +-- fs/gfs2/main.c | 6 ++---- fs/hfs/super.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hpfs/super.c | 3 +-- fs/hugetlbfs/inode.c | 3 +-- fs/inode.c | 3 +-- fs/isofs/inode.c | 3 +-- fs/jffs2/super.c | 3 +-- fs/jfs/jfs_metapage.c | 3 +-- fs/jfs/super.c | 3 +-- fs/locks.c | 3 +-- fs/minix/inode.c | 3 +-- fs/ncpfs/inode.c | 3 +-- fs/nfs/inode.c | 3 +-- fs/ntfs/super.c | 3 +-- fs/ocfs2/dlm/dlmfs.c | 3 +-- fs/ocfs2/super.c | 3 +-- fs/openpromfs/inode.c | 3 +-- fs/proc/inode.c | 3 +-- fs/qnx4/inode.c | 3 +-- fs/reiserfs/super.c | 3 +-- fs/romfs/inode.c | 3 +-- fs/smbfs/inode.c | 3 +-- fs/sysv/inode.c | 3 +-- fs/udf/super.c | 4 +--- fs/ufs/super.c | 3 +-- fs/xfs/linux-2.6/xfs_super.c | 3 +-- include/linux/slab.h | 2 -- ipc/mqueue.c | 3 +-- kernel/fork.c | 3 +-- mm/rmap.c | 3 +-- mm/shmem.c | 3 +-- mm/slab.c | 20 ++------------------ mm/slub.c | 10 ---------- net/socket.c | 3 +-- net/sunrpc/rpc_pipe.c | 3 +-- 55 files changed, 55 insertions(+), 136 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 13e4f70ec8c0..a93f328a7317 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -71,8 +71,7 @@ spufs_init_once(void *p, struct kmem_cache * cachep, unsigned long flags) { struct spufs_inode_info *ei = p; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&ei->vfs_inode); } } diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index d847ee1da3d9..3dba5733ed1f 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -940,8 +940,7 @@ static void ltree_entry_ctor(void *obj, struct kmem_cache *cache, { struct ltree_entry *le = obj; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) != - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) return; le->users = 0; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 2e5f2c8371ee..30c296508497 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -232,8 +232,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/affs/super.c b/fs/affs/super.c index c3986a1911b0..beff7d21e6e2 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -87,8 +87,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct affs_inode_info *ei = (struct affs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { init_MUTEX(&ei->i_link_lock); init_MUTEX(&ei->i_ext_lock); inode_init_once(&ei->vfs_inode); diff --git a/fs/afs/super.c b/fs/afs/super.c index 41173f81ac47..7030d76155fc 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -453,8 +453,7 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, { struct afs_vnode *vnode = _vnode; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); init_waitqueue_head(&vnode->update_waitq); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cc6cc8ed2e39..fe96108a788d 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -293,8 +293,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct befs_inode_info *bi = (struct befs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&bi->vfs_inode); } } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 93d6219243ad..edc08d89aabc 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -248,8 +248,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct bfs_inode_info *bi = foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&bi->vfs_inode); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 6fe49b9349ea..f02b7bdd9864 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -457,9 +457,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - { + if (flags & SLAB_CTOR_CONSTRUCTOR) { memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); sema_init(&bdev->bd_mount_sem, 1); diff --git a/fs/buffer.c b/fs/buffer.c index 80291aad6de6..7db24b9e5449 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2953,8 +2953,7 @@ EXPORT_SYMBOL(free_buffer_head); static void init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) { - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { struct buffer_head * bh = (struct buffer_head *)data; memset(bh, 0, sizeof(*bh)); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5036dae09cd7..8568e100953c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -701,8 +701,7 @@ cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags) { struct cifsInodeInfo *cifsi = inode; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&cifsi->vfs_inode); INIT_LIST_HEAD(&cifsi->lockList); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 614175a3b02e..0aaff3651d14 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -62,8 +62,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct coda_inode_info *ei = (struct coda_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 6acc8f4fc588..8cbf3f69ebe5 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -583,8 +583,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) { struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/efs/super.c b/fs/efs/super.c index c2235e46edcd..ba7a8b9da0c1 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -72,8 +72,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct efs_inode_info *ei = (struct efs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index a046a419d8af..685a1c287177 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -160,8 +160,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { rwlock_init(&ei->i_meta_lock); #ifdef CONFIG_EXT2_FS_XATTR init_rwsem(&ei->xattr_sem); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4266b708ca01..54d3c9041259 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -466,8 +466,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT3_FS_XATTR init_rwsem(&ei->xattr_sem); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 25e8d0096176..719126932354 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -517,8 +517,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT4DEV_FS_XATTR init_rwsem(&ei->xattr_sem); diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 05c2941c74f2..1959143c1d27 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -40,8 +40,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct fat_cache *cache = (struct fat_cache *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) INIT_LIST_HEAD(&cache->cache_list); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9bfe607c892e..65cb54bde481 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -499,8 +499,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { spin_lock_init(&ei->cache_lru_lock); ei->nr_caches = 0; ei->cache_valid_id = FAT_CACHE_VALID + 1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bdffe0cfe09a..d8003be56e05 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -685,8 +685,7 @@ static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep, { struct inode * inode = foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(inode); } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 6e8a59809abf..e2bffae683cc 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -27,8 +27,7 @@ static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_inode *ip = foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&ip->i_inode); spin_lock_init(&ip->i_spin); init_rwsem(&ip->i_rw_mutex); @@ -39,8 +38,7 @@ static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_glock *gl = foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { INIT_HLIST_NODE(&gl->gl_list); spin_lock_init(&gl->gl_spin); INIT_LIST_HEAD(&gl->gl_holders); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 623f509f1d47..4f1888f16cf0 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,7 +434,7 @@ static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flag { struct hfs_inode_info *i = p; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&i->vfs_inode); } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 1a97f9293447..37afbec8a761 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -470,7 +470,7 @@ static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long { struct hfsplus_inode_info *i = p; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&i->vfs_inode); } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index e0174e338526..1b95f39fbc37 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -176,8 +176,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { mutex_init(&ei->i_mutex); mutex_init(&ei->i_parent_mutex); inode_init_once(&ei->vfs_inode); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index fe625cd1719a..842a4ed4052d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -556,8 +556,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/inode.c b/fs/inode.c index 5abb097ab1b0..b4296bf62739 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -213,8 +213,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct inode * inode = (struct inode *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(inode); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 64a96cdfe3a4..e99f7ff4ecb4 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -77,8 +77,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct iso_inode_info *ei = foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index e51164a8a8d4..45368f8bbe72 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -47,8 +47,7 @@ static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned l { struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { init_MUTEX(&ei->sem); inode_init_once(&ei->vfs_inode); } diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 58deae007507..6b3acb0b5781 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -184,8 +184,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { mp->lid = 0; mp->lsn = 0; mp->flag = 0; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 52d73d54a931..ea9dc3e65dcf 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -752,8 +752,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); INIT_LIST_HEAD(&jfs_ip->anon_inode_list); init_rwsem(&jfs_ip->rdwrlock); diff --git a/fs/locks.c b/fs/locks.c index 52a81005dab4..325578074742 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -203,8 +203,7 @@ static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags) { struct file_lock *lock = (struct file_lock *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) != - SLAB_CTOR_CONSTRUCTOR) + if (!(flags & SLAB_CTOR_CONSTRUCTOR)) return; locks_init_lock(lock); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 92e383af3709..2f4d43a2a310 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -73,8 +73,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct minix_inode_info *ei = (struct minix_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7285c94956c4..c29f00ad495d 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -60,8 +60,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { mutex_init(&ei->open_mutex); inode_init_once(&ei->vfs_inode); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 44aa9b726573..1e9a915d1fea 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1167,8 +1167,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct nfs_inode *nfsi = (struct nfs_inode *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&nfsi->vfs_inode); spin_lock_init(&nfsi->req_lock); INIT_LIST_HEAD(&nfsi->dirty); diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2ddde534db0a..21d834e5ed73 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3085,8 +3085,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep, { ntfs_inode *ni = (ntfs_inode *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(VFS_I(ni)); } diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index de952eba29a9..d4e46d067edd 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -263,8 +263,7 @@ static void dlmfs_init_once(void *foo, struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { ip->ip_dlm = NULL; ip->ip_parent = NULL; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f5493540d94f..7c5e3f5d6634 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -937,8 +937,7 @@ static void ocfs2_inode_init_once(void *data, { struct ocfs2_inode_info *oi = data; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { oi->ip_flags = 0; oi->ip_open_count = 0; spin_lock_init(&oi->ip_lock); diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index bde1c164417d..731a90e9f0cd 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -419,8 +419,7 @@ static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned { struct op_inode_info *oi = (struct op_inode_info *) data; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&oi->vfs_inode); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index c372eb151a3a..22b1158389ae 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -109,8 +109,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct proc_inode *ei = (struct proc_inode *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 83bc8e7824cd..75fc8498f2e2 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -536,8 +536,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f13a7f164dc6..7054aaef0493 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -511,8 +511,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); #ifdef CONFIG_REISERFS_FS_POSIX_ACL diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index fd601014813e..804285190271 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -570,8 +570,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 5faba4f1c9ab..424a3ddf86dd 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -69,9 +69,8 @@ static void smb_destroy_inode(struct inode *inode) static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; - unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR; - if ((flags & flagmask) == SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 9311cac186fe..3152d7415606 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -322,8 +322,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&si->vfs_inode); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 8672b88f7ff2..023b304fdd99 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -134,9 +134,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct udf_inode_info *ei = (struct udf_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - { + if (flags & SLAB_CTOR_CONSTRUCTOR) { ei->i_ext.i_data = NULL; inode_init_once(&ei->vfs_inode); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b5a6461ec66b..be7c48c5f203 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1237,8 +1237,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 2f2c40db562e..14e2cbe5a8d5 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -360,8 +360,7 @@ xfs_fs_inode_init_once( kmem_zone_t *zonep, unsigned long flags) { - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); } diff --git a/include/linux/slab.h b/include/linux/slab.h index e14b4c338b89..1ffe0a959cd4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -21,7 +21,6 @@ typedef struct kmem_cache kmem_cache_t __deprecated; * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set. */ #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ -#define SLAB_DEBUG_INITIAL 0x00000200UL /* DEBUG: Call constructor (as verifier) */ #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ @@ -36,7 +35,6 @@ typedef struct kmem_cache kmem_cache_t __deprecated; /* Flags passed to a constructor functions */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* If not set, then deconstructor */ #define SLAB_CTOR_ATOMIC 0x002UL /* Tell constructor it can't sleep */ -#define SLAB_CTOR_VERIFY 0x004UL /* Tell constructor it's a verify call */ /* * struct kmem_cache related prototypes diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 554ac368be79..d17821d3f483 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -215,8 +215,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&p->vfs_inode); } diff --git a/kernel/fork.c b/kernel/fork.c index ffccefb28b6a..b7d169def942 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1425,8 +1425,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long fl { struct sighand_struct *sighand = data; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) spin_lock_init(&sighand->siglock); } diff --git a/mm/rmap.c b/mm/rmap.c index 59da5b734c80..75a32be64a21 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -162,8 +162,7 @@ void anon_vma_unlink(struct vm_area_struct *vma) static void anon_vma_ctor(void *data, struct kmem_cache *cachep, unsigned long flags) { - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { struct anon_vma *anon_vma = data; spin_lock_init(&anon_vma->lock); diff --git a/mm/shmem.c b/mm/shmem.c index b2a35ebf071a..f01e8deed645 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2358,8 +2358,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, { struct shmem_inode_info *p = (struct shmem_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&p->vfs_inode); #ifdef CONFIG_TMPFS_POSIX_ACL p->i_acl = NULL; diff --git a/mm/slab.c b/mm/slab.c index 2a3cbd6e675d..a877d6f3d687 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -116,8 +116,7 @@ #include /* - * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, - * SLAB_RED_ZONE & SLAB_POISON. + * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. * 0 for faster, smaller code (especially in the critical paths). * * STATS - 1 to collect stats for /proc/slabinfo. @@ -172,7 +171,7 @@ /* Legal flag mask for kmem_cache_create(). */ #if DEBUG -# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ +# define CREATE_MASK (SLAB_RED_ZONE | \ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | \ SLAB_STORE_USER | \ @@ -2184,12 +2183,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, #if DEBUG WARN_ON(strchr(name, ' ')); /* It confuses parsers */ - if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { - /* No constructor, but inital state check requested */ - printk(KERN_ERR "%s: No con, but init state check " - "requested - %s\n", __FUNCTION__, name); - flags &= ~SLAB_DEBUG_INITIAL; - } #if FORCED_DEBUG /* * Enable redzoning and last user accounting, except for caches with @@ -2895,15 +2888,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, BUG_ON(objnr >= cachep->num); BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); - if (cachep->flags & SLAB_DEBUG_INITIAL) { - /* - * Need to call the slab's constructor so the caller can - * perform a verify of its state (debugging). Called without - * the cache-lock held. - */ - cachep->ctor(objp + obj_offset(cachep), - cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); - } if (cachep->flags & SLAB_POISON && cachep->dtor) { /* we want to cache poison the object, * call the destruction callback diff --git a/mm/slub.c b/mm/slub.c index 79940e98e5e6..bd86182e595e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -97,9 +97,6 @@ * * - Support PAGE_ALLOC_DEBUG. Should be easy to do. * - * - SLAB_DEBUG_INITIAL is not supported but I have never seen a use of - * it. - * * - Variable sizing of the per node arrays */ @@ -125,11 +122,6 @@ #endif -/* - * Flags from the regular SLAB that SLUB does not support: - */ -#define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL) - /* * Mininum number of partial slabs. These will be left on the partial * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -1748,8 +1740,6 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, s->flags = flags; s->align = align; - BUG_ON(flags & SLUB_UNIMPLEMENTED); - /* * The page->offset field is only 16 bit wide. This is an offset * in units of words from the beginning of an object. If the slab diff --git a/net/socket.c b/net/socket.c index 1ad62c08377b..759825b7ca26 100644 --- a/net/socket.c +++ b/net/socket.c @@ -261,8 +261,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct socket_alloc *ei = (struct socket_alloc *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) - == SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9b9ea5045569..ad39b47e05bc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -828,8 +828,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct rpc_inode *rpci = (struct rpc_inode *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { + if (flags & SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&rpci->vfs_inode); rpci->private = NULL; rpci->nreaders = 0; -- cgit 1.4.1 From 73243284463a761e04d69d22c7516b2be7de096c Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 6 May 2007 14:50:20 -0700 Subject: Return EPERM not ECHILD on security_task_wait failure wait* syscalls return -ECHILD even when an individual PID of a live child was requested explicitly, when security_task_wait denies the operation. This means that something like a broken SELinux policy can produce an unexpected failure that looks just like a bug with wait or ptrace or something. This patch makes do_wait return -EACCES (or other appropriate error returned from security_task_wait() instead of -ECHILD if some children were ruled out solely because security_task_wait failed. [jmorris@namei.org: switch error code to EACCES] Signed-off-by: Roland McGrath Acked-by: Stephen Smalley Cc: Chris Wright Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index b55ed4cc9104..92369240d91d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1033,6 +1033,8 @@ asmlinkage void sys_exit_group(int error_code) static int eligible_child(pid_t pid, int options, struct task_struct *p) { + int err; + if (pid > 0) { if (p->pid != pid) return 0; @@ -1066,8 +1068,9 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p) if (delay_group_leader(p)) return 2; - if (security_task_wait(p)) - return 0; + err = security_task_wait(p); + if (err) + return err; return 1; } @@ -1449,6 +1452,7 @@ static long do_wait(pid_t pid, int options, struct siginfo __user *infop, DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; int flag, retval; + int allowed, denied; add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: @@ -1457,6 +1461,7 @@ repeat: * match our criteria, even if we are not able to reap it yet. */ flag = 0; + allowed = denied = 0; current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; @@ -1472,6 +1477,12 @@ repeat: if (!ret) continue; + if (unlikely(ret < 0)) { + denied = ret; + continue; + } + allowed = 1; + switch (p->state) { case TASK_TRACED: /* @@ -1570,6 +1581,8 @@ check_continued: goto repeat; } retval = -ECHILD; + if (unlikely(denied) && !allowed) + retval = denied; end: current->state = TASK_RUNNING; remove_wait_queue(¤t->signal->wait_chldexit,&wait); -- cgit 1.4.1 From 433ecb4ab312f873870b67ee374502e84f6dcf92 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 May 2007 14:50:40 -0700 Subject: fix refrigerator() vs thaw_process() race refrigerator() can miss a wakeup, "wait event" loop needs a proper memory ordering. Signed-off-by: Oleg Nesterov Acked-by: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index 6d566bf7085c..0eb5c420e8ed 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -47,8 +47,10 @@ void refrigerator(void) recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); - while (frozen(current)) { - current->state = TASK_UNINTERRUPTIBLE; + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!frozen(current)) + break; schedule(); } pr_debug("%s left refrigerator\n", current->comm); -- cgit 1.4.1 From 7be9823491ecbaf9700d7d3502cb4b4dd0ed868a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:42 -0700 Subject: swsusp: use inline functions for changing page flags Replace direct invocations of SetPageNosave(), SetPageNosaveFree() etc. with calls to inline functions that can be changed in subsequent patches without modifying the code calling them. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 33 +++++++++++++++++++++++++++++++++ kernel/power/snapshot.c | 48 +++++++++++++++++++++++++----------------------- mm/page_alloc.c | 6 +++--- 3 files changed, 61 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index bf99bd49f8ef..a45b9f514492 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -8,6 +8,7 @@ #include #include #include +#include /* struct pbe is used for creating lists of pages that should be restored * atomically during the resume from disk, because the page frames they have @@ -49,6 +50,38 @@ void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); unsigned long get_safe_page(gfp_t gfp_mask); +/* Page management functions for the software suspend (swsusp) */ + +static inline void swsusp_set_page_forbidden(struct page *page) +{ + SetPageNosave(page); +} + +static inline int swsusp_page_is_forbidden(struct page *page) +{ + return PageNosave(page); +} + +static inline void swsusp_unset_page_forbidden(struct page *page) +{ + ClearPageNosave(page); +} + +static inline void swsusp_set_page_free(struct page *page) +{ + SetPageNosaveFree(page); +} + +static inline int swsusp_page_is_free(struct page *page) +{ + return PageNosaveFree(page); +} + +static inline void swsusp_unset_page_free(struct page *page) +{ + ClearPageNosaveFree(page); +} + /* * XXX: We try to keep some more pages free so that I/O operations succeed * without paging. Might this be more? diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 704c25a3ffec..48fc7a35571b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -67,15 +67,15 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) res = (void *)get_zeroed_page(gfp_mask); if (safe_needed) - while (res && PageNosaveFree(virt_to_page(res))) { + while (res && swsusp_page_is_free(virt_to_page(res))) { /* The page is unsafe, mark it for swsusp_free() */ - SetPageNosave(virt_to_page(res)); + swsusp_set_page_forbidden(virt_to_page(res)); allocated_unsafe_pages++; res = (void *)get_zeroed_page(gfp_mask); } if (res) { - SetPageNosave(virt_to_page(res)); - SetPageNosaveFree(virt_to_page(res)); + swsusp_set_page_forbidden(virt_to_page(res)); + swsusp_set_page_free(virt_to_page(res)); } return res; } @@ -91,8 +91,8 @@ static struct page *alloc_image_page(gfp_t gfp_mask) page = alloc_page(gfp_mask); if (page) { - SetPageNosave(page); - SetPageNosaveFree(page); + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); } return page; } @@ -110,9 +110,9 @@ static inline void free_image_page(void *addr, int clear_nosave_free) page = virt_to_page(addr); - ClearPageNosave(page); + swsusp_unset_page_forbidden(page); if (clear_nosave_free) - ClearPageNosaveFree(page); + swsusp_unset_page_free(page); __free_page(page); } @@ -615,7 +615,8 @@ static struct page *saveable_highmem_page(unsigned long pfn) BUG_ON(!PageHighMem(page)); - if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || + PageReserved(page)) return NULL; return page; @@ -670,7 +671,7 @@ static struct page *saveable_page(unsigned long pfn) BUG_ON(PageHighMem(page)); - if (PageNosave(page) || PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) return NULL; if (PageReserved(page) && pfn_is_nosave(pfn)) @@ -810,9 +811,10 @@ void swsusp_free(void) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - if (PageNosave(page) && PageNosaveFree(page)) { - ClearPageNosave(page); - ClearPageNosaveFree(page); + if (swsusp_page_is_forbidden(page) && + swsusp_page_is_free(page)) { + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); __free_page(page); } } @@ -1135,7 +1137,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) - ClearPageNosaveFree(pfn_to_page(pfn)); + swsusp_unset_page_free(pfn_to_page(pfn)); } /* Mark pages that correspond to the "original" pfns as "unsafe" */ @@ -1144,7 +1146,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) pfn = memory_bm_next_pfn(bm); if (likely(pfn != BM_END_OF_MAP)) { if (likely(pfn_valid(pfn))) - SetPageNosaveFree(pfn_to_page(pfn)); + swsusp_set_page_free(pfn_to_page(pfn)); else return -EFAULT; } @@ -1310,14 +1312,14 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) struct page *page; page = alloc_page(__GFP_HIGHMEM); - if (!PageNosaveFree(page)) { + if (!swsusp_page_is_free(page)) { /* The page is "safe", set its bit the bitmap */ memory_bm_set_bit(bm, page_to_pfn(page)); safe_highmem_pages++; } /* Mark the page as allocated */ - SetPageNosave(page); - SetPageNosaveFree(page); + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); } memory_bm_position_reset(bm); safe_highmem_bm = bm; @@ -1349,7 +1351,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) struct highmem_pbe *pbe; void *kaddr; - if (PageNosave(page) && PageNosaveFree(page)) { + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ @@ -1511,14 +1513,14 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) error = -ENOMEM; goto Free; } - if (!PageNosaveFree(virt_to_page(lp))) { + if (!swsusp_page_is_free(virt_to_page(lp))) { /* The page is "safe", add it to the list */ lp->next = safe_pages_list; safe_pages_list = lp; } /* Mark the page as allocated */ - SetPageNosave(virt_to_page(lp)); - SetPageNosaveFree(virt_to_page(lp)); + swsusp_set_page_forbidden(virt_to_page(lp)); + swsusp_set_page_free(virt_to_page(lp)); nr_pages--; } /* Free the reserved safe pages so that chain_alloc() can use them */ @@ -1547,7 +1549,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) if (PageHighMem(page)) return get_highmem_page_buffer(page, ca); - if (PageNosave(page) && PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 36d713e216e8..59164313167f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -775,8 +775,8 @@ void mark_free_pages(struct zone *zone) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - if (!PageNosave(page)) - ClearPageNosaveFree(page); + if (!swsusp_page_is_forbidden(page)) + swsusp_unset_page_free(page); } for (order = MAX_ORDER - 1; order >= 0; --order) @@ -785,7 +785,7 @@ void mark_free_pages(struct zone *zone) pfn = page_to_pfn(list_entry(curr, struct page, lru)); for (i = 0; i < (1UL << order); i++) - SetPageNosaveFree(pfn_to_page(pfn + i)); + swsusp_set_page_free(pfn_to_page(pfn + i)); } spin_unlock_irqrestore(&zone->lock, flags); -- cgit 1.4.1 From 74dfd666de861c97d47bdbd892f6d21b801d0247 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:43 -0700 Subject: swsusp: do not use page flags Make swsusp use memory bitmaps instead of page flags for marking 'nosave' and free pages. This allows us to 'recycle' two page flags that can be used for other purposes. Also, the memory needed to store the bitmaps is allocated when necessary (ie. before the suspend) and freed after the resume which is more reasonable. The patch is designed to minimize the amount of changes and there are some nice simplifications and optimizations possible on top of it. I am going to implement them separately in the future. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/e820.c | 26 ++--- include/linux/suspend.h | 58 ++++------- kernel/power/disk.c | 23 +++-- kernel/power/main.c | 2 + kernel/power/power.h | 2 + kernel/power/snapshot.c | 250 +++++++++++++++++++++++++++++++++++++++++++--- kernel/power/user.c | 4 + 7 files changed, 283 insertions(+), 82 deletions(-) (limited to 'kernel') diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index be8965427a93..13c6c37610e0 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -256,22 +258,6 @@ void __init e820_reserve_resources(void) } } -/* Mark pages corresponding to given address range as nosave */ -static void __init -e820_mark_nosave_range(unsigned long start, unsigned long end) -{ - unsigned long pfn, max_pfn; - - if (start >= end) - return; - - printk("Nosave address range: %016lx - %016lx\n", start, end); - max_pfn = end >> PAGE_SHIFT; - for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) - if (pfn_valid(pfn)) - SetPageNosave(pfn_to_page(pfn)); -} - /* * Find the ranges of physical addresses that do not correspond to * e820 RAM areas and mark the corresponding pages as nosave for software @@ -290,13 +276,13 @@ void __init e820_mark_nosave_regions(void) struct e820entry *ei = &e820.map[i]; if (paddr < ei->addr) - e820_mark_nosave_range(paddr, - round_up(ei->addr, PAGE_SIZE)); + register_nosave_region(PFN_DOWN(paddr), + PFN_UP(ei->addr)); paddr = round_down(ei->addr + ei->size, PAGE_SIZE); if (ei->type != E820_RAM) - e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), - paddr); + register_nosave_region(PFN_UP(ei->addr), + PFN_DOWN(paddr)); if (paddr >= (end_pfn << PAGE_SHIFT)) break; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a45b9f514492..3cc4d6394c07 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -24,63 +24,41 @@ struct pbe { extern void drain_local_pages(void); extern void mark_free_pages(struct zone *zone); -#ifdef CONFIG_PM -/* kernel/power/swsusp.c */ -extern int software_suspend(void); - -#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) +#if defined(CONFIG_PM) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) extern int pm_prepare_console(void); extern void pm_restore_console(void); #else static inline int pm_prepare_console(void) { return 0; } static inline void pm_restore_console(void) {} -#endif /* defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) */ +#endif + +#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) +/* kernel/power/swsusp.c */ +extern int software_suspend(void); +/* kernel/power/snapshot.c */ +extern void __init register_nosave_region(unsigned long, unsigned long); +extern int swsusp_page_is_forbidden(struct page *); +extern void swsusp_set_page_free(struct page *); +extern void swsusp_unset_page_free(struct page *); +extern unsigned long get_safe_page(gfp_t gfp_mask); #else static inline int software_suspend(void) { printk("Warning: fake suspend called\n"); return -ENOSYS; } -#endif /* CONFIG_PM */ + +static inline void register_nosave_region(unsigned long b, unsigned long e) {} +static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } +static inline void swsusp_set_page_free(struct page *p) {} +static inline void swsusp_unset_page_free(struct page *p) {} +#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */ void save_processor_state(void); void restore_processor_state(void); struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); -unsigned long get_safe_page(gfp_t gfp_mask); - -/* Page management functions for the software suspend (swsusp) */ - -static inline void swsusp_set_page_forbidden(struct page *page) -{ - SetPageNosave(page); -} - -static inline int swsusp_page_is_forbidden(struct page *page) -{ - return PageNosave(page); -} - -static inline void swsusp_unset_page_forbidden(struct page *page) -{ - ClearPageNosave(page); -} - -static inline void swsusp_set_page_free(struct page *page) -{ - SetPageNosaveFree(page); -} - -static inline int swsusp_page_is_free(struct page *page) -{ - return PageNosaveFree(page); -} - -static inline void swsusp_unset_page_free(struct page *page) -{ - ClearPageNosaveFree(page); -} /* * XXX: We try to keep some more pages free so that I/O operations succeed diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 8df51c23bba4..403bc3722fee 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -139,14 +139,19 @@ int pm_suspend_disk(void) mdelay(5000); goto Thaw; } + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; + /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (error) - goto Thaw; + goto Finish; error = platform_prepare(); if (error) - goto Thaw; + goto Finish; suspend_console(); error = device_suspend(PMSG_FREEZE); @@ -181,7 +186,7 @@ int pm_suspend_disk(void) power_down(); else { swsusp_free(); - goto Thaw; + goto Finish; } } else { pr_debug("PM: Image restored successfully.\n"); @@ -194,6 +199,8 @@ int pm_suspend_disk(void) platform_finish(); device_resume(); resume_console(); + Finish: + free_basic_memory_bitmaps(); Thaw: unprepare_processes(); return error; @@ -239,13 +246,15 @@ static int software_resume(void) } pr_debug("PM: Checking swsusp image.\n"); - error = swsusp_check(); if (error) - goto Done; + goto Unlock; - pr_debug("PM: Preparing processes for restore.\n"); + error = create_basic_memory_bitmaps(); + if (error) + goto Unlock; + pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); if (error) { swsusp_close(); @@ -280,7 +289,9 @@ static int software_resume(void) printk(KERN_ERR "PM: Restore failed, recovering.\n"); unprepare_processes(); Done: + free_basic_memory_bitmaps(); /* For success case, the suspend path will release the lock */ + Unlock: mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); return 0; diff --git a/kernel/power/main.c b/kernel/power/main.c index b21c2a56f960..5a779270cdbc 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -244,6 +244,7 @@ static int enter_state(suspend_state_t state) return error; } +#ifdef CONFIG_SOFTWARE_SUSPEND /* * This is main interface to the outside world. It needs to be * called from process context. @@ -252,6 +253,7 @@ int software_suspend(void) { return enter_state(PM_SUSPEND_DISK); } +#endif /** diff --git a/kernel/power/power.h b/kernel/power/power.h index 33bd94ceba32..1f8052bda0f7 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -48,6 +48,8 @@ extern sector_t swsusp_resume_block; extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern int create_basic_memory_bitmaps(void); +extern void free_basic_memory_bitmaps(void); extern unsigned int count_data_pages(void); /** diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 48fc7a35571b..f66e4411795b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,10 @@ #include "power.h" +static int swsusp_page_is_free(struct page *); +static void swsusp_set_page_forbidden(struct page *); +static void swsusp_unset_page_forbidden(struct page *); + /* List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written @@ -224,11 +229,6 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave) * of type unsigned long each). It also contains the pfns that * correspond to the start and end of the represented memory area and * the number of bit chunks in the block. - * - * NOTE: Memory bitmaps are used for two types of operations only: - * "set a bit" and "find the next bit set". Moreover, the searching - * is always carried out after all of the "set a bit" operations - * on given bitmap. */ #define BM_END_OF_MAP (~0UL) @@ -443,15 +443,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) } /** - * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds + * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds * to given pfn. The cur_zone_bm member of @bm and the cur_block member * of @bm->cur_zone_bm are updated. - * - * If the bit cannot be set, the function returns -EINVAL . */ -static int -memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) { struct zone_bitmap *zone_bm; struct bm_block *bb; @@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) /* We don't assume that the zones are sorted by pfns */ while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { zone_bm = zone_bm->next; - if (unlikely(!zone_bm)) - return -EINVAL; + + BUG_ON(!zone_bm); } bm->cur.zone_bm = zone_bm; } @@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) while (pfn >= bb->end_pfn) { bb = bb->next; - if (unlikely(!bb)) - return -EINVAL; + + BUG_ON(!bb); } zone_bm->cur_block = bb; pfn -= bb->start_pfn; - set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK); - return 0; + *bit_nr = pfn % BM_BITS_PER_CHUNK; + *addr = bb->data + pfn / BM_BITS_PER_CHUNK; +} + +static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + set_bit(bit, addr); +} + +static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + clear_bit(bit, addr); +} + +static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + return test_bit(bit, addr); } /* Two auxiliary functions for memory_bm_next_pfn */ @@ -563,6 +588,199 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; } +/** + * This structure represents a range of page frames the contents of which + * should not be saved during the suspend. + */ + +struct nosave_region { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static LIST_HEAD(nosave_regions); + +/** + * register_nosave_region - register a range of page frames the contents + * of which should not be saved during the suspend (to be used in the early + * initialization code) + */ + +void __init +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) +{ + struct nosave_region *region; + + if (start_pfn >= end_pfn) + return; + + if (!list_empty(&nosave_regions)) { + /* Try to extend the previous region (they should be sorted) */ + region = list_entry(nosave_regions.prev, + struct nosave_region, list); + if (region->end_pfn == start_pfn) { + region->end_pfn = end_pfn; + goto Report; + } + } + /* This allocation cannot fail */ + region = alloc_bootmem_low(sizeof(struct nosave_region)); + region->start_pfn = start_pfn; + region->end_pfn = end_pfn; + list_add_tail(®ion->list, &nosave_regions); + Report: + printk("swsusp: Registered nosave memory region: %016lx - %016lx\n", + start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); +} + +/* + * Set bits in this map correspond to the page frames the contents of which + * should not be saved during the suspend. + */ +static struct memory_bitmap *forbidden_pages_map; + +/* Set bits in this map correspond to free page frames. */ +static struct memory_bitmap *free_pages_map; + +/* + * Each page frame allocated for creating the image is marked by setting the + * corresponding bits in forbidden_pages_map and free_pages_map simultaneously + */ + +void swsusp_set_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_set_bit(free_pages_map, page_to_pfn(page)); +} + +static int swsusp_page_is_free(struct page *page) +{ + return free_pages_map ? + memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; +} + +void swsusp_unset_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); +} + +static void swsusp_set_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); +} + +int swsusp_page_is_forbidden(struct page *page) +{ + return forbidden_pages_map ? + memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; +} + +static void swsusp_unset_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); +} + +/** + * mark_nosave_pages - set bits corresponding to the page frames the + * contents of which should not be saved in a given bitmap. + */ + +static void mark_nosave_pages(struct memory_bitmap *bm) +{ + struct nosave_region *region; + + if (list_empty(&nosave_regions)) + return; + + list_for_each_entry(region, &nosave_regions, list) { + unsigned long pfn; + + printk("swsusp: Marking nosave pages: %016lx - %016lx\n", + region->start_pfn << PAGE_SHIFT, + region->end_pfn << PAGE_SHIFT); + + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) + memory_bm_set_bit(bm, pfn); + } +} + +/** + * create_basic_memory_bitmaps - create bitmaps needed for marking page + * frames that should not be saved and free page frames. The pointers + * forbidden_pages_map and free_pages_map are only modified if everything + * goes well, because we don't want the bits to be used before both bitmaps + * are set up. + */ + +int create_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + int error = 0; + + BUG_ON(forbidden_pages_map || free_pages_map); + + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + if (!bm1) + return -ENOMEM; + + error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free_first_object; + + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + if (!bm2) + goto Free_first_bitmap; + + error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free_second_object; + + forbidden_pages_map = bm1; + free_pages_map = bm2; + mark_nosave_pages(forbidden_pages_map); + + printk("swsusp: Basic memory bitmaps created\n"); + + return 0; + + Free_second_object: + kfree(bm2); + Free_first_bitmap: + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + Free_first_object: + kfree(bm1); + return -ENOMEM; +} + +/** + * free_basic_memory_bitmaps - free memory bitmaps allocated by + * create_basic_memory_bitmaps(). The auxiliary pointers are necessary + * so that the bitmaps themselves are not referred to while they are being + * freed. + */ + +void free_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + + BUG_ON(!(forbidden_pages_map && free_pages_map)); + + bm1 = forbidden_pages_map; + bm2 = free_pages_map; + forbidden_pages_map = NULL; + free_pages_map = NULL; + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + kfree(bm1); + memory_bm_free(bm2, PG_UNSAFE_CLEAR); + kfree(bm2); + + printk("swsusp: Basic memory bitmaps freed\n"); +} + /** * snapshot_additional_pages - estimate the number of additional pages * be needed for setting up the suspend image data structures for given diff --git a/kernel/power/user.c b/kernel/power/user.c index 7cf6713b2325..845acd84cb23 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -52,6 +52,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_ACCMODE) == O_RDWR) return -ENOSYS; + if(create_basic_memory_bitmaps()) + return -ENOMEM; + nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -77,6 +80,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) struct snapshot_data *data; swsusp_free(); + free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap, data->bitmap); free_bitmap(data->bitmap); -- cgit 1.4.1 From 1525a2ad76f991eba9755f75c9b6d4d97abad25e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:44 -0700 Subject: swsusp: fix error paths in snapshot_open We forget to increase device_available if there's an error in snapshot_open(), so the snapshot device cannot be open at all after snapshot_open() has returned an error. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/user.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/user.c b/kernel/power/user.c index 845acd84cb23..bd1771f7a64e 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -49,12 +49,14 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (!atomic_add_unless(&device_available, -1, 0)) return -EBUSY; - if ((filp->f_flags & O_ACCMODE) == O_RDWR) + if ((filp->f_flags & O_ACCMODE) == O_RDWR) { + atomic_inc(&device_available); return -ENOSYS; - - if(create_basic_memory_bitmaps()) + } + if(create_basic_memory_bitmaps()) { + atomic_inc(&device_available); return -ENOMEM; - + } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; -- cgit 1.4.1 From 0709db6072c2e799eba1aa61bd19e0d7f38aa2cd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:45 -0700 Subject: swsusp: use GFP_KERNEL for creating basic data structures Make swsusp call create_basic_memory_bitmaps() before processes are frozen, so that GFP_KERNEL allocations can be made in it. Additionally, ensure that the swsusp's userland interface won't be used while either pm_suspend_disk() or software_resume() is being executed. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 37 ++++++++++++++++++++++++++----------- kernel/power/power.h | 3 +++ kernel/power/snapshot.c | 8 ++++---- kernel/power/user.c | 10 +++++----- 4 files changed, 38 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 403bc3722fee..e518379b667a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -130,28 +130,33 @@ int pm_suspend_disk(void) { int error; + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) + return -EBUSY; + + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Exit; + error = prepare_processes(); if (error) - return error; + goto Finish; if (pm_disk_mode == PM_DISK_TESTPROC) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); goto Thaw; } - /* Allocate memory management structures */ - error = create_basic_memory_bitmaps(); - if (error) - goto Thaw; /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (error) - goto Finish; + goto Thaw; error = platform_prepare(); if (error) - goto Finish; + goto Thaw; suspend_console(); error = device_suspend(PMSG_FREEZE); @@ -186,7 +191,7 @@ int pm_suspend_disk(void) power_down(); else { swsusp_free(); - goto Finish; + goto Thaw; } } else { pr_debug("PM: Image restored successfully.\n"); @@ -199,10 +204,12 @@ int pm_suspend_disk(void) platform_finish(); device_resume(); resume_console(); - Finish: - free_basic_memory_bitmaps(); Thaw: unprepare_processes(); + Finish: + free_basic_memory_bitmaps(); + Exit: + atomic_inc(&snapshot_device_available); return error; } @@ -250,9 +257,15 @@ static int software_resume(void) if (error) goto Unlock; + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + error = create_basic_memory_bitmaps(); if (error) - goto Unlock; + goto Finish; pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); @@ -290,6 +303,8 @@ static int software_resume(void) unprepare_processes(); Done: free_basic_memory_bitmaps(); + Finish: + atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&pm_mutex); diff --git a/kernel/power/power.h b/kernel/power/power.h index 1f8052bda0f7..a64d3f22de97 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -140,6 +140,9 @@ struct resume_swap_area { #define PMOPS_ENTER 2 #define PMOPS_FINISH 3 +/* If unset, the snapshot device cannot be open. */ +extern atomic_t snapshot_device_available; + /** * The bitmap is used for tracing allocated swap pages * diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f66e4411795b..128da11f01c2 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -723,19 +723,19 @@ int create_basic_memory_bitmaps(void) BUG_ON(forbidden_pages_map || free_pages_map); - bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm1) return -ENOMEM; - error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY); + error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); if (error) goto Free_first_object; - bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm2) goto Free_first_bitmap; - error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY); + error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); if (error) goto Free_second_object; diff --git a/kernel/power/user.c b/kernel/power/user.c index bd1771f7a64e..72dbfd01408e 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -40,21 +40,21 @@ static struct snapshot_data { char platform_suspend; } snapshot_state; -static atomic_t device_available = ATOMIC_INIT(1); +atomic_t snapshot_device_available = ATOMIC_INIT(1); static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - if (!atomic_add_unless(&device_available, -1, 0)) + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) return -EBUSY; if ((filp->f_flags & O_ACCMODE) == O_RDWR) { - atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return -ENOSYS; } if(create_basic_memory_bitmaps()) { - atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return -ENOMEM; } nonseekable_open(inode, filp); @@ -92,7 +92,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) enable_nonboot_cpus(); mutex_unlock(&pm_mutex); } - atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return 0; } -- cgit 1.4.1 From d1d241cc2c5feec057c370aa71637380b1b945d5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:47 -0700 Subject: swsusp: use rbtree for tracking allocated swap Make swsusp use extents instead of a bitmap to trace swap pages allocated for saving the image (the tracking is only needed in case there's an error, so that the allocated swap pages can be released). This should allow us to reduce the memory usage, practically always, and improve performance. Signed-off-by: Rafael J. Wysocki Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/power.h | 27 ++-------- kernel/power/swap.c | 18 ++----- kernel/power/swsusp.c | 137 +++++++++++++++++++++++++++----------------------- kernel/power/user.c | 22 ++------ 4 files changed, 86 insertions(+), 118 deletions(-) (limited to 'kernel') diff --git a/kernel/power/power.h b/kernel/power/power.h index a64d3f22de97..a3e47cbdaf31 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -143,30 +143,9 @@ struct resume_swap_area { /* If unset, the snapshot device cannot be open. */ extern atomic_t snapshot_device_available; -/** - * The bitmap is used for tracing allocated swap pages - * - * The entire bitmap consists of a number of bitmap_page - * structures linked with the help of the .next member. - * Thus each page can be allocated individually, so we only - * need to make 0-order memory allocations to create - * the bitmap. - */ - -#define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *)) -#define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long)) -#define BITS_PER_CHUNK (sizeof(long) * 8) -#define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK) - -struct bitmap_page { - unsigned long chunks[BITMAP_PAGE_CHUNKS]; - struct bitmap_page *next; -}; - -extern void free_bitmap(struct bitmap_page *bitmap); -extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); -extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); -extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); +extern sector_t alloc_swapdev_block(int swap); +extern void free_all_swap_pages(int swap); +extern int swsusp_swap_in_use(void); extern int swsusp_check(void); extern int swsusp_shrink_memory(void); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b18c155cbb60..e83ed9945a80 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -243,7 +243,6 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; sector_t cur_swap; - struct bitmap_page *bitmap; unsigned int k; }; @@ -252,9 +251,6 @@ static void release_swap_writer(struct swap_map_handle *handle) if (handle->cur) free_page((unsigned long)handle->cur); handle->cur = NULL; - if (handle->bitmap) - free_bitmap(handle->bitmap); - handle->bitmap = NULL; } static int get_swap_writer(struct swap_map_handle *handle) @@ -262,12 +258,7 @@ static int get_swap_writer(struct swap_map_handle *handle) handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); if (!handle->cur) return -ENOMEM; - handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0)); - if (!handle->bitmap) { - release_swap_writer(handle); - return -ENOMEM; - } - handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); + handle->cur_swap = alloc_swapdev_block(root_swap); if (!handle->cur_swap) { release_swap_writer(handle); return -ENOSPC; @@ -284,7 +275,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!handle->cur) return -EINVAL; - offset = alloc_swapdev_block(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap); error = write_page(buf, offset, bio_chain); if (error) return error; @@ -293,7 +284,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, error = wait_on_bio_chain(bio_chain); if (error) goto out; - offset = alloc_swapdev_block(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; @@ -430,7 +421,8 @@ int swsusp_write(void) } } if (error) - free_all_swap_pages(root_swap, handle.bitmap); + free_all_swap_pages(root_swap); + release_swap_writer(&handle); out: swsusp_close(); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 175370824f37..1109023d8358 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "power.h" @@ -74,72 +75,69 @@ static inline unsigned int count_highmem_pages(void) { return 0; } /** * The following functions are used for tracing the allocated * swap pages, so that they can be freed in case of an error. - * - * The functions operate on a linked bitmap structure defined - * in power.h */ -void free_bitmap(struct bitmap_page *bitmap) -{ - struct bitmap_page *bp; +struct swsusp_extent { + struct rb_node node; + unsigned long start; + unsigned long end; +}; - while (bitmap) { - bp = bitmap->next; - free_page((unsigned long)bitmap); - bitmap = bp; - } -} +static struct rb_root swsusp_extents = RB_ROOT; -struct bitmap_page *alloc_bitmap(unsigned int nr_bits) +static int swsusp_extents_insert(unsigned long swap_offset) { - struct bitmap_page *bitmap, *bp; - unsigned int n; - - if (!nr_bits) - return NULL; - - bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); - bp = bitmap; - for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) { - bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); - bp = bp->next; - if (!bp) { - free_bitmap(bitmap); - return NULL; + struct rb_node **new = &(swsusp_extents.rb_node); + struct rb_node *parent = NULL; + struct swsusp_extent *ext; + + /* Figure out where to put the new node */ + while (*new) { + ext = container_of(*new, struct swsusp_extent, node); + parent = *new; + if (swap_offset < ext->start) { + /* Try to merge */ + if (swap_offset == ext->start - 1) { + ext->start--; + return 0; + } + new = &((*new)->rb_left); + } else if (swap_offset > ext->end) { + /* Try to merge */ + if (swap_offset == ext->end + 1) { + ext->end++; + return 0; + } + new = &((*new)->rb_right); + } else { + /* It already is in the tree */ + return -EINVAL; } } - return bitmap; -} - -static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) -{ - unsigned int n; - - n = BITMAP_PAGE_BITS; - while (bitmap && n <= bit) { - n += BITMAP_PAGE_BITS; - bitmap = bitmap->next; - } - if (!bitmap) - return -EINVAL; - n -= BITMAP_PAGE_BITS; - bit -= n; - n = 0; - while (bit >= BITS_PER_CHUNK) { - bit -= BITS_PER_CHUNK; - n++; - } - bitmap->chunks[n] |= (1UL << bit); + /* Add the new node and rebalance the tree. */ + ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); + if (!ext) + return -ENOMEM; + + ext->start = swap_offset; + ext->end = swap_offset; + rb_link_node(&ext->node, parent, new); + rb_insert_color(&ext->node, &swsusp_extents); return 0; } -sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) +/** + * alloc_swapdev_block - allocate a swap page and register that it has + * been allocated, so that it can be freed in case of an error. + */ + +sector_t alloc_swapdev_block(int swap) { unsigned long offset; offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { - if (bitmap_set(bitmap, offset)) + if (swsusp_extents_insert(offset)) swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); @@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) return 0; } -void free_all_swap_pages(int swap, struct bitmap_page *bitmap) +/** + * free_all_swap_pages - free swap pages allocated for saving image data. + * It also frees the extents used to register which swap entres had been + * allocated. + */ + +void free_all_swap_pages(int swap) { - unsigned int bit, n; - unsigned long test; - - bit = 0; - while (bitmap) { - for (n = 0; n < BITMAP_PAGE_CHUNKS; n++) - for (test = 1UL; test; test <<= 1) { - if (bitmap->chunks[n] & test) - swap_free(swp_entry(swap, bit)); - bit++; - } - bitmap = bitmap->next; + struct rb_node *node; + + while ((node = swsusp_extents.rb_node)) { + struct swsusp_extent *ext; + unsigned long offset; + + ext = container_of(node, struct swsusp_extent, node); + rb_erase(node, &swsusp_extents); + for (offset = ext->start; offset <= ext->end; offset++) + swap_free(swp_entry(swap, offset)); + + kfree(ext); } } +int swsusp_swap_in_use(void) +{ + return (swsusp_extents.rb_node != NULL); +} + /** * swsusp_show_speed - print the time elapsed between two events represented by * @start and @stop diff --git a/kernel/power/user.c b/kernel/power/user.c index 72dbfd01408e..ad4e10208cde 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -33,7 +33,6 @@ static struct snapshot_data { struct snapshot_handle handle; int swap; - struct bitmap_page *bitmap; int mode; char frozen; char ready; @@ -69,7 +68,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; } - data->bitmap = NULL; data->frozen = 0; data->ready = 0; data->platform_suspend = 0; @@ -84,8 +82,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) swsusp_free(); free_basic_memory_bitmaps(); data = filp->private_data; - free_all_swap_pages(data->swap, data->bitmap); - free_bitmap(data->bitmap); + free_all_swap_pages(data->swap); if (data->frozen) { mutex_lock(&pm_mutex); thaw_processes(); @@ -300,14 +297,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENODEV; break; } - if (!data->bitmap) { - data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0)); - if (!data->bitmap) { - error = -ENOMEM; - break; - } - } - offset = alloc_swapdev_block(data->swap, data->bitmap); + offset = alloc_swapdev_block(data->swap); if (offset) { offset <<= PAGE_SHIFT; error = put_user(offset, (sector_t __user *)arg); @@ -321,13 +311,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENODEV; break; } - free_all_swap_pages(data->swap, data->bitmap); - free_bitmap(data->bitmap); - data->bitmap = NULL; + free_all_swap_pages(data->swap); break; case SNAPSHOT_SET_SWAP_FILE: - if (!data->bitmap) { + if (!swsusp_swap_in_use()) { /* * User space encodes device types as two-byte values, * so we need to recode them @@ -426,7 +414,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_SET_SWAP_AREA: - if (data->bitmap) { + if (swsusp_swap_in_use()) { error = -EPERM; } else { struct resume_swap_area swap_area; -- cgit 1.4.1 From ab3bfca7abf3fd0fe41d26d839610a787aa7e587 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 6 May 2007 14:50:49 -0700 Subject: remove software_suspend() Remove software_suspend() and all its users since pm_suspend(PM_SUSPEND_DISK) should be equivalent and there's no point in having two interfaces for the same thing. The patch also changes the valid_state function to return 0 (false) for PM_SUSPEND_DISK when SOFTWARE_SUSPEND is not configured instead of accepting it and having the whole thing fail later. Signed-off-by: Johannes Berg Acked-by: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/sleep/proc.c | 2 +- drivers/i2c/chips/tps65010.c | 4 +++- include/linux/init.h | 2 +- include/linux/suspend.h | 8 -------- kernel/power/main.c | 21 +++++++-------------- kernel/sys.c | 2 +- 6 files changed, 13 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index 2d912b71e543..dcde9ddd105a 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c @@ -60,7 +60,7 @@ acpi_system_write_sleep(struct file *file, state = simple_strtoul(str, NULL, 0); #ifdef CONFIG_SOFTWARE_SUSPEND if (state == 4) { - error = software_suspend(); + error = pm_suspend(PM_SUSPEND_DISK); goto Done; } #endif diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 214fbb1423c5..7ed92dc3d833 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -351,8 +351,10 @@ static void tps65010_interrupt(struct tps65010 *tps) #if 0 /* REVISIT: this might need its own workqueue * plus tweaks including deadlock avoidance ... + * also needs to get error handling and probably + * an #ifdef CONFIG_SOFTWARE_SUSPEND */ - software_suspend(); + pm_suspend(PM_SUSPEND_DISK); #endif poll = 1; } diff --git a/include/linux/init.h b/include/linux/init.h index 9abf120ec9f8..dbbdbd1bec77 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -233,7 +233,7 @@ void __init parse_early_param(void); #define __obsolete_setup(str) /* nothing */ #endif -/* Data marked not to be saved by software_suspend() */ +/* Data marked not to be saved by software suspend */ #define __nosavedata __attribute__ ((__section__ (".data.nosave"))) /* This means "can be init if no module support, otherwise module load diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3cc4d6394c07..3aecc96acc76 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -33,8 +33,6 @@ static inline void pm_restore_console(void) {} #endif #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) -/* kernel/power/swsusp.c */ -extern int software_suspend(void); /* kernel/power/snapshot.c */ extern void __init register_nosave_region(unsigned long, unsigned long); extern int swsusp_page_is_forbidden(struct page *); @@ -42,12 +40,6 @@ extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); extern unsigned long get_safe_page(gfp_t gfp_mask); #else -static inline int software_suspend(void) -{ - printk("Warning: fake suspend called\n"); - return -ENOSYS; -} - static inline void register_nosave_region(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} diff --git a/kernel/power/main.c b/kernel/power/main.c index 5a779270cdbc..f6dda685e7e2 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -184,17 +184,21 @@ static void suspend_finish(suspend_state_t state) static const char * const pm_states[PM_SUSPEND_MAX] = { [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", -#ifdef CONFIG_SOFTWARE_SUSPEND [PM_SUSPEND_DISK] = "disk", -#endif }; static inline int valid_state(suspend_state_t state) { /* Suspend-to-disk does not really need low-level support. - * It can work with reboot if needed. */ + * It can work with shutdown/reboot if needed. If it isn't + * configured, then it cannot be supported. + */ if (state == PM_SUSPEND_DISK) +#ifdef CONFIG_SOFTWARE_SUSPEND return 1; +#else + return 0; +#endif /* all other states need lowlevel support and need to be * valid to the lowlevel implementation, no valid callback @@ -244,17 +248,6 @@ static int enter_state(suspend_state_t state) return error; } -#ifdef CONFIG_SOFTWARE_SUSPEND -/* - * This is main interface to the outside world. It needs to be - * called from process context. - */ -int software_suspend(void) -{ - return enter_state(PM_SUSPEND_DISK); -} -#endif - /** * pm_suspend - Externally visible function for suspending system. diff --git a/kernel/sys.c b/kernel/sys.c index 123b165080e6..fe1f3ab20477 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -881,7 +881,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user #ifdef CONFIG_SOFTWARE_SUSPEND case LINUX_REBOOT_CMD_SW_SUSPEND: { - int ret = software_suspend(); + int ret = pm_suspend(PM_SUSPEND_DISK); unlock_kernel(); return ret; } -- cgit 1.4.1 From f0ced9b229cfbc76b5db9837b4b256b602d56610 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 6 May 2007 14:50:50 -0700 Subject: power management: change /sys/power/disk display Change /sys/power/disk to display all valid modes as well as the currently selected one in a fashion known from the LED subsystem. This changes userspace API, but it is apparently not used much (we asked some userspace developers) Signed-off-by: Johannes Berg Acked-by: "Rafael J. Wysocki" Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/interface.txt | 8 ++++++-- kernel/power/disk.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 8c5b41bf3f36..fd5192a8fa8a 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -34,8 +34,12 @@ for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then, we are able to look in the log messages and work out, for example, which code is being slow and which device drivers are misbehaving. -Reading from this file will display what the mode is currently set -to. Writing to this file will accept one of +Reading from this file will display all supported modes and the currently +selected one in brackets, for example + + [shutdown] reboot test testproc + +Writing to this file will accept one of 'platform' (only if the platform supports it) 'shutdown' diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e518379b667a..06331374d862 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -350,7 +350,34 @@ static const char * const pm_disk_modes[] = { static ssize_t disk_show(struct kset *kset, char *buf) { - return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); + int i; + char *start = buf; + + for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { + if (!pm_disk_modes[i]) + continue; + switch (i) { + case PM_DISK_SHUTDOWN: + case PM_DISK_REBOOT: + case PM_DISK_TEST: + case PM_DISK_TESTPROC: + break; + default: + if (pm_ops && pm_ops->enter && + (i == pm_ops->pm_disk_mode)) + break; + /* not a valid mode, continue with loop */ + continue; + } + if (i == pm_disk_mode) + buf += sprintf(buf, "[%s]", pm_disk_modes[i]); + else + buf += sprintf(buf, "%s", pm_disk_modes[i]); + if (i+1 != PM_DISK_MAX) + buf += sprintf(buf, " "); + } + buf += sprintf(buf, "\n"); + return buf-start; } -- cgit 1.4.1 From a7ee2e5f5b4c9c72f4390c60ba7ea30306f47188 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 6 May 2007 14:50:50 -0700 Subject: kconfig: mention 'hibernation' not just swsusp Clarify that "software suspend" is what's called "hibernation" in most user interfaces, shrinking a terminology gap. (Examples include Gnome and MS-Windows.) Also provide a more succinct description of what it does, so you won't have to read the whole novel in Kconfig; and highlights just why the lack of BIOS requirements for swsusp are a big deal. Signed-off-by: David Brownell Acked-by: "Rafael J. Wysocki" Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/Kconfig | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 51a4dd0f1b74..877721708fa4 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -78,17 +78,22 @@ config PM_SYSFS_DEPRECATED are likely to be bus or driver specific. config SOFTWARE_SUSPEND - bool "Software Suspend" + bool "Software Suspend (Hibernation)" depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) ---help--- - Enable the suspend to disk (STD) functionality. + Enable the suspend to disk (STD) functionality, which is usually + called "hibernation" in user interfaces. STD checkpoints the + system and powers it off; and restores that checkpoint on reboot. You can suspend your machine with 'echo disk > /sys/power/state'. Alternatively, you can use the additional userland tools available from . In principle it does not require ACPI or APM, although for example - ACPI will be used if available. + ACPI will be used for the final steps when it is available. One + of the reasons to use software suspend is that the firmware hooks + for suspend states like suspend-to-RAM (STR) often don't work very + well with Linux. It creates an image which is saved in your active swap. Upon the next boot, pass the 'resume=/dev/swappartition' argument to the kernel to -- cgit 1.4.1 From 9b95e43763cfdfebc1318d27e55712e7b6bfe098 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:51 -0700 Subject: swsusp: fix snapshot_release Remove the leftover enable_nonboot_cpus() from snapshot_release(). Signed-off-by: Rafael J. Wysocki Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/user.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/user.c b/kernel/power/user.c index ad4e10208cde..040560d9c312 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -86,7 +86,6 @@ static int snapshot_release(struct inode *inode, struct file *filp) if (data->frozen) { mutex_lock(&pm_mutex); thaw_processes(); - enable_nonboot_cpus(); mutex_unlock(&pm_mutex); } atomic_inc(&snapshot_device_available); -- cgit 1.4.1 From 56f99bcb52d64d70078b41cc176dd8b6f5763108 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:52 -0700 Subject: swsusp: free more memory Move the definition of PAGES_FOR_IO to kernel/power/power.h and introduce SPARE_PAGES representing the number of pages that should be freed by the swsusp's memory shrinker in addition to PAGES_FOR_IO so that device drivers can allocate some memory (up to 1 MB total) in their .suspend() routines without causing the suspend to fail. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 6 ------ kernel/power/power.h | 12 +++++++++++- kernel/power/swsusp.c | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3aecc96acc76..96868be9c211 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -52,10 +52,4 @@ struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); -/* - * XXX: We try to keep some more pages free so that I/O operations succeed - * without paging. Might this be more? - */ -#define PAGES_FOR_IO 1024 - #endif /* _LINUX_SWSUSP_H */ diff --git a/kernel/power/power.h b/kernel/power/power.h index a3e47cbdaf31..34b43542785a 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -14,8 +14,18 @@ struct swsusp_info { #ifdef CONFIG_SOFTWARE_SUSPEND -extern int pm_suspend_disk(void); +/* + * Keep some memory free so that I/O operations can succeed without paging + * [Might this be more than 4 MB?] + */ +#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT) +/* + * Keep 1 MB of memory free so that device drivers can allocate some pages in + * their .suspend() routines without breaking the suspend to disk. + */ +#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) +extern int pm_suspend_disk(void); #else static inline int pm_suspend_disk(void) { diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 1109023d8358..5da304c8f1f6 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -233,7 +233,7 @@ int swsusp_shrink_memory(void) long size, highmem_size; highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO; + size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; tmp = size; size += highmem_size; for_each_zone (zone) -- cgit 1.4.1