 arch/x86/kernel/setup_percpu.c |  23 +------
 include/linux/cpumask.h        |  26 ++++++--
 kernel/cpu.c                   | 128 +++++--------------
 3 files changed, 43 insertions(+), 134 deletions(-)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1cd53dfcd309..76e305e064f9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,26 +80,6 @@ static void __init setup_per_cpu_maps(void)
 #endif
 }
 
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-/*
- * Replace static cpumask_of_cpu_map in the initdata section,
- * with one that's allocated sized by the possible number of cpus.
- *
- * (requires nr_cpu_ids to be initialized)
- */
-static void __init setup_cpumask_of_cpu(void)
-{
-	int i;
-
-	/* alloc_bootmem zeroes memory */
-	cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
-	for (i = 0; i < nr_cpu_ids; i++)
-		cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
 #ifdef CONFIG_X86_32
 /*
  * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -199,9 +179,6 @@ void __init setup_per_cpu_areas(void)
 
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
-
-	/* Setup cpumask_of_cpu map */
-	setup_cpumask_of_cpu();
 }
 
 #endif
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 8fa3b6d4a320..96d0509fb8d8 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -265,10 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
 
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+	cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
+{
+	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
+	p -= cpu / BITS_PER_LONG;
+	return (const cpumask_t *)p;
+}
+
+/*
+ * In cases where we take the address of the cpumask immediately,
+ * gcc optimizes it out (it's a constant) and there's no huge stack
+ * variable created:
+ */
+#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); })
 
-/* cpumask_of_cpu_map[] is in kernel/cpu.c */
-extern const cpumask_t *cpumask_of_cpu_map;
-#define cpumask_of_cpu(cpu)	(cpumask_of_cpu_map[cpu])
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a35d8995dc8c..06a8358bb418 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -462,115 +462,27 @@ out:
 
 #endif /* CONFIG_SMP */
 
-/* 64 bits of zeros, for initializers. */
-#if BITS_PER_LONG == 32
-#define Z64 0, 0
-#else
-#define Z64 0
-#endif
+/*
+ * cpu_bit_bitmap[] is a special, "compressed" data structure that
+ * represents the single-bit binary value 1<<nr for every nr < NR_CPUS.
+ *
+ * It is used by cpumask_of_cpu() to get a constant address to a CPU
+ * mask value that has a single bit set only.
+ */
 
-/* Initializer macros. */
-#define CMI0(n) { .bits = { 1UL << (n) } }
-#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }
-
-#define CMI8(n, ...)						\
-	CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__),		\
-	CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__),	\
-	CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__),	\
-	CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)
-
-#if BITS_PER_LONG == 32
-#define CMI64(n, ...)							\
-	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
-	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
-	CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__),	\
-	CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
-#else
-#define CMI64(n, ...)							\
-	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
-	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
-	CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__),	\
-	CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
-#endif
+/* cpu_bit_bitmap[0] is empty - so the offset pointer in get_cpu_mask() can back into it */
+#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
+#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
+#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
+#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
 
-#define CMI256(n, ...)							\
-	CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__),	\
-	CMI64((n)+128, Z64, Z64, __VA_ARGS__),				\
-	CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
-#define Z256 Z64, Z64, Z64, Z64
-
-#define CMI1024(n, ...)					\
-	CMI256((n), __VA_ARGS__),			\
-	CMI256((n)+256, Z256, __VA_ARGS__),		\
-	CMI256((n)+512, Z256, Z256, __VA_ARGS__),	\
-	CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
-#define Z1024 Z256, Z256, Z256, Z256
-
-/* We want this statically initialized, just to be safe.  We try not
- * to waste too much space, either. */
-static const cpumask_t cpumask_map[]
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-__initdata
-#endif
-= {
-	CMI0(0), CMI0(1), CMI0(2), CMI0(3),
-#if NR_CPUS > 4
-	CMI0(4), CMI0(5), CMI0(6), CMI0(7),
-#endif
-#if NR_CPUS > 8
-	CMI0(8), CMI0(9), CMI0(10), CMI0(11),
-	CMI0(12), CMI0(13), CMI0(14), CMI0(15),
-#endif
-#if NR_CPUS > 16
-	CMI0(16), CMI0(17), CMI0(18), CMI0(19),
-	CMI0(20), CMI0(21), CMI0(22), CMI0(23),
-	CMI0(24), CMI0(25), CMI0(26), CMI0(27),
-	CMI0(28), CMI0(29), CMI0(30), CMI0(31),
-#endif
-#if NR_CPUS > 32
-#if BITS_PER_LONG == 32
-	CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
-	CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
-	CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
-	CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
-	CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
-	CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
-	CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
-	CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
-#else
-	CMI0(32), CMI0(33), CMI0(34), CMI0(35),
-	CMI0(36), CMI0(37), CMI0(38), CMI0(39),
-	CMI0(40), CMI0(41), CMI0(42), CMI0(43),
-	CMI0(44), CMI0(45), CMI0(46), CMI0(47),
-	CMI0(48), CMI0(49), CMI0(50), CMI0(51),
-	CMI0(52), CMI0(53), CMI0(54), CMI0(55),
-	CMI0(56), CMI0(57), CMI0(58), CMI0(59),
-	CMI0(60), CMI0(61), CMI0(62), CMI0(63),
-#endif /* BITS_PER_LONG == 64 */
-#endif
-#if NR_CPUS > 64
-	CMI64(64, Z64),
-#endif
-#if NR_CPUS > 128
-	CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
-#endif
-#if NR_CPUS > 256
-	CMI256(256, Z256),
-#endif
-#if NR_CPUS > 512
-	CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
-#endif
-#if NR_CPUS > 1024
-	CMI1024(1024, Z1024),
-#endif
-#if NR_CPUS > 2048
-	CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
-#endif
-#if NR_CPUS > 4096
-#error NR_CPUS too big.  Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
+const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
+
+	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
+	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
+#if BITS_PER_LONG > 32
+	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
+	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
 #endif
 };
-
-const cpumask_t *cpumask_of_cpu_map = cpumask_map;
-
-EXPORT_SYMBOL_GPL(cpumask_of_cpu_map);
+EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
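
Two notes on the new scheme, beyond what the diff itself shows. First, the
space cost: the old statically initialized cpumask_map[] needed NR_CPUS full
cpumask_t entries (with NR_CPUS=4096 on 64-bit, that is 4096 * 512 bytes =
2 MB), while cpu_bit_bitmap[] needs only (BITS_PER_LONG+1) *
BITS_TO_LONGS(NR_CPUS) longs (65 * 64 * 8 bytes, about 33 KB, for the same
configuration). Second, a sketch of how the initializer macros unroll, plus a
hypothetical call site (set_cpus_allowed() is used here purely to illustrate
passing the mask by value, not quoted from this patch):

	/* MASK_DECLARE_8(0) unrolls via _4, _2, _1 into eight designated
	 * initializers, one per row, each setting only word 0: */
	[1][0] = 1UL << 0,  [2][0] = 1UL << 1,
	[3][0] = 1UL << 2,  [4][0] = 1UL << 3,
	[5][0] = 1UL << 4,  [6][0] = 1UL << 5,
	[7][0] = 1UL << 6,  [8][0] = 1UL << 7,

	/* Call site: no cpumask_t is built on the stack; gcc folds the
	 * dereference of the constant pointer. */
	set_cpus_allowed(task, cpumask_of_cpu(cpu));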