17 files changed, 316 insertions, 172 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1ee596cd942f..2d7f56a98e0f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -87,9 +87,6 @@ config GENERIC_TIME_VSYSCALL
 	bool
 	default y
 
-config HAVE_LEGACY_PER_CPU_AREA
-	def_bool y
-
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
index 688a812c017d..61c7b1750b16 100644
--- a/arch/ia64/include/asm/meminit.h
+++ b/arch/ia64/include/asm/meminit.h
@@ -61,7 +61,7 @@ extern int register_active_ranges(u64 start, u64 len, int nid);
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 # define LARGE_GAP	0x40000000 /* Use virtual mem map if hole is > than this */
-  extern unsigned long vmalloc_end;
+  extern unsigned long VMALLOC_END;
   extern struct page *vmem_map;
   extern int find_largest_hole(u64 start, u64 end, void *arg);
   extern int create_mem_map_page_table(u64 start, u64 end, void *arg);
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 8840a690d1e7..69bf13857a9f 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -228,8 +228,7 @@ ia64_phys_addr_valid (unsigned long addr)
 #define VMALLOC_START		(RGN_BASE(RGN_GATE) + 0x200000000UL)
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 # define VMALLOC_END_INIT	(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
-# define VMALLOC_END		vmalloc_end
-  extern unsigned long vmalloc_end;
+extern unsigned long VMALLOC_END;
 #else
 #if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP)
 /* SPARSEMEM_VMEMMAP uses half of vmalloc... */
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 3eaeedf1aef2..7fa90f73f6be 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -229,7 +229,7 @@ struct cpuinfo_ia64 {
 #endif
 };
 
-DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DECLARE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
 
 /*
  * The "local" data variable.  It refers to the per-CPU data of the currently executing
@@ -237,8 +237,8 @@ DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
  * Do not use the address of local_cpu_data, since it will be different from
  * cpu_data(smp_processor_id())!
  */
-#define local_cpu_data		(&__ia64_per_cpu_var(cpu_info))
-#define cpu_data(cpu)		(&per_cpu(cpu_info, cpu))
+#define local_cpu_data		(&__ia64_per_cpu_var(ia64_cpu_info))
+#define cpu_data(cpu)		(&per_cpu(ia64_cpu_info, cpu))
 
 extern void print_cpu_info (struct cpuinfo_ia64 *);
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index baec6f00f7f3..40574ae11401 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -702,11 +702,23 @@ int __init early_acpi_boot_init(void)
 		printk(KERN_ERR PREFIX
 		       "Error parsing MADT - no LAPIC entries\n");
 
+#ifdef CONFIG_SMP
+	if (available_cpus == 0) {
+		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    hard_smp_processor_id();
+		available_cpus = 1;	/* We've got at least one of these, no? */
+	}
+	smp_boot_data.cpu_count = available_cpus;
+#endif
+	/* Make boot-up look pretty */
+	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+	       total_cpus);
+
 	return 0;
 }
 
-
-
 int __init acpi_boot_init(void)
 {
 
@@ -769,18 +781,8 @@ int __init acpi_boot_init(void)
 	if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
 		printk(KERN_ERR PREFIX "Can't find FADT\n");
 
+#ifdef CONFIG_ACPI_NUMA
 #ifdef CONFIG_SMP
-	if (available_cpus == 0) {
-		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
-		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
-		smp_boot_data.cpu_phys_id[available_cpus] =
-		    hard_smp_processor_id();
-		available_cpus = 1;	/* We've got at least one of these, no? */
-	}
-	smp_boot_data.cpu_count = available_cpus;
-
-	smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
 	if (srat_num_cpus == 0) {
 		int cpu, i = 1;
 		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
@@ -789,14 +791,9 @@ int __init acpi_boot_init(void)
 				node_cpuid[i++].phys_id =
 				    smp_boot_data.cpu_phys_id[cpu];
 	}
-# endif
 #endif
-#ifdef CONFIG_ACPI_NUMA
 	build_cpu_to_node_map();
 #endif
-	/* Make boot-up look pretty */
-	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
-	       total_cpus);
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 696eff28a0c4..17a9fba38930 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1051,7 +1051,7 @@ END(ia64_delay_loop)
  * intermediate precision so that we can produce a full 64-bit result.
  */
 GLOBAL_ENTRY(ia64_native_sched_clock)
-	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
 	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
 	;;
 	ldf8 f8=[r8]
@@ -1077,7 +1077,7 @@ sched_clock = ia64_native_sched_clock
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 GLOBAL_ENTRY(cycle_to_cputime)
 	alloc r16=ar.pfs,1,0,0,0
-	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
 	;;
 	ldf8 f8=[r8]
 	;;
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 14d39e300627..461b99902bf6 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic
 #endif
 
 #include <asm/processor.h>
-EXPORT_SYMBOL(per_cpu__cpu_info);
+EXPORT_SYMBOL(per_cpu__ia64_cpu_info);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
 #endif
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 7461d2573d41..d5bdf9de36b6 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -59,7 +59,7 @@
 ia64_do_tlb_purge:
 #define O(member)	IA64_CPUINFO_##member##_OFFSET
 
-	GET_THIS_PADDR(r2, cpu_info)	// load phys addr of cpu_info into r2
+	GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
 	;;
 	addl r17=O(PTCE_STRIDE),r2
 	addl r2=O(PTCE_BASE),r2
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
index 32f6fc131fbe..c370e02f0061 100644
--- a/arch/ia64/kernel/relocate_kernel.S
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -61,7 +61,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
 
 	// purge all TC entries
 #define O(member)       IA64_CPUINFO_##member##_OFFSET
-        GET_THIS_PADDR(r2, cpu_info)    // load phys addr of cpu_info into r2
+        GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
         ;;
         addl r17=O(PTCE_STRIDE),r2
         addl r2=O(PTCE_BASE),r2
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 1de86c96801d..a1ea87919777 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -74,7 +74,7 @@ unsigned long __per_cpu_offset[NR_CPUS];
 EXPORT_SYMBOL(__per_cpu_offset);
 #endif
 
-DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
@@ -566,19 +566,18 @@ setup_arch (char **cmdline_p)
 	early_acpi_boot_init();
 # ifdef CONFIG_ACPI_NUMA
 	acpi_numa_init();
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
+#  ifdef CONFIG_ACPI_HOTPLUG_CPU
 	prefill_possible_map();
-#endif
+#  endif
 	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
 		32 : cpus_weight(early_cpu_possible_map)),
 		additional_cpus > 0 ? additional_cpus : 0);
 # endif
-#else
-# ifdef CONFIG_SMP
-	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
-# endif
 #endif /* CONFIG_APCI_BOOT */
 
+#ifdef CONFIG_SMP
+	smp_build_cpu_map();
+#endif
 	find_memory();
 
 	/* process SAL system table: */
@@ -856,18 +855,6 @@ identify_cpu (struct cpuinfo_ia64 *c)
 }
 
 /*
- * In UP configuration, setup_per_cpu_areas() is defined in
- * include/linux/percpu.h
- */
-#ifdef CONFIG_SMP
-void __init
-setup_per_cpu_areas (void)
-{
-	/* start_kernel() requires this... */
-}
-#endif
-
-/*
  * Do the following calculations:
  *
  * 1. the max. cache line size.
@@ -980,7 +967,7 @@ cpu_init (void)
 	 * depends on the data returned by identify_cpu().  We break the dependency by
 	 * accessing cpu_data() through the canonical per-CPU address.
 	 */
-	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start);
 	identify_cpu(cpu_info);
 
 #ifdef CONFIG_MCKINLEY
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 0a0c77b2c988..1295ba327f6f 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -166,6 +166,12 @@ SECTIONS
 	}
 #endif
 
+#ifdef	CONFIG_SMP
+  . = ALIGN(PERCPU_PAGE_SIZE);
+  __cpu0_per_cpu = .;
+  . = . + PERCPU_PAGE_SIZE;	/* cpu0 per-cpu space */
+#endif
+
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
 
@@ -198,11 +204,6 @@ SECTIONS
   data : { } :data
   .data : AT(ADDR(.data) - LOAD_OFFSET)
 	{
-#ifdef	CONFIG_SMP
-  . = ALIGN(PERCPU_PAGE_SIZE);
-		__cpu0_per_cpu = .;
-  . = . + PERCPU_PAGE_SIZE;	/* cpu0 per-cpu space */
-#endif
 		INIT_TASK_DATA(PAGE_SIZE)
 		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
 		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 2f724d2bf299..54bf54059811 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -154,38 +154,99 @@ static void *cpu_data;
 void * __cpuinit
 per_cpu_init (void)
 {
-	int cpu;
-	static int first_time=1;
+	static bool first_time = true;
+	void *cpu0_data = __cpu0_per_cpu;
+	unsigned int cpu;
+
+	if (!first_time)
+		goto skip;
+	first_time = false;
 
 	/*
-	 * get_free_pages() cannot be used before cpu_init() done.  BSP
-	 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
-	 * get_zeroed_page().
+	 * get_free_pages() cannot be used before cpu_init() done.
+	 * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs
+	 * to avoid that AP calls get_zeroed_page().
 	 */
-	if (first_time) {
-		void *cpu0_data = __cpu0_per_cpu;
+	for_each_possible_cpu(cpu) {
+		void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
 
-		first_time=0;
+		memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
+		__per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
+		per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 
-		__per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start;
-		per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0];
+		/*
+		 * percpu area for cpu0 is moved from the __init area
+		 * which is setup by head.S and used till this point.
+		 * Update ar.k3.  This move is ensures that percpu
+		 * area for cpu0 is on the correct node and its
+		 * virtual address isn't insanely far from other
+		 * percpu areas which is important for congruent
+		 * percpu allocator.
+		 */
+		if (cpu == 0)
+			ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
+				    (unsigned long)__per_cpu_start);
 
-		for (cpu = 1; cpu < NR_CPUS; cpu++) {
-			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
-			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
-			cpu_data += PERCPU_PAGE_SIZE;
-			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
-		}
+		cpu_data += PERCPU_PAGE_SIZE;
 	}
+skip:
 	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 }
 
 static inline void
 alloc_per_cpu_data(void)
 {
-	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS-1,
+	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(),
 				   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 }
+
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas.  All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init
+setup_per_cpu_areas(void)
+{
+	struct pcpu_alloc_info *ai;
+	struct pcpu_group_info *gi;
+	unsigned int cpu;
+	ssize_t static_size, reserved_size, dyn_size;
+	int rc;
+
+	ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
+	if (!ai)
+		panic("failed to allocate pcpu_alloc_info");
+	gi = &ai->groups[0];
+
+	/* units are assigned consecutively to possible cpus */
+	for_each_possible_cpu(cpu)
+		gi->cpu_map[gi->nr_units++] = cpu;
+
+	/* set parameters */
+	static_size = __per_cpu_end - __per_cpu_start;
+	reserved_size = PERCPU_MODULE_RESERVE;
+	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+	if (dyn_size < 0)
+		panic("percpu area overflow static=%zd reserved=%zd\n",
+		      static_size, reserved_size);
+
+	ai->static_size		= static_size;
+	ai->reserved_size	= reserved_size;
+	ai->dyn_size		= dyn_size;
+	ai->unit_size		= PERCPU_PAGE_SIZE;
+	ai->atom_size		= PAGE_SIZE;
+	ai->alloc_size		= PERCPU_PAGE_SIZE;
+
+	rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
+	if (rc)
+		panic("failed to setup percpu area (err=%d)", rc);
+
+	pcpu_free_alloc_info(ai);
+}
 #else
 #define alloc_per_cpu_data() do { } while (0)
 #endif /* CONFIG_SMP */
@@ -270,8 +331,8 @@ paging_init (void)
 
 		map_size = PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
 			sizeof(struct page));
-		vmalloc_end -= map_size;
-		vmem_map = (struct page *) vmalloc_end;
+		VMALLOC_END -= map_size;
+		vmem_map = (struct page *) VMALLOC_END;
 		efi_memmap_walk(create_mem_map_page_table, NULL);
 
 		/*
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index d85ba98d9008..19c4b2195dce 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -143,22 +143,120 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
 	int cpu;
 
 	for_each_possible_early_cpu(cpu) {
-		if (cpu == 0) {
-			void *cpu0_data = __cpu0_per_cpu;
-			__per_cpu_offset[cpu] = (char*)cpu0_data -
-				__per_cpu_start;
-		} else if (node == node_cpuid[cpu].nid) {
-			memcpy(__va(cpu_data), __phys_per_cpu_start,
-			       __per_cpu_end - __per_cpu_start);
-			__per_cpu_offset[cpu] = (char*)__va(cpu_data) -
-				__per_cpu_start;
-			cpu_data += PERCPU_PAGE_SIZE;
-		}
+		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
+
+		if (node != node_cpuid[cpu].nid)
+			continue;
+
+		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
+		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
+			__per_cpu_start;
+
+		/*
+		 * percpu area for cpu0 is moved from the __init area
+		 * which is setup by head.S and used till this point.
+		 * Update ar.k3.  This move is ensures that percpu
+		 * area for cpu0 is on the correct node and its
+		 * virtual address isn't insanely far from other
+		 * percpu areas which is important for congruent
+		 * percpu allocator.
+		 */
+		if (cpu == 0)
+			ia64_set_kr(IA64_KR_PER_CPU_DATA,
+				    (unsigned long)cpu_data -
+				    (unsigned long)__per_cpu_start);
+
+		cpu_data += PERCPU_PAGE_SIZE;
 	}
 #endif
 	return cpu_data;
 }
 
+#ifdef CONFIG_SMP
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas.  All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init setup_per_cpu_areas(void)
+{
+	struct pcpu_alloc_info *ai;
+	struct pcpu_group_info *uninitialized_var(gi);
+	unsigned int *cpu_map;
+	void *base;
+	unsigned long base_offset;
+	unsigned int cpu;
+	ssize_t static_size, reserved_size, dyn_size;
+	int node, prev_node, unit, nr_units, rc;
+
+	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
+	if (!ai)
+		panic("failed to allocate pcpu_alloc_info");
+	cpu_map = ai->groups[0].cpu_map;
+
+	/* determine base */
+	base = (void *)ULONG_MAX;
+	for_each_possible_cpu(cpu)
+		base = min(base,
+			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
+	base_offset = (void *)__per_cpu_start - base;
+
+	/* build cpu_map, units are grouped by node */
+	unit = 0;
+	for_each_node(node)
+		for_each_possible_cpu(cpu)
+			if (node == node_cpuid[cpu].nid)
+				cpu_map[unit++] = cpu;
+	nr_units = unit;
+
+	/* set basic parameters */
+	static_size = __per_cpu_end - __per_cpu_start;
+	reserved_size = PERCPU_MODULE_RESERVE;
+	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+	if (dyn_size < 0)
+		panic("percpu area overflow static=%zd reserved=%zd\n",
+		      static_size, reserved_size);
+
+	ai->static_size		= static_size;
+	ai->reserved_size	= reserved_size;
+	ai->dyn_size		= dyn_size;
+	ai->unit_size		= PERCPU_PAGE_SIZE;
+	ai->atom_size		= PAGE_SIZE;
+	ai->alloc_size		= PERCPU_PAGE_SIZE;
+
+	/*
+	 * CPUs are put into groups according to node.  Walk cpu_map
+	 * and create new groups at node boundaries.
+	 */
+	prev_node = -1;
+	ai->nr_groups = 0;
+	for (unit = 0; unit < nr_units; unit++) {
+		cpu = cpu_map[unit];
+		node = node_cpuid[cpu].nid;
+
+		if (node == prev_node) {
+			gi->nr_units++;
+			continue;
+		}
+		prev_node = node;
+
+		gi = &ai->groups[ai->nr_groups++];
+		gi->nr_units		= 1;
+		gi->base_offset		= __per_cpu_offset[cpu] + base_offset;
+		gi->cpu_map		= &cpu_map[unit];
+	}
+
+	rc = pcpu_setup_first_chunk(ai, base);
+	if (rc)
+		panic("failed to setup percpu area (err=%d)", rc);
+
+	pcpu_free_alloc_info(ai);
+}
+#endif
+
 /**
  * fill_pernode - initialize pernode data.
  * @node: the node id.
@@ -352,7 +450,8 @@ static void __init initialize_pernode_data(void)
 	/* Set the node_data pointer for each per-cpu struct */
 	for_each_possible_early_cpu(cpu) {
 		node = node_cpuid[cpu].nid;
-		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
+		per_cpu(ia64_cpu_info, cpu).node_data =
+			mem_data[node].node_data;
 	}
 #else
 	{
@@ -360,7 +459,7 @@ static void __init initialize_pernode_data(void)
 		cpu = 0;
 		node = node_cpuid[cpu].nid;
 		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
-			((char *)&per_cpu__cpu_info - __per_cpu_start));
+			((char *)&per_cpu__ia64_cpu_info - __per_cpu_start));
 		cpu0_cpu_info->node_data = mem_data[node].node_data;
 	}
 #endif /* CONFIG_SMP */
@@ -666,9 +765,9 @@ void __init paging_init(void)
 	sparse_init();
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	vmalloc_end -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
+	VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
 		sizeof(struct page));
-	vmem_map = (struct page *) vmalloc_end;
+	vmem_map = (struct page *) VMALLOC_END;
 	efi_memmap_walk(create_mem_map_page_table, NULL);
 	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
 #endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1857766a63c1..b9609c69343a 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -44,8 +44,8 @@ extern void ia64_tlb_init (void);
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-unsigned long vmalloc_end = VMALLOC_END_INIT;
-EXPORT_SYMBOL(vmalloc_end);
+unsigned long VMALLOC_END = VMALLOC_END_INIT;
+EXPORT_SYMBOL(VMALLOC_END);
 struct page *vmem_map;
 EXPORT_SYMBOL(vmem_map);
 #endif
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 1176506b2bae..e884ba4e031d 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -496,13 +496,13 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
 				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
 				stat->deadlocks,
-				1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-				1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-				1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+				1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
 				stat->shub_ptc_flushes_not_my_mm,
 				stat->deadlocks2,
 				stat->shub_ipi_flushes,
-				1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec);
+				1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec);
 	}
 	return 0;
 }
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
index f042e192d2fe..a3fb7cf9ae1d 100644
--- a/arch/ia64/xen/irq_xen.c
+++ b/arch/ia64/xen/irq_xen.c
@@ -63,19 +63,19 @@ xen_free_irq_vector(int vector)
 }
 
 
-static DEFINE_PER_CPU(int, timer_irq) = -1;
-static DEFINE_PER_CPU(int, ipi_irq) = -1;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, cmc_irq) = -1;
-static DEFINE_PER_CPU(int, cmcp_irq) = -1;
-static DEFINE_PER_CPU(int, cpep_irq) = -1;
+static DEFINE_PER_CPU(int, xen_timer_irq) = -1;
+static DEFINE_PER_CPU(int, xen_ipi_irq) = -1;
+static DEFINE_PER_CPU(int, xen_resched_irq) = -1;
+static DEFINE_PER_CPU(int, xen_cmc_irq) = -1;
+static DEFINE_PER_CPU(int, xen_cmcp_irq) = -1;
+static DEFINE_PER_CPU(int, xen_cpep_irq) = -1;
 #define NAME_SIZE	15
-static DEFINE_PER_CPU(char[NAME_SIZE], timer_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], ipi_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], resched_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], cmc_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], cmcp_name);
-static DEFINE_PER_CPU(char[NAME_SIZE], cpep_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_timer_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_ipi_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_resched_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmc_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmcp_name);
+static DEFINE_PER_CPU(char[NAME_SIZE], xen_cpep_name);
 #undef NAME_SIZE
 
 struct saved_irq {
@@ -144,64 +144,64 @@ __xen_register_percpu_irq(unsigned int cpu, unsigned int vec,
 	if (xen_slab_ready) {
 		switch (vec) {
 		case IA64_TIMER_VECTOR:
-			snprintf(per_cpu(timer_name, cpu),
-				 sizeof(per_cpu(timer_name, cpu)),
+			snprintf(per_cpu(xen_timer_name, cpu),
+				 sizeof(per_cpu(xen_timer_name, cpu)),
 				 "%s%d", action->name, cpu);
 			irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
 				action->handler, action->flags,
-				per_cpu(timer_name, cpu), action->dev_id);
-			per_cpu(timer_irq, cpu) = irq;
+				per_cpu(xen_timer_name, cpu), action->dev_id);
+			per_cpu(xen_timer_irq, cpu) = irq;
 			break;
 		case IA64_IPI_RESCHEDULE:
-			snprintf(per_cpu(resched_name, cpu),
-				 sizeof(per_cpu(resched_name, cpu)),
+			snprintf(per_cpu(xen_resched_name, cpu),
+				 sizeof(per_cpu(xen_resched_name, cpu)),
 				 "%s%d", action->name, cpu);
 			irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu,
 				action->handler, action->flags,
-				per_cpu(resched_name, cpu), action->dev_id);
-			per_cpu(resched_irq, cpu) = irq;
+				per_cpu(xen_resched_name, cpu), action->dev_id);
+			per_cpu(xen_resched_irq, cpu) = irq;
 			break;
 		case IA64_IPI_VECTOR:
-			snprintf(per_cpu(ipi_name, cpu),
-				 sizeof(per_cpu(ipi_name, cpu)),
+			snprintf(per_cpu(xen_ipi_name, cpu),
+				 sizeof(per_cpu(xen_ipi_name, cpu)),
 				 "%s%d", action->name, cpu);
 			irq = bind_ipi_to_irqhandler(XEN_IPI_VECTOR, cpu,
 				action->handler, action->flags,
-				per_cpu(ipi_name, cpu), action->dev_id);
-			per_cpu(ipi_irq, cpu) = irq;
+				per_cpu(xen_ipi_name, cpu), action->dev_id);
+			per_cpu(xen_ipi_irq, cpu) = irq;
 			break;
 		case IA64_CMC_VECTOR:
-			snprintf(per_cpu(cmc_name, cpu),
-				 sizeof(per_cpu(cmc_name, cpu)),
+			snprintf(per_cpu(xen_cmc_name, cpu),
+				 sizeof(per_cpu(xen_cmc_name, cpu)),
 				 "%s%d", action->name, cpu);
 			irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu,
-						      action->handler,
-						      action->flags,
-						      per_cpu(cmc_name, cpu),
-						      action->dev_id);
-			per_cpu(cmc_irq, cpu) = irq;
+						action->handler,
+						action->flags,
+						per_cpu(xen_cmc_name, cpu),
+						action->dev_id);
+			per_cpu(xen_cmc_irq, cpu) = irq;
 			break;
 		case IA64_CMCP_VECTOR:
-			snprintf(per_cpu(cmcp_name, cpu),
-				 sizeof(per_cpu(cmcp_name, cpu)),
+			snprintf(per_cpu(xen_cmcp_name, cpu),
+				 sizeof(per_cpu(xen_cmcp_name, cpu)),
 				 "%s%d", action->name, cpu);
 			irq = bind_ipi_to_irqhandler(XEN_CMCP_VECTOR, cpu,
-						     action->handler,
-						     action->flags,
-						     per_cpu(cmcp_name, cpu),
-						     action->dev_id);
-			per_cpu(cmcp_irq, cpu) = irq;
+						action->handler,
+						action->flags,
+						per_cpu(xen_cmcp_name, cpu),
+						action->dev_id);
+			per_cpu(xen_cmcp_irq, cpu) = irq;
 			break;
 		case IA64_CPEP_VECTOR:
-			snprintf(per_cpu(cpep_name, cpu),
-				 sizeof(per_cpu(cpep_name, cpu)),
+			snprintf(per_cpu(xen_cpep_name, cpu),
+				 sizeof(per_cpu(xen_cpep_name, cpu)),
 				 "%s%d", action->name, cpu);
 			irq = bind_ipi_to_irqhandler(XEN_CPEP_VECTOR, cpu,
-						     action->handler,
-						     action->flags,
-						     per_cpu(cpep_name, cpu),
-						     action->dev_id);
-			per_cpu(cpep_irq, cpu) = irq;
+						action->handler,
+						action->flags,
+						per_cpu(xen_cpep_name, cpu),
+						action->dev_id);
+			per_cpu(xen_cpep_irq, cpu) = irq;
 			break;
 		case IA64_CPE_VECTOR:
 		case IA64_MCA_RENDEZ_VECTOR:
@@ -275,30 +275,33 @@ unbind_evtchn_callback(struct notifier_block *nfb,
 
 	if (action == CPU_DEAD) {
 		/* Unregister evtchn.  */
-		if (per_cpu(cpep_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL);
-			per_cpu(cpep_irq, cpu) = -1;
+		if (per_cpu(xen_cpep_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_cpep_irq, cpu),
+					       NULL);
+			per_cpu(xen_cpep_irq, cpu) = -1;
 		}
-		if (per_cpu(cmcp_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL);
-			per_cpu(cmcp_irq, cpu) = -1;
+		if (per_cpu(xen_cmcp_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_cmcp_irq, cpu),
+					       NULL);
+			per_cpu(xen_cmcp_irq, cpu) = -1;
 		}
-		if (per_cpu(cmc_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL);
-			per_cpu(cmc_irq, cpu) = -1;
+		if (per_cpu(xen_cmc_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_cmc_irq, cpu), NULL);
+			per_cpu(xen_cmc_irq, cpu) = -1;
 		}
-		if (per_cpu(ipi_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(ipi_irq, cpu), NULL);
-			per_cpu(ipi_irq, cpu) = -1;
+		if (per_cpu(xen_ipi_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_ipi_irq, cpu), NULL);
+			per_cpu(xen_ipi_irq, cpu) = -1;
 		}
-		if (per_cpu(resched_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(resched_irq, cpu),
-						NULL);
-			per_cpu(resched_irq, cpu) = -1;
+		if (per_cpu(xen_resched_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu),
+					       NULL);
+			per_cpu(xen_resched_irq, cpu) = -1;
 		}
-		if (per_cpu(timer_irq, cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
-			per_cpu(timer_irq, cpu) = -1;
+		if (per_cpu(xen_timer_irq, cpu) >= 0) {
+			unbind_from_irqhandler(per_cpu(xen_timer_irq, cpu),
+					       NULL);
+			per_cpu(xen_timer_irq, cpu) = -1;
 		}
 	}
 	return NOTIFY_OK;
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index dbeadb9c8e20..c1c544513e8d 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -34,15 +34,15 @@
 
 #include "../kernel/fsyscall_gtod_data.h"
 
-DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
-DEFINE_PER_CPU(unsigned long, processed_stolen_time);
-DEFINE_PER_CPU(unsigned long, processed_blocked_time);
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
+static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
+static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
 
 /* taken from i386/kernel/time-xen.c */
 static void xen_init_missing_ticks_accounting(int cpu)
 {
 	struct vcpu_register_runstate_memory_area area;
-	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+	struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
 	int rc;
 
 	memset(runstate, 0, sizeof(*runstate));
@@ -52,8 +52,8 @@ static void xen_init_missing_ticks_accounting(int cpu)
 				&area);
 	WARN_ON(rc && rc != -ENOSYS);
 
-	per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
-	per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
+	per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
+	per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
 					    + runstate->time[RUNSTATE_offline];
 }
 
@@ -68,7 +68,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
 
 	BUG_ON(preemptible());
 
-	state = &__get_cpu_var(runstate);
+	state = &__get_cpu_var(xen_runstate);
 
 	/*
 	 * The runstate info is always updated by the hypervisor on
@@ -103,12 +103,12 @@ consider_steal_time(unsigned long new_itm)
 	 * This function just checks and reject this effect.
 	 */
 	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
-			   per_cpu(processed_blocked_time, cpu)))
+			   per_cpu(xen_blocked_time, cpu)))
 		blocked = 0;
 
 	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
 			   runstate.time[RUNSTATE_offline],
-			   per_cpu(processed_stolen_time, cpu)))
+			   per_cpu(xen_stolen_time, cpu)))
 		stolen = 0;
 
 	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
@@ -147,8 +147,8 @@ consider_steal_time(unsigned long new_itm)
 		} else {
 			local_cpu_data->itm_next = delta_itm + new_itm;
 		}
-		per_cpu(processed_stolen_time, cpu) += NS_PER_TICK * stolen;
-		per_cpu(processed_blocked_time, cpu) += NS_PER_TICK * blocked;
+		per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
+		per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
 	}
 	return delta_itm;
 }