summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/Kconfig4
-rw-r--r--arch/arm/Makefile1
-rw-r--r--arch/arm/common/gic.c45
-rw-r--r--arch/arm/common/vic.c19
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/assembler.h8
-rw-r--r--arch/arm/include/asm/cpu.h1
-rw-r--r--arch/arm/include/asm/cputype.h13
-rw-r--r--arch/arm/include/asm/cti.h20
-rw-r--r--arch/arm/include/asm/hardware/cache-l2x0.h5
-rw-r--r--arch/arm/include/asm/hardware/vic.h2
-rw-r--r--arch/arm/include/asm/hw_breakpoint.h8
-rw-r--r--arch/arm/include/asm/mach/serial_at91.h33
-rw-r--r--arch/arm/include/asm/mach/serial_sa1100.h31
-rw-r--r--arch/arm/include/asm/mach/udc_pxa2xx.h26
-rw-r--r--arch/arm/include/asm/mmu.h13
-rw-r--r--arch/arm/include/asm/mmu_context.h88
-rw-r--r--arch/arm/include/asm/percpu.h45
-rw-r--r--arch/arm/include/asm/perf_event.h7
-rw-r--r--arch/arm/include/asm/pgtable-2level.h2
-rw-r--r--arch/arm/include/asm/pgtable-3level.h4
-rw-r--r--arch/arm/include/asm/pgtable.h10
-rw-r--r--arch/arm/include/asm/pmu.h28
-rw-r--r--arch/arm/include/asm/prom.h2
-rw-r--r--arch/arm/include/asm/smp.h1
-rw-r--r--arch/arm/include/asm/smp_plat.h17
-rw-r--r--arch/arm/include/asm/syscall.h9
-rw-r--r--arch/arm/include/asm/thread_info.h7
-rw-r--r--arch/arm/kernel/devtree.c104
-rw-r--r--arch/arm/kernel/entry-common.S16
-rw-r--r--arch/arm/kernel/head-nommu.S2
-rw-r--r--arch/arm/kernel/hw_breakpoint.c154
-rw-r--r--arch/arm/kernel/perf_event.c85
-rw-r--r--arch/arm/kernel/perf_event_cpu.c74
-rw-r--r--arch/arm/kernel/perf_event_v6.c126
-rw-r--r--arch/arm/kernel/perf_event_v7.c246
-rw-r--r--arch/arm/kernel/perf_event_xscale.c157
-rw-r--r--arch/arm/kernel/process.c4
-rw-r--r--arch/arm/kernel/ptrace.c43
-rw-r--r--arch/arm/kernel/setup.c84
-rw-r--r--arch/arm/kernel/smp.c12
-rw-r--r--arch/arm/kernel/smp_twd.c54
-rw-r--r--arch/arm/kernel/topology.c42
-rw-r--r--arch/arm/kernel/vmlinux.lds.S19
-rw-r--r--arch/arm/mach-integrator/impd1.c7
-rw-r--r--arch/arm/mach-ixp4xx/include/mach/udc.h2
-rw-r--r--arch/arm/mach-omap2/pmu.c2
-rw-r--r--arch/arm/mach-pxa/include/mach/udc.h2
-rw-r--r--arch/arm/mach-realview/realview_eb.c1
-rw-r--r--arch/arm/mach-sa1100/assabet.c2
-rw-r--r--arch/arm/mach-sa1100/badge4.c2
-rw-r--r--arch/arm/mach-sa1100/cerf.c2
-rw-r--r--arch/arm/mach-sa1100/collie.c2
-rw-r--r--arch/arm/mach-sa1100/h3xxx.c2
-rw-r--r--arch/arm/mach-sa1100/hackkit.c2
-rw-r--r--arch/arm/mach-sa1100/jornada720.c2
-rw-r--r--arch/arm/mach-sa1100/lart.c2
-rw-r--r--arch/arm/mach-sa1100/nanoengine.c2
-rw-r--r--arch/arm/mach-sa1100/neponset.c2
-rw-r--r--arch/arm/mach-sa1100/pleb.c2
-rw-r--r--arch/arm/mach-sa1100/shannon.c2
-rw-r--r--arch/arm/mach-sa1100/simpad.c2
-rw-r--r--arch/arm/mm/cache-aurora-l2.h55
-rw-r--r--arch/arm/mm/cache-l2x0.c278
-rw-r--r--arch/arm/mm/context.c207
-rw-r--r--arch/arm/mm/idmap.c14
-rw-r--r--arch/arm/mm/ioremap.c16
-rw-r--r--arch/arm/mm/mmu.c2
-rw-r--r--arch/arm/mm/proc-macros.S4
-rw-r--r--arch/arm/mm/proc-v7-2level.S10
-rw-r--r--arch/arm/mm/proc-v7-3level.S5
-rw-r--r--arch/arm/mm/proc-v7.S2
-rw-r--r--arch/arm/net/bpf_jit_32.c6
-rw-r--r--arch/avr32/include/asm/mach/serial_at91.h33
74 files changed, 1372 insertions, 972 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8a027f9e339e..825066e29c38 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,8 +5,9 @@ config ARM
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select BUILDTIME_EXTABLE_SORT if MMU
 	select CPU_PM if (SUSPEND || CPU_IDLE)
-	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN
+	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU
 	select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI)
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_IRQ_PROBE
@@ -21,6 +22,7 @@ config ARM
 	select HAVE_AOUT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
 	select HAVE_C_RECORDMCOUNT
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c35baf102f6f..45096a1ee0a0 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -32,6 +32,7 @@ KBUILD_DEFCONFIG := versatile_defconfig
 # defines filename extension depending memory management type.
 ifeq ($(CONFIG_MMU),)
 MMUEXT		:= -nommu
+KBUILD_CFLAGS	+= $(call cc-option,-mno-unaligned-access)
 endif
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index aa5269984187..36ae03a3f5d1 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -70,6 +70,14 @@ struct gic_chip_data {
 static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 
 /*
+ * The GIC mapping of CPU interfaces does not necessarily match
+ * the logical CPU numbering.  Let's use a mapping as returned
+ * by the GIC itself.
+ */
+#define NR_GIC_CPU_IF 8
+static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;
+
+/*
  * Supported arch specific GIC irq extension.
  * Default make them NULL.
  */
@@ -238,11 +246,11 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
 	u32 val, mask, bit;
 
-	if (cpu >= 8 || cpu >= nr_cpu_ids)
+	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
 	mask = 0xff << shift;
-	bit = 1 << (cpu_logical_map(cpu) + shift);
+	bit = gic_cpu_map[cpu] << shift;
 
 	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
@@ -349,11 +357,6 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
 	u32 cpumask;
 	unsigned int gic_irqs = gic->gic_irqs;
 	void __iomem *base = gic_data_dist_base(gic);
-	u32 cpu = cpu_logical_map(smp_processor_id());
-
-	cpumask = 1 << cpu;
-	cpumask |= cpumask << 8;
-	cpumask |= cpumask << 16;
 
 	writel_relaxed(0, base + GIC_DIST_CTRL);
 
@@ -366,6 +369,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
 	/*
 	 * Set all global interrupts to this CPU only.
 	 */
+	cpumask = readl_relaxed(base + GIC_DIST_TARGET + 0);
 	for (i = 32; i < gic_irqs; i += 4)
 		writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
 
@@ -389,9 +393,25 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 {
 	void __iomem *dist_base = gic_data_dist_base(gic);
 	void __iomem *base = gic_data_cpu_base(gic);
+	unsigned int cpu_mask, cpu = smp_processor_id();
 	int i;
 
 	/*
+	 * Get what the GIC says our CPU mask is.
+	 */
+	BUG_ON(cpu >= NR_GIC_CPU_IF);
+	cpu_mask = readl_relaxed(dist_base + GIC_DIST_TARGET + 0);
+	gic_cpu_map[cpu] = cpu_mask;
+
+	/*
+	 * Clear our mask from the other map entries in case they're
+	 * still undefined.
+	 */
+	for (i = 0; i < NR_GIC_CPU_IF; i++)
+		if (i != cpu)
+			gic_cpu_map[i] &= ~cpu_mask;
+
+	/*
 	 * Deal with the banked PPI and SGI interrupts - disable all
 	 * PPI interrupts, ensure all SGI interrupts are enabled.
 	 */
@@ -646,7 +666,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 {
 	irq_hw_number_t hwirq_base;
 	struct gic_chip_data *gic;
-	int gic_irqs, irq_base;
+	int gic_irqs, irq_base, i;
 
 	BUG_ON(gic_nr >= MAX_GIC_NR);
 
@@ -683,6 +703,13 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 	}
 
 	/*
+	 * Initialize the CPU interface map to all CPUs.
+	 * It will be refined as each CPU probes its ID.
+	 */
+	for (i = 0; i < NR_GIC_CPU_IF; i++)
+		gic_cpu_map[i] = 0xff;
+
+	/*
 	 * For primary GICs, skip over SGIs.
 	 * For secondary GICs, skip over PPIs, too.
 	 */
@@ -737,7 +764,7 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/* Convert our logical CPU mask into a physical one. */
 	for_each_cpu(cpu, mask)
-		map |= 1 << cpu_logical_map(cpu);
+		map |= gic_cpu_map[cpu];
 
 	/*
 	 * Ensure that stores to Normal memory are visible to the
diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index e0d538803cc3..e4df17ca90c7 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -218,7 +218,7 @@ static void __init vic_register(void __iomem *base, unsigned int irq,
 	v->resume_sources = resume_sources;
 	v->irq = irq;
 	vic_id++;
-	v->domain = irq_domain_add_legacy(node, fls(valid_sources), irq, 0,
+	v->domain = irq_domain_add_simple(node, fls(valid_sources), irq,
 					  &vic_irqdomain_ops, v);
 }
 
@@ -350,7 +350,7 @@ static void __init vic_init_st(void __iomem *base, unsigned int irq_start,
 	vic_register(base, irq_start, vic_sources, 0, node);
 }
 
-void __init __vic_init(void __iomem *base, unsigned int irq_start,
+void __init __vic_init(void __iomem *base, int irq_start,
 			      u32 vic_sources, u32 resume_sources,
 			      struct device_node *node)
 {
@@ -407,7 +407,6 @@ void __init vic_init(void __iomem *base, unsigned int irq_start,
 int __init vic_of_init(struct device_node *node, struct device_node *parent)
 {
 	void __iomem *regs;
-	int irq_base;
 
 	if (WARN(parent, "non-root VICs are not supported"))
 		return -EINVAL;
@@ -416,18 +415,12 @@ int __init vic_of_init(struct device_node *node, struct device_node *parent)
 	if (WARN_ON(!regs))
 		return -EIO;
 
-	irq_base = irq_alloc_descs(-1, 0, 32, numa_node_id());
-	if (WARN_ON(irq_base < 0))
-		goto out_unmap;
-
-	__vic_init(regs, irq_base, ~0, ~0, node);
+	/*
+	 * Passing -1 as first IRQ makes the simple domain allocate descriptors
+	 */
+	__vic_init(regs, -1, ~0, ~0, node);
 
 	return 0;
-
- out_unmap:
-	iounmap(regs);
-
-	return -EIO;
 }
 #endif /* CONFIG OF */
 
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 514e398f1a07..d3db39860b9c 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += local64.h
 generic-y += msgbuf.h
 generic-y += param.h
 generic-y += parport.h
-generic-y += percpu.h
 generic-y += poll.h
 generic-y += resource.h
 generic-y += sections.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 2ef95813fce0..eb87200aa4b5 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -250,6 +250,7 @@
  * Beware, it also clobers LR.
  */
 .macro safe_svcmode_maskall reg:req
+#if __LINUX_ARM_ARCH__ >= 6
 	mrs	\reg , cpsr
 	mov	lr , \reg
 	and	lr , lr , #MODE_MASK
@@ -266,6 +267,13 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	__ERET
 1:	msr	cpsr_c, \reg
 2:
+#else
+/*
+ * workaround for possibly broken pre-v6 hardware
+ * (akita, Sharp Zaurus C-1000, PXA270-based)
+ */
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, \reg
+#endif
 .endm
 
 /*
diff --git a/arch/arm/include/asm/cpu.h b/arch/arm/include/asm/cpu.h
index d797223b39d5..2744f0602550 100644
--- a/arch/arm/include/asm/cpu.h
+++ b/arch/arm/include/asm/cpu.h
@@ -15,6 +15,7 @@
 
 struct cpuinfo_arm {
 	struct cpu	cpu;
+	u32		cpuid;
 #ifdef CONFIG_SMP
 	unsigned int	loops_per_jiffy;
 #endif
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index cb47d28cbe1f..a59dcb5ab5fc 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -25,6 +25,19 @@
 #define CPUID_EXT_ISAR4	"c2, 4"
 #define CPUID_EXT_ISAR5	"c2, 5"
 
+#define MPIDR_SMP_BITMASK (0x3 << 30)
+#define MPIDR_SMP_VALUE (0x2 << 30)
+
+#define MPIDR_MT_BITMASK (0x1 << 24)
+
+#define MPIDR_HWID_BITMASK 0xFFFFFF
+
+#define MPIDR_LEVEL_BITS 8
+#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+
+#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
+	((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK)
+
 extern unsigned int processor_id;
 
 #ifdef CONFIG_CPU_CP15
diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h
index a0ada3ea4358..f2e5cad3f306 100644
--- a/arch/arm/include/asm/cti.h
+++ b/arch/arm/include/asm/cti.h
@@ -146,15 +146,7 @@ static inline void cti_irq_ack(struct cti *cti)
  */
 static inline void cti_unlock(struct cti *cti)
 {
-	void __iomem *base = cti->base;
-	unsigned long val;
-
-	val = __raw_readl(base + LOCKSTATUS);
-
-	if (val & 1) {
-		val = LOCKCODE;
-		__raw_writel(val, base + LOCKACCESS);
-	}
+	__raw_writel(LOCKCODE, cti->base + LOCKACCESS);
 }
 
 /**
@@ -166,14 +158,6 @@ static inline void cti_unlock(struct cti *cti)
  */
 static inline void cti_lock(struct cti *cti)
 {
-	void __iomem *base = cti->base;
-	unsigned long val;
-
-	val = __raw_readl(base + LOCKSTATUS);
-
-	if (!(val & 1)) {
-		val = ~LOCKCODE;
-		__raw_writel(val, base + LOCKACCESS);
-	}
+	__raw_writel(~LOCKCODE, cti->base + LOCKACCESS);
 }
 #endif
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index c4c87bc12231..3b2c40b5bfa2 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -102,6 +102,10 @@
 
 #define L2X0_ADDR_FILTER_EN		1
 
+#define L2X0_CTRL_EN			1
+
+#define L2X0_WAY_SIZE_SHIFT		3
+
 #ifndef __ASSEMBLY__
 extern void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask);
 #if defined(CONFIG_CACHE_L2X0) && defined(CONFIG_OF)
@@ -126,6 +130,7 @@ struct l2x0_regs {
 	unsigned long filter_end;
 	unsigned long prefetch_ctrl;
 	unsigned long pwr_ctrl;
+	unsigned long ctrl;
 };
 
 extern struct l2x0_regs l2x0_saved_regs;
diff --git a/arch/arm/include/asm/hardware/vic.h b/arch/arm/include/asm/hardware/vic.h
index e14af1a1a320..2bebad36fc83 100644
--- a/arch/arm/include/asm/hardware/vic.h
+++ b/arch/arm/include/asm/hardware/vic.h
@@ -47,7 +47,7 @@
 struct device_node;
 struct pt_regs;
 
-void __vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources,
+void __vic_init(void __iomem *base, int irq_start, u32 vic_sources,
 		u32 resume_sources, struct device_node *node);
 void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, u32 resume_sources);
 int vic_of_init(struct device_node *node, struct device_node *parent);
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index c190bc992f0e..01169dd723f1 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -98,12 +98,12 @@ static inline void decode_ctrl_reg(u32 reg,
 #define ARM_BASE_WCR		112
 
 /* Accessor macros for the debug registers. */
-#define ARM_DBG_READ(M, OP2, VAL) do {\
-	asm volatile("mrc p14, 0, %0, c0," #M ", " #OP2 : "=r" (VAL));\
+#define ARM_DBG_READ(N, M, OP2, VAL) do {\
+	asm volatile("mrc p14, 0, %0, " #N "," #M ", " #OP2 : "=r" (VAL));\
 } while (0)
 
-#define ARM_DBG_WRITE(M, OP2, VAL) do {\
-	asm volatile("mcr p14, 0, %0, c0," #M ", " #OP2 : : "r" (VAL));\
+#define ARM_DBG_WRITE(N, M, OP2, VAL) do {\
+	asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\
 } while (0)
 
 struct notifier_block;
diff --git a/arch/arm/include/asm/mach/serial_at91.h b/arch/arm/include/asm/mach/serial_at91.h
deleted file mode 100644
index ea6d063923b8..000000000000
--- a/arch/arm/include/asm/mach/serial_at91.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  arch/arm/include/asm/mach/serial_at91.h
- *
- *  Based on serial_sa1100.h  by Nicolas Pitre
- *
- *  Copyright (C) 2002 ATMEL Rousset
- *
- *  Low level machine dependent UART functions.
- */
-
-struct uart_port;
-
-/*
- * This is a temporary structure for registering these
- * functions; it is intended to be discarded after boot.
- */
-struct atmel_port_fns {
-	void	(*set_mctrl)(struct uart_port *, u_int);
-	u_int	(*get_mctrl)(struct uart_port *);
-	void	(*enable_ms)(struct uart_port *);
-	void	(*pm)(struct uart_port *, u_int, u_int);
-	int	(*set_wake)(struct uart_port *, u_int);
-	int	(*open)(struct uart_port *);
-	void	(*close)(struct uart_port *);
-};
-
-#if defined(CONFIG_SERIAL_ATMEL)
-void atmel_register_uart_fns(struct atmel_port_fns *fns);
-#else
-#define atmel_register_uart_fns(fns) do { } while (0)
-#endif
-
-
diff --git a/arch/arm/include/asm/mach/serial_sa1100.h b/arch/arm/include/asm/mach/serial_sa1100.h
deleted file mode 100644
index d09064bf95a0..000000000000
--- a/arch/arm/include/asm/mach/serial_sa1100.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- *  arch/arm/include/asm/mach/serial_sa1100.h
- *
- *  Author: Nicolas Pitre
- *
- * Moved and changed lots, Russell King
- *
- * Low level machine dependent UART functions.
- */
-
-struct uart_port;
-struct uart_info;
-
-/*
- * This is a temporary structure for registering these
- * functions; it is intended to be discarded after boot.
- */
-struct sa1100_port_fns {
-	void	(*set_mctrl)(struct uart_port *, u_int);
-	u_int	(*get_mctrl)(struct uart_port *);
-	void	(*pm)(struct uart_port *, u_int, u_int);
-	int	(*set_wake)(struct uart_port *, u_int);
-};
-
-#ifdef CONFIG_SERIAL_SA1100
-void sa1100_register_uart_fns(struct sa1100_port_fns *fns);
-void sa1100_register_uart(int idx, int port);
-#else
-#define sa1100_register_uart_fns(fns) do { } while (0)
-#define sa1100_register_uart(idx,port) do { } while (0)
-#endif
diff --git a/arch/arm/include/asm/mach/udc_pxa2xx.h b/arch/arm/include/asm/mach/udc_pxa2xx.h
deleted file mode 100644
index ea297ac70bc6..000000000000
--- a/arch/arm/include/asm/mach/udc_pxa2xx.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * arch/arm/include/asm/mach/udc_pxa2xx.h
- *
- * This supports machine-specific differences in how the PXA2xx
- * USB Device Controller (UDC) is wired.
- *
- * It is set in linux/arch/arm/mach-pxa/<machine>.c or in
- * linux/arch/mach-ixp4xx/<machine>.c and used in
- * the probe routine of linux/drivers/usb/gadget/pxa2xx_udc.c
- */
-
-struct pxa2xx_udc_mach_info {
-        int  (*udc_is_connected)(void);		/* do we see host? */
-        void (*udc_command)(int cmd);
-#define	PXA2XX_UDC_CMD_CONNECT		0	/* let host see us */
-#define	PXA2XX_UDC_CMD_DISCONNECT	1	/* so host won't see us */
-
-	/* Boards following the design guidelines in the developer's manual,
-	 * with on-chip GPIOs not Lubbock's weird hardware, can have a sane
-	 * VBUS IRQ and omit the methods above.  Store the GPIO number
-	 * here.  Note that sometimes the signals go through inverters...
-	 */
-	bool	gpio_pullup_inverted;
-	int	gpio_pullup;			/* high == pullup activated */
-};
-
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 14965658a923..9f77e7804f3b 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -5,18 +5,15 @@
 
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
-	unsigned int id;
-	raw_spinlock_t id_lock;
+	u64 id;
 #endif
-	unsigned int kvm_seq;
+	unsigned int vmalloc_seq;
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
-#define ASID(mm)	((mm)->context.id & 255)
-
-/* init_mm.context.id_lock should be initialized. */
-#define INIT_MM_CONTEXT(name)                                                 \
-	.context.id_lock    = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
+#define ASID_BITS	8
+#define ASID_MASK	((~0ULL) << ASID_BITS)
+#define ASID(mm)	((mm)->context.id & ~ASID_MASK)
 #else
 #define ASID(mm)	(0)
 #endif
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 0306bc642c0d..e1f644bc7cc5 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -20,88 +20,12 @@
 #include <asm/proc-fns.h>
 #include <asm-generic/mm_hooks.h>
 
-void __check_kvm_seq(struct mm_struct *mm);
+void __check_vmalloc_seq(struct mm_struct *mm);
 
 #ifdef CONFIG_CPU_HAS_ASID
 
-/*
- * On ARMv6, we have the following structure in the Context ID:
- *
- * 31                         7          0
- * +-------------------------+-----------+
- * |      process ID         |   ASID    |
- * +-------------------------+-----------+
- * |              context ID             |
- * +-------------------------------------+
- *
- * The ASID is used to tag entries in the CPU caches and TLBs.
- * The context ID is used by debuggers and trace logic, and
- * should be unique within all running processes.
- */
-#define ASID_BITS		8
-#define ASID_MASK		((~0) << ASID_BITS)
-#define ASID_FIRST_VERSION	(1 << ASID_BITS)
-
-extern unsigned int cpu_last_asid;
-
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void __new_context(struct mm_struct *mm);
-void cpu_set_reserved_ttbr0(void);
-
-static inline void switch_new_context(struct mm_struct *mm)
-{
-	unsigned long flags;
-
-	__new_context(mm);
-
-	local_irq_save(flags);
-	cpu_switch_mm(mm->pgd, mm);
-	local_irq_restore(flags);
-}
-
-static inline void check_and_switch_context(struct mm_struct *mm,
-					    struct task_struct *tsk)
-{
-	if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq))
-		__check_kvm_seq(mm);
-
-	/*
-	 * Required during context switch to avoid speculative page table
-	 * walking with the wrong TTBR.
-	 */
-	cpu_set_reserved_ttbr0();
-
-	if (!((mm->context.id ^ cpu_last_asid) >> ASID_BITS))
-		/*
-		 * The ASID is from the current generation, just switch to the
-		 * new pgd. This condition is only true for calls from
-		 * context_switch() and interrupts are already disabled.
-		 */
-		cpu_switch_mm(mm->pgd, mm);
-	else if (irqs_disabled())
-		/*
-		 * Defer the new ASID allocation until after the context
-		 * switch critical region since __new_context() cannot be
-		 * called with interrupts disabled (it sends IPIs).
-		 */
-		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
-	else
-		/*
-		 * That is a direct call to switch_mm() or activate_mm() with
-		 * interrupts enabled and a new context.
-		 */
-		switch_new_context(mm);
-}
-
-#define init_new_context(tsk,mm)	(__init_new_context(tsk,mm),0)
-
-#define finish_arch_post_lock_switch \
-	finish_arch_post_lock_switch
-static inline void finish_arch_post_lock_switch(void)
-{
-	if (test_and_clear_thread_flag(TIF_SWITCH_MM))
-		switch_new_context(current->mm);
-}
+void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
+#define init_new_context(tsk,mm)	({ mm->context.id = 0; })
 
 #else	/* !CONFIG_CPU_HAS_ASID */
 
@@ -110,8 +34,8 @@ static inline void finish_arch_post_lock_switch(void)
 static inline void check_and_switch_context(struct mm_struct *mm,
 					    struct task_struct *tsk)
 {
-	if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq))
-		__check_kvm_seq(mm);
+	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
+		__check_vmalloc_seq(mm);
 
 	if (irqs_disabled())
 		/*
@@ -143,6 +67,7 @@ static inline void finish_arch_post_lock_switch(void)
 #endif	/* CONFIG_CPU_HAS_ASID */
 
 #define destroy_context(mm)		do { } while(0)
+#define activate_mm(prev,next)		switch_mm(prev, next, NULL)
 
 /*
  * This is called when "tsk" is about to enter lazy TLB mode.
@@ -186,6 +111,5 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
-#define activate_mm(prev,next)	switch_mm(prev, next, NULL)
 
 #endif
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
new file mode 100644
index 000000000000..968c0a14e0a3
--- /dev/null
+++ b/arch/arm/include/asm/percpu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2012 Calxeda, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_ARM_PERCPU_H_
+#define _ASM_ARM_PERCPU_H_
+
+/*
+ * Same as asm-generic/percpu.h, except that we store the per cpu offset
+ * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
+ */
+#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
+static inline void set_my_cpu_offset(unsigned long off)
+{
+	/* Set TPIDRPRW */
+	asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
+}
+
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long off;
+	/* Read TPIDRPRW */
+	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory");
+	return off;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+#define set_my_cpu_offset(x)	do {} while(0)
+
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/percpu.h>
+
+#endif /* _ASM_ARM_PERCPU_H_ */
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 625cd621a436..755877527cf9 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -21,4 +21,11 @@
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
+#ifdef CONFIG_HW_PERF_EVENTS
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#endif
+
 #endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 2317a71c8f8e..f97ee02386ee 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -115,6 +115,7 @@
  * The PTE table pointer refers to the hardware entries; the "Linux"
  * entries are stored 1024 bytes below.
  */
+#define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
 #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
 #define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
@@ -123,6 +124,7 @@
 #define L_PTE_USER		(_AT(pteval_t, 1) << 8)
 #define L_PTE_XN		(_AT(pteval_t, 1) << 9)
 #define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
+#define L_PTE_NONE		(_AT(pteval_t, 1) << 11)
 
 /*
  * These are the memory types, defined to be compatible with
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index b24903549d1c..a3f37929940a 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -67,7 +67,8 @@
  * These bits overlap with the hardware bits but the naming is preserved for
  * consistency with the classic page table format.
  */
-#define L_PTE_PRESENT		(_AT(pteval_t, 3) << 0)		/* Valid */
+#define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
+#define L_PTE_PRESENT		(_AT(pteval_t, 3) << 0)		/* Present */
 #define L_PTE_FILE		(_AT(pteval_t, 1) << 2)		/* only when !PRESENT */
 #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)		/* AP[2] */
@@ -76,6 +77,7 @@
 #define L_PTE_XN		(_AT(pteval_t, 1) << 54)	/* XN */
 #define L_PTE_DIRTY		(_AT(pteval_t, 1) << 55)	/* unused */
 #define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)	/* unused */
+#define L_PTE_NONE		(_AT(pteval_t, 1) << 57)	/* PROT_NONE */
 
 /*
  * To be used in assembly code with the upper page attributes.
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 08c12312a1f9..9c82f988c0e3 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -73,7 +73,7 @@ extern pgprot_t		pgprot_kernel;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
-#define PAGE_NONE		_MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY)
+#define PAGE_NONE		_MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY | L_PTE_NONE)
 #define PAGE_SHARED		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN)
 #define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER)
 #define PAGE_COPY		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
@@ -83,7 +83,7 @@ extern pgprot_t		pgprot_kernel;
 #define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC	pgprot_kernel
 
-#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
 #define __PAGE_SHARED_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER)
 #define __PAGE_COPY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
@@ -203,9 +203,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
 #define pte_special(pte)	(0)
 
-#define pte_present_user(pte) \
-	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
-	 (L_PTE_PRESENT | L_PTE_USER))
+#define pte_present_user(pte)  (pte_present(pte) && (pte_val(pte) & L_PTE_USER))
 
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
@@ -242,7 +240,7 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER;
+	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER | L_PTE_NONE;
 	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
 	return pte;
 }
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index a26170dce02e..f24edad26c70 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -67,19 +67,19 @@ struct arm_pmu {
 	cpumask_t	active_irqs;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct hw_perf_event *evt, int idx);
-	void		(*disable)(struct hw_perf_event *evt, int idx);
+	void		(*enable)(struct perf_event *event);
+	void		(*disable)(struct perf_event *event);
 	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
-					 struct hw_perf_event *hwc);
+					 struct perf_event *event);
 	int		(*set_event_filter)(struct hw_perf_event *evt,
 					    struct perf_event_attr *attr);
-	u32		(*read_counter)(int idx);
-	void		(*write_counter)(int idx, u32 val);
-	void		(*start)(void);
-	void		(*stop)(void);
+	u32		(*read_counter)(struct perf_event *event);
+	void		(*write_counter)(struct perf_event *event, u32 val);
+	void		(*start)(struct arm_pmu *);
+	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
-	int		(*request_irq)(irq_handler_t handler);
-	void		(*free_irq)(void);
+	int		(*request_irq)(struct arm_pmu *, irq_handler_t handler);
+	void		(*free_irq)(struct arm_pmu *);
 	int		(*map_event)(struct perf_event *event);
 	int		num_events;
 	atomic_t	active_events;
@@ -93,15 +93,11 @@ struct arm_pmu {
 
 extern const struct dev_pm_ops armpmu_dev_pm_ops;
 
-int armpmu_register(struct arm_pmu *armpmu, char *name, int type);
+int armpmu_register(struct arm_pmu *armpmu, int type);
 
-u64 armpmu_event_update(struct perf_event *event,
-			struct hw_perf_event *hwc,
-			int idx);
+u64 armpmu_event_update(struct perf_event *event);
 
-int armpmu_event_set_period(struct perf_event *event,
-			    struct hw_perf_event *hwc,
-			    int idx);
+int armpmu_event_set_period(struct perf_event *event);
 
 int armpmu_map_event(struct perf_event *event,
 		     const unsigned (*event_map)[PERF_COUNT_HW_MAX],
diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h
index 6d65ba222db9..a219227c3e43 100644
--- a/arch/arm/include/asm/prom.h
+++ b/arch/arm/include/asm/prom.h
@@ -17,6 +17,7 @@
 
 extern struct machine_desc *setup_machine_fdt(unsigned int dt_phys);
 extern void arm_dt_memblock_reserve(void);
+extern void __init arm_dt_init_cpu_maps(void);
 
 #else /* CONFIG_OF */
 
@@ -26,6 +27,7 @@ static inline struct machine_desc *setup_machine_fdt(unsigned int dt_phys)
 }
 
 static inline void arm_dt_memblock_reserve(void) { }
+static inline void arm_dt_init_cpu_maps(void) { }
 
 #endif /* CONFIG_OF */
 #endif /* ASMARM_PROM_H */
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 2e3be16c6766..d3a22bebe6ce 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -79,6 +79,7 @@ extern void cpu_die(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
 
 struct smp_operations {
 #ifdef CONFIG_SMP
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index 558d6c80aca9..aaa61b6f50ff 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -5,6 +5,9 @@
 #ifndef __ASMARM_SMP_PLAT_H
 #define __ASMARM_SMP_PLAT_H
 
+#include <linux/cpumask.h>
+#include <linux/err.h>
+
 #include <asm/cputype.h>
 
 /*
@@ -48,5 +51,19 @@ static inline int cache_ops_need_broadcast(void)
  */
 extern int __cpu_logical_map[];
 #define cpu_logical_map(cpu)	__cpu_logical_map[cpu]
+/*
+ * Retrieve logical cpu index corresponding to a given MPIDR[23:0]
+ *  - mpidr: MPIDR[23:0] to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
+ */
+static inline int get_logical_index(u32 mpidr)
+{
+	int cpu;
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+		if (cpu_logical_map(cpu) == mpidr)
+			return cpu;
+	return -EINVAL;
+}
 
 #endif
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 9fdded6b1089..f1d96d4e8092 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -7,6 +7,8 @@
 #ifndef _ASM_ARM_SYSCALL_H
 #define _ASM_ARM_SYSCALL_H
 
+#include <linux/audit.h> /* for AUDIT_ARCH_* */
+#include <linux/elf.h> /* for ELF_EM */
 #include <linux/err.h>
 #include <linux/sched.h>
 
@@ -95,4 +97,11 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
 }
 
+static inline int syscall_get_arch(struct task_struct *task,
+				   struct pt_regs *regs)
+{
+	/* ARM tasks don't change audit architectures on the fly. */
+	return AUDIT_ARCH_ARM;
+}
+
 #endif /* _ASM_ARM_SYSCALL_H */
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 8477b4c1d39f..cddda1f41f0f 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -151,10 +151,10 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_SYSCALL_TRACE	8
 #define TIF_SYSCALL_AUDIT	9
 #define TIF_SYSCALL_TRACEPOINT	10
+#define TIF_SECCOMP		11	/* seccomp syscall filtering active */
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
-#define TIF_SECCOMP		21
 #define TIF_SWITCH_MM		22	/* deferred switch_mm */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -163,11 +163,12 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
 
 /* Checks for any syscall work in entry-common.S */
-#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+			   _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
 
 /*
  * Change these and you break ASM code in entry-common.S
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index bee7f9d47f02..70f1bdeb241b 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -19,8 +19,10 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 
+#include <asm/cputype.h>
 #include <asm/setup.h>
 #include <asm/page.h>
+#include <asm/smp_plat.h>
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
 
@@ -61,6 +63,108 @@ void __init arm_dt_memblock_reserve(void)
 	}
 }
 
+/*
+ * arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree
+ * and builds the cpu logical map array containing MPIDR values related to
+ * logical cpus
+ *
+ * Updates the cpu possible mask with the number of parsed cpu nodes
+ */
+void __init arm_dt_init_cpu_maps(void)
+{
+	/*
+	 * Temp logical map is initialized with UINT_MAX values that are
+	 * considered invalid logical map entries since the logical map must
+	 * contain a list of MPIDR[23:0] values where MPIDR[31:24] must
+	 * read as 0.
+	 */
+	struct device_node *cpu, *cpus;
+	u32 i, j, cpuidx = 1;
+	u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
+
+	u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = UINT_MAX };
+	bool bootcpu_valid = false;
+	cpus = of_find_node_by_path("/cpus");
+
+	if (!cpus)
+		return;
+
+	for_each_child_of_node(cpus, cpu) {
+		u32 hwid;
+
+		pr_debug(" * %s...\n", cpu->full_name);
+		/*
+		 * A device tree containing CPU nodes with missing "reg"
+		 * properties is considered invalid to build the
+		 * cpu_logical_map.
+		 */
+		if (of_property_read_u32(cpu, "reg", &hwid)) {
+			pr_debug(" * %s missing reg property\n",
+				     cpu->full_name);
+			return;
+		}
+
+		/*
+		 * 8 MSBs must be set to 0 in the DT since the reg property
+		 * defines the MPIDR[23:0].
+		 */
+		if (hwid & ~MPIDR_HWID_BITMASK)
+			return;
+
+		/*
+		 * Duplicate MPIDRs are a recipe for disaster.
+		 * Scan all initialized entries and check for
+		 * duplicates. If any is found just bail out.
+		 * temp values were initialized to UINT_MAX
+		 * to avoid matching valid MPIDR[23:0] values.
+		 */
+		for (j = 0; j < cpuidx; j++)
+			if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg "
+						     "properties in the DT\n"))
+				return;
+
+		/*
+		 * Build a stashed array of MPIDR values. Numbering scheme
+		 * requires that if detected the boot CPU must be assigned
+		 * logical id 0. Other CPUs get sequential indexes starting
+		 * from 1. If a CPU node with a reg property matching the
+		 * boot CPU MPIDR is detected, this is recorded so that the
+		 * logical map built from DT is validated and can be used
+		 * to override the map created in smp_setup_processor_id().
+		 */
+		if (hwid == mpidr) {
+			i = 0;
+			bootcpu_valid = true;
+		} else {
+			i = cpuidx++;
+		}
+
+		if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than "
+					       "max cores %u, capping them\n",
+					       cpuidx, nr_cpu_ids)) {
+			cpuidx = nr_cpu_ids;
+			break;
+		}
+
+		tmp_map[i] = hwid;
+	}
+
+	if (WARN(!bootcpu_valid, "DT missing boot CPU MPIDR[23:0], "
+				 "fall back to default cpu_logical_map\n"))
+		return;
+
+	/*
+	 * Since the boot CPU node contains proper data, and all nodes have
+	 * a reg property, the DT CPU list can be considered valid and the
+	 * logical map created in smp_setup_processor_id() can be overridden
+	 */
+	for (i = 0; i < cpuidx; i++) {
+		set_cpu_possible(i, true);
+		cpu_logical_map(i) = tmp_map[i];
+		pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i));
+	}
+}
+
 /**
  * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
  * @dt_phys: physical address of dt blob
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 34711757ba59..804153c0a9cf 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -417,16 +417,6 @@ local_restart:
 	ldr	r10, [tsk, #TI_FLAGS]		@ check for syscall tracing
 	stmdb	sp!, {r4, r5}			@ push fifth and sixth args
 
-#ifdef CONFIG_SECCOMP
-	tst	r10, #_TIF_SECCOMP
-	beq	1f
-	mov	r0, scno
-	bl	__secure_computing	
-	add	r0, sp, #S_R0 + S_OFF		@ pointer to regs
-	ldmia	r0, {r0 - r3}			@ have to reload r0 - r3
-1:
-#endif
-
 	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
 	bne	__sys_trace
 
@@ -458,11 +448,13 @@ __sys_trace:
 	ldmccia	r1, {r0 - r6}			@ have to reload r0 - r6
 	stmccia	sp, {r4, r5}			@ and update the stack args
 	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
-	b	2b
+	cmp	scno, #-1			@ skip the syscall?
+	bne	2b
+	add	sp, sp, #S_OFF			@ restore stack
+	b	ret_slow_syscall
 
 __sys_trace_return:
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
-	mov	r1, scno
 	mov	r0, sp
 	bl	syscall_trace_exit
 	b	ret_slow_syscall
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 278cfc144f44..2c228a07e58c 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -68,7 +68,7 @@ __after_proc_init:
 	 * CP15 system control register value returned in r0 from
 	 * the CPU init function.
 	 */
-#ifdef CONFIG_ALIGNMENT_TRAP
+#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
 	orr	r0, r0, #CR_A
 #else
 	bic	r0, r0, #CR_A
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 281bf3301241..5ff2e77782b1 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -52,14 +52,14 @@ static u8 debug_arch;
 /* Maximum supported watchpoint length. */
 static u8 max_watchpoint_len;
 
-#define READ_WB_REG_CASE(OP2, M, VAL)		\
-	case ((OP2 << 4) + M):			\
-		ARM_DBG_READ(c ## M, OP2, VAL); \
+#define READ_WB_REG_CASE(OP2, M, VAL)			\
+	case ((OP2 << 4) + M):				\
+		ARM_DBG_READ(c0, c ## M, OP2, VAL);	\
 		break
 
-#define WRITE_WB_REG_CASE(OP2, M, VAL)		\
-	case ((OP2 << 4) + M):			\
-		ARM_DBG_WRITE(c ## M, OP2, VAL);\
+#define WRITE_WB_REG_CASE(OP2, M, VAL)			\
+	case ((OP2 << 4) + M):				\
+		ARM_DBG_WRITE(c0, c ## M, OP2, VAL);	\
 		break
 
 #define GEN_READ_WB_REG_CASES(OP2, VAL)		\
@@ -136,12 +136,12 @@ static u8 get_debug_arch(void)
 
 	/* Do we implement the extended CPUID interface? */
 	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
-		pr_warning("CPUID feature registers not supported. "
-			   "Assuming v6 debug is present.\n");
+		pr_warn_once("CPUID feature registers not supported. "
+			     "Assuming v6 debug is present.\n");
 		return ARM_DEBUG_ARCH_V6;
 	}
 
-	ARM_DBG_READ(c0, 0, didr);
+	ARM_DBG_READ(c0, c0, 0, didr);
 	return (didr >> 16) & 0xf;
 }
 
@@ -169,7 +169,7 @@ static int debug_exception_updates_fsr(void)
 static int get_num_wrp_resources(void)
 {
 	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
+	ARM_DBG_READ(c0, c0, 0, didr);
 	return ((didr >> 28) & 0xf) + 1;
 }
 
@@ -177,7 +177,7 @@ static int get_num_wrp_resources(void)
 static int get_num_brp_resources(void)
 {
 	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
+	ARM_DBG_READ(c0, c0, 0, didr);
 	return ((didr >> 24) & 0xf) + 1;
 }
 
@@ -228,19 +228,17 @@ static int get_num_brps(void)
  * be put into halting debug mode at any time by an external debugger
  * but there is nothing we can do to prevent that.
  */
-static int enable_monitor_mode(void)
+static int monitor_mode_enabled(void)
 {
 	u32 dscr;
-	int ret = 0;
-
-	ARM_DBG_READ(c1, 0, dscr);
+	ARM_DBG_READ(c0, c1, 0, dscr);
+	return !!(dscr & ARM_DSCR_MDBGEN);
+}
 
-	/* Ensure that halting mode is disabled. */
-	if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN,
-		"halting debug mode enabled. Unable to access hardware resources.\n")) {
-		ret = -EPERM;
-		goto out;
-	}
+static int enable_monitor_mode(void)
+{
+	u32 dscr;
+	ARM_DBG_READ(c0, c1, 0, dscr);
 
 	/* If monitor mode is already enabled, just return. */
 	if (dscr & ARM_DSCR_MDBGEN)
@@ -250,24 +248,27 @@ static int enable_monitor_mode(void)
 	switch (get_debug_arch()) {
 	case ARM_DEBUG_ARCH_V6:
 	case ARM_DEBUG_ARCH_V6_1:
-		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
+		ARM_DBG_WRITE(c0, c1, 0, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
 	case ARM_DEBUG_ARCH_V7_1:
-		ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN));
+		ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN));
+		isb();
 		break;
 	default:
-		ret = -ENODEV;
-		goto out;
+		return -ENODEV;
 	}
 
 	/* Check that the write made it through. */
-	ARM_DBG_READ(c1, 0, dscr);
-	if (!(dscr & ARM_DSCR_MDBGEN))
-		ret = -EPERM;
+	ARM_DBG_READ(c0, c1, 0, dscr);
+	if (!(dscr & ARM_DSCR_MDBGEN)) {
+		pr_warn_once("Failed to enable monitor mode on CPU %d.\n",
+				smp_processor_id());
+		return -EPERM;
+	}
 
 out:
-	return ret;
+	return 0;
 }
 
 int hw_breakpoint_slots(int type)
@@ -328,14 +329,9 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	struct perf_event **slot, **slots;
-	int i, max_slots, ctrl_base, val_base, ret = 0;
+	int i, max_slots, ctrl_base, val_base;
 	u32 addr, ctrl;
 
-	/* Ensure that we are in monitor mode and halting mode is disabled. */
-	ret = enable_monitor_mode();
-	if (ret)
-		goto out;
-
 	addr = info->address;
 	ctrl = encode_ctrl_reg(info->ctrl) | 0x1;
 
@@ -362,9 +358,9 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		}
 	}
 
-	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) {
-		ret = -EBUSY;
-		goto out;
+	if (i == max_slots) {
+		pr_warning("Can't find any breakpoint slot\n");
+		return -EBUSY;
 	}
 
 	/* Override the breakpoint data with the step data. */
@@ -383,9 +379,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 
 	/* Setup the control register. */
 	write_wb_reg(ctrl_base + i, ctrl);
-
-out:
-	return ret;
+	return 0;
 }
 
 void arch_uninstall_hw_breakpoint(struct perf_event *bp)
@@ -416,8 +410,10 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 		}
 	}
 
-	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n"))
+	if (i == max_slots) {
+		pr_warning("Can't find any breakpoint slot\n");
 		return;
+	}
 
 	/* Ensure that we disable the mismatch breakpoint. */
 	if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE &&
@@ -596,6 +592,10 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	int ret = 0;
 	u32 offset, alignment_mask = 0x3;
 
+	/* Ensure that we are in monitor debug mode. */
+	if (!monitor_mode_enabled())
+		return -ENODEV;
+
 	/* Build the arch_hw_breakpoint. */
 	ret = arch_build_bp_info(bp);
 	if (ret)
@@ -858,7 +858,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 		local_irq_enable();
 
 	/* We only handle watchpoints and hardware breakpoints. */
-	ARM_DBG_READ(c1, 0, dscr);
+	ARM_DBG_READ(c0, c1, 0, dscr);
 
 	/* Perform perf callbacks. */
 	switch (ARM_DSCR_MOE(dscr)) {
@@ -906,7 +906,7 @@ static struct undef_hook debug_reg_hook = {
 static void reset_ctrl_regs(void *unused)
 {
 	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
-	u32 dbg_power;
+	u32 val;
 
 	/*
 	 * v7 debug contains save and restore registers so that debug state
@@ -919,23 +919,30 @@ static void reset_ctrl_regs(void *unused)
 	switch (debug_arch) {
 	case ARM_DEBUG_ARCH_V6:
 	case ARM_DEBUG_ARCH_V6_1:
-		/* ARMv6 cores just need to reset the registers. */
-		goto reset_regs;
+		/* ARMv6 cores clear the registers out of reset. */
+		goto out_mdbgen;
 	case ARM_DEBUG_ARCH_V7_ECP14:
 		/*
 		 * Ensure sticky power-down is clear (i.e. debug logic is
 		 * powered up).
 		 */
-		asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
-		if ((dbg_power & 0x1) == 0)
+		ARM_DBG_READ(c1, c5, 4, val);
+		if ((val & 0x1) == 0)
 			err = -EPERM;
+
+		/*
+		 * Check whether we implement OS save and restore.
+		 */
+		ARM_DBG_READ(c1, c1, 4, val);
+		if ((val & 0x9) == 0)
+			goto clear_vcr;
 		break;
 	case ARM_DEBUG_ARCH_V7_1:
 		/*
 		 * Ensure the OS double lock is clear.
 		 */
-		asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power));
-		if ((dbg_power & 0x1) == 1)
+		ARM_DBG_READ(c1, c3, 4, val);
+		if ((val & 0x1) == 1)
 			err = -EPERM;
 		break;
 	}
@@ -947,24 +954,29 @@ static void reset_ctrl_regs(void *unused)
 	}
 
 	/*
-	 * Unconditionally clear the lock by writing a value
+	 * Unconditionally clear the OS lock by writing a value
 	 * other than 0xC5ACCE55 to the access register.
 	 */
-	asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+	ARM_DBG_WRITE(c1, c0, 4, 0);
 	isb();
 
 	/*
 	 * Clear any configured vector-catch events before
 	 * enabling monitor mode.
 	 */
-	asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
+clear_vcr:
+	ARM_DBG_WRITE(c0, c7, 0, 0);
 	isb();
 
-reset_regs:
-	if (enable_monitor_mode())
+	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
+		pr_warning("CPU %d failed to disable vector catch\n", cpu);
 		return;
+	}
 
-	/* We must also reset any reserved registers. */
+	/*
+	 * The control/value register pairs are UNKNOWN out of reset so
+	 * clear them to avoid spurious debug events.
+	 */
 	raw_num_brps = get_num_brp_resources();
 	for (i = 0; i < raw_num_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
@@ -975,6 +987,19 @@ reset_regs:
 		write_wb_reg(ARM_BASE_WCR + i, 0UL);
 		write_wb_reg(ARM_BASE_WVR + i, 0UL);
 	}
+
+	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
+		pr_warning("CPU %d failed to clear debug register pairs\n", cpu);
+		return;
+	}
+
+	/*
+	 * Have a crack at enabling monitor mode. We don't actually need
+	 * it yet, but reporting an error early is useful if it fails.
+	 */
+out_mdbgen:
+	if (enable_monitor_mode())
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
 }
 
 static int __cpuinit dbg_reset_notify(struct notifier_block *self,
@@ -992,8 +1017,6 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
 
 static int __init arch_hw_breakpoint_init(void)
 {
-	u32 dscr;
-
 	debug_arch = get_debug_arch();
 
 	if (!debug_arch_supported()) {
@@ -1028,17 +1051,10 @@ static int __init arch_hw_breakpoint_init(void)
 		core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " :
 		"", core_num_wrps);
 
-	ARM_DBG_READ(c1, 0, dscr);
-	if (dscr & ARM_DSCR_HDBGEN) {
-		max_watchpoint_len = 4;
-		pr_warning("halting debug mode enabled. Assuming maximum watchpoint size of %u bytes.\n",
-			   max_watchpoint_len);
-	} else {
-		/* Work out the maximum supported watchpoint length. */
-		max_watchpoint_len = get_max_wp_len();
-		pr_info("maximum watchpoint size is %u bytes.\n",
-				max_watchpoint_len);
-	}
+	/* Work out the maximum supported watchpoint length. */
+	max_watchpoint_len = get_max_wp_len();
+	pr_info("maximum watchpoint size is %u bytes.\n",
+			max_watchpoint_len);
 
 	/* Register debug fault handler. */
 	hook_fault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 53c0304b734a..f9e8657dd241 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -86,12 +86,10 @@ armpmu_map_event(struct perf_event *event,
 	return -ENOENT;
 }
 
-int
-armpmu_event_set_period(struct perf_event *event,
-			struct hw_perf_event *hwc,
-			int idx)
+int armpmu_event_set_period(struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
@@ -119,24 +117,22 @@ armpmu_event_set_period(struct perf_event *event,
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
-	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
+	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
 
 	perf_event_update_userpage(event);
 
 	return ret;
 }
 
-u64
-armpmu_event_update(struct perf_event *event,
-		    struct hw_perf_event *hwc,
-		    int idx)
+u64 armpmu_event_update(struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = armpmu->read_counter(idx);
+	new_raw_count = armpmu->read_counter(event);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
@@ -159,7 +155,7 @@ armpmu_read(struct perf_event *event)
 	if (hwc->idx < 0)
 		return;
 
-	armpmu_event_update(event, hwc, hwc->idx);
+	armpmu_event_update(event);
 }
 
 static void
@@ -173,14 +169,13 @@ armpmu_stop(struct perf_event *event, int flags)
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
 	 */
 	if (!(hwc->state & PERF_HES_STOPPED)) {
-		armpmu->disable(hwc, hwc->idx);
-		armpmu_event_update(event, hwc, hwc->idx);
+		armpmu->disable(event);
+		armpmu_event_update(event);
 		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	}
 }
 
-static void
-armpmu_start(struct perf_event *event, int flags)
+static void armpmu_start(struct perf_event *event, int flags)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -200,8 +195,8 @@ armpmu_start(struct perf_event *event, int flags)
 	 * get an interrupt too soon or *way* too late if the overflow has
 	 * happened since disabling.
 	 */
-	armpmu_event_set_period(event, hwc, hwc->idx);
-	armpmu->enable(hwc, hwc->idx);
+	armpmu_event_set_period(event);
+	armpmu->enable(event);
 }
 
 static void
@@ -233,7 +228,7 @@ armpmu_add(struct perf_event *event, int flags)
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(hw_events, hwc);
+	idx = armpmu->get_event_idx(hw_events, event);
 	if (idx < 0) {
 		err = idx;
 		goto out;
@@ -244,7 +239,7 @@ armpmu_add(struct perf_event *event, int flags)
 	 * sure it is disabled.
 	 */
 	event->hw.idx = idx;
-	armpmu->disable(hwc, idx);
+	armpmu->disable(event);
 	hw_events->events[idx] = event;
 
 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
@@ -264,13 +259,12 @@ validate_event(struct pmu_hw_events *hw_events,
 	       struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event fake_event = event->hw;
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
 	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
-	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
+	return armpmu->get_event_idx(hw_events, event) >= 0;
 }
 
 static int
@@ -316,7 +310,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 static void
 armpmu_release_hardware(struct arm_pmu *armpmu)
 {
-	armpmu->free_irq();
+	armpmu->free_irq(armpmu);
 	pm_runtime_put_sync(&armpmu->plat_device->dev);
 }
 
@@ -330,7 +324,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
 		return -ENODEV;
 
 	pm_runtime_get_sync(&pmu_device->dev);
-	err = armpmu->request_irq(armpmu_dispatch_irq);
+	err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
 	if (err) {
 		armpmu_release_hardware(armpmu);
 		return err;
@@ -465,13 +459,13 @@ static void armpmu_enable(struct pmu *pmu)
 	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
 	if (enabled)
-		armpmu->start();
+		armpmu->start(armpmu);
 }
 
 static void armpmu_disable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	armpmu->stop();
+	armpmu->stop(armpmu);
 }
 
 #ifdef CONFIG_PM_RUNTIME
@@ -517,12 +511,13 @@ static void __init armpmu_init(struct arm_pmu *armpmu)
 	};
 }
 
-int armpmu_register(struct arm_pmu *armpmu, char *name, int type)
+int armpmu_register(struct arm_pmu *armpmu, int type)
 {
 	armpmu_init(armpmu);
+	pm_runtime_enable(&armpmu->plat_device->dev);
 	pr_info("enabled with %s PMU driver, %d counters available\n",
 			armpmu->name, armpmu->num_events);
-	return perf_pmu_register(&armpmu->pmu, name, type);
+	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
 }
 
 /*
@@ -576,6 +571,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct frame_tail __user *tail;
 
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
@@ -603,9 +602,41 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct stackframe fr;
 
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
 	fr.fp = regs->ARM_fp;
 	fr.sp = regs->ARM_sp;
 	fr.lr = regs->ARM_lr;
 	fr.pc = regs->ARM_pc;
 	walk_stackframe(&fr, callchain_trace, entry);
 }
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return perf_guest_cbs->get_guest_ip();
+
+	return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 8d7d8d4de9d6..9a4f6307a016 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 
 #include <asm/cputype.h>
@@ -45,7 +46,7 @@ const char *perf_pmu_name(void)
 	if (!cpu_pmu)
 		return NULL;
 
-	return cpu_pmu->pmu.name;
+	return cpu_pmu->name;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);
 
@@ -70,7 +71,7 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
 	return &__get_cpu_var(cpu_hw_events);
 }
 
-static void cpu_pmu_free_irq(void)
+static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
 	int i, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
@@ -86,7 +87,7 @@ static void cpu_pmu_free_irq(void)
 	}
 }
 
-static int cpu_pmu_request_irq(irq_handler_t handler)
+static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
 	int i, err, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
@@ -147,7 +148,7 @@ static void __devinit cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
 	/* Ensure the PMU has sane values out of reset. */
 	if (cpu_pmu && cpu_pmu->reset)
-		on_each_cpu(cpu_pmu->reset, NULL, 1);
+		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
 }
 
 /*
@@ -163,7 +164,9 @@ static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
 		return NOTIFY_DONE;
 
 	if (cpu_pmu && cpu_pmu->reset)
-		cpu_pmu->reset(NULL);
+		cpu_pmu->reset(cpu_pmu);
+	else
+		return NOTIFY_DONE;
 
 	return NOTIFY_OK;
 }
@@ -195,13 +198,13 @@ static struct platform_device_id __devinitdata cpu_pmu_plat_device_ids[] = {
 /*
  * CPU PMU identification and probing.
  */
-static struct arm_pmu *__devinit probe_current_pmu(void)
+static int __devinit probe_current_pmu(struct arm_pmu *pmu)
 {
-	struct arm_pmu *pmu = NULL;
 	int cpu = get_cpu();
 	unsigned long cpuid = read_cpuid_id();
 	unsigned long implementor = (cpuid & 0xFF000000) >> 24;
 	unsigned long part_number = (cpuid & 0xFFF0);
+	int ret = -ENODEV;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
@@ -211,25 +214,25 @@ static struct arm_pmu *__devinit probe_current_pmu(void)
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			pmu = armv6pmu_init();
+			ret = armv6pmu_init(pmu);
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			pmu = armv6mpcore_pmu_init();
+			ret = armv6mpcore_pmu_init(pmu);
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			pmu = armv7_a8_pmu_init();
+			ret = armv7_a8_pmu_init(pmu);
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			pmu = armv7_a9_pmu_init();
+			ret = armv7_a9_pmu_init(pmu);
 			break;
 		case 0xC050:	/* Cortex-A5 */
-			pmu = armv7_a5_pmu_init();
+			ret = armv7_a5_pmu_init(pmu);
 			break;
 		case 0xC0F0:	/* Cortex-A15 */
-			pmu = armv7_a15_pmu_init();
+			ret = armv7_a15_pmu_init(pmu);
 			break;
 		case 0xC070:	/* Cortex-A7 */
-			pmu = armv7_a7_pmu_init();
+			ret = armv7_a7_pmu_init(pmu);
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -237,43 +240,54 @@ static struct arm_pmu *__devinit probe_current_pmu(void)
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			pmu = xscale1pmu_init();
+			ret = xscale1pmu_init(pmu);
 			break;
 		case 2:
-			pmu = xscale2pmu_init();
+			ret = xscale2pmu_init(pmu);
 			break;
 		}
 	}
 
 	put_cpu();
-	return pmu;
+	return ret;
 }
 
 static int __devinit cpu_pmu_device_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
-	struct arm_pmu *(*init_fn)(void);
+	int (*init_fn)(struct arm_pmu *);
 	struct device_node *node = pdev->dev.of_node;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
 
 	if (cpu_pmu) {
 		pr_info("attempt to register multiple PMU devices!");
 		return -ENOSPC;
 	}
 
+	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
+	if (!pmu) {
+		pr_info("failed to allocate PMU device!");
+		return -ENOMEM;
+	}
+
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
 		init_fn = of_id->data;
-		cpu_pmu = init_fn();
+		ret = init_fn(pmu);
 	} else {
-		cpu_pmu = probe_current_pmu();
+		ret = probe_current_pmu(pmu);
 	}
 
-	if (!cpu_pmu)
-		return -ENODEV;
+	if (ret) {
+		pr_info("failed to register PMU devices!");
+		kfree(pmu);
+		return ret;
+	}
 
+	cpu_pmu = pmu;
 	cpu_pmu->plat_device = pdev;
 	cpu_pmu_init(cpu_pmu);
-	register_cpu_notifier(&cpu_pmu_hotplug_notifier);
-	armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW);
+	armpmu_register(cpu_pmu, PERF_TYPE_RAW);
 
 	return 0;
 }
@@ -290,6 +304,16 @@ static struct platform_driver cpu_pmu_driver = {
 
 static int __init register_pmu_driver(void)
 {
-	return platform_driver_register(&cpu_pmu_driver);
+	int err;
+
+	err = register_cpu_notifier(&cpu_pmu_hotplug_notifier);
+	if (err)
+		return err;
+
+	err = platform_driver_register(&cpu_pmu_driver);
+	if (err)
+		unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+
+	return err;
 }
 device_initcall(register_pmu_driver);
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 6ccc07971745..f3e22ff8b6a2 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -401,9 +401,10 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
 	return ret;
 }
 
-static inline u32
-armv6pmu_read_counter(int counter)
+static inline u32 armv6pmu_read_counter(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
 	unsigned long value = 0;
 
 	if (ARMV6_CYCLE_COUNTER == counter)
@@ -418,10 +419,11 @@ armv6pmu_read_counter(int counter)
 	return value;
 }
 
-static inline void
-armv6pmu_write_counter(int counter,
-		       u32 value)
+static inline void armv6pmu_write_counter(struct perf_event *event, u32 value)
 {
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
 	if (ARMV6_CYCLE_COUNTER == counter)
 		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
 	else if (ARMV6_COUNTER0 == counter)
@@ -432,12 +434,13 @@ armv6pmu_write_counter(int counter,
 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
 }
 
-static void
-armv6pmu_enable_event(struct hw_perf_event *hwc,
-		      int idx)
+static void armv6pmu_enable_event(struct perf_event *event)
 {
 	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= 0;
@@ -473,7 +476,8 @@ armv6pmu_handle_irq(int irq_num,
 {
 	unsigned long pmcr = armv6_pmcr_read();
 	struct perf_sample_data data;
-	struct pmu_hw_events *cpuc;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
 	struct pt_regs *regs;
 	int idx;
 
@@ -489,7 +493,6 @@ armv6pmu_handle_irq(int irq_num,
 	 */
 	armv6_pmcr_write(pmcr);
 
-	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
@@ -506,13 +509,13 @@ armv6pmu_handle_irq(int irq_num,
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!armpmu_event_set_period(event, hwc, idx))
+		if (!armpmu_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(hwc, idx);
+			cpu_pmu->disable(event);
 	}
 
 	/*
@@ -527,8 +530,7 @@ armv6pmu_handle_irq(int irq_num,
 	return IRQ_HANDLED;
 }
 
-static void
-armv6pmu_start(void)
+static void armv6pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -540,8 +542,7 @@ armv6pmu_start(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void
-armv6pmu_stop(void)
+static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -555,10 +556,11 @@ armv6pmu_stop(void)
 
 static int
 armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
-		       struct hw_perf_event *event)
+				struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	/* Always place a cycle counter into the cycle counter. */
-	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
+	if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
 		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
@@ -579,12 +581,13 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
 	}
 }
 
-static void
-armv6pmu_disable_event(struct hw_perf_event *hwc,
-		       int idx)
+static void armv6pmu_disable_event(struct perf_event *event)
 {
 	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -613,12 +616,13 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void
-armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
-			      int idx)
+static void armv6mpcore_pmu_disable_event(struct perf_event *event)
 {
 	unsigned long val, mask, flags, evt = 0;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -649,24 +653,22 @@ static int armv6_map_event(struct perf_event *event)
 				&armv6_perf_cache_map, 0xFF);
 }
 
-static struct arm_pmu armv6pmu = {
-	.name			= "v6",
-	.handle_irq		= armv6pmu_handle_irq,
-	.enable			= armv6pmu_enable_event,
-	.disable		= armv6pmu_disable_event,
-	.read_counter		= armv6pmu_read_counter,
-	.write_counter		= armv6pmu_write_counter,
-	.get_event_idx		= armv6pmu_get_event_idx,
-	.start			= armv6pmu_start,
-	.stop			= armv6pmu_stop,
-	.map_event		= armv6_map_event,
-	.num_events		= 3,
-	.max_period		= (1LLU << 32) - 1,
-};
-
-static struct arm_pmu *__devinit armv6pmu_init(void)
+static int __devinit armv6pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return &armv6pmu;
+	cpu_pmu->name		= "v6";
+	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
+	cpu_pmu->enable		= armv6pmu_enable_event;
+	cpu_pmu->disable	= armv6pmu_disable_event;
+	cpu_pmu->read_counter	= armv6pmu_read_counter;
+	cpu_pmu->write_counter	= armv6pmu_write_counter;
+	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
+	cpu_pmu->start		= armv6pmu_start;
+	cpu_pmu->stop		= armv6pmu_stop;
+	cpu_pmu->map_event	= armv6_map_event;
+	cpu_pmu->num_events	= 3;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
 }
 
 /*
@@ -683,33 +685,31 @@ static int armv6mpcore_map_event(struct perf_event *event)
 				&armv6mpcore_perf_cache_map, 0xFF);
 }
 
-static struct arm_pmu armv6mpcore_pmu = {
-	.name			= "v6mpcore",
-	.handle_irq		= armv6pmu_handle_irq,
-	.enable			= armv6pmu_enable_event,
-	.disable		= armv6mpcore_pmu_disable_event,
-	.read_counter		= armv6pmu_read_counter,
-	.write_counter		= armv6pmu_write_counter,
-	.get_event_idx		= armv6pmu_get_event_idx,
-	.start			= armv6pmu_start,
-	.stop			= armv6pmu_stop,
-	.map_event		= armv6mpcore_map_event,
-	.num_events		= 3,
-	.max_period		= (1LLU << 32) - 1,
-};
-
-static struct arm_pmu *__devinit armv6mpcore_pmu_init(void)
+static int __devinit armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return &armv6mpcore_pmu;
+	cpu_pmu->name		= "v6mpcore";
+	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
+	cpu_pmu->enable		= armv6pmu_enable_event;
+	cpu_pmu->disable	= armv6mpcore_pmu_disable_event;
+	cpu_pmu->read_counter	= armv6pmu_read_counter;
+	cpu_pmu->write_counter	= armv6pmu_write_counter;
+	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
+	cpu_pmu->start		= armv6pmu_start;
+	cpu_pmu->stop		= armv6pmu_stop;
+	cpu_pmu->map_event	= armv6mpcore_map_event;
+	cpu_pmu->num_events	= 3;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
 }
 #else
-static struct arm_pmu *__devinit armv6pmu_init(void)
+static int armv6pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-static struct arm_pmu *__devinit armv6mpcore_pmu_init(void)
+static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 #endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index bd4b090ebcfd..7d0cce85d17e 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -18,8 +18,6 @@
 
 #ifdef CONFIG_CPU_V7
 
-static struct arm_pmu armv7pmu;
-
 /*
  * Common ARMv7 event types
  *
@@ -738,7 +736,8 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
  */
 #define	ARMV7_IDX_CYCLE_COUNTER	0
 #define	ARMV7_IDX_COUNTER0	1
-#define	ARMV7_IDX_COUNTER_LAST	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+#define	ARMV7_IDX_COUNTER_LAST(cpu_pmu) \
+	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
 #define	ARMV7_MAX_COUNTERS	32
 #define	ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
@@ -804,49 +803,34 @@ static inline int armv7_pmnc_has_overflowed(u32 pmnc)
 	return pmnc & ARMV7_OVERFLOWED_MASK;
 }
 
-static inline int armv7_pmnc_counter_valid(int idx)
+static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx)
 {
-	return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST;
+	return idx >= ARMV7_IDX_CYCLE_COUNTER &&
+		idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu);
 }
 
 static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
 {
-	int ret = 0;
-	u32 counter;
-
-	if (!armv7_pmnc_counter_valid(idx)) {
-		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), idx);
-	} else {
-		counter = ARMV7_IDX_TO_COUNTER(idx);
-		ret = pmnc & BIT(counter);
-	}
-
-	return ret;
+	return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
 }
 
 static inline int armv7_pmnc_select_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv7_pmnc_counter_valid(idx)) {
-		pr_err("CPU%u selecting wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV7_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
 	isb();
 
 	return idx;
 }
 
-static inline u32 armv7pmu_read_counter(int idx)
+static inline u32 armv7pmu_read_counter(struct perf_event *event)
 {
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
 	u32 value = 0;
 
-	if (!armv7_pmnc_counter_valid(idx))
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
@@ -857,9 +841,13 @@ static inline u32 armv7pmu_read_counter(int idx)
 	return value;
 }
 
-static inline void armv7pmu_write_counter(int idx, u32 value)
+static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
 {
-	if (!armv7_pmnc_counter_valid(idx))
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
@@ -878,60 +866,28 @@ static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
 
 static inline int armv7_pmnc_enable_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv7_pmnc_counter_valid(idx)) {
-		pr_err("CPU%u enabling wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV7_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv7_pmnc_disable_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv7_pmnc_counter_valid(idx)) {
-		pr_err("CPU%u disabling wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV7_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv7_pmnc_enable_intens(int idx)
 {
-	u32 counter;
-
-	if (!armv7_pmnc_counter_valid(idx)) {
-		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV7_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv7_pmnc_disable_intens(int idx)
 {
-	u32 counter;
-
-	if (!armv7_pmnc_counter_valid(idx)) {
-		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV7_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
 	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
 	isb();
 	/* Clear the overflow flag in case an interrupt is pending. */
@@ -956,7 +912,7 @@ static inline u32 armv7_pmnc_getreset_flags(void)
 }
 
 #ifdef DEBUG
-static void armv7_pmnc_dump_regs(void)
+static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
 {
 	u32 val;
 	unsigned int cnt;
@@ -981,7 +937,8 @@ static void armv7_pmnc_dump_regs(void)
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
 	printk(KERN_INFO "CCNT  =0x%08x\n", val);
 
-	for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) {
+	for (cnt = ARMV7_IDX_COUNTER0;
+			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
 		armv7_pmnc_select_counter(cnt);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
@@ -993,10 +950,19 @@ static void armv7_pmnc_dump_regs(void)
 }
 #endif
 
-static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void armv7pmu_enable_event(struct perf_event *event)
 {
 	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return;
+	}
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
@@ -1014,7 +980,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	 * We only need to set the event for the cycle counter if we
 	 * have the ability to perform event filtering.
 	 */
-	if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+	if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
 		armv7_pmnc_write_evtsel(idx, hwc->config_base);
 
 	/*
@@ -1030,10 +996,19 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void armv7pmu_disable_event(struct perf_event *event)
 {
 	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
+
+	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return;
+	}
 
 	/*
 	 * Disable counter and interrupt
@@ -1057,7 +1032,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 {
 	u32 pmnc;
 	struct perf_sample_data data;
-	struct pmu_hw_events *cpuc;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
 	struct pt_regs *regs;
 	int idx;
 
@@ -1077,7 +1053,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	 */
 	regs = get_irq_regs();
 
-	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
@@ -1094,13 +1069,13 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!armpmu_event_set_period(event, hwc, idx))
+		if (!armpmu_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(hwc, idx);
+			cpu_pmu->disable(event);
 	}
 
 	/*
@@ -1115,7 +1090,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	return IRQ_HANDLED;
 }
 
-static void armv7pmu_start(void)
+static void armv7pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -1126,7 +1101,7 @@ static void armv7pmu_start(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv7pmu_stop(void)
+static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -1138,10 +1113,12 @@ static void armv7pmu_stop(void)
 }
 
 static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
-				  struct hw_perf_event *event)
+				  struct perf_event *event)
 {
 	int idx;
-	unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
 	if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
@@ -1192,11 +1169,14 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
 
 static void armv7pmu_reset(void *info)
 {
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
 	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
-		armv7pmu_disable_event(NULL, idx);
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_disable_counter(idx);
+		armv7_pmnc_disable_intens(idx);
+	}
 
 	/* Initialize & Reset PMNC: C and P bits */
 	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
@@ -1232,17 +1212,18 @@ static int armv7_a7_map_event(struct perf_event *event)
 				&armv7_a7_perf_cache_map, 0xFF);
 }
 
-static struct arm_pmu armv7pmu = {
-	.handle_irq		= armv7pmu_handle_irq,
-	.enable			= armv7pmu_enable_event,
-	.disable		= armv7pmu_disable_event,
-	.read_counter		= armv7pmu_read_counter,
-	.write_counter		= armv7pmu_write_counter,
-	.get_event_idx		= armv7pmu_get_event_idx,
-	.start			= armv7pmu_start,
-	.stop			= armv7pmu_stop,
-	.reset			= armv7pmu_reset,
-	.max_period		= (1LLU << 32) - 1,
+static void armv7pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq	= armv7pmu_handle_irq;
+	cpu_pmu->enable		= armv7pmu_enable_event;
+	cpu_pmu->disable	= armv7pmu_disable_event;
+	cpu_pmu->read_counter	= armv7pmu_read_counter;
+	cpu_pmu->write_counter	= armv7pmu_write_counter;
+	cpu_pmu->get_event_idx	= armv7pmu_get_event_idx;
+	cpu_pmu->start		= armv7pmu_start;
+	cpu_pmu->stop		= armv7pmu_stop;
+	cpu_pmu->reset		= armv7pmu_reset;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
 };
 
 static u32 __devinit armv7_read_num_pmnc_events(void)
@@ -1256,70 +1237,75 @@ static u32 __devinit armv7_read_num_pmnc_events(void)
 	return nb_cnt + 1;
 }
 
-static struct arm_pmu *__devinit armv7_a8_pmu_init(void)
+static int __devinit armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv7pmu.name		= "ARMv7 Cortex-A8";
-	armv7pmu.map_event	= armv7_a8_map_event;
-	armv7pmu.num_events	= armv7_read_num_pmnc_events();
-	return &armv7pmu;
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Cortex-A8";
+	cpu_pmu->map_event	= armv7_a8_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	return 0;
 }
 
-static struct arm_pmu *__devinit armv7_a9_pmu_init(void)
+static int __devinit armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv7pmu.name		= "ARMv7 Cortex-A9";
-	armv7pmu.map_event	= armv7_a9_map_event;
-	armv7pmu.num_events	= armv7_read_num_pmnc_events();
-	return &armv7pmu;
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Cortex-A9";
+	cpu_pmu->map_event	= armv7_a9_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	return 0;
 }
 
-static struct arm_pmu *__devinit armv7_a5_pmu_init(void)
+static int __devinit armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv7pmu.name		= "ARMv7 Cortex-A5";
-	armv7pmu.map_event	= armv7_a5_map_event;
-	armv7pmu.num_events	= armv7_read_num_pmnc_events();
-	return &armv7pmu;
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Cortex-A5";
+	cpu_pmu->map_event	= armv7_a5_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	return 0;
 }
 
-static struct arm_pmu *__devinit armv7_a15_pmu_init(void)
+static int __devinit armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv7pmu.name		= "ARMv7 Cortex-A15";
-	armv7pmu.map_event	= armv7_a15_map_event;
-	armv7pmu.num_events	= armv7_read_num_pmnc_events();
-	armv7pmu.set_event_filter = armv7pmu_set_event_filter;
-	return &armv7pmu;
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Cortex-A15";
+	cpu_pmu->map_event	= armv7_a15_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	return 0;
 }
 
-static struct arm_pmu *__devinit armv7_a7_pmu_init(void)
+static int __devinit armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	armv7pmu.name		= "ARMv7 Cortex-A7";
-	armv7pmu.map_event	= armv7_a7_map_event;
-	armv7pmu.num_events	= armv7_read_num_pmnc_events();
-	armv7pmu.set_event_filter = armv7pmu_set_event_filter;
-	return &armv7pmu;
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Cortex-A7";
+	cpu_pmu->map_event	= armv7_a7_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	return 0;
 }
 #else
-static struct arm_pmu *__devinit armv7_a8_pmu_init(void)
+static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-static struct arm_pmu *__devinit armv7_a9_pmu_init(void)
+static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-static struct arm_pmu *__devinit armv7_a5_pmu_init(void)
+static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-static struct arm_pmu *__devinit armv7_a15_pmu_init(void)
+static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-static struct arm_pmu *__devinit armv7_a7_pmu_init(void)
+static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 #endif	/* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 426e19f380a2..0c8265e53d5f 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -224,7 +224,8 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc;
 	struct perf_sample_data data;
-	struct pmu_hw_events *cpuc;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
 	struct pt_regs *regs;
 	int idx;
 
@@ -248,7 +249,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 
 	regs = get_irq_regs();
 
-	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
@@ -260,13 +260,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!armpmu_event_set_period(event, hwc, idx))
+		if (!armpmu_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(hwc, idx);
+			cpu_pmu->disable(event);
 	}
 
 	irq_work_run();
@@ -280,11 +280,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 	return IRQ_HANDLED;
 }
 
-static void
-xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void xscale1pmu_enable_event(struct perf_event *event)
 {
 	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -314,11 +316,13 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void
-xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void xscale1pmu_disable_event(struct perf_event *event)
 {
 	unsigned long val, mask, evt, flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -348,9 +352,10 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 
 static int
 xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
-			struct hw_perf_event *event)
+				struct perf_event *event)
 {
-	if (XSCALE_PERFCTR_CCNT == event->config_base) {
+	struct hw_perf_event *hwc = &event->hw;
+	if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
 		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
@@ -366,8 +371,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 	}
 }
 
-static void
-xscale1pmu_start(void)
+static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -379,8 +383,7 @@ xscale1pmu_start(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void
-xscale1pmu_stop(void)
+static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -392,9 +395,10 @@ xscale1pmu_stop(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static inline u32
-xscale1pmu_read_counter(int counter)
+static inline u32 xscale1pmu_read_counter(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
 	u32 val = 0;
 
 	switch (counter) {
@@ -412,9 +416,11 @@ xscale1pmu_read_counter(int counter)
 	return val;
 }
 
-static inline void
-xscale1pmu_write_counter(int counter, u32 val)
+static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val)
 {
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
 	switch (counter) {
 	case XSCALE_CYCLE_COUNTER:
 		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
@@ -434,24 +440,22 @@ static int xscale_map_event(struct perf_event *event)
 				&xscale_perf_cache_map, 0xFF);
 }
 
-static struct arm_pmu xscale1pmu = {
-	.name		= "xscale1",
-	.handle_irq	= xscale1pmu_handle_irq,
-	.enable		= xscale1pmu_enable_event,
-	.disable	= xscale1pmu_disable_event,
-	.read_counter	= xscale1pmu_read_counter,
-	.write_counter	= xscale1pmu_write_counter,
-	.get_event_idx	= xscale1pmu_get_event_idx,
-	.start		= xscale1pmu_start,
-	.stop		= xscale1pmu_stop,
-	.map_event	= xscale_map_event,
-	.num_events	= 3,
-	.max_period	= (1LLU << 32) - 1,
-};
-
-static struct arm_pmu *__devinit xscale1pmu_init(void)
+static int __devinit xscale1pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return &xscale1pmu;
+	cpu_pmu->name		= "xscale1";
+	cpu_pmu->handle_irq	= xscale1pmu_handle_irq;
+	cpu_pmu->enable		= xscale1pmu_enable_event;
+	cpu_pmu->disable	= xscale1pmu_disable_event;
+	cpu_pmu->read_counter	= xscale1pmu_read_counter;
+	cpu_pmu->write_counter	= xscale1pmu_write_counter;
+	cpu_pmu->get_event_idx	= xscale1pmu_get_event_idx;
+	cpu_pmu->start		= xscale1pmu_start;
+	cpu_pmu->stop		= xscale1pmu_stop;
+	cpu_pmu->map_event	= xscale_map_event;
+	cpu_pmu->num_events	= 3;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
 }
 
 #define XSCALE2_OVERFLOWED_MASK	0x01f
@@ -567,7 +571,8 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
-	struct pmu_hw_events *cpuc;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
 	struct pt_regs *regs;
 	int idx;
 
@@ -585,7 +590,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 
 	regs = get_irq_regs();
 
-	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
@@ -597,13 +601,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!armpmu_event_set_period(event, hwc, idx))
+		if (!armpmu_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(hwc, idx);
+			cpu_pmu->disable(event);
 	}
 
 	irq_work_run();
@@ -617,11 +621,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 	return IRQ_HANDLED;
 }
 
-static void
-xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void xscale2pmu_enable_event(struct perf_event *event)
 {
 	unsigned long flags, ien, evtsel;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -661,11 +667,13 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void
-xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void xscale2pmu_disable_event(struct perf_event *event)
 {
 	unsigned long flags, ien, evtsel, of_flags;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -713,7 +721,7 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 
 static int
 xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
-			struct hw_perf_event *event)
+				struct perf_event *event)
 {
 	int idx = xscale1pmu_get_event_idx(cpuc, event);
 	if (idx >= 0)
@@ -727,8 +735,7 @@ out:
 	return idx;
 }
 
-static void
-xscale2pmu_start(void)
+static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -740,8 +747,7 @@ xscale2pmu_start(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void
-xscale2pmu_stop(void)
+static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags, val;
 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
@@ -753,9 +759,10 @@ xscale2pmu_stop(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static inline u32
-xscale2pmu_read_counter(int counter)
+static inline u32 xscale2pmu_read_counter(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
 	u32 val = 0;
 
 	switch (counter) {
@@ -779,9 +786,11 @@ xscale2pmu_read_counter(int counter)
 	return val;
 }
 
-static inline void
-xscale2pmu_write_counter(int counter, u32 val)
+static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)
 {
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
 	switch (counter) {
 	case XSCALE_CYCLE_COUNTER:
 		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
@@ -801,33 +810,31 @@ xscale2pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static struct arm_pmu xscale2pmu = {
-	.name		= "xscale2",
-	.handle_irq	= xscale2pmu_handle_irq,
-	.enable		= xscale2pmu_enable_event,
-	.disable	= xscale2pmu_disable_event,
-	.read_counter	= xscale2pmu_read_counter,
-	.write_counter	= xscale2pmu_write_counter,
-	.get_event_idx	= xscale2pmu_get_event_idx,
-	.start		= xscale2pmu_start,
-	.stop		= xscale2pmu_stop,
-	.map_event	= xscale_map_event,
-	.num_events	= 5,
-	.max_period	= (1LLU << 32) - 1,
-};
-
-static struct arm_pmu *__devinit xscale2pmu_init(void)
+static int __devinit xscale2pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return &xscale2pmu;
+	cpu_pmu->name		= "xscale2";
+	cpu_pmu->handle_irq	= xscale2pmu_handle_irq;
+	cpu_pmu->enable		= xscale2pmu_enable_event;
+	cpu_pmu->disable	= xscale2pmu_disable_event;
+	cpu_pmu->read_counter	= xscale2pmu_read_counter;
+	cpu_pmu->write_counter	= xscale2pmu_write_counter;
+	cpu_pmu->get_event_idx	= xscale2pmu_get_event_idx;
+	cpu_pmu->start		= xscale2pmu_start;
+	cpu_pmu->stop		= xscale2pmu_stop;
+	cpu_pmu->map_event	= xscale_map_event;
+	cpu_pmu->num_events	= 5;
+	cpu_pmu->max_period	= (1LLU << 32) - 1;
+
+	return 0;
 }
 #else
-static struct arm_pmu *__devinit xscale1pmu_init(void)
+static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-static struct arm_pmu *__devinit xscale2pmu_init(void)
+static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return NULL;
+	return -ENODEV;
 }
 #endif	/* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 90084a6de35a..44bc0b327e2b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -34,6 +34,7 @@
 #include <linux/leds.h>
 
 #include <asm/cacheflush.h>
+#include <asm/idmap.h>
 #include <asm/processor.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
@@ -56,8 +57,6 @@ static const char *isa_modes[] = {
   "ARM" , "Thumb" , "Jazelle", "ThumbEE"
 };
 
-extern void setup_mm_for_reboot(void);
-
 static volatile int hlt_counter;
 
 void disable_hlt(void)
@@ -70,6 +69,7 @@ EXPORT_SYMBOL(disable_hlt);
 void enable_hlt(void)
 {
 	hlt_counter--;
+	BUG_ON(hlt_counter < 0);
 }
 
 EXPORT_SYMBOL(enable_hlt);
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 739db3a1b2d2..03deeffd9f6d 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -916,16 +916,11 @@ enum ptrace_syscall_dir {
 	PTRACE_SYSCALL_EXIT,
 };
 
-static int ptrace_syscall_trace(struct pt_regs *regs, int scno,
-				enum ptrace_syscall_dir dir)
+static int tracehook_report_syscall(struct pt_regs *regs,
+				    enum ptrace_syscall_dir dir)
 {
 	unsigned long ip;
 
-	current_thread_info()->syscall = scno;
-
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return scno;
-
 	/*
 	 * IP is used to denote syscall entry/exit:
 	 * IP = 0 -> entry, =1 -> exit
@@ -944,19 +939,41 @@ static int ptrace_syscall_trace(struct pt_regs *regs, int scno,
 
 asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 {
-	scno = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_ENTER);
+	current_thread_info()->syscall = scno;
+
+	/* Do the secure computing check first; failures should be fast. */
+	if (secure_computing(scno) == -1)
+		return -1;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
 		trace_sys_enter(regs, scno);
+
 	audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1,
 			    regs->ARM_r2, regs->ARM_r3);
+
 	return scno;
 }
 
-asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno)
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
 {
-	scno = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_EXIT);
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
-		trace_sys_exit(regs, scno);
+	/*
+	 * Audit the syscall before anything else, as a debugger may
+	 * come in and change the current registers.
+	 */
 	audit_syscall_exit(regs);
-	return scno;
+
+	/*
+	 * Note that we haven't updated the ->syscall field for the
+	 * current thread. This isn't a problem because it will have
+	 * been set on syscall entry and there hasn't been an opportunity
+	 * for a PTRACE_SET_SYSCALL since then.
+	 */
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_exit(regs, regs_return_value(regs));
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
 }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index da1d1aa20ad9..9a89bf4aefe1 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -383,6 +383,12 @@ void cpu_init(void)
 		BUG();
 	}
 
+	/*
+	 * This only works on resume and secondary cores. For booting on the
+	 * boot cpu, smp_prepare_boot_cpu is called after percpu area setup.
+	 */
+	set_my_cpu_offset(per_cpu_offset(cpu));
+
 	cpu_proc_init();
 
 	/*
@@ -426,13 +432,14 @@ int __cpu_logical_map[NR_CPUS];
 void __init smp_setup_processor_id(void)
 {
 	int i;
-	u32 cpu = is_smp() ? read_cpuid_mpidr() & 0xff : 0;
+	u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
+	u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 
 	cpu_logical_map(0) = cpu;
-	for (i = 1; i < NR_CPUS; ++i)
+	for (i = 1; i < nr_cpu_ids; ++i)
 		cpu_logical_map(i) = i == cpu ? 0 : i;
 
-	printk(KERN_INFO "Booting Linux on physical CPU %d\n", cpu);
+	printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
 }
 
 static void __init setup_processor(void)
@@ -758,6 +765,7 @@ void __init setup_arch(char **cmdline_p)
 
 	unflatten_device_tree();
 
+	arm_dt_init_cpu_maps();
 #ifdef CONFIG_SMP
 	if (is_smp()) {
 		smp_set_ops(mdesc->smp);
@@ -841,12 +849,9 @@ static const char *hwcap_str[] = {
 
 static int c_show(struct seq_file *m, void *v)
 {
-	int i;
+	int i, j;
+	u32 cpuid;
 
-	seq_printf(m, "Processor\t: %s rev %d (%s)\n",
-		   cpu_name, read_cpuid_id() & 15, elf_platform);
-
-#if defined(CONFIG_SMP)
 	for_each_online_cpu(i) {
 		/*
 		 * glibc reads /proc/cpuinfo to determine the number of
@@ -854,45 +859,48 @@ static int c_show(struct seq_file *m, void *v)
 		 * "processor".  Give glibc what it expects.
 		 */
 		seq_printf(m, "processor\t: %d\n", i);
-		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n",
+		cpuid = is_smp() ? per_cpu(cpu_data, i).cpuid : read_cpuid_id();
+		seq_printf(m, "model name\t: %s rev %d (%s)\n",
+			   cpu_name, cpuid & 15, elf_platform);
+
+#if defined(CONFIG_SMP)
+		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
 			   per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ),
 			   (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100);
-	}
-#else /* CONFIG_SMP */
-	seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
-		   loops_per_jiffy / (500000/HZ),
-		   (loops_per_jiffy / (5000/HZ)) % 100);
+#else
+		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+			   loops_per_jiffy / (500000/HZ),
+			   (loops_per_jiffy / (5000/HZ)) % 100);
 #endif
+		/* dump out the processor features */
+		seq_puts(m, "Features\t: ");
 
-	/* dump out the processor features */
-	seq_puts(m, "Features\t: ");
-
-	for (i = 0; hwcap_str[i]; i++)
-		if (elf_hwcap & (1 << i))
-			seq_printf(m, "%s ", hwcap_str[i]);
+		for (j = 0; hwcap_str[j]; j++)
+			if (elf_hwcap & (1 << j))
+				seq_printf(m, "%s ", hwcap_str[j]);
 
-	seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24);
-	seq_printf(m, "CPU architecture: %s\n", proc_arch[cpu_architecture()]);
+		seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24);
+		seq_printf(m, "CPU architecture: %s\n",
+			   proc_arch[cpu_architecture()]);
 
-	if ((read_cpuid_id() & 0x0008f000) == 0x00000000) {
-		/* pre-ARM7 */
-		seq_printf(m, "CPU part\t: %07x\n", read_cpuid_id() >> 4);
-	} else {
-		if ((read_cpuid_id() & 0x0008f000) == 0x00007000) {
-			/* ARM7 */
-			seq_printf(m, "CPU variant\t: 0x%02x\n",
-				   (read_cpuid_id() >> 16) & 127);
+		if ((cpuid & 0x0008f000) == 0x00000000) {
+			/* pre-ARM7 */
+			seq_printf(m, "CPU part\t: %07x\n", cpuid >> 4);
 		} else {
-			/* post-ARM7 */
-			seq_printf(m, "CPU variant\t: 0x%x\n",
-				   (read_cpuid_id() >> 20) & 15);
+			if ((cpuid & 0x0008f000) == 0x00007000) {
+				/* ARM7 */
+				seq_printf(m, "CPU variant\t: 0x%02x\n",
+					   (cpuid >> 16) & 127);
+			} else {
+				/* post-ARM7 */
+				seq_printf(m, "CPU variant\t: 0x%x\n",
+					   (cpuid >> 20) & 15);
+			}
+			seq_printf(m, "CPU part\t: 0x%03x\n",
+				   (cpuid >> 4) & 0xfff);
 		}
-		seq_printf(m, "CPU part\t: 0x%03x\n",
-			   (read_cpuid_id() >> 4) & 0xfff);
+		seq_printf(m, "CPU revision\t: %d\n\n", cpuid & 15);
 	}
-	seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15);
-
-	seq_puts(m, "\n");
 
 	seq_printf(m, "Hardware\t: %s\n", machine_name);
 	seq_printf(m, "Revision\t: %04x\n", system_rev);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index fbc8b2623d82..84f4cbf652e5 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -281,6 +281,7 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
 	struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
 
 	cpu_info->loops_per_jiffy = loops_per_jiffy;
+	cpu_info->cpuid = read_cpuid_id();
 
 	store_cpu_topology(cpuid);
 }
@@ -313,9 +314,10 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
+	cpu_init();
+
 	printk("CPU%u: Booted secondary processor\n", cpu);
 
-	cpu_init();
 	preempt_disable();
 	trace_hardirqs_off();
 
@@ -371,6 +373,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -421,6 +424,11 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 	smp_cross_call(mask, IPI_CALL_FUNC);
 }
 
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_WAKEUP);
+}
+
 void arch_send_call_function_single_ipi(int cpu)
 {
 	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
@@ -443,7 +451,7 @@ void show_ipi_list(struct seq_file *p, int prec)
 	for (i = 0; i < NR_IPI; i++) {
 		seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
 
-		for_each_present_cpu(cpu)
+		for_each_online_cpu(cpu)
 			seq_printf(p, "%10u ",
 				   __get_irq_stat(cpu, ipi_irqs[i]));
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index b22d700fea27..ff07879ad95d 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -31,6 +31,8 @@ static void __iomem *twd_base;
 
 static struct clk *twd_clk;
 static unsigned long twd_timer_rate;
+static bool common_setup_called;
+static DEFINE_PER_CPU(bool, percpu_setup_called);
 
 static struct clock_event_device __percpu **twd_evt;
 static int twd_ppi;
@@ -248,17 +250,9 @@ static struct clk *twd_get_clock(void)
 		return clk;
 	}
 
-	err = clk_prepare(clk);
+	err = clk_prepare_enable(clk);
 	if (err) {
-		pr_err("smp_twd: clock failed to prepare: %d\n", err);
-		clk_put(clk);
-		return ERR_PTR(err);
-	}
-
-	err = clk_enable(clk);
-	if (err) {
-		pr_err("smp_twd: clock failed to enable: %d\n", err);
-		clk_unprepare(clk);
+		pr_err("smp_twd: clock failed to prepare+enable: %d\n", err);
 		clk_put(clk);
 		return ERR_PTR(err);
 	}
@@ -272,15 +266,45 @@ static struct clk *twd_get_clock(void)
 static int __cpuinit twd_timer_setup(struct clock_event_device *clk)
 {
 	struct clock_event_device **this_cpu_clk;
+	int cpu = smp_processor_id();
+
+	/*
+	 * If the basic setup for this CPU has been done before don't
+	 * bother with the below.
+	 */
+	if (per_cpu(percpu_setup_called, cpu)) {
+		__raw_writel(0, twd_base + TWD_TIMER_CONTROL);
+		clockevents_register_device(*__this_cpu_ptr(twd_evt));
+		enable_percpu_irq(clk->irq, 0);
+		return 0;
+	}
+	per_cpu(percpu_setup_called, cpu) = true;
 
-	if (!twd_clk)
+	/*
+	 * This stuff only need to be done once for the entire TWD cluster
+	 * during the runtime of the system.
+	 */
+	if (!common_setup_called) {
 		twd_clk = twd_get_clock();
 
-	if (!IS_ERR_OR_NULL(twd_clk))
-		twd_timer_rate = clk_get_rate(twd_clk);
-	else
-		twd_calibrate_rate();
+		/*
+		 * We use IS_ERR_OR_NULL() here, because if the clock stubs
+		 * are active we will get a valid clk reference which is
+		 * however NULL and will return the rate 0. In that case we
+		 * need to calibrate the rate instead.
+		 */
+		if (!IS_ERR_OR_NULL(twd_clk))
+			twd_timer_rate = clk_get_rate(twd_clk);
+		else
+			twd_calibrate_rate();
+
+		common_setup_called = true;
+	}
 
+	/*
+	 * The following is done once per CPU the first time .setup() is
+	 * called.
+	 */
 	__raw_writel(0, twd_base + TWD_TIMER_CONTROL);
 
 	clk->name = "local_timer";
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 26c12c6440fc..79282ebcd939 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -196,32 +196,7 @@ static inline void parse_dt_topology(void) {}
 static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {}
 #endif
 
-
-/*
- * cpu topology management
- */
-
-#define MPIDR_SMP_BITMASK (0x3 << 30)
-#define MPIDR_SMP_VALUE (0x2 << 30)
-
-#define MPIDR_MT_BITMASK (0x1 << 24)
-
-/*
- * These masks reflect the current use of the affinity levels.
- * The affinity level can be up to 16 bits according to ARM ARM
- */
-#define MPIDR_HWID_BITMASK 0xFFFFFF
-
-#define MPIDR_LEVEL0_MASK 0x3
-#define MPIDR_LEVEL0_SHIFT 0
-
-#define MPIDR_LEVEL1_MASK 0xF
-#define MPIDR_LEVEL1_SHIFT 8
-
-#define MPIDR_LEVEL2_MASK 0xFF
-#define MPIDR_LEVEL2_SHIFT 16
-
-/*
+ /*
  * cpu topology table
  */
 struct cputopo_arm cpu_topology[NR_CPUS];
@@ -282,19 +257,14 @@ void store_cpu_topology(unsigned int cpuid)
 
 		if (mpidr & MPIDR_MT_BITMASK) {
 			/* core performance interdependency */
-			cpuid_topo->thread_id = (mpidr >> MPIDR_LEVEL0_SHIFT)
-				& MPIDR_LEVEL0_MASK;
-			cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL1_SHIFT)
-				& MPIDR_LEVEL1_MASK;
-			cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL2_SHIFT)
-				& MPIDR_LEVEL2_MASK;
+			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
 		} else {
 			/* largely independent cores */
 			cpuid_topo->thread_id = -1;
-			cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL0_SHIFT)
-				& MPIDR_LEVEL0_MASK;
-			cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL1_SHIFT)
-				& MPIDR_LEVEL1_MASK;
+			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
 		}
 	} else {
 		/*
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 36ff15bbfdd4..b9f38e388b43 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -114,6 +114,15 @@ SECTIONS
 
 	RO_DATA(PAGE_SIZE)
 
+	. = ALIGN(4);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+#ifdef CONFIG_MMU
+		*(__ex_table)
+#endif
+		__stop___ex_table = .;
+	}
+
 #ifdef CONFIG_ARM_UNWIND
 	/*
 	 * Stack unwinding tables
@@ -220,16 +229,6 @@ SECTIONS
 		READ_MOSTLY_DATA(L1_CACHE_BYTES)
 
 		/*
-		 * The exception fixup table (might need resorting at runtime)
-		 */
-		. = ALIGN(4);
-		__start___ex_table = .;
-#ifdef CONFIG_MMU
-		*(__ex_table)
-#endif
-		__stop___ex_table = .;
-
-		/*
 		 * and the usual data section
 		 */
 		DATA_DATA
diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index b3d86d7081a0..9f82f9dcbb98 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -342,9 +342,10 @@ static int impd1_probe(struct lm_device *dev)
 
 		pc_base = dev->resource.start + idev->offset;
 		snprintf(devname, 32, "lm%x:%5.5lx", dev->id, idev->offset >> 12);
-		d = amba_ahb_device_add(&dev->dev, devname, pc_base, SZ_4K,
-					dev->irq, dev->irq,
-					idev->platform_data, idev->id);
+		d = amba_ahb_device_add_res(&dev->dev, devname, pc_base, SZ_4K,
+					    dev->irq, dev->irq,
+					    idev->platform_data, idev->id,
+					    &dev->resource);
 		if (IS_ERR(d)) {
 			dev_err(&dev->dev, "unable to register device: %ld\n", PTR_ERR(d));
 			continue;
diff --git a/arch/arm/mach-ixp4xx/include/mach/udc.h b/arch/arm/mach-ixp4xx/include/mach/udc.h
index 80d6da2eafac..7bd8b96c8843 100644
--- a/arch/arm/mach-ixp4xx/include/mach/udc.h
+++ b/arch/arm/mach-ixp4xx/include/mach/udc.h
@@ -2,7 +2,7 @@
  * arch/arm/mach-ixp4xx/include/mach/udc.h
  *
  */
-#include <asm/mach/udc_pxa2xx.h>
+#include <linux/platform_data/pxa2xx_udc.h>
 
 extern void ixp4xx_set_udc_info(struct pxa2xx_udc_mach_info *info);
 
diff --git a/arch/arm/mach-omap2/pmu.c b/arch/arm/mach-omap2/pmu.c
index 2a791766283d..031e2fbd0e19 100644
--- a/arch/arm/mach-omap2/pmu.c
+++ b/arch/arm/mach-omap2/pmu.c
@@ -57,8 +57,6 @@ static int __init omap2_init_pmu(unsigned oh_num, char *oh_names[])
 	if (IS_ERR(omap_pmu_dev))
 		return PTR_ERR(omap_pmu_dev);
 
-	pm_runtime_enable(&omap_pmu_dev->dev);
-
 	return 0;
 }
 
diff --git a/arch/arm/mach-pxa/include/mach/udc.h b/arch/arm/mach-pxa/include/mach/udc.h
index 2f82332e81a0..9a827e32db98 100644
--- a/arch/arm/mach-pxa/include/mach/udc.h
+++ b/arch/arm/mach-pxa/include/mach/udc.h
@@ -2,7 +2,7 @@
  * arch/arm/mach-pxa/include/mach/udc.h
  *
  */
-#include <asm/mach/udc_pxa2xx.h>
+#include <linux/platform_data/pxa2xx_udc.h>
 
 extern void pxa_set_udc_info(struct pxa2xx_udc_mach_info *info);
 
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index d3b3cd216d64..28511d43637a 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -467,6 +467,7 @@ static void __init realview_eb_init(void)
 MACHINE_START(REALVIEW_EB, "ARM-RealView EB")
 	/* Maintainer: ARM Ltd/Deep Blue Solutions Ltd */
 	.atag_offset	= 0x100,
+	.smp		= smp_ops(realview_smp_ops),
 	.fixup		= realview_fixup,
 	.map_io		= realview_eb_map_io,
 	.init_early	= realview_init_early,
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 6a7ad3c2a3fc..9a23739f7026 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/serial_core.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/mtd/mtd.h>
@@ -37,7 +38,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/irda.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <mach/assabet.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c
index 038df4894b0f..b2dadf3ea3df 100644
--- a/arch/arm/mach-sa1100/badge4.c
+++ b/arch/arm/mach-sa1100/badge4.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/tty.h>
@@ -34,7 +35,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/hardware/sa1111.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/badge4.h>
 
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index ad0eb08ea077..304bca4a07c0 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/tty.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/mtd/mtd.h>
@@ -27,7 +28,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/cerf.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 170cb6107f68..45f424f5fca6 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -21,6 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/mtd/mtd.h>
@@ -40,7 +41,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <asm/hardware/scoop.h>
 #include <asm/mach/sharpsl_param.h>
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index 63150e1ffe9e..f17e7382242a 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -17,12 +17,12 @@
 #include <linux/mfd/htc-egpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/h3xxx.h>
 
diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c
index fc106aab7c7e..d005939c41fc 100644
--- a/arch/arm/mach-sa1100/hackkit.c
+++ b/arch/arm/mach-sa1100/hackkit.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/cpufreq.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/serial_core.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/irq.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c
index e3084f47027d..35cfc428b4d4 100644
--- a/arch/arm/mach-sa1100/jornada720.c
+++ b/arch/arm/mach-sa1100/jornada720.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <linux/mtd/mtd.h>
@@ -30,7 +31,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 3048b17e84c5..f69f78fc3ddd 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -4,6 +4,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/tty.h>
 #include <linux/gpio.h>
 #include <linux/leds.h>
@@ -18,7 +19,6 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/irqs.h>
 
diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c
index 41f69d97066f..102e08f7b109 100644
--- a/arch/arm/mach-sa1100/nanoengine.c
+++ b/arch/arm/mach-sa1100/nanoengine.c
@@ -13,6 +13,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/root_dev.h>
@@ -24,7 +25,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 
 #include <mach/hardware.h>
 #include <mach/nanoengine.h>
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 266db873a4e4..88be0474f3d7 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/serial_core.h>
@@ -14,7 +15,6 @@
 
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <asm/hardware/sa1111.h>
 #include <asm/sizes.h>
 
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index 37fe0a0a5369..c51bb63f90fb 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/ioport.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/io.h>
@@ -18,7 +19,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/flash.h>
-#include <asm/mach/serial_sa1100.h>
 #include <mach/irqs.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index ff6b7b35bca9..6460d25fbb88 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -5,6 +5,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/tty.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -18,7 +19,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/shannon.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 71790e581d93..6d65f65fcb23 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -9,6 +9,7 @@
 #include <linux/proc_fs.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/platform_data/sa11x0-serial.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/mtd/mtd.h>
@@ -23,7 +24,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/serial_sa1100.h>
 #include <linux/platform_data/mfd-mcp-sa11x0.h>
 #include <mach/simpad.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h
new file mode 100644
index 000000000000..c86124769831
--- /dev/null
+++ b/arch/arm/mm/cache-aurora-l2.h
@@ -0,0 +1,55 @@
+/*
+ * AURORA shared L2 cache controller support
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H
+#define __ASM_ARM_HARDWARE_AURORA_L2_H
+
+#define AURORA_SYNC_REG		    0x700
+#define AURORA_RANGE_BASE_ADDR_REG  0x720
+#define AURORA_FLUSH_PHY_ADDR_REG   0x7f0
+#define AURORA_INVAL_RANGE_REG	    0x774
+#define AURORA_CLEAN_RANGE_REG	    0x7b4
+#define AURORA_FLUSH_RANGE_REG	    0x7f4
+
+#define AURORA_ACR_REPLACEMENT_OFFSET	    27
+#define AURORA_ACR_REPLACEMENT_MASK	     \
+	(0x3 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR    \
+	(0 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_LFSR     \
+	(1 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \
+	(3 << AURORA_ACR_REPLACEMENT_OFFSET)
+
+#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET	0
+#define AURORA_ACR_FORCE_WRITE_POLICY_MASK	\
+	(0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_POLICY_DIS	\
+	(0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_BACK_POLICY	\
+	(1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_THRO_POLICY	\
+	(2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+
+#define MAX_RANGE_SIZE		1024
+
+#define AURORA_WAY_SIZE_SHIFT	2
+
+#define AURORA_CTRL_FW		0x100
+
+/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make
+ * the distinction between a number coming from hardware and a number
+ * coming from the device tree */
+#define AURORA_CACHE_ID	       0x100
+
+#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 8a97e6443c62..6911b8b2745c 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -25,6 +25,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
+#include "cache-aurora-l2.h"
 
 #define CACHE_LINE_SIZE		32
 
@@ -34,14 +35,20 @@ static u32 l2x0_way_mask;	/* Bitmask of active ways */
 static u32 l2x0_size;
 static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
 
+/* Aurora don't have the cache ID register available, so we have to
+ * pass it though the device tree */
+static u32  cache_id_part_number_from_dt;
+
 struct l2x0_regs l2x0_saved_regs;
 
 struct l2x0_of_data {
 	void (*setup)(const struct device_node *, u32 *, u32 *);
 	void (*save)(void);
-	void (*resume)(void);
+	struct outer_cache_fns outer_cache;
 };
 
+static bool of_init = false;
+
 static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
 {
 	/* wait for cache operation by line or way to complete */
@@ -168,7 +175,7 @@ static void l2x0_inv_all(void)
 	/* invalidate all ways */
 	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	/* Invalidating when L2 is enabled is a nono */
-	BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1);
+	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);
 	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
 	cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
 	cache_sync();
@@ -292,11 +299,18 @@ static void l2x0_unlock(u32 cache_id)
 	int lockregs;
 	int i;
 
-	if (cache_id == L2X0_CACHE_ID_PART_L310)
+	switch (cache_id) {
+	case L2X0_CACHE_ID_PART_L310:
 		lockregs = 8;
-	else
+		break;
+	case AURORA_CACHE_ID:
+		lockregs = 4;
+		break;
+	default:
 		/* L210 and unknown types */
 		lockregs = 1;
+		break;
+	}
 
 	for (i = 0; i < lockregs; i++) {
 		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
@@ -312,18 +326,22 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 	u32 cache_id;
 	u32 way_size = 0;
 	int ways;
+	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
 	const char *type;
 
 	l2x0_base = base;
-
-	cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+	if (cache_id_part_number_from_dt)
+		cache_id = cache_id_part_number_from_dt;
+	else
+		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID)
+			& L2X0_CACHE_ID_PART_MASK;
 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 
 	aux &= aux_mask;
 	aux |= aux_val;
 
 	/* Determine the number of ways */
-	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+	switch (cache_id) {
 	case L2X0_CACHE_ID_PART_L310:
 		if (aux & (1 << 16))
 			ways = 16;
@@ -340,6 +358,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		ways = (aux >> 13) & 0xf;
 		type = "L210";
 		break;
+
+	case AURORA_CACHE_ID:
+		sync_reg_offset = AURORA_SYNC_REG;
+		ways = (aux >> 13) & 0xf;
+		ways = 2 << ((ways + 1) >> 2);
+		way_size_shift = AURORA_WAY_SIZE_SHIFT;
+		type = "Aurora";
+		break;
 	default:
 		/* Assume unknown chips have 8 ways */
 		ways = 8;
@@ -353,7 +379,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 	 * L2 cache Size =  Way size * Number of ways
 	 */
 	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
-	way_size = 1 << (way_size + 3);
+	way_size = 1 << (way_size + way_size_shift);
+
 	l2x0_size = ways * way_size * SZ_1K;
 
 	/*
@@ -361,7 +388,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 	 * If you are booting from non-secure mode
 	 * accessing the below registers will fault.
 	 */
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
 		/* Make sure that I&D is not locked down when starting */
 		l2x0_unlock(cache_id);
 
@@ -371,7 +398,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		l2x0_inv_all();
 
 		/* enable L2X0 */
-		writel_relaxed(1, l2x0_base + L2X0_CTRL);
+		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
 	}
 
 	/* Re-read it in case some bits are reserved. */
@@ -380,13 +407,15 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 	/* Save the value for resuming. */
 	l2x0_saved_regs.aux_ctrl = aux;
 
-	outer_cache.inv_range = l2x0_inv_range;
-	outer_cache.clean_range = l2x0_clean_range;
-	outer_cache.flush_range = l2x0_flush_range;
-	outer_cache.sync = l2x0_cache_sync;
-	outer_cache.flush_all = l2x0_flush_all;
-	outer_cache.inv_all = l2x0_inv_all;
-	outer_cache.disable = l2x0_disable;
+	if (!of_init) {
+		outer_cache.inv_range = l2x0_inv_range;
+		outer_cache.clean_range = l2x0_clean_range;
+		outer_cache.flush_range = l2x0_flush_range;
+		outer_cache.sync = l2x0_cache_sync;
+		outer_cache.flush_all = l2x0_flush_all;
+		outer_cache.inv_all = l2x0_inv_all;
+		outer_cache.disable = l2x0_disable;
+	}
 
 	printk(KERN_INFO "%s cache controller enabled\n", type);
 	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
@@ -394,6 +423,100 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 }
 
 #ifdef CONFIG_OF
+static int l2_wt_override;
+
+/*
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+static unsigned long calc_range_end(unsigned long start, unsigned long end)
+{
+	/*
+	 * Limit the number of cache lines processed at once,
+	 * since cache range operations stall the CPU pipeline
+	 * until completion.
+	 */
+	if (end > start + MAX_RANGE_SIZE)
+		end = start + MAX_RANGE_SIZE;
+
+	/*
+	 * Cache range operations can't straddle a page boundary.
+	 */
+	if (end > PAGE_ALIGN(start+1))
+		end = PAGE_ALIGN(start+1);
+
+	return end;
+}
+
+/*
+ * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
+ * and range operations only do a TLB lookup on the start address.
+ */
+static void aurora_pa_range(unsigned long start, unsigned long end,
+			unsigned long offset)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	writel(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
+	writel(end, l2x0_base + offset);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+	cache_sync();
+}
+
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * round start and end adresses up to cache line size
+	 */
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = ALIGN(end, CACHE_LINE_SIZE);
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 */
+	while (start < end) {
+		unsigned long range_end = calc_range_end(start, end);
+		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+				AURORA_INVAL_RANGE_REG);
+		start = range_end;
+	}
+}
+
+static void aurora_clean_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 is forced to WT, the L2 will always be clean and we
+	 * don't need to do anything here.
+	 */
+	if (!l2_wt_override) {
+		start &= ~(CACHE_LINE_SIZE - 1);
+		end = ALIGN(end, CACHE_LINE_SIZE);
+		while (start != end) {
+			unsigned long range_end = calc_range_end(start, end);
+			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+					AURORA_CLEAN_RANGE_REG);
+			start = range_end;
+		}
+	}
+}
+
+static void aurora_flush_range(unsigned long start, unsigned long end)
+{
+	if (!l2_wt_override) {
+		start &= ~(CACHE_LINE_SIZE - 1);
+		end = ALIGN(end, CACHE_LINE_SIZE);
+		while (start != end) {
+			unsigned long range_end = calc_range_end(start, end);
+			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+					AURORA_FLUSH_RANGE_REG);
+			start = range_end;
+		}
+	}
+}
+
 static void __init l2x0_of_setup(const struct device_node *np,
 				 u32 *aux_val, u32 *aux_mask)
 {
@@ -491,9 +614,15 @@ static void __init pl310_save(void)
 	}
 }
 
+static void aurora_save(void)
+{
+	l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+}
+
 static void l2x0_resume(void)
 {
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
 		/* restore aux ctrl and enable l2 */
 		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));
 
@@ -502,7 +631,7 @@ static void l2x0_resume(void)
 
 		l2x0_inv_all();
 
-		writel_relaxed(1, l2x0_base + L2X0_CTRL);
+		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);
 	}
 }
 
@@ -510,7 +639,7 @@ static void pl310_resume(void)
 {
 	u32 l2x0_revision;
 
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
 		/* restore pl310 setup */
 		writel_relaxed(l2x0_saved_regs.tag_latency,
 			l2x0_base + L2X0_TAG_LATENCY_CTRL);
@@ -536,22 +665,108 @@ static void pl310_resume(void)
 	l2x0_resume();
 }
 
+static void aurora_resume(void)
+{
+	if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+		writel(l2x0_saved_regs.aux_ctrl, l2x0_base + L2X0_AUX_CTRL);
+		writel(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL);
+	}
+}
+
+static void __init aurora_broadcast_l2_commands(void)
+{
+	__u32 u;
+	/* Enable Broadcasting of cache commands to L2*/
+	__asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u));
+	u |= AURORA_CTRL_FW;		/* Set the FW bit */
+	__asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u));
+	isb();
+}
+
+static void __init aurora_of_setup(const struct device_node *np,
+				u32 *aux_val, u32 *aux_mask)
+{
+	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
+	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
+
+	of_property_read_u32(np, "cache-id-part",
+			&cache_id_part_number_from_dt);
+
+	/* Determine and save the write policy */
+	l2_wt_override = of_property_read_bool(np, "wt-override");
+
+	if (l2_wt_override) {
+		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
+		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
 static const struct l2x0_of_data pl310_data = {
-	pl310_of_setup,
-	pl310_save,
-	pl310_resume,
+	.setup = pl310_of_setup,
+	.save  = pl310_save,
+	.outer_cache = {
+		.resume      = pl310_resume,
+		.inv_range   = l2x0_inv_range,
+		.clean_range = l2x0_clean_range,
+		.flush_range = l2x0_flush_range,
+		.sync        = l2x0_cache_sync,
+		.flush_all   = l2x0_flush_all,
+		.inv_all     = l2x0_inv_all,
+		.disable     = l2x0_disable,
+		.set_debug   = pl310_set_debug,
+	},
 };
 
 static const struct l2x0_of_data l2x0_data = {
-	l2x0_of_setup,
-	NULL,
-	l2x0_resume,
+	.setup = l2x0_of_setup,
+	.save  = NULL,
+	.outer_cache = {
+		.resume      = l2x0_resume,
+		.inv_range   = l2x0_inv_range,
+		.clean_range = l2x0_clean_range,
+		.flush_range = l2x0_flush_range,
+		.sync        = l2x0_cache_sync,
+		.flush_all   = l2x0_flush_all,
+		.inv_all     = l2x0_inv_all,
+		.disable     = l2x0_disable,
+	},
+};
+
+static const struct l2x0_of_data aurora_with_outer_data = {
+	.setup = aurora_of_setup,
+	.save  = aurora_save,
+	.outer_cache = {
+		.resume      = aurora_resume,
+		.inv_range   = aurora_inv_range,
+		.clean_range = aurora_clean_range,
+		.flush_range = aurora_flush_range,
+		.sync        = l2x0_cache_sync,
+		.flush_all   = l2x0_flush_all,
+		.inv_all     = l2x0_inv_all,
+		.disable     = l2x0_disable,
+	},
+};
+
+static const struct l2x0_of_data aurora_no_outer_data = {
+	.setup = aurora_of_setup,
+	.save  = aurora_save,
+	.outer_cache = {
+		.resume      = aurora_resume,
+	},
 };
 
 static const struct of_device_id l2x0_ids[] __initconst = {
 	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
 	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
 	{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
+	{ .compatible = "marvell,aurora-system-cache",
+	  .data = (void *)&aurora_no_outer_data},
+	{ .compatible = "marvell,aurora-outer-cache",
+	  .data = (void *)&aurora_with_outer_data},
 	{}
 };
 
@@ -577,17 +792,24 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	data = of_match_node(l2x0_ids, np)->data;
 
 	/* L2 configuration can only be changed if the cache is disabled */
-	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
 		if (data->setup)
 			data->setup(np, &aux_val, &aux_mask);
+
+		/* For aurora cache in no outer mode select the
+		 * correct mode using the coprocessor*/
+		if (data == &aurora_no_outer_data)
+			aurora_broadcast_l2_commands();
 	}
 
 	if (data->save)
 		data->save();
 
+	of_init = true;
 	l2x0_init(l2x0_base, aux_val, aux_mask);
 
-	outer_cache.resume = data->resume;
+	memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache));
+
 	return 0;
 }
 #endif
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 4e07eec1270d..bc4a5e9ebb78 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -2,6 +2,9 @@
  *  linux/arch/arm/mm/context.c
  *
  *  Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ *  Copyright (C) 2012 ARM Limited
+ *
+ *  Author: Will Deacon <will.deacon@arm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -14,14 +17,40 @@
 #include <linux/percpu.h>
 
 #include <asm/mmu_context.h>
+#include <asm/smp_plat.h>
 #include <asm/thread_notify.h>
 #include <asm/tlbflush.h>
 
+/*
+ * On ARMv6, we have the following structure in the Context ID:
+ *
+ * 31                         7          0
+ * +-------------------------+-----------+
+ * |      process ID         |   ASID    |
+ * +-------------------------+-----------+
+ * |              context ID             |
+ * +-------------------------------------+
+ *
+ * The ASID is used to tag entries in the CPU caches and TLBs.
+ * The context ID is used by debuggers and trace logic, and
+ * should be unique within all running processes.
+ */
+#define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
+#define NUM_USER_ASIDS		(ASID_FIRST_VERSION - 1)
+
+#define ASID_TO_IDX(asid)	((asid & ~ASID_MASK) - 1)
+#define IDX_TO_ASID(idx)	((idx + 1) & ~ASID_MASK)
+
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
+static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
+
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
 
 #ifdef CONFIG_ARM_LPAE
-void cpu_set_reserved_ttbr0(void)
+static void cpu_set_reserved_ttbr0(void)
 {
 	unsigned long ttbl = __pa(swapper_pg_dir);
 	unsigned long ttbh = 0;
@@ -37,7 +66,7 @@ void cpu_set_reserved_ttbr0(void)
 	isb();
 }
 #else
-void cpu_set_reserved_ttbr0(void)
+static void cpu_set_reserved_ttbr0(void)
 {
 	u32 ttb;
 	/* Copy TTBR1 into TTBR0 */
@@ -84,124 +113,104 @@ static int __init contextidr_notifier_init(void)
 arch_initcall(contextidr_notifier_init);
 #endif
 
-/*
- * We fork()ed a process, and we need a new context for the child
- * to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+static void flush_context(unsigned int cpu)
 {
-	mm->context.id = 0;
-	raw_spin_lock_init(&mm->context.id_lock);
-}
+	int i;
+	u64 asid;
+
+	/* Update the list of reserved ASIDs and the ASID bitmap. */
+	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+	for_each_possible_cpu(i) {
+		if (i == cpu) {
+			asid = 0;
+		} else {
+			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+			__set_bit(ASID_TO_IDX(asid), asid_map);
+		}
+		per_cpu(reserved_asids, i) = asid;
+	}
 
-static void flush_context(void)
-{
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	if (icache_is_vivt_asid_tagged()) {
+	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	if (!tlb_ops_need_broadcast())
+		cpumask_set_cpu(cpu, &tlb_flush_pending);
+	else
+		cpumask_setall(&tlb_flush_pending);
+
+	if (icache_is_vivt_asid_tagged())
 		__flush_icache_all();
-		dsb();
-	}
 }
 
-#ifdef CONFIG_SMP
+static int is_reserved_asid(u64 asid)
+{
+	int cpu;
+	for_each_possible_cpu(cpu)
+		if (per_cpu(reserved_asids, cpu) == asid)
+			return 1;
+	return 0;
+}
 
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
+static void new_context(struct mm_struct *mm, unsigned int cpu)
 {
-	unsigned long flags;
+	u64 asid = mm->context.id;
+	u64 generation = atomic64_read(&asid_generation);
 
-	/*
-	 * Locking needed for multi-threaded applications where the
-	 * same mm->context.id could be set from different CPUs during
-	 * the broadcast. This function is also called via IPI so the
-	 * mm->context.id_lock has to be IRQ-safe.
-	 */
-	raw_spin_lock_irqsave(&mm->context.id_lock, flags);
-	if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) {
+	if (asid != 0 && is_reserved_asid(asid)) {
 		/*
-		 * Old version of ASID found. Set the new one and
-		 * reset mm_cpumask(mm).
+		 * Our current ASID was active during a rollover, we can
+		 * continue to use it and this was just a false alarm.
 		 */
-		mm->context.id = asid;
+		asid = generation | (asid & ~ASID_MASK);
+	} else {
+		/*
+		 * Allocate a free ASID. If we can't find one, take a
+		 * note of the currently active ASIDs and mark the TLBs
+		 * as requiring flushes.
+		 */
+		asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+		if (asid == NUM_USER_ASIDS) {
+			generation = atomic64_add_return(ASID_FIRST_VERSION,
+							 &asid_generation);
+			flush_context(cpu);
+			asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+		}
+		__set_bit(asid, asid_map);
+		asid = generation | IDX_TO_ASID(asid);
 		cpumask_clear(mm_cpumask(mm));
 	}
-	raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
 
-	/*
-	 * Set the mm_cpumask(mm) bit for the current CPU.
-	 */
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+	mm->context.id = asid;
 }
 
-/*
- * Reset the ASID on the current CPU. This function call is broadcast
- * from the CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 {
-	unsigned int asid;
+	unsigned long flags;
 	unsigned int cpu = smp_processor_id();
-	struct mm_struct *mm = current->active_mm;
 
-	smp_rmb();
-	asid = cpu_last_asid + cpu + 1;
+	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
+		__check_vmalloc_seq(mm);
 
-	flush_context();
-	set_mm_context(mm, asid);
-
-	/* set the new ASID */
-	cpu_switch_mm(mm->pgd, mm);
-}
+	/*
+	 * Required during context switch to avoid speculative page table
+	 * walking with the wrong TTBR.
+	 */
+	cpu_set_reserved_ttbr0();
 
-#else
+	if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+	    && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id))
+		goto switch_mm_fastpath;
 
-static inline void set_mm_context(struct mm_struct *mm, unsigned int asid)
-{
-	mm->context.id = asid;
-	cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
-}
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	/* Check that our ASID belongs to the current generation. */
+	if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+		new_context(mm, cpu);
 
-#endif
+	atomic64_set(&per_cpu(active_asids, cpu), mm->context.id);
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
-void __new_context(struct mm_struct *mm)
-{
-	unsigned int asid;
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+		local_flush_tlb_all();
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
-	raw_spin_lock(&cpu_asid_lock);
-#ifdef CONFIG_SMP
-	/*
-	 * Check the ASID again, in case the change was broadcast from
-	 * another CPU before we acquired the lock.
-	 */
-	if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) {
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-		raw_spin_unlock(&cpu_asid_lock);
-		return;
-	}
-#endif
-	/*
-	 * At this point, it is guaranteed that the current mm (with
-	 * an old ASID) isn't active on any other CPU since the ASIDs
-	 * are changed simultaneously via IPI.
-	 */
-	asid = ++cpu_last_asid;
-	if (asid == 0)
-		asid = cpu_last_asid = ASID_FIRST_VERSION;
-
-	/*
-	 * If we've used up all our ASIDs, we need
-	 * to start a new version and flush the TLB.
-	 */
-	if (unlikely((asid & ~ASID_MASK) == 0)) {
-		asid = cpu_last_asid + smp_processor_id() + 1;
-		flush_context();
-#ifdef CONFIG_SMP
-		smp_wmb();
-		smp_call_function(reset_context, NULL, 1);
-#endif
-		cpu_last_asid += NR_CPUS;
-	}
-
-	set_mm_context(mm, asid);
-	raw_spin_unlock(&cpu_asid_lock);
+switch_mm_fastpath:
+	cpu_switch_mm(mm->pgd, mm);
 }
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index ab88ed4f8e08..99db769307ec 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -92,6 +92,9 @@ static int __init init_static_idmap(void)
 		(long long)idmap_start, (long long)idmap_end);
 	identity_mapping_add(idmap_pgd, idmap_start, idmap_end);
 
+	/* Flush L1 for the hardware to see this page table content */
+	flush_cache_louis();
+
 	return 0;
 }
 early_initcall(init_static_idmap);
@@ -103,12 +106,15 @@ early_initcall(init_static_idmap);
  */
 void setup_mm_for_reboot(void)
 {
-	/* Clean and invalidate L1. */
-	flush_cache_all();
-
 	/* Switch to the identity mapping. */
 	cpu_switch_mm(idmap_pgd, &init_mm);
 
-	/* Flush the TLB. */
+#ifdef CONFIG_CPU_HAS_ASID
+	/*
+	 * We don't have a clean ASID for the identity mapping, which
+	 * may clash with virtual addresses of the previous page tables
+	 * and therefore potentially in the TLB.
+	 */
 	local_flush_tlb_all();
+#endif
 }
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 5dcc2fd46c46..88fd86cf3d9a 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys,
 }
 EXPORT_SYMBOL(ioremap_page);
 
-void __check_kvm_seq(struct mm_struct *mm)
+void __check_vmalloc_seq(struct mm_struct *mm)
 {
 	unsigned int seq;
 
 	do {
-		seq = init_mm.context.kvm_seq;
+		seq = init_mm.context.vmalloc_seq;
 		memcpy(pgd_offset(mm, VMALLOC_START),
 		       pgd_offset_k(VMALLOC_START),
 		       sizeof(pgd_t) * (pgd_index(VMALLOC_END) -
 					pgd_index(VMALLOC_START)));
-		mm->context.kvm_seq = seq;
-	} while (seq != init_mm.context.kvm_seq);
+		mm->context.vmalloc_seq = seq;
+	} while (seq != init_mm.context.vmalloc_seq);
 }
 
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
@@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 		if (!pmd_none(pmd)) {
 			/*
 			 * Clear the PMD from the page table, and
-			 * increment the kvm sequence so others
+			 * increment the vmalloc sequence so others
 			 * notice this change.
 			 *
 			 * Note: this is still racy on SMP machines.
 			 */
 			pmd_clear(pmdp);
-			init_mm.context.kvm_seq++;
+			init_mm.context.vmalloc_seq++;
 
 			/*
 			 * Free the page table, if there was one.
@@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 	 * Ensure that the active_mm is up to date - we want to
 	 * catch any use-after-iounmap cases.
 	 */
-	if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq)
-		__check_kvm_seq(current->active_mm);
+	if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)
+		__check_vmalloc_seq(current->active_mm);
 
 	flush_tlb_kernel_range(virt, end);
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 941dfb9e9a78..99b47b950efc 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -488,7 +488,7 @@ static void __init build_mem_type_table(void)
 #endif
 
 	for (i = 0; i < 16; i++) {
-		unsigned long v = pgprot_val(protection_map[i]);
+		pteval_t v = pgprot_val(protection_map[i]);
 		protection_map[i] = __pgprot(v | user_pgprot);
 	}
 
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index b29a2265af01..eb6aa73bc8b7 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -167,6 +167,10 @@
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_PRESENT
 	moveq	r3, #0
+#ifndef CONFIG_CPU_USE_DOMAINS
+	tstne	r1, #L_PTE_NONE
+	movne	r3, #0
+#endif
 
 	str	r3, [r0]
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index fd045e706390..6d98c13ab827 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -100,7 +100,11 @@ ENTRY(cpu_v7_set_pte_ext)
 	orrne	r3, r3, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_YOUNG
-	tstne	r1, #L_PTE_PRESENT
+	tstne	r1, #L_PTE_VALID
+#ifndef CONFIG_CPU_USE_DOMAINS
+	eorne	r1, r1, #L_PTE_NONE
+	tstne	r1, #L_PTE_NONE
+#endif
 	moveq	r3, #0
 
  ARM(	str	r3, [r0, #2048]! )
@@ -161,11 +165,11 @@ ENDPROC(cpu_v7_set_pte_ext)
 	 *  TFR   EV X F   I D LR    S
 	 * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM
 	 * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced
-	 *    1    0 110       0011 1100 .111 1101 < we want
+	 *   01    0 110       0011 1100 .111 1101 < we want
 	 */
 	.align	2
 	.type	v7_crval, #object
 v7_crval:
-	crval	clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
+	crval	clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
 
 	.previous
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 8de0f1dd1549..7b56386f9496 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -65,8 +65,11 @@ ENDPROC(cpu_v7_switch_mm)
  */
 ENTRY(cpu_v7_set_pte_ext)
 #ifdef CONFIG_MMU
-	tst	r2, #L_PTE_PRESENT
+	tst	r2, #L_PTE_VALID
 	beq	1f
+	tst	r3, #1 << (57 - 32)		@ L_PTE_NONE
+	bicne	r2, #L_PTE_VALID
+	bne	1f
 	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY
 	orreq	r2, #L_PTE_RDONLY
 1:	strd	r2, r3, [r0]
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 846d279f3176..42cc833aa02f 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -57,7 +57,7 @@ ENTRY(cpu_v7_reset)
  THUMB(	bic	r1, r1, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
 	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
 	isb
-	mov	pc, r0
+	bx	r0
 ENDPROC(cpu_v7_reset)
 	.popsection
 
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index c641fb685017..b6f305e3b908 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -42,7 +42,7 @@
 #define r_skb_hl	ARM_R8
 
 #define SCRATCH_SP_OFFSET	0
-#define SCRATCH_OFF(k)		(SCRATCH_SP_OFFSET + (k))
+#define SCRATCH_OFF(k)		(SCRATCH_SP_OFFSET + 4 * (k))
 
 #define SEEN_MEM		((1 << BPF_MEMWORDS) - 1)
 #define SEEN_MEM_WORD(k)	(1 << (k))
@@ -845,7 +845,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 	ctx.skf		= fp;
 	ctx.ret0_fp_idx = -1;
 
-	ctx.offsets = kzalloc(GFP_KERNEL, 4 * (ctx.skf->len + 1));
+	ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
 	if (ctx.offsets == NULL)
 		return;
 
@@ -864,7 +864,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 
 	ctx.idx += ctx.imm_count;
 	if (ctx.imm_count) {
-		ctx.imms = kzalloc(GFP_KERNEL, 4 * ctx.imm_count);
+		ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
 		if (ctx.imms == NULL)
 			goto out;
 	}
diff --git a/arch/avr32/include/asm/mach/serial_at91.h b/arch/avr32/include/asm/mach/serial_at91.h
deleted file mode 100644
index 55b317a89061..000000000000
--- a/arch/avr32/include/asm/mach/serial_at91.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  linux/include/asm-arm/mach/serial_at91.h
- *
- *  Based on serial_sa1100.h  by Nicolas Pitre
- *
- *  Copyright (C) 2002 ATMEL Rousset
- *
- *  Low level machine dependent UART functions.
- */
-
-struct uart_port;
-
-/*
- * This is a temporary structure for registering these
- * functions; it is intended to be discarded after boot.
- */
-struct atmel_port_fns {
-	void	(*set_mctrl)(struct uart_port *, u_int);
-	u_int	(*get_mctrl)(struct uart_port *);
-	void	(*enable_ms)(struct uart_port *);
-	void	(*pm)(struct uart_port *, u_int, u_int);
-	int	(*set_wake)(struct uart_port *, u_int);
-	int	(*open)(struct uart_port *);
-	void	(*close)(struct uart_port *);
-};
-
-#if defined(CONFIG_SERIAL_ATMEL)
-void atmel_register_uart_fns(struct atmel_port_fns *fns);
-#else
-#define atmel_register_uart_fns(fns) do { } while (0)
-#endif
-
-