author     Linus Torvalds <torvalds@linux-foundation.org>  2015-04-14 21:03:26 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-04-14 21:03:26 -0700
commit     bb0fd7ab0986105765d11baa82e619c618a235aa (patch)
tree       6a0585ece827e1025aa48819959d02155a871be9 /arch/arm
parent     bdfa54dfd9eea001274dbcd622657a904fe43b81 (diff)
parent     4b2f8838479eb2abe042e094f7d2cced6d5ea772 (diff)
download   linux-bb0fd7ab0986105765d11baa82e619c618a235aa.tar.gz
Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
Pull ARM updates from Russell King:
 "Included in this update are both some long term fixes and some new
  features.

  Fixes:

   - An integer overflow in the calculation of ELF_ET_DYN_BASE.

   - Avoiding OOMs for high-order IOMMU allocations

   - SMP requires the data cache to be enabled for synchronisation
     primitives to work, so prevent the CPU_DCACHE_DISABLE option being
     visible on SMP builds.

   - A bug going back 10+ years in the noMMU ARM94* CPU support code,
     where it corrupts registers.  Found by folk getting Linux running
     on their cameras.

   - Versatile Express needs an errata workaround enabled for CPU
     hot-unplug to work.

  Features:

   - Clean up module linker by handling out of range relocations
     separately from relocation cases we don't handle.

   - Fix a long term bug in the pci_mmap_page_range() code, which we
     hope won't impact userspace (we hope there are no users of the
     existing broken interface).

   - Don't map DMA coherent allocations when we don't have an MMU.

   - Drop experimental status for SMP_ON_UP.

   - Warn when DT doesn't specify ePAPR mandatory cache properties.

   - Add documentation concerning how we find the start of physical
     memory for AUTO_ZRELADDR kernels, detailing why we have chosen the
     mask and the implications of changing it.

   - Updates from Ard Biesheuvel to address some issues with large
     kernels (such as allyesconfig) failing to link.

   - Allow hibernation to work on modern (ARMv7) CPUs - this appears to
     have never worked in the past on these CPUs.

   - Enable IRQ_SHOW_LEVEL, which changes the /proc/interrupts output
     format (hopefully without userspace breaking...  let's hope that if
     it causes someone a problem, they tell us.)

   - Fix tegra-ahb DT offsets.

   - Rework the ARM errata 643719 code (and the ARMv7 flush_cache_louis()/
     flush_dcache_all() code) to be more efficient, and enable this
     errata workaround by default for ARMv7+SMP CPUs.  This complements
     the Versatile Express fix above.

   - Rework ARMv7 context code for errata 430973, so that only Cortex A8
     CPUs are impacted by the branch target buffer flush when this
     errata is enabled.  Also update the help text to indicate that all
     r1p* A8 CPUs are impacted.

   - Switch ARM to the generic show_mem() implementation, which conveys
     all the information we were already reporting.

   - Prevent slow timer sources being used for udelay() - timers running
     at less than 1MHz are not useful for this, and can cause udelay()
     to return immediately, without any wait (a worked example follows
     below).  Using such a slow timer is silly.

   - VDSO support for 32-bit ARM, mainly for gettimeofday() using the
     ARM architected timer.

   - Perf support for Scorpion performance monitoring units"
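
To illustrate the delay-timer accuracy point above: with a counter slower
than 1MHz, the cycle count computed for a microsecond-scale delay truncates
to zero, so a "wait until N cycles have elapsed" loop exits immediately.
A minimal arithmetic sketch (not the kernel's implementation; the 32kHz
frequency is just an example of a typical slow counter):

    #include <stdio.h>
    #include <stdint.h>

    /* Cycles needed to cover 'usecs' on a counter running at 'freq' Hz. */
    static uint64_t cycles_for_us(uint64_t freq, uint64_t usecs)
    {
        return freq * usecs / 1000000;  /* integer division truncates */
    }

    int main(void)
    {
        /* 32.768kHz counter: 10us needs 0.33 cycles -> truncates to 0,
         * so a delay loop waiting for that many cycles returns at once. */
        printf("32 kHz, 10 us -> %llu cycles\n",
               (unsigned long long)cycles_for_us(32768, 10));
        /* A counter of at least 1MHz always yields >= 1 cycle per us. */
        printf("1 MHz,  10 us -> %llu cycles\n",
               (unsigned long long)cycles_for_us(1000000, 10));
        return 0;
    }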

vdso semantic conflict fixed up as per linux-next.

* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (52 commits)
  ARM: update errata 430973 documentation to cover Cortex A8 r1p*
  ARM: ensure delay timer has sufficient accuracy for delays
  ARM: switch to use the generic show_mem() implementation
  ARM: proc-v7: avoid errata 430973 workaround for non-Cortex A8 CPUs
  ARM: enable ARM errata 643719 workaround by default
  ARM: cache-v7: optimise test for Cortex A9 r0pX devices
  ARM: cache-v7: optimise branches in v7_flush_cache_louis
  ARM: cache-v7: consolidate initialisation of cache level index
  ARM: cache-v7: shift CLIDR to extract appropriate field before masking
  ARM: cache-v7: use movw/movt instructions
  ARM: allow 16-bit instructions in ALT_UP()
  ARM: proc-arm94*.S: fix setup function
  ARM: vexpress: fix CPU hotplug with CT9x4 tile.
  ARM: 8276/1: Make CPU_DCACHE_DISABLE depend on !SMP
  ARM: 8335/1: Documentation: DT bindings: Tegra AHB: document the legacy base address
  ARM: 8334/1: amba: tegra-ahb: detect and correct bogus base address
  ARM: 8333/1: amba: tegra-ahb: fix register offsets in the macros
  ARM: 8339/1: Enable CONFIG_GENERIC_IRQ_SHOW_LEVEL
  ARM: 8338/1: kexec: Relax SMP validation to improve DT compatibility
  ARM: 8337/1: mm: Do not invoke OOM for higher order IOMMU DMA allocations
  ...
Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/Kconfig | 6
-rw-r--r--  arch/arm/Makefile | 10
-rw-r--r--  arch/arm/boot/compressed/head.S | 52
-rw-r--r--  arch/arm/include/asm/Kbuild | 1
-rw-r--r--  arch/arm/include/asm/assembler.h | 3
-rw-r--r--  arch/arm/include/asm/auxvec.h | 1
-rw-r--r--  arch/arm/include/asm/cputype.h | 16
-rw-r--r--  arch/arm/include/asm/elf.h | 11
-rw-r--r--  arch/arm/include/asm/futex.h | 2
-rw-r--r--  arch/arm/include/asm/mmu.h | 3
-rw-r--r--  arch/arm/include/asm/pmu.h | 1
-rw-r--r--  arch/arm/include/asm/smp_plat.h | 1
-rw-r--r--  arch/arm/include/asm/uaccess.h | 10
-rw-r--r--  arch/arm/include/asm/unified.h | 8
-rw-r--r--  arch/arm/include/asm/vdso.h | 32
-rw-r--r--  arch/arm/include/asm/vdso_datapage.h | 60
-rw-r--r--  arch/arm/include/asm/word-at-a-time.h | 2
-rw-r--r--  arch/arm/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/arm/include/uapi/asm/auxvec.h | 7
-rw-r--r--  arch/arm/kernel/Makefile | 5
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 5
-rw-r--r--  arch/arm/kernel/bios32.c | 10
-rw-r--r--  arch/arm/kernel/entry-armv.S | 2
-rw-r--r--  arch/arm/kernel/head.S | 14
-rw-r--r--  arch/arm/kernel/hibernate.c | 6
-rw-r--r--  arch/arm/kernel/machine_kexec.c | 3
-rw-r--r--  arch/arm/kernel/module.c | 38
-rw-r--r--  arch/arm/kernel/perf_event.c | 21
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c | 71
-rw-r--r--  arch/arm/kernel/perf_event_v7.c | 525
-rw-r--r--  arch/arm/kernel/process.c | 159
-rw-r--r--  arch/arm/kernel/psci-call.S | 31
-rw-r--r--  arch/arm/kernel/psci.c | 39
-rw-r--r--  arch/arm/kernel/reboot.c | 155
-rw-r--r--  arch/arm/kernel/reboot.h | 7
-rw-r--r--  arch/arm/kernel/return_address.c | 4
-rw-r--r--  arch/arm/kernel/setup.c | 44
-rw-r--r--  arch/arm/kernel/sleep.S | 15
-rw-r--r--  arch/arm/kernel/smp.c | 5
-rw-r--r--  arch/arm/kernel/swp_emulate.c | 2
-rw-r--r--  arch/arm/kernel/vdso.c | 337
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 7
-rw-r--r--  arch/arm/lib/clear_user.S | 2
-rw-r--r--  arch/arm/lib/copy_to_user.S | 2
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S | 2
-rw-r--r--  arch/arm/lib/delay.c | 6
-rw-r--r--  arch/arm/mach-exynos/sleep.S | 31
-rw-r--r--  arch/arm/mach-s5pv210/sleep.S | 2
-rw-r--r--  arch/arm/mach-vexpress/Kconfig | 1
-rw-r--r--  arch/arm/mm/Kconfig | 16
-rw-r--r--  arch/arm/mm/alignment.c | 6
-rw-r--r--  arch/arm/mm/cache-l2x0.c | 7
-rw-r--r--  arch/arm/mm/cache-v7.S | 38
-rw-r--r--  arch/arm/mm/dma-mapping.c | 116
-rw-r--r--  arch/arm/mm/init.c | 49
-rw-r--r--  arch/arm/mm/proc-arm1020.S | 4
-rw-r--r--  arch/arm/mm/proc-arm1020e.S | 4
-rw-r--r--  arch/arm/mm/proc-arm1022.S | 4
-rw-r--r--  arch/arm/mm/proc-arm1026.S | 4
-rw-r--r--  arch/arm/mm/proc-arm720.S | 4
-rw-r--r--  arch/arm/mm/proc-arm740.S | 4
-rw-r--r--  arch/arm/mm/proc-arm7tdmi.S | 4
-rw-r--r--  arch/arm/mm/proc-arm920.S | 4
-rw-r--r--  arch/arm/mm/proc-arm922.S | 4
-rw-r--r--  arch/arm/mm/proc-arm925.S | 4
-rw-r--r--  arch/arm/mm/proc-arm926.S | 4
-rw-r--r--  arch/arm/mm/proc-arm940.S | 30
-rw-r--r--  arch/arm/mm/proc-arm946.S | 26
-rw-r--r--  arch/arm/mm/proc-arm9tdmi.S | 4
-rw-r--r--  arch/arm/mm/proc-fa526.S | 4
-rw-r--r--  arch/arm/mm/proc-feroceon.S | 5
-rw-r--r--  arch/arm/mm/proc-macros.S | 28
-rw-r--r--  arch/arm/mm/proc-mohawk.S | 4
-rw-r--r--  arch/arm/mm/proc-sa110.S | 4
-rw-r--r--  arch/arm/mm/proc-sa1100.S | 4
-rw-r--r--  arch/arm/mm/proc-v6.S | 4
-rw-r--r--  arch/arm/mm/proc-v7-2level.S | 12
-rw-r--r--  arch/arm/mm/proc-v7.S | 56
-rw-r--r--  arch/arm/mm/proc-v7m.S | 4
-rw-r--r--  arch/arm/mm/proc-xsc3.S | 4
-rw-r--r--  arch/arm/mm/proc-xscale.S | 4
-rw-r--r--  arch/arm/nwfpe/entry.S | 2
-rw-r--r--  arch/arm/vdso/.gitignore | 1
-rw-r--r--  arch/arm/vdso/Makefile | 74
-rw-r--r--  arch/arm/vdso/datapage.S | 15
-rw-r--r--  arch/arm/vdso/vdso.S | 35
-rw-r--r--  arch/arm/vdso/vdso.lds.S | 87
-rw-r--r--  arch/arm/vdso/vdsomunge.c | 201
-rw-r--r--  arch/arm/vdso/vgettimeofday.c | 282
89 files changed, 2372 insertions, 572 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4b62f4caf0ce..da1266c53c13 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -21,6 +21,7 @@ config ARM
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
@@ -1063,7 +1064,7 @@ config ARM_ERRATA_430973
 	depends on CPU_V7
 	help
 	  This option enables the workaround for the 430973 Cortex-A8
-	  (r1p0..r1p2) erratum. If a code sequence containing an ARM/Thumb
+	  r1p* erratum. If a code sequence containing an ARM/Thumb
 	  interworking branch is replaced with another code sequence at the
 	  same virtual address, whether due to self-modifying code or virtual
 	  to physical address re-mapping, Cortex-A8 does not recover from the
@@ -1132,6 +1133,7 @@ config ARM_ERRATA_742231
 config ARM_ERRATA_643719
 	bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
 	depends on CPU_V7 && SMP
+	default y
 	help
 	  This option enables the workaround for the 643719 Cortex-A9 (prior to
 	  r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR
@@ -1349,7 +1351,7 @@ config SMP
 	  If you don't know what to do here, say N.
 
 config SMP_ON_UP
-	bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)"
+	bool "Allow booting SMP kernel on uniprocessor systems"
 	depends on SMP && !XIP_KERNEL && MMU
 	default y
 	help
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index eb7bb511f853..5575d9fa8806 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -13,7 +13,7 @@
 # Ensure linker flags are correct
 LDFLAGS		:=
 
-LDFLAGS_vmlinux	:=-p --no-undefined -X
+LDFLAGS_vmlinux	:=-p --no-undefined -X --pic-veneer
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
 LDFLAGS_MODULE	+= --be8
@@ -264,6 +264,7 @@ core-$(CONFIG_FPE_FASTFPE)	+= $(FASTFPE_OBJ)
 core-$(CONFIG_VFP)		+= arch/arm/vfp/
 core-$(CONFIG_XEN)		+= arch/arm/xen/
 core-$(CONFIG_KVM_ARM_HOST) 	+= arch/arm/kvm/
+core-$(CONFIG_VDSO)		+= arch/arm/vdso/
 
 # If we have a machine-specific directory, then include it in the build.
 core-y				+= arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
@@ -321,6 +322,12 @@ dtbs: prepare scripts
 dtbs_install:
 	$(Q)$(MAKE) $(dtbinst)=$(boot)/dts
 
+PHONY += vdso_install
+vdso_install:
+ifeq ($(CONFIG_VDSO),y)
+	$(Q)$(MAKE) $(build)=arch/arm/vdso $@
+endif
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
@@ -345,4 +352,5 @@ define archhelp
   echo  '                  Install using (your) ~/bin/$(INSTALLKERNEL) or'
   echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
+  echo  '  vdso_install  - Install unstripped vdso.so to $$(INSTALL_MOD_PATH)/vdso'
 endef
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index c41a793b519c..2c45b5709fa4 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -10,8 +10,11 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/v7m.h>
+
+ AR_CLASS(	.arch	armv7-a	)
+ M_CLASS(	.arch	armv7-m	)
 
-	.arch	armv7-a
 /*
  * Debugging stuff
  *
@@ -114,7 +117,12 @@
  * sort out different calling conventions
  */
 		.align
-		.arm				@ Always enter in ARM state
+		/*
+		 * Always enter in ARM state for CPUs that support the ARM ISA.
+		 * As of today (2014) that's exactly the members of the A and R
+		 * classes.
+		 */
+ AR_CLASS(	.arm	)
 start:
 		.type	start,#function
 		.rept	7
@@ -132,14 +140,15 @@ start:
 
  THUMB(		.thumb			)
 1:
- ARM_BE8(	setend	be )			@ go BE8 if compiled for BE8
-		mrs	r9, cpsr
+ ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
+ AR_CLASS(	mrs	r9, cpsr	)
 #ifdef CONFIG_ARM_VIRT_EXT
 		bl	__hyp_stub_install	@ get into SVC mode, reversibly
 #endif
 		mov	r7, r1			@ save architecture ID
 		mov	r8, r2			@ save atags pointer
 
+#ifndef CONFIG_CPU_V7M
 		/*
 		 * Booting from Angel - need to enter SVC mode and disable
 		 * FIQs/IRQs (numeric definitions from angel arm.h source).
@@ -155,6 +164,7 @@ not_angel:
 		safe_svcmode_maskall r0
 		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
 						@ SPSR
+#endif
 		/*
 		 * Note that some cache flushing and other stuff may
 		 * be needed here - is there an Angel SWI call for this?
@@ -168,9 +178,26 @@ not_angel:
 		.text
 
 #ifdef CONFIG_AUTO_ZRELADDR
-		@ determine final kernel image address
+		/*
+		 * Find the start of physical memory.  As we are executing
+		 * without the MMU on, we are in the physical address space.
+		 * We just need to get rid of any offset by aligning the
+		 * address.
+		 *
+		 * This alignment is a balance between the requirements of
+		 * different platforms - we have chosen 128MB to allow
+		 * platforms which align the start of their physical memory
+		 * to 128MB to use this feature, while allowing the zImage
+		 * to be placed within the first 128MB of memory on other
+		 * platforms.  Increasing the alignment means we place
+		 * stricter alignment requirements on the start of physical
+		 * memory, but relaxing it means that we break people who
+		 * are already placing their zImage in (eg) the top 64MB
+		 * of this range.
+		 */
 		mov	r4, pc
 		and	r4, r4, #0xf8000000
+		/* Determine final kernel image address. */
 		add	r4, r4, #TEXT_OFFSET
 #else
 		ldr	r4, =zreladdr
@@ -810,6 +837,16 @@ __common_mmu_cache_on:
 call_cache_fn:	adr	r12, proc_types
 #ifdef CONFIG_CPU_CP15
 		mrc	p15, 0, r9, c0, c0	@ get processor ID
+#elif defined(CONFIG_CPU_V7M)
+		/*
+		 * On v7-M the processor id is located in the V7M_SCB_CPUID
+		 * register, but as cache handling is IMPLEMENTATION DEFINED on
+		 * v7-M (if existant at all) we just return early here.
+		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
+		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
+		 * use cp15 registers that are not implemented on v7-M.
+		 */
+		bx	lr
 #else
 		ldr	r9, =CONFIG_PROCESSOR_ID
 #endif
@@ -1310,8 +1347,9 @@ __hyp_reentry_vectors:
 
 __enter_kernel:
 		mov	r0, #0			@ must be 0
- ARM(		mov	pc, r4	)		@ call kernel
- THUMB(		bx	r4	)		@ entry point is always ARM
+ ARM(		mov	pc, r4		)	@ call kernel
+ M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
+ THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
 
 reloc_code_end:
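
The AUTO_ZRELADDR comment added above boils down to a single mask: round the
current (physical) program counter down to a 128MB boundary and treat that as
the start of RAM. A standalone C rendering of the same arithmetic (the sample
address is made up, and TEXT_OFFSET here uses the common 0x8000 value):

    #include <stdio.h>
    #include <stdint.h>

    #define SZ_128M     0x08000000u
    #define TEXT_OFFSET 0x00008000u  /* typical ARM value; platform-specific */

    int main(void)
    {
        uint32_t pc = 0x80208000;                   /* where zImage happens to run */
        uint32_t phys_start = pc & ~(SZ_128M - 1);  /* == pc & 0xf8000000 */
        uint32_t kernel = phys_start + TEXT_OFFSET; /* final kernel image address */

        printf("phys_start = 0x%08x, kernel = 0x%08x\n", phys_start, kernel);
        return 0;
    }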
 
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index fe74c0d1e485..eb0f43f3e3f1 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -1,6 +1,5 @@
 
 
-generic-y += auxvec.h
 generic-y += bitsperlong.h
 generic-y += cputime.h
 generic-y += current.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index f67fd3afebdf..186270b3e194 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -237,6 +237,9 @@
 	.pushsection ".alt.smp.init", "a"			;\
 	.long	9998b						;\
 9997:	instr							;\
+	.if . - 9997b == 2					;\
+		nop						;\
+	.endif							;\
 	.if . - 9997b != 4					;\
 		.error "ALT_UP() content must assemble to exactly 4 bytes";\
 	.endif							;\
diff --git a/arch/arm/include/asm/auxvec.h b/arch/arm/include/asm/auxvec.h
new file mode 100644
index 000000000000..fbd388c46299
--- /dev/null
+++ b/arch/arm/include/asm/auxvec.h
@@ -0,0 +1 @@
+#include <uapi/asm/auxvec.h>
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 819777d0e91f..85e374f873ac 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -253,4 +253,20 @@ static inline int cpu_is_pj4(void)
 #else
 #define cpu_is_pj4()	0
 #endif
+
+static inline int __attribute_const__ cpuid_feature_extract_field(u32 features,
+								  int field)
+{
+	int feature = (features >> field) & 15;
+
+	/* feature registers are signed values */
+	if (feature > 8)
+		feature -= 16;
+
+	return feature;
+}
+
+#define cpuid_feature_extract(reg, field) \
+	cpuid_feature_extract_field(read_cpuid_ext(reg), field)
+
 #endif
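
The cpuid_feature_extract_field() helper added above treats each 4-bit ID
register field as a signed quantity, so 0xf decodes as -1. A quick standalone
check of the same sign-extension logic (the register value is made up for
illustration):

    #include <stdio.h>
    #include <stdint.h>

    /* Same logic as the kernel helper above. */
    static int feature_extract_field(uint32_t features, int field)
    {
        int feature = (features >> field) & 15;

        if (feature > 8)        /* 0x9..0xf encode negative values */
            feature -= 16;
        return feature;
    }

    int main(void)
    {
        uint32_t reg = 0x000f1021;  /* made-up ID register contents */

        printf("bits[3:0]   = %d\n", feature_extract_field(reg, 0));  /*  1 */
        printf("bits[15:12] = %d\n", feature_extract_field(reg, 12)); /*  1 */
        printf("bits[19:16] = %d\n", feature_extract_field(reg, 16)); /* -1 */
        return 0;
    }
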
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index c1ff8ab12914..d2315ffd8f12 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -1,7 +1,9 @@
 #ifndef __ASMARM_ELF_H
 #define __ASMARM_ELF_H
 
+#include <asm/auxvec.h>
 #include <asm/hwcap.h>
+#include <asm/vdso_datapage.h>
 
 /*
  * ELF register definitions..
@@ -115,7 +117,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE	(2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 
@@ -126,6 +128,13 @@ extern void elf_set_personality(const struct elf32_hdr *);
 #define SET_PERSONALITY(ex)	elf_set_personality(&(ex))
 
 #ifdef CONFIG_MMU
+#ifdef CONFIG_VDSO
+#define ARCH_DLINFO						\
+do {								\
+	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
+		    (elf_addr_t)current->mm->context.vdso);	\
+} while (0)
+#endif
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
 int arch_setup_additional_pages(struct linux_binprm *, int);
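
The ELF_ET_DYN_BASE change above is the integer-overflow fix mentioned in the
pull request: with a 32-bit TASK_SIZE the old "2 * TASK_SIZE" intermediate
wraps before the division. A small demonstration in 32-bit arithmetic (the
TASK_SIZE value is only a typical ARM configuration, used as an example):

    #include <stdio.h>
    #include <stdint.h>

    #define TASK_SIZE ((uint32_t)0xbf000000)    /* example 32-bit user limit */

    int main(void)
    {
        uint32_t old   = 2 * TASK_SIZE / 3;     /* wraps: 0x2a000000 */
        uint32_t fixed = TASK_SIZE / 3 * 2;     /* intended ~2/3: 0x7f555554 */

        printf("old = 0x%08x, fixed = 0x%08x\n", old, fixed);
        return 0;
    }
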
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 53e69dae796f..4e78065a16aa 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -13,7 +13,7 @@
 	"	.align	3\n"					\
 	"	.long	1b, 4f, 2b, 4f\n"			\
 	"	.popsection\n"					\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"4:	mov	%0, " err_reg "\n"			\
 	"	b	3b\n"					\
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 64fd15159b7d..a5b47421059d 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -11,6 +11,9 @@ typedef struct {
 #endif
 	unsigned int	vmalloc_seq;
 	unsigned long	sigpage;
+#ifdef CONFIG_VDSO
+	unsigned long	vdso;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index b1596bd59129..675e4ab79f68 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -92,6 +92,7 @@ struct pmu_hw_events {
 struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
+	int		*irq_affinity;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct perf_event *event);
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index 0ad7d490ee6f..993e5224d8f7 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -104,6 +104,7 @@ static inline u32 mpidr_hash_size(void)
 	return 1 << mpidr_hash.bits;
 }
 
+extern int platform_can_secondary_boot(void);
 extern int platform_can_cpu_hotplug(void);
 
 #endif
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index ce0786efd26c..74b17d09ef7a 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -315,7 +315,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(ldrb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	mov	%1, #0\n"				\
@@ -351,7 +351,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(ldr) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	mov	%1, #0\n"				\
@@ -397,7 +397,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(strb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	b	2b\n"					\
@@ -430,7 +430,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(str) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	b	2b\n"					\
@@ -458,7 +458,7 @@ do {									\
  THUMB(	"1:	" TUSER(str) "	" __reg_oper1 ", [%1]\n"	) \
  THUMB(	"2:	" TUSER(str) "	" __reg_oper0 ", [%1, #4]\n"	) \
 	"3:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"4:	mov	%0, %3\n"				\
 	"	b	3b\n"					\
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index b88beaba6b4a..200f9a7cd623 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -24,6 +24,14 @@
 	.syntax unified
 #endif
 
+#ifdef CONFIG_CPU_V7M
+#define AR_CLASS(x...)
+#define M_CLASS(x...)	x
+#else
+#define AR_CLASS(x...)	x
+#define M_CLASS(x...)
+#endif
+
 #ifdef CONFIG_THUMB2_KERNEL
 
 #if __GNUC__ < 4
diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
new file mode 100644
index 000000000000..d0295f1dd1a3
--- /dev/null
+++ b/arch/arm/include/asm/vdso.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_VDSO_H
+#define __ASM_VDSO_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+#ifdef CONFIG_VDSO
+
+void arm_install_vdso(struct mm_struct *mm, unsigned long addr);
+
+extern char vdso_start, vdso_end;
+
+extern unsigned int vdso_total_pages;
+
+#else /* CONFIG_VDSO */
+
+static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
+{
+}
+
+#define vdso_total_pages 0
+
+#endif /* CONFIG_VDSO */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_VDSO_H */
diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h
new file mode 100644
index 000000000000..9be259442fca
--- /dev/null
+++ b/arch/arm/include/asm/vdso_datapage.h
@@ -0,0 +1,60 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_VDSO_DATAPAGE_H
+#define __ASM_VDSO_DATAPAGE_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+/* Try to be cache-friendly on systems that don't implement the
+ * generic timer: fit the unconditionally updated fields in the first
+ * 32 bytes.
+ */
+struct vdso_data {
+	u32 seq_count;		/* sequence count - odd during updates */
+	u16 tk_is_cntvct;	/* fall back to syscall if false */
+	u16 cs_shift;		/* clocksource shift */
+	u32 xtime_coarse_sec;	/* coarse time */
+	u32 xtime_coarse_nsec;
+
+	u32 wtm_clock_sec;	/* wall to monotonic offset */
+	u32 wtm_clock_nsec;
+	u32 xtime_clock_sec;	/* CLOCK_REALTIME - seconds */
+	u32 cs_mult;		/* clocksource multiplier */
+
+	u64 cs_cycle_last;	/* last cycle value */
+	u64 cs_mask;		/* clocksource mask */
+
+	u64 xtime_clock_snsec;	/* CLOCK_REALTIME sub-ns base */
+	u32 tz_minuteswest;	/* timezone info for gettimeofday(2) */
+	u32 tz_dsttime;
+};
+
+union vdso_data_store {
+	struct vdso_data data;
+	u8 page[PAGE_SIZE];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_VDSO_DATAPAGE_H */
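
The seq_count field above implements the usual lockless-reader protocol: the
kernel makes the count odd while it updates the page and bumps it again when
done, and readers retry if the count changed under them. A hedged sketch of
the reader side (this is not the actual vDSO source; the struct is trimmed to
the fields the sketch needs, and the memory barriers real code requires are
only noted in comments):

    #include <stdint.h>

    struct vdso_data_sketch {
        uint32_t seq_count;         /* odd while the kernel is updating */
        uint32_t xtime_coarse_sec;
        uint32_t xtime_coarse_nsec;
    };

    static void read_coarse_time(const volatile struct vdso_data_sketch *vd,
                                 uint32_t *sec, uint32_t *nsec)
    {
        uint32_t seq;

        do {
            /* Wait for any in-flight update (odd count) to finish. */
            do {
                seq = vd->seq_count;
            } while (seq & 1);

            /* Read barrier needed here in real code. */
            *sec  = vd->xtime_coarse_sec;
            *nsec = vd->xtime_coarse_nsec;

            /* Read barrier, then re-check: if the count moved, retry. */
        } while (vd->seq_count != seq);
    }
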
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
index a6d0a29861e7..5831dce4b51c 100644
--- a/arch/arm/include/asm/word-at-a-time.h
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -71,7 +71,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 	asm(
 	"1:	ldr	%0, [%2]\n"
 	"2:\n"
-	"	.pushsection .fixup,\"ax\"\n"
+	"	.pushsection .text.fixup,\"ax\"\n"
 	"	.align 2\n"
 	"3:	and	%1, %2, #0x3\n"
 	"	bic	%2, %2, #0x3\n"
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 70a1c9da30ca..a1c05f93d920 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += auxvec.h
 header-y += byteorder.h
 header-y += fcntl.h
 header-y += hwcap.h
diff --git a/arch/arm/include/uapi/asm/auxvec.h b/arch/arm/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000000..cb02a767a500
--- /dev/null
+++ b/arch/arm/include/uapi/asm/auxvec.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_AUXVEC_H
+#define __ASM_AUXVEC_H
+
+/* VDSO location */
+#define AT_SYSINFO_EHDR	33
+
+#endif
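
The AT_SYSINFO_EHDR entry defined above is how a process finds the vDSO that
the new ARCH_DLINFO hook publishes: the kernel passes the mapping address in
the ELF auxiliary vector. A small user-space probe (assumes glibc's
getauxval(); on 32-bit ARM the header is an Elf32_Ehdr):

    #include <stdio.h>
    #include <elf.h>
    #include <sys/auxv.h>

    int main(void)
    {
        unsigned long ehdr = getauxval(AT_SYSINFO_EHDR);

        if (!ehdr) {
            puts("no vDSO advertised in the auxiliary vector");
            return 0;
        }

        /* The mapping starts with an ordinary ELF header: \x7fELF... */
        const Elf32_Ehdr *hdr = (const Elf32_Ehdr *)ehdr;
        printf("vDSO at %#lx, %u program headers\n",
               ehdr, (unsigned)hdr->e_phnum);
        return 0;
    }
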
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 902397dd1000..ba5f83226011 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 # Object file lists.
 
 obj-y		:= elf.o entry-common.o irq.o opcodes.o \
-		   process.o ptrace.o return_address.o \
+		   process.o ptrace.o reboot.o return_address.o \
 		   setup.o signal.o sigreturn_codes.o \
 		   stacktrace.o sys_arm.o time.o traps.o
 
@@ -75,6 +75,7 @@ obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
+obj-$(CONFIG_VDSO)		+= vdso.o
 
 ifneq ($(CONFIG_ARCH_EBSA110),y)
   obj-y		+= io.o
@@ -86,7 +87,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 ifeq ($(CONFIG_ARM_PSCI),y)
-obj-y				+= psci.o
+obj-y				+= psci.o psci-call.o
 obj-$(CONFIG_SMP)		+= psci_smp.o
 endif
 
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 488eaac56028..61bb5a65eb37 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -25,6 +25,7 @@
 #include <asm/memory.h>
 #include <asm/procinfo.h>
 #include <asm/suspend.h>
+#include <asm/vdso_datapage.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
@@ -206,5 +207,9 @@ int main(void)
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
 #endif
+  BLANK();
+#ifdef CONFIG_VDSO
+  DEFINE(VDSO_DATA_SIZE,	sizeof(union vdso_data_store));
+#endif
   return 0; 
 }
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index ab19b7c03423..fcbbbb1b9e95 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -618,21 +618,15 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
-	struct pci_sys_data *root = dev->sysdata;
-	unsigned long phys;
-
-	if (mmap_state == pci_mmap_io) {
+	if (mmap_state == pci_mmap_io)
 		return -EINVAL;
-	} else {
-		phys = vma->vm_pgoff + (root->mem_offset >> PAGE_SHIFT);
-	}
 
 	/*
 	 * Mark this as IO
 	 */
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	if (remap_pfn_range(vma, vma->vm_start, phys,
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 			     vma->vm_end - vma->vm_start,
 			     vma->vm_page_prot))
 		return -EAGAIN;
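
With the pci_mmap_page_range() change above, the user-supplied vm_pgoff is
used as the physical page frame directly instead of having the bus mem_offset
added to it. The usual way userspace reaches this code is an mmap() of a BAR
through sysfs; a hedged sketch (the device path is an example, and the first
4KB of BAR0 is assumed to exist):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>

    int main(void)
    {
        /* Example path; substitute a real device's resource file. */
        int fd = open("/sys/bus/pci/devices/0000:01:00.0/resource0", O_RDWR);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        /* File offset 0 maps the start of the BAR; the kernel ends up in
         * pci_mmap_page_range() -> remap_pfn_range() for this request. */
        void *bar = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (bar == MAP_FAILED) {
            perror("mmap");
            close(fd);
            return 1;
        }

        printf("BAR0 mapped at %p\n", bar);
        munmap(bar, 4096);
        close(fd);
        return 0;
    }
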
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 672b21942fff..570306c49406 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -545,7 +545,7 @@ ENDPROC(__und_usr)
 /*
  * The out of line fixup for the ldrt instructions above.
  */
-	.pushsection .fixup, "ax"
+	.pushsection .text.fixup, "ax"
 	.align	2
 4:	str     r4, [sp, #S_PC]			@ retry current instruction
 	ret	r9
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 01963273c07a..3637973a9708 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -138,9 +138,9 @@ ENTRY(stext)
 						@ mmu has been enabled
 	adr	lr, BSYM(1f)			@ return (PIC) address
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
 1:	b	__enable_mmu
 ENDPROC(stext)
 	.ltorg
@@ -386,10 +386,10 @@ ENTRY(secondary_startup)
 	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
 	adr	lr, BSYM(__enable_mmu)		@ return address
 	mov	r13, r12			@ __secondary_switched address
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
-						  @ (return control reg)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10			@ initialise processor
+						@ (return control reg)
+	ret	r12
 ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
 
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
index c4cc50e58c13..a71501ff6f18 100644
--- a/arch/arm/kernel/hibernate.c
+++ b/arch/arm/kernel/hibernate.c
@@ -22,6 +22,7 @@
 #include <asm/suspend.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
+#include "reboot.h"
 
 int pfn_is_nosave(unsigned long pfn)
 {
@@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused)
 
 	ret = swsusp_save();
 	if (ret == 0)
-		soft_restart(virt_to_phys(cpu_resume));
+		_soft_restart(virt_to_phys(cpu_resume), false);
 	return ret;
 }
 
@@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused)
 	for (pbe = restore_pblist; pbe; pbe = pbe->next)
 		copy_page(pbe->orig_address, pbe->address);
 
-	soft_restart(virt_to_phys(cpu_resume));
+	_soft_restart(virt_to_phys(cpu_resume), false);
 }
 
 static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
@@ -99,7 +100,6 @@ static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
  */
 int swsusp_arch_resume(void)
 {
-	extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
 	call_with_stack(arch_restore_image, 0,
 		resume_stack + ARRAY_SIZE(resume_stack));
 	return 0;
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index de2b085ad753..8bf3b7c09888 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -46,7 +46,8 @@ int machine_kexec_prepare(struct kimage *image)
 	 * and implements CPU hotplug for the current HW. If not, we won't be
 	 * able to kexec reliably, so fail the prepare operation.
 	 */
-	if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug())
+	if (num_possible_cpus() > 1 && platform_can_secondary_boot() &&
+	    !platform_can_cpu_hotplug())
 		return -EINVAL;
 
 	/*
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 2e11961f65ae..af791f4a6205 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -98,14 +98,19 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 		case R_ARM_PC24:
 		case R_ARM_CALL:
 		case R_ARM_JUMP24:
+			if (sym->st_value & 3) {
+				pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n",
+				       module->name, relindex, i, symname);
+				return -ENOEXEC;
+			}
+
 			offset = __mem_to_opcode_arm(*(u32 *)loc);
 			offset = (offset & 0x00ffffff) << 2;
 			if (offset & 0x02000000)
 				offset -= 0x04000000;
 
 			offset += sym->st_value - loc;
-			if (offset & 3 ||
-			    offset <= (s32)0xfe000000 ||
+			if (offset <= (s32)0xfe000000 ||
 			    offset >= (s32)0x02000000) {
 				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
 				       module->name, relindex, i, symname,
@@ -155,6 +160,22 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 #ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
+			/*
+			 * For function symbols, only Thumb addresses are
+			 * allowed (no interworking).
+			 *
+			 * For non-function symbols, the destination
+			 * has no specific ARM/Thumb disposition, so
+			 * the branch is resolved under the assumption
+			 * that interworking is not required.
+			 */
+			if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
+			    !(sym->st_value & 1)) {
+				pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n",
+				       module->name, relindex, i, symname);
+				return -ENOEXEC;
+			}
+
 			upper = __mem_to_opcode_thumb16(*(u16 *)loc);
 			lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
 
@@ -182,18 +203,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 				offset -= 0x02000000;
 			offset += sym->st_value - loc;
 
-			/*
-			 * For function symbols, only Thumb addresses are
-			 * allowed (no interworking).
-			 *
-			 * For non-function symbols, the destination
-			 * has no specific ARM/Thumb disposition, so
-			 * the branch is resolved under the assumption
-			 * that interworking is not required.
-			 */
-			if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
-				!(offset & 1)) ||
-			    offset <= (s32)0xff000000 ||
+			if (offset <= (s32)0xff000000 ||
 			    offset >= (s32)0x01000000) {
 				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
 				       module->name, relindex, i, symname,
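
The relocation checks above separate two failure modes: an ARM->Thumb
interworking call the loader cannot fix up, and a branch whose displacement
does not fit the 24-bit (word-scaled) immediate, i.e. roughly +/-32MB. A
standalone check of the same decode and range arithmetic (the opcode and
offsets are example values):

    #include <stdio.h>
    #include <stdint.h>

    /* Decode the signed 24-bit immediate of an ARM B/BL opcode into a byte
     * offset, mirroring the loader code above. */
    static int32_t branch_offset(uint32_t insn)
    {
        int32_t offset = (insn & 0x00ffffff) << 2;

        if (offset & 0x02000000)        /* sign bit of the 26-bit value */
            offset -= 0x04000000;
        return offset;
    }

    /* Same bounds as the loader's out-of-range test. */
    static int in_branch_range(int32_t offset)
    {
        return offset > (int32_t)0xfe000000 && offset < (int32_t)0x02000000;
    }

    int main(void)
    {
        printf("%d\n", in_branch_range(16 << 20));   /* +16MB: fits (1)    */
        printf("%d\n", in_branch_range(48 << 20));   /* +48MB: too far (0) */
        printf("%d\n", branch_offset(0xeb000001));   /* imm24=1 -> 4 bytes */
        return 0;
    }
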
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 557e128e4df0..4a86a0133ac3 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -259,20 +259,29 @@ out:
 }
 
 static int
-validate_event(struct pmu_hw_events *hw_events,
-	       struct perf_event *event)
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
 {
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct arm_pmu *armpmu;
 
 	if (is_software_event(event))
 		return 1;
 
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
 	if (event->state < PERF_EVENT_STATE_OFF)
 		return 1;
 
 	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
 		return 1;
 
+	armpmu = to_arm_pmu(event->pmu);
 	return armpmu->get_event_idx(hw_events, event) >= 0;
 }
 
@@ -288,15 +297,15 @@ validate_group(struct perf_event *event)
 	 */
 	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 
-	if (!validate_event(&fake_pmu, leader))
+	if (!validate_event(event->pmu, &fake_pmu, leader))
 		return -EINVAL;
 
 	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(&fake_pmu, sibling))
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
 			return -EINVAL;
 	}
 
-	if (!validate_event(&fake_pmu, event))
+	if (!validate_event(event->pmu, &fake_pmu, event))
 		return -EINVAL;
 
 	return 0;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 61b53c46edfa..91c7ba182dcd 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -92,11 +92,16 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 		free_percpu_irq(irq, &hw_events->percpu_pmu);
 	} else {
 		for (i = 0; i < irqs; ++i) {
-			if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+			int cpu = i;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq >= 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i));
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 		}
 	}
 }
@@ -128,32 +133,37 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
 	} else {
 		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
 			err = 0;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq < 0)
 				continue;
 
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
 			/*
 			 * If we have a single PMU interrupt that we can't shift,
 			 * assume that we're running on a uniprocessor machine and
 			 * continue. Otherwise, continue without this interrupt.
 			 */
-			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
 				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, i);
+					irq, cpu);
 				continue;
 			}
 
 			err = request_irq(irq, handler,
 					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, i));
+					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 			if (err) {
 				pr_err("unable to request IRQ%d for ARM PMU counters\n",
 					irq);
 				return err;
 			}
 
-			cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
 		}
 	}
 
@@ -243,6 +253,8 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = {
 	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
 	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
 	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
+	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
+	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
 	{},
 };
 
@@ -289,6 +301,48 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 	return ret;
 }
 
+static int of_pmu_irq_cfg(struct platform_device *pdev)
+{
+	int i;
+	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(dn), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+	}
+
+	if (i == pdev->num_resources)
+		cpu_pmu->irq_affinity = irqs;
+	else
+		kfree(irqs);
+
+	return 0;
+}
+
 static int cpu_pmu_device_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
@@ -313,7 +367,10 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
 		init_fn = of_id->data;
-		ret = init_fn(pmu);
+
+		ret = of_pmu_irq_cfg(pdev);
+		if (!ret)
+			ret = init_fn(pmu);
 	} else {
 		ret = probe_current_pmu(pmu);
 	}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 8993770c47de..f4207a4dcb01 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -140,6 +140,23 @@ enum krait_perf_types {
 	KRAIT_PERFCTR_L1_DTLB_ACCESS			= 0x12210,
 };
 
+/* ARMv7 Scorpion specific event types */
+enum scorpion_perf_types {
+	SCORPION_LPM0_GROUP0				= 0x4c,
+	SCORPION_LPM1_GROUP0				= 0x50,
+	SCORPION_LPM2_GROUP0				= 0x54,
+	SCORPION_L2LPM_GROUP0				= 0x58,
+	SCORPION_VLPM_GROUP0				= 0x5c,
+
+	SCORPION_ICACHE_ACCESS				= 0x10053,
+	SCORPION_ICACHE_MISS				= 0x10052,
+
+	SCORPION_DTLB_ACCESS				= 0x12013,
+	SCORPION_DTLB_MISS				= 0x12012,
+
+	SCORPION_ITLB_MISS				= 0x12021,
+};
+
 /*
  * Cortex-A8 HW events mapping
  *
@@ -482,6 +499,49 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 };
 
 /*
+ * Scorpion HW events mapping
+ */
+static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					    [PERF_COUNT_HW_CACHE_OP_MAX]
+					    [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
+	/*
+	 * Only ITLB misses and DTLB refills are supported.  If users want the
+	 * DTLB refills misses a raw counter must be used.
+	 */
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
  * Perf Events' indices
  */
 #define	ARMV7_IDX_CYCLE_COUNTER	0
@@ -976,6 +1036,12 @@ static int krait_map_event_no_branch(struct perf_event *event)
 				&krait_perf_cache_map, 0xFFFFF);
 }
 
+static int scorpion_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &scorpion_perf_map,
+				&scorpion_perf_cache_map, 0xFFFFF);
+}
+
 static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->handle_irq	= armv7pmu_handle_irq;
@@ -1103,6 +1169,12 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
 #define KRAIT_EVENT_MASK	(KRAIT_EVENT | VENUM_EVENT)
 #define PMRESRn_EN		BIT(31)
 
+#define EVENT_REGION(event)	(((event) >> 12) & 0xf)		/* R */
+#define EVENT_GROUP(event)	((event) & 0xf)			/* G */
+#define EVENT_CODE(event)	(((event) >> 4) & 0xff)		/* CC */
+#define EVENT_VENUM(event)	(!!(event & VENUM_EVENT))	/* N=2 */
+#define EVENT_CPU(event)	(!!(event & KRAIT_EVENT))	/* N=1 */
+
 static u32 krait_read_pmresrn(int n)
 {
 	u32 val;
@@ -1141,19 +1213,19 @@ static void krait_write_pmresrn(int n, u32 val)
 	}
 }
 
-static u32 krait_read_vpmresr0(void)
+static u32 venum_read_pmresr(void)
 {
 	u32 val;
 	asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
 	return val;
 }
 
-static void krait_write_vpmresr0(u32 val)
+static void venum_write_pmresr(u32 val)
 {
 	asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
 }
 
-static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val)
+static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
 {
 	u32 venum_new_val;
 	u32 fp_new_val;
@@ -1170,7 +1242,7 @@ static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val)
 	fmxr(FPEXC, fp_new_val);
 }
 
-static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val)
+static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
 {
 	BUG_ON(preemptible());
 	/* Restore FPEXC */
@@ -1193,16 +1265,11 @@ static void krait_evt_setup(int idx, u32 config_base)
 	u32 val;
 	u32 mask;
 	u32 vval, fval;
-	unsigned int region;
-	unsigned int group;
-	unsigned int code;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
 	unsigned int group_shift;
-	bool venum_event;
-
-	venum_event = !!(config_base & VENUM_EVENT);
-	region = (config_base >> 12) & 0xf;
-	code   = (config_base >> 4) & 0xff;
-	group  = (config_base >> 0)  & 0xf;
+	bool venum_event = EVENT_VENUM(config_base);
 
 	group_shift = group * 8;
 	mask = 0xff << group_shift;
@@ -1217,16 +1284,14 @@ static void krait_evt_setup(int idx, u32 config_base)
 	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
 	armv7_pmnc_write_evtsel(idx, val);
 
-	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
-
 	if (venum_event) {
-		krait_pre_vpmresr0(&vval, &fval);
-		val = krait_read_vpmresr0();
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
 		val &= ~mask;
 		val |= code << group_shift;
 		val |= PMRESRn_EN;
-		krait_write_vpmresr0(val);
-		krait_post_vpmresr0(vval, fval);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
 	} else {
 		val = krait_read_pmresrn(region);
 		val &= ~mask;
@@ -1236,7 +1301,7 @@ static void krait_evt_setup(int idx, u32 config_base)
 	}
 }
 
-static u32 krait_clear_pmresrn_group(u32 val, int group)
+static u32 clear_pmresrn_group(u32 val, int group)
 {
 	u32 mask;
 	int group_shift;
@@ -1256,23 +1321,19 @@ static void krait_clearpmu(u32 config_base)
 {
 	u32 val;
 	u32 vval, fval;
-	unsigned int region;
-	unsigned int group;
-	bool venum_event;
-
-	venum_event = !!(config_base & VENUM_EVENT);
-	region = (config_base >> 12) & 0xf;
-	group  = (config_base >> 0)  & 0xf;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
 
 	if (venum_event) {
-		krait_pre_vpmresr0(&vval, &fval);
-		val = krait_read_vpmresr0();
-		val = krait_clear_pmresrn_group(val, group);
-		krait_write_vpmresr0(val);
-		krait_post_vpmresr0(vval, fval);
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
 	} else {
 		val = krait_read_pmresrn(region);
-		val = krait_clear_pmresrn_group(val, group);
+		val = clear_pmresrn_group(val, group);
 		krait_write_pmresrn(region, val);
 	}
 }
@@ -1342,6 +1403,8 @@ static void krait_pmu_enable_event(struct perf_event *event)
 static void krait_pmu_reset(void *info)
 {
 	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	armv7pmu_reset(info);
 
@@ -1350,9 +1413,16 @@ static void krait_pmu_reset(void *info)
 	krait_write_pmresrn(1, 0);
 	krait_write_pmresrn(2, 0);
 
-	krait_pre_vpmresr0(&vval, &fval);
-	krait_write_vpmresr0(0);
-	krait_post_vpmresr0(vval, fval);
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+
 }
 
 static int krait_event_to_bit(struct perf_event *event, unsigned int region,
@@ -1386,26 +1456,18 @@ static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
 {
 	int idx;
 	int bit = -1;
-	unsigned int prefix;
-	unsigned int region;
-	unsigned int code;
-	unsigned int group;
-	bool krait_event;
 	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int code = EVENT_CODE(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool krait_event = EVENT_CPU(hwc->config_base);
 
-	region = (hwc->config_base >> 12) & 0xf;
-	code   = (hwc->config_base >> 4) & 0xff;
-	group  = (hwc->config_base >> 0) & 0xf;
-	krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK);
-
-	if (krait_event) {
+	if (venum_event || krait_event) {
 		/* Ignore invalid events */
 		if (group > 3 || region > 2)
 			return -EINVAL;
-		prefix = hwc->config_base & KRAIT_EVENT_MASK;
-		if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT)
-			return -EINVAL;
-		if (prefix == VENUM_EVENT && (code & 0xe0))
+		if (venum_event && (code & 0xe0))
 			return -EINVAL;
 
 		bit = krait_event_to_bit(event, region, group);
@@ -1425,15 +1487,12 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
 {
 	int bit;
 	struct hw_perf_event *hwc = &event->hw;
-	unsigned int region;
-	unsigned int group;
-	bool krait_event;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool krait_event = EVENT_CPU(hwc->config_base);
 
-	region = (hwc->config_base >> 12) & 0xf;
-	group  = (hwc->config_base >> 0) & 0xf;
-	krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK);
-
-	if (krait_event) {
+	if (venum_event || krait_event) {
 		bit = krait_event_to_bit(event, region, group);
 		clear_bit(bit, cpuc->used_mask);
 	}
@@ -1458,6 +1517,344 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
 	return 0;
 }
+
+/*
+ * Scorpion Local Performance Monitor Register (LPMn)
+ *
+ *            31   30     24     16     8      0
+ *            +--------------------------------+
+ *  LPM0      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 0
+ *            +--------------------------------+
+ *  LPM1      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 1
+ *            +--------------------------------+
+ *  LPM2      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 2
+ *            +--------------------------------+
+ *  L2LPM     | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 3
+ *            +--------------------------------+
+ *  VLPM      | EN |  CC  |  CC  |  CC  |  CC  |   N = 2, R = ?
+ *            +--------------------------------+
+ *              EN | G=3  | G=2  | G=1  | G=0
+ *
+ *
+ *  Event Encoding:
+ *
+ *      hwc->config_base = 0xNRCCG
+ *
+ *      N  = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
+ *      R  = region register
+ *      CC = class of events the group G is choosing from
+ *      G  = group or particular event
+ *
+ *  Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
+ *
+ *  A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ *  unit, etc.) while the event code (CC) corresponds to a particular class of
+ *  events (interrupts for example). An event code is broken down into
+ *  groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ *  example).
+ */
+
+static u32 scorpion_read_pmresrn(int n)
+{
+	u32 val;
+
+	switch (n) {
+	case 0:
+		asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 1:
+		asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 2:
+		asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 3:
+		asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+
+	return val;
+}
+
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+	switch (n) {
+	case 0:
+		asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 1:
+		asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 2:
+		asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 3:
+		asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+}
+
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+	static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+					     SCORPION_LPM1_GROUP0,
+					     SCORPION_LPM2_GROUP0,
+					     SCORPION_L2LPM_GROUP0 };
+	return pmresrn_table[region];
+}
+
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+	u32 val;
+	u32 mask;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
+	unsigned int group_shift;
+	bool venum_event = EVENT_VENUM(config_base);
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+
+	/* Configure evtsel for the region and group */
+	if (venum_event)
+		val = SCORPION_VLPM_GROUP0;
+	else
+		val = scorpion_get_pmresrn_event(region);
+	val += group;
+	/* Mix in mode-exclusion bits */
+	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+	armv7_pmnc_write_evtsel(idx, val);
+
+	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
+static void scorpion_clearpmu(u32 config_base)
+{
+	u32 val;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val = clear_pmresrn_group(val, group);
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/* Disable counter and interrupt */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Clear pmresr code (if destined for PMNx counters)
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_clearpmu(hwc->config_base);
+
+	/* Disable interrupt for this counter */
+	armv7_pmnc_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We don't set the event for the cycle counter because we
+	 * don't have the ability to perform event filtering.
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_evt_setup(idx, hwc->config_base);
+	else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/* Enable interrupt for this counter */
+	armv7_pmnc_enable_intens(idx);
+
+	/* Enable counter */
+	armv7_pmnc_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void scorpion_pmu_reset(void *info)
+{
+	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
+
+	armv7pmu_reset(info);
+
+	/* Clear all pmresrs */
+	scorpion_write_pmresrn(0, 0);
+	scorpion_write_pmresrn(1, 0);
+	scorpion_write_pmresrn(2, 0);
+	scorpion_write_pmresrn(3, 0);
+
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+}
+
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+			      unsigned int group)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	if (hwc->config_base & VENUM_EVENT)
+		bit = SCORPION_VLPM_GROUP0;
+	else
+		bit = scorpion_get_pmresrn_event(region);
+	bit -= scorpion_get_pmresrn_event(0);
+	bit += group;
+	/*
+	 * Lower bits are reserved for use by the counters (see
+	 * armv7pmu_get_event_idx() for more info)
+	 */
+	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+	return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	int idx;
+	int bit = -1;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		/* Ignore invalid events */
+		if (group > 3 || region > 3)
+			return -EINVAL;
+
+		bit = scorpion_event_to_bit(event, region, group);
+		if (test_and_set_bit(bit, cpuc->used_mask))
+			return -EAGAIN;
+	}
+
+	idx = armv7pmu_get_event_idx(cpuc, event);
+	if (idx < 0 && bit >= 0)
+		clear_bit(bit, cpuc->used_mask);
+
+	return idx;
+}
+
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		bit = scorpion_event_to_bit(event, region, group);
+		clear_bit(bit, cpuc->used_mask);
+	}
+}
+
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return 0;
+}
+
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion_mp";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return 0;
+}
 #else
 static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
@@ -1498,4 +1895,14 @@ static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	return -ENODEV;
 }
+
+static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
 #endif	/* CONFIG_CPU_V7 */
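
A stand-alone sketch (not part of this patch) of the column-exclusion bookkeeping that scorpion_event_to_bit() and scorpion_pmu_get_event_idx() implement above: each (region, group) pair is folded into a single bit above the counter indices, so a second event asking for the same PMRESRn group is refused. The counter count and bit layout below are illustrative assumptions, not the hardware encoding.

#include <stdio.h>

#define NUM_COUNTERS		5	/* cycle counter + 4 event counters (assumed) */
#define GROUPS_PER_REGION	4

static unsigned long used_mask;

static int reserve_group(unsigned int region, unsigned int group)
{
	unsigned int bit = NUM_COUNTERS + region * GROUPS_PER_REGION + group;

	if (used_mask & (1UL << bit))
		return -1;		/* column exclusion: group already claimed */
	used_mask |= 1UL << bit;
	return bit;
}

int main(void)
{
	printf("first  (region 1, group 2) -> bit %d\n", reserve_group(1, 2));
	printf("second (region 1, group 2) -> %d\n", reserve_group(1, 2));
	return 0;
}
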
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fdfa3a78ec8c..f192a2a41719 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -17,12 +17,9 @@
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/user.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
 #include <linux/interrupt.h>
 #include <linux/kallsyms.h>
 #include <linux/init.h>
-#include <linux/cpu.h>
 #include <linux/elfcore.h>
 #include <linux/pm.h>
 #include <linux/tick.h>
@@ -31,16 +28,14 @@
 #include <linux/random.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/leds.h>
-#include <linux/reboot.h>
 
-#include <asm/cacheflush.h>
-#include <asm/idmap.h>
 #include <asm/processor.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/system_misc.h>
 #include <asm/mach/time.h>
 #include <asm/tls.h>
+#include <asm/vdso.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -59,69 +54,6 @@ static const char *isa_modes[] __maybe_unused = {
   "ARM" , "Thumb" , "Jazelle", "ThumbEE"
 };
 
-extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
-typedef void (*phys_reset_t)(unsigned long);
-
-/*
- * A temporary stack to use for CPU reset. This is static so that we
- * don't clobber it with the identity mapping. When running with this
- * stack, any references to the current task *will not work* so you
- * should really do as little as possible before jumping to your reset
- * code.
- */
-static u64 soft_restart_stack[16];
-
-static void __soft_restart(void *addr)
-{
-	phys_reset_t phys_reset;
-
-	/* Take out a flat memory mapping. */
-	setup_mm_for_reboot();
-
-	/* Clean and invalidate caches */
-	flush_cache_all();
-
-	/* Turn off caching */
-	cpu_proc_fin();
-
-	/* Push out any further dirty data, and ensure cache is empty */
-	flush_cache_all();
-
-	/* Switch to the identity mapping. */
-	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-	phys_reset((unsigned long)addr);
-
-	/* Should never get here. */
-	BUG();
-}
-
-void soft_restart(unsigned long addr)
-{
-	u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
-
-	/* Disable interrupts first */
-	raw_local_irq_disable();
-	local_fiq_disable();
-
-	/* Disable the L2 if we're the last man standing. */
-	if (num_online_cpus() == 1)
-		outer_disable();
-
-	/* Change to the new stack and continue with the reset. */
-	call_with_stack(__soft_restart, (void *)addr, (void *)stack);
-
-	/* Should never get here. */
-	BUG();
-}
-
-/*
- * Function pointers to optional machine specific functions
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-
 /*
  * This is our default idle handler.
  */
@@ -166,79 +98,6 @@ void arch_cpu_idle_dead(void)
 }
 #endif
 
-/*
- * Called by kexec, immediately prior to machine_kexec().
- *
- * This must completely disable all secondary CPUs; simply causing those CPUs
- * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
- * kexec'd kernel to use any and all RAM as it sees fit, without having to
- * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
- */
-void machine_shutdown(void)
-{
-	disable_nonboot_cpus();
-}
-
-/*
- * Halting simply requires that the secondary CPUs stop performing any
- * activity (executing tasks, handling interrupts). smp_send_stop()
- * achieves this.
- */
-void machine_halt(void)
-{
-	local_irq_disable();
-	smp_send_stop();
-
-	local_irq_disable();
-	while (1);
-}
-
-/*
- * Power-off simply requires that the secondary CPUs stop performing any
- * activity (executing tasks, handling interrupts). smp_send_stop()
- * achieves this. When the system power is turned off, it will take all CPUs
- * with it.
- */
-void machine_power_off(void)
-{
-	local_irq_disable();
-	smp_send_stop();
-
-	if (pm_power_off)
-		pm_power_off();
-}
-
-/*
- * Restart requires that the secondary CPUs stop performing any activity
- * while the primary CPU resets the system. Systems with a single CPU can
- * use soft_restart() as their machine descriptor's .restart hook, since that
- * will cause the only available CPU to reset. Systems with multiple CPUs must
- * provide a HW restart implementation, to ensure that all CPUs reset at once.
- * This is required so that any code running after reset on the primary CPU
- * doesn't have to co-ordinate with other CPUs to ensure they aren't still
- * executing pre-reset code, and using RAM that the primary CPU's code wishes
- * to use. Implementing such co-ordination would be essentially impossible.
- */
-void machine_restart(char *cmd)
-{
-	local_irq_disable();
-	smp_send_stop();
-
-	if (arm_pm_restart)
-		arm_pm_restart(reboot_mode, cmd);
-	else
-		do_kernel_restart(cmd);
-
-	/* Give a grace period for failure to restart of 1s */
-	mdelay(1000);
-
-	/* Whoops - the platform was unable to reboot. Tell the user! */
-	printk("Reboot failed -- System halted\n");
-	local_irq_disable();
-	while (1);
-}
-
 void __show_regs(struct pt_regs *regs)
 {
 	unsigned long flags;
@@ -475,7 +334,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 }
 
 /* If possible, provide a placement hint at a random offset from the
- * stack for the signal page.
+ * stack for the sigpage and vdso pages.
  */
 static unsigned long sigpage_addr(const struct mm_struct *mm,
 				  unsigned int npages)
@@ -519,6 +378,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	unsigned long npages;
 	unsigned long addr;
 	unsigned long hint;
 	int ret = 0;
@@ -528,9 +388,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!signal_page)
 		return -ENOMEM;
 
+	npages = 1; /* for sigpage */
+	npages += vdso_total_pages;
+
 	down_write(&mm->mmap_sem);
-	hint = sigpage_addr(mm, 1);
-	addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0);
+	hint = sigpage_addr(mm, npages);
+	addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
@@ -547,6 +410,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	mm->context.sigpage = addr;
 
+	/* Unlike the sigpage, failure to install the vdso is unlikely
+	 * to be fatal to the process, so no error check needed
+	 * here.
+	 */
+	arm_install_vdso(mm, addr + PAGE_SIZE);
+
  up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;
diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S
new file mode 100644
index 000000000000..a78e9e1e206d
--- /dev/null
+++ b/arch/arm/kernel/psci-call.S
@@ -0,0 +1,31 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2015 ARM Limited
+ *
+ * Author: Mark Rutland <mark.rutland@arm.com>
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/opcodes-sec.h>
+#include <asm/opcodes-virt.h>
+
+/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
+ENTRY(__invoke_psci_fn_hvc)
+	__HVC(0)
+	bx	lr
+ENDPROC(__invoke_psci_fn_hvc)
+
+/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
+ENTRY(__invoke_psci_fn_smc)
+	__SMC(0)
+	bx	lr
+ENDPROC(__invoke_psci_fn_smc)
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index f73891b6b730..f90fdf4ce7c7 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -23,8 +23,6 @@
 
 #include <asm/compiler.h>
 #include <asm/errno.h>
-#include <asm/opcodes-sec.h>
-#include <asm/opcodes-virt.h>
 #include <asm/psci.h>
 #include <asm/system_misc.h>
 
@@ -33,6 +31,9 @@ struct psci_operations psci_ops;
 static int (*invoke_psci_fn)(u32, u32, u32, u32);
 typedef int (*psci_initcall_t)(const struct device_node *);
 
+asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32);
+asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32);
+
 enum psci_function {
 	PSCI_FN_CPU_SUSPEND,
 	PSCI_FN_CPU_ON,
@@ -71,40 +72,6 @@ static u32 psci_power_state_pack(struct psci_power_state state)
 		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
 }
 
-/*
- * The following two functions are invoked via the invoke_psci_fn pointer
- * and will not be inlined, allowing us to piggyback on the AAPCS.
- */
-static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1,
-					 u32 arg2)
-{
-	asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r1")
-			__asmeq("%2", "r2")
-			__asmeq("%3", "r3")
-			__HVC(0)
-		: "+r" (function_id)
-		: "r" (arg0), "r" (arg1), "r" (arg2));
-
-	return function_id;
-}
-
-static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1,
-					 u32 arg2)
-{
-	asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r1")
-			__asmeq("%2", "r2")
-			__asmeq("%3", "r3")
-			__SMC(0)
-		: "+r" (function_id)
-		: "r" (arg0), "r" (arg1), "r" (arg2));
-
-	return function_id;
-}
-
 static int psci_get_version(void)
 {
 	int err;
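
The change above only moves the HVC/SMC trampolines into psci-call.S and declares them asmlinkage; callers still go through the invoke_psci_fn pointer exactly as before. A stand-alone sketch of that indirection, with stub functions standing in for the real trampolines and the PSCI 0.2 PSCI_VERSION function ID (0x84000000) used as an example call:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;

/* stand-ins for __invoke_psci_fn_hvc / __invoke_psci_fn_smc */
static int fake_hvc(u32 fn, u32 a0, u32 a1, u32 a2) { printf("HVC %#x\n", fn); return 0; }
static int fake_smc(u32 fn, u32 a0, u32 a1, u32 a2) { printf("SMC %#x\n", fn); return 0; }

static int (*invoke_psci_fn)(u32, u32, u32, u32);

int main(void)
{
	int use_hvc = 0;	/* the kernel decides this from the DT "method" property */

	invoke_psci_fn = use_hvc ? fake_hvc : fake_smc;
	return invoke_psci_fn(0x84000000, 0, 0, 0);	/* PSCI_VERSION */
}
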
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
new file mode 100644
index 000000000000..1a4d232796be
--- /dev/null
+++ b/arch/arm/kernel/reboot.c
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ *  Original Copyright (C) 1995  Linus Torvalds
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+#include <asm/idmap.h>
+
+#include "reboot.h"
+
+typedef void (*phys_reset_t)(unsigned long);
+
+/*
+ * Function pointers to optional machine specific functions
+ */
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * A temporary stack to use for CPU reset. This is static so that we
+ * don't clobber it with the identity mapping. When running with this
+ * stack, any references to the current task *will not work* so you
+ * should really do as little as possible before jumping to your reset
+ * code.
+ */
+static u64 soft_restart_stack[16];
+
+static void __soft_restart(void *addr)
+{
+	phys_reset_t phys_reset;
+
+	/* Take out a flat memory mapping. */
+	setup_mm_for_reboot();
+
+	/* Clean and invalidate caches */
+	flush_cache_all();
+
+	/* Turn off caching */
+	cpu_proc_fin();
+
+	/* Push out any further dirty data, and ensure cache is empty */
+	flush_cache_all();
+
+	/* Switch to the identity mapping. */
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset((unsigned long)addr);
+
+	/* Should never get here. */
+	BUG();
+}
+
+void _soft_restart(unsigned long addr, bool disable_l2)
+{
+	u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
+
+	/* Disable interrupts first */
+	raw_local_irq_disable();
+	local_fiq_disable();
+
+	/* Disable the L2 if we're the last man standing. */
+	if (disable_l2)
+		outer_disable();
+
+	/* Change to the new stack and continue with the reset. */
+	call_with_stack(__soft_restart, (void *)addr, (void *)stack);
+
+	/* Should never get here. */
+	BUG();
+}
+
+void soft_restart(unsigned long addr)
+{
+	_soft_restart(addr, num_online_cpus() == 1);
+}
+
+/*
+ * Called by kexec, immediately prior to machine_kexec().
+ *
+ * This must completely disable all secondary CPUs; simply causing those CPUs
+ * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
+ * kexec'd kernel to use any and all RAM as it sees fit, without having to
+ * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
+ * functionality embodied in disable_nonboot_cpus() is used to achieve this.
+ */
+void machine_shutdown(void)
+{
+	disable_nonboot_cpus();
+}
+
+/*
+ * Halting simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this.
+ */
+void machine_halt(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	local_irq_disable();
+	while (1);
+}
+
+/*
+ * Power-off simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this. When the system power is turned off, it will take all CPUs
+ * with it.
+ */
+void machine_power_off(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	if (pm_power_off)
+		pm_power_off();
+}
+
+/*
+ * Restart requires that the secondary CPUs stop performing any activity
+ * while the primary CPU resets the system. Systems with a single CPU can
+ * use soft_restart() as their machine descriptor's .restart hook, since that
+ * will cause the only available CPU to reset. Systems with multiple CPUs must
+ * provide a HW restart implementation, to ensure that all CPUs reset at once.
+ * This is required so that any code running after reset on the primary CPU
+ * doesn't have to co-ordinate with other CPUs to ensure they aren't still
+ * executing pre-reset code, and using RAM that the primary CPU's code wishes
+ * to use. Implementing such co-ordination would be essentially impossible.
+ */
+void machine_restart(char *cmd)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	if (arm_pm_restart)
+		arm_pm_restart(reboot_mode, cmd);
+	else
+		do_kernel_restart(cmd);
+
+	/* Give a grace period for failure to restart of 1s */
+	mdelay(1000);
+
+	/* Whoops - the platform was unable to reboot. Tell the user! */
+	printk("Reboot failed -- System halted\n");
+	local_irq_disable();
+	while (1);
+}
diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h
new file mode 100644
index 000000000000..bf7a0b1f076e
--- /dev/null
+++ b/arch/arm/kernel/reboot.h
@@ -0,0 +1,7 @@
+#ifndef REBOOT_H
+#define REBOOT_H
+
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+extern void _soft_restart(unsigned long addr, bool disable_l2);
+
+#endif
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index 24b4a04846eb..36ed35073289 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -56,8 +56,6 @@ void *return_address(unsigned int level)
 		return NULL;
 }
 
-#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
-
-#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */
+#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
 
 EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1d60bebea4b8..6c777e908a24 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -372,30 +372,48 @@ void __init early_print(const char *str, ...)
 
 static void __init cpuid_init_hwcaps(void)
 {
-	unsigned int divide_instrs, vmsa;
+	int block;
+	u32 isar5;
 
 	if (cpu_architecture() < CPU_ARCH_ARMv7)
 		return;
 
-	divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24;
-
-	switch (divide_instrs) {
-	case 2:
+	block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24);
+	if (block >= 2)
 		elf_hwcap |= HWCAP_IDIVA;
-	case 1:
+	if (block >= 1)
 		elf_hwcap |= HWCAP_IDIVT;
-	}
 
 	/* LPAE implies atomic ldrd/strd instructions */
-	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
-	if (vmsa >= 5)
+	block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
+	if (block >= 5)
 		elf_hwcap |= HWCAP_LPAE;
+
+	/* check for supported v8 Crypto instructions */
+	isar5 = read_cpuid_ext(CPUID_EXT_ISAR5);
+
+	block = cpuid_feature_extract_field(isar5, 4);
+	if (block >= 2)
+		elf_hwcap2 |= HWCAP2_PMULL;
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_AES;
+
+	block = cpuid_feature_extract_field(isar5, 8);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_SHA1;
+
+	block = cpuid_feature_extract_field(isar5, 12);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_SHA2;
+
+	block = cpuid_feature_extract_field(isar5, 16);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_CRC32;
 }
 
 static void __init elf_hwcap_fixup(void)
 {
 	unsigned id = read_cpuid_id();
-	unsigned sync_prim;
 
 	/*
 	 * HWCAP_TLS is available only on 1136 r1p0 and later,
@@ -416,9 +434,9 @@ static void __init elf_hwcap_fixup(void)
 	 * avoid advertising SWP; it may not be atomic with
 	 * multiprocessing cores.
 	 */
-	sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) |
-		    ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f);
-	if (sync_prim >= 0x13)
+	if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 ||
+	    (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 &&
+	     cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3))
 		elf_hwcap &= ~HWCAP_SWP;
 }
 
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index e1e60e5a7a27..7d37bfc50830 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -116,14 +116,7 @@ cpu_resume_after_mmu:
 	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_resume_after_mmu)
 
-/*
- * Note: Yes, part of the following code is located into the .data section.
- *       This is to allow sleep_save_sp to be accessed with a relative load
- *       while we can't rely on any MMU translation.  We could have put
- *       sleep_save_sp in the .text section as well, but some setups might
- *       insist on it to be truly read-only.
- */
-	.data
+	.text
 	.align
 ENTRY(cpu_resume)
 ARM_BE8(setend be)			@ ensure we are in BE mode
@@ -145,6 +138,8 @@ ARM_BE8(setend be)			@ ensure we are in BE mode
 	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
 1:
 	adr	r0, _sleep_save_sp
+	ldr	r2, [r0]
+	add	r0, r0, r2
 	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
 	ldr	r0, [r0, r1, lsl #2]
 
@@ -156,10 +151,12 @@ THUMB(	bx	r3			)
 ENDPROC(cpu_resume)
 
 	.align 2
+_sleep_save_sp:
+	.long	sleep_save_sp - .
 mpidr_hash_ptr:
 	.long	mpidr_hash - .			@ mpidr_hash struct offset
 
+	.data
 	.type	sleep_save_sp, #object
 ENTRY(sleep_save_sp)
-_sleep_save_sp:
 	.space	SLEEP_SAVE_SP_SZ		@ struct sleep_save_sp
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 86ef244c5a24..cca5b8758185 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -145,6 +145,11 @@ void __init smp_init_cpus(void)
 		smp_ops.smp_init_cpus();
 }
 
+int platform_can_secondary_boot(void)
+{
+	return !!smp_ops.smp_boot_secondary;
+}
+
 int platform_can_cpu_hotplug(void)
 {
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index afdd51e30bec..1361756782c7 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -42,7 +42,7 @@
 	"	cmp		%0, #0\n"			\
 	"	movne		%0, %4\n"			\
 	"2:\n"							\
-	"	.section	 .fixup,\"ax\"\n"		\
+	"	.section	 .text.fixup,\"ax\"\n"		\
 	"	.align		2\n"				\
 	"3:	mov		%0, %5\n"			\
 	"	b		2b\n"				\
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
new file mode 100644
index 000000000000..efe17dd9b921
--- /dev/null
+++ b/arch/arm/kernel/vdso.c
@@ -0,0 +1,337 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Copyright (C) 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/elf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/vmalloc.h>
+#include <asm/arch_timer.h>
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <clocksource/arm_arch_timer.h>
+
+#define MAX_SYMNAME	64
+
+static struct page **vdso_text_pagelist;
+
+/* Total number of pages needed for the data and text portions of the VDSO. */
+unsigned int vdso_total_pages __read_mostly;
+
+/*
+ * The VDSO data page.
+ */
+static union vdso_data_store vdso_data_store __page_aligned_data;
+static struct vdso_data *vdso_data = &vdso_data_store.data;
+
+static struct page *vdso_data_page;
+static struct vm_special_mapping vdso_data_mapping = {
+	.name = "[vvar]",
+	.pages = &vdso_data_page,
+};
+
+static struct vm_special_mapping vdso_text_mapping = {
+	.name = "[vdso]",
+};
+
+struct elfinfo {
+	Elf32_Ehdr	*hdr;		/* ptr to ELF */
+	Elf32_Sym	*dynsym;	/* ptr to .dynsym section */
+	unsigned long	dynsymsize;	/* size of .dynsym section */
+	char		*dynstr;	/* ptr to .dynstr section */
+};
+
+/* Cached result of boot-time check for whether the arch timer exists,
+ * and if so, whether the virtual counter is usable.
+ */
+static bool cntvct_ok __read_mostly;
+
+static bool __init cntvct_functional(void)
+{
+	struct device_node *np;
+	bool ret = false;
+
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		goto out;
+
+	/* The arm_arch_timer core should export
+	 * arch_timer_use_virtual or similar so we don't have to do
+	 * this.
+	 */
+	np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
+	if (!np)
+		goto out_put;
+
+	if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
+		goto out_put;
+
+	ret = true;
+
+out_put:
+	of_node_put(np);
+out:
+	return ret;
+}
+
+static void * __init find_section(Elf32_Ehdr *ehdr, const char *name,
+				  unsigned long *size)
+{
+	Elf32_Shdr *sechdrs;
+	unsigned int i;
+	char *secnames;
+
+	/* Grab section headers and strings so we can tell who is who */
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
+	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	/* Find the section they want */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+
+	if (size)
+		*size = 0;
+	return NULL;
+}
+
+static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+		char name[MAX_SYMNAME], *c;
+
+		if (lib->dynsym[i].st_name == 0)
+			continue;
+		strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+			MAX_SYMNAME);
+		c = strchr(name, '@');
+		if (c)
+			*c = 0;
+		if (strcmp(symname, name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;
+}
+
+static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname)
+{
+	Elf32_Sym *sym;
+
+	sym = find_symbol(lib, symname);
+	if (!sym)
+		return;
+
+	sym->st_name = 0;
+}
+
+static void __init patch_vdso(void *ehdr)
+{
+	struct elfinfo einfo;
+
+	einfo = (struct elfinfo) {
+		.hdr = ehdr,
+	};
+
+	einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize);
+	einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL);
+
+	/* If the virtual counter is absent or non-functional we don't
+	 * want programs to incur the slight additional overhead of
+	 * dispatching through the VDSO only to fall back to syscalls.
+	 */
+	if (!cntvct_ok) {
+		vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
+		vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
+	}
+}
+
+static int __init vdso_init(void)
+{
+	unsigned int text_pages;
+	int i;
+
+	if (memcmp(&vdso_start, "\177ELF", 4)) {
+		pr_err("VDSO is not a valid ELF object!\n");
+		return -ENOEXEC;
+	}
+
+	text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+	pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start);
+
+	/* Allocate the VDSO text pagelist */
+	vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
+				     GFP_KERNEL);
+	if (vdso_text_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the VDSO data page. */
+	vdso_data_page = virt_to_page(vdso_data);
+
+	/* Grab the VDSO text pages. */
+	for (i = 0; i < text_pages; i++) {
+		struct page *page;
+
+		page = virt_to_page(&vdso_start + i * PAGE_SIZE);
+		vdso_text_pagelist[i] = page;
+	}
+
+	vdso_text_mapping.pages = vdso_text_pagelist;
+
+	vdso_total_pages = 1; /* for the data/vvar page */
+	vdso_total_pages += text_pages;
+
+	cntvct_ok = cntvct_functional();
+
+	patch_vdso(&vdso_start);
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+static int install_vvar(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
+				       VM_READ | VM_MAYREAD,
+				       &vdso_data_mapping);
+
+	return IS_ERR(vma) ? PTR_ERR(vma) : 0;
+}
+
+/* assumes mmap_sem is write-locked */
+void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+	unsigned long len;
+
+	mm->context.vdso = 0;
+
+	if (vdso_text_pagelist == NULL)
+		return;
+
+	if (install_vvar(mm, addr))
+		return;
+
+	/* Account for vvar page. */
+	addr += PAGE_SIZE;
+	len = (vdso_total_pages - 1) << PAGE_SHIFT;
+
+	vma = _install_special_mapping(mm, addr, len,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&vdso_text_mapping);
+
+	if (!IS_ERR(vma))
+		mm->context.vdso = addr;
+}
+
+static void vdso_write_begin(struct vdso_data *vdata)
+{
+	++vdso_data->seq_count;
+	smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */
+}
+
+static void vdso_write_end(struct vdso_data *vdata)
+{
+	smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */
+	++vdso_data->seq_count;
+}
+
+static bool tk_is_cntvct(const struct timekeeper *tk)
+{
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		return false;
+
+	if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0)
+		return false;
+
+	return true;
+}
+
+/**
+ * update_vsyscall - update the vdso data page
+ *
+ * Increment the sequence counter, making it odd, indicating to
+ * userspace that an update is in progress.  Update the fields used
+ * for coarse clocks and, if the architected system timer is in use,
+ * the fields used for high precision clocks.  Increment the sequence
+ * counter again, making it even, indicating to userspace that the
+ * update is finished.
+ *
+ * Userspace is expected to sample seq_count before reading any other
+ * fields from the data page.  If seq_count is odd, userspace is
+ * expected to wait until it becomes even.  After copying data from
+ * the page, userspace must sample seq_count again; if it has changed
+ * from its previous value, userspace must retry the whole sequence.
+ *
+ * Calls to update_vsyscall are serialized by the timekeeping core.
+ */
+void update_vsyscall(struct timekeeper *tk)
+{
+	struct timespec xtime_coarse;
+	struct timespec64 *wtm = &tk->wall_to_monotonic;
+
+	if (!cntvct_ok) {
+		/* The entry points have been zeroed, so there is no
+		 * point in updating the data page.
+		 */
+		return;
+	}
+
+	vdso_write_begin(vdso_data);
+
+	xtime_coarse = __current_kernel_time();
+	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
+	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
+	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->wtm_clock_sec		= wtm->tv_sec;
+	vdso_data->wtm_clock_nsec		= wtm->tv_nsec;
+
+	if (vdso_data->tk_is_cntvct) {
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
+		vdso_data->xtime_clock_sec	= tk->xtime_sec;
+		vdso_data->xtime_clock_snsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
+		vdso_data->cs_mask		= tk->tkr_mono.mask;
+	}
+
+	vdso_write_end(vdso_data);
+
+	flush_dcache_page(virt_to_page(vdso_data));
+}
+
+void update_vsyscall_tz(void)
+{
+	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest;
+	vdso_data->tz_dsttime		= sys_tz.tz_dsttime;
+	flush_dcache_page(virt_to_page(vdso_data));
+}
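
For reference, a sketch of the reader side of the seq_count protocol that update_vsyscall() documents above, roughly as the vDSO's gettimeofday/clock_gettime would implement it. The structure and barrier are written out explicitly here (ARMv7 dmb), so this is illustrative rather than the actual vDSO source:

#include <stdbool.h>
#include <stdint.h>

struct vdso_data_sketch {
	uint32_t seq_count;
	/* ... coarse and fine time fields ... */
};

#define rmb()	__asm__ __volatile__("dmb ish" ::: "memory")

static uint32_t vdso_read_begin(const volatile struct vdso_data_sketch *vd)
{
	uint32_t seq;

	do {
		seq = vd->seq_count;	/* odd means an update is in progress */
	} while (seq & 1);
	rmb();				/* pairs with the smp_wmb() in vdso_write_end() */
	return seq;
}

static bool vdso_read_retry(const volatile struct vdso_data_sketch *vd, uint32_t start)
{
	rmb();				/* pairs with the smp_wmb() in vdso_write_begin() */
	return vd->seq_count != start;	/* changed: re-read everything */
}

A reader samples seq_count with vdso_read_begin(), copies the fields it needs, and repeats the whole sequence while vdso_read_retry() reports a change.
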
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b31aa73e8076..7a301be9ac67 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -74,7 +74,7 @@ SECTIONS
 		ARM_EXIT_DISCARD(EXIT_DATA)
 		EXIT_CALL
 #ifndef CONFIG_MMU
-		*(.fixup)
+		*(.text.fixup)
 		*(__ex_table)
 #endif
 #ifndef CONFIG_SMP_ON_UP
@@ -100,6 +100,7 @@ SECTIONS
 
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
+			IDMAP_TEXT
 			__exception_text_start = .;
 			*(.exception.text)
 			__exception_text_end = .;
@@ -108,10 +109,6 @@ SECTIONS
 			SCHED_TEXT
 			LOCK_TEXT
 			KPROBES_TEXT
-			IDMAP_TEXT
-#ifdef CONFIG_MMU
-			*(.fixup)
-#endif
 			*(.gnu.warning)
 			*(.glue_7)
 			*(.glue_7t)
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 14a0d988c82c..1710fd7db2d5 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -47,7 +47,7 @@ USER(		strnebt	r2, [r0])
 ENDPROC(__clear_user)
 ENDPROC(__clear_user_std)
 
-		.pushsection .fixup,"ax"
+		.pushsection .text.fixup,"ax"
 		.align	0
 9001:		ldmfd	sp!, {r0, pc}
 		.popsection
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index a9d3db16ecb5..9648b0675a3e 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -100,7 +100,7 @@ WEAK(__copy_to_user)
 ENDPROC(__copy_to_user)
 ENDPROC(__copy_to_user_std)
 
-	.pushsection .fixup,"ax"
+	.pushsection .text.fixup,"ax"
 	.align 0
 	copy_abort_preamble
 	ldmfd	sp!, {r1, r2, r3}
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 7d08b43d2c0e..1d0957e61f89 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -68,7 +68,7 @@
  * so properly, we would have to add in whatever registers were loaded before
  * the fault, which, with the current asm above is not predictable.
  */
-		.pushsection .fixup,"ax"
+		.pushsection .text.fixup,"ax"
 		.align	4
 9001:		mov	r4, #-EFAULT
 		ldr	r5, [sp, #8*4]		@ *err_ptr
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 312d43eb686a..8044591dca72 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -83,6 +83,12 @@ void __init register_current_timer_delay(const struct delay_timer *timer)
 			       NSEC_PER_SEC, 3600);
 	res = cyc_to_ns(1ULL, new_mult, new_shift);
 
+	if (res > 1000) {
+		pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n",
+			timer, res);
+		return;
+	}
+
 	if (!delay_calibrated && (!delay_res || (res < delay_res))) {
 		pr_info("Switching to timer-based delay loop, resolution %lluns\n", res);
 		delay_timer			= timer;
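
The new check rejects any delay timer whose single-cycle period exceeds 1000 ns, i.e. anything slower than 1 MHz. A quick stand-alone illustration of the arithmetic (the kernel derives res from the mult/shift pair via cyc_to_ns(), but the outcome is the same):

#include <stdio.h>

int main(void)
{
	unsigned long long freqs[] = { 32768, 1000000, 24000000 };	/* Hz */

	for (int i = 0; i < 3; i++) {
		unsigned long long res_ns = 1000000000ULL / freqs[i];

		printf("%llu Hz -> %llu ns/cycle: %s\n", freqs[i], res_ns,
		       res_ns > 1000 ? "rejected" : "usable");
	}
	return 0;
}

A 32 kHz timer comes out at about 30.5 us per cycle and is now refused, while 1 MHz (1000 ns) and faster clocks remain acceptable.
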
diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S
index 31d25834b9c4..cf950790fbdc 100644
--- a/arch/arm/mach-exynos/sleep.S
+++ b/arch/arm/mach-exynos/sleep.S
@@ -23,14 +23,7 @@
 #define CPU_MASK	0xff0ffff0
 #define CPU_CORTEX_A9	0x410fc090
 
-	/*
-	 * The following code is located into the .data section. This is to
-	 * allow l2x0_regs_phys to be accessed with a relative load while we
-	 * can't rely on any MMU translation. We could have put l2x0_regs_phys
-	 * in the .text section as well, but some setups might insist on it to
-	 * be truly read-only. (Reference from: arch/arm/kernel/sleep.S)
-	 */
-	.data
+	.text
 	.align
 
 	/*
@@ -69,10 +62,12 @@ ENTRY(exynos_cpu_resume_ns)
 	cmp	r0, r1
 	bne	skip_cp15
 
-	adr	r0, cp15_save_power
+	adr	r0, _cp15_save_power
 	ldr	r1, [r0]
-	adr	r0, cp15_save_diag
+	ldr	r1, [r0, r1]
+	adr	r0, _cp15_save_diag
 	ldr	r2, [r0]
+	ldr	r2, [r0, r2]
 	mov	r0, #SMC_CMD_C15RESUME
 	dsb
 	smc	#0
@@ -118,14 +113,20 @@ skip_l2x0:
 skip_cp15:
 	b	cpu_resume
 ENDPROC(exynos_cpu_resume_ns)
+
+	.align
+_cp15_save_power:
+	.long	cp15_save_power - .
+_cp15_save_diag:
+	.long	cp15_save_diag - .
+#ifdef CONFIG_CACHE_L2X0
+1:	.long	l2x0_saved_regs - .
+#endif /* CONFIG_CACHE_L2X0 */
+
+	.data
 	.globl cp15_save_diag
 cp15_save_diag:
 	.long	0	@ cp15 diagnostic
 	.globl cp15_save_power
 cp15_save_power:
 	.long	0	@ cp15 power control
-
-#ifdef CONFIG_CACHE_L2X0
-	.align
-1:	.long	l2x0_saved_regs - .
-#endif /* CONFIG_CACHE_L2X0 */
diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S
index 7c43ddd33ba8..dfbfc0f7f8b8 100644
--- a/arch/arm/mach-s5pv210/sleep.S
+++ b/arch/arm/mach-s5pv210/sleep.S
@@ -14,7 +14,7 @@
 
 #include <linux/linkage.h>
 
-	.data
+	.text
 	.align
 
 	/*
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 3c2509b4b694..4be537977040 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -42,6 +42,7 @@ if ARCH_VEXPRESS
 config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
 	bool "Enable A5 and A9 only errata work-arounds"
 	default y
+	select ARM_ERRATA_643719 if SMP
 	select ARM_ERRATA_720789
 	select PL310_ERRATA_753970 if CACHE_L2X0
 	help
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 9b4f29e595a4..b7644310236b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -738,7 +738,7 @@ config CPU_ICACHE_DISABLE
 
 config CPU_DCACHE_DISABLE
 	bool "Disable D-Cache (C-bit)"
-	depends on CPU_CP15
+	depends on CPU_CP15 && !SMP
 	help
 	  Say Y here to disable the processor data cache. Unless
 	  you have a reason not to or are unsure, say N.
@@ -825,6 +825,20 @@ config KUSER_HELPERS
 	  Say N here only if you are absolutely certain that you do not
 	  need these helpers; otherwise, the safe option is to say Y.
 
+config VDSO
+	bool "Enable VDSO for acceleration of some system calls"
+	depends on AEABI && MMU
+	default y if ARM_ARCH_TIMER
+	select GENERIC_TIME_VSYSCALL
+	help
+	  Place in the process address space an ELF shared object
+	  providing fast implementations of gettimeofday and
+	  clock_gettime.  Systems that implement the ARM architected
+	  timer will receive maximum benefit.
+
+	  You must have glibc 2.22 or later for programs to seamlessly
+	  take advantage of this.
+
 config DMA_CACHE_RWFO
 	bool "Enable read/write for ownership DMA cache maintenance"
 	depends on CPU_V6K && SMP
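
No application changes are needed to benefit from the VDSO option added above: with CONFIG_VDSO=y, the architected timer present and glibc 2.22 or later, an ordinary call such as the one below is expected to complete without entering the kernel, and older toolchains simply keep using the syscall (pre-2.17 glibc additionally needs -lrt at link time):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);	/* serviced by [vdso] when available */
	printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
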
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 2c0c541c60ca..9769f1eefe3b 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -201,7 +201,7 @@ union offset_union {
  THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
  THUMB(	"	add	%2, %2, #1\n"	)		\
 	"2:\n"						\
-	"	.pushsection .fixup,\"ax\"\n"		\
+	"	.pushsection .text.fixup,\"ax\"\n"	\
 	"	.align	2\n"				\
 	"3:	mov	%0, #1\n"			\
 	"	b	2b\n"				\
@@ -261,7 +261,7 @@ union offset_union {
 		"	mov	%1, %1, "NEXT_BYTE"\n"		\
 		"2:	"ins"	%1, [%2]\n"			\
 		"3:\n"						\
-		"	.pushsection .fixup,\"ax\"\n"		\
+		"	.pushsection .text.fixup,\"ax\"\n"	\
 		"	.align	2\n"				\
 		"4:	mov	%0, #1\n"			\
 		"	b	3b\n"				\
@@ -301,7 +301,7 @@ union offset_union {
 		"	mov	%1, %1, "NEXT_BYTE"\n"		\
 		"4:	"ins"	%1, [%2]\n"			\
 		"5:\n"						\
-		"	.pushsection .fixup,\"ax\"\n"		\
+		"	.pushsection .text.fixup,\"ax\"\n"	\
 		"	.align	2\n"				\
 		"6:	mov	%0, #1\n"			\
 		"	b	5b\n"				\
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 8f15f70622a6..e309c8f35af5 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1647,6 +1647,7 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	struct device_node *np;
 	struct resource res;
 	u32 cache_id, old_aux;
+	u32 cache_level = 2;
 
 	np = of_find_matching_node(NULL, l2x0_ids);
 	if (!np)
@@ -1679,6 +1680,12 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	if (!of_property_read_bool(np, "cache-unified"))
 		pr_err("L2C: device tree omits to specify unified cache\n");
 
+	if (of_property_read_u32(np, "cache-level", &cache_level))
+		pr_err("L2C: device tree omits to specify cache-level\n");
+
+	if (cache_level != 2)
+		pr_err("L2C: device tree specifies invalid cache level\n");
+
 	/* Read back current (default) hardware configuration */
 	if (data->save)
 		data->save(l2x0_base);
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index b966656d2c2d..a134d8a13d00 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -36,10 +36,10 @@ ENTRY(v7_invalidate_l1)
        mcr     p15, 2, r0, c0, c0, 0
        mrc     p15, 1, r0, c0, c0, 0
 
-       ldr     r1, =0x7fff
+       movw    r1, #0x7fff
        and     r2, r1, r0, lsr #13
 
-       ldr     r1, =0x3ff
+       movw    r1, #0x3ff
 
        and     r3, r1, r0, lsr #3      @ NumWays - 1
        add     r2, r2, #1              @ NumSets
@@ -90,21 +90,20 @@ ENDPROC(v7_flush_icache_all)
 ENTRY(v7_flush_dcache_louis)
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
-	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
-	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
+ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
+ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
+	ands	r3, r3, #7 << 1 		@ extract LoU*2 field from clidr
+	bne	start_flush_levels		@ LoU != 0, start flushing
 #ifdef CONFIG_ARM_ERRATA_643719
-	ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
-	ALT_UP(reteq	lr)			@ LoUU is zero, so nothing to do
-	ldreq	r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
-	biceq	r2, r2, #0x0000000f             @ clear minor revision number
-	teqeq	r2, r1                          @ test for errata affected core and if so...
-	orreqs	r3, #(1 << 21)			@   fix LoUIS value (and set flags state to 'ne')
+ALT_SMP(mrc	p15, 0, r2, c0, c0, 0)		@ read main ID register
+ALT_UP(	ret	lr)				@ LoUU is zero, so nothing to do
+	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
+	movt	r1, #:upper16:(0x410fc090 >> 4)
+	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
+	moveq	r3, #1 << 1			@   fix LoUIS value
+	beq	start_flush_levels		@   start flushing cache levels
 #endif
-	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
-	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
-	reteq	lr				@ return if level == 0
-	mov	r10, #0				@ r10 (starting level) = 0
-	b	flush_levels			@ start flushing cache levels
+	ret	lr
 ENDPROC(v7_flush_dcache_louis)
 
 /*
@@ -119,9 +118,10 @@ ENDPROC(v7_flush_dcache_louis)
 ENTRY(v7_flush_dcache_all)
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	ands	r3, r0, #0x7000000		@ extract loc from clidr
-	mov	r3, r3, lsr #23			@ left align loc bit field
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
 	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
 	mov	r10, #0				@ start clean at cache level 0
 flush_levels:
 	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
@@ -140,10 +140,10 @@ flush_levels:
 #endif
 	and	r2, r1, #7			@ extract the length of the cache lines
 	add	r2, r2, #4			@ add 4 (line length offset)
-	ldr	r4, =0x3ff
+	movw	r4, #0x3ff
 	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
 	clz	r5, r4				@ find bit position of way size increment
-	ldr	r7, =0x7fff
+	movw	r7, #0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
 loop1:
 	mov	r9, r7				@ create working copy of max index
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e315dfe3af1b..09c5fe3d30c2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -289,11 +289,11 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
-				     const void *caller);
+				     const void *caller, bool want_vaddr);
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 				 pgprot_t prot, struct page **ret_page,
-				 const void *caller);
+				 const void *caller, bool want_vaddr);
 
 static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
@@ -357,10 +357,10 @@ static int __init atomic_pool_init(void)
 
 	if (dev_get_cma_area(NULL))
 		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
-					      &page, atomic_pool_init);
+					      &page, atomic_pool_init, true);
 	else
 		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
-					   &page, atomic_pool_init);
+					   &page, atomic_pool_init, true);
 	if (ptr) {
 		int ret;
 
@@ -467,13 +467,15 @@ static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 				 pgprot_t prot, struct page **ret_page,
-				 const void *caller)
+				 const void *caller, bool want_vaddr)
 {
 	struct page *page;
-	void *ptr;
+	void *ptr = NULL;
 	page = __dma_alloc_buffer(dev, size, gfp);
 	if (!page)
 		return NULL;
+	if (!want_vaddr)
+		goto out;
 
 	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
 	if (!ptr) {
@@ -481,6 +483,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 		return NULL;
 	}
 
+ out:
 	*ret_page = page;
 	return ptr;
 }
@@ -523,12 +526,12 @@ static int __free_from_pool(void *start, size_t size)
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
-				     const void *caller)
+				     const void *caller, bool want_vaddr)
 {
 	unsigned long order = get_order(size);
 	size_t count = size >> PAGE_SHIFT;
 	struct page *page;
-	void *ptr;
+	void *ptr = NULL;
 
 	page = dma_alloc_from_contiguous(dev, count, order);
 	if (!page)
@@ -536,6 +539,9 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 
 	__dma_clear_buffer(page, size);
 
+	if (!want_vaddr)
+		goto out;
+
 	if (PageHighMem(page)) {
 		ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
 		if (!ptr) {
@@ -546,17 +552,21 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 		__dma_remap(page, size, prot);
 		ptr = page_address(page);
 	}
+
+ out:
 	*ret_page = page;
 	return ptr;
 }
 
 static void __free_from_contiguous(struct device *dev, struct page *page,
-				   void *cpu_addr, size_t size)
+				   void *cpu_addr, size_t size, bool want_vaddr)
 {
-	if (PageHighMem(page))
-		__dma_free_remap(cpu_addr, size);
-	else
-		__dma_remap(page, size, PAGE_KERNEL);
+	if (want_vaddr) {
+		if (PageHighMem(page))
+			__dma_free_remap(cpu_addr, size);
+		else
+			__dma_remap(page, size, PAGE_KERNEL);
+	}
 	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
 }
 
@@ -574,12 +584,12 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
 
 #define nommu() 1
 
-#define __get_dma_pgprot(attrs, prot)	__pgprot(0)
-#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c)	NULL
+#define __get_dma_pgprot(attrs, prot)				__pgprot(0)
+#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)	NULL
 #define __alloc_from_pool(size, ret_page)			NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c)	NULL
+#define __alloc_from_contiguous(dev, size, prot, ret, c, wv)	NULL
 #define __free_from_pool(cpu_addr, size)			0
-#define __free_from_contiguous(dev, page, cpu_addr, size)	do { } while (0)
+#define __free_from_contiguous(dev, page, cpu_addr, size, wv)	do { } while (0)
 #define __dma_free_remap(cpu_addr, size)			do { } while (0)
 
 #endif	/* CONFIG_MMU */
@@ -599,11 +609,13 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
 
 
 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-			 gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
+			 gfp_t gfp, pgprot_t prot, bool is_coherent,
+			 struct dma_attrs *attrs, const void *caller)
 {
 	u64 mask = get_coherent_dma_mask(dev);
 	struct page *page = NULL;
 	void *addr;
+	bool want_vaddr;
 
 #ifdef CONFIG_DMA_API_DEBUG
 	u64 limit = (mask + 1) & ~mask;
@@ -631,20 +643,21 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
 	*handle = DMA_ERROR_CODE;
 	size = PAGE_ALIGN(size);
+	want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
 
 	if (is_coherent || nommu())
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
 	else if (!(gfp & __GFP_WAIT))
 		addr = __alloc_from_pool(size, &page);
 	else if (!dev_get_cma_area(dev))
-		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
+		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr);
 	else
-		addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
+		addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr);
 
-	if (addr)
+	if (page)
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
 
-	return addr;
+	return want_vaddr ? addr : page;
 }
 
 /*
@@ -661,7 +674,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		return memory;
 
 	return __dma_alloc(dev, size, handle, gfp, prot, false,
-			   __builtin_return_address(0));
+			   attrs, __builtin_return_address(0));
 }
 
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
@@ -674,7 +687,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 		return memory;
 
 	return __dma_alloc(dev, size, handle, gfp, prot, true,
-			   __builtin_return_address(0));
+			   attrs, __builtin_return_address(0));
 }
 
 /*
@@ -715,6 +728,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 			   bool is_coherent)
 {
 	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
 
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
@@ -726,14 +740,15 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	} else if (__free_from_pool(cpu_addr, size)) {
 		return;
 	} else if (!dev_get_cma_area(dev)) {
-		__dma_free_remap(cpu_addr, size);
+		if (want_vaddr)
+			__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
 		/*
 		 * Non-atomic allocations cannot be freed with IRQs disabled
 		 */
 		WARN_ON(irqs_disabled());
-		__free_from_contiguous(dev, page, cpu_addr, size);
+		__free_from_contiguous(dev, page, cpu_addr, size, want_vaddr);
 	}
 }
 
@@ -1135,13 +1150,28 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 
 	while (count) {
-		int j, order = __fls(count);
+		int j, order;
+
+		for (order = __fls(count); order > 0; --order) {
+			/*
+			 * We do not want OOM killer to be invoked as long
+			 * as we can fall back to single pages, so we force
+			 * __GFP_NORETRY for orders higher than zero.
+			 */
+			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
+			if (pages[i])
+				break;
+		}
 
-		pages[i] = alloc_pages(gfp, order);
-		while (!pages[i] && order)
-			pages[i] = alloc_pages(gfp, --order);
-		if (!pages[i])
-			goto error;
+		if (!pages[i]) {
+			/*
+			 * Fall back to single page allocation.
+			 * Might invoke OOM killer as last resort.
+			 */
+			pages[i] = alloc_pages(gfp, 0);
+			if (!pages[i])
+				goto error;
+		}
 
 		if (order) {
 			split_page(pages[i], order);
@@ -1206,7 +1236,7 @@ __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
 static dma_addr_t
 __iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t dma_addr, iova;
 	int i, ret = DMA_ERROR_CODE;
@@ -1242,7 +1272,7 @@ fail:
 
 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 
 	/*
 	 * add optional in-page offset from iova to size and align
@@ -1457,7 +1487,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 			  enum dma_data_direction dir, struct dma_attrs *attrs,
 			  bool is_coherent)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova, iova_base;
 	int ret = 0;
 	unsigned int count;
@@ -1678,7 +1708,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
 	     struct dma_attrs *attrs)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t dma_addr;
 	int ret, prot, len = PAGE_ALIGN(size + offset);
 
@@ -1731,7 +1761,7 @@ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
 		struct dma_attrs *attrs)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	int offset = handle & ~PAGE_MASK;
 	int len = PAGE_ALIGN(size + offset);
@@ -1756,7 +1786,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
 		struct dma_attrs *attrs)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
 	int offset = handle & ~PAGE_MASK;
@@ -1775,7 +1805,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 static void arm_iommu_sync_single_for_cpu(struct device *dev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
 	unsigned int offset = handle & ~PAGE_MASK;
@@ -1789,7 +1819,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
 static void arm_iommu_sync_single_for_device(struct device *dev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
 	unsigned int offset = handle & ~PAGE_MASK;
@@ -1950,7 +1980,7 @@ static int __arm_iommu_attach_device(struct device *dev,
 		return err;
 
 	kref_get(&mapping->kref);
-	dev->archdata.mapping = mapping;
+	to_dma_iommu_mapping(dev) = mapping;
 
 	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
 	return 0;
@@ -1995,7 +2025,7 @@ static void __arm_iommu_detach_device(struct device *dev)
 
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
-	dev->archdata.mapping = NULL;
+	to_dma_iommu_mapping(dev) = NULL;
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
@@ -2053,7 +2083,7 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 
 	if (!mapping)
 		return;
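
The want_vaddr plumbing above is driven by DMA_ATTR_NO_KERNEL_MAPPING. A hedged driver-side sketch of an allocation that benefits from it, using the struct dma_attrs interface of this kernel generation; the device pointer and buffer size are placeholders, and the returned cookie is an opaque value rather than a usable kernel virtual address:

#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>

static dma_addr_t alloc_buffer_no_mapping(struct device *my_dev, size_t size)
{
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t handle;
	void *cookie;

	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
	cookie = dma_alloc_attrs(my_dev, size, &handle, GFP_KERNEL, &attrs);
	if (!cookie)
		return 0;	/* allocation failed; 0 used as "no handle" here */

	return handle;
}
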
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 3d0e9aed4b40..be92fa0f2f35 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -86,55 +86,6 @@ static int __init parse_tag_initrd2(const struct tag *tag)
 
 __tagtable(ATAG_INITRD2, parse_tag_initrd2);
 
-/*
- * This keeps memory configuration data used by a couple memory
- * initialization functions, as well as show_mem() for the skipping
- * of holes in the memory map.  It is populated by arm_add_memory().
- */
-void show_mem(unsigned int filter)
-{
-	int free = 0, total = 0, reserved = 0;
-	int shared = 0, cached = 0, slab = 0;
-	struct memblock_region *reg;
-
-	printk("Mem-info:\n");
-	show_free_areas(filter);
-
-	for_each_memblock (memory, reg) {
-		unsigned int pfn1, pfn2;
-		struct page *page, *end;
-
-		pfn1 = memblock_region_memory_base_pfn(reg);
-		pfn2 = memblock_region_memory_end_pfn(reg);
-
-		page = pfn_to_page(pfn1);
-		end  = pfn_to_page(pfn2 - 1) + 1;
-
-		do {
-			total++;
-			if (PageReserved(page))
-				reserved++;
-			else if (PageSwapCache(page))
-				cached++;
-			else if (PageSlab(page))
-				slab++;
-			else if (!page_count(page))
-				free++;
-			else
-				shared += page_count(page) - 1;
-			pfn1++;
-			page = pfn_to_page(pfn1);
-		} while (pfn1 < pfn2);
-	}
-
-	printk("%d pages of RAM\n", total);
-	printk("%d free pages\n", free);
-	printk("%d reserved pages\n", reserved);
-	printk("%d slab pages\n", slab);
-	printk("%d pages shared\n", shared);
-	printk("%d pages swap cached\n", cached);
-}
-
 static void __init find_limits(unsigned long *min, unsigned long *max_low,
 			       unsigned long *max_high)
 {
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 86ee5d47ce3c..aa0519eed698 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -507,7 +507,7 @@ cpu_arm1020_name:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1020_proc_info,#object
 __arm1020_proc_info:
@@ -519,7 +519,7 @@ __arm1020_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1020_setup
+	initfn	__arm1020_setup, __arm1020_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index a6331d78601f..bff4c7f70fd6 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -465,7 +465,7 @@ arm1020e_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1020e_proc_info,#object
 __arm1020e_proc_info:
@@ -479,7 +479,7 @@ __arm1020e_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1020e_setup
+	initfn	__arm1020e_setup, __arm1020e_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index a126b7a59928..dbb2413fe04d 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -448,7 +448,7 @@ arm1022_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1022_proc_info,#object
 __arm1022_proc_info:
@@ -462,7 +462,7 @@ __arm1022_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1022_setup
+	initfn	__arm1022_setup, __arm1022_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index fc294067e977..0b37b2cef9d3 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -442,7 +442,7 @@ arm1026_crval:
 	string	cpu_arm1026_name, "ARM1026EJ-S"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1026_proc_info,#object
 __arm1026_proc_info:
@@ -456,7 +456,7 @@ __arm1026_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1026_setup
+	initfn	__arm1026_setup, __arm1026_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 2baa66b3ac9b..3651cd70e418 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -186,7 +186,7 @@ arm720_crval:
  * See <asm/procinfo.h> for a definition of this structure.
  */
 	
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req
 		.type	__\name\()_proc_info,#object
@@ -203,7 +203,7 @@ __\name\()_proc_info:
 			PMD_BIT4 | \
 			PMD_SECT_AP_WRITE | \
 			PMD_SECT_AP_READ
-		b	\cpu_flush				@ cpu_flush
+		initfn	\cpu_flush, __\name\()_proc_info	@ cpu_flush
 		.long	cpu_arch_name				@ arch_name
 		.long	cpu_elf_name				@ elf_name
 		.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB	@ elf_hwcap
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index ac1ea6b3bce4..024fb7732407 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -132,14 +132,14 @@ __arm740_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 	.type	__arm740_proc_info,#object
 __arm740_proc_info:
 	.long	0x41807400
 	.long	0xfffffff0
 	.long	0
 	.long	0
-	b	__arm740_setup
+	initfn	__arm740_setup, __arm740_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index bf6ba4bc30ff..25472d94426d 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -76,7 +76,7 @@ __arm7tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \
 	extra_hwcaps=0
@@ -86,7 +86,7 @@ __\name\()_proc_info:
 		.long	\cpu_mask
 		.long	0
 		.long	0
-		b	__arm7tdmi_setup
+		initfn	__arm7tdmi_setup, __\name\()_proc_info
 		.long	cpu_arch_name
 		.long	cpu_elf_name
 		.long	HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps )
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 22bf8dde4f84..7a14bd4414c9 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -448,7 +448,7 @@ arm920_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm920_proc_info,#object
 __arm920_proc_info:
@@ -464,7 +464,7 @@ __arm920_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm920_setup
+	initfn	__arm920_setup, __arm920_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 0c6d5ac5a6d4..edccfcdcd551 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -426,7 +426,7 @@ arm922_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm922_proc_info,#object
 __arm922_proc_info:
@@ -442,7 +442,7 @@ __arm922_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm922_setup
+	initfn	__arm922_setup, __arm922_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index c32d073282ea..ede8c54ab4aa 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -494,7 +494,7 @@ arm925_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
@@ -510,7 +510,7 @@ __\name\()_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm925_setup
+	initfn	__arm925_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 252b2503038d..fb827c633693 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -474,7 +474,7 @@ arm926_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm926_proc_info,#object
 __arm926_proc_info:
@@ -490,7 +490,7 @@ __arm926_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm926_setup
+	initfn	__arm926_setup, __arm926_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index e5212d489377..ee5b66f847c4 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -297,26 +297,16 @@ __arm940_setup:
 	mcr	p15, 0, r0, c6,	c0, 1
 
 	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
-	ldr	r1, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c1, 0		@ set area 1, RAM
-	mcr	p15, 0, r0, c6,	c1, 1
+	ldr	r7, =CONFIG_DRAM_SIZE		@ size of RAM (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6,	c1, 0		@ set area 1, RAM
+	mcr	p15, 0, r3, c6,	c1, 1
 
 	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
-	ldr	r1, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c2, 0		@ set area 2, ROM/FLASH
-	mcr	p15, 0, r0, c6,	c2, 1
+	ldr	r7, =CONFIG_FLASH_SIZE		@ size of FLASH (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6,	c2, 0		@ set area 2, ROM/FLASH
+	mcr	p15, 0, r3, c6,	c2, 1
 
 	mov	r0, #0x06
 	mcr	p15, 0, r0, c2, c0, 0		@ Region 1&2 cacheable
@@ -354,14 +344,14 @@ __arm940_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm940_proc_info,#object
 __arm940_proc_info:
 	.long	0x41009400
 	.long	0xff00fff0
 	.long	0
-	b	__arm940_setup
+	initfn	__arm940_setup, __arm940_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index b3dd9b2d0b8e..7361837edc31 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -343,24 +343,14 @@ __arm946_setup:
 	mcr	p15, 0, r0, c6,	c0, 0		@ set region 0, default
 
 	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
-	ldr	r1, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the region register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c1, 0		@ set region 1, RAM
+	ldr	r7, =CONFIG_DRAM_SIZE		@ size of RAM (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6, c1, 0
 
 	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
-	ldr	r1, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the region register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c2, 0		@ set region 2, ROM/FLASH
+	ldr	r7, =CONFIG_FLASH_SIZE		@ size of FLASH (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6, c2, 0
 
 	mov	r0, #0x06
 	mcr	p15, 0, r0, c2, c0, 0		@ region 1,2 d-cacheable
@@ -409,14 +399,14 @@ __arm946_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 	.type	__arm946_proc_info,#object
 __arm946_proc_info:
 	.long	0x41009460
 	.long	0xff00fff0
 	.long	0
 	.long	0
-	b	__arm946_setup
+	initfn	__arm946_setup, __arm946_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 8227322bbb8f..7fac8c612134 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -70,7 +70,7 @@ __arm9tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 		.type	__\name\()_proc_info, #object
@@ -79,7 +79,7 @@ __\name\()_proc_info:
 		.long	\cpu_mask
 		.long	0
 		.long	0
-		b	__arm9tdmi_setup
+		initfn	__arm9tdmi_setup, __\name\()_proc_info
 		.long	cpu_arch_name
 		.long	cpu_elf_name
 		.long	HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index c494886892ba..4001b73af4ee 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -190,7 +190,7 @@ fa526_cr1_set:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__fa526_proc_info,#object
 __fa526_proc_info:
@@ -206,7 +206,7 @@ __fa526_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__fa526_setup
+	initfn	__fa526_setup, __fa526_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 03a1b75f2e16..e494d6d6acbe 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -584,7 +584,7 @@ feroceon_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req
 	.type	__\name\()_proc_info,#object
@@ -601,7 +601,7 @@ __\name\()_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__feroceon_setup
+	initfn	__feroceon_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 082b9f2f7e90..c671f345266a 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -331,3 +331,31 @@ ENTRY(\name\()_tlb_fns)
 	.globl	\x
 	.equ	\x, \y
 .endm
+
+.macro	initfn, func, base
+	.long	\func - \base
+.endm
+
+	/*
+	 * Macro to calculate the log2 size for the protection region
+	 * registers. This calculates rd = log2(size) - 1.  tmp must
+	 * not be the same register as rd.
+	 */
+.macro	pr_sz, rd, size, tmp
+	mov	\tmp, \size, lsr #12
+	mov	\rd, #11
+1:	movs	\tmp, \tmp, lsr #1
+	addne	\rd, \rd, #1
+	bne	1b
+.endm
+
+	/*
+	 * Macro to generate a protection region register value
+	 * given a pre-masked address, size, and enable bit.
+	 * Corrupts size.
+	 */
+.macro	pr_val, dest, addr, size, enable
+	pr_sz	\dest, \size, \size		@ calculate log2(size) - 1
+	orr	\dest, \addr, \dest, lsl #1	@ mask in the region size
+	orr	\dest, \dest, \enable
+.endm
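
The pr_sz/pr_val pair introduced here replaces the open-coded shift loops removed from proc-arm940.S and proc-arm946.S earlier in this diff. In C terms the computed protection-region register value looks roughly like the sketch below (illustrative only; the helper name is made up): base is pre-masked to bits [31:12], size is the region size in bytes (a power of two, at least 4KB), and enable is bit 0.

	/* Hypothetical C equivalent of pr_sz + pr_val, for illustration. */
	static unsigned int mpu_region_val(unsigned int base, unsigned int size,
					   unsigned int enable)
	{
		unsigned int sz = 11;		/* log2(4096) - 1, the minimum */

		for (size >>= 12; size >>= 1; )
			sz++;			/* sz = log2(size) - 1 */

		return base | (sz << 1) | enable;
	}
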
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 53d393455f13..d65edf717bf7 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -427,7 +427,7 @@ mohawk_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__88sv331x_proc_info,#object
 __88sv331x_proc_info:
@@ -443,7 +443,7 @@ __88sv331x_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__mohawk_setup
+	initfn	__mohawk_setup, __88sv331x_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 8008a0461cf5..ee2ce496239f 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -199,7 +199,7 @@ sa110_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__sa110_proc_info,#object
 __sa110_proc_info:
@@ -213,7 +213,7 @@ __sa110_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__sa110_setup
+	initfn	__sa110_setup, __sa110_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 89f97ac648a9..222d5836f666 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -242,7 +242,7 @@ sa1100_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 	.type	__\name\()_proc_info,#object
@@ -257,7 +257,7 @@ __\name\()_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__sa1100_setup
+	initfn	__sa1100_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index d0390f4b3f18..06d890a2342b 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -264,7 +264,7 @@ v6_crval:
 	string	cpu_elf_name, "v6"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Match any ARMv6 processor core.
@@ -287,7 +287,7 @@ __v6_proc_info:
 		PMD_SECT_XN | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__v6_setup
+	initfn	__v6_setup, __v6_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	/* See also feat_v6_fixup() for HWCAP_TLS */
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index ed448d8a596b..10405b8d31af 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -37,15 +37,18 @@
  *	It is assumed that:
  *	- we are not using split page tables
  */
-ENTRY(cpu_v7_switch_mm)
+ENTRY(cpu_ca8_switch_mm)
 #ifdef CONFIG_MMU
 	mov	r2, #0
-	mmid	r1, r1				@ get mm->context.id
-	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
-	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
 #ifdef CONFIG_ARM_ERRATA_430973
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 #endif
+#endif
+ENTRY(cpu_v7_switch_mm)
+#ifdef CONFIG_MMU
+	mmid	r1, r1				@ get mm->context.id
+	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
 	lsr	r2, r2, #8			@ extract the PID
@@ -61,6 +64,7 @@ ENTRY(cpu_v7_switch_mm)
 #endif
 	bx	lr
 ENDPROC(cpu_v7_switch_mm)
+ENDPROC(cpu_ca8_switch_mm)
 
 /*
  *	cpu_v7_set_pte_ext(ptep, pte)
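
The restructuring above gives Cortex-A8 its own switch_mm entry point that performs the erratum 430973 BTAC/BTB flush and then falls through into the common cpu_v7_switch_mm, so other ARMv7 cores no longer pay for the flush. Conceptually (a sketch with made-up names, not the real control flow):

	/* Sketch: what the fall-through arrangement amounts to. */
	void cpu_ca8_switch_mm(pgd_t *pgd, struct mm_struct *mm)
	{
		flush_btac_btb();		/* only if CONFIG_ARM_ERRATA_430973 */
		cpu_v7_switch_mm(pgd, mm);	/* common ARMv7 path */
	}
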
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 8b4ee5e81c14..3d1054f11a8a 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -153,6 +153,21 @@ ENDPROC(cpu_v7_do_resume)
 #endif
 
 /*
+ * Cortex-A8
+ */
+	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca8_reset,		cpu_v7_reset
+	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
+#endif
+
+/*
  * Cortex-A9 processor functions
  */
 	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
@@ -451,7 +466,10 @@ __v7_setup_stack:
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+#ifndef CONFIG_ARM_LPAE
+	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+#endif
 #ifdef CONFIG_CPU_PJ4B
 	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #endif
@@ -462,19 +480,19 @@ __v7_setup_stack:
 	string	cpu_elf_name, "v7"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Standard v7 proc info content
 	 */
-.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
+.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
 	ALT_SMP(.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
 			PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
 	ALT_UP(.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
 			PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
 	.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
-	W(b)	\initfunc
+	initfn	\initfunc, \name
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
@@ -494,7 +512,7 @@ __v7_setup_stack:
 __v7_ca5mp_proc_info:
 	.long	0x410fc050
 	.long	0xff0ffff0
-	__v7_proc __v7_ca5mp_setup
+	__v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup
 	.size	__v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info
 
 	/*
@@ -504,9 +522,19 @@ __v7_ca5mp_proc_info:
 __v7_ca9mp_proc_info:
 	.long	0x410fc090
 	.long	0xff0ffff0
-	__v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
+	__v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
 	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
 
+	/*
+	 * ARM Ltd. Cortex A8 processor.
+	 */
+	.type	__v7_ca8_proc_info, #object
+__v7_ca8_proc_info:
+	.long	0x410fc080
+	.long	0xff0ffff0
+	__v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions
+	.size	__v7_ca8_proc_info, . - __v7_ca8_proc_info
+
 #endif	/* CONFIG_ARM_LPAE */
 
 	/*
@@ -517,7 +545,7 @@ __v7_ca9mp_proc_info:
 __v7_pj4b_proc_info:
 	.long	0x560f5800
 	.long	0xff0fff00
-	__v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions
+	__v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions
 	.size	__v7_pj4b_proc_info, . - __v7_pj4b_proc_info
 #endif
 
@@ -528,7 +556,7 @@ __v7_pj4b_proc_info:
 __v7_cr7mp_proc_info:
 	.long	0x410fc170
 	.long	0xff0ffff0
-	__v7_proc __v7_cr7mp_setup
+	__v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup
 	.size	__v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info
 
 	/*
@@ -538,7 +566,7 @@ __v7_cr7mp_proc_info:
 __v7_ca7mp_proc_info:
 	.long	0x410fc070
 	.long	0xff0ffff0
-	__v7_proc __v7_ca7mp_setup
+	__v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup
 	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
 
 	/*
@@ -548,7 +576,7 @@ __v7_ca7mp_proc_info:
 __v7_ca12mp_proc_info:
 	.long	0x410fc0d0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca12mp_setup
+	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup
 	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
 
 	/*
@@ -558,7 +586,7 @@ __v7_ca12mp_proc_info:
 __v7_ca15mp_proc_info:
 	.long	0x410fc0f0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca15mp_setup
+	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
 	/*
@@ -568,7 +596,7 @@ __v7_ca15mp_proc_info:
 __v7_b15mp_proc_info:
 	.long	0x420f00f0
 	.long	0xff0ffff0
-	__v7_proc __v7_b15mp_setup
+	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup
 	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
 
 	/*
@@ -578,7 +606,7 @@ __v7_b15mp_proc_info:
 __v7_ca17mp_proc_info:
 	.long	0x410fc0e0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca17mp_setup
+	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup
 	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
 
 	/*
@@ -594,7 +622,7 @@ __krait_proc_info:
 	 * do support them. They also don't indicate support for fused multiply
 	 * instructions even though they actually do support them.
 	 */
-	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
+	__v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
 	.size	__krait_proc_info, . - __krait_proc_info
 
 	/*
@@ -604,5 +632,5 @@ __krait_proc_info:
 __v7_proc_info:
 	.long	0x000f0000		@ Required ID value
 	.long	0x000f0000		@ Mask for ID
-	__v7_proc __v7_setup
+	__v7_proc __v7_proc_info, __v7_setup
 	.size	__v7_proc_info, . - __v7_proc_info
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index d1e68b553d3b..e08e1f2bab76 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -135,7 +135,7 @@ __v7m_setup_stack_top:
 	string cpu_elf_name "v7m"
 	string cpu_v7m_name "ARMv7-M"
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Match any ARMv7-M processor core.
@@ -146,7 +146,7 @@ __v7m_proc_info:
 	.long	0x000f0000		@ Mask for ID
 	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
 	.long   0			@ proc_info_list.__cpu_io_mmu_flags
-	b	__v7m_setup		@ proc_info_list.__cpu_flush
+	initfn	__v7m_setup, __v7m_proc_info	@ proc_info_list.__cpu_flush
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index f8acdfece036..293dcc2c441f 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -499,7 +499,7 @@ xsc3_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
 	.type	__\name\()_proc_info,#object
@@ -514,7 +514,7 @@ __\name\()_proc_info:
 	.long	PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__xsc3_setup
+	initfn	__xsc3_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index afa2b3c4df4a..b6bbfdb6dfdc 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -612,7 +612,7 @@ xscale_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
@@ -627,7 +627,7 @@ __\name\()_proc_info:
 	.long	PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__xscale_setup
+	initfn	__xscale_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index 5d65be1f1e8a..71df43547659 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -113,7 +113,7 @@ next:
 	@ to fault.  Emit the appropriate exception gunk to fix things up.
 	@ ??? For some reason, faults can happen at .Lx2 even with a
 	@ plain LDR instruction.  Weird, but it seems harmless.
-	.pushsection .fixup,"ax"
+	.pushsection .text.fixup,"ax"
 	.align	2
 .Lfix:	ret	r9			@ let the user eat segfaults
 	.popsection
diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore
new file mode 100644
index 000000000000..f8b69d84238e
--- /dev/null
+++ b/arch/arm/vdso/.gitignore
@@ -0,0 +1 @@
+vdso.lds
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
new file mode 100644
index 000000000000..bab0a8be7924
--- /dev/null
+++ b/arch/arm/vdso/Makefile
@@ -0,0 +1,74 @@
+hostprogs-y := vdsomunge
+
+obj-vdso := vgettimeofday.o datapage.o
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
+ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+obj-y += vdso.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+CFLAGS_REMOVE_vdso.o = -pg
+
+# Force -O2 to avoid libgcc dependencies
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+CFLAGS_vgettimeofday.o = -O2
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency
+$(obj)/vdso.o : $(obj)/vdso.so
+
+# Link rule for the .so file
+$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+	$(call if_changed,vdsold)
+
+$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/vdsomunge FORCE
+	$(call if_changed,vdsomunge)
+
+# Strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSO    $@
+      cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \
+                   $(call cc-ldoption, -Wl$(comma)--build-id) \
+                   -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \
+                   -Wl,-z,common-page-size=4096 -o $@
+
+quiet_cmd_vdsomunge = MUNGE   $@
+      cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@
+
+#
+# Install the unstripped copy of vdso.so.dbg.  If our toolchain
+# supports build-id, install .build-id links as well.
+#
+# Cribbed from arch/x86/vdso/Makefile.
+#
+quiet_cmd_vdso_install = INSTALL $<
+define cmd_vdso_install
+	cp $< "$(MODLIB)/vdso/vdso.so"; \
+	if readelf -n $< | grep -q 'Build ID'; then \
+	  buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+	  first=`echo $$buildid | cut -b-2`; \
+	  last=`echo $$buildid | cut -b3-`; \
+	  mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+	  ln -sf "../../vdso.so" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+	fi
+endef
+
+$(MODLIB)/vdso: FORCE
+	@mkdir -p $(MODLIB)/vdso
+
+PHONY += vdso_install
+vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso FORCE
+	$(call cmd,vdso_install)
diff --git a/arch/arm/vdso/datapage.S b/arch/arm/vdso/datapage.S
new file mode 100644
index 000000000000..a2e60367931b
--- /dev/null
+++ b/arch/arm/vdso/datapage.S
@@ -0,0 +1,15 @@
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+
+	.align 2
+.L_vdso_data_ptr:
+	.long	_start - . - VDSO_DATA_SIZE
+
+ENTRY(__get_datapage)
+	.fnstart
+	adr	r0, .L_vdso_data_ptr
+	ldr	r1, [r0]
+	add	r0, r0, r1
+	bx	lr
+	.fnend
+ENDPROC(__get_datapage)
diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S
new file mode 100644
index 000000000000..b2b97e3e7bab
--- /dev/null
+++ b/arch/arm/vdso/vdso.S
@@ -0,0 +1,35 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso_start, vdso_end
+	.balign PAGE_SIZE
+vdso_start:
+	.incbin "arch/arm/vdso/vdso.so"
+	.balign PAGE_SIZE
+vdso_end:
+
+	.previous
diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S
new file mode 100644
index 000000000000..89ca89f12d23
--- /dev/null
+++ b/arch/arm/vdso/vdso.lds.S
@@ -0,0 +1,87 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * GNU linker script for the VDSO library.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Heavily based on the vDSO linker scripts for other archs.
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+SECTIONS
+{
+	PROVIDE(_start = .);
+
+	. = SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.rodata		: { *(.rodata*) }		:text
+
+	.text		: { *(.text*) }			:text	=0xe7f001f2
+
+	.got		: { *(.got) }
+	.rel.plt	: { *(.rel.plt) }
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
+	note		PT_NOTE		FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+VERSION
+{
+	LINUX_2.6 {
+	global:
+		__vdso_clock_gettime;
+		__vdso_gettimeofday;
+	local: *;
+	};
+}
diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
new file mode 100644
index 000000000000..9005b07296c8
--- /dev/null
+++ b/arch/arm/vdso/vdsomunge.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * vdsomunge - Host program which produces a shared object
+ * architecturally specified to be usable by both soft- and hard-float
+ * programs.
+ *
+ * The Procedure Call Standard for the ARM Architecture (ARM IHI
+ * 0042E) says:
+ *
+ *	6.4.1 VFP and Base Standard Compatibility
+ *
+ *	Code compiled for the VFP calling standard is compatible with
+ *	the base standard (and vice-versa) if no floating-point or
+ *	containerized vector arguments or results are used.
+ *
+ * And ELF for the ARM Architecture (ARM IHI 0044E) (Table 4-2) says:
+ *
+ *	If both EF_ARM_ABI_FLOAT_XXXX bits are clear, conformance to the
+ *	base procedure-call standard is implied.
+ *
+ * The VDSO is built with -msoft-float, as with the rest of the ARM
+ * kernel, and uses no floating point arguments or results.  The build
+ * process will produce a shared object that may or may not have the
+ * EF_ARM_ABI_FLOAT_SOFT flag set (it seems to depend on the binutils
+ * version; binutils starting with 2.24 appears to set it).  The
+ * EF_ARM_ABI_FLOAT_HARD flag should definitely not be set, and this
+ * program will error out if it is.
+ *
+ * If the soft-float flag is set, this program clears it.  That's all
+ * it does.
+ */
+
+#define _GNU_SOURCE
+
+#include <byteswap.h>
+#include <elf.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HOST_ORDER ELFDATA2LSB
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define HOST_ORDER ELFDATA2MSB
+#endif
+
+/* Some of the ELF constants we'd like to use were added to <elf.h>
+ * relatively recently.
+ */
+#ifndef EF_ARM_EABI_VER5
+#define EF_ARM_EABI_VER5 0x05000000
+#endif
+
+#ifndef EF_ARM_ABI_FLOAT_SOFT
+#define EF_ARM_ABI_FLOAT_SOFT 0x200
+#endif
+
+#ifndef EF_ARM_ABI_FLOAT_HARD
+#define EF_ARM_ABI_FLOAT_HARD 0x400
+#endif
+
+static const char *outfile;
+
+static void cleanup(void)
+{
+	if (error_message_count > 0 && outfile != NULL)
+		unlink(outfile);
+}
+
+static Elf32_Word read_elf_word(Elf32_Word word, bool swap)
+{
+	return swap ? bswap_32(word) : word;
+}
+
+static Elf32_Half read_elf_half(Elf32_Half half, bool swap)
+{
+	return swap ? bswap_16(half) : half;
+}
+
+static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap)
+{
+	*dst = swap ? bswap_32(val) : val;
+}
+
+int main(int argc, char **argv)
+{
+	const Elf32_Ehdr *inhdr;
+	bool clear_soft_float;
+	const char *infile;
+	Elf32_Word e_flags;
+	const void *inbuf;
+	struct stat stat;
+	void *outbuf;
+	bool swap;
+	int outfd;
+	int infd;
+
+	atexit(cleanup);
+
+	if (argc != 3)
+		error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]);
+
+	infile = argv[1];
+	outfile = argv[2];
+
+	infd = open(infile, O_RDONLY);
+	if (infd < 0)
+		error(EXIT_FAILURE, errno, "Cannot open %s", infile);
+
+	if (fstat(infd, &stat) != 0)
+		error(EXIT_FAILURE, errno, "Failed stat for %s", infile);
+
+	inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0);
+	if (inbuf == MAP_FAILED)
+		error(EXIT_FAILURE, errno, "Failed to map %s", infile);
+
+	close(infd);
+
+	inhdr = inbuf;
+
+	if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0)
+		error(EXIT_FAILURE, 0, "Not an ELF file");
+
+	if (inhdr->e_ident[EI_CLASS] != ELFCLASS32)
+		error(EXIT_FAILURE, 0, "Unsupported ELF class");
+
+	swap = inhdr->e_ident[EI_DATA] != HOST_ORDER;
+
+	if (read_elf_half(inhdr->e_type, swap) != ET_DYN)
+		error(EXIT_FAILURE, 0, "Not a shared object");
+
+	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) {
+		error(EXIT_FAILURE, 0, "Unsupported architecture %#x",
+		      inhdr->e_machine);
+	}
+
+	e_flags = read_elf_word(inhdr->e_flags, swap);
+
+	if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) {
+		error(EXIT_FAILURE, 0, "Unsupported EABI version %#x",
+		      EF_ARM_EABI_VERSION(e_flags));
+	}
+
+	if (e_flags & EF_ARM_ABI_FLOAT_HARD)
+		error(EXIT_FAILURE, 0,
+		      "Unexpected hard-float flag set in e_flags");
+
+	clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT);
+
+	outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+	if (outfd < 0)
+		error(EXIT_FAILURE, errno, "Cannot open %s", outfile);
+
+	if (ftruncate(outfd, stat.st_size) != 0)
+		error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile);
+
+	outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		      outfd, 0);
+	if (outbuf == MAP_FAILED)
+		error(EXIT_FAILURE, errno, "Failed to map %s", outfile);
+
+	close(outfd);
+
+	memcpy(outbuf, inbuf, stat.st_size);
+
+	if (clear_soft_float) {
+		Elf32_Ehdr *outhdr;
+
+		outhdr = outbuf;
+		e_flags &= ~EF_ARM_ABI_FLOAT_SOFT;
+		write_elf_word(e_flags, &outhdr->e_flags, swap);
+	}
+
+	if (msync(outbuf, stat.st_size, MS_SYNC) != 0)
+		error(EXIT_FAILURE, errno, "Failed to sync %s", outfile);
+
+	return EXIT_SUCCESS;
+}
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..79214d5ff097
--- /dev/null
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/hrtimer.h>
+#include <linux/time.h>
+#include <asm/arch_timer.h>
+#include <asm/barrier.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+#include <asm/unistd.h>
+#include <asm/vdso_datapage.h>
+
+#ifndef CONFIG_AEABI
+#error This code depends on AEABI system call conventions
+#endif
+
+extern struct vdso_data *__get_datapage(void);
+
+static notrace u32 __vdso_read_begin(const struct vdso_data *vdata)
+{
+	u32 seq;
+repeat:
+	seq = ACCESS_ONCE(vdata->seq_count);
+	if (seq & 1) {
+		cpu_relax();
+		goto repeat;
+	}
+	return seq;
+}
+
+static notrace u32 vdso_read_begin(const struct vdso_data *vdata)
+{
+	u32 seq;
+
+	seq = __vdso_read_begin(vdata);
+
+	smp_rmb(); /* Pairs with smp_wmb in vdso_write_end */
+	return seq;
+}
+
+static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start)
+{
+	smp_rmb(); /* Pairs with smp_wmb in vdso_write_begin */
+	return vdata->seq_count != start;
+}
+
+static notrace long clock_gettime_fallback(clockid_t _clkid,
+					   struct timespec *_ts)
+{
+	register struct timespec *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_gettime;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static notrace int do_realtime_coarse(struct timespec *ts,
+				      struct vdso_data *vdata)
+{
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		ts->tv_sec = vdata->xtime_coarse_sec;
+		ts->tv_nsec = vdata->xtime_coarse_nsec;
+
+	} while (vdso_read_retry(vdata, seq));
+
+	return 0;
+}
+
+static notrace int do_monotonic_coarse(struct timespec *ts,
+				       struct vdso_data *vdata)
+{
+	struct timespec tomono;
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		ts->tv_sec = vdata->xtime_coarse_sec;
+		ts->tv_nsec = vdata->xtime_coarse_nsec;
+
+		tomono.tv_sec = vdata->wtm_clock_sec;
+		tomono.tv_nsec = vdata->wtm_clock_nsec;
+
+	} while (vdso_read_retry(vdata, seq));
+
+	ts->tv_sec += tomono.tv_sec;
+	timespec_add_ns(ts, tomono.tv_nsec);
+
+	return 0;
+}
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+
+static notrace u64 get_ns(struct vdso_data *vdata)
+{
+	u64 cycle_delta;
+	u64 cycle_now;
+	u64 nsec;
+
+	cycle_now = arch_counter_get_cntvct();
+
+	cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask;
+
+	nsec = (cycle_delta * vdata->cs_mult) + vdata->xtime_clock_snsec;
+	nsec >>= vdata->cs_shift;
+
+	return nsec;
+}
+
+static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
+{
+	u64 nsecs;
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		if (!vdata->tk_is_cntvct)
+			return -1;
+
+		ts->tv_sec = vdata->xtime_clock_sec;
+		nsecs = get_ns(vdata);
+
+	} while (vdso_read_retry(vdata, seq));
+
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs);
+
+	return 0;
+}
+
+static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
+{
+	struct timespec tomono;
+	u64 nsecs;
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		if (!vdata->tk_is_cntvct)
+			return -1;
+
+		ts->tv_sec = vdata->xtime_clock_sec;
+		nsecs = get_ns(vdata);
+
+		tomono.tv_sec = vdata->wtm_clock_sec;
+		tomono.tv_nsec = vdata->wtm_clock_nsec;
+
+	} while (vdso_read_retry(vdata, seq));
+
+	ts->tv_sec += tomono.tv_sec;
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs + tomono.tv_nsec);
+
+	return 0;
+}
+
+#else /* CONFIG_ARM_ARCH_TIMER */
+
+static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
+{
+	return -1;
+}
+
+static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
+{
+	return -1;
+}
+
+#endif /* CONFIG_ARM_ARCH_TIMER */
+
+notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
+{
+	struct vdso_data *vdata;
+	int ret = -1;
+
+	vdata = __get_datapage();
+
+	switch (clkid) {
+	case CLOCK_REALTIME_COARSE:
+		ret = do_realtime_coarse(ts, vdata);
+		break;
+	case CLOCK_MONOTONIC_COARSE:
+		ret = do_monotonic_coarse(ts, vdata);
+		break;
+	case CLOCK_REALTIME:
+		ret = do_realtime(ts, vdata);
+		break;
+	case CLOCK_MONOTONIC:
+		ret = do_monotonic(ts, vdata);
+		break;
+	default:
+		break;
+	}
+
+	if (ret)
+		ret = clock_gettime_fallback(clkid, ts);
+
+	return ret;
+}
+
+static notrace long gettimeofday_fallback(struct timeval *_tv,
+					  struct timezone *_tz)
+{
+	register struct timezone *tz asm("r1") = _tz;
+	register struct timeval *tv asm("r0") = _tv;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_gettimeofday;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (tv), "r" (tz), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	struct timespec ts;
+	struct vdso_data *vdata;
+	int ret;
+
+	vdata = __get_datapage();
+
+	ret = do_realtime(&ts, vdata);
+	if (ret)
+		return gettimeofday_fallback(tv, tz);
+
+	if (tv) {
+		tv->tv_sec = ts.tv_sec;
+		tv->tv_usec = ts.tv_nsec / 1000;
+	}
+	if (tz) {
+		tz->tz_minuteswest = vdata->tz_minuteswest;
+		tz->tz_dsttime = vdata->tz_dsttime;
+	}
+
+	return ret;
+}
+
+/* Avoid unresolved references emitted by GCC */
+
+void __aeabi_unwind_cpp_pr0(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+}
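
As a quick userspace sanity check (not part of the patch), an ordinary program can exercise these entry points through the C library; assuming a glibc new enough to use the ARM vDSO, the vDSO mapped, and a usable architected timer, the calls below should be satisfied by __vdso_clock_gettime()/__vdso_gettimeofday() without entering the kernel.

	#include <stdio.h>
	#include <sys/time.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;
		struct timeval tv;

		clock_gettime(CLOCK_MONOTONIC, &ts);	/* __vdso_clock_gettime */
		gettimeofday(&tv, NULL);		/* __vdso_gettimeofday */

		printf("monotonic: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
		printf("realtime:  %ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
		return 0;
	}
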