summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-12 08:51:56 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-12 08:51:56 -0800
commit42cf0f203e877cc7e502883d43b3f72149033d86 (patch)
tree3658297d62f28d7bfaa148099b08001aa9904229 /arch
parenta2f0bb03f7c499e3db72c70a62b1aa5c55d6a82b (diff)
parentdf9ab9771c64f5229843bfe2a20fe0ee6ac59fc1 (diff)
downloadlinux-42cf0f203e877cc7e502883d43b3f72149033d86.tar.gz
Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
Pull ARM updates from Russell King:

 - clang assembly fixes from Ard

 - optimisations and cleanups for Aurora L2 cache support

 - efficient L2 cache support for secure monitor API on Exynos SoCs

 - debug menu cleanup from Daniel Thompson to allow better behaviour for
   multiplatform kernels

 - StrongARM SA11x0 conversion to irq domains, and pxa_timer

 - kprobes updates for older ARM CPUs

 - move probes support out of arch/arm/kernel to arch/arm/probes

 - add inline asm support for the rbit (reverse bits) instruction

 - provide an ARM mode secondary CPU entry point (for Qualcomm CPUs)

 - remove the unused ARMv3 user access code

 - add driver_override support to AMBA Primecell bus

* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (55 commits)
  ARM: 8256/1: driver coamba: add device binding path 'driver_override'
  ARM: 8301/1: qcom: Use secondary_startup_arm()
  ARM: 8302/1: Add a secondary_startup that assumes ARM mode
  ARM: 8300/1: teach __asmeq that r11 == fp and r12 == ip
  ARM: kprobes: Fix compilation error caused by superfluous '*'
  ARM: 8297/1: cache-l2x0: optimize aurora range operations
  ARM: 8296/1: cache-l2x0: clean up aurora cache handling
  ARM: 8284/1: sa1100: clear RCSR_SMR on resume
  ARM: 8283/1: sa1100: collie: clear PWER register on machine init
  ARM: 8282/1: sa1100: use handle_domain_irq
  ARM: 8281/1: sa1100: move GPIO-related IRQ code to gpio driver
  ARM: 8280/1: sa1100: switch to irq_domain_add_simple()
  ARM: 8279/1: sa1100: merge both GPIO irqdomains
  ARM: 8278/1: sa1100: split irq handling for low GPIOs
  ARM: 8291/1: replace magic number with PAGE_SHIFT macro in fixup_pv code
  ARM: 8290/1: decompressor: fix a wrong comment
  ARM: 8286/1: mm: Fix dma_contiguous_reserve comment
  ARM: 8248/1: pm: remove outdated comment
  ARM: 8274/1: Fix DEBUG_LL for multi-platform kernels (without PL01X)
  ARM: 8273/1: Seperate DEBUG_UART_PHYS from DEBUG_LL on EP93XX
  ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/Kconfig.debug101
-rw-r--r--arch/arm/Makefile1
-rw-r--r--arch/arm/boot/compressed/head.S2
-rw-r--r--arch/arm/boot/dts/exynos4210.dtsi9
-rw-r--r--arch/arm/boot/dts/exynos4x12.dtsi14
-rw-r--r--arch/arm/configs/iop32x_defconfig1
-rw-r--r--arch/arm/configs/iop33x_defconfig1
-rw-r--r--arch/arm/configs/ixp4xx_defconfig1
-rw-r--r--arch/arm/configs/lpc32xx_defconfig1
-rw-r--r--arch/arm/configs/mv78xx0_defconfig1
-rw-r--r--arch/arm/configs/orion5x_defconfig1
-rw-r--r--arch/arm/configs/rpc_defconfig1
-rw-r--r--arch/arm/include/asm/bitrev.h20
-rw-r--r--arch/arm/include/asm/compiler.h15
-rw-r--r--arch/arm/include/asm/insn.h (renamed from arch/arm/kernel/insn.h)0
-rw-r--r--arch/arm/include/asm/kprobes.h33
-rw-r--r--arch/arm/include/asm/outercache.h3
-rw-r--r--arch/arm/include/asm/patch.h (renamed from arch/arm/kernel/patch.h)0
-rw-r--r--arch/arm/include/asm/probes.h15
-rw-r--r--arch/arm/include/debug/ks8695.S (renamed from arch/arm/mach-ks8695/include/mach/debug-macro.S)10
-rw-r--r--arch/arm/include/debug/netx.S (renamed from arch/arm/mach-netx/include/mach/debug-macro.S)22
-rw-r--r--arch/arm/kernel/Makefile16
-rw-r--r--arch/arm/kernel/entry-armv.S3
-rw-r--r--arch/arm/kernel/ftrace.c3
-rw-r--r--arch/arm/kernel/head.S9
-rw-r--r--arch/arm/kernel/irq.c3
-rw-r--r--arch/arm/kernel/jump_label.c5
-rw-r--r--arch/arm/kernel/kgdb.c3
-rw-r--r--arch/arm/kernel/patch.c3
-rw-r--r--arch/arm/kernel/suspend.c4
-rw-r--r--arch/arm/lib/Makefile15
-rw-r--r--arch/arm/lib/uaccess.S564
-rw-r--r--arch/arm/mach-exynos/firmware.c50
-rw-r--r--arch/arm/mach-exynos/sleep.S46
-rw-r--r--arch/arm/mach-omap1/include/mach/debug-macro.S101
-rw-r--r--arch/arm/mach-omap2/board-generic.c6
-rw-r--r--arch/arm/mach-omap2/common.h8
-rw-r--r--arch/arm/mach-omap2/omap4-common.c16
-rw-r--r--arch/arm/mach-qcom/platsmp.c4
-rw-r--r--arch/arm/mach-sa1100/Makefile2
-rw-r--r--arch/arm/mach-sa1100/clock.c12
-rw-r--r--arch/arm/mach-sa1100/collie.c3
-rw-r--r--arch/arm/mach-sa1100/generic.c6
-rw-r--r--arch/arm/mach-sa1100/include/mach/irqs.h73
-rw-r--r--arch/arm/mach-sa1100/irq.c203
-rw-r--r--arch/arm/mach-sa1100/pm.c1
-rw-r--r--arch/arm/mach-sa1100/time.c139
-rw-r--r--arch/arm/mm/cache-l2x0.c439
-rw-r--r--arch/arm/mm/init.c5
-rw-r--r--arch/arm/probes/Makefile7
-rw-r--r--arch/arm/probes/decode-arm.c (renamed from arch/arm/kernel/probes-arm.c)18
-rw-r--r--arch/arm/probes/decode-arm.h (renamed from arch/arm/kernel/probes-arm.h)9
-rw-r--r--arch/arm/probes/decode-thumb.c (renamed from arch/arm/kernel/probes-thumb.c)16
-rw-r--r--arch/arm/probes/decode-thumb.h (renamed from arch/arm/kernel/probes-thumb.h)10
-rw-r--r--arch/arm/probes/decode.c (renamed from arch/arm/kernel/probes.c)81
-rw-r--r--arch/arm/probes/decode.h (renamed from arch/arm/kernel/probes.h)13
-rw-r--r--arch/arm/probes/kprobes/Makefile12
-rw-r--r--arch/arm/probes/kprobes/actions-arm.c (renamed from arch/arm/kernel/kprobes-arm.c)11
-rw-r--r--arch/arm/probes/kprobes/actions-common.c (renamed from arch/arm/kernel/kprobes-common.c)4
-rw-r--r--arch/arm/probes/kprobes/actions-thumb.c (renamed from arch/arm/kernel/kprobes-thumb.c)10
-rw-r--r--arch/arm/probes/kprobes/checkers-arm.c192
-rw-r--r--arch/arm/probes/kprobes/checkers-common.c101
-rw-r--r--arch/arm/probes/kprobes/checkers-thumb.c110
-rw-r--r--arch/arm/probes/kprobes/checkers.h55
-rw-r--r--arch/arm/probes/kprobes/core.c (renamed from arch/arm/kernel/kprobes.c)49
-rw-r--r--arch/arm/probes/kprobes/core.h (renamed from arch/arm/kernel/kprobes.h)12
-rw-r--r--arch/arm/probes/kprobes/opt-arm.c370
-rw-r--r--arch/arm/probes/kprobes/test-arm.c (renamed from arch/arm/kernel/kprobes-test-arm.c)40
-rw-r--r--arch/arm/probes/kprobes/test-core.c (renamed from arch/arm/kernel/kprobes-test.c)46
-rw-r--r--arch/arm/probes/kprobes/test-core.h (renamed from arch/arm/kernel/kprobes-test.h)35
-rw-r--r--arch/arm/probes/kprobes/test-thumb.c (renamed from arch/arm/kernel/kprobes-test-thumb.c)20
-rw-r--r--arch/arm/probes/uprobes/Makefile1
-rw-r--r--arch/arm/probes/uprobes/actions-arm.c (renamed from arch/arm/kernel/uprobes-arm.c)8
-rw-r--r--arch/arm/probes/uprobes/core.c (renamed from arch/arm/kernel/uprobes.c)8
-rw-r--r--arch/arm/probes/uprobes/core.h (renamed from arch/arm/kernel/uprobes.h)0
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/bitrev.h19
-rw-r--r--arch/x86/kernel/kprobes/opt.c3
79 files changed, 1807 insertions, 1450 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index dcb2e0c55be4..0850fc0f9658 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -29,6 +29,7 @@ config ARM
 	select HANDLE_DOMAIN_IRQ
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
+	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
@@ -60,6 +61,7 @@ config ARM
 	select HAVE_MEMBLOCK
 	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
+	select HAVE_OPTPROBES if !THUMB2_KERNEL
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 5ddd4906f7a7..a324ecdfeb21 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -397,6 +397,13 @@ choice
 		  Say Y here if you want the debug print routines to direct
 		  their output to UART1 serial port on KEYSTONE2 devices.
 
+	config DEBUG_KS8695_UART
+		bool "KS8695 Debug UART"
+		depends on ARCH_KS8695
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on KS8695.
+
 	config DEBUG_MESON_UARTAO
 		bool "Kernel low-level debugging via Meson6 UARTAO"
 		depends on ARCH_MESON
@@ -496,6 +503,13 @@ choice
 		  Say Y here if you want kernel low-level debugging support
 		  on Vybrid based platforms.
 
+	config DEBUG_NETX_UART
+		bool "Kernel low-level debugging messages via NetX UART"
+		depends on ARCH_NETX
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on Hilscher NetX based platforms.
+
 	config DEBUG_NOMADIK_UART
 		bool "Kernel low-level debugging messages via NOMADIK UART"
 		depends on ARCH_NOMADIK
@@ -520,6 +534,30 @@ choice
 		  Say Y here if you want kernel low-level debugging support
 		  on TI-NSPIRE CX models.
 
+	config DEBUG_OMAP1UART1
+		bool "Kernel low-level debugging via OMAP1 UART1"
+		depends on ARCH_OMAP1
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on OMAP1 based platforms (except OMAP730) on the UART1.
+
+	config DEBUG_OMAP1UART2
+		bool "Kernel low-level debugging via OMAP1 UART2"
+		depends on ARCH_OMAP1
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on OMAP1 based platforms (except OMAP730) on the UART2.
+
+	config DEBUG_OMAP1UART3
+		bool "Kernel low-level debugging via OMAP1 UART3"
+		depends on ARCH_OMAP1
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on OMAP1 based platforms (except OMAP730) on the UART3.
+
 	config DEBUG_OMAP2UART1
 		bool "OMAP2/3/4 UART1 (omap2/3 sdp boards and some omap3 boards)"
 		depends on ARCH_OMAP2PLUS
@@ -562,6 +600,30 @@ choice
 		depends on ARCH_OMAP2PLUS
 		select DEBUG_OMAP2PLUS_UART
 
+	config DEBUG_OMAP7XXUART1
+		bool "Kernel low-level debugging via OMAP730 UART1"
+		depends on ARCH_OMAP730
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on OMAP730 based platforms on the UART1.
+
+	config DEBUG_OMAP7XXUART2
+		bool "Kernel low-level debugging via OMAP730 UART2"
+		depends on ARCH_OMAP730
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on OMAP730 based platforms on the UART2.
+
+	config DEBUG_OMAP7XXUART3
+		bool "Kernel low-level debugging via OMAP730 UART3"
+		depends on ARCH_OMAP730
+		select DEBUG_UART_8250
+		help
+		  Say Y here if you want kernel low-level debugging support
+		  on OMAP730 based platforms on the UART3.
+
 	config DEBUG_TI81XXUART1
 		bool "Kernel low-level debugging messages via TI81XX UART1 (ti8148evm)"
 		depends on ARCH_OMAP2PLUS
@@ -1031,15 +1093,6 @@ choice
 		  This option selects UART0 on VIA/Wondermedia System-on-a-chip
 		  devices, including VT8500, WM8505, WM8650 and WM8850.
 
-	config DEBUG_LL_UART_NONE
-		bool "No low-level debugging UART"
-		depends on !ARCH_MULTIPLATFORM
-		help
-		  Say Y here if your platform doesn't provide a UART option
-		  above. This relies on your platform choosing the right UART
-		  definition internally in order for low-level debugging to
-		  work.
-
 	config DEBUG_ICEDCC
 		bool "Kernel low-level debugging via EmbeddedICE DCC channel"
 		help
@@ -1183,7 +1236,9 @@ config DEBUG_LL_INCLUDE
 				 DEBUG_IMX6Q_UART || \
 				 DEBUG_IMX6SL_UART || \
 				 DEBUG_IMX6SX_UART
+	default "debug/ks8695.S" if DEBUG_KS8695_UART
 	default "debug/msm.S" if DEBUG_MSM_UART || DEBUG_QCOM_UARTDM
+	default "debug/netx.S" if DEBUG_NETX_UART
 	default "debug/omap2plus.S" if DEBUG_OMAP2PLUS_UART
 	default "debug/renesas-scif.S" if DEBUG_R7S72100_SCIF2
 	default "debug/renesas-scif.S" if DEBUG_RCAR_GEN1_SCIF0
@@ -1208,12 +1263,7 @@ config DEBUG_LL_INCLUDE
 
 # Compatibility options for PL01x
 config DEBUG_UART_PL01X
-	def_bool ARCH_EP93XX || \
-		ARCH_INTEGRATOR || \
-		ARCH_SPEAR3XX || \
-		ARCH_SPEAR6XX || \
-		ARCH_SPEAR13XX || \
-		ARCH_VERSATILE
+	bool
 
 # Compatibility options for 8250
 config DEBUG_UART_8250
@@ -1229,6 +1279,7 @@ config DEBUG_UART_BCM63XX
 
 config DEBUG_UART_PHYS
 	hex "Physical base address of debug UART"
+	default 0x00100a00 if DEBUG_NETX_UART
 	default 0x01c20000 if DEBUG_DAVINCI_DMx_UART0
 	default 0x01c28000 if DEBUG_SUNXI_UART0
 	default 0x01c28400 if DEBUG_SUNXI_UART1
@@ -1269,7 +1320,6 @@ config DEBUG_UART_PHYS
 				DEBUG_S3C2410_UART2)
 	default 0x78000000 if DEBUG_CNS3XXX
 	default 0x7c0003f8 if FOOTBRIDGE
-	default 0x78000000 if DEBUG_CNS3XXX
 	default 0x80010000 if DEBUG_ASM9260_UART
 	default 0x80070000 if DEBUG_IMX23_UART
 	default 0x80074000 if DEBUG_IMX28_UART
@@ -1310,12 +1360,17 @@ config DEBUG_UART_PHYS
 	default 0xffe40000 if DEBUG_RCAR_GEN1_SCIF0
 	default 0xffe42000 if DEBUG_RCAR_GEN1_SCIF2
 	default 0xfff36000 if DEBUG_HIGHBANK_UART
+	default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1
+	default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2
+	default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3
 	default 0xfffe8600 if DEBUG_UART_BCM63XX
 	default 0xfffff700 if ARCH_IOP33X
-	depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
+	depends on ARCH_EP93XX || \
+	        DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
 		DEBUG_LL_UART_EFM32 || \
 		DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \
-		DEBUG_MSM_UART || DEBUG_QCOM_UARTDM || DEBUG_R7S72100_SCIF2 || \
+		DEBUG_MSM_UART || DEBUG_NETX_UART || \
+		DEBUG_QCOM_UARTDM || DEBUG_R7S72100_SCIF2 || \
 		DEBUG_RCAR_GEN1_SCIF0 || DEBUG_RCAR_GEN1_SCIF2 || \
 		DEBUG_RCAR_GEN2_SCIF0 || DEBUG_RCAR_GEN2_SCIF2 || \
 		DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \
@@ -1324,6 +1379,7 @@ config DEBUG_UART_PHYS
 
 config DEBUG_UART_VIRT
 	hex "Virtual base address of debug UART"
+	default 0xe0000a00 if DEBUG_NETX_UART
 	default 0xe0010fe0 if ARCH_RPC
 	default 0xe1000000 if DEBUG_MSM_UART
 	default 0xf0000be0 if ARCH_EBSA110
@@ -1392,18 +1448,23 @@ config DEBUG_UART_VIRT
 	default 0xfef00000 if ARCH_IXP4XX && !CPU_BIG_ENDIAN
 	default 0xfef00003 if ARCH_IXP4XX && CPU_BIG_ENDIAN
 	default 0xfef36000 if DEBUG_HIGHBANK_UART
+	default 0xfefb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1
+	default 0xfefb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2
+	default 0xfefb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3
 	default 0xfefff700 if ARCH_IOP33X
 	default 0xff003000 if DEBUG_U300_UART
 	default DEBUG_UART_PHYS if !MMU
 	depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
 		DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \
-		DEBUG_MSM_UART || DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
+		DEBUG_MSM_UART || DEBUG_NETX_UART || \
+		DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
 		DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART
 
 config DEBUG_UART_8250_SHIFT
 	int "Register offset shift for the 8250 debug UART"
 	depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250
-	default 0 if FOOTBRIDGE || ARCH_IOP32X || DEBUG_BCM_5301X
+	default 0 if FOOTBRIDGE || ARCH_IOP32X || DEBUG_BCM_5301X || \
+		DEBUG_OMAP7XXUART1 || DEBUG_OMAP7XXUART2 || DEBUG_OMAP7XXUART3
 	default 2
 
 config DEBUG_UART_8250_WORD
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c1785eec2cf7..7f99cd652203 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -266,6 +266,7 @@ core-$(CONFIG_KVM_ARM_HOST) 	+= arch/arm/kvm/
 
 # If we have a machine-specific directory, then include it in the build.
 core-y				+= arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
+core-y				+= arch/arm/probes/
 core-y				+= arch/arm/net/
 core-y				+= arch/arm/crypto/
 core-y				+= arch/arm/firmware/
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 132c70e2d2f1..c41a793b519c 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -178,7 +178,7 @@ not_angel:
 
 		/*
 		 * Set up a page table only if it won't overwrite ourself.
-		 * That means r4 < pc && r4 - 16k page directory > &_end.
+		 * That means r4 < pc || r4 - 16k page directory > &_end.
 		 * Given that r4 > &_end is most unfrequent, we add a rough
 		 * additional 1MB of room for a possible appended DTB.
 		 */
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index bcc9e63c8070..8e45ea44317e 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -81,6 +81,15 @@
 		reg = <0x10023CA0 0x20>;
 	};
 
+	l2c: l2-cache-controller@10502000 {
+		compatible = "arm,pl310-cache";
+		reg = <0x10502000 0x1000>;
+		cache-unified;
+		cache-level = <2>;
+		arm,tag-latency = <2 2 1>;
+		arm,data-latency = <2 2 1>;
+	};
+
 	gic: interrupt-controller@10490000 {
 		cpu-offset = <0x8000>;
 	};
diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi
index 93b70402e943..8bc97c415c9a 100644
--- a/arch/arm/boot/dts/exynos4x12.dtsi
+++ b/arch/arm/boot/dts/exynos4x12.dtsi
@@ -54,6 +54,20 @@
 		reg = <0x10023CA0 0x20>;
 	};
 
+	l2c: l2-cache-controller@10502000 {
+		compatible = "arm,pl310-cache";
+		reg = <0x10502000 0x1000>;
+		cache-unified;
+		cache-level = <2>;
+		arm,tag-latency = <2 2 1>;
+		arm,data-latency = <3 2 1>;
+		arm,double-linefill = <1>;
+		arm,double-linefill-incr = <0>;
+		arm,double-linefill-wrap = <1>;
+		arm,prefetch-drop = <1>;
+		arm,prefetch-offset = <7>;
+	};
+
 	clock: clock-controller@10030000 {
 		compatible = "samsung,exynos4412-clock";
 		reg = <0x10030000 0x20000>;
diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig
index 4f2ec3ac138e..c3058da631da 100644
--- a/arch/arm/configs/iop32x_defconfig
+++ b/arch/arm/configs/iop32x_defconfig
@@ -106,6 +106,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
 CONFIG_KEYS=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_CRYPTO_NULL=y
diff --git a/arch/arm/configs/iop33x_defconfig b/arch/arm/configs/iop33x_defconfig
index aa36128abca2..713faeee8cf4 100644
--- a/arch/arm/configs/iop33x_defconfig
+++ b/arch/arm/configs/iop33x_defconfig
@@ -87,5 +87,6 @@ CONFIG_DEBUG_KERNEL=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRC32 is not set
diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig
index 1af665e847d1..24636cfdf6df 100644
--- a/arch/arm/configs/ixp4xx_defconfig
+++ b/arch/arm/configs/ixp4xx_defconfig
@@ -202,3 +202,4 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_ERRORS=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig
index 9f56ca3985ae..c100b7df5441 100644
--- a/arch/arm/configs/lpc32xx_defconfig
+++ b/arch/arm/configs/lpc32xx_defconfig
@@ -204,6 +204,7 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_FTRACE is not set
 # CONFIG_ARM_UNWIND is not set
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig
index 0dae1c1f007a..85d10d2e3d66 100644
--- a/arch/arm/configs/mv78xx0_defconfig
+++ b/arch/arm/configs/mv78xx0_defconfig
@@ -132,6 +132,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
 CONFIG_CRYPTO_CBC=m
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig
index 952430d9e2d9..855143fac6bd 100644
--- a/arch/arm/configs/orion5x_defconfig
+++ b/arch/arm/configs/orion5x_defconfig
@@ -156,6 +156,7 @@ CONFIG_LATENCYTOP=y
 # CONFIG_FTRACE is not set
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
 CONFIG_CRYPTO_CBC=m
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig
index 00515ef9782d..89631795a915 100644
--- a/arch/arm/configs/rpc_defconfig
+++ b/arch/arm/configs/rpc_defconfig
@@ -131,3 +131,4 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_8250=y
diff --git a/arch/arm/include/asm/bitrev.h b/arch/arm/include/asm/bitrev.h
new file mode 100644
index 000000000000..ec291c350ea3
--- /dev/null
+++ b/arch/arm/include/asm/bitrev.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_BITREV_H
+#define __ASM_BITREV_H
+
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	__asm__ ("rbit %0, %1" : "=r" (x) : "r" (x));
+	return x;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	return __arch_bitrev32((u32)x) >> 16;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	return __arch_bitrev32((u32)x) >> 24;
+}
+
+#endif
diff --git a/arch/arm/include/asm/compiler.h b/arch/arm/include/asm/compiler.h
index 8155db2f7fa1..29fe85e59439 100644
--- a/arch/arm/include/asm/compiler.h
+++ b/arch/arm/include/asm/compiler.h
@@ -8,8 +8,21 @@
  * This string is meant to be concatenated with the inline asm string and
  * will cause compilation to stop on mismatch.
  * (for details, see gcc PR 15089)
+ * For compatibility with clang, we have to specifically take the equivalence
+ * of 'r11' <-> 'fp' and 'r12' <-> 'ip' into account as well.
  */
-#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+#define __asmeq(x, y)				\
+	".ifnc " x "," y "; "			\
+	  ".ifnc " x y ",fpr11; " 		\
+	    ".ifnc " x y ",r11fp; "		\
+	      ".ifnc " x y ",ipr12; " 		\
+	        ".ifnc " x y ",r12ip; "		\
+	          ".err; "			\
+	        ".endif; "			\
+	      ".endif; "			\
+	    ".endif; "				\
+	  ".endif; "				\
+	".endif\n\t"
 
 
 #endif /* __ASM_ARM_COMPILER_H */
diff --git a/arch/arm/kernel/insn.h b/arch/arm/include/asm/insn.h
index e96065da4dae..e96065da4dae 100644
--- a/arch/arm/kernel/insn.h
+++ b/arch/arm/include/asm/insn.h
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index 49fa0dfaad33..3ea9be559726 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -22,7 +22,6 @@
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 #define MAX_INSN_SIZE			2
-#define MAX_STACK_SIZE			64	/* 32 would probably be OK */
 
 #define flush_insn_slot(p)		do { } while (0)
 #define kretprobe_blacklist_size	0
@@ -51,5 +50,37 @@ int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
 int kprobe_exceptions_notify(struct notifier_block *self,
 			     unsigned long val, void *data);
 
+/* optinsn template addresses */
+extern __visible kprobe_opcode_t optprobe_template_entry;
+extern __visible kprobe_opcode_t optprobe_template_val;
+extern __visible kprobe_opcode_t optprobe_template_call;
+extern __visible kprobe_opcode_t optprobe_template_end;
+extern __visible kprobe_opcode_t optprobe_template_sub_sp;
+extern __visible kprobe_opcode_t optprobe_template_add_sp;
+extern __visible kprobe_opcode_t optprobe_template_restore_begin;
+extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn;
+extern __visible kprobe_opcode_t optprobe_template_restore_end;
+
+#define MAX_OPTIMIZED_LENGTH	4
+#define MAX_OPTINSN_SIZE				\
+	((unsigned long)&optprobe_template_end -	\
+	 (unsigned long)&optprobe_template_entry)
+#define RELATIVEJUMP_SIZE	4
+
+struct arch_optimized_insn {
+	/*
+	 * copy of the original instructions.
+	 * Different from x86, ARM kprobe_opcode_t is u32.
+	 */
+#define MAX_COPIED_INSN	DIV_ROUND_UP(RELATIVEJUMP_SIZE, sizeof(kprobe_opcode_t))
+	kprobe_opcode_t copied_insn[MAX_COPIED_INSN];
+	/* detour code buffer */
+	kprobe_opcode_t *insn;
+	/*
+	 * We always copy one instruction on ARM,
+	 * so size will always be 4, and unlike x86, there is no
+	 * need for a size field.
+	 */
+};
 
 #endif /* _ARM_KPROBES_H */
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 891a56b35bcf..563b92fc2f41 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -23,6 +23,8 @@
 
 #include <linux/types.h>
 
+struct l2x0_regs;
+
 struct outer_cache_fns {
 	void (*inv_range)(unsigned long, unsigned long);
 	void (*clean_range)(unsigned long, unsigned long);
@@ -36,6 +38,7 @@ struct outer_cache_fns {
 
 	/* This is an ARM L2C thing */
 	void (*write_sec)(unsigned long, unsigned);
+	void (*configure)(const struct l2x0_regs *);
 };
 
 extern struct outer_cache_fns outer_cache;
diff --git a/arch/arm/kernel/patch.h b/arch/arm/include/asm/patch.h
index 77e054c2f6cd..77e054c2f6cd 100644
--- a/arch/arm/kernel/patch.h
+++ b/arch/arm/include/asm/patch.h
diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h
index 806cfe622a9e..1e5b9bb92270 100644
--- a/arch/arm/include/asm/probes.h
+++ b/arch/arm/include/asm/probes.h
@@ -19,6 +19,8 @@
 #ifndef _ASM_PROBES_H
 #define _ASM_PROBES_H
 
+#ifndef __ASSEMBLY__
+
 typedef u32 probes_opcode_t;
 
 struct arch_probes_insn;
@@ -38,6 +40,19 @@ struct arch_probes_insn {
 	probes_check_cc			*insn_check_cc;
 	probes_insn_singlestep_t	*insn_singlestep;
 	probes_insn_fn_t		*insn_fn;
+	int				stack_space;
+	unsigned long			register_usage_flags;
+	bool				kprobe_direct_exec;
 };
 
+#endif /* __ASSEMBLY__ */
+
+/*
+ * We assume one instruction can consume at most 64 bytes stack, which is
+ * 'push {r0-r15}'. Instructions consume more or unknown stack space like
+ * 'str r0, [sp, #-80]' and 'str r0, [sp, r1]' should be prohibit to probe.
+ * Both kprobe and jprobe use this macro.
+ */
+#define MAX_STACK_SIZE			64
+
 #endif
diff --git a/arch/arm/mach-ks8695/include/mach/debug-macro.S b/arch/arm/include/debug/ks8695.S
index a79e48981202..961da1f32ab3 100644
--- a/arch/arm/mach-ks8695/include/mach/debug-macro.S
+++ b/arch/arm/include/debug/ks8695.S
@@ -1,5 +1,5 @@
 /*
- * arch/arm/mach-ks8695/include/mach/debug-macro.S
+ * arch/arm/include/debug/ks8695.S
  *
  * Copyright (C) 2006 Ben Dooks <ben@simtec.co.uk>
  * Copyright (C) 2006 Simtec Electronics
@@ -11,8 +11,12 @@
  * published by the Free Software Foundation.
  */
 
-#include <mach/hardware.h>
-#include <mach/regs-uart.h>
+#define KS8695_UART_PA	0x03ffe000
+#define KS8695_UART_VA	0xf00fe000
+#define KS8695_URTH	(0x04)
+#define KS8695_URLS	(0x14)
+#define URLS_URTE	(1 << 6)
+#define URLS_URTHRE	(1 << 5)
 
 	.macro	addruart, rp, rv, tmp
 		ldr	\rp, =KS8695_UART_PA		@ physical base address
diff --git a/arch/arm/mach-netx/include/mach/debug-macro.S b/arch/arm/include/debug/netx.S
index 247781e096e2..81e1b2af70f7 100644
--- a/arch/arm/mach-netx/include/mach/debug-macro.S
+++ b/arch/arm/include/debug/netx.S
@@ -1,5 +1,4 @@
-/* arch/arm/mach-netx/include/mach/debug-macro.S
- *
+/*
  * Debugging macro include header
  *
  *  Copyright (C) 1994-1999 Russell King
@@ -11,26 +10,27 @@
  *
 */
 
-#include "hardware.h"
+#define UART_DATA 0
+#define UART_FLAG 0x18
+#define UART_FLAG_BUSY (1 << 3)
 
 		.macro	addruart, rp, rv, tmp
-		mov	\rp, #0x00000a00
-		orr	\rv, \rp, #io_p2v(0x00100000)	@ virtual
-		orr	\rp, \rp, #0x00100000		@ physical
+		ldr	\rp, =CONFIG_DEBUG_UART_PHYS
+		ldr	\rv, =CONFIG_DEBUG_UART_VIRT
 		.endm
 
 		.macro	senduart,rd,rx
-		str	\rd, [\rx, #0]
+		str	\rd, [\rx, #UART_DATA]
 		.endm
 
 		.macro	busyuart,rd,rx
-1002:		ldr	\rd, [\rx, #0x18]
-		tst	\rd, #(1 << 3)
+1002:		ldr	\rd, [\rx, #UART_FLAG]
+		tst	\rd, #UART_FLAG_BUSY
 		bne	1002b
 		.endm
 
 		.macro	waituart,rd,rx
-1001:		ldr	\rd, [\rx, #0x18]
-		tst	\rd, #(1 << 3)
+1001:		ldr	\rd, [\rx, #UART_FLAG]
+		tst	\rd, #UART_FLAG_BUSY
 		bne	1001b
 		.endm
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index fb2b71ebe3f2..902397dd1000 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -51,20 +51,8 @@ obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
-obj-$(CONFIG_UPROBES)		+= probes.o probes-arm.o uprobes.o uprobes-arm.o
-obj-$(CONFIG_KPROBES)		+= probes.o kprobes.o kprobes-common.o patch.o
-ifdef CONFIG_THUMB2_KERNEL
-obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o probes-thumb.o
-else
-obj-$(CONFIG_KPROBES)		+= kprobes-arm.o probes-arm.o
-endif
-obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
-test-kprobes-objs		:= kprobes-test.o
-ifdef CONFIG_THUMB2_KERNEL
-test-kprobes-objs		+= kprobes-test-thumb.o
-else
-test-kprobes-objs		+= kprobes-test-arm.o
-endif
+# Main staffs in KPROBES are in arch/arm/probes/ .
+obj-$(CONFIG_KPROBES)		+= patch.o insn.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
 obj-$(CONFIG_KGDB)		+= kgdb.o patch.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 2f5555d307b3..672b21942fff 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -31,6 +31,7 @@
 
 #include "entry-header.S"
 #include <asm/entry-macro-multi.S>
+#include <asm/probes.h>
 
 /*
  * Interrupt handling.
@@ -249,7 +250,7 @@ __und_svc:
 	@ If a kprobe is about to simulate a "stmdb sp..." instruction,
 	@ it obviously needs free stack space which then will belong to
 	@ the saved context.
-	svc_entry 64
+	svc_entry MAX_STACK_SIZE
 #else
 	svc_entry
 #endif
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index b8c75e45a950..709ee1d6d4df 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -20,8 +20,7 @@
 #include <asm/cacheflush.h>
 #include <asm/opcodes.h>
 #include <asm/ftrace.h>
-
-#include "insn.h"
+#include <asm/insn.h>
 
 #ifdef CONFIG_THUMB2_KERNEL
 #define	NOP		0xf85deb04	/* pop.w {lr} */
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 664eee8c4a26..01963273c07a 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -346,6 +346,12 @@ __turn_mmu_on_loc:
 
 #if defined(CONFIG_SMP)
 	.text
+ENTRY(secondary_startup_arm)
+	.arm
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
 ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
@@ -385,6 +391,7 @@ ENTRY(secondary_startup)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
  THUMB(	ret	r12				)
 ENDPROC(secondary_startup)
+ENDPROC(secondary_startup_arm)
 
 	/*
 	 * r6  = &secondary_data
@@ -586,7 +593,7 @@ __fixup_pv_table:
 	add	r5, r5, r3	@ adjust table end address
 	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
 	add	r7, r7, r3	@ adjust __pv_offset address
-	mov	r0, r8, lsr #12	@ convert to PFN
+	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
 	str	r0, [r6]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
 	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index ad857bada96c..350f188c92d2 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -109,7 +109,8 @@ void __init init_IRQ(void)
 
 	if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) &&
 	    (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) {
-		outer_cache.write_sec = machine_desc->l2c_write_sec;
+		if (!outer_cache.write_sec)
+			outer_cache.write_sec = machine_desc->l2c_write_sec;
 		ret = l2x0_of_init(machine_desc->l2c_aux_val,
 				   machine_desc->l2c_aux_mask);
 		if (ret)
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index afeeb9ea6f43..e39cbf488cfe 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -1,8 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/jump_label.h>
-
-#include "insn.h"
-#include "patch.h"
+#include <asm/patch.h>
+#include <asm/insn.h>
 
 #ifdef HAVE_JUMP_LABEL
 
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 07db2f8a1b45..a6ad93c9bce3 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -14,10 +14,9 @@
 #include <linux/kgdb.h>
 #include <linux/uaccess.h>
 
+#include <asm/patch.h>
 #include <asm/traps.h>
 
-#include "patch.h"
-
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
 	{ "r0", 4, offsetof(struct pt_regs, ARM_r0)},
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
index 5038960e3c55..69bda1a5707e 100644
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -8,8 +8,7 @@
 #include <asm/fixmap.h>
 #include <asm/smp_plat.h>
 #include <asm/opcodes.h>
-
-#include "patch.h"
+#include <asm/patch.h>
 
 struct patch {
 	void *addr;
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index 2835d35234ca..9a2f882a0a2d 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -14,10 +14,6 @@ extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
 extern void cpu_resume_mmu(void);
 
 #ifdef CONFIG_MMU
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
 	struct mm_struct *mm = current->active_mm;
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 0573faab96ad..d8a780799506 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -15,19 +15,8 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
 		   call_with_stack.o bswapsdi2.o
 
-mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
-
-# the code in uaccess.S is not preemption safe and
-# probably faster on ARMv3 only
-ifeq ($(CONFIG_PREEMPT),y)
-  mmu-y	+= copy_from_user.o copy_to_user.o
-else
-ifneq ($(CONFIG_CPU_32v3),y)
-  mmu-y	+= copy_from_user.o copy_to_user.o
-else
-  mmu-y	+= uaccess.o
-endif
-endif
+mmu-y		:= clear_user.o copy_page.o getuser.o putuser.o       \
+		   copy_from_user.o copy_to_user.o
 
 # using lib_ here won't override already available weak symbols
 obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
deleted file mode 100644
index e50520904b76..000000000000
--- a/arch/arm/lib/uaccess.S
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- *  linux/arch/arm/lib/uaccess.S
- *
- *  Copyright (C) 1995, 1996,1997,1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Routines to block copy data to/from user memory
- *   These are highly optimised both for the 4k page size
- *   and for various alignments.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-#include <asm/domain.h>
-
-		.text
-
-#define PAGE_SHIFT 12
-
-/* Prototype: int __copy_to_user(void *to, const char *from, size_t n)
- * Purpose  : copy a block to user memory from kernel memory
- * Params   : to   - user memory
- *          : from - kernel memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-
-.Lc2u_dest_not_aligned:
-		rsb	ip, ip, #4
-		cmp	ip, #2
-		ldrb	r3, [r1], #1
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #1
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		sub	r2, r2, ip
-		b	.Lc2u_dest_aligned
-
-ENTRY(__copy_to_user)
-		stmfd	sp!, {r2, r4 - r7, lr}
-		cmp	r2, #4
-		blt	.Lc2u_not_enough
-		ands	ip, r0, #3
-		bne	.Lc2u_dest_not_aligned
-.Lc2u_dest_aligned:
-
-		ands	ip, r1, #3
-		bne	.Lc2u_src_not_aligned
-/*
- * Seeing as there has to be at least 8 bytes to copy, we can
- * copy one word, and force a user-mode page fault...
- */
-
-.Lc2u_0fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_0nowords
-		ldr	r3, [r1], #4
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_0fupi
-/*
- * ip = max no. of bytes to copy before needing another "strt" insn
- */
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #32
-		blt	.Lc2u_0rem8lp
-
-.Lc2u_0cpy8lp:	ldmia	r1!, {r3 - r6}
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		ldmia	r1!, {r3 - r6}
-		subs	ip, ip, #32
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_0cpy8lp
-
-.Lc2u_0rem8lp:	cmn	ip, #16
-		ldmgeia	r1!, {r3 - r6}
-		stmgeia	r0!, {r3 - r6}			@ Shouldnt fault
-		tst	ip, #8
-		ldmneia	r1!, {r3 - r4}
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		ldrne	r3, [r1], #4
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_0fupi
-.Lc2u_0nowords:	teq	ip, #0
-		beq	.Lc2u_finished
-.Lc2u_nowords:	cmp	ip, #2
-		ldrb	r3, [r1], #1
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #1
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_not_enough:
-		movs	ip, r2
-		bne	.Lc2u_nowords
-.Lc2u_finished:	mov	r0, #0
-		ldmfd	sp!, {r2, r4 - r7, pc}
-
-.Lc2u_src_not_aligned:
-		bic	r1, r1, #3
-		ldr	r7, [r1], #4
-		cmp	ip, #2
-		bgt	.Lc2u_3fupi
-		beq	.Lc2u_2fupi
-.Lc2u_1fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_1nowords
-		mov	r3, r7, lspull #8
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, lspush #24
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_1fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_1rem8lp
-
-.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #24
-		mov	r4, r4, lspull #8
-		orr	r4, r4, r5, lspush #24
-		mov	r5, r5, lspull #8
-		orr	r5, r5, r6, lspush #24
-		mov	r6, r6, lspull #8
-		orr	r6, r6, r7, lspush #24
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_1cpy8lp
-
-.Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #8
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, lspush #24
-		movne	r4, r4, lspull #8
-		orrne	r4, r4, r7, lspush #24
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, lspull #8
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, lspush #24
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_1fupi
-.Lc2u_1nowords:	mov	r3, r7, get_byte_1
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		movge	r3, r7, get_byte_2
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		movgt	r3, r7, get_byte_3
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_2fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_2nowords
-		mov	r3, r7, lspull #16
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, lspush #16
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_2fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_2rem8lp
-
-.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #16
-		mov	r4, r4, lspull #16
-		orr	r4, r4, r5, lspush #16
-		mov	r5, r5, lspull #16
-		orr	r5, r5, r6, lspush #16
-		mov	r6, r6, lspull #16
-		orr	r6, r6, r7, lspush #16
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_2cpy8lp
-
-.Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #16
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, lspush #16
-		movne	r4, r4, lspull #16
-		orrne	r4, r4, r7, lspush #16
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, lspull #16
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, lspush #16
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_2fupi
-.Lc2u_2nowords:	mov	r3, r7, get_byte_2
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		movge	r3, r7, get_byte_3
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #0
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_3fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_3nowords
-		mov	r3, r7, lspull #24
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, lspush #8
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_3fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_3rem8lp
-
-.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #8
-		mov	r4, r4, lspull #24
-		orr	r4, r4, r5, lspush #8
-		mov	r5, r5, lspull #24
-		orr	r5, r5, r6, lspush #8
-		mov	r6, r6, lspull #24
-		orr	r6, r6, r7, lspush #8
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_3cpy8lp
-
-.Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #24
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, lspush #8
-		movne	r4, r4, lspull #24
-		orrne	r4, r4, r7, lspush #8
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, lspull #24
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, lspush #8
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_3fupi
-.Lc2u_3nowords:	mov	r3, r7, get_byte_3
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #0
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-ENDPROC(__copy_to_user)
-
-		.pushsection .fixup,"ax"
-		.align	0
-9001:		ldmfd	sp!, {r0, r4 - r7, pc}
-		.popsection
-
-/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n);
- * Purpose  : copy a block from user memory to kernel memory
- * Params   : to   - kernel memory
- *          : from - user memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-.Lcfu_dest_not_aligned:
-		rsb	ip, ip, #4
-		cmp	ip, #2
-USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		sub	r2, r2, ip
-		b	.Lcfu_dest_aligned
-
-ENTRY(__copy_from_user)
-		stmfd	sp!, {r0, r2, r4 - r7, lr}
-		cmp	r2, #4
-		blt	.Lcfu_not_enough
-		ands	ip, r0, #3
-		bne	.Lcfu_dest_not_aligned
-.Lcfu_dest_aligned:
-		ands	ip, r1, #3
-		bne	.Lcfu_src_not_aligned
-
-/*
- * Seeing as there has to be at least 8 bytes to copy, we can
- * copy one word, and force a user-mode page fault...
- */
-
-.Lcfu_0fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_0nowords
-USER(	TUSER(	ldr)	r3, [r1], #4)
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_0fupi
-/*
- * ip = max no. of bytes to copy before needing another "strt" insn
- */
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #32
-		blt	.Lcfu_0rem8lp
-
-.Lcfu_0cpy8lp:	ldmia	r1!, {r3 - r6}			@ Shouldnt fault
-		stmia	r0!, {r3 - r6}
-		ldmia	r1!, {r3 - r6}			@ Shouldnt fault
-		subs	ip, ip, #32
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_0cpy8lp
-
-.Lcfu_0rem8lp:	cmn	ip, #16
-		ldmgeia	r1!, {r3 - r6}			@ Shouldnt fault
-		stmgeia	r0!, {r3 - r6}
-		tst	ip, #8
-		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-	TUSER(	ldrne) r3, [r1], #4			@ Shouldnt fault
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_0fupi
-.Lcfu_0nowords:	teq	ip, #0
-		beq	.Lcfu_finished
-.Lcfu_nowords:	cmp	ip, #2
-USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_not_enough:
-		movs	ip, r2
-		bne	.Lcfu_nowords
-.Lcfu_finished:	mov	r0, #0
-		add	sp, sp, #8
-		ldmfd	sp!, {r4 - r7, pc}
-
-.Lcfu_src_not_aligned:
-		bic	r1, r1, #3
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		cmp	ip, #2
-		bgt	.Lcfu_3fupi
-		beq	.Lcfu_2fupi
-.Lcfu_1fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_1nowords
-		mov	r3, r7, lspull #8
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, lspush #24
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_1fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_1rem8lp
-
-.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #24
-		mov	r4, r4, lspull #8
-		orr	r4, r4, r5, lspush #24
-		mov	r5, r5, lspull #8
-		orr	r5, r5, r6, lspush #24
-		mov	r6, r6, lspull #8
-		orr	r6, r6, r7, lspush #24
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_1cpy8lp
-
-.Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #8
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, lspush #24
-		movne	r4, r4, lspull #8
-		orrne	r4, r4, r7, lspush #24
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, lspull #8
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, lspush #24
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_1fupi
-.Lcfu_1nowords:	mov	r3, r7, get_byte_1
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-		movge	r3, r7, get_byte_2
-		strgeb	r3, [r0], #1
-		movgt	r3, r7, get_byte_3
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_2fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_2nowords
-		mov	r3, r7, lspull #16
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, lspush #16
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_2fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_2rem8lp
-
-
-.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #16
-		mov	r4, r4, lspull #16
-		orr	r4, r4, r5, lspush #16
-		mov	r5, r5, lspull #16
-		orr	r5, r5, r6, lspush #16
-		mov	r6, r6, lspull #16
-		orr	r6, r6, r7, lspush #16
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_2cpy8lp
-
-.Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #16
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, lspush #16
-		movne	r4, r4, lspull #16
-		orrne	r4, r4, r7, lspush #16
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, lspull #16
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, lspush #16
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_2fupi
-.Lcfu_2nowords:	mov	r3, r7, get_byte_2
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-		movge	r3, r7, get_byte_3
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_3fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_3nowords
-		mov	r3, r7, lspull #24
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, lspush #8
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_3fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_3rem8lp
-
-.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, lspush #8
-		mov	r4, r4, lspull #24
-		orr	r4, r4, r5, lspush #8
-		mov	r5, r5, lspull #24
-		orr	r5, r5, r6, lspush #8
-		mov	r6, r6, lspull #24
-		orr	r6, r6, r7, lspush #8
-		stmia	r0!, {r3 - r6}
-		subs	ip, ip, #16
-		bpl	.Lcfu_3cpy8lp
-
-.Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #24
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, lspush #8
-		movne	r4, r4, lspull #24
-		orrne	r4, r4, r7, lspush #8
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, lspull #24
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, lspush #8
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_3fupi
-.Lcfu_3nowords:	mov	r3, r7, get_byte_3
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-ENDPROC(__copy_from_user)
-
-		.pushsection .fixup,"ax"
-		.align	0
-		/*
-		 * We took an exception.  r0 contains a pointer to
-		 * the byte not copied.
-		 */
-9001:		ldr	r2, [sp], #4			@ void *to
-		sub	r2, r0, r2			@ bytes copied
-		ldr	r1, [sp], #4			@ unsigned long count
-		subs	r4, r1, r2			@ bytes left to copy
-		movne	r1, r4
-		blne	__memzero
-		mov	r0, r4
-		ldmfd	sp!, {r4 - r7, pc}
-		.popsection
-
diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c
index 766f57d2f029..4791a3cc00f9 100644
--- a/arch/arm/mach-exynos/firmware.c
+++ b/arch/arm/mach-exynos/firmware.c
@@ -17,6 +17,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <asm/firmware.h>
+#include <asm/hardware/cache-l2x0.h>
 #include <asm/suspend.h>
 
 #include <mach/map.h>
@@ -136,6 +137,43 @@ static const struct firmware_ops exynos_firmware_ops = {
 	.resume			= IS_ENABLED(CONFIG_EXYNOS_CPU_SUSPEND) ? exynos_resume : NULL,
 };
 
+static void exynos_l2_write_sec(unsigned long val, unsigned reg)
+{
+	static int l2cache_enabled;
+
+	switch (reg) {
+	case L2X0_CTRL:
+		if (val & L2X0_CTRL_EN) {
+			/*
+			 * Before the cache can be enabled, due to firmware
+			 * design, SMC_CMD_L2X0INVALL must be called.
+			 */
+			if (!l2cache_enabled) {
+				exynos_smc(SMC_CMD_L2X0INVALL, 0, 0, 0);
+				l2cache_enabled = 1;
+			}
+		} else {
+			l2cache_enabled = 0;
+		}
+		exynos_smc(SMC_CMD_L2X0CTRL, val, 0, 0);
+		break;
+
+	case L2X0_DEBUG_CTRL:
+		exynos_smc(SMC_CMD_L2X0DEBUG, val, 0, 0);
+		break;
+
+	default:
+		WARN_ONCE(1, "%s: ignoring write to reg 0x%x\n", __func__, reg);
+	}
+}
+
+static void exynos_l2_configure(const struct l2x0_regs *regs)
+{
+	exynos_smc(SMC_CMD_L2X0SETUP1, regs->tag_latency, regs->data_latency,
+		   regs->prefetch_ctrl);
+	exynos_smc(SMC_CMD_L2X0SETUP2, regs->pwr_ctrl, regs->aux_ctrl, 0);
+}
+
 void __init exynos_firmware_init(void)
 {
 	struct device_node *nd;
@@ -155,4 +193,16 @@ void __init exynos_firmware_init(void)
 	pr_info("Running under secure firmware.\n");
 
 	register_firmware_ops(&exynos_firmware_ops);
+
+	/*
+	 * Exynos 4 SoCs (based on Cortex A9 and equipped with L2C-310),
+	 * running under secure firmware, require certain registers of L2
+	 * cache controller to be written in secure mode. Here .write_sec
+	 * callback is provided to perform necessary SMC calls.
+	 */
+	if (IS_ENABLED(CONFIG_CACHE_L2X0) &&
+	    read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
+		outer_cache.write_sec = exynos_l2_write_sec;
+		outer_cache.configure = exynos_l2_configure;
+	}
 }
diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S
index e3c373082bbe..31d25834b9c4 100644
--- a/arch/arm/mach-exynos/sleep.S
+++ b/arch/arm/mach-exynos/sleep.S
@@ -16,6 +16,8 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/hardware/cache-l2x0.h>
 #include "smc.h"
 
 #define CPU_MASK	0xff0ffff0
@@ -74,6 +76,45 @@ ENTRY(exynos_cpu_resume_ns)
 	mov	r0, #SMC_CMD_C15RESUME
 	dsb
 	smc	#0
+#ifdef CONFIG_CACHE_L2X0
+	adr	r0, 1f
+	ldr	r2, [r0]
+	add	r0, r2, r0
+
+	/* Check that the address has been initialised. */
+	ldr	r1, [r0, #L2X0_R_PHY_BASE]
+	teq	r1, #0
+	beq	skip_l2x0
+
+	/* Check if controller has been enabled. */
+	ldr	r2, [r1, #L2X0_CTRL]
+	tst	r2, #0x1
+	bne	skip_l2x0
+
+	ldr	r1, [r0, #L2X0_R_TAG_LATENCY]
+	ldr	r2, [r0, #L2X0_R_DATA_LATENCY]
+	ldr	r3, [r0, #L2X0_R_PREFETCH_CTRL]
+	mov	r0, #SMC_CMD_L2X0SETUP1
+	smc	#0
+
+	/* Reload saved regs pointer because smc corrupts registers. */
+	adr	r0, 1f
+	ldr	r2, [r0]
+	add	r0, r2, r0
+
+	ldr	r1, [r0, #L2X0_R_PWR_CTRL]
+	ldr	r2, [r0, #L2X0_R_AUX_CTRL]
+	mov	r0, #SMC_CMD_L2X0SETUP2
+	smc	#0
+
+	mov	r0, #SMC_CMD_L2X0INVALL
+	smc	#0
+
+	mov	r1, #1
+	mov	r0, #SMC_CMD_L2X0CTRL
+	smc	#0
+skip_l2x0:
+#endif /* CONFIG_CACHE_L2X0 */
 skip_cp15:
 	b	cpu_resume
 ENDPROC(exynos_cpu_resume_ns)
@@ -83,3 +124,8 @@ cp15_save_diag:
 	.globl cp15_save_power
 cp15_save_power:
 	.long	0	@ cp15 power control
+
+#ifdef CONFIG_CACHE_L2X0
+	.align
+1:	.long	l2x0_saved_regs - .
+#endif /* CONFIG_CACHE_L2X0 */
diff --git a/arch/arm/mach-omap1/include/mach/debug-macro.S b/arch/arm/mach-omap1/include/mach/debug-macro.S
deleted file mode 100644
index 5c1a26c9f490..000000000000
--- a/arch/arm/mach-omap1/include/mach/debug-macro.S
+++ /dev/null
@@ -1,101 +0,0 @@
-/* arch/arm/mach-omap1/include/mach/debug-macro.S
- *
- * Debugging macro include header
- *
- *  Copyright (C) 1994-1999 Russell King
- *  Moved from linux/arch/arm/kernel/debug.S by Ben Dooks
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
-*/
-
-#include <linux/serial_reg.h>
-
-#include "serial.h"
-
-		.pushsection .data
-omap_uart_phys:	.word	0x0
-omap_uart_virt:	.word	0x0
-		.popsection
-
-		/*
-		 * Note that this code won't work if the bootloader passes
-		 * a wrong machine ID number in r1. To debug, just hardcode
-		 * the desired UART phys and virt addresses temporarily into
-		 * the omap_uart_phys and omap_uart_virt above.
-		 */
-		.macro	addruart, rp, rv, tmp
-
-		/* Use omap_uart_phys/virt if already configured */
-9:		adr	\rp, 99f		@ get effective addr of 99f
-		ldr	\rv, [\rp]		@ get absolute addr of 99f
-		sub	\rv, \rv, \rp		@ offset between the two
-		ldr	\rp, [\rp, #4]		@ abs addr of omap_uart_phys
-		sub	\tmp, \rp, \rv		@ make it effective
-		ldr	\rp, [\tmp, #0]		@ omap_uart_phys
-		ldr	\rv, [\tmp, #4]		@ omap_uart_virt
-		cmp	\rp, #0			@ is port configured?
-		cmpne	\rv, #0
-		bne	100f			@ already configured
-
-		/* Check the debug UART configuration set in uncompress.h */
-		and	\rp, pc, #0xff000000
-		ldr	\rv, =OMAP_UART_INFO_OFS
-		ldr	\rp, [\rp, \rv]
-
-		/* Select the UART to use based on the UART1 scratchpad value */
-10:		cmp	\rp, #0			@ no port configured?
-		beq	11f			@ if none, try to use UART1
-		cmp	\rp, #OMAP1UART1
-		beq	11f			@ configure OMAP1UART1
-		cmp	\rp, #OMAP1UART2
-		beq	12f			@ configure OMAP1UART2
-		cmp	\rp, #OMAP1UART3
-		beq	13f			@ configure OMAP2UART3
-
-		/* Configure the UART offset from the phys/virt base */
-11:		mov	\rp, #0x00fb0000	@ OMAP1UART1
-		b	98f
-12:		mov	\rp, #0x00fb0000	@ OMAP1UART1
-		orr	\rp, \rp, #0x00000800	@ OMAP1UART2
-		b	98f
-13:		mov	\rp, #0x00fb0000	@ OMAP1UART1
-		orr	\rp, \rp, #0x00000800	@ OMAP1UART2
-		orr	\rp, \rp, #0x00009000	@ OMAP1UART3
-
-		/* Store both phys and virt address for the uart */
-98:		add	\rp, \rp, #0xff000000	@ phys base
-		str	\rp, [\tmp, #0]		@ omap_uart_phys
-		sub	\rp, \rp, #0xff000000	@ phys base
-		add	\rp, \rp, #0xfe000000	@ virt base
-		str	\rp, [\tmp, #4]		@ omap_uart_virt
-		b	9b
-
-		.align
-99:		.word	.
-		.word	omap_uart_phys
-		.ltorg
-
-100:
-		.endm
-
-		.macro	senduart,rd,rx
-		strb	\rd, [\rx]
-		.endm
-
-		.macro	busyuart,rd,rx
-1001:		ldrb	\rd, [\rx, #(UART_LSR << OMAP_PORT_SHIFT)]
-		and	\rd, \rd, #(UART_LSR_TEMT | UART_LSR_THRE)
-		teq	\rd, #(UART_LSR_TEMT | UART_LSR_THRE)
-		beq	1002f
-		ldrb	\rd, [\rx, #(UART_LSR << OMAP7XX_PORT_SHIFT)]
-		and	\rd, \rd, #(UART_LSR_TEMT | UART_LSR_THRE)
-		teq	\rd, #(UART_LSR_TEMT | UART_LSR_THRE)
-		bne	1001b
-1002:
-		.endm
-
-		.macro	waituart,rd,rx
-		.endm
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index b61c049f92d6..42b7f4c9169b 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -189,6 +189,9 @@ static const char *const omap4_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)")
+	.l2c_aux_val	= OMAP_L2C_AUX_CTRL,
+	.l2c_aux_mask	= 0xcf9fffff,
+	.l2c_write_sec	= omap4_l2c310_write_sec,
 	.reserve	= omap_reserve,
 	.smp		= smp_ops(omap4_smp_ops),
 	.map_io		= omap4_map_io,
@@ -232,6 +235,9 @@ static const char *const am43_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(AM43_DT, "Generic AM43 (Flattened Device Tree)")
+	.l2c_aux_val	= OMAP_L2C_AUX_CTRL,
+	.l2c_aux_mask	= 0xcf9fffff,
+	.l2c_write_sec	= omap4_l2c310_write_sec,
 	.map_io		= am33xx_map_io,
 	.init_early	= am43xx_init_early,
 	.init_late	= am43xx_init_late,
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 64e44d6d07c0..3933b8aa4f01 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -35,6 +35,7 @@
 #include <linux/irqchip/irq-omap-intc.h>
 
 #include <asm/proc-fns.h>
+#include <asm/hardware/cache-l2x0.h>
 
 #include "i2c.h"
 #include "serial.h"
@@ -94,11 +95,18 @@ extern void omap3_gptimer_timer_init(void);
 extern void omap4_local_timer_init(void);
 #ifdef CONFIG_CACHE_L2X0
 int omap_l2_cache_init(void);
+#define OMAP_L2C_AUX_CTRL	(L2C_AUX_CTRL_SHARED_OVERRIDE | \
+				 L310_AUX_CTRL_DATA_PREFETCH | \
+				 L310_AUX_CTRL_INSTR_PREFETCH)
+void omap4_l2c310_write_sec(unsigned long val, unsigned reg);
 #else
 static inline int omap_l2_cache_init(void)
 {
 	return 0;
 }
+
+#define OMAP_L2C_AUX_CTRL	0
+#define omap4_l2c310_write_sec	NULL
 #endif
 extern void omap5_realtime_timer_init(void);
 
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index cc30e49a4cc2..2418bdf28ca2 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -166,7 +166,7 @@ void __iomem *omap4_get_l2cache_base(void)
 	return l2cache_base;
 }
 
-static void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
+void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
 {
 	unsigned smc_op;
 
@@ -201,24 +201,10 @@ static void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
 
 int __init omap_l2_cache_init(void)
 {
-	u32 aux_ctrl;
-
 	/* Static mapping, never released */
 	l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K);
 	if (WARN_ON(!l2cache_base))
 		return -ENOMEM;
-
-	/* 16-way associativity, parity disabled, way size - 64KB (es2.0 +) */
-	aux_ctrl = L2C_AUX_CTRL_SHARED_OVERRIDE |
-		   L310_AUX_CTRL_DATA_PREFETCH |
-		   L310_AUX_CTRL_INSTR_PREFETCH;
-
-	outer_cache.write_sec = omap4_l2c310_write_sec;
-	if (of_have_populated_dt())
-		l2x0_of_init(aux_ctrl, 0xcf9fffff);
-	else
-		l2x0_init(l2cache_base, aux_ctrl, 0xcf9fffff);
-
 	return 0;
 }
 #endif
diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c
index d6908569ecaf..09cffed4c0a4 100644
--- a/arch/arm/mach-qcom/platsmp.c
+++ b/arch/arm/mach-qcom/platsmp.c
@@ -44,7 +44,7 @@
 #define APCS_SAW2_VCTL		0x14
 #define APCS_SAW2_2_VCTL	0x1c
 
-extern void secondary_startup(void);
+extern void secondary_startup_arm(void);
 
 static DEFINE_SPINLOCK(boot_lock);
 
@@ -337,7 +337,7 @@ static void __init qcom_smp_prepare_cpus(unsigned int max_cpus)
 		flags |= cold_boot_flags[map];
 	}
 
-	if (scm_set_boot_addr(virt_to_phys(secondary_startup), flags)) {
+	if (scm_set_boot_addr(virt_to_phys(secondary_startup_arm), flags)) {
 		for_each_present_cpu(cpu) {
 			if (cpu == smp_processor_id())
 				continue;
diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile
index f1114d11fe13..61ff91e76e0a 100644
--- a/arch/arm/mach-sa1100/Makefile
+++ b/arch/arm/mach-sa1100/Makefile
@@ -3,7 +3,7 @@
 #
 
 # Common support
-obj-y := clock.o generic.o irq.o time.o #nmi-oopser.o
+obj-y := clock.o generic.o irq.o #nmi-oopser.o
 
 # Specific board support
 obj-$(CONFIG_SA1100_ASSABET)		+= assabet.o
diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
index 03c75a811cb0..cbf53bb9c814 100644
--- a/arch/arm/mach-sa1100/clock.c
+++ b/arch/arm/mach-sa1100/clock.c
@@ -119,6 +119,17 @@ static DEFINE_CLK(gpio27, &clk_gpio27_ops);
 
 static DEFINE_CLK(cpu, &clk_cpu_ops);
 
+static unsigned long clk_36864_get_rate(struct clk *clk)
+{
+	return 3686400;
+}
+
+static struct clkops clk_36864_ops = {
+	.get_rate	= clk_36864_get_rate,
+};
+
+static DEFINE_CLK(36864, &clk_36864_ops);
+
 static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa1111.0", NULL, &clk_gpio27),
 	CLKDEV_INIT("sa1100-rtc", NULL, NULL),
@@ -126,6 +137,7 @@ static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu),
 	/* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */
 	CLKDEV_INIT("1800", NULL, &clk_cpu),
+	CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
 };
 
 static int __init sa11xx_clk_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 7fcbe3d119c7..3cc2b71e16f0 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -371,8 +371,7 @@ static void __init collie_init(void)
 		PPC_LDD6 | PPC_LDD7 | PPC_L_PCLK | PPC_L_LCLK | PPC_L_FCLK | PPC_L_BIAS |
 		PPC_TXD1 | PPC_TXD2 | PPC_TXD3 | PPC_TXD4 | PPC_SCLK | PPC_SFRM;
 
-	PWER = _COLLIE_GPIO_AC_IN | _COLLIE_GPIO_CO | _COLLIE_GPIO_ON_KEY |
-		_COLLIE_GPIO_WAKEUP | _COLLIE_GPIO_nREMOCON_INT | PWER_RTC;
+	PWER = 0;
 
 	PGSR = _COLLIE_GPIO_nREMOCON_ON;
 
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index d4ea142c4edd..40e0d8619a2d 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -33,6 +33,7 @@
 #include <mach/irqs.h>
 
 #include "generic.h"
+#include <clocksource/pxa.h>
 
 unsigned int reset_status;
 EXPORT_SYMBOL(reset_status);
@@ -369,6 +370,11 @@ void __init sa1100_map_io(void)
 	iotable_init(standard_io_desc, ARRAY_SIZE(standard_io_desc));
 }
 
+void __init sa1100_timer_init(void)
+{
+	pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400);
+}
+
 /*
  * Disable the memory bus request/grant signals on the SA1110 to
  * ensure that we don't receive spurious memory requests.  We set
diff --git a/arch/arm/mach-sa1100/include/mach/irqs.h b/arch/arm/mach-sa1100/include/mach/irqs.h
index de0983494c7e..734e30e406a3 100644
--- a/arch/arm/mach-sa1100/include/mach/irqs.h
+++ b/arch/arm/mach-sa1100/include/mach/irqs.h
@@ -8,17 +8,17 @@
  * 2001/11/14	RMK	Cleaned up and standardised a lot of the IRQs.
  */
 
-#define	IRQ_GPIO0		1
-#define	IRQ_GPIO1		2
-#define	IRQ_GPIO2		3
-#define	IRQ_GPIO3		4
-#define	IRQ_GPIO4		5
-#define	IRQ_GPIO5		6
-#define	IRQ_GPIO6		7
-#define	IRQ_GPIO7		8
-#define	IRQ_GPIO8		9
-#define	IRQ_GPIO9		10
-#define	IRQ_GPIO10		11
+#define	IRQ_GPIO0_SC		1
+#define	IRQ_GPIO1_SC		2
+#define	IRQ_GPIO2_SC		3
+#define	IRQ_GPIO3_SC		4
+#define	IRQ_GPIO4_SC		5
+#define	IRQ_GPIO5_SC		6
+#define	IRQ_GPIO6_SC		7
+#define	IRQ_GPIO7_SC		8
+#define	IRQ_GPIO8_SC		9
+#define	IRQ_GPIO9_SC		10
+#define	IRQ_GPIO10_SC		11
 #define	IRQ_GPIO11_27		12
 #define	IRQ_LCD			13	/* LCD controller           */
 #define	IRQ_Ser0UDC		14	/* Ser. port 0 UDC          */
@@ -41,32 +41,43 @@
 #define	IRQ_RTC1Hz		31	/* RTC 1 Hz clock           */
 #define	IRQ_RTCAlrm		32	/* RTC Alarm                */
 
-#define	IRQ_GPIO11		33
-#define	IRQ_GPIO12		34
-#define	IRQ_GPIO13		35
-#define	IRQ_GPIO14		36
-#define	IRQ_GPIO15		37
-#define	IRQ_GPIO16		38
-#define	IRQ_GPIO17		39
-#define	IRQ_GPIO18		40
-#define	IRQ_GPIO19		41
-#define	IRQ_GPIO20		42
-#define	IRQ_GPIO21		43
-#define	IRQ_GPIO22		44
-#define	IRQ_GPIO23		45
-#define	IRQ_GPIO24		46
-#define	IRQ_GPIO25		47
-#define	IRQ_GPIO26		48
-#define	IRQ_GPIO27		49
+#define	IRQ_GPIO0		33
+#define	IRQ_GPIO1		34
+#define	IRQ_GPIO2		35
+#define	IRQ_GPIO3		36
+#define	IRQ_GPIO4		37
+#define	IRQ_GPIO5		38
+#define	IRQ_GPIO6		39
+#define	IRQ_GPIO7		40
+#define	IRQ_GPIO8		41
+#define	IRQ_GPIO9		42
+#define	IRQ_GPIO10		43
+#define	IRQ_GPIO11		44
+#define	IRQ_GPIO12		45
+#define	IRQ_GPIO13		46
+#define	IRQ_GPIO14		47
+#define	IRQ_GPIO15		48
+#define	IRQ_GPIO16		49
+#define	IRQ_GPIO17		50
+#define	IRQ_GPIO18		51
+#define	IRQ_GPIO19		52
+#define	IRQ_GPIO20		53
+#define	IRQ_GPIO21		54
+#define	IRQ_GPIO22		55
+#define	IRQ_GPIO23		56
+#define	IRQ_GPIO24		57
+#define	IRQ_GPIO25		58
+#define	IRQ_GPIO26		59
+#define	IRQ_GPIO27		60
 
 /*
  * The next 16 interrupts are for board specific purposes.  Since
  * the kernel can only run on one machine at a time, we can re-use
  * these.  If you need more, increase IRQ_BOARD_END, but keep it
- * within sensible limits.  IRQs 49 to 64 are available.
+ * within sensible limits.  IRQs 61 to 76 are available.
  */
-#define IRQ_BOARD_START		50
-#define IRQ_BOARD_END		66
+#define IRQ_BOARD_START		61
+#define IRQ_BOARD_END		77
 
 /*
  * Figure out the MAX IRQ number.
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 63e2901db416..65aebfa66fe5 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -80,170 +80,6 @@ static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
 
 static struct irq_domain *sa1100_normal_irqdomain;
 
-/*
- * SA1100 GPIO edge detection for IRQs:
- * IRQs are generated on Falling-Edge, Rising-Edge, or both.
- * Use this instead of directly setting GRER/GFER.
- */
-static int GPIO_IRQ_rising_edge;
-static int GPIO_IRQ_falling_edge;
-static int GPIO_IRQ_mask = (1 << 11) - 1;
-
-static int sa1100_gpio_type(struct irq_data *d, unsigned int type)
-{
-	unsigned int mask;
-
-	mask = BIT(d->hwirq);
-
-	if (type == IRQ_TYPE_PROBE) {
-		if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask)
-			return 0;
-		type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
-	}
-
-	if (type & IRQ_TYPE_EDGE_RISING) {
-		GPIO_IRQ_rising_edge |= mask;
-	} else
-		GPIO_IRQ_rising_edge &= ~mask;
-	if (type & IRQ_TYPE_EDGE_FALLING) {
-		GPIO_IRQ_falling_edge |= mask;
-	} else
-		GPIO_IRQ_falling_edge &= ~mask;
-
-	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
-	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
-
-	return 0;
-}
-
-/*
- * GPIO IRQs must be acknowledged.
- */
-static void sa1100_gpio_ack(struct irq_data *d)
-{
-	GEDR = BIT(d->hwirq);
-}
-
-static int sa1100_gpio_wake(struct irq_data *d, unsigned int on)
-{
-	if (on)
-		PWER |= BIT(d->hwirq);
-	else
-		PWER &= ~BIT(d->hwirq);
-	return 0;
-}
-
-/*
- * This is for IRQs from 0 to 10.
- */
-static struct irq_chip sa1100_low_gpio_chip = {
-	.name		= "GPIO-l",
-	.irq_ack	= sa1100_gpio_ack,
-	.irq_mask	= sa1100_mask_irq,
-	.irq_unmask	= sa1100_unmask_irq,
-	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_gpio_wake,
-};
-
-static int sa1100_low_gpio_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip,
-				 handle_edge_irq);
-	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-
-	return 0;
-}
-
-static struct irq_domain_ops sa1100_low_gpio_irqdomain_ops = {
-	.map = sa1100_low_gpio_irqdomain_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-static struct irq_domain *sa1100_low_gpio_irqdomain;
-
-/*
- * IRQ11 (GPIO11 through 27) handler.  We enter here with the
- * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
- * and call the handler.
- */
-static void
-sa1100_high_gpio_handler(unsigned int irq, struct irq_desc *desc)
-{
-	unsigned int mask;
-
-	mask = GEDR & 0xfffff800;
-	do {
-		/*
-		 * clear down all currently active IRQ sources.
-		 * We will be processing them all.
-		 */
-		GEDR = mask;
-
-		irq = IRQ_GPIO11;
-		mask >>= 11;
-		do {
-			if (mask & 1)
-				generic_handle_irq(irq);
-			mask >>= 1;
-			irq++;
-		} while (mask);
-
-		mask = GEDR & 0xfffff800;
-	} while (mask);
-}
-
-/*
- * Like GPIO0 to 10, GPIO11-27 IRQs need to be handled specially.
- * In addition, the IRQs are all collected up into one bit in the
- * interrupt controller registers.
- */
-static void sa1100_high_gpio_mask(struct irq_data *d)
-{
-	unsigned int mask = BIT(d->hwirq);
-
-	GPIO_IRQ_mask &= ~mask;
-
-	GRER &= ~mask;
-	GFER &= ~mask;
-}
-
-static void sa1100_high_gpio_unmask(struct irq_data *d)
-{
-	unsigned int mask = BIT(d->hwirq);
-
-	GPIO_IRQ_mask |= mask;
-
-	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
-	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
-}
-
-static struct irq_chip sa1100_high_gpio_chip = {
-	.name		= "GPIO-h",
-	.irq_ack	= sa1100_gpio_ack,
-	.irq_mask	= sa1100_high_gpio_mask,
-	.irq_unmask	= sa1100_high_gpio_unmask,
-	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_gpio_wake,
-};
-
-static int sa1100_high_gpio_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip,
-				 handle_edge_irq);
-	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-
-	return 0;
-}
-
-static struct irq_domain_ops sa1100_high_gpio_irqdomain_ops = {
-	.map = sa1100_high_gpio_irqdomain_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-static struct irq_domain *sa1100_high_gpio_irqdomain;
-
 static struct resource irq_resource =
 	DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
 
@@ -270,17 +106,6 @@ static int sa1100irq_suspend(void)
 		  IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2|
 		  IC_GPIO1|IC_GPIO0);
 
-	/*
-	 * Set the appropriate edges for wakeup.
-	 */
-	GRER = PWER & GPIO_IRQ_rising_edge;
-	GFER = PWER & GPIO_IRQ_falling_edge;
-	
-	/*
-	 * Clear any pending GPIO interrupts.
-	 */
-	GEDR = GEDR;
-
 	return 0;
 }
 
@@ -292,9 +117,6 @@ static void sa1100irq_resume(void)
 		ICCR = st->iccr;
 		ICLR = st->iclr;
 
-		GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
-		GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
-
 		ICMR = st->icmr;
 	}
 }
@@ -325,7 +147,8 @@ sa1100_handle_irq(struct pt_regs *regs)
 		if (mask == 0)
 			break;
 
-		handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0, regs);
+		handle_domain_irq(sa1100_normal_irqdomain,
+				ffs(mask) - 1, regs);
 	} while (1);
 }
 
@@ -339,34 +162,16 @@ void __init sa1100_init_irq(void)
 	/* all IRQs are IRQ, not FIQ */
 	ICLR = 0;
 
-	/* clear all GPIO edge detects */
-	GFER = 0;
-	GRER = 0;
-	GEDR = -1;
-
 	/*
 	 * Whatever the doc says, this has to be set for the wait-on-irq
 	 * instruction to work... on a SA1100 rev 9 at least.
 	 */
 	ICCR = 1;
 
-	sa1100_low_gpio_irqdomain = irq_domain_add_legacy(NULL,
-			11, IRQ_GPIO0, 0,
-			&sa1100_low_gpio_irqdomain_ops, NULL);
-
-	sa1100_normal_irqdomain = irq_domain_add_legacy(NULL,
-			21, IRQ_GPIO11_27, 11,
+	sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
+			32, IRQ_GPIO0_SC,
 			&sa1100_normal_irqdomain_ops, NULL);
 
-	sa1100_high_gpio_irqdomain = irq_domain_add_legacy(NULL,
-			17, IRQ_GPIO11, 11,
-			&sa1100_high_gpio_irqdomain_ops, NULL);
-
-	/*
-	 * Install handler for GPIO 11-27 edge detect interrupts
-	 */
-	irq_set_chained_handler(IRQ_GPIO11_27, sa1100_high_gpio_handler);
-
 	set_handle_irq(sa1100_handle_irq);
 
 	sa1100_init_gpio();
diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c
index 6645d1e31f14..34853d5dfda2 100644
--- a/arch/arm/mach-sa1100/pm.c
+++ b/arch/arm/mach-sa1100/pm.c
@@ -81,6 +81,7 @@ static int sa11x0_pm_enter(suspend_state_t state)
 	/*
 	 * Ensure not to come back here if it wasn't intended
 	 */
+	RCSR = RCSR_SMR;
 	PSPR = 0;
 
 	/*
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
deleted file mode 100644
index 1dea6cfafb31..000000000000
--- a/arch/arm/mach-sa1100/time.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * linux/arch/arm/mach-sa1100/time.c
- *
- * Copyright (C) 1998 Deborah Wallach.
- * Twiddles  (C) 1999 Hugo Fiennes <hugo@empeg.com>
- *
- * 2000/03/29 (C) Nicolas Pitre <nico@fluxnic.net>
- *	Rewritten: big cleanup, much simpler, better HZ accuracy.
- *
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/timex.h>
-#include <linux/clockchips.h>
-#include <linux/sched_clock.h>
-
-#include <asm/mach/time.h>
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-
-#define SA1100_CLOCK_FREQ 3686400
-#define SA1100_LATCH DIV_ROUND_CLOSEST(SA1100_CLOCK_FREQ, HZ)
-
-static u64 notrace sa1100_read_sched_clock(void)
-{
-	return readl_relaxed(OSCR);
-}
-
-#define MIN_OSCR_DELTA 2
-
-static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *c = dev_id;
-
-	/* Disarm the compare/match, signal the event. */
-	writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
-	writel_relaxed(OSSR_M0, OSSR);
-	c->event_handler(c);
-
-	return IRQ_HANDLED;
-}
-
-static int
-sa1100_osmr0_set_next_event(unsigned long delta, struct clock_event_device *c)
-{
-	unsigned long next, oscr;
-
-	writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER);
-	next = readl_relaxed(OSCR) + delta;
-	writel_relaxed(next, OSMR0);
-	oscr = readl_relaxed(OSCR);
-
-	return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0;
-}
-
-static void
-sa1100_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *c)
-{
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
-		writel_relaxed(OSSR_M0, OSSR);
-		break;
-
-	case CLOCK_EVT_MODE_RESUME:
-	case CLOCK_EVT_MODE_PERIODIC:
-		break;
-	}
-}
-
-#ifdef CONFIG_PM
-unsigned long osmr[4], oier;
-
-static void sa1100_timer_suspend(struct clock_event_device *cedev)
-{
-	osmr[0] = readl_relaxed(OSMR0);
-	osmr[1] = readl_relaxed(OSMR1);
-	osmr[2] = readl_relaxed(OSMR2);
-	osmr[3] = readl_relaxed(OSMR3);
-	oier = readl_relaxed(OIER);
-}
-
-static void sa1100_timer_resume(struct clock_event_device *cedev)
-{
-	writel_relaxed(0x0f, OSSR);
-	writel_relaxed(osmr[0], OSMR0);
-	writel_relaxed(osmr[1], OSMR1);
-	writel_relaxed(osmr[2], OSMR2);
-	writel_relaxed(osmr[3], OSMR3);
-	writel_relaxed(oier, OIER);
-
-	/*
-	 * OSMR0 is the system timer: make sure OSCR is sufficiently behind
-	 */
-	writel_relaxed(OSMR0 - SA1100_LATCH, OSCR);
-}
-#else
-#define sa1100_timer_suspend NULL
-#define sa1100_timer_resume NULL
-#endif
-
-static struct clock_event_device ckevt_sa1100_osmr0 = {
-	.name		= "osmr0",
-	.features	= CLOCK_EVT_FEAT_ONESHOT,
-	.rating		= 200,
-	.set_next_event	= sa1100_osmr0_set_next_event,
-	.set_mode	= sa1100_osmr0_set_mode,
-	.suspend	= sa1100_timer_suspend,
-	.resume		= sa1100_timer_resume,
-};
-
-static struct irqaction sa1100_timer_irq = {
-	.name		= "ost0",
-	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
-	.handler	= sa1100_ost0_interrupt,
-	.dev_id		= &ckevt_sa1100_osmr0,
-};
-
-void __init sa1100_timer_init(void)
-{
-	writel_relaxed(0, OIER);
-	writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR);
-
-	sched_clock_register(sa1100_read_sched_clock, 32, 3686400);
-
-	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
-
-	setup_irq(IRQ_OST0, &sa1100_timer_irq);
-
-	clocksource_mmio_init(OSCR, "oscr", SA1100_CLOCK_FREQ, 200, 32,
-		clocksource_mmio_readl_up);
-	clockevents_config_and_register(&ckevt_sa1100_osmr0, 3686400,
-					MIN_OSCR_DELTA * 2, 0x7fffffff);
-}
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c7fc009ad21c..c6c7696b8db9 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -41,12 +41,14 @@ struct l2c_init_data {
 	void (*enable)(void __iomem *, u32, unsigned);
 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
 	void (*save)(void __iomem *);
+	void (*configure)(void __iomem *);
 	struct outer_cache_fns outer_cache;
 };
 
 #define CACHE_LINE_SIZE		32
 
 static void __iomem *l2x0_base;
+static const struct l2c_init_data *l2x0_data;
 static DEFINE_RAW_SPINLOCK(l2x0_lock);
 static u32 l2x0_way_mask;	/* Bitmask of active ways */
 static u32 l2x0_size;
@@ -106,6 +108,19 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 	}
 }
 
+static void l2c_configure(void __iomem *base)
+{
+	if (outer_cache.configure) {
+		outer_cache.configure(&l2x0_saved_regs);
+		return;
+	}
+
+	if (l2x0_data->configure)
+		l2x0_data->configure(base);
+
+	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
+}
+
 /*
  * Enable the L2 cache controller.  This function must only be
  * called when the cache controller is known to be disabled.
@@ -114,7 +129,12 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 {
 	unsigned long flags;
 
-	l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+	/* Do not touch the controller if already enabled. */
+	if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)
+		return;
+
+	l2x0_saved_regs.aux_ctrl = aux;
+	l2c_configure(base);
 
 	l2c_unlock(base, num_lock);
 
@@ -136,76 +156,14 @@ static void l2c_disable(void)
 	dsb(st);
 }
 
-#ifdef CONFIG_CACHE_PL310
-static inline void cache_wait(void __iomem *reg, unsigned long mask)
-{
-	/* cache operations by line are atomic on PL310 */
-}
-#else
-#define cache_wait	l2c_wait_mask
-#endif
-
-static inline void cache_sync(void)
-{
-	void __iomem *base = l2x0_base;
-
-	writel_relaxed(0, base + sync_reg_offset);
-	cache_wait(base + L2X0_CACHE_SYNC, 1);
-}
-
-#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
-static inline void debug_writel(unsigned long val)
-{
-	l2c_set_debug(l2x0_base, val);
-}
-#else
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
-{
-}
-#endif
-
-static void l2x0_cache_sync(void)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void __l2x0_flush_all(void)
-{
-	debug_writel(0x03);
-	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
-	cache_sync();
-	debug_writel(0x00);
-}
-
-static void l2x0_flush_all(void)
-{
-	unsigned long flags;
-
-	/* clean all ways */
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_disable(void)
+static void l2c_save(void __iomem *base)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	l2c_write_sec(0, l2x0_base, L2X0_CTRL);
-	dsb(st);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 }
 
-static void l2c_save(void __iomem *base)
+static void l2c_resume(void)
 {
-	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+	l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
 }
 
 /*
@@ -288,14 +246,6 @@ static void l2c210_sync(void)
 	__l2c210_cache_sync(l2x0_base);
 }
 
-static void l2c210_resume(void)
-{
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
-}
-
 static const struct l2c_init_data l2c210_data __initconst = {
 	.type = "L2C-210",
 	.way_size_0 = SZ_8K,
@@ -309,7 +259,7 @@ static const struct l2c_init_data l2c210_data __initconst = {
 		.flush_all = l2c210_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c210_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -466,7 +416,7 @@ static const struct l2c_init_data l2c220_data = {
 		.flush_all = l2c220_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c220_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -615,39 +565,29 @@ static void __init l2c310_save(void __iomem *base)
 							L310_POWER_CTRL);
 }
 
-static void l2c310_resume(void)
+static void l2c310_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
+	unsigned revision;
 
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		unsigned revision;
-
-		/* restore pl310 setup */
-		writel_relaxed(l2x0_saved_regs.tag_latency,
-			       base + L310_TAG_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.data_latency,
-			       base + L310_DATA_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.filter_end,
-			       base + L310_ADDR_FILTER_END);
-		writel_relaxed(l2x0_saved_regs.filter_start,
-			       base + L310_ADDR_FILTER_START);
-
-		revision = readl_relaxed(base + L2X0_CACHE_ID) &
-				L2X0_CACHE_ID_RTL_MASK;
-
-		if (revision >= L310_CACHE_ID_RTL_R2P0)
-			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
-				      L310_PREFETCH_CTRL);
-		if (revision >= L310_CACHE_ID_RTL_R3P0)
-			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
-				      L310_POWER_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-
-		/* Re-enable full-line-of-zeros for Cortex-A9 */
-		if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
-			set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
-	}
+	/* restore pl310 setup */
+	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+		      L310_TAG_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.data_latency, base,
+		      L310_DATA_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.filter_end, base,
+		      L310_ADDR_FILTER_END);
+	l2c_write_sec(l2x0_saved_regs.filter_start, base,
+		      L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+				 L2X0_CACHE_ID_RTL_MASK;
+
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+			      L310_PREFETCH_CTRL);
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+			      L310_POWER_CTRL);
 }
 
 static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
@@ -699,6 +639,23 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
 	}
 
+	/* r3p0 or later has power control register */
+	if (rev >= L310_CACHE_ID_RTL_R3P0)
+		l2x0_saved_regs.pwr_ctrl = L310_DYNAMIC_CLK_GATING_EN |
+						L310_STNDBY_MODE_EN;
+
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, aux, num_lock);
+
+	/* Read back resulting AUX_CTRL value as it could have been altered. */
+	aux = readl_relaxed(base + L2X0_AUX_CTRL);
+
 	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
 		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
 
@@ -712,23 +669,12 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	if (rev >= L310_CACHE_ID_RTL_R3P0) {
 		u32 power_ctrl;
 
-		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
-			      base, L310_POWER_CTRL);
 		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
 		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
 			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
 			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
 	}
 
-	/*
-	 * Always enable non-secure access to the lockdown registers -
-	 * we write to them as part of the L2C enable sequence so they
-	 * need to be accessible.
-	 */
-	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
-
-	l2c_enable(base, aux, num_lock);
-
 	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
 		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
 		cpu_notifier(l2c310_cpu_enable_flz, 0);
@@ -760,11 +706,11 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 
 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
 	    revision < L310_CACHE_ID_RTL_R3P2) {
-		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+		u32 val = l2x0_saved_regs.prefetch_ctrl;
 		/* I don't think bit23 is required here... but iMX6 does so */
 		if (val & (BIT(30) | BIT(23))) {
 			val &= ~(BIT(30) | BIT(23));
-			l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}
 	}
@@ -800,6 +746,15 @@ static void l2c310_disable(void)
 	l2c_disable();
 }
 
+static void l2c310_resume(void)
+{
+	l2c_resume();
+
+	/* Re-enable full-line-of-zeros for Cortex-A9 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+}
+
 static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.type = "L2C-310",
 	.way_size_0 = SZ_8K,
@@ -807,6 +762,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -818,14 +774,22 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	},
 };
 
-static void __init __l2c_init(const struct l2c_init_data *data,
-	u32 aux_val, u32 aux_mask, u32 cache_id)
+static int __init __l2c_init(const struct l2c_init_data *data,
+			     u32 aux_val, u32 aux_mask, u32 cache_id)
 {
 	struct outer_cache_fns fns;
 	unsigned way_size_bits, ways;
 	u32 aux, old_aux;
 
 	/*
+	 * Save the pointer globally so that callbacks which do not receive
+	 * context from callers can access the structure.
+	 */
+	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!l2x0_data)
+		return -ENOMEM;
+
+	/*
 	 * Sanity check the aux values.  aux_mask is the bits we preserve
 	 * from reading the hardware register, and aux_val is the bits we
 	 * set.
@@ -884,6 +848,7 @@ static void __init __l2c_init(const struct l2c_init_data *data,
 
 	fns = data->outer_cache;
 	fns.write_sec = outer_cache.write_sec;
+	fns.configure = outer_cache.configure;
 	if (data->fixup)
 		data->fixup(l2x0_base, cache_id, &fns);
 
@@ -910,6 +875,8 @@ static void __init __l2c_init(const struct l2c_init_data *data,
 		data->type, ways, l2x0_size >> 10);
 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
 		data->type, cache_id, aux);
+
+	return 0;
 }
 
 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
@@ -936,6 +903,10 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		break;
 	}
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	__l2c_init(data, aux_val, aux_mask, cache_id);
 }
 
@@ -1102,7 +1073,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
 		.flush_all   = l2c210_flush_all,
 		.disable     = l2c_disable,
 		.sync        = l2c210_sync,
-		.resume      = l2c210_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1120,7 +1091,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
 		.flush_all   = l2c220_flush_all,
 		.disable     = l2c_disable,
 		.sync        = l2c220_sync,
-		.resume      = l2c210_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1131,32 +1102,32 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	u32 tag[3] = { 0, 0, 0 };
 	u32 filter[2] = { 0, 0 };
 	u32 assoc;
+	u32 prefetch;
+	u32 val;
 	int ret;
 
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
-		writel_relaxed(
+		l2x0_saved_regs.tag_latency =
 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
-			l2x0_base + L310_TAG_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(tag[2] - 1);
 
 	of_property_read_u32_array(np, "arm,data-latency",
 				   data, ARRAY_SIZE(data));
 	if (data[0] && data[1] && data[2])
-		writel_relaxed(
+		l2x0_saved_regs.data_latency =
 			L310_LATENCY_CTRL_RD(data[0] - 1) |
 			L310_LATENCY_CTRL_WR(data[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(data[2] - 1),
-			l2x0_base + L310_DATA_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(data[2] - 1);
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
 	if (filter[1]) {
-		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
-			       l2x0_base + L310_ADDR_FILTER_END);
-		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
-			       l2x0_base + L310_ADDR_FILTER_START);
+		l2x0_saved_regs.filter_end =
+					ALIGN(filter[0] + filter[1], SZ_1M);
+		l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
+					| L310_ADDR_FILTER_EN;
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
@@ -1178,6 +1149,58 @@ static void __init l2c310_of_parse(const struct device_node *np,
 		       assoc);
 		break;
 	}
+
+	prefetch = l2x0_saved_regs.prefetch_ctrl;
+
+	ret = of_property_read_u32(np, "arm,double-linefill", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
+	if (ret == 0) {
+		if (!val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
+	if (ret == 0) {
+		prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
+		prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
+	}
+
+	l2x0_saved_regs.prefetch_ctrl = prefetch;
 }
 
 static const struct l2c_init_data of_l2c310_data __initconst = {
@@ -1188,6 +1211,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save  = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1216,6 +1240,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save  = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1231,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
  * noninclusive, while the hardware cache range operations use
  * inclusive start and end addresses.
  */
-static unsigned long calc_range_end(unsigned long start, unsigned long end)
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
 {
 	/*
 	 * Limit the number of cache lines processed at once,
@@ -1250,25 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 	return end;
 }
 
-/*
- * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
- * and range operations only do a TLB lookup on the start address.
- */
 static void aurora_pa_range(unsigned long start, unsigned long end,
-			unsigned long offset)
+			    unsigned long offset)
 {
+	void __iomem *base = l2x0_base;
+	unsigned long range_end;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, l2x0_base + offset);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-
-	cache_sync();
-}
-
-static void aurora_inv_range(unsigned long start, unsigned long end)
-{
 	/*
 	 * round start and end adresses up to cache line size
 	 */
@@ -1276,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
 	end = ALIGN(end, CACHE_LINE_SIZE);
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * perform operation on all full cache lines between 'start' and 'end'
 	 */
 	while (start < end) {
-		unsigned long range_end = calc_range_end(start, end);
-		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-				AURORA_INVAL_RANGE_REG);
+		range_end = aurora_range_end(start, end);
+
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+		writel_relaxed(0, base + AURORA_SYNC_REG);
 		start = range_end;
 	}
 }
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
 
 static void aurora_clean_range(unsigned long start, unsigned long end)
 {
@@ -1292,52 +1314,53 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
 	 * If L2 is forced to WT, the L2 will always be clean and we
 	 * don't need to do anything here.
 	 */
-	if (!l2_wt_override) {
-		start &= ~(CACHE_LINE_SIZE - 1);
-		end = ALIGN(end, CACHE_LINE_SIZE);
-		while (start != end) {
-			unsigned long range_end = calc_range_end(start, end);
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_CLEAN_RANGE_REG);
-			start = range_end;
-		}
-	}
+	if (!l2_wt_override)
+		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
 }
 
 static void aurora_flush_range(unsigned long start, unsigned long end)
 {
-	start &= ~(CACHE_LINE_SIZE - 1);
-	end = ALIGN(end, CACHE_LINE_SIZE);
-	while (start != end) {
-		unsigned long range_end = calc_range_end(start, end);
-		/*
-		 * If L2 is forced to WT, the L2 will always be clean and we
-		 * just need to invalidate.
-		 */
-		if (l2_wt_override)
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-							AURORA_INVAL_RANGE_REG);
-		else
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-							AURORA_FLUSH_RANGE_REG);
-		start = range_end;
-	}
+	if (l2_wt_override)
+		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+	else
+		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
 }
 
-static void aurora_save(void __iomem *base)
+static void aurora_flush_all(void)
 {
-	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
-	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	/* clean all ways */
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+	writel_relaxed(0, base + AURORA_SYNC_REG);
 }
 
-static void aurora_resume(void)
+static void aurora_cache_sync(void)
+{
+	writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
+}
+
+static void aurora_disable(void)
 {
 	void __iomem *base = l2x0_base;
+	unsigned long flags;
 
-	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
-		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
-	}
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void aurora_save(void __iomem *base)
+{
+	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
 }
 
 /*
@@ -1398,10 +1421,10 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 		.inv_range   = aurora_inv_range,
 		.clean_range = aurora_clean_range,
 		.flush_range = aurora_flush_range,
-		.flush_all   = l2x0_flush_all,
-		.disable     = l2x0_disable,
-		.sync        = l2x0_cache_sync,
-		.resume      = aurora_resume,
+		.flush_all   = aurora_flush_all,
+		.disable     = aurora_disable,
+		.sync	     = aurora_cache_sync,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1414,7 +1437,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
 	.fixup = aurora_fixup,
 	.save  = aurora_save,
 	.outer_cache = {
-		.resume      = aurora_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1562,6 +1585,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
 	.of_parse = l2c310_of_parse,
 	.enable = l2c310_enable,
 	.save  = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range   = bcm_inv_range,
 		.clean_range = bcm_clean_range,
@@ -1583,18 +1607,12 @@ static void __init tauros3_save(void __iomem *base)
 		readl_relaxed(base + L310_PREFETCH_CTRL);
 }
 
-static void tauros3_resume(void)
+static void tauros3_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
-			       base + TAUROS3_AUX2_CTRL);
-		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-			       base + L310_PREFETCH_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-	}
+	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+		       base + TAUROS3_AUX2_CTRL);
+	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+		       base + L310_PREFETCH_CTRL);
 }
 
 static const struct l2c_init_data of_tauros3_data __initconst = {
@@ -1603,9 +1621,10 @@ static const struct l2c_init_data of_tauros3_data __initconst = {
 	.num_lock = 8,
 	.enable = l2c_enable,
 	.save  = tauros3_save,
+	.configure = tauros3_configure,
 	/* Tauros3 broadcasts L1 cache operations to L2 */
 	.outer_cache = {
-		.resume      = tauros3_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1661,6 +1680,10 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	if (!of_property_read_bool(np, "cache-unified"))
 		pr_err("L2C: device tree omits to specify unified cache\n");
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	/* L2 configuration can only be changed if the cache is disabled */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
 		if (data->of_parse)
@@ -1671,8 +1694,6 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	else
 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
 
-	__l2c_init(data, aux_val, aux_mask, cache_id);
-
-	return 0;
+	return __l2c_init(data, aux_val, aux_mask, cache_id);
 }
 #endif
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 2495c8cb47ba..1609b022a72f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -319,10 +319,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 
 	early_init_fdt_scan_reserved_mem();
 
-	/*
-	 * reserve memory for DMA contigouos allocations,
-	 * must come from DMA area inside low memory
-	 */
+	/* reserve memory for DMA contiguous allocations */
 	dma_contiguous_reserve(arm_dma_limit);
 
 	arm_memblock_steal_permitted = false;
diff --git a/arch/arm/probes/Makefile b/arch/arm/probes/Makefile
new file mode 100644
index 000000000000..aa1f8590dcdd
--- /dev/null
+++ b/arch/arm/probes/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_UPROBES)		+= decode.o decode-arm.o uprobes/
+obj-$(CONFIG_KPROBES)		+= decode.o kprobes/
+ifdef CONFIG_THUMB2_KERNEL
+obj-$(CONFIG_KPROBES)		+= decode-thumb.o
+else
+obj-$(CONFIG_KPROBES)		+= decode-arm.o
+endif
diff --git a/arch/arm/kernel/probes-arm.c b/arch/arm/probes/decode-arm.c
index 8eaef81d8344..f72c33a2dcfb 100644
--- a/arch/arm/kernel/probes-arm.c
+++ b/arch/arm/probes/decode-arm.c
@@ -1,5 +1,6 @@
 /*
- * arch/arm/kernel/probes-arm.c
+ *
+ * arch/arm/probes/decode-arm.c
  *
  * Some code moved here from arch/arm/kernel/kprobes-arm.c
  *
@@ -20,8 +21,8 @@
 #include <linux/stddef.h>
 #include <linux/ptrace.h>
 
-#include "probes.h"
-#include "probes-arm.h"
+#include "decode.h"
+#include "decode-arm.h"
 
 #define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit)))))
 
@@ -369,17 +370,17 @@ static const union decode_item arm_cccc_001x_table[] = {
 
 	/* MOVW			cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
 	/* MOVT			cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0fb00000, 0x03000000, PROBES_DATA_PROCESSING_IMM,
+	DECODE_EMULATEX	(0x0fb00000, 0x03000000, PROBES_MOV_HALFWORD,
 						 REGS(0, NOPC, 0, 0, 0)),
 
 	/* YIELD		cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
 	DECODE_OR	(0x0fff00ff, 0x03200001),
 	/* SEV			cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
-	DECODE_EMULATE	(0x0fff00ff, 0x03200004, PROBES_EMULATE_NONE),
+	DECODE_EMULATE	(0x0fff00ff, 0x03200004, PROBES_SEV),
 	/* NOP			cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
 	/* WFE			cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
 	/* WFI			cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
-	DECODE_SIMULATE	(0x0fff00fc, 0x03200000, PROBES_SIMULATE_NOP),
+	DECODE_SIMULATE	(0x0fff00fc, 0x03200000, PROBES_WFE),
 	/* DBG			cccc 0011 0010 0000 xxxx xxxx ffff xxxx */
 	/* unallocated hints	cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
 	/* MSR (immediate)	cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx */
@@ -725,10 +726,11 @@ static void __kprobes arm_singlestep(probes_opcode_t insn,
  */
 enum probes_insn __kprobes
 arm_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
-		       bool emulate, const union decode_action *actions)
+		       bool emulate, const union decode_action *actions,
+		       const struct decode_checker *checkers[])
 {
 	asi->insn_singlestep = arm_singlestep;
 	asi->insn_check_cc = probes_condition_checks[insn>>28];
 	return probes_decode_insn(insn, asi, probes_decode_arm_table, false,
-				  emulate, actions);
+				  emulate, actions, checkers);
 }
diff --git a/arch/arm/kernel/probes-arm.h b/arch/arm/probes/decode-arm.h
index ace6572f6e26..b3b80f6d414b 100644
--- a/arch/arm/kernel/probes-arm.h
+++ b/arch/arm/probes/decode-arm.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/probes-arm.h
+ * arch/arm/probes/decode-arm.h
  *
  * Copyright 2013 Linaro Ltd.
  * Written by: David A. Long
@@ -15,9 +15,9 @@
 #ifndef _ARM_KERNEL_PROBES_ARM_H
 #define  _ARM_KERNEL_PROBES_ARM_H
 
+#include "decode.h"
+
 enum probes_arm_action {
-	PROBES_EMULATE_NONE,
-	PROBES_SIMULATE_NOP,
 	PROBES_PRELOAD_IMM,
 	PROBES_PRELOAD_REG,
 	PROBES_BRANCH_IMM,
@@ -68,6 +68,7 @@ extern const union decode_item probes_decode_arm_table[];
 
 enum probes_insn arm_probes_decode_insn(probes_opcode_t,
 		struct arch_probes_insn *, bool emulate,
-		const union decode_action *actions);
+		const union decode_action *actions,
+		const struct decode_checker *checkers[]);
 
 #endif
diff --git a/arch/arm/kernel/probes-thumb.c b/arch/arm/probes/decode-thumb.c
index 4131351e812f..985e7dd4cac6 100644
--- a/arch/arm/kernel/probes-thumb.c
+++ b/arch/arm/probes/decode-thumb.c
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/probes-thumb.c
+ * arch/arm/probes/decode-thumb.c
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -12,8 +12,8 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#include "probes.h"
-#include "probes-thumb.h"
+#include "decode.h"
+#include "decode-thumb.h"
 
 
 static const union decode_item t32_table_1110_100x_x0xx[] = {
@@ -863,20 +863,22 @@ static void __kprobes thumb32_singlestep(probes_opcode_t opcode,
 
 enum probes_insn __kprobes
 thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
-			   bool emulate, const union decode_action *actions)
+			   bool emulate, const union decode_action *actions,
+			   const struct decode_checker *checkers[])
 {
 	asi->insn_singlestep = thumb16_singlestep;
 	asi->insn_check_cc = thumb_check_cc;
 	return probes_decode_insn(insn, asi, probes_decode_thumb16_table, true,
-				  emulate, actions);
+				  emulate, actions, checkers);
 }
 
 enum probes_insn __kprobes
 thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
-			   bool emulate, const union decode_action *actions)
+			   bool emulate, const union decode_action *actions,
+			   const struct decode_checker *checkers[])
 {
 	asi->insn_singlestep = thumb32_singlestep;
 	asi->insn_check_cc = thumb_check_cc;
 	return probes_decode_insn(insn, asi, probes_decode_thumb32_table, true,
-				  emulate, actions);
+				  emulate, actions, checkers);
 }
diff --git a/arch/arm/kernel/probes-thumb.h b/arch/arm/probes/decode-thumb.h
index 7c6f6ebe514f..8457add0a2d8 100644
--- a/arch/arm/kernel/probes-thumb.h
+++ b/arch/arm/probes/decode-thumb.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/probes-thumb.h
+ * arch/arm/probes/decode-thumb.h
  *
  * Copyright 2013 Linaro Ltd.
  * Written by: David A. Long
@@ -15,6 +15,8 @@
 #ifndef _ARM_KERNEL_PROBES_THUMB_H
 #define  _ARM_KERNEL_PROBES_THUMB_H
 
+#include "decode.h"
+
 /*
  * True if current instruction is in an IT block.
  */
@@ -89,9 +91,11 @@ extern const union decode_item probes_decode_thumb16_table[];
 
 enum probes_insn __kprobes
 thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
-		bool emulate, const union decode_action *actions);
+		bool emulate, const union decode_action *actions,
+		const struct decode_checker *checkers[]);
 enum probes_insn __kprobes
 thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
-		bool emulate, const union decode_action *actions);
+		bool emulate, const union decode_action *actions,
+		const struct decode_checker *checkers[]);
 
 #endif
diff --git a/arch/arm/kernel/probes.c b/arch/arm/probes/decode.c
index a8ab540d7e73..880ebe0cdf19 100644
--- a/arch/arm/kernel/probes.c
+++ b/arch/arm/probes/decode.c
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/probes.c
+ * arch/arm/probes/decode.c
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -17,7 +17,7 @@
 #include <asm/ptrace.h>
 #include <linux/bug.h>
 
-#include "probes.h"
+#include "decode.h"
 
 
 #ifndef find_str_pc_offset
@@ -342,6 +342,31 @@ static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
 	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
 };
 
+static int run_checkers(const struct decode_checker *checkers[],
+		int action, probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	const struct decode_checker **p;
+
+	if (!checkers)
+		return INSN_GOOD;
+
+	p = checkers;
+	while (*p != NULL) {
+		int retval;
+		probes_check_t *checker_func = (*p)[action].checker;
+
+		retval = INSN_GOOD;
+		if (checker_func)
+			retval = checker_func(insn, asi, h);
+		if (retval == INSN_REJECTED)
+			return retval;
+		p++;
+	}
+	return INSN_GOOD;
+}
+
 /*
  * probes_decode_insn operates on data tables in order to decode an ARM
  * architecture instruction onto which a kprobe has been placed.
@@ -388,11 +413,34 @@ static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
 int __kprobes
 probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
 		   const union decode_item *table, bool thumb,
-		   bool emulate, const union decode_action *actions)
+		   bool emulate, const union decode_action *actions,
+		   const struct decode_checker *checkers[])
 {
 	const struct decode_header *h = (struct decode_header *)table;
 	const struct decode_header *next;
 	bool matched = false;
+	/*
+	 * @insn can be modified by decode_regs. Save its original
+	 * value for checkers.
+	 */
+	probes_opcode_t origin_insn = insn;
+
+	/*
+	 * stack_space is initialized to 0 here. Checker functions
+	 * should update is value if they find this is a stack store
+	 * instruction: positive value means bytes of stack usage,
+	 * negitive value means unable to determine stack usage
+	 * statically. For instruction doesn't store to stack, checker
+	 * do nothing with it.
+	 */
+	asi->stack_space = 0;
+
+	/*
+	 * Similarly to stack_space, register_usage_flags is filled by
+	 * checkers. Its default value is set to ~0, which is 'all
+	 * registers are used', to prevent any potential optimization.
+	 */
+	asi->register_usage_flags = ~0UL;
 
 	if (emulate)
 		insn = prepare_emulated_insn(insn, asi, thumb);
@@ -422,24 +470,41 @@ probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
 		}
 
 		case DECODE_TYPE_CUSTOM: {
+			int err;
 			struct decode_custom *d = (struct decode_custom *)h;
-			return actions[d->decoder.action].decoder(insn, asi, h);
+			int action = d->decoder.action;
+
+			err = run_checkers(checkers, action, origin_insn, asi, h);
+			if (err == INSN_REJECTED)
+				return INSN_REJECTED;
+			return actions[action].decoder(insn, asi, h);
 		}
 
 		case DECODE_TYPE_SIMULATE: {
+			int err;
 			struct decode_simulate *d = (struct decode_simulate *)h;
-			asi->insn_handler = actions[d->handler.action].handler;
+			int action = d->handler.action;
+
+			err = run_checkers(checkers, action, origin_insn, asi, h);
+			if (err == INSN_REJECTED)
+				return INSN_REJECTED;
+			asi->insn_handler = actions[action].handler;
 			return INSN_GOOD_NO_SLOT;
 		}
 
 		case DECODE_TYPE_EMULATE: {
+			int err;
 			struct decode_emulate *d = (struct decode_emulate *)h;
+			int action = d->handler.action;
+
+			err = run_checkers(checkers, action, origin_insn, asi, h);
+			if (err == INSN_REJECTED)
+				return INSN_REJECTED;
 
 			if (!emulate)
-				return actions[d->handler.action].decoder(insn,
-					asi, h);
+				return actions[action].decoder(insn, asi, h);
 
-			asi->insn_handler = actions[d->handler.action].handler;
+			asi->insn_handler = actions[action].handler;
 			set_emulated_insn(insn, asi, thumb);
 			return INSN_GOOD;
 		}
diff --git a/arch/arm/kernel/probes.h b/arch/arm/probes/decode.h
index dba9f2466a93..f9b08ba7fe73 100644
--- a/arch/arm/kernel/probes.h
+++ b/arch/arm/probes/decode.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/probes.h
+ * arch/arm/probes/decode.h
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -314,6 +314,14 @@ union decode_action {
 	probes_custom_decode_t	*decoder;
 };
 
+typedef enum probes_insn (probes_check_t)(probes_opcode_t,
+					   struct arch_probes_insn *,
+					   const struct decode_header *);
+
+struct decode_checker {
+	probes_check_t	*checker;
+};
+
 #define DECODE_END			\
 	{.bits = DECODE_TYPE_END}
 
@@ -402,6 +410,7 @@ probes_insn_handler_t probes_emulate_none;
 int __kprobes
 probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
 		const union decode_item *table, bool thumb, bool emulate,
-		const union decode_action *actions);
+		const union decode_action *actions,
+		const struct decode_checker **checkers);
 
 #endif
diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
new file mode 100644
index 000000000000..76a36bf102b7
--- /dev/null
+++ b/arch/arm/probes/kprobes/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_KPROBES)		+= core.o actions-common.o checkers-common.o
+obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
+test-kprobes-objs		:= test-core.o
+
+ifdef CONFIG_THUMB2_KERNEL
+obj-$(CONFIG_KPROBES)		+= actions-thumb.o checkers-thumb.o
+test-kprobes-objs		+= test-thumb.o
+else
+obj-$(CONFIG_KPROBES)		+= actions-arm.o checkers-arm.o
+obj-$(CONFIG_OPTPROBES)		+= opt-arm.o
+test-kprobes-objs		+= test-arm.o
+endif
diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/probes/kprobes/actions-arm.c
index ac300c60d656..b9056d649607 100644
--- a/arch/arm/kernel/kprobes-arm.c
+++ b/arch/arm/probes/kprobes/actions-arm.c
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/kprobes-decode.c
+ * arch/arm/probes/kprobes/actions-arm.c
  *
  * Copyright (C) 2006, 2007 Motorola Inc.
  *
@@ -62,8 +62,9 @@
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 
-#include "kprobes.h"
-#include "probes-arm.h"
+#include "../decode-arm.h"
+#include "core.h"
+#include "checkers.h"
 
 #if  __LINUX_ARM_ARCH__ >= 6
 #define BLX(reg)	"blx	"reg"		\n\t"
@@ -302,8 +303,6 @@ emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(probes_opcode_t insn,
 }
 
 const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = {
-	[PROBES_EMULATE_NONE] = {.handler = probes_emulate_none},
-	[PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop},
 	[PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop},
 	[PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop},
 	[PROBES_BRANCH_IMM] = {.handler = simulate_blx1},
@@ -341,3 +340,5 @@ const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = {
 	[PROBES_BRANCH] = {.handler = simulate_bbl},
 	[PROBES_LDMSTM] = {.decoder = kprobe_decode_ldmstm}
 };
+
+const struct decode_checker *kprobes_arm_checkers[] = {arm_stack_checker, arm_regs_checker, NULL};
diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/probes/kprobes/actions-common.c
index 0bf5d64eba1d..bd20a71cd34a 100644
--- a/arch/arm/kernel/kprobes-common.c
+++ b/arch/arm/probes/kprobes/actions-common.c
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/kprobes-common.c
+ * arch/arm/probes/kprobes/actions-common.c
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -15,7 +15,7 @@
 #include <linux/kprobes.h>
 #include <asm/opcodes.h>
 
-#include "kprobes.h"
+#include "core.h"
 
 
 static void __kprobes simulate_ldm1stm1(probes_opcode_t insn,
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/probes/kprobes/actions-thumb.c
index 9495d7f3516f..07cfd9bef340 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/probes/kprobes/actions-thumb.c
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/kprobes-thumb.c
+ * arch/arm/probes/kprobes/actions-thumb.c
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -13,8 +13,9 @@
 #include <linux/ptrace.h>
 #include <linux/kprobes.h>
 
-#include "kprobes.h"
-#include "probes-thumb.h"
+#include "../decode-thumb.h"
+#include "core.h"
+#include "checkers.h"
 
 /* These emulation encodings are functionally equivalent... */
 #define t32_emulate_rd8rn16rm0ra12_noflags \
@@ -664,3 +665,6 @@ const union decode_action kprobes_t32_actions[NUM_PROBES_T32_ACTIONS] = {
 	[PROBES_T32_MUL_ADD_LONG] = {
 		.handler = t32_emulate_rdlo12rdhi8rn16rm0_noflags},
 };
+
+const struct decode_checker *kprobes_t32_checkers[] = {t32_stack_checker, NULL};
+const struct decode_checker *kprobes_t16_checkers[] = {t16_stack_checker, NULL};
diff --git a/arch/arm/probes/kprobes/checkers-arm.c b/arch/arm/probes/kprobes/checkers-arm.c
new file mode 100644
index 000000000000..7b9817333b68
--- /dev/null
+++ b/arch/arm/probes/kprobes/checkers-arm.c
@@ -0,0 +1,192 @@
+/*
+ * arch/arm/probes/kprobes/checkers-arm.c
+ *
+ * Copyright (C) 2014 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include "../decode.h"
+#include "../decode-arm.h"
+#include "checkers.h"
+
+static enum probes_insn __kprobes arm_check_stack(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	/*
+	 * PROBES_LDRSTRD, PROBES_LDMSTM, PROBES_STORE,
+	 * PROBES_STORE_EXTRA may get here. Simply mark all normal
+	 * insns as STACK_USE_NONE.
+	 */
+	static const union decode_item table[] = {
+		/*
+		 * 'STR{,D,B,H}, Rt, [Rn, Rm]' should be marked as UNKNOWN
+		 * if Rn or Rm is SP.
+		 *                                 x
+		 * STR (register)	cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx
+		 * STRB (register)	cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx
+		 */
+		DECODE_OR	(0x0e10000f, 0x0600000d),
+		DECODE_OR	(0x0e1f0000, 0x060d0000),
+
+		/*
+		 *                                                     x
+		 * STRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx
+		 * STRH (register)	cccc 000x x0x0 xxxx xxxx xxxx 1011 xxxx
+		 */
+		DECODE_OR	(0x0e5000bf, 0x000000bd),
+		DECODE_CUSTOM	(0x0e5f00b0, 0x000d00b0, STACK_USE_UNKNOWN),
+
+		/*
+		 * For PROBES_LDMSTM, only stmdx sp, [...] need to examine
+		 *
+		 * Bit B/A (bit 24) encodes arithmetic operation order. 1 means
+		 * before, 0 means after.
+		 * Bit I/D (bit 23) encodes arithmetic operation. 1 means
+		 * increment, 0 means decrement.
+		 *
+		 * So:
+		 *                              B I
+		 *                              / /
+		 *                              A D   | Rn |
+		 * STMDX SP, [...]	cccc 100x 00x0 xxxx xxxx xxxx xxxx xxxx
+		 */
+		DECODE_CUSTOM	(0x0edf0000, 0x080d0000, STACK_USE_STMDX),
+
+		/*                              P U W | Rn | Rt |     imm12    |*/
+		/* STR (immediate)	cccc 010x x0x0 1101 xxxx xxxx xxxx xxxx */
+		/* STRB (immediate)	cccc 010x x1x0 1101 xxxx xxxx xxxx xxxx */
+		/*                              P U W | Rn | Rt |imm4|    |imm4|*/
+		/* STRD (immediate)	cccc 000x x1x0 1101 xxxx xxxx 1111 xxxx */
+		/* STRH (immediate)	cccc 000x x1x0 1101 xxxx xxxx 1011 xxxx */
+		/*
+		 * index = (P == '1'); add = (U == '1').
+		 * Above insns with:
+		 *    index == 0 (str{,d,h} rx, [sp], #+/-imm) or
+		 *    add == 1 (str{,d,h} rx, [sp, #+<imm>])
+		 * should be STACK_USE_NONE.
+		 * Only str{,b,d,h} rx,[sp,#-n] (P == 1 and U == 0) are
+		 * required to be examined.
+		 */
+		/* STR{,B} Rt,[SP,#-n]	cccc 0101 0xx0 1101 xxxx xxxx xxxx xxxx */
+		DECODE_CUSTOM	(0x0f9f0000, 0x050d0000, STACK_USE_FIXED_XXX),
+
+		/* STR{D,H} Rt,[SP,#-n]	cccc 0001 01x0 1101 xxxx xxxx 1x11 xxxx */
+		DECODE_CUSTOM	(0x0fdf00b0, 0x014d00b0, STACK_USE_FIXED_X0X),
+
+		/* fall through */
+		DECODE_CUSTOM	(0, 0, STACK_USE_NONE),
+		DECODE_END
+	};
+
+	return probes_decode_insn(insn, asi, table, false, false, stack_check_actions, NULL);
+}
+
+const struct decode_checker arm_stack_checker[NUM_PROBES_ARM_ACTIONS] = {
+	[PROBES_LDRSTRD] = {.checker = arm_check_stack},
+	[PROBES_STORE_EXTRA] = {.checker = arm_check_stack},
+	[PROBES_STORE] = {.checker = arm_check_stack},
+	[PROBES_LDMSTM] = {.checker = arm_check_stack},
+};
+
+static enum probes_insn __kprobes arm_check_regs_nouse(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	asi->register_usage_flags = 0;
+	return INSN_GOOD;
+}
+
+static enum probes_insn arm_check_regs_normal(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	u32 regs = h->type_regs.bits >> DECODE_TYPE_BITS;
+	int i;
+
+	asi->register_usage_flags = 0;
+	for (i = 0; i < 5; regs >>= 4, insn >>= 4, i++)
+		if (regs & 0xf)
+			asi->register_usage_flags |= 1 << (insn & 0xf);
+
+	return INSN_GOOD;
+}
+
+
+static enum probes_insn arm_check_regs_ldmstm(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	unsigned int reglist = insn & 0xffff;
+	unsigned int rn = (insn >> 16) & 0xf;
+	asi->register_usage_flags = reglist | (1 << rn);
+	return INSN_GOOD;
+}
+
+static enum probes_insn arm_check_regs_mov_ip_sp(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	/* Instruction is 'mov ip, sp' i.e. 'mov r12, r13' */
+	asi->register_usage_flags = (1 << 12) | (1<< 13);
+	return INSN_GOOD;
+}
+
+/*
+ *                                    | Rn |Rt/d|         | Rm |
+ * LDRD (register)      cccc 000x x0x0 xxxx xxxx xxxx 1101 xxxx
+ * STRD (register)      cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx
+ *                                    | Rn |Rt/d|         |imm4L|
+ * LDRD (immediate)     cccc 000x x1x0 xxxx xxxx xxxx 1101 xxxx
+ * STRD (immediate)     cccc 000x x1x0 xxxx xxxx xxxx 1111 xxxx
+ *
+ * Such instructions access Rt/d and its next register, so different
+ * from others, a specific checker is required to handle this extra
+ * implicit register usage.
+ */
+static enum probes_insn arm_check_regs_ldrdstrd(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	int rdt = (insn >> 12) & 0xf;
+	arm_check_regs_normal(insn, asi, h);
+	asi->register_usage_flags |= 1 << (rdt + 1);
+	return INSN_GOOD;
+}
+
+
+const struct decode_checker arm_regs_checker[NUM_PROBES_ARM_ACTIONS] = {
+	[PROBES_MRS] = {.checker = arm_check_regs_normal},
+	[PROBES_SATURATING_ARITHMETIC] = {.checker = arm_check_regs_normal},
+	[PROBES_MUL1] = {.checker = arm_check_regs_normal},
+	[PROBES_MUL2] = {.checker = arm_check_regs_normal},
+	[PROBES_MUL_ADD_LONG] = {.checker = arm_check_regs_normal},
+	[PROBES_MUL_ADD] = {.checker = arm_check_regs_normal},
+	[PROBES_LOAD] = {.checker = arm_check_regs_normal},
+	[PROBES_LOAD_EXTRA] = {.checker = arm_check_regs_normal},
+	[PROBES_STORE] = {.checker = arm_check_regs_normal},
+	[PROBES_STORE_EXTRA] = {.checker = arm_check_regs_normal},
+	[PROBES_DATA_PROCESSING_REG] = {.checker = arm_check_regs_normal},
+	[PROBES_DATA_PROCESSING_IMM] = {.checker = arm_check_regs_normal},
+	[PROBES_SEV] = {.checker = arm_check_regs_nouse},
+	[PROBES_WFE] = {.checker = arm_check_regs_nouse},
+	[PROBES_SATURATE] = {.checker = arm_check_regs_normal},
+	[PROBES_REV] = {.checker = arm_check_regs_normal},
+	[PROBES_MMI] = {.checker = arm_check_regs_normal},
+	[PROBES_PACK] = {.checker = arm_check_regs_normal},
+	[PROBES_EXTEND] = {.checker = arm_check_regs_normal},
+	[PROBES_EXTEND_ADD] = {.checker = arm_check_regs_normal},
+	[PROBES_BITFIELD] = {.checker = arm_check_regs_normal},
+	[PROBES_LDMSTM] = {.checker = arm_check_regs_ldmstm},
+	[PROBES_MOV_IP_SP] = {.checker = arm_check_regs_mov_ip_sp},
+	[PROBES_LDRSTRD] = {.checker = arm_check_regs_ldrdstrd},
+};
diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c
new file mode 100644
index 000000000000..971119c29474
--- /dev/null
+++ b/arch/arm/probes/kprobes/checkers-common.c
@@ -0,0 +1,101 @@
+/*
+ * arch/arm/probes/kprobes/checkers-common.c
+ *
+ * Copyright (C) 2014 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include "../decode.h"
+#include "../decode-arm.h"
+#include "checkers.h"
+
+enum probes_insn checker_stack_use_none(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	asi->stack_space = 0;
+	return INSN_GOOD_NO_SLOT;
+}
+
+enum probes_insn checker_stack_use_unknown(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	asi->stack_space = -1;
+	return INSN_GOOD_NO_SLOT;
+}
+
+#ifdef CONFIG_THUMB2_KERNEL
+enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	int imm = insn & 0xff;
+	asi->stack_space = imm;
+	return INSN_GOOD_NO_SLOT;
+}
+
+/*
+ * Different from other insn uses imm8, the real addressing offset of
+ * STRD in T32 encoding should be imm8 * 4. See ARMARM description.
+ */
+enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	int imm = insn & 0xff;
+	asi->stack_space = imm << 2;
+	return INSN_GOOD_NO_SLOT;
+}
+#else
+enum probes_insn checker_stack_use_imm_x0x(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	int imm = ((insn & 0xf00) >> 4) + (insn & 0xf);
+	asi->stack_space = imm;
+	return INSN_GOOD_NO_SLOT;
+}
+#endif
+
+enum probes_insn checker_stack_use_imm_xxx(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	int imm = insn & 0xfff;
+	asi->stack_space = imm;
+	return INSN_GOOD_NO_SLOT;
+}
+
+enum probes_insn checker_stack_use_stmdx(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	unsigned int reglist = insn & 0xffff;
+	int pbit = insn & (1 << 24);
+	asi->stack_space = (hweight32(reglist) - (!pbit ? 1 : 0)) * 4;
+
+	return INSN_GOOD_NO_SLOT;
+}
+
+const union decode_action stack_check_actions[] = {
+	[STACK_USE_NONE] = {.decoder = checker_stack_use_none},
+	[STACK_USE_UNKNOWN] = {.decoder = checker_stack_use_unknown},
+#ifdef CONFIG_THUMB2_KERNEL
+	[STACK_USE_FIXED_0XX] = {.decoder = checker_stack_use_imm_0xx},
+	[STACK_USE_T32STRD] = {.decoder = checker_stack_use_t32strd},
+#else
+	[STACK_USE_FIXED_X0X] = {.decoder = checker_stack_use_imm_x0x},
+#endif
+	[STACK_USE_FIXED_XXX] = {.decoder = checker_stack_use_imm_xxx},
+	[STACK_USE_STMDX] = {.decoder = checker_stack_use_stmdx},
+};
diff --git a/arch/arm/probes/kprobes/checkers-thumb.c b/arch/arm/probes/kprobes/checkers-thumb.c
new file mode 100644
index 000000000000..d608e3b9017a
--- /dev/null
+++ b/arch/arm/probes/kprobes/checkers-thumb.c
@@ -0,0 +1,110 @@
+/*
+ * arch/arm/probes/kprobes/checkers-thumb.c
+ *
+ * Copyright (C) 2014 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include "../decode.h"
+#include "../decode-thumb.h"
+#include "checkers.h"
+
+static enum probes_insn __kprobes t32_check_stack(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	/*
+	 * PROBES_T32_LDMSTM, PROBES_T32_LDRDSTRD and PROBES_T32_LDRSTR
+	 * may get here. Simply mark all normal insns as STACK_USE_NONE.
+	 */
+	static const union decode_item table[] = {
+
+		/*
+		 * First, filter out all ldr insns to make our life easier.
+		 * Following load insns may come here:
+		 * LDM, LDRD, LDR.
+		 * In T32 encoding, bit 20 is enough for distinguishing
+		 * load and store. All load insns have this bit set, when
+		 * all store insns have this bit clear.
+		 */
+		DECODE_CUSTOM	(0x00100000, 0x00100000, STACK_USE_NONE),
+
+		/*
+		 * Mark all 'STR{,B,H}, Rt, [Rn, Rm]' as STACK_USE_UNKNOWN
+		 * if Rn or Rm is SP. T32 doesn't encode STRD.
+		 */
+		/*                                 xx | Rn | Rt |         | Rm |*/
+		/* STR (register)	1111 1000 0100 xxxx xxxx 0000 00xx xxxx */
+		/* STRB (register)	1111 1000 0000 xxxx xxxx 0000 00xx xxxx */
+		/* STRH (register)	1111 1000 0010 xxxx xxxx 0000 00xx xxxx */
+		/* INVALID INSN		1111 1000 0110 xxxx xxxx 0000 00xx xxxx */
+		/* By Introducing INVALID INSN, bit 21 and 22 can be ignored. */
+		DECODE_OR	(0xff9f0fc0, 0xf80d0000),
+		DECODE_CUSTOM	(0xff900fcf, 0xf800000d, STACK_USE_UNKNOWN),
+
+
+		/*                                 xx | Rn | Rt | PUW|   imm8  |*/
+		/* STR (imm 8)		1111 1000 0100 1101 xxxx 110x xxxx xxxx */
+		/* STRB (imm 8)		1111 1000 0000 1101 xxxx 110x xxxx xxxx */
+		/* STRH (imm 8)		1111 1000 0010 1101 xxxx 110x xxxx xxxx */
+		/* INVALID INSN		1111 1000 0110 1101 xxxx 110x xxxx xxxx */
+		/* Only consider U == 0 and P == 1: strx rx, [sp, #-<imm>] */
+		DECODE_CUSTOM	(0xff9f0e00, 0xf80d0c00, STACK_USE_FIXED_0XX),
+
+		/* For STR{,B,H} (imm 12), offset is always positive, so ignore them. */
+
+		/*                              P U W | Rn | Rt | Rt2|   imm8  |*/
+		/* STRD (immediate)	1110 1001 01x0 1101 xxxx xxxx xxxx xxxx */
+		/*
+		 * Only consider U == 0 and P == 1.
+		 * Also note that STRD in T32 encoding is special:
+		 * imm = ZeroExtend(imm8:'00', 32)
+		 */
+		DECODE_CUSTOM	(0xffdf0000, 0xe94d0000, STACK_USE_T32STRD),
+
+		/*                                    | Rn | */
+		/* STMDB		1110 1001 00x0 1101 xxxx xxxx xxxx xxxx */
+		DECODE_CUSTOM	(0xffdf0000, 0xe90d0000, STACK_USE_STMDX),
+
+		/* fall through */
+		DECODE_CUSTOM	(0, 0, STACK_USE_NONE),
+		DECODE_END
+	};
+
+	return probes_decode_insn(insn, asi, table, false, false, stack_check_actions, NULL);
+}
+
+const struct decode_checker t32_stack_checker[NUM_PROBES_T32_ACTIONS] = {
+	[PROBES_T32_LDMSTM] = {.checker = t32_check_stack},
+	[PROBES_T32_LDRDSTRD] = {.checker = t32_check_stack},
+	[PROBES_T32_LDRSTR] = {.checker = t32_check_stack},
+};
+
+/*
+ * See following comments. This insn must be 'push'.
+ */
+static enum probes_insn __kprobes t16_check_stack(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		const struct decode_header *h)
+{
+	unsigned int reglist = insn & 0x1ff;
+	asi->stack_space = hweight32(reglist) * 4;
+	return INSN_GOOD;
+}
+
+/*
+ * T16 encoding is simple: only the 'push' insn can need extra stack space.
+ * Other insns, like str, can only use r0-r7 as Rn.
+ */
+const struct decode_checker t16_stack_checker[NUM_PROBES_T16_ACTIONS] = {
+	[PROBES_T16_PUSH] = {.checker = t16_check_stack},
+};
diff --git a/arch/arm/probes/kprobes/checkers.h b/arch/arm/probes/kprobes/checkers.h
new file mode 100644
index 000000000000..cf6c9e74d666
--- /dev/null
+++ b/arch/arm/probes/kprobes/checkers.h
@@ -0,0 +1,55 @@
+/*
+ * arch/arm/probes/kprobes/checkers.h
+ *
+ * Copyright (C) 2014 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef _ARM_KERNEL_PROBES_CHECKERS_H
+#define _ARM_KERNEL_PROBES_CHECKERS_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include "../decode.h"
+
+extern probes_check_t checker_stack_use_none;
+extern probes_check_t checker_stack_use_unknown;
+#ifdef CONFIG_THUMB2_KERNEL
+extern probes_check_t checker_stack_use_imm_0xx;
+#else
+extern probes_check_t checker_stack_use_imm_x0x;
+#endif
+extern probes_check_t checker_stack_use_imm_xxx;
+extern probes_check_t checker_stack_use_stmdx;
+
+enum {
+	STACK_USE_NONE,
+	STACK_USE_UNKNOWN,
+#ifdef CONFIG_THUMB2_KERNEL
+	STACK_USE_FIXED_0XX,
+	STACK_USE_T32STRD,
+#else
+	STACK_USE_FIXED_X0X,
+#endif
+	STACK_USE_FIXED_XXX,
+	STACK_USE_STMDX,
+	NUM_STACK_USE_TYPES
+};
+
+extern const union decode_action stack_check_actions[];
+
+#ifndef CONFIG_THUMB2_KERNEL
+extern const struct decode_checker arm_stack_checker[];
+extern const struct decode_checker arm_regs_checker[];
+#else
+#endif
+extern const struct decode_checker t32_stack_checker[];
+extern const struct decode_checker t16_stack_checker[];
+#endif
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/probes/kprobes/core.c
index 6d644202c8dc..a4ec240ee7ba 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -30,11 +30,11 @@
 #include <asm/cacheflush.h>
 #include <linux/percpu.h>
 #include <linux/bug.h>
+#include <asm/patch.h>
 
-#include "kprobes.h"
-#include "probes-arm.h"
-#include "probes-thumb.h"
-#include "patch.h"
+#include "../decode-arm.h"
+#include "../decode-thumb.h"
+#include "core.h"
 
 #define MIN_STACK_SIZE(addr) 				\
 	min((unsigned long)MAX_STACK_SIZE,		\
@@ -61,6 +61,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	kprobe_decode_insn_t *decode_insn;
 	const union decode_action *actions;
 	int is;
+	const struct decode_checker **checkers;
 
 	if (in_exception_text(addr))
 		return -EINVAL;
@@ -74,9 +75,11 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 		insn = __opcode_thumb32_compose(insn, inst2);
 		decode_insn = thumb32_probes_decode_insn;
 		actions = kprobes_t32_actions;
+		checkers = kprobes_t32_checkers;
 	} else {
 		decode_insn = thumb16_probes_decode_insn;
 		actions = kprobes_t16_actions;
+		checkers = kprobes_t16_checkers;
 	}
 #else /* !CONFIG_THUMB2_KERNEL */
 	thumb = false;
@@ -85,12 +88,13 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	insn = __mem_to_opcode_arm(*p->addr);
 	decode_insn = arm_probes_decode_insn;
 	actions = kprobes_arm_actions;
+	checkers = kprobes_arm_checkers;
 #endif
 
 	p->opcode = insn;
 	p->ainsn.insn = tmp_insn;
 
-	switch ((*decode_insn)(insn, &p->ainsn, true, actions)) {
+	switch ((*decode_insn)(insn, &p->ainsn, true, actions, checkers)) {
 	case INSN_REJECTED:	/* not supported */
 		return -EINVAL;
 
@@ -111,6 +115,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 		break;
 	}
 
+	/*
+	 * Never instrument insn like 'str r0, [sp, +/-r1]'. Also, insn likes
+	 * 'str r0, [sp, #-68]' should also be prohibited.
+	 * See __und_svc.
+	 */
+	if ((p->ainsn.stack_space < 0) ||
+			(p->ainsn.stack_space > MAX_STACK_SIZE))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -150,19 +163,31 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
  * memory. It is also needed to atomically set the two half-words of a 32-bit
  * Thumb breakpoint.
  */
-int __kprobes __arch_disarm_kprobe(void *p)
-{
-	struct kprobe *kp = p;
-	void *addr = (void *)((uintptr_t)kp->addr & ~1);
-
-	__patch_text(addr, kp->opcode);
+struct patch {
+	void *addr;
+	unsigned int insn;
+};
 
+static int __kprobes_remove_breakpoint(void *data)
+{
+	struct patch *p = data;
+	__patch_text(p->addr, p->insn);
 	return 0;
 }
 
+void __kprobes kprobes_remove_breakpoint(void *addr, unsigned int insn)
+{
+	struct patch p = {
+		.addr = addr,
+		.insn = insn,
+	};
+	stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask);
+}
+
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-	stop_machine(__arch_disarm_kprobe, p, cpu_online_mask);
+	kprobes_remove_breakpoint((void *)((uintptr_t)p->addr & ~1),
+			p->opcode);
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/probes/kprobes/core.h
index 9a2712ecefc3..ec5d1f20a085 100644
--- a/arch/arm/kernel/kprobes.h
+++ b/arch/arm/probes/kprobes/core.h
@@ -19,7 +19,8 @@
 #ifndef _ARM_KERNEL_KPROBES_H
 #define _ARM_KERNEL_KPROBES_H
 
-#include "probes.h"
+#include <asm/kprobes.h>
+#include "../decode.h"
 
 /*
  * These undefined instructions must be unique and
@@ -29,6 +30,8 @@
 #define KPROBE_THUMB16_BREAKPOINT_INSTRUCTION	0xde18
 #define KPROBE_THUMB32_BREAKPOINT_INSTRUCTION	0xf7f0a018
 
+extern void kprobes_remove_breakpoint(void *addr, unsigned int insn);
+
 enum probes_insn __kprobes
 kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_probes_insn *asi,
 		const struct decode_header *h);
@@ -36,16 +39,19 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_probes_insn *asi,
 typedef enum probes_insn (kprobe_decode_insn_t)(probes_opcode_t,
 						struct arch_probes_insn *,
 						bool,
-						const union decode_action *);
+						const union decode_action *,
+						const struct decode_checker *[]);
 
 #ifdef CONFIG_THUMB2_KERNEL
 
 extern const union decode_action kprobes_t32_actions[];
 extern const union decode_action kprobes_t16_actions[];
-
+extern const struct decode_checker *kprobes_t32_checkers[];
+extern const struct decode_checker *kprobes_t16_checkers[];
 #else /* !CONFIG_THUMB2_KERNEL */
 
 extern const union decode_action kprobes_arm_actions[];
+extern const struct decode_checker *kprobes_arm_checkers[];
 
 #endif
 
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
new file mode 100644
index 000000000000..bcdecc25461b
--- /dev/null
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -0,0 +1,370 @@
+/*
+ *  Kernel Probes Jump Optimization (Optprobes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Hitachi Ltd., 2012
+ * Copyright (C) Huawei Inc., 2014
+ */
+
+#include <linux/kprobes.h>
+#include <linux/jump_label.h>
+#include <asm/kprobes.h>
+#include <asm/cacheflush.h>
+/* for arm_gen_branch */
+#include <asm/insn.h>
+/* for patch_text */
+#include <asm/patch.h>
+
+#include "core.h"
+
+/*
+ * See register_usage_flags. If the probed instruction doesn't use PC,
+ * we can copy it into template and have it executed directly without
+ * simulation or emulation.
+ */
+#define ARM_REG_PC	15
+#define can_kprobe_direct_exec(m)	(!test_bit(ARM_REG_PC, &(m)))
+
+/*
+ * NOTE: the first sub and add instruction will be modified according
+ * to the stack cost of the instruction.
+ */
+asm (
+			".global optprobe_template_entry\n"
+			"optprobe_template_entry:\n"
+			".global optprobe_template_sub_sp\n"
+			"optprobe_template_sub_sp:"
+			"	sub	sp, sp, #0xff\n"
+			"	stmia	sp, {r0 - r14} \n"
+			".global optprobe_template_add_sp\n"
+			"optprobe_template_add_sp:"
+			"	add	r3, sp, #0xff\n"
+			"	str	r3, [sp, #52]\n"
+			"	mrs	r4, cpsr\n"
+			"	str	r4, [sp, #64]\n"
+			"	mov	r1, sp\n"
+			"	ldr	r0, 1f\n"
+			"	ldr	r2, 2f\n"
+			/*
+			 * AEABI requires an 8-bytes alignment stack. If
+			 * SP % 8 != 0 (SP % 4 == 0 should be ensured),
+			 * alloc more bytes here.
+			 */
+			"	and	r4, sp, #4\n"
+			"	sub	sp, sp, r4\n"
+#if __LINUX_ARM_ARCH__ >= 5
+			"	blx	r2\n"
+#else
+			"	mov     lr, pc\n"
+			"	mov	pc, r2\n"
+#endif
+			"	add	sp, sp, r4\n"
+			"	ldr	r1, [sp, #64]\n"
+			"	tst	r1, #"__stringify(PSR_T_BIT)"\n"
+			"	ldrne	r2, [sp, #60]\n"
+			"	orrne	r2, #1\n"
+			"	strne	r2, [sp, #60] @ set bit0 of PC for thumb\n"
+			"	msr	cpsr_cxsf, r1\n"
+			".global optprobe_template_restore_begin\n"
+			"optprobe_template_restore_begin:\n"
+			"	ldmia	sp, {r0 - r15}\n"
+			".global optprobe_template_restore_orig_insn\n"
+			"optprobe_template_restore_orig_insn:\n"
+			"	nop\n"
+			".global optprobe_template_restore_end\n"
+			"optprobe_template_restore_end:\n"
+			"	nop\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val:\n"
+			"1:	.long 0\n"
+			".global optprobe_template_call\n"
+			"optprobe_template_call:\n"
+			"2:	.long 0\n"
+			".global optprobe_template_end\n"
+			"optprobe_template_end:\n");
+
+#define TMPL_VAL_IDX \
+	((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+	((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry)
+#define TMPL_END_IDX \
+	((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry)
+#define TMPL_ADD_SP \
+	((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry)
+#define TMPL_SUB_SP \
+	((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry)
+#define TMPL_RESTORE_BEGIN \
+	((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry)
+#define TMPL_RESTORE_ORIGN_INSN \
+	((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry)
+#define TMPL_RESTORE_END \
+	((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry)
+
+/*
+ * ARM can always optimize an instruction when using ARM ISA, except
+ * instructions like 'str r0, [sp, r1]' which store to stack and unable
+ * to determine stack space consumption statically.
+ */
+int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+	return optinsn->insn != NULL;
+}
+
+/*
+ * In ARM ISA, kprobe opt always replace one instruction (4 bytes
+ * aligned and 4 bytes long). It is impossible to encounter another
+ * kprobe in the address range. So always return 0.
+ */
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+	return 0;
+}
+
+/* Caller must ensure addr & 3 == 0 */
+static int can_optimize(struct kprobe *kp)
+{
+	if (kp->ainsn.stack_space < 0)
+		return 0;
+	/*
+	 * 255 is the biggest imm can be used in 'sub r0, r0, #<imm>'.
+	 * Number larger than 255 needs special encoding.
+	 */
+	if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs))
+		return 0;
+	return 1;
+}
+
+/* Free optimized instruction slot */
+static void
+__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+	if (op->optinsn.insn) {
+		free_optinsn_slot(op->optinsn.insn, dirty);
+		op->optinsn.insn = NULL;
+	}
+}
+
+extern void kprobe_handler(struct pt_regs *regs);
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	unsigned long flags;
+	struct kprobe *p = &op->kp;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	/* Save skipped registers */
+	regs->ARM_pc = (unsigned long)op->kp.addr;
+	regs->ARM_ORIG_r0 = ~0UL;
+
+	local_irq_save(flags);
+
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		__this_cpu_write(current_kprobe, &op->kp);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+
+	/*
+	 * We singlestep the replaced instruction only when it can't be
+	 * executed directly during restore.
+	 */
+	if (!p->ainsn.kprobe_direct_exec)
+		op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs);
+
+	local_irq_restore(flags);
+}
+
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
+{
+	kprobe_opcode_t *code;
+	unsigned long rel_chk;
+	unsigned long val;
+	unsigned long stack_protect = sizeof(struct pt_regs);
+
+	if (!can_optimize(orig))
+		return -EILSEQ;
+
+	code = get_optinsn_slot();
+	if (!code)
+		return -ENOMEM;
+
+	/*
+	 * Verify if the address gap is in 32MiB range, because this uses
+	 * a relative jump.
+	 *
+	 * kprobe opt use a 'b' instruction to branch to optinsn.insn.
+	 * According to ARM manual, branch instruction is:
+	 *
+	 *   31  28 27           24 23             0
+	 *  +------+---+---+---+---+----------------+
+	 *  | cond | 1 | 0 | 1 | 0 |      imm24     |
+	 *  +------+---+---+---+---+----------------+
+	 *
+	 * imm24 is a signed 24 bits integer. The real branch offset is computed
+	 * by: imm32 = SignExtend(imm24:'00', 32);
+	 *
+	 * So the maximum forward branch should be:
+	 *   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
+	 * The maximum backword branch should be:
+	 *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
+	 *
+	 * We can simply check (rel & 0xfe000003):
+	 *  if rel is positive, (rel & 0xfe000000) shoule be 0
+	 *  if rel is negitive, (rel & 0xfe000000) should be 0xfe000000
+	 *  the last '3' is used for alignment checking.
+	 */
+	rel_chk = (unsigned long)((long)code -
+			(long)orig->addr + 8) & 0xfe000003;
+
+	if ((rel_chk != 0) && (rel_chk != 0xfe000000)) {
+		/*
+		 * Different from x86, we free code buf directly instead of
+		 * calling __arch_remove_optimized_kprobe() because
+		 * we have not fill any field in op.
+		 */
+		free_optinsn_slot(code, 0);
+		return -ERANGE;
+	}
+
+	/* Copy arch-dep-instance from template. */
+	memcpy(code, &optprobe_template_entry,
+			TMPL_END_IDX * sizeof(kprobe_opcode_t));
+
+	/* Adjust buffer according to instruction. */
+	BUG_ON(orig->ainsn.stack_space < 0);
+
+	stack_protect += orig->ainsn.stack_space;
+
+	/* Should have been filtered by can_optimize(). */
+	BUG_ON(stack_protect > 255);
+
+	/* Create a 'sub sp, sp, #<stack_protect>' */
+	code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect);
+	/* Create a 'add r3, sp, #<stack_protect>' */
+	code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect);
+
+	/* Set probe information */
+	val = (unsigned long)op;
+	code[TMPL_VAL_IDX] = val;
+
+	/* Set probe function call */
+	val = (unsigned long)optimized_callback;
+	code[TMPL_CALL_IDX] = val;
+
+	/* If possible, copy insn and have it executed during restore */
+	orig->ainsn.kprobe_direct_exec = false;
+	if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) {
+		kprobe_opcode_t final_branch = arm_gen_branch(
+				(unsigned long)(&code[TMPL_RESTORE_END]),
+				(unsigned long)(op->kp.addr) + 4);
+		if (final_branch != 0) {
+			/*
+			 * Replace original 'ldmia sp, {r0 - r15}' with
+			 * 'ldmia {r0 - r14}', restore all registers except pc.
+			 */
+			code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff);
+
+			/* The original probed instruction */
+			code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode);
+
+			/* Jump back to next instruction */
+			code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch);
+			orig->ainsn.kprobe_direct_exec = true;
+		}
+	}
+
+	flush_icache_range((unsigned long)code,
+			   (unsigned long)(&code[TMPL_END_IDX]));
+
+	/* Set op->optinsn.insn means prepared. */
+	op->optinsn.insn = code;
+	return 0;
+}
+
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+	struct optimized_kprobe *op, *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		unsigned long insn;
+		WARN_ON(kprobe_disabled(&op->kp));
+
+		/*
+		 * Backup instructions which will be replaced
+		 * by jump address
+		 */
+		memcpy(op->optinsn.copied_insn, op->kp.addr,
+				RELATIVEJUMP_SIZE);
+
+		insn = arm_gen_branch((unsigned long)op->kp.addr,
+				(unsigned long)op->optinsn.insn);
+		BUG_ON(insn == 0);
+
+		/*
+		 * Make it a conditional branch if replaced insn
+		 * is consitional
+		 */
+		insn = (__mem_to_opcode_arm(
+			  op->optinsn.copied_insn[0]) & 0xf0000000) |
+			(insn & 0x0fffffff);
+
+		/*
+		 * Similar to __arch_disarm_kprobe, operations which
+		 * removing breakpoints must be wrapped by stop_machine
+		 * to avoid racing.
+		 */
+		kprobes_remove_breakpoint(op->kp.addr, insn);
+
+		list_del_init(&op->list);
+	}
+}
+
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	arch_arm_kprobe(&op->kp);
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+void arch_unoptimize_kprobes(struct list_head *oplist,
+			    struct list_head *done_list)
+{
+	struct optimized_kprobe *op, *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		arch_unoptimize_kprobe(op);
+		list_move(&op->list, done_list);
+	}
+}
+
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+				unsigned long addr)
+{
+	return ((unsigned long)op->kp.addr <= addr &&
+		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
+}
+
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+	__arch_remove_optimized_kprobe(op, 1);
+}
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/probes/kprobes/test-arm.c
index cb1424240ff6..8866aedfdea2 100644
--- a/arch/arm/kernel/kprobes-test-arm.c
+++ b/arch/arm/probes/kprobes/test-arm.c
@@ -12,8 +12,9 @@
 #include <linux/module.h>
 #include <asm/system_info.h>
 #include <asm/opcodes.h>
+#include <asm/probes.h>
 
-#include "kprobes-test.h"
+#include "test-core.h"
 
 
 #define TEST_ISA "32"
@@ -203,9 +204,9 @@ void kprobe_arm_test_cases(void)
 #endif
 	TEST_GROUP("Miscellaneous instructions")
 
-	TEST("mrs	r0, cpsr")
-	TEST("mrspl	r7, cpsr")
-	TEST("mrs	r14, cpsr")
+	TEST_RMASKED("mrs	r",0,~PSR_IGNORE_BITS,", cpsr")
+	TEST_RMASKED("mrspl	r",7,~PSR_IGNORE_BITS,", cpsr")
+	TEST_RMASKED("mrs	r",14,~PSR_IGNORE_BITS,", cpsr")
 	TEST_UNSUPPORTED(__inst_arm(0xe10ff000) "	@ mrs r15, cpsr")
 	TEST_UNSUPPORTED("mrs	r0, spsr")
 	TEST_UNSUPPORTED("mrs	lr, spsr")
@@ -214,9 +215,12 @@ void kprobe_arm_test_cases(void)
 	TEST_UNSUPPORTED("msr	cpsr_f, lr")
 	TEST_UNSUPPORTED("msr	spsr, r0")
 
+#if __LINUX_ARM_ARCH__ >= 5 || \
+    (__LINUX_ARM_ARCH__ == 4 && !defined(CONFIG_CPU_32v4))
 	TEST_BF_R("bx	r",0,2f,"")
 	TEST_BB_R("bx	r",7,2f,"")
 	TEST_BF_R("bxeq	r",14,2f,"")
+#endif
 
 #if __LINUX_ARM_ARCH__ >= 5
 	TEST_R("clz	r0, r",0, 0x0,"")
@@ -476,7 +480,9 @@ void kprobe_arm_test_cases(void)
 	TEST_GROUP("Extra load/store instructions")
 
 	TEST_RPR(  "strh	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")
-	TEST_RPR(  "streqh	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")
+	TEST_RPR(  "streqh	r",14,VAL2,", [r",11,0, ", r",12, 48,"]")
+	TEST_UNSUPPORTED(  "streqh	r14, [r13, r12]")
+	TEST_UNSUPPORTED(  "streqh	r14, [r12, r13]")
 	TEST_RPR(  "strh	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")
 	TEST_RPR(  "strneh	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
 	TEST_RPR(  "strh	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")
@@ -501,6 +507,9 @@ void kprobe_arm_test_cases(void)
 	TEST_RP(   "strplh	r",12,VAL2,", [r",11,24,", #-4]!")
 	TEST_RP(   "strh	r",2, VAL1,", [r",3, 24,"], #48")
 	TEST_RP(   "strh	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_RP(   "strh	r",3, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!")
+	TEST_UNSUPPORTED("strh r3, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!")
+	TEST_RP(   "strh	r",4, VAL1,", [r",14,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!")
 	TEST_UNSUPPORTED(__inst_arm(0xe1efc3b0) "	@ strh r12, [pc, #48]!")
 	TEST_UNSUPPORTED(__inst_arm(0xe0c9f3b0) "	@ strh pc, [r9], #48")
 
@@ -565,7 +574,9 @@ void kprobe_arm_test_cases(void)
 
 #if __LINUX_ARM_ARCH__ >= 5
 	TEST_RPR(  "strd	r",0, VAL1,", [r",1, 48,", -r",2,24,"]")
-	TEST_RPR(  "strccd	r",8, VAL2,", [r",13,0, ", r",12,48,"]")
+	TEST_RPR(  "strccd	r",8, VAL2,", [r",11,0, ", r",12,48,"]")
+	TEST_UNSUPPORTED(  "strccd r8, [r13, r12]")
+	TEST_UNSUPPORTED(  "strccd r8, [r12, r13]")
 	TEST_RPR(  "strd	r",4, VAL1,", [r",2, 24,", r",3, 48,"]!")
 	TEST_RPR(  "strcsd	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
 	TEST_RPR(  "strd	r",2, VAL1,", [r",5, 24,"], r",4,48,"")
@@ -589,6 +600,9 @@ void kprobe_arm_test_cases(void)
 	TEST_RP(   "strvcd	r",12,VAL2,", [r",11,24,", #-16]!")
 	TEST_RP(   "strd	r",2, VAL1,", [r",4, 24,"], #48")
 	TEST_RP(   "strd	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_RP(   "strd	r",6, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!")
+	TEST_UNSUPPORTED("strd r6, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!")
+	TEST_RP(   "strd	r",4, VAL1,", [r",12,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!")
 	TEST_UNSUPPORTED(__inst_arm(0xe1efc3f0) "	@ strd r12, [pc, #48]!")
 
 	TEST_P(	   "ldrd	r0, [r",0, 24,", #-8]")
@@ -637,14 +651,20 @@ void kprobe_arm_test_cases(void)
 	TEST_RP( "str"byte"	r",12,VAL2,", [r",11,24,", #-4]!")		\
 	TEST_RP( "str"byte"	r",2, VAL1,", [r",3, 24,"], #48")		\
 	TEST_RP( "str"byte"	r",10,VAL2,", [r",9, 64,"], #-48")		\
+	TEST_RP( "str"byte"	r",3, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") \
+	TEST_UNSUPPORTED("str"byte" r3, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!")				\
+	TEST_RP( "str"byte"	r",4, VAL1,", [r",10,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") \
 	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")	\
-	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")	\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",11,0, ", r",12, 48,"]")	\
+	TEST_UNSUPPORTED("str"byte" r14, [r13, r12]")				\
+	TEST_UNSUPPORTED("str"byte" r14, [r12, r13]")				\
 	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")	\
 	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")	\
 	TEST_RPR("str"byte"	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")	\
 	TEST_RPR("str"byte"	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")	\
 	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 24,", r",2,  32,", asl #1]")\
-	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",11,0, ", r",12, 32,", lsr #2]")\
+	TEST_UNSUPPORTED("str"byte"	r14, [r13, r12, lsr #2]")		\
 	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  32,", asr #3]!")\
 	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\
 	TEST_P(  "ldr"byte"	r0, [r",0,  24,", #-2]")			\
@@ -668,12 +688,12 @@ void kprobe_arm_test_cases(void)
 
 	LOAD_STORE("")
 	TEST_P(   "str	pc, [r",0,0,", #15*4]")
-	TEST_R(   "str	pc, [sp, r",2,15*4,"]")
+	TEST_UNSUPPORTED(   "str	pc, [sp, r2]")
 	TEST_BF(  "ldr	pc, [sp, #15*4]")
 	TEST_BF_R("ldr	pc, [sp, r",2,15*4,"]")
 
 	TEST_P(   "str	sp, [r",0,0,", #13*4]")
-	TEST_R(   "str	sp, [sp, r",2,13*4,"]")
+	TEST_UNSUPPORTED(   "str	sp, [sp, r2]")
 	TEST_BF(  "ldr	sp, [sp, #13*4]")
 	TEST_BF_R("ldr	sp, [sp, r",2,13*4,"]")
 
diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/probes/kprobes/test-core.c
index b206d7790c77..9775de22e2ff 100644
--- a/arch/arm/kernel/kprobes-test.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -209,10 +209,10 @@
 #include <linux/bug.h>
 #include <asm/opcodes.h>
 
-#include "kprobes.h"
-#include "probes-arm.h"
-#include "probes-thumb.h"
-#include "kprobes-test.h"
+#include "core.h"
+#include "test-core.h"
+#include "../decode-arm.h"
+#include "../decode-thumb.h"
 
 
 #define BENCHMARKING	1
@@ -236,6 +236,8 @@ static int tests_failed;
 
 #ifndef CONFIG_THUMB2_KERNEL
 
+#define RET(reg)	"mov	pc, "#reg
+
 long arm_func(long r0, long r1);
 
 static void __used __naked __arm_kprobes_test_func(void)
@@ -245,7 +247,7 @@ static void __used __naked __arm_kprobes_test_func(void)
 		".type arm_func, %%function		\n\t"
 		"arm_func:				\n\t"
 		"adds	r0, r0, r1			\n\t"
-		"bx	lr				\n\t"
+		"mov	pc, lr				\n\t"
 		".code "NORMAL_ISA	 /* Back to Thumb if necessary */
 		: : : "r0", "r1", "cc"
 	);
@@ -253,6 +255,8 @@ static void __used __naked __arm_kprobes_test_func(void)
 
 #else /* CONFIG_THUMB2_KERNEL */
 
+#define RET(reg)	"bx	"#reg
+
 long thumb16_func(long r0, long r1);
 long thumb32even_func(long r0, long r1);
 long thumb32odd_func(long r0, long r1);
@@ -494,7 +498,7 @@ static void __naked benchmark_nop(void)
 {
 	__asm__ __volatile__ (
 		"nop		\n\t"
-		"bx	lr"
+		RET(lr)"	\n\t"
 	);
 }
 
@@ -977,7 +981,7 @@ void __naked __kprobes_test_case_start(void)
 		"bic	r0, lr, #1  @ r0 = inline data		\n\t"
 		"mov	r1, sp					\n\t"
 		"bl	kprobes_test_case_start			\n\t"
-		"bx	r0					\n\t"
+		RET(r0)"					\n\t"
 	);
 }
 
@@ -1056,15 +1060,6 @@ static int test_case_run_count;
 static bool test_case_is_thumb;
 static int test_instance;
 
-/*
- * We ignore the state of the imprecise abort disable flag (CPSR.A) because this
- * can change randomly as the kernel doesn't take care to preserve or initialise
- * this across context switches. Also, with Security Extentions, the flag may
- * not be under control of the kernel; for this reason we ignore the state of
- * the FIQ disable flag CPSR.F as well.
- */
-#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT)
-
 static unsigned long test_check_cc(int cc, unsigned long cpsr)
 {
 	int ret = arm_check_condition(cc << 28, cpsr);
@@ -1196,6 +1191,13 @@ static void setup_test_context(struct pt_regs *regs)
 			regs->uregs[arg->reg] =
 				(unsigned long)current_stack + arg->val;
 			memory_needs_checking = true;
+			/*
+			 * Test memory at an address below SP is in danger of
+			 * being altered by an interrupt occurring and pushing
+			 * data onto the stack. Disable interrupts to stop this.
+			 */
+			if (arg->reg == 13)
+				regs->ARM_cpsr |= PSR_I_BIT;
 			break;
 		}
 		case ARG_TYPE_MEM: {
@@ -1264,14 +1266,26 @@ test_case_pre_handler(struct kprobe *p, struct pt_regs *regs)
 static int __kprobes
 test_after_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
+	struct test_arg *args;
+
 	if (container_of(p, struct test_probe, kprobe)->hit == test_instance)
 		return 0; /* Already run for this test instance */
 
 	result_regs = *regs;
+
+	/* Mask out results which are indeterminate */
 	result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+	for (args = current_args; args[0].type != ARG_TYPE_END; ++args)
+		if (args[0].type == ARG_TYPE_REG_MASKED) {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			result_regs.uregs[arg->reg] &= arg->val;
+		}
 
 	/* Undo any changes done to SP by the test case */
 	regs->ARM_sp = (unsigned long)current_stack;
+	/* Enable interrupts in case setup_test_context disabled them */
+	regs->ARM_cpsr &= ~PSR_I_BIT;
 
 	container_of(p, struct test_probe, kprobe)->hit = test_instance;
 	return 0;
diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/probes/kprobes/test-core.h
index 4430990e90e7..94285203e9f7 100644
--- a/arch/arm/kernel/kprobes-test.h
+++ b/arch/arm/probes/kprobes/test-core.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/kprobes-test.h
+ * arch/arm/probes/kprobes/test-core.h
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -45,10 +45,11 @@ extern int kprobe_test_cc_position;
  *
  */
 
-#define	ARG_TYPE_END	0
-#define	ARG_TYPE_REG	1
-#define	ARG_TYPE_PTR	2
-#define	ARG_TYPE_MEM	3
+#define	ARG_TYPE_END		0
+#define	ARG_TYPE_REG		1
+#define	ARG_TYPE_PTR		2
+#define	ARG_TYPE_MEM		3
+#define	ARG_TYPE_REG_MASKED	4
 
 #define ARG_FLAG_UNSUPPORTED	0x01
 #define ARG_FLAG_SUPPORTED	0x02
@@ -61,7 +62,7 @@ struct test_arg {
 };
 
 struct test_arg_regptr {
-	u8	type;		/* ARG_TYPE_REG or ARG_TYPE_PTR */
+	u8	type;		/* ARG_TYPE_REG or ARG_TYPE_PTR or ARG_TYPE_REG_MASKED */
 	u8	reg;
 	u8	_padding[2];
 	u32	val;
@@ -138,6 +139,12 @@ struct test_arg_end {
 	".short	0					\n\t"	\
 	".word	"#val"					\n\t"
 
+#define	TEST_ARG_REG_MASKED(reg, val)				\
+	".byte	"__stringify(ARG_TYPE_REG_MASKED)"	\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
 #define	TEST_ARG_END(flags)					\
 	".byte	"__stringify(ARG_TYPE_END)"		\n\t"	\
 	".byte	"TEST_ISA flags"			\n\t"	\
@@ -395,6 +402,22 @@ struct test_arg_end {
 	"	"codex"			\n\t"					\
 	TESTCASE_END
 
+#define TEST_RMASKED(code1, reg, mask, code2)		\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG_MASKED(reg, mask)			\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 #reg code2)		\
+	TESTCASE_END
+
+/*
+ * We ignore the state of the imprecise abort disable flag (CPSR.A) because this
+ * can change randomly as the kernel doesn't take care to preserve or initialise
+ * this across context switches. Also, with Security Extensions, the flag may
+ * not be under control of the kernel; for this reason we ignore the state of
+ * the FIQ disable flag CPSR.F as well.
+ */
+#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT)
+
 
 /*
  * Macros for defining space directives spread over multiple lines.
diff --git a/arch/arm/kernel/kprobes-test-thumb.c b/arch/arm/probes/kprobes/test-thumb.c
index 844dd10d8593..b683b4517458 100644
--- a/arch/arm/kernel/kprobes-test-thumb.c
+++ b/arch/arm/probes/kprobes/test-thumb.c
@@ -1,5 +1,5 @@
 /*
- * arch/arm/kernel/kprobes-test-thumb.c
+ * arch/arm/probes/kprobes/test-thumb.c
  *
  * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
  *
@@ -11,8 +11,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <asm/opcodes.h>
+#include <asm/probes.h>
 
-#include "kprobes-test.h"
+#include "test-core.h"
 
 
 #define TEST_ISA "16"
@@ -416,6 +417,9 @@ void kprobe_thumb32_test_cases(void)
 	TEST_RR( "strd	r",14,VAL2,", r",12,VAL1,", [sp, #16]!")
 	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16")
 	TEST_RR( "strd	r",7, VAL2,", r",8, VAL1,", [sp], #-16")
+	TEST_RRP("strd	r",6, VAL1,", r",7, VAL2,", [r",13, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!")
+	TEST_UNSUPPORTED("strd r6, r7, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!")
+	TEST_RRP("strd	r",4, VAL1,", r",5, VAL2,", [r",14, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!")
 	TEST_UNSUPPORTED(__inst_thumb32(0xe9efec04) "	@ strd	r14, r12, [pc, #16]!")
 	TEST_UNSUPPORTED(__inst_thumb32(0xe8efec04) "	@ strd	r14, r12, [pc], #16")
 
@@ -774,8 +778,8 @@ CONDITION_INSTRUCTIONS(22,
 
 	TEST_UNSUPPORTED("subs	pc, lr, #4")
 
-	TEST("mrs	r0, cpsr")
-	TEST("mrs	r14, cpsr")
+	TEST_RMASKED("mrs	r",0,~PSR_IGNORE_BITS,", cpsr")
+	TEST_RMASKED("mrs	r",14,~PSR_IGNORE_BITS,", cpsr")
 	TEST_UNSUPPORTED(__inst_thumb32(0xf3ef8d00) "	@ mrs	sp, spsr")
 	TEST_UNSUPPORTED(__inst_thumb32(0xf3ef8f00) "	@ mrs	pc, spsr")
 	TEST_UNSUPPORTED("mrs	r0, spsr")
@@ -821,14 +825,22 @@ CONDITION_INSTRUCTIONS(22,
 	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]!")		\
 	TEST_RPR("str"size".w	r",0, VAL1,", [r",1, 0,", r",2, 4,"]")		\
 	TEST_RPR("str"size"	r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_UNSUPPORTED("str"size"	r0, [r13, r1]")				\
 	TEST_R(  "str"size".w	r",7, VAL1,", [sp, #24]")			\
 	TEST_RP( "str"size".w	r",0, VAL2,", [r",0,0, "]")			\
+	TEST_RP( "str"size"	r",6, VAL1,", [r",13, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") \
+	TEST_UNSUPPORTED("str"size"	r6, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!")			\
+	TEST_RP( "str"size"	r",4, VAL2,", [r",12, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") \
 	TEST_UNSUPPORTED("str"size"t	r0, [r1, #4]")
 
 	SINGLE_STORE("b")
 	SINGLE_STORE("h")
 	SINGLE_STORE("")
 
+	TEST_UNSUPPORTED(__inst_thumb32(0xf801000d) "	@ strb	r0, [r1, r13]")
+	TEST_UNSUPPORTED(__inst_thumb32(0xf821000d) "	@ strh	r0, [r1, r13]")
+	TEST_UNSUPPORTED(__inst_thumb32(0xf841000d) "	@ str	r0, [r1, r13]")
+
 	TEST("str	sp, [sp]")
 	TEST_UNSUPPORTED(__inst_thumb32(0xf8cfe000) "	@ str	r14, [pc]")
 	TEST_UNSUPPORTED(__inst_thumb32(0xf8cef000) "	@ str	pc, [r14]")
diff --git a/arch/arm/probes/uprobes/Makefile b/arch/arm/probes/uprobes/Makefile
new file mode 100644
index 000000000000..e1dc3d0f6d5a
--- /dev/null
+++ b/arch/arm/probes/uprobes/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_UPROBES)		+= core.o actions-arm.o
diff --git a/arch/arm/kernel/uprobes-arm.c b/arch/arm/probes/uprobes/actions-arm.c
index d3b655ff17da..76eb44972ebe 100644
--- a/arch/arm/kernel/uprobes-arm.c
+++ b/arch/arm/probes/uprobes/actions-arm.c
@@ -13,9 +13,9 @@
 #include <linux/uprobes.h>
 #include <linux/module.h>
 
-#include "probes.h"
-#include "probes-arm.h"
-#include "uprobes.h"
+#include "../decode.h"
+#include "../decode-arm.h"
+#include "core.h"
 
 static int uprobes_substitute_pc(unsigned long *pinsn, u32 oregs)
 {
@@ -195,8 +195,6 @@ uprobe_decode_ldmstm(probes_opcode_t insn,
 }
 
 const union decode_action uprobes_probes_actions[] = {
-	[PROBES_EMULATE_NONE] = {.handler = probes_simulate_nop},
-	[PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop},
 	[PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop},
 	[PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop},
 	[PROBES_BRANCH_IMM] = {.handler = simulate_blx1},
diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/probes/uprobes/core.c
index 56adf9c1fde0..d1329f1ba4e4 100644
--- a/arch/arm/kernel/uprobes.c
+++ b/arch/arm/probes/uprobes/core.c
@@ -17,9 +17,9 @@
 #include <asm/opcodes.h>
 #include <asm/traps.h>
 
-#include "probes.h"
-#include "probes-arm.h"
-#include "uprobes.h"
+#include "../decode.h"
+#include "../decode-arm.h"
+#include "core.h"
 
 #define UPROBE_TRAP_NR	UINT_MAX
 
@@ -88,7 +88,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	auprobe->ixol[1] = __opcode_to_mem_arm(UPROBE_SS_ARM_INSN);
 
 	ret = arm_probes_decode_insn(insn, &auprobe->asi, false,
-				     uprobes_probes_actions);
+				     uprobes_probes_actions, NULL);
 	switch (ret) {
 	case INSN_REJECTED:
 		return -EINVAL;
diff --git a/arch/arm/kernel/uprobes.h b/arch/arm/probes/uprobes/core.h
index 1d0c12dfbd03..1d0c12dfbd03 100644
--- a/arch/arm/kernel/uprobes.h
+++ b/arch/arm/probes/uprobes/core.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d3f7e4941231..2814304cec04 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -39,6 +39,7 @@ config ARM64
 	select HARDIRQS_SW_RESEND
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/arm64/include/asm/bitrev.h b/arch/arm64/include/asm/bitrev.h
new file mode 100644
index 000000000000..a5a0c3660137
--- /dev/null
+++ b/arch/arm64/include/asm/bitrev.h
@@ -0,0 +1,19 @@
+#ifndef __ASM_BITREV_H
+#define __ASM_BITREV_H
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	__asm__ ("rbit %w0, %w1" : "=r" (x) : "r" (x));
+	return x;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	return __arch_bitrev32((u32)x) >> 16;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	return __arch_bitrev32((u32)x) >> 24;
+}
+
+#endif
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7c523bbf3dc8..0dd8d089c315 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -322,7 +322,8 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
  * Target instructions MUST be relocatable (checked inside)
  * This is called when new aggr(opt)probe is allocated or reused.
  */
-int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
+				  struct kprobe *__unused)
 {
 	u8 *buf;
 	int ret;