Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/include/asm/sysinfo.h | 9
-rw-r--r--  arch/alpha/include/asm/thread_info.h | 8
-rw-r--r--  arch/alpha/kernel/osf_sys.c | 12
-rw-r--r--  arch/alpha/kernel/systbls.S | 2
-rw-r--r--  arch/arm/Kconfig | 45
-rw-r--r--  arch/arm/Kconfig.debug | 6
-rw-r--r--  arch/arm/boot/compressed/.gitignore | 9
-rw-r--r--  arch/arm/boot/compressed/Makefile | 32
-rw-r--r--  arch/arm/boot/compressed/atags_to_fdt.c | 97
-rw-r--r--  arch/arm/boot/compressed/head.S | 121
-rw-r--r--  arch/arm/boot/compressed/libfdt_env.h | 15
-rw-r--r--  arch/arm/boot/compressed/misc.c | 42
-rw-r--r--  arch/arm/boot/compressed/mmcif-sh7372.c | 2
-rw-r--r--  arch/arm/boot/compressed/sdhi-sh7372.c | 2
-rw-r--r--  arch/arm/boot/compressed/string.c | 127
-rw-r--r--  arch/arm/boot/compressed/vmlinux.lds.in | 4
-rw-r--r--  arch/arm/common/gic.c | 188
-rw-r--r--  arch/arm/include/asm/hardware/cache-l2x0.h | 11
-rw-r--r--  arch/arm/include/asm/hardware/gic.h | 8
-rw-r--r--  arch/arm/include/asm/hw_breakpoint.h | 2
-rw-r--r--  arch/arm/include/asm/mach/map.h | 1
-rw-r--r--  arch/arm/include/asm/pgtable.h | 3
-rw-r--r--  arch/arm/include/asm/pmu.h | 97
-rw-r--r--  arch/arm/include/asm/proc-fns.h | 8
-rw-r--r--  arch/arm/include/asm/suspend.h | 17
-rw-r--r--  arch/arm/kernel/Makefile | 9
-rw-r--r--  arch/arm/kernel/calls.S | 2
-rw-r--r--  arch/arm/kernel/hw_breakpoint.c | 275
-rw-r--r--  arch/arm/kernel/kprobes-arm.c | 4
-rw-r--r--  arch/arm/kernel/kprobes-test-arm.c | 1323
-rw-r--r--  arch/arm/kernel/kprobes-test-thumb.c | 1187
-rw-r--r--  arch/arm/kernel/kprobes-test.c | 1748
-rw-r--r--  arch/arm/kernel/kprobes-test.h | 392
-rw-r--r--  arch/arm/kernel/kprobes-thumb.c | 7
-rw-r--r--  arch/arm/kernel/kprobes.h | 8
-rw-r--r--  arch/arm/kernel/perf_event.c | 475
-rw-r--r--  arch/arm/kernel/perf_event_v6.c | 87
-rw-r--r--  arch/arm/kernel/perf_event_v7.c | 395
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c | 90
-rw-r--r--  arch/arm/kernel/pmu.c | 186
-rw-r--r--  arch/arm/kernel/relocate_kernel.S | 3
-rw-r--r--  arch/arm/kernel/setup.c | 15
-rw-r--r--  arch/arm/kernel/sleep.S | 85
-rw-r--r--  arch/arm/kernel/smp_twd.c | 4
-rw-r--r--  arch/arm/kernel/suspend.c | 72
-rw-r--r--  arch/arm/mach-at91/at91sam9261.c | 2
-rw-r--r--  arch/arm/mach-cns3xxx/include/mach/entry-macro.S | 1
-rw-r--r--  arch/arm/mach-cns3xxx/include/mach/system.h | 1
-rw-r--r--  arch/arm/mach-cns3xxx/include/mach/uncompress.h | 1
-rw-r--r--  arch/arm/mach-cns3xxx/pcie.c | 2
-rw-r--r--  arch/arm/mach-davinci/board-da850-evm.c | 28
-rw-r--r--  arch/arm/mach-davinci/include/mach/psc.h | 2
-rw-r--r--  arch/arm/mach-davinci/sleep.S | 6
-rw-r--r--  arch/arm/mach-ep93xx/include/mach/ts72xx.h | 26
-rw-r--r--  arch/arm/mach-exynos4/clock.c | 2
-rw-r--r--  arch/arm/mach-exynos4/cpu.c | 11
-rw-r--r--  arch/arm/mach-exynos4/include/mach/irqs.h | 5
-rw-r--r--  arch/arm/mach-exynos4/include/mach/regs-pmu.h | 2
-rw-r--r--  arch/arm/mach-exynos4/irq-eint.c | 7
-rw-r--r--  arch/arm/mach-exynos4/mach-universal_c210.c | 4
-rw-r--r--  arch/arm/mach-exynos4/setup-usb-phy.c | 2
-rw-r--r--  arch/arm/mach-footbridge/Kconfig | 1
-rw-r--r--  arch/arm/mach-footbridge/dc21285.c | 1
-rw-r--r--  arch/arm/mach-imx/mach-cpuimx27.c | 2
-rw-r--r--  arch/arm/mach-imx/mach-cpuimx35.c | 2
-rw-r--r--  arch/arm/mach-imx/mach-eukrea_cpuimx25.c | 2
-rw-r--r--  arch/arm/mach-integrator/integrator_ap.c | 6
-rw-r--r--  arch/arm/mach-omap2/clock3xxx_data.c | 2
-rw-r--r--  arch/arm/mach-omap2/clock44xx_data.c | 10
-rw-r--r--  arch/arm/mach-omap2/clockdomain.c | 2
-rw-r--r--  arch/arm/mach-omap2/omap_hwmod_2430_data.c | 1
-rw-r--r--  arch/arm/mach-omap2/pm.c | 2
-rw-r--r--  arch/arm/mach-omap2/powerdomain.c | 25
-rw-r--r--  arch/arm/mach-orion5x/dns323-setup.c | 2
-rw-r--r--  arch/arm/mach-orion5x/pci.c | 1
-rw-r--r--  arch/arm/mach-prima2/clock.c | 1
-rw-r--r--  arch/arm/mach-prima2/irq.c | 1
-rw-r--r--  arch/arm/mach-prima2/rstc.c | 1
-rw-r--r--  arch/arm/mach-prima2/timer.c | 1
-rw-r--r--  arch/arm/mach-realview/include/mach/system.h | 1
-rw-r--r--  arch/arm/mach-s3c64xx/pm.c | 1
-rw-r--r--  arch/arm/mach-s5p64x0/irq-eint.c | 2
-rw-r--r--  arch/arm/mach-s5pv210/pm.c | 2
-rw-r--r--  arch/arm/mach-shmobile/board-ag5evm.c | 3
-rw-r--r--  arch/arm/mach-shmobile/board-ap4evb.c | 1
-rw-r--r--  arch/arm/mach-shmobile/board-mackerel.c | 5
-rw-r--r--  arch/arm/mach-shmobile/clock-sh7372.c | 31
-rw-r--r--  arch/arm/mach-shmobile/clock-sh73a0.c | 2
-rw-r--r--  arch/arm/mach-shmobile/include/mach/sh7372.h | 4
-rw-r--r--  arch/arm/mach-shmobile/intc-sh7372.c | 7
-rw-r--r--  arch/arm/mach-shmobile/setup-sh7372.c | 176
-rw-r--r--  arch/arm/mach-vexpress/v2m.c | 7
-rw-r--r--  arch/arm/mm/abort-macro.S | 2
-rw-r--r--  arch/arm/mm/cache-l2x0.c | 21
-rw-r--r--  arch/arm/mm/init.c | 2
-rw-r--r--  arch/arm/mm/mmu.c | 8
-rw-r--r--  arch/arm/mm/proc-arm920.S | 19
-rw-r--r--  arch/arm/mm/proc-arm926.S | 19
-rw-r--r--  arch/arm/mm/proc-sa1100.S | 35
-rw-r--r--  arch/arm/mm/proc-v6.S | 60
-rw-r--r--  arch/arm/mm/proc-v7.S | 52
-rw-r--r--  arch/arm/mm/proc-xsc3.S | 28
-rw-r--r--  arch/arm/mm/proc-xscale.S | 25
-rw-r--r--  arch/arm/plat-omap/omap_device.c | 6
-rw-r--r--  arch/arm/plat-s5p/clock.c | 2
-rw-r--r--  arch/arm/plat-s5p/irq-gpioint.c | 6
-rw-r--r--  arch/arm/plat-samsung/include/plat/backlight.h | 2
-rw-r--r--  arch/arm/plat-samsung/irq-vic-timer.c | 5
-rw-r--r--  arch/arm/tools/mach-types | 6
-rw-r--r--  arch/arm/vfp/vfpmodule.c | 31
-rw-r--r--  arch/avr32/kernel/syscall_table.S | 2
-rw-r--r--  arch/blackfin/mach-common/entry.S | 2
-rw-r--r--  arch/cris/arch-v10/kernel/entry.S | 2
-rw-r--r--  arch/cris/arch-v32/kernel/entry.S | 2
-rw-r--r--  arch/cris/include/asm/serial.h | 9
-rw-r--r--  arch/frv/kernel/entry.S | 2
-rw-r--r--  arch/h8300/kernel/syscalls.S | 2
-rw-r--r--  arch/ia64/kernel/entry.S | 2
-rw-r--r--  arch/m32r/kernel/syscall_table.S | 2
-rw-r--r--  arch/m68k/include/asm/page_mm.h | 2
-rw-r--r--  arch/m68k/kernel/syscalltable.S | 2
-rw-r--r--  arch/microblaze/kernel/syscall_table.S | 2
-rw-r--r--  arch/mips/kernel/scall32-o32.S | 2
-rw-r--r--  arch/mips/kernel/scall64-64.S | 2
-rw-r--r--  arch/mips/kernel/scall64-n32.S | 2
-rw-r--r--  arch/mips/kernel/scall64-o32.S | 2
-rw-r--r--  arch/mn10300/kernel/entry.S | 2
-rw-r--r--  arch/openrisc/include/asm/dma-mapping.h | 59
-rw-r--r--  arch/openrisc/include/asm/sigcontext.h | 7
-rw-r--r--  arch/openrisc/kernel/dma.c | 28
-rw-r--r--  arch/openrisc/kernel/signal.c | 29
-rw-r--r--  arch/parisc/kernel/syscall_table.S | 2
-rw-r--r--  arch/powerpc/boot/dts/p1023rds.dts | 2
-rw-r--r--  arch/powerpc/configs/85xx/p1023rds_defconfig | 1
-rw-r--r--  arch/powerpc/configs/corenet32_smp_defconfig | 1
-rw-r--r--  arch/powerpc/configs/corenet64_smp_defconfig | 5
-rw-r--r--  arch/powerpc/configs/mpc85xx_defconfig | 1
-rw-r--r--  arch/powerpc/configs/mpc85xx_smp_defconfig | 1
-rw-r--r--  arch/powerpc/include/asm/systbl.h | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_rio.c | 5
-rw-r--r--  arch/s390/kernel/compat_wrapper.S | 6
-rw-r--r--  arch/s390/kernel/early.c | 14
-rw-r--r--  arch/s390/kernel/ipl.c | 7
-rw-r--r--  arch/s390/kernel/syscalls.S | 2
-rw-r--r--  arch/sh/include/asm/ptrace.h | 2
-rw-r--r--  arch/sh/kernel/cpu/sh4a/setup-sh7757.c | 1
-rw-r--r--  arch/sh/kernel/idle.c | 2
-rw-r--r--  arch/sh/kernel/syscalls_32.S | 2
-rw-r--r--  arch/sh/kernel/syscalls_64.S | 2
-rw-r--r--  arch/sh/kernel/traps_32.c | 37
-rw-r--r--  arch/sparc/include/asm/sigcontext.h | 14
-rw-r--r--  arch/sparc/kernel/Makefile | 1
-rw-r--r--  arch/sparc/kernel/irq.h | 2
-rw-r--r--  arch/sparc/kernel/setup_64.c | 10
-rw-r--r--  arch/sparc/kernel/signal32.c | 184
-rw-r--r--  arch/sparc/kernel/signal_32.c | 172
-rw-r--r--  arch/sparc/kernel/signal_64.c | 108
-rw-r--r--  arch/sparc/kernel/sigutil.h | 9
-rw-r--r--  arch/sparc/kernel/sigutil_32.c | 120
-rw-r--r--  arch/sparc/kernel/sigutil_64.c | 93
-rw-r--r--  arch/sparc/kernel/sys32.S | 1
-rw-r--r--  arch/sparc/kernel/systbls_32.S | 2
-rw-r--r--  arch/sparc/kernel/systbls_64.S | 4
-rw-r--r--  arch/x86/ia32/ia32entry.S | 2
-rw-r--r--  arch/x86/include/asm/pvclock.h | 2
-rw-r--r--  arch/x86/include/asm/unistd_64.h | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 3
-rw-r--r--  arch/x86/kernel/entry_32.S | 8
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 2
-rw-r--r--  arch/x86/pci/acpi.c | 9
-rw-r--r--  arch/x86/platform/mrst/mrst.c | 4
-rw-r--r--  arch/x86/platform/olpc/olpc.c | 4
-rw-r--r--  arch/x86/vdso/vdso32/sysenter.S | 2
-rw-r--r--  arch/x86/xen/setup.c | 19
-rw-r--r--  arch/x86/xen/smp.c | 10
-rw-r--r--  arch/x86/xen/xen-asm_32.S | 8
-rw-r--r--  arch/xtensa/include/asm/unistd.h | 2
179 files changed, 7530 insertions, 1500 deletions
diff --git a/arch/alpha/include/asm/sysinfo.h b/arch/alpha/include/asm/sysinfo.h
index 086aba284df2..e77d77cd07b8 100644
--- a/arch/alpha/include/asm/sysinfo.h
+++ b/arch/alpha/include/asm/sysinfo.h
@@ -27,13 +27,4 @@
 #define UAC_NOFIX			2
 #define UAC_SIGBUS			4
 
-
-#ifdef __KERNEL__
-
-/* This is the shift that is applied to the UAC bits as stored in the
-   per-thread flags.  See thread_info.h.  */
-#define UAC_SHIFT			6
-
-#endif
-
 #endif /* __ASM_ALPHA_SYSINFO_H */
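
Note on this hunk: UAC_SHIFT moves out of the user-visible header because the shift amount is a kernel implementation detail; the thread_info.h hunk below re-derives it as ALPHA_UAC_SHIFT (= TIF_UAC_NOPRINT = 10). A minimal standalone sketch of the packing arithmetic, using only constants visible in this diff (the main() harness is illustrative, not kernel code):

	#include <stdio.h>

	#define UAC_NOPRINT		1
	#define UAC_NOFIX		2
	#define UAC_SIGBUS		4
	#define UAC_BITMASK		(UAC_NOPRINT | UAC_NOFIX | UAC_SIGBUS)
	#define TIF_UAC_NOPRINT	10
	#define ALPHA_UAC_SHIFT	TIF_UAC_NOPRINT

	int main(void)
	{
		unsigned long flags = 0;

		/* store a userspace UAC value into the per-thread flags ... */
		flags |= (unsigned long)(UAC_NOFIX & UAC_BITMASK) << ALPHA_UAC_SHIFT;

		/* ... and read it back the way osf_getsysinfo() does below */
		printf("%lu\n", (flags >> ALPHA_UAC_SHIFT) & UAC_BITMASK); /* prints 2 */
		return 0;
	}
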
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 6f32f9c84a2d..ff73db022342 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,9 +74,9 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	8	/* poll_idle is polling NEED_RESCHED */
 #define TIF_DIE_IF_KERNEL	9	/* dik recursion lock */
-#define TIF_UAC_NOPRINT		10	/* see sysinfo.h */
-#define TIF_UAC_NOFIX		11
-#define TIF_UAC_SIGBUS		12
+#define TIF_UAC_NOPRINT		10	/* ! Preserve sequence of following */
+#define TIF_UAC_NOFIX		11	/* ! flags as they match            */
+#define TIF_UAC_SIGBUS		12	/* ! userspace part of 'osf_sysinfo' */
 #define TIF_MEMDIE		13	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	14	/* restore signal mask in do_signal */
 #define TIF_FREEZE		16	/* is freezing for suspend */
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define _TIF_ALLWORK_MASK	(_TIF_WORK_MASK		\
 				 | _TIF_SYSCALL_TRACE)
 
-#define ALPHA_UAC_SHIFT		10
+#define ALPHA_UAC_SHIFT		TIF_UAC_NOPRINT
 #define ALPHA_UAC_MASK		(1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \
 				 1 << TIF_UAC_SIGBUS)
 
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 326f0a2d56e5..01e8715e26d9 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -42,6 +42,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/sysinfo.h>
+#include <asm/thread_info.h>
 #include <asm/hwrpb.h>
 #include <asm/processor.h>
 
@@ -633,9 +634,10 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
  	case GSI_UACPROC:
 		if (nbytes < sizeof(unsigned int))
 			return -EINVAL;
- 		w = (current_thread_info()->flags >> UAC_SHIFT) & UAC_BITMASK;
- 		if (put_user(w, (unsigned int __user *)buffer))
- 			return -EFAULT;
+		w = (current_thread_info()->flags >> ALPHA_UAC_SHIFT) &
+			UAC_BITMASK;
+		if (put_user(w, (unsigned int __user *)buffer))
+			return -EFAULT;
  		return 1;
 
 	case GSI_PROC_TYPE:
@@ -756,8 +758,8 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
  			case SSIN_UACPROC:
 			again:
 				old = current_thread_info()->flags;
-				new = old & ~(UAC_BITMASK << UAC_SHIFT);
-				new = new | (w & UAC_BITMASK) << UAC_SHIFT;
+				new = old & ~(UAC_BITMASK << ALPHA_UAC_SHIFT);
+				new = new | (w & UAC_BITMASK) << ALPHA_UAC_SHIFT;
 				if (cmpxchg(&current_thread_info()->flags,
 					    old, new) != old)
 					goto again;
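
The SSIN_UACPROC hunk above is the standard lock-free read-modify-write retry loop. For reference, the same idiom in portable C11 atomics -- a generic sketch, not the kernel's cmpxchg() API:

	#include <stdatomic.h>

	static void set_masked_bits(_Atomic unsigned long *flags,
				    unsigned long mask, unsigned long val)
	{
		unsigned long old = atomic_load(flags);
		unsigned long new;

		do {
			new = (old & ~mask) | (val & mask);
			/* on failure, 'old' is reloaded with the current value */
		} while (!atomic_compare_exchange_weak(flags, &old, new));
	}
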
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index b9c28f3f1956..6acea1f96de3 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -360,7 +360,7 @@ sys_call_table:
 	.quad sys_newuname
 	.quad sys_nanosleep			/* 340 */
 	.quad sys_mremap
-	.quad sys_nfsservctl
+	.quad sys_ni_syscall			/* old nfsservctl */
 	.quad sys_setresuid
 	.quad sys_getresuid
 	.quad sys_pciconfig_read		/* 345 */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b7f7510658d6..c8d5ec8a9391 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -29,6 +29,7 @@ config ARM
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
 	select GENERIC_IRQ_SHOW
+	select CPU_PM if (SUSPEND || CPU_IDLE)
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -1297,6 +1298,18 @@ config ARM_ERRATA_754327
 	  This workaround defines cpu_relax() as smp_mb(), preventing correctly
 	  written polling loops from denying visibility of updates to memory.
 
+config ARM_ERRATA_364296
+	bool "ARM errata: Possible cache data corruption with hit-under-miss enabled"
+	depends on CPU_V6 && !SMP
+	help
+	  This option enables the workaround for the 364296 ARM1136
+	  r0p2 erratum (possible cache data corruption with
+	  hit-under-miss enabled). It sets the undocumented bit 31 in
+	  the auxiliary control register and the FI bit in the control
+	  register, thus disabling hit-under-miss without putting the
+	  processor into full low interrupt latency mode. ARM11MPCore
+	  is not affected.
+
 endmenu
 
 source "arch/arm/common/Kconfig"
@@ -1807,6 +1820,38 @@ config ZBOOT_ROM_SH_MOBILE_SDHI
 
 endchoice
 
+config ARM_APPENDED_DTB
+	bool "Use appended device tree blob to zImage (EXPERIMENTAL)"
+	depends on OF && !ZBOOT_ROM && EXPERIMENTAL
+	help
+	  With this option, the boot code will look for a device tree binary
+	  (DTB) appended to zImage
+	  (e.g. cat zImage <filename>.dtb > zImage_w_dtb).
+
+	  This is meant as a backward compatibility convenience for those
+	  systems with a bootloader that can't be upgraded to accommodate
+	  the documented boot protocol using a device tree.
+
+	  Beware that there is very little in terms of protection against
+	  this option being confused by leftover garbage in memory that might
+	  look like a DTB header after a reboot if no actual DTB is appended
+	  to zImage.  Do not leave this option active in a production kernel
+	  if you don't intend to always append a DTB.  Proper passing of the
+	  location into r2 of a bootloader provided DTB is always preferable
+	  to this option.
+
+config ARM_ATAG_DTB_COMPAT
+	bool "Supplement the appended DTB with traditional ATAG information"
+	depends on ARM_APPENDED_DTB
+	help
+	  Some old bootloaders can't be updated to a DTB capable one, yet
+	  they provide ATAGs with memory configuration, the ramdisk address,
+	  the kernel cmdline string, etc.  Such information is dynamically
+	  provided by the bootloader and can't always be stored in a static
+	  DTB.  To allow a device tree enabled kernel to be used with such
+	  bootloaders, this option allows zImage to extract the information
+	  from the ATAG list and store it at run time into the appended DTB.
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 81cbe40c159c..be3a0f78d915 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -129,4 +129,10 @@ config DEBUG_S3C_UART
 	  The uncompressor code port configuration is now handled
 	  by CONFIG_S3C_LOWLEVEL_UART_PORT.
 
+config ARM_KPROBES_TEST
+	tristate "Kprobes test module"
+	depends on KPROBES && MODULES
+	help
+	  Perform tests of kprobes API and instruction set simulation.
+
 endmenu
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index c6028967d336..e0936a148516 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -5,3 +5,12 @@ piggy.lzo
 piggy.lzma
 vmlinux
 vmlinux.lds
+
+# borrowed libfdt files
+fdt.c
+fdt.h
+fdt_ro.c
+fdt_rw.c
+fdt_wip.c
+libfdt.h
+libfdt_internal.h
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 0c74a6fab952..e4f32a8e002a 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -26,6 +26,10 @@ HEAD	= head.o
 OBJS	+= misc.o decompress.o
 FONTC	= $(srctree)/drivers/video/console/font_acorn_8x8.c
 
+# string library code (-Os is enforced to keep it much smaller)
+OBJS		+= string.o
+CFLAGS_string.o	:= -Os
+
 #
 # Architecture dependencies
 #
@@ -89,21 +93,41 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip
 suffix_$(CONFIG_KERNEL_LZO)  = lzo
 suffix_$(CONFIG_KERNEL_LZMA) = lzma
 
+# Borrowed libfdt files for the ATAG compatibility mode
+
+libfdt		:= fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
+libfdt_hdrs	:= fdt.h libfdt.h libfdt_internal.h
+
+libfdt_objs	:= $(addsuffix .o, $(basename $(libfdt)))
+
+$(addprefix $(obj)/,$(libfdt) $(libfdt_hdrs)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/%
+	$(call cmd,shipped)
+
+$(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \
+	$(addprefix $(obj)/,$(libfdt_hdrs))
+
+ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
+OBJS	+= $(libfdt_objs) atags_to_fdt.o
+endif
+
 targets       := vmlinux vmlinux.lds \
 		 piggy.$(suffix_y) piggy.$(suffix_y).o \
-		 font.o font.c head.o misc.o $(OBJS)
+		 lib1funcs.o lib1funcs.S font.o font.c head.o misc.o $(OBJS)
 
 # Make sure files are removed during clean
-extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S
+extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S $(libfdt) $(libfdt_hdrs)
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
-ccflags-y := -fpic -fno-builtin
+ccflags-y := -fpic -fno-builtin -I$(obj)
 asflags-y := -Wa,-march=all
 
+# Supply kernel BSS size to the decompressor via a linker symbol.
+KBSS_SZ = $(shell size $(obj)/../../../../vmlinux | awk 'END{print $$3}')
+LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
 LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
@@ -123,7 +147,7 @@ LDFLAGS_vmlinux += -T
 # For __aeabi_uidivmod
 lib1funcs = $(obj)/lib1funcs.o
 
-$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE
+$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S
 	$(call cmd,shipped)
 
 # We need to prevent any GOTOFF relocs being used with references
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
new file mode 100644
index 000000000000..6ce11c481178
--- /dev/null
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -0,0 +1,97 @@
+#include <asm/setup.h>
+#include <libfdt.h>
+
+static int node_offset(void *fdt, const char *node_path)
+{
+	int offset = fdt_path_offset(fdt, node_path);
+	if (offset == -FDT_ERR_NOTFOUND)
+		offset = fdt_add_subnode(fdt, 0, node_path);
+	return offset;
+}
+
+static int setprop(void *fdt, const char *node_path, const char *property,
+		   uint32_t *val_array, int size)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop(fdt, offset, property, val_array, size);
+}
+
+static int setprop_string(void *fdt, const char *node_path,
+			  const char *property, const char *string)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_string(fdt, offset, property, string);
+}
+
+static int setprop_cell(void *fdt, const char *node_path,
+			const char *property, uint32_t val)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_cell(fdt, offset, property, val);
+}
+
+/*
+ * Convert and fold provided ATAGs into the provided FDT.
+ *
+ * Return values:
+ *    = 0 -> pretend success
+ *    = 1 -> bad ATAG (may retry with another possible ATAG pointer)
+ *    < 0 -> error from libfdt
+ */
+int atags_to_fdt(void *atag_list, void *fdt, int total_space)
+{
+	struct tag *atag = atag_list;
+	uint32_t mem_reg_property[2 * NR_BANKS];
+	int memcount = 0;
+	int ret;
+
+	/* make sure we've got an aligned pointer */
+	if ((u32)atag_list & 0x3)
+		return 1;
+
+	/* if we get a DTB here we're done already */
+	if (*(u32 *)atag_list == fdt32_to_cpu(FDT_MAGIC))
+		return 0;
+
+	/* validate the ATAG */
+	if (atag->hdr.tag != ATAG_CORE ||
+	    (atag->hdr.size != tag_size(tag_core) &&
+	     atag->hdr.size != 2))
+		return 1;
+
+	/* let's give it all the room it could need */
+	ret = fdt_open_into(fdt, fdt, total_space);
+	if (ret < 0)
+		return ret;
+
+	for_each_tag(atag, atag_list) {
+		if (atag->hdr.tag == ATAG_CMDLINE) {
+			setprop_string(fdt, "/chosen", "bootargs",
+					atag->u.cmdline.cmdline);
+		} else if (atag->hdr.tag == ATAG_MEM) {
+			if (memcount >= sizeof(mem_reg_property)/4)
+				continue;
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+		} else if (atag->hdr.tag == ATAG_INITRD2) {
+			uint32_t initrd_start, initrd_size;
+			initrd_start = atag->u.initrd.start;
+			initrd_size = atag->u.initrd.size;
+			setprop_cell(fdt, "/chosen", "linux,initrd-start",
+					initrd_start);
+			setprop_cell(fdt, "/chosen", "linux,initrd-end",
+					initrd_start + initrd_size);
+		}
+	}
+
+	if (memcount)
+		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+
+	return fdt_pack(fdt);
+}
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index e95a5989602a..9f5ac11ccd8e 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -216,6 +216,103 @@ restart:	adr	r0, LC0
 		mov	r10, r6
 #endif
 
+		mov	r5, #0			@ init dtb size to 0
+#ifdef CONFIG_ARM_APPENDED_DTB
+/*
+ *   r0  = delta
+ *   r2  = BSS start
+ *   r3  = BSS end
+ *   r4  = final kernel address
+ *   r5  = appended dtb size (still unknown)
+ *   r6  = _edata
+ *   r7  = architecture ID
+ *   r8  = atags/device tree pointer
+ *   r9  = size of decompressed image
+ *   r10 = end of this image, including  bss/stack/malloc space if non XIP
+ *   r11 = GOT start
+ *   r12 = GOT end
+ *   sp  = stack pointer
+ *
+ * if there are device trees (dtb) appended to zImage, advance r10 so that the
+ * dtb data will get relocated along with the kernel if necessary.
+ */
+
+		ldr	lr, [r6, #0]
+#ifndef __ARMEB__
+		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
+#else
+		ldr	r1, =0xd00dfeed
+#endif
+		cmp	lr, r1
+		bne	dtb_check_done		@ not found
+
+#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
+		/*
+		 * OK... Let's do some funky business here.
+		 * If we do have a DTB appended to zImage, and we do have
+		 * an ATAG list around, we want the latter to be translated
+		 * and folded into the former here.  To be on the safe side,
+		 * let's temporarily move the stack away into the malloc
+		 * area.  No GOT fixup has occurred yet, but none of the
+		 * code we're about to call uses any global variable.
+		 */
+		add	sp, sp, #0x10000
+		stmfd	sp!, {r0-r3, ip, lr}
+		mov	r0, r8
+		mov	r1, r6
+		sub	r2, sp, r6
+		bl	atags_to_fdt
+
+		/*
+		 * If returned value is 1, there is no ATAG at the location
+		 * pointed by r8.  Try the typical 0x100 offset from start
+		 * of RAM and hope for the best.
+		 */
+		cmp	r0, #1
+		sub	r0, r4, #(TEXT_OFFSET - 0x100)
+		mov	r1, r6
+		sub	r2, sp, r6
+		blne	atags_to_fdt
+
+		ldmfd	sp!, {r0-r3, ip, lr}
+		sub	sp, sp, #0x10000
+#endif
+
+		mov	r8, r6			@ use the appended device tree
+
+		/*
+		 * Make sure that the DTB doesn't end up in the final
+		 * kernel's .bss area. To do so, we adjust the decompressed
+		 * kernel size to compensate if that .bss size is larger
+		 * than the relocated code.
+		 */
+		ldr	r5, =_kernel_bss_size
+		adr	r1, wont_overwrite
+		sub	r1, r6, r1
+		subs	r1, r5, r1
+		addhi	r9, r9, r1
+
+		/* Get the dtb's size */
+		ldr	r5, [r6, #4]
+#ifndef __ARMEB__
+		/* convert r5 (dtb size) to little endian */
+		eor	r1, r5, r5, ror #16
+		bic	r1, r1, #0x00ff0000
+		mov	r5, r5, ror #8
+		eor	r5, r5, r1, lsr #8
+#endif
+
+		/* preserve 64-bit alignment */
+		add	r5, r5, #7
+		bic	r5, r5, #7
+
+		/* relocate some pointers past the appended dtb */
+		add	r6, r6, r5
+		add	r10, r10, r5
+		add	sp, sp, r5
+dtb_check_done:
+#endif
+
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
@@ -223,15 +320,14 @@ restart:	adr	r0, LC0
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 - 16k page directory >= r10 -> OK
- *   r4 + image length <= current position (pc) -> OK
+ *   r4 + image length <= address of wont_overwrite -> OK
  */
 		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-   ARM(		cmp	r10, pc		)
- THUMB(		mov	lr, pc		)
- THUMB(		cmp	r10, lr		)
+		adr	r9, wont_overwrite
+		cmp	r10, r9
 		bls	wont_overwrite
 
 /*
@@ -285,14 +381,16 @@ wont_overwrite:
  *   r2  = BSS start
  *   r3  = BSS end
  *   r4  = kernel execution address
+ *   r5  = appended dtb size (0 if not present)
  *   r7  = architecture ID
  *   r8  = atags pointer
  *   r11 = GOT start
  *   r12 = GOT end
  *   sp  = stack pointer
  */
-		teq	r0, #0
+		orrs	r1, r0, r5
 		beq	not_relocated
+
 		add	r11, r11, r0
 		add	r12, r12, r0
 
@@ -307,12 +405,21 @@ wont_overwrite:
 
 		/*
 		 * Relocate all entries in the GOT table.
+		 * Bump bss entries to _edata + dtb size
 		 */
 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
-		add	r1, r1, r0		@ table.  This fixes up the
-		str	r1, [r11], #4		@ C references.
+		add	r1, r1, r0		@ This fixes up C references
+		cmp	r1, r2			@ if entry >= bss_start &&
+		cmphs	r3, r1			@       bss_end > entry
+		addhi	r1, r1, r5		@    entry += dtb size
+		str	r1, [r11], #4		@ next entry
 		cmp	r11, r12
 		blo	1b
+
+		/* bump our bss pointers too */
+		add	r2, r2, r5
+		add	r3, r3, r5
+
 #else
 
 		/*
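
The eor/bic/ror sequence above is the classic pre-ARMv6 word byte-swap, done by hand because the decompressor cannot assume the rev instruction exists. A C transcription, for readers checking the arithmetic -- a sketch only:

	#include <stdint.h>

	static uint32_t swab32(uint32_t x)
	{
		uint32_t t = x ^ ((x >> 16) | (x << 16)); /* eor r1, r5, r5, ror #16 */
		t &= ~UINT32_C(0x00ff0000);               /* bic r1, r1, #0x00ff0000 */
		x = (x >> 8) | (x << 24);                 /* mov r5, r5, ror #8 */
		return x ^ (t >> 8);                      /* eor r5, r5, r1, lsr #8 */
	}
	/* swab32(0xd00dfeed) == 0xedfe0dd0, the little-endian view of the magic */
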
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
new file mode 100644
index 000000000000..1f4e71876b00
--- /dev/null
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -0,0 +1,15 @@
+#ifndef _ARM_LIBFDT_ENV_H
+#define _ARM_LIBFDT_ENV_H
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
+
+#endif
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 832d37236c59..8e2a8fca5ed2 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -18,14 +18,9 @@
 
 unsigned int __machine_arch_type;
 
-#define _LINUX_STRING_H_
-
 #include <linux/compiler.h>	/* for inline */
-#include <linux/types.h>	/* for size_t */
-#include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 #include <linux/linkage.h>
-#include <asm/string.h>
-
 
 static void putstr(const char *ptr);
 extern void error(char *x);
@@ -101,41 +96,6 @@ static void putstr(const char *ptr)
 	flush();
 }
 
-
-void *memcpy(void *__dest, __const void *__src, size_t __n)
-{
-	int i = 0;
-	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
-
-	for (i = __n >> 3; i > 0; i--) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 2) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 1) {
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1)
-		*d++ = *s++;
-
-	return __dest;
-}
-
 /*
  * gzip declarations
  */
diff --git a/arch/arm/boot/compressed/mmcif-sh7372.c b/arch/arm/boot/compressed/mmcif-sh7372.c
index b6f61d9a5a1b..672ae95db5c3 100644
--- a/arch/arm/boot/compressed/mmcif-sh7372.c
+++ b/arch/arm/boot/compressed/mmcif-sh7372.c
@@ -82,7 +82,7 @@ asmlinkage void mmc_loader(unsigned char *buf, unsigned long len)
 
 
 	/* Disable clock to MMC hardware block */
-	__raw_writel(__raw_readl(SMSTPCR3) & (1 << 12), SMSTPCR3);
+	__raw_writel(__raw_readl(SMSTPCR3) | (1 << 12), SMSTPCR3);
 
 	mmc_update_progress(MMC_PROGRESS_DONE);
 }
diff --git a/arch/arm/boot/compressed/sdhi-sh7372.c b/arch/arm/boot/compressed/sdhi-sh7372.c
index d403a8b24d7f..d279294f2381 100644
--- a/arch/arm/boot/compressed/sdhi-sh7372.c
+++ b/arch/arm/boot/compressed/sdhi-sh7372.c
@@ -85,7 +85,7 @@ asmlinkage void mmc_loader(unsigned short *buf, unsigned long len)
 		goto err;
 
         /* Disable clock to SDHI1 hardware block */
-        __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3);
+        __raw_writel(__raw_readl(SMSTPCR3) | (1 << 13), SMSTPCR3);
 
 	mmc_update_progress(MMC_PROGRESS_DONE);
 
diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c
new file mode 100644
index 000000000000..36e53ef9200f
--- /dev/null
+++ b/arch/arm/boot/compressed/string.c
@@ -0,0 +1,127 @@
+/*
+ * arch/arm/boot/compressed/string.c
+ *
+ * Small subset of simple string routines
+ */
+
+#include <linux/string.h>
+
+void *memcpy(void *__dest, __const void *__src, size_t __n)
+{
+	int i = 0;
+	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
+
+	for (i = __n >> 3; i > 0; i--) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 2) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1)
+		*d++ = *s++;
+
+	return __dest;
+}
+
+void *memmove(void *__dest, __const void *__src, size_t count)
+{
+	unsigned char *d = __dest;
+	const unsigned char *s = __src;
+
+	if (__dest == __src)
+		return __dest;
+
+	if (__dest < __src)
+		return memcpy(__dest, __src, count);
+
+	while (count--)
+		d[count] = s[count];
+	return __dest;
+}
+
+size_t strlen(const char *s)
+{
+	const char *sc = s;
+
+	while (*sc != '\0')
+		sc++;
+	return sc - s;
+}
+
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *su1 = cs, *su2 = ct, *end = su1 + count;
+	int res = 0;
+
+	while (su1 < end) {
+		res = *su1++ - *su2++;
+		if (res)
+			break;
+	}
+	return res;
+}
+
+int strcmp(const char *cs, const char *ct)
+{
+	unsigned char c1, c2;
+	int res = 0;
+
+	do {
+		c1 = *cs++;
+		c2 = *ct++;
+		res = c1 - c2;
+		if (res)
+			break;
+	} while (c1);
+	return res;
+}
+
+void *memchr(const void *s, int c, size_t count)
+{
+	const unsigned char *p = s;
+
+	while (count--)
+		if ((unsigned char)c == *p++)
+			return (void *)(p - 1);
+	return NULL;
+}
+
+char *strchr(const char *s, int c)
+{
+	while (*s != (char)c)
+		if (*s++ == '\0')
+			return NULL;
+	return (char *)s;
+}
+
+#undef memset
+
+void *memset(void *s, int c, size_t count)
+{
+	char *xs = s;
+	while (count--)
+		*xs++ = c;
+	return s;
+}
+
+void __memzero(void *s, size_t count)
+{
+	memset(s, 0, count);
+}
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 4e728834a1b9..4919f2ac8b89 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -51,6 +51,10 @@ SECTIONS
   _got_start = .;
   .got			: { *(.got) }
   _got_end = .;
+
+  /* ensure the zImage file size is always a multiple of 64 bits */
+  /* (without a dummy byte, ld just ignores the empty section) */
+  .pad			: { BYTE(0); . = ALIGN(8); }
   _edata = .;
 
   . = BSS_START;
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 3227ca952a12..734db99eaee7 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -26,6 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/smp.h>
+#include <linux/cpu_pm.h>
 #include <linux/cpumask.h>
 #include <linux/io.h>
 
@@ -276,6 +277,8 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
 
+	gic->gic_irqs = gic_irqs;
+
 	/*
 	 * Set all global interrupts to be level triggered, active low.
 	 */
@@ -343,6 +346,189 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+#ifdef CONFIG_CPU_PM
+/*
+ * Saves the GIC distributor registers during suspend or idle.  Must be called
+ * with interrupts disabled but before powering down the GIC.  After calling
+ * this function, no interrupts will be delivered by the GIC, and another
+ * platform-specific wakeup source must be enabled.
+ */
+static void gic_dist_save(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	void __iomem *dist_base;
+	int i;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		gic_data[gic_nr].saved_spi_conf[i] =
+			readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		gic_data[gic_nr].saved_spi_target[i] =
+			readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		gic_data[gic_nr].saved_spi_enable[i] =
+			readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+}
+
+/*
+ * Restores the GIC distributor registers during resume or when coming out of
+ * idle.  Must be called before enabling interrupts.  If a level interrupt
+ * that occurred while the GIC was suspended is still present, it will be
+ * handled normally, but any edge interrupts that occurred will not be seen by
+ * the GIC and need to be handled by the platform-specific wakeup source.
+ */
+static void gic_dist_restore(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	unsigned int i;
+	void __iomem *dist_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	writel_relaxed(0, dist_base + GIC_DIST_CTRL);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_conf[i],
+			dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(0xa0a0a0a0,
+			dist_base + GIC_DIST_PRI + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_target[i],
+			dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
+			dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	writel_relaxed(1, dist_base + GIC_DIST_CTRL);
+}
+
+static void gic_cpu_save(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+}
+
+static void gic_cpu_restore(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(32, 4); i++)
+		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
+
+	writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
+	writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
+}
+
+static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
+{
+	int i;
+
+	for (i = 0; i < MAX_GIC_NR; i++) {
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			gic_cpu_save(i);
+			break;
+		case CPU_PM_ENTER_FAILED:
+		case CPU_PM_EXIT:
+			gic_cpu_restore(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER:
+			gic_dist_save(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER_FAILED:
+		case CPU_CLUSTER_PM_EXIT:
+			gic_dist_restore(i);
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block gic_notifier_block = {
+	.notifier_call = gic_notifier,
+};
+
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+	gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_enable);
+
+	gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_conf);
+
+	cpu_pm_register_notifier(&gic_notifier_block);
+}
+#else
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+}
+#endif
+
 void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	void __iomem *dist_base, void __iomem *cpu_base)
 {
@@ -358,8 +544,10 @@ void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	if (gic_nr == 0)
 		gic_cpu_base_addr = cpu_base;
 
+	gic_chip.flags |= gic_arch_extn.flags;
 	gic_dist_init(gic, irq_start);
 	gic_cpu_init(gic);
+	gic_pm_init(gic);
 }
 
 void __cpuinit gic_secondary_init(unsigned int gic_nr)
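
The save/restore hooks added above only run if something fires the cpu_pm notifier chain. Roughly how a platform's low-power path is expected to do that -- a sketch assuming the <linux/cpu_pm.h> API this series introduces (platform_enter_deep_idle is a made-up name):

	#include <linux/cpu_pm.h>

	static int platform_enter_deep_idle(void)
	{
		int ret;

		ret = cpu_pm_enter();	/* CPU_PM_ENTER -> gic_cpu_save() */
		if (ret)
			return ret;	/* a notifier refused the transition */

		/*
		 * ... power down; if the whole cluster goes down, wrap this
		 * in cpu_cluster_pm_enter()/exit() so gic_dist_save() and
		 * gic_dist_restore() run too ...
		 */

		cpu_pm_exit();		/* CPU_PM_EXIT -> gic_cpu_restore() */
		return 0;
	}
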
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 16bd48031583..99a6ed7e1bfd 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -45,8 +45,13 @@
 #define L2X0_CLEAN_INV_LINE_PA		0x7F0
 #define L2X0_CLEAN_INV_LINE_IDX		0x7F8
 #define L2X0_CLEAN_INV_WAY		0x7FC
-#define L2X0_LOCKDOWN_WAY_D		0x900
-#define L2X0_LOCKDOWN_WAY_I		0x904
+/*
+ * The lockdown registers repeat 8 times on the L310; the L210 has only
+ * one D and one I lockdown register, at 0x0900 and 0x0904.
+ */
+#define L2X0_LOCKDOWN_WAY_D_BASE	0x900
+#define L2X0_LOCKDOWN_WAY_I_BASE	0x904
+#define L2X0_LOCKDOWN_STRIDE		0x08
 #define L2X0_TEST_OPERATION		0xF00
 #define L2X0_LINE_DATA			0xF10
 #define L2X0_LINE_TAG			0xF30
@@ -64,7 +69,7 @@
 #define L2X0_AUX_CTRL_MASK			0xc0000fff
 #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT	16
 #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT		17
-#define L2X0_AUX_CTRL_WAY_SIZE_MASK		(0x3 << 17)
+#define L2X0_AUX_CTRL_WAY_SIZE_MASK		(0x7 << 17)
 #define L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT	22
 #define L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT		26
 #define L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT		27
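
The new *_BASE/STRIDE pair exists because the L310 replicates the lockdown registers once per master. A one-line sketch of how an offset would be computed from them (l2x0_lockdown_d_off is an illustrative helper, not in the patch):

	/* offset of the i-th D-side lockdown register
	 * (i == 0..7 on the L310, only i == 0 on the L210) */
	static inline unsigned long l2x0_lockdown_d_off(int i)
	{
		return L2X0_LOCKDOWN_WAY_D_BASE + i * L2X0_LOCKDOWN_STRIDE;
	}
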
diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
index 435d3f86c708..c5627057b1c7 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -46,6 +46,14 @@ struct gic_chip_data {
 	unsigned int irq_offset;
 	void __iomem *dist_base;
 	void __iomem *cpu_base;
+#ifdef CONFIG_CPU_PM
+	u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
+	u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
+	u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
+	u32 __percpu *saved_ppi_enable;
+	u32 __percpu *saved_ppi_conf;
+#endif
+	unsigned int gic_irqs;
 };
 #endif
 
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index f389b2704d82..c190bc992f0e 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -50,6 +50,7 @@ static inline void decode_ctrl_reg(u32 reg,
 #define ARM_DEBUG_ARCH_V6_1	2
 #define ARM_DEBUG_ARCH_V7_ECP14	3
 #define ARM_DEBUG_ARCH_V7_MM	4
+#define ARM_DEBUG_ARCH_V7_1	5
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
@@ -57,6 +58,7 @@ static inline void decode_ctrl_reg(u32 reg,
 /* Watchpoints */
 #define ARM_BREAKPOINT_LOAD	1
 #define ARM_BREAKPOINT_STORE	2
+#define ARM_FSR_ACCESS_MASK	(1 << 11)
 
 /* Privilege Levels */
 #define ARM_BREAKPOINT_PRIV	1
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index d2fedb5aeb1f..b36f3654bf54 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -29,6 +29,7 @@ struct map_desc {
 #define MT_MEMORY_NONCACHED	11
 #define MT_MEMORY_DTCM		12
 #define MT_MEMORY_ITCM		13
+#define MT_MEMORY_SO		14
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5750704e0271..f1956b27ae5a 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -232,6 +232,9 @@ extern pgprot_t		pgprot_kernel;
 #define pgprot_writecombine(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
 
+#define pgprot_stronglyordered(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 67c70a31a1be..71d99b83cdb9 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -13,7 +13,12 @@
 #define __ARM_PMU_H__
 
 #include <linux/interrupt.h>
+#include <linux/perf_event.h>
 
+/*
+ * Types of PMUs that can be accessed directly and require mutual
+ * exclusion between profiling tools.
+ */
 enum arm_pmu_type {
 	ARM_PMU_DEVICE_CPU	= 0,
 	ARM_NUM_PMU_DEVICES,
@@ -37,21 +42,17 @@ struct arm_pmu_platdata {
  * reserve_pmu() - reserve the hardware performance counters
  *
  * Reserve the hardware performance counters in the system for exclusive use.
- * The platform_device for the system is returned on success, ERR_PTR()
- * encoded error on failure.
+ * Returns 0 on success or -EBUSY if the lock is already held.
  */
-extern struct platform_device *
-reserve_pmu(enum arm_pmu_type device);
+extern int
+reserve_pmu(enum arm_pmu_type type);
 
 /**
  * release_pmu() - Relinquish control of the performance counters
  *
  * Release the performance counters and allow someone else to use them.
- * Callers must have disabled the counters and released IRQs before calling
- * this. The platform_device returned from reserve_pmu() must be passed as
- * a cookie.
  */
-extern int
+extern void
 release_pmu(enum arm_pmu_type type);
 
 /**
@@ -62,30 +63,84 @@ release_pmu(enum arm_pmu_type type);
  * the actual hardware initialisation.
  */
 extern int
-init_pmu(enum arm_pmu_type device);
+init_pmu(enum arm_pmu_type type);
 
 #else /* CONFIG_CPU_HAS_PMU */
 
 #include <linux/err.h>
 
-static inline struct platform_device *
-reserve_pmu(enum arm_pmu_type device)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline int
-release_pmu(struct platform_device *pdev)
+reserve_pmu(enum arm_pmu_type type)
 {
 	return -ENODEV;
 }
 
-static inline int
-init_pmu(enum arm_pmu_type device)
-{
-	return -ENODEV;
-}
+static inline void
+release_pmu(enum arm_pmu_type type)	{ }
 
 #endif /* CONFIG_CPU_HAS_PMU */
 
+#ifdef CONFIG_HW_PERF_EVENTS
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event	**events;
+
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long           *used_mask;
+
+	/*
+	 * Hardware lock to serialize accesses to PMU registers. Needed for the
+	 * read/modify/write sequences.
+	 */
+	raw_spinlock_t		pmu_lock;
+};
+
+struct arm_pmu {
+	struct pmu	pmu;
+	enum arm_perf_pmu_ids id;
+	enum arm_pmu_type type;
+	cpumask_t	active_irqs;
+	const char	*name;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
+					 struct hw_perf_event *hwc);
+	int		(*set_event_filter)(struct hw_perf_event *evt,
+					    struct perf_event_attr *attr);
+	u32		(*read_counter)(int idx);
+	void		(*write_counter)(int idx, u32 val);
+	void		(*start)(void);
+	void		(*stop)(void);
+	void		(*reset)(void *);
+	int		(*map_event)(struct perf_event *event);
+	int		num_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+	u64		max_period;
+	struct platform_device	*plat_device;
+	struct pmu_hw_events	*(*get_hw_events)(void);
+};
+
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
+
+u64 armpmu_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc,
+			int idx, int overflow);
+
+int armpmu_event_set_period(struct perf_event *event,
+			    struct hw_perf_event *hwc,
+			    int idx);
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 #endif /* __ARM_PMU_H__ */
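
Net effect of the API change above: reserve_pmu() now returns a plain errno instead of a platform_device cookie, and release_pmu() returns void. A sketch of a caller under the new contract (the example_* names are hypothetical):

	static int example_claim_counters(void)
	{
		int err = reserve_pmu(ARM_PMU_DEVICE_CPU);

		if (err)
			return err;	/* -EBUSY: another profiler owns the PMU */

		/* ... request IRQs, program and start the counters ... */
		return 0;
	}

	static void example_release_counters(void)
	{
		/* ... stop the counters and free IRQs first ... */
		release_pmu(ARM_PMU_DEVICE_CPU);
	}
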
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 633d1cb84d87..9e92cb205e65 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -81,6 +81,10 @@ extern void cpu_dcache_clean_area(void *, int);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+
+/* These three are private to arch/arm/kernel/suspend.c */
+extern void cpu_do_suspend(void *);
+extern void cpu_do_resume(void *);
 #else
 #define cpu_proc_init			processor._proc_init
 #define cpu_proc_fin			processor._proc_fin
@@ -89,6 +93,10 @@ extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
 #define cpu_dcache_clean_area		processor.dcache_clean_area
 #define cpu_set_pte_ext			processor.set_pte_ext
 #define cpu_do_switch_mm		processor.switch_mm
+
+/* These three are private to arch/arm/kernel/suspend.c */
+#define cpu_do_suspend			processor.do_suspend
+#define cpu_do_resume			processor.do_resume
 #endif
 
 extern void cpu_resume(void);
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index b0e4e1a02318..1c0a551ae375 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -1,22 +1,7 @@
 #ifndef __ASM_ARM_SUSPEND_H
 #define __ASM_ARM_SUSPEND_H
 
-#include <asm/memory.h>
-#include <asm/tlbflush.h>
-
 extern void cpu_resume(void);
-
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-static inline int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
-	extern int __cpu_suspend(int, long, unsigned long,
-				 int (*)(unsigned long));
-	int ret = __cpu_suspend(0, PHYS_OFFSET - PAGE_OFFSET, arg, fn);
-	flush_tlb_all();
-	return ret;
-}
+extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
 #endif
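
With the inline wrapper gone, cpu_suspend() is an ordinary function and callers look the same as before. A sketch of the usual calling pattern, assuming the standard contract (example_* names are illustrative): the finisher callback is expected to power the system down and never return; if it does return, cpu_suspend() reports the failure.

	#include <asm/suspend.h>

	static int example_finish_suspend(unsigned long arg)
	{
		/* ... flush caches, then hit the platform power controller ... */
		return 1;	/* reached only if the power-down never happened */
	}

	static int example_enter_sleep(void)
	{
		/* saves CPU state, calls the finisher, returns 0 after resume */
		return cpu_suspend(0, example_finish_suspend);
	}
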
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index f7887dc53c1f..8fa83f54c967 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES)		+= armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_PM_SLEEP)		+= sleep.o
+obj-$(CONFIG_PM_SLEEP)		+= sleep.o suspend.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
@@ -43,6 +43,13 @@ obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o
 else
 obj-$(CONFIG_KPROBES)		+= kprobes-arm.o
 endif
+obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
+test-kprobes-objs		:= kprobes-test.o
+ifdef CONFIG_THUMB2_KERNEL
+test-kprobes-objs		+= kprobes-test-thumb.o
+else
+test-kprobes-objs		+= kprobes-test-arm.o
+endif
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 80f7896cc016..9943e9e74a1b 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -178,7 +178,7 @@
 		CALL(sys_ni_syscall)		/* vm86 */
 		CALL(sys_ni_syscall)		/* was sys_query_module */
 		CALL(sys_poll)
-		CALL(sys_nfsservctl)
+		CALL(sys_ni_syscall)		/* was nfsservctl */
 /* 170 */	CALL(sys_setresgid16)
 		CALL(sys_getresgid16)
 		CALL(sys_prctl)
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index a927ca1f5566..814a52a9dc39 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -45,7 +45,6 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
-static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -137,10 +136,11 @@ static u8 get_debug_arch(void)
 	u32 didr;
 
 	/* Do we implement the extended CPUID interface? */
-	if (WARN_ONCE((((read_cpuid_id() >> 16) & 0xf) != 0xf),
-	    "CPUID feature registers not supported. "
-	    "Assuming v6 debug is present.\n"))
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+			   "Assuming v6 debug is present.\n");
 		return ARM_DEBUG_ARCH_V6;
+	}
 
 	ARM_DBG_READ(c0, 0, didr);
 	return (didr >> 16) & 0xf;
@@ -154,10 +154,21 @@ u8 arch_get_debug_arch(void)
 static int debug_arch_supported(void)
 {
 	u8 arch = get_debug_arch();
-	return arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14;
+
+	/* We don't support the memory-mapped interface. */
+	return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) ||
+		arch >= ARM_DEBUG_ARCH_V7_1;
+}
+
+/* Determine number of WRP registers available. */
+static int get_num_wrp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 28) & 0xf) + 1;
 }
 
-/* Determine number of BRP register available. */
+/* Determine number of BRP registers available. */
 static int get_num_brp_resources(void)
 {
 	u32 didr;
@@ -176,9 +187,10 @@ static int core_has_mismatch_brps(void)
 static int get_num_wrps(void)
 {
 	/*
-	 * FIXME: When a watchpoint fires, the only way to work out which
-	 * watchpoint it was is by disassembling the faulting instruction
-	 * and working out the address of the memory access.
+	 * On debug architectures prior to 7.1, when a watchpoint fires, the
+	 * only way to work out which watchpoint it was is by disassembling
+	 * the faulting instruction and working out the address of the memory
+	 * access.
 	 *
 	 * Furthermore, we can only do this if the watchpoint was precise
 	 * since imprecise watchpoints prevent us from calculating register
@@ -192,36 +204,17 @@ static int get_num_wrps(void)
 	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
 	 * that it is set on some implementations].
 	 */
+	if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1)
+		return 1;
 
-#if 0
-	int wrps;
-	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
-	wrps = ((didr >> 28) & 0xf) + 1;
-#endif
-	int wrps = 1;
-
-	if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
-		wrps = get_num_brp_resources() - 1;
-
-	return wrps;
-}
-
-/* We reserve one breakpoint for each watchpoint. */
-static int get_num_reserved_brps(void)
-{
-	if (core_has_mismatch_brps())
-		return get_num_wrps();
-	return 0;
+	return get_num_wrp_resources();
 }
 
 /* Determine number of usable BRPs available. */
 static int get_num_brps(void)
 {
 	int brps = get_num_brp_resources();
-	if (core_has_mismatch_brps())
-		brps -= get_num_reserved_brps();
-	return brps;
+	return core_has_mismatch_brps() ? brps - 1 : brps;
 }
 
 /*
@@ -239,7 +232,7 @@ static int enable_monitor_mode(void)
 
 	/* Ensure that halting mode is disabled. */
 	if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN,
-			"halting debug mode enabled. Unable to access hardware resources.\n")) {
+		"halting debug mode enabled. Unable to access hardware resources.\n")) {
 		ret = -EPERM;
 		goto out;
 	}
@@ -255,6 +248,7 @@ static int enable_monitor_mode(void)
 		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
+	case ARM_DEBUG_ARCH_V7_1:
 		ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	default:
@@ -346,24 +340,10 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		val_base = ARM_BASE_BVR;
 		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
 		max_slots = core_num_brps;
-		if (info->step_ctrl.enabled) {
-			/* Override the breakpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		}
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled) {
-			/* Install into the reserved breakpoint region. */
-			ctrl_base = ARM_BASE_BCR + core_num_brps;
-			val_base = ARM_BASE_BVR + core_num_brps;
-			/* Override the watchpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		} else {
-			ctrl_base = ARM_BASE_WCR;
-			val_base = ARM_BASE_WVR;
-		}
+		ctrl_base = ARM_BASE_WCR;
+		val_base = ARM_BASE_WVR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -382,6 +362,17 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		goto out;
 	}
 
+	/* Override the breakpoint data with the step data. */
+	if (info->step_ctrl.enabled) {
+		addr = info->trigger & ~0x3;
+		ctrl = encode_ctrl_reg(info->step_ctrl);
+		if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) {
+			i = 0;
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+		}
+	}
+
 	/* Setup the address register. */
 	write_wb_reg(val_base + i, addr);
 
@@ -405,10 +396,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 		max_slots = core_num_brps;
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled)
-			base = ARM_BASE_BCR + core_num_brps;
-		else
-			base = ARM_BASE_WCR;
+		base = ARM_BASE_WCR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -426,6 +414,13 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n"))
 		return;
 
+	/* Ensure that we disable the mismatch breakpoint. */
+	if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE &&
+	    info->step_ctrl.enabled) {
+		i = 0;
+		base = ARM_BASE_BCR + core_num_brps;
+	}
+
 	/* Reset the control register. */
 	write_wb_reg(base + i, 0);
 }
@@ -632,10 +627,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * we can use the mismatch feature as a poor-man's hardware
 	 * single-step, but this only works for per-task breakpoints.
 	 */
-	if (WARN_ONCE(!bp->overflow_handler &&
-		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
-		 || !bp->hw.bp_target),
-			"overflow handler required but none found\n")) {
+	if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) ||
+	    !core_has_mismatch_brps() || !bp->hw.bp_target)) {
+		pr_warning("overflow handler required but none found\n");
 		ret = -EINVAL;
 	}
 out:
@@ -666,34 +660,62 @@ static void disable_single_step(struct perf_event *bp)
 	arch_install_hw_breakpoint(bp);
 }
 
-static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
+static void watchpoint_handler(unsigned long addr, unsigned int fsr,
+			       struct pt_regs *regs)
 {
-	int i;
+	int i, access;
+	u32 val, ctrl_reg, alignment_mask;
 	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	/* Without a disassembler, we can only handle 1 watchpoint. */
-	BUG_ON(core_num_wrps > 1);
-
 	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
 
-		if (wp == NULL) {
-			rcu_read_unlock();
-			continue;
-		}
+		if (wp == NULL)
+			goto unlock;
 
+		info = counter_arch_bp(wp);
 		/*
-		 * The DFAR is an unknown value. Since we only allow a
-		 * single watchpoint, we can set the trigger to the lowest
-		 * possible faulting address.
+		 * The DFAR is an unknown value on debug architectures prior
+		 * to 7.1. Since we only allow a single watchpoint on these
+		 * older CPUs, we can set the trigger to the lowest possible
+		 * faulting address.
 		 */
-		info = counter_arch_bp(wp);
-		info->trigger = wp->attr.bp_addr;
+		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
+			BUG_ON(i > 0);
+			info->trigger = wp->attr.bp_addr;
+		} else {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+
+			/* Check if the watchpoint value matches. */
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			if (val != (addr & ~alignment_mask))
+				goto unlock;
+
+			/* Possible match, check the byte address select. */
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			if (!((1 << (addr & alignment_mask)) & ctrl.len))
+				goto unlock;
+
+			/* Check that the access type matches. */
+			access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+				 HW_BREAKPOINT_R;
+			if (!(access & hw_breakpoint_type(wp)))
+				goto unlock;
+
+			/* We have a winner. */
+			info->trigger = addr;
+		}
+
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
 		perf_bp_event(wp, regs);
 
@@ -705,6 +727,7 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 		if (!wp->overflow_handler)
 			enable_single_step(wp, instruction_pointer(regs));
 
+unlock:
 		rcu_read_unlock();
 	}
 }
@@ -717,7 +740,7 @@ static void watchpoint_single_step_handler(unsigned long pc)
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	for (i = 0; i < core_num_reserved_brps; ++i) {
+	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
@@ -820,7 +843,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 	case ARM_ENTRY_ASYNC_WATCHPOINT:
 		WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n");
 	case ARM_ENTRY_SYNC_WATCHPOINT:
-		watchpoint_handler(addr, regs);
+		watchpoint_handler(addr, fsr, regs);
 		break;
 	default:
 		ret = 1; /* Unhandled fault. */
@@ -834,11 +857,31 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 /*
  * One-time initialisation.
  */
-static void reset_ctrl_regs(void *info)
+static cpumask_t debug_err_mask;
+
+static int debug_reg_trap(struct pt_regs *regs, unsigned int instr)
 {
-	int i, cpu = smp_processor_id();
+	int cpu = smp_processor_id();
+
+	pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+		   instr, cpu);
+
+	/* Set the error flag for this CPU and skip the faulting instruction. */
+	cpumask_set_cpu(cpu, &debug_err_mask);
+	instruction_pointer(regs) += 4;
+	return 0;
+}
+
+static struct undef_hook debug_reg_hook = {
+	.instr_mask	= 0x0fe80f10,
+	.instr_val	= 0x0e000e10,
+	.fn		= debug_reg_trap,
+};
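+
+/*
+ * The mask/value pair above matches MRC/MCR instructions targeting
+ * coprocessor 14 (the debug registers), so a core with DBGSWENABLE
+ * driven low traps into debug_reg_trap() instead of dying on an
+ * unhandled undefined instruction. Note that stepping the PC by 4 in
+ * the trap handler assumes an ARM (not Thumb-2) encoding.
+ */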
+
+static void reset_ctrl_regs(void *unused)
+{
+	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
 	u32 dbg_power;
-	cpumask_t *cpumask = info;
 
 	/*
 	 * v7 debug contains save and restore registers so that debug state
@@ -848,38 +891,57 @@ static void reset_ctrl_regs(void *info)
 	 * Access Register to avoid taking undefined instruction exceptions
 	 * later on.
 	 */
-	if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+	switch (debug_arch) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		/* ARMv6 cores just need to reset the registers. */
+		goto reset_regs;
+	case ARM_DEBUG_ARCH_V7_ECP14:
 		/*
 		 * Ensure sticky power-down is clear (i.e. debug logic is
 		 * powered up).
 		 */
 		asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
-		if ((dbg_power & 0x1) == 0) {
-			pr_warning("CPU %d debug is powered down!\n", cpu);
-			cpumask_or(cpumask, cpumask, cpumask_of(cpu));
-			return;
-		}
-
+		if ((dbg_power & 0x1) == 0)
+			err = -EPERM;
+		break;
+	case ARM_DEBUG_ARCH_V7_1:
 		/*
-		 * Unconditionally clear the lock by writing a value
-		 * other than 0xC5ACCE55 to the access register.
+		 * Ensure the OS double lock is clear.
 		 */
-		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
-		isb();
+		asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power));
+		if ((dbg_power & 0x1) == 1)
+			err = -EPERM;
+		break;
+	}
 
-		/*
-		 * Clear any configured vector-catch events before
-		 * enabling monitor mode.
-		 */
-		asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
-		isb();
+	if (err) {
+		pr_warning("CPU %d debug is powered down!\n", cpu);
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
+		return;
 	}
 
+	/*
+	 * Unconditionally clear the lock by writing a value
+	 * other than 0xC5ACCE55 to the access register.
+	 */
+	asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+	isb();
+
+	/*
+	 * Clear any configured vector-catch events before
+	 * enabling monitor mode.
+	 */
+	asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
+	isb();
+
+reset_regs:
 	if (enable_monitor_mode())
 		return;
 
 	/* We must also reset any reserved registers. */
-	for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
+	raw_num_brps = get_num_brp_resources();
+	for (i = 0; i < raw_num_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
 		write_wb_reg(ARM_BASE_BVR + i, 0UL);
 	}
@@ -895,6 +957,7 @@ static int __cpuinit dbg_reset_notify(struct notifier_block *self,
 {
 	if (action == CPU_ONLINE)
 		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+
 	return NOTIFY_OK;
 }
 
@@ -905,7 +968,6 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
 static int __init arch_hw_breakpoint_init(void)
 {
 	u32 dscr;
-	cpumask_t cpumask = { CPU_BITS_NONE };
 
 	debug_arch = get_debug_arch();
 
@@ -916,28 +978,31 @@ static int __init arch_hw_breakpoint_init(void)
 
 	/* Determine how many BRPs/WRPs are available. */
 	core_num_brps = get_num_brps();
-	core_num_reserved_brps = get_num_reserved_brps();
 	core_num_wrps = get_num_wrps();
 
-	pr_info("found %d breakpoint and %d watchpoint registers.\n",
-		core_num_brps + core_num_reserved_brps, core_num_wrps);
-
-	if (core_num_reserved_brps)
-		pr_info("%d breakpoint(s) reserved for watchpoint "
-				"single-step.\n", core_num_reserved_brps);
+	/*
+	 * We need to tread carefully here because DBGSWENABLE may be
+	 * driven low on this core and there isn't an architected way to
+	 * determine that.
+	 */
+	register_undef_hook(&debug_reg_hook);
 
 	/*
 	 * Reset the breakpoint resources. We assume that a halting
 	 * debugger will leave the world in a nice state for us.
 	 */
-	on_each_cpu(reset_ctrl_regs, &cpumask, 1);
-	if (!cpumask_empty(&cpumask)) {
+	on_each_cpu(reset_ctrl_regs, NULL, 1);
+	unregister_undef_hook(&debug_reg_hook);
+	if (!cpumask_empty(&debug_err_mask)) {
 		core_num_brps = 0;
-		core_num_reserved_brps = 0;
 		core_num_wrps = 0;
 		return 0;
 	}
 
+	pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n",
+		core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " :
+		"", core_num_wrps);
+
 	ARM_DBG_READ(c1, 0, dscr);
 	if (dscr & ARM_DSCR_HDBGEN) {
 		max_watchpoint_len = 4;
diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/kernel/kprobes-arm.c
index 79203ee1d039..9fe8910308af 100644
--- a/arch/arm/kernel/kprobes-arm.c
+++ b/arch/arm/kernel/kprobes-arm.c
@@ -60,6 +60,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -971,6 +972,9 @@ const union decode_item kprobe_decode_arm_table[] = {
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
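+/*
+ * The decode table is exported so that the test cases can be built as
+ * a module; the test code walks these tables to check decoding
+ * coverage.
+ */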
+EXPORT_SYMBOL_GPL(kprobe_decode_arm_table);
+#endif
 
 static void __kprobes arm_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c
new file mode 100644
index 000000000000..fc82de8bdcce
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test-arm.c
@@ -0,0 +1,1323 @@
+/*
+ * arch/arm/kernel/kprobes-test-arm.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "32"
+
+#define TEST_ARM_TO_THUMB_INTERWORK_R(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_REG(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+#define TEST_ARM_TO_THUMB_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_MEM(15, 3f+1)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
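+
+/*
+ * Both interworking macros have the same shape: the instruction under
+ * test at label 1: is expected to branch to the Thumb code at label 3:,
+ * which bounces back to the ARM code at label 2:. Callers pass targets
+ * like "3f-1f-8+1": the +1 sets bit 0 of the address to request a
+ * switch to Thumb state, and the -8 accounts for the PC reading two
+ * instructions ahead in ARM state. Label 99 is defined by the
+ * TESTCASE_* macros in kprobes-test.h.
+ */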
+
+
+void kprobe_arm_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Data-processing (register), (register-shifted register), (immediate)")
+
+#define _DATA_PROCESSING_DNM(op,s,val)						\
+	TEST_RR(  op "eq" s "	r0,  r",1, VAL1,", r",2, val, "")		\
+	TEST_RR(  op "ne" s "	r1,  r",1, VAL1,", r",2, val, ", lsl #3")	\
+	TEST_RR(  op "cs" s "	r2,  r",3, VAL1,", r",2, val, ", lsr #4")	\
+	TEST_RR(  op "cc" s "	r3,  r",3, VAL1,", r",2, val, ", asr #5")	\
+	TEST_RR(  op "mi" s "	r4,  r",5, VAL1,", r",2, N(val),", asr #6")	\
+	TEST_RR(  op "pl" s "	r5,  r",5, VAL1,", r",2, val, ", ror #7")	\
+	TEST_RR(  op "vs" s "	r6,  r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", pc, lsl #3")			\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vc" s "	r6,  pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vc" s "	r6,  sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "hi" s "	r8,  r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\
+	TEST_RRR( op "ls" s "	r9,  r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\
+	TEST_RRR( op "ge" s "	r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\
+	TEST_RRR( op "lt" s "	r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\
+	TEST_RR(  op "gt" s "	r12, r13"       ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op "le" s "	r14, r",0, val, ", r13"       ", lsl r",14,8,"")\
+	TEST_RR(  op s "	r12, pc"        ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op s "	r14, r",0, val, ", pc"        ", lsl r",14,8,"")\
+	TEST_R(   op "eq" s "	r0,  r",11,VAL1,", #0xf5")			\
+	TEST_R(   op "ne" s "	r11, r",0, VAL1,", #0xf5000000")		\
+	TEST_R(   op s "	r7,  r",8, VAL2,", #0x000af000")		\
+	TEST(     op s "	r4,  pc"        ", #0x00005a00")
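+
+/*
+ * A reading aid for the TEST_* macros (from kprobes-test.h): each "R"
+ * in the suffix marks a register operand which the test harness
+ * initialises to the value that follows it, so TEST_RRR() seeds three
+ * registers before executing the instruction under test, while plain
+ * TEST() takes just the instruction string. N(val) is a helper from
+ * the same header that derives a second test value from val.
+ */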
+
+#define DATA_PROCESSING_DNM(op,val)		\
+	_DATA_PROCESSING_DNM(op,"",val)		\
+	_DATA_PROCESSING_DNM(op,"s",val)
+
+#define DATA_PROCESSING_NM(op,val)						\
+	TEST_RR(  op "ne	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(  op "eq	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(  op "cc	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(  op "cs	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(  op "pl	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(  op "mi	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(  op "vc	r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R (  op "vs	r",7, VAL1,", pc, lsl #3")			\
+	TEST_R (  op "vs	r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vs	pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vs	sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "ls	r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")	\
+	TEST_RRR( op "hi	r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")	\
+	TEST_RRR( op "lt	r",11,VAL1,", r",14,val, ", asr r",7, 5,"")	\
+	TEST_RRR( op "ge	r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "le	r13"       ", r",14,val, ", ror r",14,7,"")	\
+	TEST_RR(  op "gt	r",0, val, ", r13"       ", lsl r",14,8,"")	\
+	TEST_RR(  op "	pc"        ", r",14,val, ", ror r",14,7,"")		\
+	TEST_RR(  op "	r",0, val, ", pc"        ", lsl r",14,8,"")		\
+	TEST_R(   op "eq	r",11,VAL1,", #0xf5")				\
+	TEST_R(   op "ne	r",0, VAL1,", #0xf5000000")			\
+	TEST_R(   op "	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING_DM(op,s,val)					\
+	TEST_R(   op "eq" s "	r0,  r",1, val, "")			\
+	TEST_R(   op "ne" s "	r1,  r",1, val, ", lsl #3")		\
+	TEST_R(   op "cs" s "	r2,  r",3, val, ", lsr #4")		\
+	TEST_R(   op "cc" s "	r3,  r",3, val, ", asr #5")		\
+	TEST_R(   op "mi" s "	r4,  r",5, N(val),", asr #6")		\
+	TEST_R(   op "pl" s "	r5,  r",5, val, ", ror #7")		\
+	TEST_R(   op "vs" s "	r6,  r",10,val, ", rrx")		\
+	TEST(     op "vs" s "	r7,  pc, lsl #3")			\
+	TEST(     op "vs" s "	r7,  sp, lsr #4")			\
+	TEST_RR(  op "vc" s "	r8,  r",7, val, ", lsl r",0, 3,"")	\
+	TEST_RR(  op "hi" s "	r9,  r",9, val, ", lsr r",7, 4,"")	\
+	TEST_RR(  op "ls" s "	r10, r",9, val, ", asr r",7, 5,"")	\
+	TEST_RR(  op "ge" s "	r11, r",11,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "lt" s "	r12, r",11,val, ", ror r",14,7,"")	\
+	TEST_R(   op "gt" s "	r14, r13"       ", lsl r",14,8,"")	\
+	TEST_R(   op "le" s "	r14, pc"        ", lsl r",14,8,"")	\
+	TEST(     op "eq" s "	r0,  #0xf5")				\
+	TEST(     op "ne" s "	r11, #0xf5000000")			\
+	TEST(     op s "	r7,  #0x000af000")			\
+	TEST(     op s "	r4,  #0x00005a00")
+
+#define DATA_PROCESSING_DM(op,val)		\
+	_DATA_PROCESSING_DM(op,"",val)		\
+	_DATA_PROCESSING_DM(op,"s",val)
+
+	DATA_PROCESSING_DNM("and",0xf00f00ff)
+	DATA_PROCESSING_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING_DNM("sub",VAL2)
+	DATA_PROCESSING_DNM("rsb",VAL2)
+	DATA_PROCESSING_DNM("add",VAL2)
+	DATA_PROCESSING_DNM("adc",VAL2)
+	DATA_PROCESSING_DNM("sbc",VAL2)
+	DATA_PROCESSING_DNM("rsc",VAL2)
+	DATA_PROCESSING_NM("tst",0xf00f00ff)
+	DATA_PROCESSING_NM("teq",0xf00f00ff)
+	DATA_PROCESSING_NM("cmp",VAL2)
+	DATA_PROCESSING_NM("cmn",VAL2)
+	DATA_PROCESSING_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING_DM("mov",VAL2)
+	DATA_PROCESSING_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING_DM("mvn",VAL2)
+
+	TEST("mov	ip, sp") /* This has special case emulation code */
+
+	TEST_SUPPORTED("mov	pc, #0x1000");
+	TEST_SUPPORTED("mov	sp, #0x1000");
+	TEST_SUPPORTED("cmp	pc, #0x1000");
+	TEST_SUPPORTED("cmp	sp, #0x1000");
+
+	/* Data-processing with PC as shift */
+	TEST_UNSUPPORTED(".word 0xe15c0f1e	@ cmp	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe1a0cf1e	@ mov	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe08caf1e	@ add	r10, r12, r14, asl pc")
+
+	/* Data-processing with PC as target and S bit set */
+	TEST_UNSUPPORTED("movs	pc, r1")
+	TEST_UNSUPPORTED("movs	pc, r1, lsl r2")
+	TEST_UNSUPPORTED("movs	pc, #0x10000")
+	TEST_UNSUPPORTED("adds	pc, lr, r1")
+	TEST_UNSUPPORTED("adds	pc, lr, r1, lsl r2")
+	TEST_UNSUPPORTED("adds	pc, lr, #4")
+
+	/* Data-processing with SP as target */
+	TEST("add	sp, sp, #16")
+	TEST("sub	sp, sp, #8")
+	TEST("bic	sp, sp, #0x20")
+	TEST("orr	sp, sp, #0x20")
+	TEST_PR( "add	sp, r",10,0,", r",11,4,"")
+	TEST_PRR("add	sp, r",10,0,", r",11,4,", asl r",12,1,"")
+	TEST_P(  "mov	sp, r",10,0,"")
+	TEST_PR( "mov	sp, r",10,0,", asl r",12,0,"")
+
+	/* Data-processing with PC as target */
+	TEST_BF(   "add	pc, pc, #2f-1b-8")
+	TEST_BF_R ("add	pc, pc, r",14,2f-1f-8,"")
+	TEST_BF_R ("add	pc, r",14,2f-1f-8,", pc")
+	TEST_BF_R ("mov	pc, r",0,2f,"")
+	TEST_BF_RR("mov	pc, r",0,2f,", asl r",1,0,"")
+	TEST_BB(   "sub	pc, pc, #1b-2b+8")
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_BB(   "sub	pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before ARMv6 */
+#endif
+	TEST_BB_R( "sub	pc, pc, r",14, 1f-2f+8,"")
+	TEST_BB_R( "rsb	pc, r",14,1f-2f+8,", pc")
+	TEST_RR(   "add	pc, pc, r",10,-2,", asl r",11,1,"")
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_R("add	pc, pc, r",0,3f-1f-8+1,"")
+	TEST_ARM_TO_THUMB_INTERWORK_R("sub	pc, r",0,3f+8+1,", #8")
+#endif
+	TEST_GROUP("Miscellaneous instructions")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrspl	r7, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".word 0xe10ff000	@ mrs r15, cpsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, lr")
+	TEST_UNSUPPORTED("msr	spsr, r0")
+
+	TEST_BF_R("bx	r",0,2f,"")
+	TEST_BB_R("bx	r",7,2f,"")
+	TEST_BF_R("bxeq	r",14,2f,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clzeq	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+	TEST(  "clz	r4, sp")
+	TEST_UNSUPPORTED(".word 0x016fff10	@ clz pc, r0")
+	TEST_UNSUPPORTED(".word 0x016f0f1f	@ clz r0, pc")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("bxj	r0")
+#endif
+
+	TEST_BF_R("blx	r",0,2f,"")
+	TEST_BB_R("blx	r",7,2f,"")
+	TEST_BF_R("blxeq	r",14,2f,"")
+	TEST_UNSUPPORTED(".word 0x0120003f	@ blx pc")
+
+	TEST_RR(   "qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qsub	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdsub	lr, r",9, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe101f050	@ qadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe121f050	@ qsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe141f050	@ qdadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe161f050	@ qdsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe16f2050	@ qdsub r2, r0, pc")
+	TEST_UNSUPPORTED(".word 0xe161205f	@ qdsub r2, pc, r1")
+
+	TEST_UNSUPPORTED("bkpt	0xffff")
+	TEST_UNSUPPORTED("bkpt	0x0000")
+
+	TEST_UNSUPPORTED(".word 0xe1600070 @ smc #0")
+
+	TEST_GROUP("Halfword multiply and multiply-accumulate")
+
+	TEST_RRR(    "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f3281 @ smlabb pc, r1, r2, r3")
+	TEST_RRR(    "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlatbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32a1 @ smlatb pc, r1, r2, r3")
+	TEST_RRR(    "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32c1 @ smlabt pc, r1, r2, r3")
+	TEST_RRR(    "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlattge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32e1 @ smlatt pc, r1, r2, r3")
+
+	TEST_RRR(    "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f3281 @ smlawb pc, r1, r2, r3")
+	TEST_RRR(    "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f32c1 @ smlawt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe12032cf @ smlawt r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe1203fc1 @ smlawt r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe120f2c1 @ smlawt r0, r1, r2, pc")
+
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02a1 @ smulwb pc, r1, r2")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02e1 @ smulwt pc, r1, r2")
+
+	TEST_RRRR(  "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f1382 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f382 @ smlalbb r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlaltble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13a2 @ smlaltb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3a2 @ smlaltb r1, pc, r2, r3")
+	TEST_RRRR(  "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbtle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13c2 @ smlalbt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3c2 @ smlalbt r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalttle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13e2 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe140f3e2 @ smlalbb r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe14013ef @ smlalbb r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe1401fe2 @ smlalbb r0, r1, r2, pc")
+
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f0281 @ smulbb pc, r1, r2")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02a1 @ smultb pc, r1, r2")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02c1 @ smultb pc, r1, r2")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulttge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02e1 @ smultt pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe16002ef @ smultt r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe1600fe1 @ smultt r0, r1, pc")
+
+	TEST_GROUP("Multiply and multiply-accumulate")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mulls	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "mul	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe00f0291 @ mul pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe000029f @ mul r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe0000f91 @ mul r0, r1, pc")
+	TEST_RR(    "muls	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mullss	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "muls	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe01f0291 @ muls pc, r1, r2")
+
+	TEST_RRR(    "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mla	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe02f3291 @ mla pc, r1, r2, r3")
+	TEST_RRR(    "mlas	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahis	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mlas	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe03f3291 @ mlas pc, r1, r2, r3")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_RR(  "umaal	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umaalls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umaal	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe041f392 @ umaal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe04f0392 @ umaal r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0500090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe05fff9f @ undef")
+
+	TEST_RRR(  "mls		r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(  "mlshi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(   "mls		lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe06f3291 @ mls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe060329f @ mls r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0603f91 @ mls r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe060f291 @ mls r0, r1, r2, pc")
+#endif
+
+	TEST_UNSUPPORTED(".word 0xe0700090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe07fff9f @ undef")
+
+	TEST_RR(  "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe081f392 @ umull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe08f1392 @ umull r1, pc, r2, r3")
+	TEST_RR(  "umulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe091f392 @ umulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe09f1392 @ umulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0af1392 @ umlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0a1f392 @ umlal r1, pc, r2, r3")
+	TEST_RRRR(  "umlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0bf1392 @ umlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0b1f392 @ umlals r1, pc, r2, r3")
+
+	TEST_RR(  "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0c1f392 @ smull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0cf1392 @ smull r1, pc, r2, r3")
+	TEST_RR(  "smulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0d1f392 @ smulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0df1392 @ smulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ef1392 @ smlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0e1f392 @ smlal r1, pc, r2, r3")
+	TEST_RRRR(  "smlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ff1392 @ smlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0f392 @ smlals r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0139f @ smlals r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe0f01f92 @ smlals r0, r1, r2, pc")
+
+	TEST_GROUP("Synchronization primitives")
+
+	/*
+	 * Use hard coded constants for SWP instructions to avoid warnings
+	 * about deprecated instructions.
+	 */
+	TEST_RP( ".word 0xe108e097 @ swp	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x610d0091 @ swpvs	r0, r",1,VAL1,", [sp]")
+	TEST_RP( ".word 0xe10cd09e @ swp	sp, r",14,VAL2,", [r",12,13*4,"]")
+	TEST_UNSUPPORTED(".word 0xe102f091 @ swp pc, r1, [r2]")
+	TEST_UNSUPPORTED(".word 0xe102009f @ swp r0, pc, [r2]")
+	TEST_UNSUPPORTED(".word 0xe10f0091 @ swp r0, r1, [pc]")
+	TEST_RP( ".word 0xe148e097 @ swpb	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x614d0091 @ swpvsb	r0, r",1,VAL1,", [sp]")
+	TEST_UNSUPPORTED(".word 0xe142f091 @ swpb pc, r1, [r2]")
+
+	TEST_UNSUPPORTED(".word	0xe1100090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1200090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1300090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1500090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1600090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1700090") /* Unallocated space */
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("ldrex	r2, [sp]")
+	TEST_UNSUPPORTED("strexd	r0, r2, r3, [sp]")
+	TEST_UNSUPPORTED("ldrexd	r2, r3, [sp]")
+	TEST_UNSUPPORTED("strexb	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexb	r2, [sp]")
+	TEST_UNSUPPORTED("strexh	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexh	r2, [sp]")
+#endif
+	TEST_GROUP("Extra load/store instructions")
+
+	TEST_RPR(  "strh	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")
+	TEST_RPR(  "streqh	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")
+	TEST_RPR(  "strh	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")
+	TEST_RPR(  "strneh	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strh	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")
+	TEST_RPR(  "strh	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0ba	@ strh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0bb	@ strh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089a0bf	@ strh r10, [r9], pc")
+
+	TEST_PR(   "ldrh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrcsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrcch	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0ba	@ ldrh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0bb	@ ldrh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe099a0bf	@ ldrh r10, [r9], pc")
+
+	TEST_RP(   "strh	r",0, VAL1,", [r",1, 24,", #-2]")
+	TEST_RP(   "strmih	r",14,VAL2,", [r",13,0, ", #2]")
+	TEST_RP(   "strh	r",1, VAL1,", [r",2, 24,", #4]!")
+	TEST_RP(   "strplh	r",12,VAL2,", [r",11,24,", #-4]!")
+	TEST_RP(   "strh	r",2, VAL1,", [r",3, 24,"], #48")
+	TEST_RP(   "strh	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3b0	@ strh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3b0	@ strh pc, [r9], #48")
+
+	TEST_P(	   "ldrh	r0, [r",0,  24,", #-2]")
+	TEST_P(	   "ldrvsh	r14, [r",13,0, ", #2]")
+	TEST_P(	   "ldrh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrvch	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3b0	@ ldrh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3b0	@ ldrh pc, [r9], #48")
+
+	TEST_PR(   "ldrsb	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrhisb	r14, [r",13,0,", r",12,  48,"]")
+	TEST_PR(   "ldrsb	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlssb	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsb	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsb	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0da	@ ldrsb r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0db	@ ldrsb pc, [r9], r11")
+
+	TEST_P(	   "ldrsb	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldrgesb	r14, [r",13,0, ", #1]")
+	TEST_P(	   "ldrsb	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrltsb	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsb	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsb	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsb	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3d0	@ ldrsb r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3d0	@ ldrsb pc, [r9], #48")
+
+	TEST_PR(   "ldrsh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrgtsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrsh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlesh	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0fa	@ ldrsh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0fb	@ ldrsh pc, [r9], r11")
+
+	TEST_P(	   "ldrsh	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldreqsh	r14, [r",13,0 ,", #1]")
+	TEST_P(	   "ldrsh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrnesh	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3f0	@ ldrsh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3f0	@ ldrsh pc, [r9], #48")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("strht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], r3")
+	TEST_UNSUPPORTED("strht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], #48")
+#endif
+
+	TEST_RPR(  "strd	r",0, VAL1,", [r",1, 48,", -r",2,24,"]")
+	TEST_RPR(  "strccd	r",8, VAL2,", [r",13,0, ", r",12,48,"]")
+	TEST_RPR(  "strd	r",4, VAL1,", [r",2, 24,", r",3, 48,"]!")
+	TEST_RPR(  "strcsd	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strd	r",2, VAL1,", [r",3, 24,"], r",4,48,"")
+	TEST_RPR(  "strd	r",10,VAL2,", [r",9, 48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0fa	@ strd r12, [pc, r10]!")
+
+	TEST_PR(   "ldrd	r0, [r",0, 48,", -r",2,24,"]")
+	TEST_PR(   "ldrmid	r8, [r",13,0, ", r",12,48,"]")
+	TEST_PR(   "ldrd	r4, [r",2, 24,", r",3, 48,"]!")
+	TEST_PR(   "ldrpld	r6, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrd	r2, [r",5, 24,"], r",4,48,"")
+	TEST_PR(   "ldrd	r10, [r",9,48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0da	@ ldrd r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0db	@ ldrd pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089e0db	@ ldrd lr, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089c0df	@ ldrd r12, [r9], pc")
+
+	TEST_RP(   "strd	r",0, VAL1,", [r",1, 24,", #-8]")
+	TEST_RP(   "strvsd	r",8, VAL2,", [r",13,0, ", #8]")
+	TEST_RP(   "strd	r",4, VAL1,", [r",2, 24,", #16]!")
+	TEST_RP(   "strvcd	r",12,VAL2,", [r",11,24,", #-16]!")
+	TEST_RP(   "strd	r",2, VAL1,", [r",4, 24,"], #48")
+	TEST_RP(   "strd	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3f0	@ strd r12, [pc, #48]!")
+
+	TEST_P(	   "ldrd	r0, [r",0, 24,", #-8]")
+	TEST_P(	   "ldrhid	r8, [r",13,0, ", #8]")
+	TEST_P(	   "ldrd	r4, [r",2, 24,", #16]!")
+	TEST_P(	   "ldrlsd	r6, [r",11,24,", #-16]!")
+	TEST_P(	   "ldrd	r2, [r",5, 24,"], #48")
+	TEST_P(	   "ldrd	r10, [r",9,64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3d0	@ ldrd r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3d0	@ ldrd pc, [r9], #48")
+	TEST_UNSUPPORTED(".word 0xe0c9e3d0	@ ldrd lr, [r9], #48")
+
+	TEST_GROUP("Miscellaneous")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".word 0xe300f000	@ movw pc, #0")
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".word 0xe340f000	@ movt pc, #0")
+#endif
+
+	TEST_UNSUPPORTED("msr	cpsr, 0x13")
+	TEST_UNSUPPORTED("msr	cpsr_f, 0xf0000000")
+	TEST_UNSUPPORTED("msr	spsr, 0x13")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED("dbg #0")
+#endif
+
+	TEST_GROUP("Load/store word and unsigned byte")
+
+#define LOAD_STORE(byte)							\
+	TEST_RP( "str"byte"	r",0, VAL1,", [r",1, 24,", #-2]")		\
+	TEST_RP( "str"byte"	r",14,VAL2,", [r",13,0, ", #2]")		\
+	TEST_RP( "str"byte"	r",1, VAL1,", [r",2, 24,", #4]!")		\
+	TEST_RP( "str"byte"	r",12,VAL2,", [r",11,24,", #-4]!")		\
+	TEST_RP( "str"byte"	r",2, VAL1,", [r",3, 24,"], #48")		\
+	TEST_RP( "str"byte"	r",10,VAL2,", [r",9, 64,"], #-48")		\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")	\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")	\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")	\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")	\
+	TEST_RPR("str"byte"	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")	\
+	TEST_RPR("str"byte"	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")	\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 24,", r",2,  32,", asl #1]")\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  32,", asr #3]!")\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\
+	TEST_P(  "ldr"byte"	r0, [r",0,  24,", #-2]")			\
+	TEST_P(  "ldr"byte"	r14, [r",13,0, ", #2]")				\
+	TEST_P(  "ldr"byte"	r1, [r",2,  24,", #4]!")			\
+	TEST_P(  "ldr"byte"	r12, [r",11,24,", #-4]!")			\
+	TEST_P(  "ldr"byte"	r2, [r",3,  24,"], #48")			\
+	TEST_P(  "ldr"byte"	r10, [r",9, 64,"], #-48")			\
+	TEST_PR( "ldr"byte"	r0, [r",0,  48,", -r",2, 24,"]")		\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 48,"]")		\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3, 48,"]!")		\
+	TEST_PR( "ldr"byte"	r12, [r",11,48,", -r",10,24,"]!")		\
+	TEST_PR( "ldr"byte"	r2, [r",3,  24,"], r",4, 48,"")			\
+	TEST_PR( "ldr"byte"	r10, [r",9, 48,"], -r",11,24,"")		\
+	TEST_PR( "ldr"byte"	r0, [r",0,  24,", r",2,  32,", asl #1]")	\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 32,", lsr #2]")	\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3,  32,", asr #3]!")	\
+	TEST_PR( "ldr"byte"	r12, [r",11,24,", r",10, 4,", ror #31]!")	\
+	TEST(    "ldr"byte"	r0, [pc, #0]")					\
+	TEST_R(  "ldr"byte"	r12, [pc, r",14,0,"]")
+
+	LOAD_STORE("")
+	TEST_P(   "str	pc, [r",0,0,", #15*4]")
+	TEST_R(   "str	pc, [sp, r",2,15*4,"]")
+	TEST_BF(  "ldr	pc, [sp, #15*4]")
+	TEST_BF_R("ldr	pc, [sp, r",2,15*4,"]")
+
+	TEST_P(   "str	sp, [r",0,0,", #13*4]")
+	TEST_R(   "str	sp, [sp, r",2,13*4,"]")
+	TEST_BF(  "ldr	sp, [sp, #13*4]")
+	TEST_BF_R("ldr	sp, [sp, r",2,13*4,"]")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+#endif
+	TEST_UNSUPPORTED(".word 0xe5af6008	@ str r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7af6008	@ str r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5bf6008	@ ldr r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7bf6008	@ ldr r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe788600f	@ str r6, [r8, pc]")
+	TEST_UNSUPPORTED(".word 0xe798600f	@ ldr r6, [r8, pc]")
+
+	LOAD_STORE("b")
+	TEST_UNSUPPORTED(".word 0xe5f7f008	@ ldrb pc, [r7, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7f7f008	@ ldrb pc, [r7, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5ef6008	@ strb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ef6008	@ strb r6, [pc, r3]!")
+	TEST_UNSUPPORTED(".word 0xe5ff6008	@ ldrb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ff6008	@ ldrb r6, [pc, r3]!")
+
+	TEST_UNSUPPORTED("ldrt	r0, [r1], #4")
+	TEST_UNSUPPORTED("ldrt	r1, [r2], r3")
+	TEST_UNSUPPORTED("strt	r2, [r3], #4")
+	TEST_UNSUPPORTED("strt	r3, [r4], r5")
+	TEST_UNSUPPORTED("ldrbt	r4, [r5], #4")
+	TEST_UNSUPPORTED("ldrbt	r5, [r6], r7")
+	TEST_UNSUPPORTED("strbt	r6, [r7], #4")
+	TEST_UNSUPPORTED("strbt	r7, [r8], r9")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	TEST_UNSUPPORTED(".word 0xe6000010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe60fffff") /* Unallocated space */
+
+	TEST_RR(    "sadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff1a	@ sadd16	pc, r12, r10")
+	TEST_RR(    "sasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff3a	@ sasx	pc, r12, r10")
+	TEST_RR(    "ssax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff5a	@ ssax	pc, r12, r10")
+	TEST_RR(    "ssub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff7a	@ ssub16	pc, r12, r10")
+	TEST_RR(    "sadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff9a	@ sadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe61000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffdf") /* Unallocated space */
+	TEST_RR(    "ssub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cfffa	@ ssub8	pc, r12, r10")
+
+	TEST_RR(    "qadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff1a	@ qadd16	pc, r12, r10")
+	TEST_RR(    "qasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff3a	@ qasx	pc, r12, r10")
+	TEST_RR(    "qsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff5a	@ qsax	pc, r12, r10")
+	TEST_RR(    "qsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff7a	@ qsub16	pc, r12, r10")
+	TEST_RR(    "qadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff9a	@ qadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe62000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffdf") /* Unallocated space */
+	TEST_RR(    "qsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cfffa	@ qsub8	pc, r12, r10")
+
+	TEST_RR(    "shadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff1a	@ shadd16	pc, r12, r10")
+	TEST_RR(    "shasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff3a	@ shasx	pc, r12, r10")
+	TEST_RR(    "shsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff5a	@ shsax	pc, r12, r10")
+	TEST_RR(    "shsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff7a	@ shsub16	pc, r12, r10")
+	TEST_RR(    "shadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff9a	@ shadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe63000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffdf") /* Unallocated space */
+	TEST_RR(    "shsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cfffa	@ shsub8	pc, r12, r10")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	TEST_UNSUPPORTED(".word 0xe6400010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe64fffff") /* Unallocated space */
+
+	TEST_RR(    "uadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff1a	@ uadd16	pc, r12, r10")
+	TEST_RR(    "uasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff3a	@ uasx	pc, r12, r10")
+	TEST_RR(    "usax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff5a	@ usax	pc, r12, r10")
+	TEST_RR(    "usub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff7a	@ usub16	pc, r12, r10")
+	TEST_RR(    "uadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff9a	@ uadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe65000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffdf") /* Unallocated space */
+	TEST_RR(    "usub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cfffa	@ usub8	pc, r12, r10")
+
+	TEST_RR(    "uqadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff1a	@ uqadd16	pc, r12, r10")
+	TEST_RR(    "uqasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff3a	@ uqasx	pc, r12, r10")
+	TEST_RR(    "uqsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff5a	@ uqsax	pc, r12, r10")
+	TEST_RR(    "uqsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff7a	@ uqsub16	pc, r12, r10")
+	TEST_RR(    "uqadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff9a	@ uqadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe66000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffdf") /* Unallocated space */
+	TEST_RR(    "uqsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cfffa	@ uqsub8	pc, r12, r10")
+
+	TEST_RR(    "uhadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff1a	@ uhadd16	pc, r12, r10")
+	TEST_RR(    "uhasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff3a	@ uhasx	pc, r12, r10")
+	TEST_RR(    "uhsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff5a	@ uhsax	pc, r12, r10")
+	TEST_RR(    "uhsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff7a	@ uhsub16	pc, r12, r10")
+	TEST_RR(    "uhadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff9a	@ uhadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffdf") /* Unallocated space */
+	TEST_RR(    "uhsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cfffa	@ uhsub8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67feffa	@ uhsub8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe67cefff	@ uhsub8	r14, r12, pc")
+#endif /* __LINUX_ARM_ARCH__ >= 7 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Packing, unpacking, saturation, and reversal")
+
+	TEST_RR(    "pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_UNSUPPORTED(".word 0xe68cf11a	@ pkhbt	pc, r12, r10, lsl #2")
+	TEST_RR(    "pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+	TEST_UNSUPPORTED(".word 0xe68cf15a	@ pkhtb	pc, r12, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68fe15a	@ pkhtb	r14, pc, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68ce15f	@ pkhtb	r14, r12, pc, asr #2")
+	TEST_UNSUPPORTED(".word 0xe6900010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fffdf") /* Unallocated space */
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6b7f01c	@ ssat	pc, #24, r12")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6f7f01c	@ usat	pc, #24, r12")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe68cf47a	@ sxtab16	pc,r12, r10, ror #8")
+
+	TEST_RR(    "sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "sel	r14, r",12,VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe68cffba	@ sel	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe68fefba	@ sel	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe68cefbf	@ sel	r14, r12, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6abff3c	@ ssat16	pc, #12, r12")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6acf47a	@ sxtab	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev	r0, r",0,   VAL1,"")
+	TEST_R(     "rev	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfff3c	@ rev	pc, r12")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6bcf47a	@ sxtah	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev16	r0, r",0,   VAL1,"")
+	TEST_R(     "rev16	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfffbc	@ rev16	pc, r12")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ccf47a	@ uxtab16	pc,r12, r10, ror #8")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6ecff3c	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".word 0xe6ecef3f	@ usat16	r14, #12, pc")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ecf47a	@ uxtab	pc,r12, r10, ror #8")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_R(     "rbit	r0, r",0,   VAL1,"")
+	TEST_R(     "rbit	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ rbit	pc, r12")
+#endif
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6fff077	@ uxth	pc, r7")
+	TEST_UNSUPPORTED(".word 0xe6ff807f	@ uxth	r8, pc")
+	TEST_UNSUPPORTED(".word 0xe6fcf47a	@ uxtah	pc, r12, r10, ror #8")
+	TEST_UNSUPPORTED(".word 0xe6fce47f	@ uxtah	r14, r12, pc, ror #8")
+
+	TEST_R(     "revsh	r0, r",0,   VAL1,"")
+	TEST_R(     "revsh	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ revsh	pc, r12")
+	TEST_UNSUPPORTED(".word 0xe6ffef3f	@ revsh	r14, pc")
+
+	TEST_UNSUPPORTED(".word 0xe6900070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fff7f") /* Unallocated space */
+
+	TEST_UNSUPPORTED(".word 0xe6d00070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe6dfff7f") /* Unallocated space */
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Signed multiplies")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a1c	@ smlad	pc, r12, r10, r8")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a3c	@ smladx	pc, r12, r10, r8")
+
+	TEST_RR(   "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa1c	@ smuad	pc, r12, r10")
+	TEST_RR(   "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa3c	@ smuadx	pc, r12, r10")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a5c	@ smlsd	pc, r12, r10, r8")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a7c	@ smlsdx	pc, r12, r10, r8")
+
+	TEST_RR(   "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa5c	@ smusd	pc, r12, r10")
+	TEST_RR(   "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa7c	@ smusdx	pc, r12, r10")
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af819	@ smlald	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb819	@ smlald	r11, pc, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74ab81f	@ smlald	r11, r10, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe74abf19	@ smlald	r11, r10, r9, pc")
+
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af839	@ smlaldx	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb839	@ smlaldx	r11, pc, r9, r8")
+
+	TEST_RRR(  "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a1c	@ smmla	pc, r12, r10, r8")
+	TEST_RRR(  "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a3c	@ smmlar	pc, r12, r10, r8")
+
+	TEST_RR(   "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ smmul	pc, r12, r10")
+	TEST_RR(   "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa3c	@ smmulr	pc, r12, r10")
+
+	TEST_RRR(  "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8adc	@ smmls	pc, r12, r10, r8")
+	TEST_RRR(  "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8afc	@ smmlsr	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8aff	@ smmlsr	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8ffc	@ smmlsr	r14, r12, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe75efafc	@ smmlsr	r14, r12, r10, pc")
+
+	TEST_RR(   "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ usad8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe75efa1f	@ usad8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe75eff1c	@ usad8	r14, r12, pc")
+
+	TEST_RRR(  "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(  "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_UNSUPPORTED(".word 0xe78f8a1c	@ usada8	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8a1f	@ usada8	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8f1c	@ usada8	r14, r12, pc, r8")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Bit Field")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfxeq	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7aff45c	@ sbfx	pc, r12, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfxcs	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7eff45c	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".word 0xe7efc45f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfcvs	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".word 0xe7def01f	@ bfc	pc, #0, #31");
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfipl	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".word 0xe7d7f21e	@ bfi	pc, r14, #4, #20")
+
+	TEST_UNSUPPORTED(".word 0x07f000f0")  /* Permanently UNDEFINED */
+	TEST_UNSUPPORTED(".word 0x07ffffff")  /* Permanently UNDEFINED */
+#endif /* __LINUX_ARM_ARCH__ >= 7 */
+
+	TEST_GROUP("Branch, branch with link, and block data transfer")
+
+	TEST_P(   "stmda	r",0, 16*4,", {r0}")
+	TEST_P(   "stmeqda	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmneda	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmda	r",13,0,   "!, {pc}")
+
+	TEST_P(   "ldmda	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmcsda	r",4, 15*4,", {r0-r15}")
+	TEST_BF_P("ldmccda	r",7, 15*4,"!, {r8-r15}")
+	TEST_P(   "ldmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmda	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0}")
+	TEST_P(   "stmmiia	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmplia	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmia	r",14,0,   "!, {pc}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmvsia	r",4, 0,   ", {r0-r15}")
+	TEST_BF_P("ldmvcia	r",7, 8*4, "!, {r8-r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0}")
+	TEST_P(   "stmhidb	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmlsdb	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmdb	r",13,4,   "!, {pc}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmgedb	r",4, 16*4,", {r0-r15}")
+	TEST_BF_P("ldmltdb	r",7, 16*4,"!, {r8-r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {pc}")
+
+	TEST_P(   "stmib	r",0, 16*4,", {r0}")
+	TEST_P(   "stmgtib	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmleib	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmib	r",13,-4,  "!, {pc}")
+
+	TEST_P(   "ldmib	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmeqib	r",4, -4,", {r0-r15}")
+	TEST_BF_P("ldmneib	r",7, 7*4,"!, {r8-r15}")
+	TEST_P(   "ldmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmib	r",14,14*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmeqdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmnedb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmccia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmcsia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmplia	r",0,15*4,", {pc}")
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmmiia	r",13,0,", {r0-r15}")
+#endif
+	TEST_BF("b	2f")
+	TEST_BF("bl	2f")
+	TEST_BB("b	2b")
+	TEST_BB("bl	2b")
+
+	TEST_BF("beq	2f")
+	TEST_BF("bleq	2f")
+	TEST_BB("bne	2b")
+	TEST_BB("blne	2b")
+
+	TEST_BF("bgt	2f")
+	TEST_BF("blgt	2f")
+	TEST_BB("blt	2b")
+	TEST_BB("bllt	2b")
+
+	TEST_GROUP("Supervisor Call, and coprocessor instructions")
+
+	/*
+	 * We can't really test these by executing them, so all
+	 * we can do is check that probes are, or are not, allowed.
+	 * At the moment none are allowed...
+	 */
+#define TEST_COPROCESSOR(code) TEST_UNSUPPORTED(code)
+
+#define COPROCESSOR_INSTRUCTIONS_ST_LD(two,cc)					\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], {1}")			\
+										\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"daf0001	@ stc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d2f0001	@ stc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"caf0001	@ stc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c2f0001	@ stc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"def0001	@ stc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d6f0001	@ stc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cef0001	@ stc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c6f0001	@ stc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dbf0001	@ ldc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d3f0001	@ ldc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cbf0001	@ ldc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c3f0001	@ ldc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dff0001	@ ldc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d7f0001	@ ldc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cff0001	@ ldc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c7f0001	@ ldc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15], {1}")
+
+#define COPROCESSOR_INSTRUCTIONS_MC_MR(two,cc)					\
+										\
+	TEST_COPROCESSOR( "mcrr"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mcrr"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c4f00f0	@ mcrr"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c40ff0f	@ mcrr"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "mrrc"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mrrc"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c5f00f0	@ mrrc"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c50ff0f	@ mrrc"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "cdp"two"	15, 15, cr15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "cdp"two"	0, 0, cr0, cr0, cr0, 0")		\
+	TEST_COPROCESSOR( "mcr"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mcr"two"	0, 0, r0, cr0, cr0, 0")			\
+	TEST_COPROCESSOR( "mrc"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mrc"two"	0, 0, r0, cr0, cr0, 0")
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("","e")
+	COPROCESSOR_INSTRUCTIONS_MC_MR("","e")
+	TEST_UNSUPPORTED("svc	0")
+	TEST_UNSUPPORTED("svc	0xffffff")
+
+	TEST_GROUP("Unconditional instruction")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("srsda	sp, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp, 0x13")
+	TEST_UNSUPPORTED("srsia	sp, 0x13")
+	TEST_UNSUPPORTED("srsib	sp, 0x13")
+	TEST_UNSUPPORTED("srsda	sp!, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp!, 0x13")
+	TEST_UNSUPPORTED("srsia	sp!, 0x13")
+	TEST_UNSUPPORTED("srsib	sp!, 0x13")
+
+	TEST_UNSUPPORTED("rfeda	sp")
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfeib	sp")
+	TEST_UNSUPPORTED("rfeda	sp!")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+	TEST_UNSUPPORTED("rfeib	sp!")
+	TEST_UNSUPPORTED(".word 0xf81d0a00	@ rfeda	pc")
+	TEST_UNSUPPORTED(".word 0xf91d0a00	@ rfedb	pc")
+	TEST_UNSUPPORTED(".word 0xf89d0a00	@ rfeia	pc")
+	TEST_UNSUPPORTED(".word 0xf99d0a00	@ rfeib	pc")
+	TEST_UNSUPPORTED(".word 0xf83d0a00	@ rfeda	pc!")
+	TEST_UNSUPPORTED(".word 0xf93d0a00	@ rfedb	pc!")
+	TEST_UNSUPPORTED(".word 0xf8bd0a00	@ rfeia	pc!")
+	TEST_UNSUPPORTED(".word 0xf9bd0a00	@ rfeib	pc!")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_X(	"blx	__dummy_thumb_subroutine_even",
+		".thumb				\n\t"
+		".space 4			\n\t"
+		".type __dummy_thumb_subroutine_even, %%function \n\t"
+		"__dummy_thumb_subroutine_even:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_even")
+
+	TEST_X(	"blx	__dummy_thumb_subroutine_odd",
+		".thumb				\n\t"
+		".space 2			\n\t"
+		".type __dummy_thumb_subroutine_odd, %%function	\n\t"
+		"__dummy_thumb_subroutine_odd:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_odd")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("2","f")
+#if __LINUX_ARM_ARCH__ >= 6
+	COPROCESSOR_INSTRUCTIONS_MC_MR("2","f")
+#endif
+
+	TEST_GROUP("Miscellaneous instructions, memory hints, and Advanced SIMD instructions")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("cps	0x13")
+	TEST_UNSUPPORTED("cpsie	i")
+	TEST_UNSUPPORTED("cpsid	i")
+	TEST_UNSUPPORTED("cpsie	i,0x13")
+	TEST_UNSUPPORTED("cpsid	i,0x13")
+	TEST_UNSUPPORTED("setend	le")
+	TEST_UNSUPPORTED("setend	be")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_P("pli	[r",0,0b,", #16]")
+	TEST(  "pli	[pc, #0]")
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 5
+	TEST_P("pld	[r",0,32,", #-16]")
+	TEST(  "pld	[pc, #0]")
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED(  ".word 0xf590f000	@ pldw [r0, #0]")
+	TEST_SUPPORTED(  ".word 0xf797f000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(  ".word 0xf798f18c	@ pldw	[r8, r12, lsl #3]");
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+#endif
+
+	verbose("\n");
+}
+
diff --git a/arch/arm/kernel/kprobes-test-thumb.c b/arch/arm/kernel/kprobes-test-thumb.c
new file mode 100644
index 000000000000..5e726c31c45a
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test-thumb.c
@@ -0,0 +1,1187 @@
+/*
+ * arch/arm/kernel/kprobes-test-thumb.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "16"
+
+#define DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_flags |= TEST_FLAG_NO_ITBLOCK;	\
+	tests						\
+	kprobe_test_flags &= ~TEST_FLAG_NO_ITBLOCK;
+
+#define CONDITION_INSTRUCTIONS(cc_pos, tests)		\
+	kprobe_test_cc_position = cc_pos;		\
+	DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_cc_position = 0;
+
+#define TEST_ITBLOCK(code)				\
+	kprobe_test_flags |= TEST_FLAG_FULL_ITBLOCK;	\
+	TESTCASE_START(code)				\
+	TEST_ARG_END("")				\
+	"50:	nop			\n\t"		\
+	"1:	"code"			\n\t"		\
+	"	mov r1, #0x11		\n\t"		\
+	"	mov r2, #0x22		\n\t"		\
+	"	mov r3, #0x33		\n\t"		\
+	"2:	nop			\n\t"		\
+	TESTCASE_END					\
+	kprobe_test_flags &= ~TEST_FLAG_FULL_ITBLOCK;
+
+#define TEST_THUMB_TO_ARM_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f+1)					\
+	TEST_ARG_MEM(15, 3f)					\
+	TEST_ARG_END("")					\
+	"	nop			\n\t" /* To align 1f */	\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"3:	adr	lr, 2f+1	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+void kprobe_thumb16_test_cases(void)
+{
+	kprobe_test_flags = TEST_FLAG_NARROW_INSTR;
+
+	TEST_GROUP("Shift (immediate), add, subtract, move, and compare")
+
+	TEST_R(    "lsls	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsls	r0, r",7,VAL2,", #11")
+	TEST_R(    "lsrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsrs	r0, r",7,VAL2,", #11")
+	TEST_R(    "asrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "asrs	r0, r",7,VAL2,", #11")
+	TEST_RR(   "adds	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "adds	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_RR(   "subs	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "subs	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_R(    "adds	r7, r",0,VAL1,", #5")
+	TEST_R(    "adds	r0, r",7,VAL2,", #2")
+	TEST_R(    "subs	r7, r",0,VAL1,", #5")
+	TEST_R(    "subs	r0, r",7,VAL2,", #2")
+	TEST(      "movs.n	r0, #0x5f")
+	TEST(      "movs.n	r7, #0xa0")
+	TEST_R(    "cmp.n	r",0,0x5e, ", #0x5f")
+	TEST_R(    "cmp.n	r",5,0x15f,", #0x5f")
+	TEST_R(    "cmp.n	r",7,0xa0, ", #0xa0")
+	TEST_R(    "adds.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "adds.n	r",7,VAL2,", #0xa0")
+	TEST_R(    "subs.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "subs.n	r",7,VAL2,", #0xa0")
+
+	TEST_GROUP("16-bit Thumb data-processing instructions")
+
+#define DATA_PROCESSING16(op,val)			\
+	TEST_RR(   op"	r",0,VAL1,", r",7,val,"")	\
+	TEST_RR(   op"	r",7,VAL2,", r",0,val,"")
+
+	DATA_PROCESSING16("ands",0xf00f00ff)
+	DATA_PROCESSING16("eors",0xf00f00ff)
+	DATA_PROCESSING16("lsls",11)
+	DATA_PROCESSING16("lsrs",11)
+	DATA_PROCESSING16("asrs",11)
+	DATA_PROCESSING16("adcs",VAL2)
+	DATA_PROCESSING16("sbcs",VAL2)
+	DATA_PROCESSING16("rors",11)
+	DATA_PROCESSING16("tst",0xf00f00ff)
+	TEST_R("rsbs	r",0,VAL1,", #0")
+	TEST_R("rsbs	r",7,VAL2,", #0")
+	DATA_PROCESSING16("cmp",0xf00f00ff)
+	DATA_PROCESSING16("cmn",0xf00f00ff)
+	DATA_PROCESSING16("orrs",0xf00f00ff)
+	DATA_PROCESSING16("muls",VAL2)
+	DATA_PROCESSING16("bics",0xf00f00ff)
+	DATA_PROCESSING16("mvns",VAL2)
+
+	TEST_GROUP("Special data instructions and branch and exchange")
+
+	TEST_RR(  "add	r",0, VAL1,", r",7,VAL2,"")
+	TEST_RR(  "add	r",3, VAL2,", r",8,VAL3,"")
+	TEST_RR(  "add	r",8, VAL3,", r",0,VAL1,"")
+	TEST_R(   "add	sp"        ", r",8,-8,  "")
+	TEST_R(   "add	r",14,VAL1,", pc")
+	TEST_BF_R("add	pc"        ", r",0,2f-1f-8,"")
+	TEST_UNSUPPORTED(".short 0x44ff	@ add pc, pc")
+
+	TEST_RR(  "cmp	r",3,VAL1,", r",8,VAL2,"")
+	TEST_RR(  "cmp	r",8,VAL2,", r",0,VAL1,"")
+	TEST_R(   "cmp	sp"       ", r",8,-8,  "")
+
+	TEST_R(   "mov	r0, r",7,VAL2,"")
+	TEST_R(   "mov	r3, r",8,VAL3,"")
+	TEST_R(   "mov	r8, r",0,VAL1,"")
+	TEST_P(   "mov	sp, r",8,-8,  "")
+	TEST(     "mov	lr, pc")
+	TEST_BF_R("mov	pc, r",0,2f,  "")
+
+	TEST_BF_R("bx	r",0, 2f+1,"")
+	TEST_BF_R("bx	r",14,2f+1,"")
+	TESTCASE_START("bx	pc")
+		TEST_ARG_REG(14, 99f+1)
+		TEST_ARG_END("")
+		"	nop			\n\t" /* To align the bx pc*/
+		"50:	nop			\n\t"
+		"1:	bx	pc		\n\t"
+		"	bx	lr		\n\t"
+		".arm				\n\t"
+		"	adr	lr, 2f+1	\n\t"
+		"	bx	lr		\n\t"
+		".thumb				\n\t"
+		"2:	nop			\n\t"
+	TESTCASE_END
+
+	TEST_BF_R("blx	r",0, 2f+1,"")
+	TEST_BB_R("blx	r",14,2f+1,"")
+	TEST_UNSUPPORTED(".short 0x47f8	@ blx pc")
+
+	TEST_GROUP("Load from Literal Pool")
+
+	TEST_X( "ldr	r0, 3f",
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL1))
+	TEST_X( "ldr	r7, 3f",
+		".space 128				\n\t"
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL2))
+
+	TEST_GROUP("16-bit Thumb Load/store instructions")
+
+	TEST_RPR("str	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("str	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strh	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strh	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strb	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strb	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrsb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldr	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldr	r7, [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrh	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrsh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsh	r7, [r",6, 24,", r",5,  50,"]")
+
+	TEST_RP("str	r",0, VAL1,", [r",1, 24,", #120]")
+	TEST_RP("str	r",7, VAL2,", [r",6, 24,", #120]")
+	TEST_P( "ldr	r0, [r",1, 24,", #120]")
+	TEST_P( "ldr	r7, [r",6, 24,", #120]")
+	TEST_RP("strb	r",0, VAL1,", [r",1, 24,", #30]")
+	TEST_RP("strb	r",7, VAL2,", [r",6, 24,", #30]")
+	TEST_P( "ldrb	r0, [r",1, 24,", #30]")
+	TEST_P( "ldrb	r7, [r",6, 24,", #30]")
+	TEST_RP("strh	r",0, VAL1,", [r",1, 24,", #60]")
+	TEST_RP("strh	r",7, VAL2,", [r",6, 24,", #60]")
+	TEST_P( "ldrh	r0, [r",1, 24,", #60]")
+	TEST_P( "ldrh	r7, [r",6, 24,", #60]")
+
+	TEST_R( "str	r",0, VAL1,", [sp, #0]")
+	TEST_R( "str	r",7, VAL2,", [sp, #160]")
+	TEST(   "ldr	r0, [sp, #0]")
+	TEST(   "ldr	r7, [sp, #160]")
+
+	TEST_RP("str	r",0, VAL1,", [r",0, 24,"]")
+	TEST_P( "ldr	r0, [r",0, 24,"]")
+
+	TEST_GROUP("Generate PC-/SP-relative address")
+
+	TEST("add	r0, pc, #4")
+	TEST("add	r7, pc, #1020")
+	TEST("add	r0, sp, #4")
+	TEST("add	r7, sp, #1020")
+
+	TEST_GROUP("Miscellaneous 16-bit instructions")
+
+	TEST_UNSUPPORTED( "cpsie	i")
+	TEST_UNSUPPORTED( "cpsid	i")
+	TEST_UNSUPPORTED( "setend	le")
+	TEST_UNSUPPORTED( "setend	be")
+
+	TEST("add	sp, #"__stringify(TEST_MEMORY_SIZE)) /* Assumes TEST_MEMORY_SIZE < 0x400 */
+	TEST("sub	sp, #0x7f*4")
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_BF_R(  "cbnz	r",0,0, ", 2f")
+	TEST_BF_R(  "cbz	r",2,-1,", 2f")
+	TEST_BF_RX( "cbnz	r",4,1, ", 2f",0x20)
+	TEST_BF_RX( "cbz	r",7,0, ", 2f",0x40)
+)
+	TEST_R("sxth	r0, r",7, HH1,"")
+	TEST_R("sxth	r7, r",0, HH2,"")
+	TEST_R("sxtb	r0, r",7, HH1,"")
+	TEST_R("sxtb	r7, r",0, HH2,"")
+	TEST_R("uxth	r0, r",7, HH1,"")
+	TEST_R("uxth	r7, r",0, HH2,"")
+	TEST_R("uxtb	r0, r",7, HH1,"")
+	TEST_R("uxtb	r7, r",0, HH2,"")
+	TEST_R("rev	r0, r",7, VAL1,"")
+	TEST_R("rev	r7, r",0, VAL2,"")
+	TEST_R("rev16	r0, r",7, VAL1,"")
+	TEST_R("rev16	r7, r",0, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xba80")
+	TEST_UNSUPPORTED(".short 0xbabf")
+	TEST_R("revsh	r0, r",7, VAL1,"")
+	TEST_R("revsh	r7, r",0, VAL2,"")
+
+#define TEST_POPPC(code, offset)	\
+	TESTCASE_START(code)		\
+	TEST_ARG_PTR(13, offset)	\
+	TEST_ARG_END("")		\
+	TEST_BRANCH_F(code,0)		\
+	TESTCASE_END
+
+	TEST("push	{r0}")
+	TEST("push	{r7}")
+	TEST("push	{r14}")
+	TEST("push	{r0-r7,r14}")
+	TEST("push	{r0,r2,r4,r6,r14}")
+	TEST("push	{r1,r3,r5,r7}")
+	TEST("pop	{r0}")
+	TEST("pop	{r7}")
+	TEST("pop	{r0,r2,r4,r6}")
+	TEST_POPPC("pop	{pc}",15*4)
+	TEST_POPPC("pop	{r0-r7,pc}",7*4)
+	TEST_POPPC("pop	{r1,r3,r5,r7,pc}",11*4)
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{pc}	@ ",13,15*4,"")
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{r0-r7,pc}	@ ",13,7*4,"")
+
+	TEST_UNSUPPORTED("bkpt.n	0")
+	TEST_UNSUPPORTED("bkpt.n	255")
+
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED(".short 0xbf50") /* Unassigned hints */
+	TEST_UNSUPPORTED(".short 0xbff0") /* Unassigned hints */
+
+#define TEST_IT(code, code2)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	"50:	nop			\n\t"	\
+	"1:	"code"			\n\t"	\
+	"	"code2"			\n\t"	\
+	"2:	nop			\n\t"	\
+	TESTCASE_END
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_IT("it	eq","moveq r0,#0")
+	TEST_IT("it	vc","movvc r0,#0")
+	TEST_IT("it	le","movle r0,#0")
+	TEST_IT("ite	eq","moveq r0,#0\n\t  movne r1,#1")
+	TEST_IT("itet	vc","movvc r0,#0\n\t  movvs r1,#1\n\t  movvc r2,#2")
+	TEST_IT("itete	le","movle r0,#0\n\t  movgt r1,#1\n\t  movle r2,#2\n\t  movgt r3,#3")
+	TEST_IT("itttt	le","movle r0,#0\n\t  movle r1,#1\n\t  movle r2,#2\n\t  movle r3,#3")
+	TEST_IT("iteee	le","movle r0,#0\n\t  movgt r1,#1\n\t  movgt r2,#2\n\t  movgt r3,#3")
+)
+
+	TEST_GROUP("Load and store multiple")
+
+	TEST_P("ldmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("ldmia	r",7, 16*4,"!, {r0-r6}")
+	TEST_P("stmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("stmia	r",0, 16*4,"!, {r0-r7}")
+
+	TEST_GROUP("Conditional branch and Supervisor Call instructions")
+
+CONDITION_INSTRUCTIONS(8,
+	TEST_BF("beq	2f")
+	TEST_BB("bne	2b")
+	TEST_BF("bgt	2f")
+	TEST_BB("blt	2b")
+)
+	TEST_UNSUPPORTED(".short 0xde00")
+	TEST_UNSUPPORTED(".short 0xdeff")
+	TEST_UNSUPPORTED("svc	#0x00")
+	TEST_UNSUPPORTED("svc	#0xff")
+
+	TEST_GROUP("Unconditional branch")
+
+	TEST_BF(  "b	2f")
+	TEST_BB(  "b	2b")
+	TEST_BF_X("b	2f", 0x400)
+	TEST_BB_X("b	2b", 0x400)
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("subs.n r0, r0")
+
+	verbose("\n");
+}
+
+
+void kprobe_thumb32_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Load/store multiple")
+
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmia	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmia	r",7, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmia	r",4, 0,   ", {r0-r12,r14}")
+	TEST_BF_P("ldmia	r",5, 8*4, "!, {r6-r12,r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,14*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmdb	r",5, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_BF_P("ldmdb	r",5, 16*4,"!, {r6-r12,r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmdb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",0,14*4,", {r12,pc}")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",13,2*4,", {r0-r12,pc}")
+
+	TEST_UNSUPPORTED(".short 0xe88f,0x0101	@ stmia	pc, {r0,r8}")
+	TEST_UNSUPPORTED(".short 0xe92f,0x5f00	@ stmdb	pc!, {r8-r12,r14}")
+	TEST_UNSUPPORTED(".short 0xe8bd,0xc000	@ ldmia	r13!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0xc000	@ ldmdb	r14!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x3f00	@ stmia	r7!, {r8-r12,sp}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x9f00	@ stmia	r7!, {r8-r12,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0x2010	@ ldmdb	r14!, {r4,sp}")
+
+	TEST_GROUP("Load/store double or exclusive, table branch")
+
+	TEST_P(  "ldrd	r0, r1, [r",1, 24,", #-16]")
+	TEST(    "ldrd	r12, r14, [sp, #16]")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,", #-16]!")
+	TEST(    "ldrd	r14, r12, [sp, #16]!")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,"], #16")
+	TEST(    "ldrd	r7, r8, [sp], #-16")
+
+	TEST_X( "ldrd	r12, r14, 3f",
+		".align 3				\n\t"
+		"3:	.word	"__stringify(VAL1)"	\n\t"
+		"	.word	"__stringify(VAL2))
+
+	TEST_UNSUPPORTED(".short 0xe9ff,0xec04	@ ldrd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ff,0xec04	@ ldrd	r14, r12, [pc], #16")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xd800	@ ldrd	sp, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xf800	@ ldrd	pc, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7d00	@ ldrd	r7, sp, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7f00	@ ldrd	r7, pc, [r4]")
+
+	TEST_RRP("strd	r",0, VAL1,", r",1, VAL2,", [r",1, 24,", #-16]")
+	TEST_RR( "strd	r",12,VAL2,", r",14,VAL1,", [sp, #16]")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,", #-16]!")
+	TEST_RR( "strd	r",14,VAL2,", r",12,VAL1,", [sp, #16]!")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16")
+	TEST_RR( "strd	r",7, VAL2,", r",8, VAL1,", [sp], #-16")
+	TEST_UNSUPPORTED(".short 0xe9ef,0xec04	@ strd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ef,0xec04	@ strd	r14, r12, [pc], #16")
+
+	TEST_RX("tbb	[pc, r",0, (9f-(1f+4)),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbb	[pc, r",4, (9f-(1f+4)+1),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbb	[r",1,9f,", r",2,0,"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",7, (9f-(1f+4))>>1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",12, ((9f-(1f+4))>>1)+1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbh	[r",1,9f, ", r",14,1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01f	@ tbh [r1, pc]")
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01d	@ tbh [r1, sp]")
+	TEST_UNSUPPORTED(".short 0xe8dd,0xf012	@ tbh [sp, r2]")
+
+	TEST_UNSUPPORTED("strexb	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexh	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexd	r0, r1, [r2]")
+	TEST_UNSUPPORTED("ldrexb	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexh	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexd	r0, [r1]")
+
+	TEST_GROUP("Data-processing (shifted register) and (modified immediate)")
+
+#define _DATA_PROCESSING32_DNM(op,s,val)					\
+	TEST_RR(op s".w	r0,  r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op s"	r1,  r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op s"	r2,  r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op s"	r3,  r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op s"	r4,  r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op s"	r5,  r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op s"	r8,  r",9, VAL1,", r",10,val, ", rrx")			\
+	TEST_R( op s"	r0,  r",11,VAL1,", #0x00010001")			\
+	TEST_R( op s"	r11, r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op s"	r7,  r",8, VAL2,", #0x000af000")
+
+#define DATA_PROCESSING32_DNM(op,val)		\
+	_DATA_PROCESSING32_DNM(op,"",val)	\
+	_DATA_PROCESSING32_DNM(op,"s",val)
+
+#define DATA_PROCESSING32_NM(op,val)					\
+	TEST_RR(op".w	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op"	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op"	r",9, VAL1,", r",10,val, ", rrx")		\
+	TEST_R( op"	r",11,VAL1,", #0x00010001")			\
+	TEST_R( op"	r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op"	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING32_DM(op,s,val)				\
+	TEST_R( op s".w	r0,  r",14, val, "")			\
+	TEST_R( op s"	r1,  r",12, val, ", lsl #3")		\
+	TEST_R( op s"	r2,  r",11, val, ", lsr #4")		\
+	TEST_R( op s"	r3,  r",10, val, ", asr #5")		\
+	TEST_R( op s"	r4,  r",9, N(val),", asr #6")		\
+	TEST_R( op s"	r5,  r",8, val, ", ror #7")		\
+	TEST_R( op s"	r8,  r",7,val, ", rrx")			\
+	TEST(   op s"	r0,  #0x00010001")			\
+	TEST(   op s"	r11, #0xf5000000")			\
+	TEST(   op s"	r7,  #0x000af000")			\
+	TEST(   op s"	r4,  #0x00005a00")
+
+#define DATA_PROCESSING32_DM(op,val)		\
+	_DATA_PROCESSING32_DM(op,"",val)	\
+	_DATA_PROCESSING32_DM(op,"s",val)
+
+	DATA_PROCESSING32_DNM("and",0xf00f00ff)
+	DATA_PROCESSING32_NM("tst",0xf00f00ff)
+	DATA_PROCESSING32_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING32_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING32_DM("mov",VAL2)
+	DATA_PROCESSING32_DNM("orn",0xf00f00ff)
+	DATA_PROCESSING32_DM("mvn",VAL2)
+	DATA_PROCESSING32_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING32_NM("teq",0xf00f00ff)
+	DATA_PROCESSING32_DNM("add",VAL2)
+	DATA_PROCESSING32_NM("cmn",VAL2)
+	DATA_PROCESSING32_DNM("adc",VAL2)
+	DATA_PROCESSING32_DNM("sbc",VAL2)
+	DATA_PROCESSING32_DNM("sub",VAL2)
+	DATA_PROCESSING32_NM("cmp",VAL2)
+	DATA_PROCESSING32_DNM("rsb",VAL2)
+
+	TEST_RR("pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_RR("pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0d	@ tst.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0f	@ tst.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea1d,0x0f07	@ tst.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea1f,0x0f07	@ tst.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf01d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf01f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0d	@ teq.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0f	@ teq.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea9d,0x0f07	@ teq.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea9f,0x0f07	@ teq.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf09d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf09f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0d	@ cmn.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0f	@ cmn.w r7, pc")
+	TEST_P("cmn.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xeb1f,0x0f07	@ cmn.w pc, r7")
+	TEST(  "cmn	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf11f,0x1f08	@ cmn pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0d	@ cmp.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0f	@ cmp.w r7, pc")
+	TEST_P("cmp.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xebbf,0x0f07	@ cmp.w pc, r7")
+	TEST(  "cmp	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf1bf,0x1f08	@ cmp pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea5f,0x070d	@ movs.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea5f,0x070f	@ movs.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea5f,0x0d07	@ movs.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea4f,0x0f07	@ mov.w  pc, r7")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1d08	@ mov sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1f08	@ mov pc, #0x00080008")
+
+	TEST_R("add.w	r0, sp, r",1, 4,"")
+	TEST_R("adds	r0, sp, r",1, 4,", asl #3")
+	TEST_R("add	r0, sp, r",1, 4,", asl #4")
+	TEST_R("add	r0, sp, r",1, 16,", ror #1")
+	TEST_R("add.w	sp, sp, r",1, 4,"")
+	TEST_R("add	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x1d01	@ add sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d71	@ add sp, sp, r1, ror #1")
+	TEST(  "add.w	r0, sp, #24")
+	TEST(  "add.w	sp, sp, #24")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0f01	@ add pc, sp, r1")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000f	@ add r0, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000d	@ add r0, sp, sp")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0f	@ add sp, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0d	@ add sp, sp, sp")
+
+	TEST_R("sub.w	r0, sp, r",1, 4,"")
+	TEST_R("subs	r0, sp, r",1, 4,", asl #3")
+	TEST_R("sub	r0, sp, r",1, 4,", asl #4")
+	TEST_R("sub	r0, sp, r",1, 16,", ror #1")
+	TEST_R("sub.w	sp, sp, r",1, 4,"")
+	TEST_R("sub	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xebad,0x1d01	@ sub sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xebad,0x0d71	@ sub sp, sp, r1, ror #1")
+	TEST_UNSUPPORTED(".short 0xebad,0x0f01	@ sub pc, sp, r1")
+	TEST(  "sub.w	r0, sp, #24")
+	TEST(  "sub.w	sp, sp, #24")
+
+	TEST_UNSUPPORTED(".short 0xea02,0x010f	@ and r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xea0f,0x0103	@ and r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0f03	@ and pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x010d	@ and r1, r2, sp")
+	TEST_UNSUPPORTED(".short 0xea0d,0x0103	@ and r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0d03	@ and sp, r2, r3")
+	TEST_UNSUPPORTED(".short 0xf00d,0x1108	@ and r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf00f,0x1108	@ and r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1d08	@ and sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1f08	@ and pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb02,0x010f	@ add r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xeb0f,0x0103	@ add r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0f03	@ add pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x010d	@ add r1, r2, sp")
+	TEST_SUPPORTED(  ".short 0xeb0d,0x0103	@ add r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0d03	@ add sp, r2, r3")
+	TEST_SUPPORTED(  ".short 0xf10d,0x1108	@ add r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10d,0x1f08	@ add pc, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10f,0x1108	@ add r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1d08	@ add sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1f08	@ add pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeaa0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeaf0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb20,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb80,0x0000")
+	TEST_UNSUPPORTED(".short 0xebe0,0x0000")
+
+	TEST_UNSUPPORTED(".short 0xf0a0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0c0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0f0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf120,0x0000")
+	TEST_UNSUPPORTED(".short 0xf180,0x0000")
+	TEST_UNSUPPORTED(".short 0xf1e0,0x0000")
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xec00,0x0000")
+	TEST_UNSUPPORTED(".short 0xeff0,0x0000")
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xfff0,0x0000")
+
+	TEST_GROUP("Data-processing (plain binary immediate)")
+
+	TEST_R("addw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "addw	r14, sp, #0xf5a")
+	TEST(  "addw	sp, sp, #0x20")
+	TEST(  "addw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1f20	@ addw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20d,0x1f20	@ addw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1d20	@ addw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf200,0x1d20	@ addw sp, r0, #0x120")
+
+	TEST_R("subw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "subw	r14, sp, #0xf5a")
+	TEST(  "subw	sp, sp, #0x20")
+	TEST(  "subw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1f20	@ subw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2ad,0x1f20	@ subw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1d20	@ subw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2a0,0x1d20	@ subw sp, r0, #0x120")
+
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".short 0xf240,0x0d00	@ movw sp, #0")
+	TEST_UNSUPPORTED(".short 0xf240,0x0f00	@ movw pc, #0")
+
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0d00	@ movt sp, #0")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0f00	@ movt pc, #0")
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0d17	@ ssat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0f17	@ ssat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30d,0x0c17	@ ssat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf30f,0x0c17	@ ssat	r12, #24, pc")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0d17	@ usat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0f17	@ usat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38d,0x0c17	@ usat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf38f,0x0c17	@ usat	r12, #24, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0d0b	@ ssat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0f0b	@ ssat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32d,0x0c0b	@ ssat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf32f,0x0c0b	@ ssat16	r12, #12, pc")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0d0b	@ usat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0f0b	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ad,0x0c0b	@ usat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf3af,0x0c0b	@ usat16	r12, #12, pc")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2d0f	@ sbfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2f0f	@ sbfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34d,0x2c0f	@ sbfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34f,0x2c0f	@ sbfx	r12, pc, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2d0f	@ ubfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2f0f	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cd,0x2c0f	@ ubfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cf,0x2c0f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfc	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0d1e	@ bfc	sp, #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0f1e	@ bfc	pc, #0, #31")
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfi	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1d17	@ bfi	sp, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1f17	@ bfi	pc, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36d,0x1e17	@ bfi	r14, sp, #4, #20")
+
+	TEST_GROUP("Branches and miscellaneous control")
+
+CONDITION_INSTRUCTIONS(22,
+	TEST_BF("beq.w	2f")
+	TEST_BB("bne.w	2b")
+	TEST_BF("bgt.w	2f")
+	TEST_BB("blt.w	2b")
+	TEST_BF_X("bpl.w	2f",0x1000)
+)
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, r1")
+	TEST_UNSUPPORTED("msr	spsr, r2")
+
+	TEST_UNSUPPORTED("cpsie.w	i")
+	TEST_UNSUPPORTED("cpsid.w	i")
+	TEST_UNSUPPORTED("cps	0x13")
+
+	TEST_SUPPORTED("yield.w")
+	TEST("sev.w")
+	TEST("nop.w")
+	TEST("wfi.w")
+	TEST_SUPPORTED("wfe.w")
+	TEST_UNSUPPORTED("dbg.w	#0")
+
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+
+	TEST_UNSUPPORTED("bxj	r0")
+
+	TEST_UNSUPPORTED("subs	pc, lr, #4")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8d00	@ mrs	sp, spsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8f00	@ mrs	pc, spsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0x8000 @ smc #0")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0xa000 @ undefeined")
+
+	TEST_BF(  "b.w	2f")
+	TEST_BB(  "b.w	2b")
+	TEST_BF_X("b.w	2f", 0x1000)
+
+	TEST_BF(  "bl.w	2f")
+	TEST_BB(  "bl.w	2b")
+	TEST_BB_X("bl.w	2b", 0x1000)
+
+	TEST_X(	"blx	__dummy_arm_subroutine",
+		".arm				\n\t"
+		".align				\n\t"
+		".type __dummy_arm_subroutine, %%function \n\t"
+		"__dummy_arm_subroutine:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".thumb				\n\t"
+	)
+	TEST(	"blx	__dummy_arm_subroutine")
+
+	TEST_GROUP("Store single data item")
+
+#define SINGLE_STORE(size)							\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,-1024,", #1024]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, -1024,", #1080]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #-120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #-128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,   ", #120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,   ", #128]!")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]!")		\
+	TEST_RPR("str"size".w	r",0, VAL1,", [r",1, 0,", r",2, 4,"]")		\
+	TEST_RPR("str"size"	r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_R(  "str"size".w	r",7, VAL1,", [sp, #24]")			\
+	TEST_RP( "str"size".w	r",0, VAL2,", [r",0,0, "]")			\
+	TEST_UNSUPPORTED("str"size"t	r0, [r1, #4]")
+
+	SINGLE_STORE("b")
+	SINGLE_STORE("h")
+	SINGLE_STORE("")
+
+	TEST("str	sp, [sp]")
+	TEST_UNSUPPORTED(".short 0xf8cf,0xe000	@ str	r14, [pc]")
+	TEST_UNSUPPORTED(".short 0xf8ce,0xf000	@ str	pc, [r14]")
+
+	TEST_GROUP("Advanced SIMD element or structure load/store instructions")
+
+	TEST_UNSUPPORTED(".short 0xf900,0x0000")
+	TEST_UNSUPPORTED(".short 0xf92f,0xffff")
+	TEST_UNSUPPORTED(".short 0xf980,0x0000")
+	TEST_UNSUPPORTED(".short 0xf9ef,0xffff")
+
+	TEST_GROUP("Load single data item and memory hints")
+
+#define SINGLE_LOAD(size)						\
+	TEST_P( "ldr"size"	r0, [r",11,-1024, ", #1024]")		\
+	TEST_P( "ldr"size"	r14, [r",1, -1024,", #1080]")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #120")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,  "], #128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #-120")		\
+	TEST_P( "ldr"size"	r14, [r",1,24,   "], #-128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,    ", #120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,   ", #128]!")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]!")		\
+	TEST_PR("ldr"size".w	r0, [r",1, 0,", r",2, 4,"]")		\
+	TEST_PR("ldr"size"	r14, [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_X( "ldr"size".w	r0, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL1))			\
+	TEST_X( "ldr"size".w	r14, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL2))			\
+	TEST(   "ldr"size".w	r7, 3b")				\
+	TEST(   "ldr"size".w	r7, [sp, #24]")				\
+	TEST_P( "ldr"size".w	r0, [r",0,0, "]")			\
+	TEST_UNSUPPORTED("ldr"size"t	r0, [r1, #4]")
+
+	SINGLE_LOAD("b")
+	SINGLE_LOAD("sb")
+	SINGLE_LOAD("h")
+	SINGLE_LOAD("sh")
+	SINGLE_LOAD("")
+
+	TEST_BF_P("ldr	pc, [r",14, 15*4,"]")
+	TEST_P(   "ldr	sp, [r",14, 13*4,"]")
+	TEST_BF_R("ldr	pc, [sp, r",14, 15*4,"]")
+	TEST_R(   "ldr	sp, [sp, r",14, 13*4,"]")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+	TEST_SUPPORTED("ldr	sp, 99f")
+	TEST_SUPPORTED("ldr	pc, 99f")
+
+	TEST_UNSUPPORTED(".short 0xf854,0x700d	@ ldr	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf854,0x700f	@ ldr	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700d	@ ldrb	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700f	@ ldrb	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf89f,0xd004	@ ldrb	sp, 99f")
+	TEST_UNSUPPORTED(".short 0xf814,0xd008	@ ldrb	sp, [r4, r8]")
+	TEST_UNSUPPORTED(".short 0xf894,0xd000	@ ldrb	sp, [r4]")
+
+	TEST_UNSUPPORTED(".short 0xf860,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf9ff,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf950,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf95f,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf800,0x0800") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf97f,0xfaff") /* Unallocated space */
+
+	TEST(   "pli	[pc, #4]")
+	TEST(   "pli	[pc, #-4]")
+	TEST(   "pld	[pc, #4]")
+	TEST(   "pld	[pc, #-4]")
+
+	TEST_P( "pld	[r",0,-1024,", #1024]")
+	TEST(   ".short 0xf8b0,0xf400	@ pldw	[r0, #1024]")
+	TEST_P( "pli	[r",4, 0b,", #1024]")
+	TEST_P( "pld	[r",7, 120,", #-120]")
+	TEST(   ".short 0xf837,0xfc78	@ pldw	[r7, #-120]")
+	TEST_P( "pli	[r",11,120,", #-120]")
+	TEST(   "pld	[sp, #0]")
+
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", r",12,16,", lsl #3]")
+	TEST_SUPPORTED(".short 0xf837,0xf000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(".short 0xf838,0xf03c	@ pldw	[r8, r12, lsl #3]");
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", r",12,16,", lsl #3]")
+	TEST_R( "pld	[sp, r",1, 16,"]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00d  @pld	[r7, sp]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00f  @pld	[r7, pc]")
+
+	TEST_GROUP("Data-processing (register)")
+
+#define SHIFTS32(op)					\
+	TEST_RR(op"	r0,  r",1, VAL1,", r",2, 3, "")	\
+	TEST_RR(op"	r14, r",12,VAL2,", r",11,10,"")
+
+	SHIFTS32("lsl")
+	SHIFTS32("lsls")
+	SHIFTS32("lsr")
+	SHIFTS32("lsrs")
+	SHIFTS32("asr")
+	SHIFTS32("asrs")
+	SHIFTS32("ror")
+	SHIFTS32("rors")
+
+	TEST_UNSUPPORTED(".short 0xfa01,0xff02	@ lsl	pc, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xfd02	@ lsl	sp, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf002	@ lsl	r0, pc, r2")
+	TEST_UNSUPPORTED(".short 0xfa0d,0xf002	@ lsl	r0, sp, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00f	@ lsl	r0, r1, pc")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00d	@ lsl	r0, r1, sp")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa0f,0xff87	@ sxth	pc, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xfd87	@ sxth	sp, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88f	@ sxth	r8, pc");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88d	@ sxth	r8, sp");
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa60,0x00f0")
+	TEST_UNSUPPORTED(".short 0xfa7f,0xffff")
+
+#define PARALLEL_ADD_SUB(op)					\
+	TEST_RR(  op"add16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"asx	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"asx	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sax	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sax	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"add8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add8	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub8	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	PARALLEL_ADD_SUB("s")
+	PARALLEL_ADD_SUB("q")
+	PARALLEL_ADD_SUB("sh")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	PARALLEL_ADD_SUB("u")
+	PARALLEL_ADD_SUB("uq")
+	PARALLEL_ADD_SUB("uh")
+
+	TEST_GROUP("Miscellaneous operations")
+
+	TEST_RR("qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qsub	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdsub	lr, r",9, VAL2,", r",8, VAL1,"")
+
+	TEST_R("rev.w	r0, r",0,   VAL1,"")
+	TEST_R("rev	r14, r",12, VAL2,"")
+	TEST_R("rev16.w	r0, r",0,   VAL1,"")
+	TEST_R("rev16	r14, r",12, VAL2,"")
+	TEST_R("rbit	r0, r",0,   VAL1,"")
+	TEST_R("rbit	r14, r",12, VAL2,"")
+	TEST_R("revsh.w	r0, r",0,   VAL1,"")
+	TEST_R("revsh	r14, r",12, VAL2,"")
+
+	TEST_UNSUPPORTED(".short 0xfa9c,0xff8c	@ rev	pc, r12");
+	TEST_UNSUPPORTED(".short 0xfa9c,0xfd8c	@ rev	sp, r12");
+	TEST_UNSUPPORTED(".short 0xfa9f,0xfe8f	@ rev	r14, pc");
+	TEST_UNSUPPORTED(".short 0xfa9d,0xfe8d	@ rev	r14, sp");
+
+	TEST_RR("sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR("sel	r14, r",12,VAL1,", r",10, VAL2,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clz	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+
+	TEST_UNSUPPORTED(".short 0xfa80,0xf030") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfab0,0xf000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+
+	TEST_GROUP("Multiply, multiply accumulate, and absolute difference operations")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mul	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xff09	@ mul	pc, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xfd09	@ mul	sp, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb0f,0xf709	@ mul	r7, pc, r9")
+	TEST_UNSUPPORTED(".short 0xfb0d,0xf709	@ mul	r7, sp, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70f	@ mul	r7, r8, pc")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70d	@ mul	r7, r8, sp")
+
+	TEST_RRR(   "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mla	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xaf09	@ mla	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xad09	@ mla	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0f,0xa709	@ mla	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0d,0xa709	@ mla	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70f	@ mla	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70d	@ mla	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xd709	@ mla	r7, r8, r9, sp");
+
+	TEST_RRR(   "mls	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mls	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+
+	TEST_RRR(   "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbt	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RR(    "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_RR(    "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_RRR(   "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+
+	TEST_RRR(   "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(   "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_RR(    "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_UNSUPPORTED(".short 0xfb00,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb0f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0x0010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+
+	TEST_GROUP("Long multiply, long multiply accumulate, and divide")
+
+	TEST_RR(   "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "smull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_UNSUPPORTED(".short 0xfb89,0xf80a	@ smull	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0xd80a	@ smull	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7f0a	@ smull	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7d0a	@ smull	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb8f,0x780a	@ smull	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb8d,0x780a	@ smull	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780f	@ smull	r7, r8, r9, pc");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780d	@ smull	r7, r8, r9, sp");
+
+	TEST_RR(   "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "umull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+
+	TEST_RRRR( "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "smlsld	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsld	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlsldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "umaal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umaal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xffff,0xffff")
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("sub.w	r0, r0")
+
+	verbose("\n");
+}
+
diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c
new file mode 100644
index 000000000000..e17cdd6d90d8
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test.c
@@ -0,0 +1,1748 @@
+/*
+ * arch/arm/kernel/kprobes-test.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This file contains test code for ARM kprobes.
+ *
+ * The top level function run_all_tests() executes tests for all of the
+ * supported instruction sets: ARM, 16-bit Thumb, and 32-bit Thumb. These tests
+ * fall into two categories: run_api_tests() checks basic functionality of the
+ * kprobes API, and run_test_cases() is a comprehensive test for kprobes
+ * instruction decoding and simulation.
+ *
+ * run_test_cases() first checks the kprobes decoding table for self-consistency
+ * (using table_test()), then executes a series of test cases for each of the CPU
+ * instruction forms. coverage_start() and coverage_end() are used to verify
+ * that these test cases cover all of the possible combinations of instructions
+ * described by the kprobes decoding tables.
+ *
+ * The individual test cases are in kprobes-test-arm.c and kprobes-test-thumb.c
+ * which use the macros defined in kprobes-test.h. The rest of this
+ * documentation will describe the operation of the framework used by these
+ * test cases.
+ */
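+
+/*
+ * As an illustrative sketch only (the function signatures here are
+ * assumptions; the real definitions appear later in this file), the
+ * top-level flow described above is roughly:
+ *
+ *	static int run_all_tests(void)
+ *	{
+ *		int ret = run_api_tests();
+ *		if (ret == 0)
+ *			ret = run_test_cases();
+ *		return ret;
+ *	}
+ */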
+
+/*
+ * TESTING METHODOLOGY
+ * -------------------
+ *
+ * The methodology used to test an ARM instruction 'test_insn' is to use
+ * inline assembler like:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *
+ * When the test case is run a kprobe is placed on each nop. The
+ * post-handler of the test_before probe is used to modify the saved CPU
+ * register context to that which we require for the test case. The
+ * pre-handler of the test_after probe saves a copy of the CPU
+ * register context. In this way we can execute test_insn with a specific
+ * register context and see the results afterwards.
+ *
+ * To actually test the kprobes instruction emulation we perform the above
+ * step a second time but with an additional kprobe on the test_case
+ * instruction itself. If the emulation is accurate then the results seen
+ * by the test_after probe will be identical to the first run which didn't
+ * have a probe on test_case.
+ *
+ * Each test case is run several times with a variety of flag values
+ * stored in the CPSR and, for Thumb code, different ITState values.
+ *
+ * For instructions which can modify PC, a second test_after probe is used
+ * like this:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *		b test_done
+ * test_after2: nop
+ * test_done:
+ *
+ * The test case is constructed such that test_insn branches to
+ * test_after2, or, if testing a conditional instruction, it may just
+ * continue to test_after. The probes inserted at both locations let us
+ * determine which happened. A similar approach is used for testing
+ * backwards branches...
+ *
+ *		b test_before
+ *		b test_done  @ helps to cope with off-by-one branches
+ * test_after2: nop
+ *		b test_done
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ * test_done:
+ *
+ * The macros used to generate the assembler instructions described above
+ * are TEST_INSTRUCTION, TEST_BRANCH_F (branch forwards) and TEST_BRANCH_B
+ * (branch backwards). In these, the local labels numbered 50, 1, 2 and
+ * 99 represent: test_before, test_case, test_after2 and test_done.
+ *
+ * FRAMEWORK
+ * ---------
+ *
+ * Each test case is wrapped between the pair of macros TESTCASE_START and
+ * TESTCASE_END. As well as performing the inline assembler boilerplate,
+ * these call out to the kprobes_test_case_start() and
+ * kprobes_test_case_end() functions which drive the execution of the test
+ * case. The specific arguments to use for each test case are stored as
+ * inline data constructed using the various TEST_ARG_* macros. Putting
+ * this all together, a simple test case may look like:
+ *
+ *	TESTCASE_START("Testing mov r0, r7")
+ *	TEST_ARG_REG(7, 0x12345678) // Set r7=0x12345678
+ *	TEST_ARG_END("")
+ *	TEST_INSTRUCTION("mov r0, r7")
+ *	TESTCASE_END
+ *
+ * Note, in practice the single convenience macro TEST_R would be used for this
+ * instead.
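+ * With TEST_R, the whole of the above reduces to:
+ *
+ *	TEST_R("mov r0, r",7, 0x12345678,"")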
+ *
+ * The above would expand to assembler looking something like:
+ *
+ *	@ TESTCASE_START
+ *	bl	__kprobes_test_case_start
+ *	@ start of inline data...
+ *	.ascii "mov r0, r7"	@ text title for test case
+ *	.byte	0
+ *	.align	2
+ *
+ *	@ TEST_ARG_REG
+ *	.byte	ARG_TYPE_REG
+ *	.byte	7
+ *	.short	0
+ *	.word	0x12345678
+ *
+ *	@ TEST_ARG_END
+ *	.byte	ARG_TYPE_END
+ *	.byte	TEST_ISA	@ flags, including ISA being tested
+ *	.short	50f-0f		@ offset of 'test_before'
+ *	.short	2f-0f		@ offset of 'test_after2' (if relevant)
+ *	.short	99f-0f		@ offset of 'test_done'
+ *	@ start of test case code...
+ *	0:
+ *	.code	TEST_ISA	@ switch to ISA being tested
+ *
+ *	@ TEST_INSTRUCTION
+ *	50:	nop		@ location for 'test_before' probe
+ *	1:	mov r0, r7	@ the test case instruction 'test_insn'
+ *		nop		@ location for 'test_after' probe
+ *
+ *	@ TESTCASE_END
+ *	2:
+ *	99:	bl __kprobes_test_case_end_##TEST_ISA
+ *	.code	NORMAL_ISA
+ *
+ * When the above is executed, the following happens...
+ *
+ * __kprobes_test_case_start() is an assembler wrapper which sets up space
+ * for a stack buffer and calls the C function kprobes_test_case_start().
+ * This C function will do some initial processing of the inline data and
+ * setup some global state. It then inserts the test_before and test_after
+ * kprobes and returns a value which causes the assembler wrapper to jump
+ * to the start of the test case code (local label '0').
+ *
+ * When the test case code executes, the test_before probe will be hit and
+ * test_before_post_handler will call setup_test_context(). This fills the
+ * stack buffer and CPU registers with a test pattern and then processes
+ * the test case arguments. In our example there is one TEST_ARG_REG which
+ * indicates that R7 should be loaded with the value 0x12345678.
+ *
+ * When the test_before probe ends, the test case continues and executes
+ * the "mov r0, r7" instruction. It then hits the test_after probe and the
+ * pre-handler for this (test_after_pre_handler) will save a copy of the
+ * CPU register context. This should now have R0 holding the same value as
+ * R7.
+ *
+ * Finally we get to the call to __kprobes_test_case_end_{32,16}. This is
+ * an assembler wrapper which switches back to the ISA used by the test
+ * code and calls the C function kprobes_test_case_end().
+ *
+ * For each run through the test case, test_case_run_count is incremented
+ * by one. For even runs, kprobes_test_case_end() saves a copy of the
+ * register and stack buffer contents from the test case just run. It then
+ * inserts a kprobe on the test case instruction 'test_insn' and returns a
+ * value to cause the test case code to be re-run.
+ *
+ * For odd numbered runs, kprobes_test_case_end() compares the register and
+ * stack buffer contents to those that were saved on the previous even
+ * numbered run (the one without the kprobe on test_insn). These should be
+ * the same if the kprobe instruction simulation routine is correct.
+ *
+ * The pair of test case runs is repeated with different combinations of
+ * flag values in CPSR and, for Thumb, different ITState. This is
+ * controlled by test_context_cpsr().
+ *
+ * BUILDING TEST CASES
+ * -------------------
+ *
+ * As an aid to building test cases, the stack buffer is initialised with
+ * some special values:
+ *
+ *   [SP+13*4]	Contains SP+120. This can be used to test instructions
+ *		which load a value into SP.
+ *
+ *   [SP+15*4]	When testing branching instructions using TEST_BRANCH_{F,B},
+ *		this holds the target address of the branch, 'test_after2'.
+ *		This can be used to test instructions which load a PC value
+ *		from memory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+#include "kprobes-test.h"
+
+
+#define BENCHMARKING	1
+
+
+/*
+ * Test basic API
+ */
+
+static bool test_regs_ok;
+static int test_func_instance;
+static int pre_handler_called;
+static int post_handler_called;
+static int jprobe_func_called;
+static int kretprobe_handler_called;
+
+#define FUNC_ARG1 0x12345678
+#define FUNC_ARG2 0xabcdef
+
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+long arm_func(long r0, long r1);
+
+static void __used __naked __arm_kprobes_test_func(void)
+{
+	__asm__ __volatile__ (
+		".arm					\n\t"
+		".type arm_func, %%function		\n\t"
+		"arm_func:				\n\t"
+		"adds	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+		".code "NORMAL_ISA	 /* Back to Thumb if necessary */
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+long thumb16_func(long r0, long r1);
+long thumb32even_func(long r0, long r1);
+long thumb32odd_func(long r0, long r1);
+
+static void __used __naked __thumb_kprobes_test_funcs(void)
+{
+	__asm__ __volatile__ (
+		".type thumb16_func, %%function		\n\t"
+		"thumb16_func:				\n\t"
+		"adds.n	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		".type thumb32even_func, %%function	\n\t"
+		"thumb32even_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		"nop.n					\n\t"
+		".type thumb32odd_func, %%function	\n\t"
+		"thumb32odd_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#endif /* CONFIG_THUMB2_KERNEL */
+
+
+static int call_test_func(long (*func)(long, long), bool check_test_regs)
+{
+	long ret;
+
+	++test_func_instance;
+	test_regs_ok = false;
+
+	ret = (*func)(FUNC_ARG1, FUNC_ARG2);
+	if (ret != FUNC_ARG1 + FUNC_ARG2) {
+		pr_err("FAIL: call_test_func: func returned %lx\n", ret);
+		return false;
+	}
+
+	if (check_test_regs && !test_regs_ok) {
+		pr_err("FAIL: test regs not OK\n");
+		return false;
+	}
+
+	return true;
+}
+
+static int __kprobes pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	pre_handler_called = test_func_instance;
+	if (regs->ARM_r0 == FUNC_ARG1 && regs->ARM_r1 == FUNC_ARG2)
+		test_regs_ok = true;
+	return 0;
+}
+
+static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs,
+				unsigned long flags)
+{
+	post_handler_called = test_func_instance;
+	if (regs->ARM_r0 != FUNC_ARG1 + FUNC_ARG2 || regs->ARM_r1 != FUNC_ARG2)
+		test_regs_ok = false;
+}
+
+static struct kprobe the_kprobe = {
+	.addr		= 0,
+	.pre_handler	= pre_handler,
+	.post_handler	= post_handler
+};
+
+static int test_kprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kprobe.addr = (kprobe_opcode_t *)func;
+	ret = register_kprobe(&the_kprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_kprobe(&the_kprobe);
+	the_kprobe.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (pre_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe pre_handler not called\n");
+		return -EINVAL;
+	}
+	if (post_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe post_handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (pre_handler_called == test_func_instance ||
+				post_handler_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void __kprobes jprobe_func(long r0, long r1)
+{
+	jprobe_func_called = test_func_instance;
+	if (r0 == FUNC_ARG1 && r1 == FUNC_ARG2)
+		test_regs_ok = true;
+	jprobe_return();
+}
+
+static struct jprobe the_jprobe = {
+	.entry		= jprobe_func,
+};
+
+static int test_jprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_jprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_jprobe(&the_jprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_jprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_jprobe(&the_jprobe);
+	the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (jprobe_func_called != test_func_instance) {
+		pr_err("FAIL: jprobe handler function not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (jprobe_func_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __kprobes
+kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	kretprobe_handler_called = test_func_instance;
+	if (regs_return_value(regs) == FUNC_ARG1 + FUNC_ARG2)
+		test_regs_ok = true;
+	return 0;
+}
+
+static struct kretprobe the_kretprobe = {
+	.handler	= kretprobe_handler,
+};
+
+static int test_kretprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kretprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_kretprobe(&the_kretprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kretprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_kretprobe(&the_kretprobe);
+	the_kretprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (kretprobe_handler_called != test_func_instance) {
+		pr_err("FAIL: kretprobe handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (kretprobe_handler_called == test_func_instance) {
+		pr_err("FAIL: kretprobe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int run_api_tests(long (*func)(long, long))
+{
+	int ret;
+
+	pr_info("    kprobe\n");
+	ret = test_kprobe(func);
+	if (ret < 0)
+		return ret;
+
+	pr_info("    jprobe\n");
+	ret = test_jprobe(func);
+	if (ret < 0)
+		return ret;
+
+	pr_info("    kretprobe\n");
+	ret = test_kretprobe(func);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+
+/*
+ * Benchmarking
+ */
+
+#if BENCHMARKING
+
+static void __naked benchmark_nop(void)
+{
+	__asm__ __volatile__ (
+		"nop		\n\t"
+		"bx	lr"
+	);
+}
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define wide ".w"
+#else
+#define wide
+#endif
+
+static void __naked benchmark_pushpop1(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r3-r11,lr}  \n\t"
+		"ldmia"wide"	sp!, {r3-r11,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop2(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0-r8,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0-r8,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop3(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r4,lr}  \n\t"
+		"ldmia"wide"	sp!, {r4,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop4(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0,pc}"
+	);
+}
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+
+static void __naked benchmark_pushpop_thumb(void)
+{
+	__asm__ __volatile__ (
+		"push.n	{r0-r7,lr}  \n\t"
+		"pop.n	{r0-r7,pc}"
+	);
+}
+
+#endif
+
+static int __kprobes
+benchmark_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	return 0;
+}
+
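+/*
+ * Time fn() by doubling the iteration count until a run takes at least
+ * 0.25 seconds, so that sched_clock() granularity and fixed overheads
+ * don't skew the result, then report the cost of one iteration in
+ * nanoseconds.
+ */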
+static int benchmark(void(*fn)(void))
+{
+	unsigned n, i, t, t0;
+
+	for (n = 1000; ; n *= 2) {
+		t0 = sched_clock();
+		for (i = n; i > 0; --i)
+			fn();
+		t = sched_clock() - t0;
+		if (t >= 250000000)
+			break; /* Stop once we took more than 0.25 seconds */
+	}
+	return t / n; /* Time for one iteration in nanoseconds */
+}
+
+static int kprobe_benchmark(void(*fn)(void), unsigned offset)
+{
+	struct kprobe k = {
+		.addr		= (kprobe_opcode_t *)((uintptr_t)fn + offset),
+		.pre_handler	= benchmark_pre_handler,
+	};
+
+	int ret = register_kprobe(&k);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = benchmark(fn);
+
+	unregister_kprobe(&k);
+	return ret;
+}
+
+struct benchmarks {
+	void		(*fn)(void);
+	unsigned	offset;
+	const char	*title;
+};
+
+static int run_benchmarks(void)
+{
+	int ret;
+	struct benchmarks list[] = {
+		{&benchmark_nop, 0, "nop"},
+		/*
+		 * benchmark_pushpop{1,3} will have the optimised
+		 * instruction emulation, whilst benchmark_pushpop{2,4} will
+		 * be the equivalent unoptimised instructions.
+		 */
+		{&benchmark_pushpop1, 0, "stmdb	sp!, {r3-r11,lr}"},
+		{&benchmark_pushpop1, 4, "ldmia	sp!, {r3-r11,pc}"},
+		{&benchmark_pushpop2, 0, "stmdb	sp!, {r0-r8,lr}"},
+		{&benchmark_pushpop2, 4, "ldmia	sp!, {r0-r8,pc}"},
+		{&benchmark_pushpop3, 0, "stmdb	sp!, {r4,lr}"},
+		{&benchmark_pushpop3, 4, "ldmia	sp!, {r4,pc}"},
+		{&benchmark_pushpop4, 0, "stmdb	sp!, {r0,lr}"},
+		{&benchmark_pushpop4, 4, "ldmia	sp!, {r0,pc}"},
+#ifdef CONFIG_THUMB2_KERNEL
+		{&benchmark_pushpop_thumb, 0, "push.n	{r0-r7,lr}"},
+		{&benchmark_pushpop_thumb, 2, "pop.n	{r0-r7,pc}"},
+#endif
+		{0}
+	};
+
+	struct benchmarks *b;
+	for (b = list; b->fn; ++b) {
+		ret = kprobe_benchmark(b->fn, b->offset);
+		if (ret < 0)
+			return ret;
+		pr_info("    %dns for kprobe %s\n", ret, b->title);
+	}
+
+	pr_info("\n");
+	return 0;
+}
+
+#endif /* BENCHMARKING */
+
+
+/*
+ * Decoding table self-consistency tests
+ */
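+
+/*
+ * table_test() below walks a decoding table, recursing into sub-tables,
+ * and checks that each entry is consistent with its parent: the entry's
+ * mask must include every bit fixed by the parent's mask, and its value
+ * must agree with the parent's value on those bits.
+ */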
+
+static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
+	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
+	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
+	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
+	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
+	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
+	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
+};
+
+static int table_iter(const union decode_item *table,
+			int (*fn)(const struct decode_header *, void *),
+			void *args)
+{
+	const struct decode_header *h = (struct decode_header *)table;
+	int result;
+
+	for (;;) {
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (type == DECODE_TYPE_END)
+			return 0;
+
+		result = fn(h, args);
+		if (result)
+			return result;
+
+		h = (struct decode_header *)
+			((uintptr_t)h + decode_struct_sizes[type]);
+	}
+}
+
+static int table_test_fail(const struct decode_header *h, const char *message)
+{
+	pr_err("FAIL: kprobes test failure \"%s\" (mask %08x, value %08x)\n",
+					message, h->mask.bits, h->value.bits);
+	return -EINVAL;
+}
+
+struct table_test_args {
+	const union decode_item *root_table;
+	u32			parent_mask;
+	u32			parent_value;
+};
+
+static int table_test_fn(const struct decode_header *h, void *args)
+{
+	struct table_test_args *a = (struct table_test_args *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+	if (h->value.bits & ~h->mask.bits)
+		return table_test_fail(h, "Match value has bits not in mask");
+
+	if ((h->mask.bits & a->parent_mask) != a->parent_mask)
+		return table_test_fail(h, "Mask has bits not in parent mask");
+
+	if ((h->value.bits ^ a->parent_value) & a->parent_mask)
+		return table_test_fail(h, "Value is inconsistent with parent");
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		struct table_test_args args2 = *a;
+		args2.parent_mask = h->mask.bits;
+		args2.parent_value = h->value.bits;
+		return table_iter(d->table.table, table_test_fn, &args2);
+	}
+
+	return 0;
+}
+
+static int table_test(const union decode_item *table)
+{
+	struct table_test_args args = {
+		.root_table	= table,
+		.parent_mask	= 0,
+		.parent_value	= 0
+	};
+	return table_iter(args.root_table, table_test_fn, &args);
+}
+
+
+/*
+ * Decoding table test coverage analysis
+ *
+ * coverage_start() builds a coverage_table which contains a list of
+ * coverage_entry's to match each entry in the specified kprobes instruction
+ * decoding table.
+ *
+ * When test cases are run, coverage_add() is called to process each case.
+ * This looks up the corresponding entry in the coverage_table and sets it as
+ * being matched, as well as clearing the regs flag appropriate for the test.
+ *
+ * After all test cases have been run, coverage_end() is called to check that
+ * all entries in coverage_table have been matched and that all regs flags are
+ * cleared. I.e. that all possible combinations of instructions described by
+ * the kprobes decoding tables have had a test case executed for them.
+ */
+
+bool coverage_fail;
+
+#define MAX_COVERAGE_ENTRIES 256
+
+struct coverage_entry {
+	const struct decode_header	*header;
+	unsigned			regs;
+	unsigned			nesting;
+	char				matched;
+};
+
+struct coverage_table {
+	struct coverage_entry	*base;
+	unsigned		num_entries;
+	unsigned		nesting;
+};
+
+struct coverage_table coverage;
+
+#define COVERAGE_ANY_REG	(1<<0)
+#define COVERAGE_SP		(1<<1)
+#define COVERAGE_PC		(1<<2)
+#define COVERAGE_PCWB		(1<<3)
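+
+/*
+ * Each 4-bit register field in an instruction gets its own copy of the
+ * flags above, shifted to that field's bit position. The flags a field
+ * needs covering are set by coverage_start_registers() and cleared by
+ * coverage_add_registers() as test cases are seen; any flag still set
+ * at coverage_end() means a register combination was never tested.
+ */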
+
+static const char coverage_register_lookup[16] = {
+	[REG_TYPE_ANY]		= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_SAMEAS16]	= COVERAGE_ANY_REG,
+	[REG_TYPE_SP]		= COVERAGE_SP,
+	[REG_TYPE_PC]		= COVERAGE_PC,
+	[REG_TYPE_NOSP]		= COVERAGE_ANY_REG | COVERAGE_SP,
+	[REG_TYPE_NOSPPC]	= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_NOPC]		= COVERAGE_ANY_REG | COVERAGE_PC,
+	[REG_TYPE_NOPCWB]	= COVERAGE_ANY_REG | COVERAGE_PC | COVERAGE_PCWB,
+	[REG_TYPE_NOPCX]	= COVERAGE_ANY_REG,
+	[REG_TYPE_NOSPPCX]	= COVERAGE_ANY_REG | COVERAGE_SP,
+};
+
+unsigned coverage_start_registers(const struct decode_header *h)
+{
+	unsigned regs = 0;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		int r = (h->type_regs.bits >> (DECODE_TYPE_BITS + i)) & 0xf;
+		regs |= coverage_register_lookup[r] << i;
+	}
+	return regs;
+}
+
+static int coverage_start_fn(const struct decode_header *h, void *args)
+{
+	struct coverage_table *coverage = (struct coverage_table *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+	struct coverage_entry *entry = coverage->base + coverage->num_entries;
+
+	if (coverage->num_entries == MAX_COVERAGE_ENTRIES - 1) {
+		pr_err("FAIL: Out of space for test coverage data\n");
+		return -ENOMEM;
+	}
+
+	++coverage->num_entries;
+
+	entry->header = h;
+	entry->regs = coverage_start_registers(h);
+	entry->nesting = coverage->nesting;
+	entry->matched = false;
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		int ret;
+		++coverage->nesting;
+		ret = table_iter(d->table.table, coverage_start_fn, coverage);
+		--coverage->nesting;
+		return ret;
+	}
+
+	return 0;
+}
+
+static int coverage_start(const union decode_item *table)
+{
+	coverage.base = kmalloc(MAX_COVERAGE_ENTRIES *
+				sizeof(struct coverage_entry), GFP_KERNEL);
+	if (!coverage.base)
+		return -ENOMEM;
+	coverage.num_entries = 0;
+	coverage.nesting = 0;
+	return table_iter(table, coverage_start_fn, &coverage);
+}
+
+static void
+coverage_add_registers(struct coverage_entry *entry, kprobe_opcode_t insn)
+{
+	int regs = entry->header->type_regs.bits >> DECODE_TYPE_BITS;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		enum decode_reg_type reg_type = (regs >> i) & 0xf;
+		int reg = (insn >> i) & 0xf;
+		int flag;
+
+		if (!reg_type)
+			continue;
+
+		if (reg == 13)
+			flag = COVERAGE_SP;
+		else if (reg == 15)
+			flag = COVERAGE_PC;
+		else
+			flag = COVERAGE_ANY_REG;
+		entry->regs &= ~(flag << i);
+
+		switch (reg_type) {
+
+		case REG_TYPE_NONE:
+		case REG_TYPE_ANY:
+		case REG_TYPE_SAMEAS16:
+			break;
+
+		case REG_TYPE_SP:
+			if (reg != 13)
+				return;
+			break;
+
+		case REG_TYPE_PC:
+			if (reg != 15)
+				return;
+			break;
+
+		case REG_TYPE_NOSP:
+			if (reg == 13)
+				return;
+			break;
+
+		case REG_TYPE_NOSPPC:
+		case REG_TYPE_NOSPPCX:
+			if (reg == 13 || reg == 15)
+				return;
+			break;
+
+		case REG_TYPE_NOPCWB:
+			if (!is_writeback(insn))
+				break;
+			if (reg == 15) {
+				entry->regs &= ~(COVERAGE_PCWB << i);
+				return;
+			}
+			break;
+
+		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX:
+			if (reg == 15)
+				return;
+			break;
+		}
+	}
+}
+
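+/*
+ * Match insn against the flattened coverage table. Entries were stored
+ * depth-first by coverage_start_fn(), so when a DECODE_TYPE_TABLE entry
+ * matches we raise the nesting level to scan its child entries, and we
+ * skip over sub-tables whose header entry didn't match.
+ */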
+static void coverage_add(kprobe_opcode_t insn)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+	bool matched = false;
+	unsigned nesting = 0;
+
+	for (; entry < end; ++entry) {
+		const struct decode_header *h = entry->header;
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (entry->nesting > nesting)
+			continue; /* Skip sub-table we didn't match */
+
+		if (entry->nesting < nesting)
+			break; /* End of sub-table we were scanning */
+
+		if (!matched) {
+			if ((insn & h->mask.bits) != h->value.bits)
+				continue;
+			entry->matched = true;
+		}
+
+		switch (type) {
+
+		case DECODE_TYPE_TABLE:
+			++nesting;
+			break;
+
+		case DECODE_TYPE_CUSTOM:
+		case DECODE_TYPE_SIMULATE:
+		case DECODE_TYPE_EMULATE:
+			coverage_add_registers(entry, insn);
+			return;
+
+		case DECODE_TYPE_OR:
+			matched = true;
+			break;
+
+		case DECODE_TYPE_REJECT:
+		default:
+			return;
+		}
+	}
+}
+
+static void coverage_end(void)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+
+	for (; entry < end; ++entry) {
+		u32 mask = entry->header->mask.bits;
+		u32 value = entry->header->value.bits;
+
+		if (entry->regs) {
+			pr_err("FAIL: Register test coverage missing for %08x %08x (%05x)\n",
+				mask, value, entry->regs);
+			coverage_fail = true;
+		}
+		if (!entry->matched) {
+			pr_err("FAIL: Test coverage entry missing for %08x %08x\n",
+				mask, value);
+			coverage_fail = true;
+		}
+	}
+
+	kfree(coverage.base);
+}
+
+
+/*
+ * Framework for instruction set test cases
+ */
+
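+/*
+ * Entered via the 'bl' emitted by TESTCASE_START, so on entry lr points
+ * at the inline data (title string and test arguments) following that
+ * branch. The Thumb bit is masked off lr to form the data address, and
+ * the value returned by kprobes_test_case_start() is branched to in
+ * order to enter the test case code.
+ */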
+void __naked __kprobes_test_case_start(void)
+{
+	__asm__ __volatile__ (
+		"stmdb	sp!, {r4-r11}				\n\t"
+		"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"bic	r0, lr, #1  @ r0 = inline title string	\n\t"
+		"mov	r1, sp					\n\t"
+		"bl	kprobes_test_case_start			\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"movne	pc, r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"mov	pc, r0					\n\t"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+void __naked __kprobes_test_case_end_16(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"bxne	r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		".arm						\n\t"
+		"orr	lr, lr, #1  @ will return to Thumb code	\n\t"
+		"ldr	pc, 1f					\n\t"
+		"1:						\n\t"
+		".word	__kprobes_test_case_end_16		\n\t"
+	);
+}
+
+#endif
+
+
+int kprobe_test_flags;
+int kprobe_test_cc_position;
+
+static int test_try_count;
+static int test_pass_count;
+static int test_fail_count;
+
+static struct pt_regs initial_regs;
+static struct pt_regs expected_regs;
+static struct pt_regs result_regs;
+
+static u32 expected_memory[TEST_MEMORY_SIZE/sizeof(u32)];
+
+static const char *current_title;
+static struct test_arg *current_args;
+static u32 *current_stack;
+static uintptr_t current_branch_target;
+
+static uintptr_t current_code_start;
+static kprobe_opcode_t current_instruction;
+
+
+#define TEST_CASE_PASSED -1
+#define TEST_CASE_FAILED -2
+
+static int test_case_run_count;
+static bool test_case_is_thumb;
+static int test_instance;
+
+/*
+ * We ignore the state of the imprecise abort disable flag (CPSR.A) because this
+ * can change randomly as the kernel doesn't take care to preserve or initialise
+ * this across context switches. Also, with Security Extensions, the flag may
+ * not be under control of the kernel; for this reason we ignore the state of
+ * the FIQ disable flag CPSR.F as well.
+ */
+#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT)
+
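+/*
+ * Evaluate an ARM condition code against a CPSR value. The shift tricks
+ * below rely on the PSR flag bit layout, N=31, Z=30, C=29 and V=28.
+ * E.g. for 'hi' (C set and Z clear), shifting CPSR right by one moves Z
+ * into the C bit position, so clearing C whenever Z is set leaves C set
+ * only when the condition holds.
+ */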
+static unsigned long test_check_cc(int cc, unsigned long cpsr)
+{
+	unsigned long temp;
+
+	switch (cc) {
+	case 0x0: /* eq */
+		return cpsr & PSR_Z_BIT;
+
+	case 0x1: /* ne */
+		return (~cpsr) & PSR_Z_BIT;
+
+	case 0x2: /* cs */
+		return cpsr & PSR_C_BIT;
+
+	case 0x3: /* cc */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0x4: /* mi */
+		return cpsr & PSR_N_BIT;
+
+	case 0x5: /* pl */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0x6: /* vs */
+		return cpsr & PSR_V_BIT;
+
+	case 0x7: /* vc */
+		return (~cpsr) & PSR_V_BIT;
+
+	case 0x8: /* hi */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return cpsr & PSR_C_BIT;
+
+	case 0x9: /* ls */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0xa: /* ge */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0xb: /* lt */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return cpsr & PSR_N_BIT;
+
+	case 0xc: /* gt */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return (~temp) & PSR_N_BIT;
+
+	case 0xd: /* le */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return temp & PSR_N_BIT;
+
+	case 0xe: /* al */
+	case 0xf: /* unconditional */
+		return true;
+	}
+	BUG();
+	return false;
+}
+
+static int is_last_scenario;
+static int probe_should_run; /* 0 = no, 1 = yes, -1 = unknown */
+static int memory_needs_checking;
+
+static unsigned long test_context_cpsr(int scenario)
+{
+	unsigned long cpsr;
+
+	probe_should_run = 1;
+
+	/* Default case is that we cycle through 16 combinations of flags */
+	cpsr  = (scenario & 0xf) << 28; /* N,Z,C,V flags */
+	cpsr |= (scenario & 0xf) << 16; /* GE flags */
+	cpsr |= (scenario & 0x1) << 27; /* Toggle Q flag */
+
+	if (!test_case_is_thumb) {
+		/* Testing ARM code */
+		probe_should_run = test_check_cc(current_instruction >> 28, cpsr) != 0;
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_NO_ITBLOCK) {
+		/* Testing Thumb code without setting ITSTATE */
+		if (kprobe_test_cc_position) {
+			int cc = (current_instruction >> kprobe_test_cc_position) & 0xf;
+			probe_should_run = test_check_cc(cc, cpsr) != 0;
+		}
+
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_FULL_ITBLOCK) {
+		/* Testing Thumb code with all combinations of ITSTATE */
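+		/*
+		 * ITSTATE is not a contiguous field in the CPSR: IT<7:2>
+		 * occupy bits 15:10 and IT<1:0> bits 26:25, hence the
+		 * scattered shifts used when building cpsr below.
+		 */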
+		unsigned x = (scenario >> 4);
+		unsigned cond_base = x % 7; /* ITSTATE<7:5> */
+		unsigned mask = x / 7 + 2;  /* ITSTATE<4:0>, bits reversed */
+
+		if (mask > 0x1f) {
+			/* Finish by testing state from instruction 'itt al' */
+			cond_base = 7;
+			mask = 0x4;
+			if ((scenario & 0xf) == 0xf)
+				is_last_scenario = true;
+		}
+
+		cpsr |= cond_base << 13;	/* ITSTATE<7:5> */
+		cpsr |= (mask & 0x1) << 12;	/* ITSTATE<4> */
+		cpsr |= (mask & 0x2) << 10;	/* ITSTATE<3> */
+		cpsr |= (mask & 0x4) << 8;	/* ITSTATE<2> */
+		cpsr |= (mask & 0x8) << 23;	/* ITSTATE<1> */
+		cpsr |= (mask & 0x10) << 21;	/* ITSTATE<0> */
+
+		probe_should_run = test_check_cc((cpsr >> 12) & 0xf, cpsr) != 0;
+
+	} else {
+		/* Testing Thumb code with several combinations of ITSTATE */
+		switch (scenario) {
+		case 16: /* Clear NZCV flags and 'it eq' state (false as Z=0) */
+			cpsr = 0x00000800;
+			probe_should_run = 0;
+			break;
+		case 17: /* Set NZCV flags and 'it vc' state (false as V=1) */
+			cpsr = 0xf0007800;
+			probe_should_run = 0;
+			break;
+		case 18: /* Clear NZCV flags and 'it ls' state (true as C=0) */
+			cpsr = 0x00009800;
+			break;
+		case 19: /* Set NZCV flags and 'it cs' state (true as C=1) */
+			cpsr = 0xf0002800;
+			is_last_scenario = true;
+			break;
+		}
+	}
+
+	return cpsr;
+}
+
+static void setup_test_context(struct pt_regs *regs)
+{
+	int scenario = test_case_run_count >> 1;
+	unsigned long val;
+	struct test_arg *args;
+	int i;
+
+	is_last_scenario = false;
+	memory_needs_checking = false;
+
+	/* Initialise test memory on stack */
+	val = (scenario & 1) ? VALM : ~VALM;
+	for (i = 0; i < TEST_MEMORY_SIZE / sizeof(current_stack[0]); ++i)
+		current_stack[i] = val + (i << 8);
+	/* Put target of branch on stack for tests which load PC from memory */
+	if (current_branch_target)
+		current_stack[15] = current_branch_target;
+	/* Put a value for SP on stack for tests which load SP from memory */
+	current_stack[13] = (u32)current_stack + 120;
+
+	/* Initialise register values to their default state */
+	val = (scenario & 2) ? VALR : ~VALR;
+	for (i = 0; i < 13; ++i)
+		regs->uregs[i] = val ^ (i << 8);
+	regs->ARM_lr = val ^ (14 << 8);
+	regs->ARM_cpsr &= ~(APSR_MASK | PSR_IT_MASK);
+	regs->ARM_cpsr |= test_context_cpsr(scenario);
+
+	/* Perform test case specific register setup */
+	args = current_args;
+	for (; args[0].type != ARG_TYPE_END; ++args)
+		switch (args[0].type) {
+		case ARG_TYPE_REG: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] = arg->val;
+			break;
+		}
+		case ARG_TYPE_PTR: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] =
+				(unsigned long)current_stack + arg->val;
+			memory_needs_checking = true;
+			break;
+		}
+		case ARG_TYPE_MEM: {
+			struct test_arg_mem *arg = (struct test_arg_mem *)args;
+			current_stack[arg->index] = arg->val;
+			break;
+		}
+		default:
+			break;
+		}
+}
+
+struct test_probe {
+	struct kprobe	kprobe;
+	bool		registered;
+	int		hit;
+};
+
+static void unregister_test_probe(struct test_probe *probe)
+{
+	if (probe->registered) {
+		unregister_kprobe(&probe->kprobe);
+		probe->kprobe.flags = 0; /* Clear disable flag to allow reuse */
+	}
+	probe->registered = false;
+}
+
+static int register_test_probe(struct test_probe *probe)
+{
+	int ret;
+
+	BUG_ON(probe->registered);
+
+	ret = register_kprobe(&probe->kprobe);
+	if (ret >= 0) {
+		probe->registered = true;
+		probe->hit = -1;
+	}
+	return ret;
+}
+
+static int __kprobes
+test_before_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static void __kprobes
+test_before_post_handler(struct kprobe *p, struct pt_regs *regs,
+							unsigned long flags)
+{
+	setup_test_context(regs);
+	initial_regs = *regs;
+	initial_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+}
+
+static int __kprobes
+test_case_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static int __kprobes
+test_after_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	if (container_of(p, struct test_probe, kprobe)->hit == test_instance)
+		return 0; /* Already run for this test instance */
+
+	result_regs = *regs;
+	result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+
+	/* Undo any changes done to SP by the test case */
+	regs->ARM_sp = (unsigned long)current_stack;
+
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static struct test_probe test_before_probe = {
+	.kprobe.pre_handler	= test_before_pre_handler,
+	.kprobe.post_handler	= test_before_post_handler,
+};
+
+static struct test_probe test_case_probe = {
+	.kprobe.pre_handler	= test_case_pre_handler,
+};
+
+static struct test_probe test_after_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+static struct test_probe test_after2_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+static void test_case_cleanup(void)
+{
+	unregister_test_probe(&test_before_probe);
+	unregister_test_probe(&test_case_probe);
+	unregister_test_probe(&test_after_probe);
+	unregister_test_probe(&test_after2_probe);
+}
+
+static void print_registers(struct pt_regs *regs)
+{
+	pr_err("r0  %08lx | r1  %08lx | r2  %08lx | r3  %08lx\n",
+		regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+	pr_err("r4  %08lx | r5  %08lx | r6  %08lx | r7  %08lx\n",
+		regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7);
+	pr_err("r8  %08lx | r9  %08lx | r10 %08lx | r11 %08lx\n",
+		regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp);
+	pr_err("r12 %08lx | sp  %08lx | lr  %08lx | pc  %08lx\n",
+		regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc);
+	pr_err("cpsr %08lx\n", regs->ARM_cpsr);
+}
+
+static void print_memory(u32 *mem, size_t size)
+{
+	int i;
+	for (i = 0; i < size / sizeof(u32); i += 4)
+		pr_err("%08x %08x %08x %08x\n", mem[i], mem[i+1],
+						mem[i+2], mem[i+3]);
+}
+
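+/*
+ * Size of test memory to compare against expected_memory. If the test
+ * case left SP above the base of the stack buffer, only the region from
+ * SP to the end of the buffer is compared; anything below SP is treated
+ * as scratch space.
+ */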
+static size_t expected_memory_size(u32 *sp)
+{
+	size_t size = sizeof(expected_memory);
+	int offset = (uintptr_t)sp - (uintptr_t)current_stack;
+	if (offset > 0)
+		size -= offset;
+	return size;
+}
+
+static void test_case_failed(const char *message)
+{
+	test_case_cleanup();
+
+	pr_err("FAIL: %s\n", message);
+	pr_err("FAIL: Test %s\n", current_title);
+	pr_err("FAIL: Scenario %d\n", test_case_run_count >> 1);
+}
+
+static unsigned long next_instruction(unsigned long pc)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if ((pc & 1) && !is_wide_instruction(*(u16 *)(pc - 1)))
+		return pc + 2;
+	else
+#endif
+	return pc + 4;
+}
+
+static uintptr_t __used kprobes_test_case_start(const char *title, void *stack)
+{
+	struct test_arg *args;
+	struct test_arg_end *end_arg;
+	unsigned long test_code;
+
+	args = (struct test_arg *)PTR_ALIGN(title + strlen(title) + 1, 4);
+
+	current_title = title;
+	current_args = args;
+	current_stack = stack;
+
+	++test_try_count;
+
+	while (args->type != ARG_TYPE_END)
+		++args;
+	end_arg = (struct test_arg_end *)args;
+
+	test_code = (unsigned long)(args + 1); /* Code starts after args */
+
+	test_case_is_thumb = end_arg->flags & ARG_FLAG_THUMB;
+	if (test_case_is_thumb)
+		test_code |= 1;
+
+	current_code_start = test_code;
+
+	current_branch_target = 0;
+	if (end_arg->branch_offset != end_arg->end_offset)
+		current_branch_target = test_code + end_arg->branch_offset;
+
+	test_code += end_arg->code_offset;
+	test_before_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	test_code = next_instruction(test_code);
+	test_case_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (test_case_is_thumb) {
+		u16 *p = (u16 *)(test_code & ~1);
+		current_instruction = p[0];
+		if (is_wide_instruction(current_instruction)) {
+			current_instruction <<= 16;
+			current_instruction |= p[1];
+		}
+	} else {
+		current_instruction = *(u32 *)test_code;
+	}
+
+	if (current_title[0] == '.')
+		verbose("%s\n", current_title);
+	else
+		verbose("%s\t@ %0*x\n", current_title,
+					test_case_is_thumb ? 4 : 8,
+					current_instruction);
+
+	test_code = next_instruction(test_code);
+	test_after_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (kprobe_test_flags & TEST_FLAG_NARROW_INSTR) {
+		if (!test_case_is_thumb ||
+			is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 16-bit instruction");
+				goto fail;
+		}
+	} else {
+		if (test_case_is_thumb &&
+			!is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 32-bit instruction");
+				goto fail;
+		}
+	}
+
+	coverage_add(current_instruction);
+
+	if (end_arg->flags & ARG_FLAG_UNSUPPORTED) {
+		if (register_test_probe(&test_case_probe) < 0)
+			goto pass;
+		test_case_failed("registered probe for unsupported instruction");
+		goto fail;
+	}
+
+	if (end_arg->flags & ARG_FLAG_SUPPORTED) {
+		if (register_test_probe(&test_case_probe) >= 0)
+			goto pass;
+		test_case_failed("couldn't register probe for supported instruction");
+		goto fail;
+	}
+
+	if (register_test_probe(&test_before_probe) < 0) {
+		test_case_failed("register test_before_probe failed");
+		goto fail;
+	}
+	if (register_test_probe(&test_after_probe) < 0) {
+		test_case_failed("register test_after_probe failed");
+		goto fail;
+	}
+	if (current_branch_target) {
+		test_after2_probe.kprobe.addr =
+				(kprobe_opcode_t *)current_branch_target;
+		if (register_test_probe(&test_after2_probe) < 0) {
+			test_case_failed("register test_after2_probe failed");
+			goto fail;
+		}
+	}
+
+	/* Start first run of test case */
+	test_case_run_count = 0;
+	++test_instance;
+	return current_code_start;
+pass:
+	test_case_run_count = TEST_CASE_PASSED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+fail:
+	test_case_run_count = TEST_CASE_FAILED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+}
+
+static bool check_test_results(void)
+{
+	size_t mem_size = 0;
+	u32 *mem = NULL;
+
+	if (memcmp(&expected_regs, &result_regs, sizeof(expected_regs))) {
+		test_case_failed("registers differ");
+		goto fail;
+	}
+
+	if (memory_needs_checking) {
+		mem = (u32 *)result_regs.ARM_sp;
+		mem_size = expected_memory_size(mem);
+		if (memcmp(expected_memory, mem, mem_size)) {
+			test_case_failed("test memory differs");
+			goto fail;
+		}
+	}
+
+	return true;
+
+fail:
+	pr_err("initial_regs:\n");
+	print_registers(&initial_regs);
+	pr_err("expected_regs:\n");
+	print_registers(&expected_regs);
+	pr_err("result_regs:\n");
+	print_registers(&result_regs);
+
+	if (mem) {
+		pr_err("current_stack=%p\n", current_stack);
+		pr_err("expected_memory:\n");
+		print_memory(expected_memory, mem_size);
+		pr_err("result_memory:\n");
+		print_memory(mem, mem_size);
+	}
+
+	return false;
+}
+
+static uintptr_t __used kprobes_test_case_end(void)
+{
+	if (test_case_run_count < 0) {
+		if (test_case_run_count == TEST_CASE_PASSED)
+			/* kprobes_test_case_start did all the needed testing */
+			goto pass;
+		else
+			/* kprobes_test_case_start failed */
+			goto fail;
+	}
+
+	if (test_before_probe.hit != test_instance) {
+		test_case_failed("test_before_handler not run");
+		goto fail;
+	}
+
+	if (test_after_probe.hit != test_instance &&
+				test_after2_probe.hit != test_instance) {
+		test_case_failed("test_after_handler not run");
+		goto fail;
+	}
+
+	/*
+	 * Even numbered test runs ran without a probe on the test case so
+	 * we can gather reference results. The subsequent odd numbered run
+	 * will have the probe inserted.
+	 */
+	if ((test_case_run_count & 1) == 0) {
+		/* Save results from run without probe */
+		u32 *mem = (u32 *)result_regs.ARM_sp;
+		expected_regs = result_regs;
+		memcpy(expected_memory, mem, expected_memory_size(mem));
+
+		/* Insert probe onto test case instruction */
+		if (register_test_probe(&test_case_probe) < 0) {
+			test_case_failed("register test_case_probe failed");
+			goto fail;
+		}
+	} else {
+		/* Check probe ran as expected */
+		if (probe_should_run == 1) {
+			if (test_case_probe.hit != test_instance) {
+				test_case_failed("test_case_handler not run");
+				goto fail;
+			}
+		} else if (probe_should_run == 0) {
+			if (test_case_probe.hit == test_instance) {
+				test_case_failed("test_case_handler ran");
+				goto fail;
+			}
+		}
+
+		/* Remove probe for any subsequent reference run */
+		unregister_test_probe(&test_case_probe);
+
+		if (!check_test_results())
+			goto fail;
+
+		if (is_last_scenario)
+			goto pass;
+	}
+
+	/* Do next test run */
+	++test_case_run_count;
+	++test_instance;
+	return current_code_start;
+fail:
+	++test_fail_count;
+	goto end;
+pass:
+	++test_pass_count;
+end:
+	test_case_cleanup();
+	return 0;
+}
+
+
+/*
+ * Top level test functions
+ */
+
+static int run_test_cases(void (*tests)(void), const union decode_item *table)
+{
+	int ret;
+
+	pr_info("    Check decoding tables\n");
+	ret = table_test(table);
+	if (ret)
+		return ret;
+
+	pr_info("    Run test cases\n");
+	ret = coverage_start(table);
+	if (ret)
+		return ret;
+
+	tests();
+
+	coverage_end();
+	return 0;
+}
+
+
+static int __init run_all_tests(void)
+{
+	int ret = 0;
+
+	pr_info("Beginning kprobe tests...\n");
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+	pr_info("Probe ARM code\n");
+	ret = run_api_tests(arm_func);
+	if (ret)
+		goto out;
+
+	pr_info("ARM instruction simulation\n");
+	ret = run_test_cases(kprobe_arm_test_cases, kprobe_decode_arm_table);
+	if (ret)
+		goto out;
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+	pr_info("Probe 16-bit Thumb code\n");
+	ret = run_api_tests(thumb16_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, even halfword\n");
+	ret = run_api_tests(thumb32even_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, odd halfword\n");
+	ret = run_api_tests(thumb32odd_func);
+	if (ret)
+		goto out;
+
+	pr_info("16-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb16_test_cases,
+				kprobe_decode_thumb16_table);
+	if (ret)
+		goto out;
+
+	pr_info("32-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb32_test_cases,
+				kprobe_decode_thumb32_table);
+	if (ret)
+		goto out;
+#endif
+
+	pr_info("Total instruction simulation tests=%d, pass=%d fail=%d\n",
+		test_try_count, test_pass_count, test_fail_count);
+	if (test_fail_count) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+#if BENCHMARKING
+	pr_info("Benchmarks\n");
+	ret = run_benchmarks();
+	if (ret)
+		goto out;
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	/* We are able to run all test cases so coverage should be complete */
+	if (coverage_fail) {
+		pr_err("FAIL: Test coverage checks failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+#endif
+
+out:
+	if (ret == 0)
+		pr_info("Finished kprobe tests OK\n");
+	else
+		pr_err("kprobe tests failed\n");
+
+	return ret;
+}
+
+
+/*
+ * Module setup
+ */
+
+#ifdef MODULE
+
+static void __exit kprobe_test_exit(void)
+{
+}
+
+module_init(run_all_tests)
+module_exit(kprobe_test_exit)
+MODULE_LICENSE("GPL");
+
+#else /* !MODULE */
+
+late_initcall(run_all_tests);
+
+#endif
diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/kernel/kprobes-test.h
new file mode 100644
index 000000000000..0dc5d77b9356
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test.h
@@ -0,0 +1,392 @@
+/*
+ * arch/arm/kernel/kprobes-test.h
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define VERBOSE 0 /* Set to '1' for more logging of test cases */
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define NORMAL_ISA "16"
+#else
+#define NORMAL_ISA "32"
+#endif
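+
+/*
+ * NORMAL_ISA is the instruction set the rest of the kernel is built for,
+ * as a string suitable for the assembler's .code directive: "16" for a
+ * Thumb-2 kernel and "32" for an ARM one. Test cases switch to the ISA
+ * under test and back to NORMAL_ISA when they finish.
+ */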
+
+
+/* Flags used in kprobe_test_flags */
+#define TEST_FLAG_NO_ITBLOCK	(1<<0)
+#define TEST_FLAG_FULL_ITBLOCK	(1<<1)
+#define TEST_FLAG_NARROW_INSTR	(1<<2)
+
+extern int kprobe_test_flags;
+extern int kprobe_test_cc_position;
+
+
+#define TEST_MEMORY_SIZE 256
+
+
+/*
+ * Test case structures.
+ *
+ * The arguments given to test cases can be one of three types.
+ *
+ *   ARG_TYPE_REG
+ *	Load a register with the given value.
+ *
+ *   ARG_TYPE_PTR
+ *	Load a register with a pointer into the stack buffer (SP + given value).
+ *
+ *   ARG_TYPE_MEM
+ *	Store the given value into the stack buffer at [SP+index].
+ */
+
+#define	ARG_TYPE_END	0
+#define	ARG_TYPE_REG	1
+#define	ARG_TYPE_PTR	2
+#define	ARG_TYPE_MEM	3
+
+#define ARG_FLAG_UNSUPPORTED	0x01
+#define ARG_FLAG_SUPPORTED	0x02
+#define ARG_FLAG_THUMB		0x10	/* Must be 16 so TEST_ISA can be used */
+#define ARG_FLAG_ARM		0x20	/* Must be 32 so TEST_ISA can be used */
+
+struct test_arg {
+	u8	type;		/* ARG_TYPE_x */
+	u8	_padding[7];
+};
+
+struct test_arg_regptr {
+	u8	type;		/* ARG_TYPE_REG or ARG_TYPE_PTR */
+	u8	reg;
+	u8	_padding[2];
+	u32	val;
+};
+
+struct test_arg_mem {
+	u8	type;		/* ARG_TYPE_MEM */
+	u8	index;
+	u8	_padding[2];
+	u32	val;
+};
+
+struct test_arg_end {
+	u8	type;		/* ARG_TYPE_END */
+	u8	flags;		/* ARG_FLAG_x */
+	u16	code_offset;
+	u16	branch_offset;
+	u16	end_offset;
+};
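+
+/*
+ * code_offset, branch_offset and end_offset record the positions of the
+ * 'test_before' (50:), 'test_after2' (2:) and 'test_done' (99:) labels
+ * relative to the start of the test case code; the TEST_ARG_END macro
+ * emits them as 50f-0f, 2f-0f and 99f-0f respectively.
+ */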
+
+
+/*
+ * Building blocks for test cases.
+ *
+ * Each test case is wrapped between TESTCASE_START and TESTCASE_END.
+ *
+ * To specify arguments for a test case the TEST_ARG_{REG,PTR,MEM} macros are
+ * used followed by a terminating TEST_ARG_END.
+ *
+ * After this, the instruction to be tested is defined with TEST_INSTRUCTION.
+ * Or, for branches, TEST_BRANCH_B and TEST_BRANCH_F (branch backwards/forwards).
+ *
+ * Some specific test cases may make use of other custom constructs.
+ */
+
+#if VERBOSE
+#define verbose(fmt, ...) pr_info(fmt, ##__VA_ARGS__)
+#else
+#define verbose(fmt, ...)
+#endif
+
+#define TEST_GROUP(title)					\
+	verbose("\n");						\
+	verbose(title"\n");					\
+	verbose("---------------------------------------------------------\n");
+
+#define TESTCASE_START(title)					\
+	__asm__ __volatile__ (					\
+	"bl	__kprobes_test_case_start		\n\t"	\
+	/* don't use .asciz here as 'title' may be */		\
+	/* multiple strings to be concatenated.  */		\
+	".ascii "#title"				\n\t"	\
+	".byte	0					\n\t"	\
+	".align	2					\n\t"
+
+#define	TEST_ARG_REG(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_REG)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_PTR(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_PTR)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_MEM(index, val)				\
+	".byte	"__stringify(ARG_TYPE_MEM)"		\n\t"	\
+	".byte	"#index"				\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_END(flags)					\
+	".byte	"__stringify(ARG_TYPE_END)"		\n\t"	\
+	".byte	"TEST_ISA flags"			\n\t"	\
+	".short	50f-0f					\n\t"	\
+	".short	2f-0f					\n\t"	\
+	".short	99f-0f					\n\t"	\
+	".code "TEST_ISA"				\n\t"	\
+	"0:						\n\t"
+
+#define TEST_INSTRUCTION(instruction)				\
+	"50:	nop					\n\t"	\
+	"1:	"instruction"				\n\t"	\
+	"	nop					\n\t"
+
+#define TEST_BRANCH_F(instruction, xtra_dist)			\
+	TEST_INSTRUCTION(instruction)				\
+	".if "#xtra_dist"				\n\t"	\
+	"	b	99f				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"
+
+#define TEST_BRANCH_B(instruction, xtra_dist)			\
+	"	b	50f				\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"	\
+	"	b	99f				\n\t"	\
+	".if "#xtra_dist"				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	TEST_INSTRUCTION(instruction)
+
+#define TESTCASE_END						\
+	"2:						\n\t"	\
+	"99:						\n\t"	\
+	"	bl __kprobes_test_case_end_"TEST_ISA"	\n\t"	\
+	".code "NORMAL_ISA"				\n\t"	\
+	: :							\
+	: "r0", "r1", "r2", "r3", "ip", "lr", "memory", "cc"	\
+	);
+
+
+/*
+ * Macros to define test cases.
+ *
+ * Those of the form TEST_{R,P,M}* can be used to define test cases
+ * which take combinations of the three basic types of arguments. E.g.
+ *
+ *   TEST_R	One register argument
+ *   TEST_RR	Two register arguments
+ *   TEST_RPR	A register, a pointer, then a register argument
+ *
+ * For testing instructions which may branch, there are macros TEST_BF_*
+ * and TEST_BB_* for branching forwards and backwards.
+ *
+ * TEST_SUPPORTED and TEST_UNSUPPORTED don't cause the code to be executed,
+ * they just verify that a kprobe is or is not allowed on the given instruction.
+ */
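+
+/*
+ * For example, a two-register test case might be written as:
+ *
+ *	TEST_RR("add	r0, r",1, VAL1,", r",2, VAL2,"")
+ *
+ * which tests "add r0, r1, r2" with r1 = VAL1 and r2 = VAL2.
+ */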
+
+#define TEST(code)				\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	TESTCASE_END
+
+#define TEST_UNSUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_UNSUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_SUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_SUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_R(code1, reg, val, code2)			\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 #reg code2)		\
+	TESTCASE_END
+
+#define TEST_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4, reg4, val4)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_REG(reg4, val4)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)	\
+	TESTCASE_END
+
+#define TEST_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code1 #reg1 code2)	\
+	TESTCASE_END
+
+#define TEST_PR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_PTR(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RP(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_PTR(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_PRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_PTR(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RPR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_PTR(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRP(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_PTR(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_BF_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code1 #reg1 code2, 0)	\
+	TESTCASE_END
+
+#define TEST_BF_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BB_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_B(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BF_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_F(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BB_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_B(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BF(code)	TEST_BF_X(code, 0)
+#define TEST_BB(code)	TEST_BB_X(code, 0)
+
+#define TEST_BF_R(code1, reg, val, code2) TEST_BF_RX(code1, reg, val, code2, 0)
+#define TEST_BB_R(code1, reg, val, code2) TEST_BB_RX(code1, reg, val, code2, 0)
+
+#define TEST_BF_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_BRANCH_F(code1 #reg1 code2 #reg2 code3, 0)		\
+	TESTCASE_END
+
+#define TEST_X(code, codex)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	"	b	99f		\n\t"	\
+	"	"codex"			\n\t"	\
+	TESTCASE_END
+
+#define TEST_RX(code1, reg, val, code2, codex)		\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 __stringify(reg) code2)	\
+	"	b	99f		\n\t"		\
+	"	"codex"			\n\t"		\
+	TESTCASE_END
+
+#define TEST_RRX(code1, reg1, val1, code2, reg2, val2, code3, codex)		\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)				\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 __stringify(reg1) code2 __stringify(reg2) code3)	\
+	"	b	99f		\n\t"					\
+	"	"codex"			\n\t"					\
+	TESTCASE_END
+
+
+/* Various values used in test cases... */
+#define N(val)	(val ^ 0xffffffff)
+#define VAL1	0x12345678
+#define VAL2	N(VAL1)
+#define VAL3	0xa5f801
+#define VAL4	N(VAL3)
+#define VALM	0x456789ab
+#define VALR	0xdeaddead
+#define HH1	0x0123fecb
+#define HH2	0xa9874567
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+void kprobe_thumb16_test_cases(void);
+void kprobe_thumb32_test_cases(void);
+#else
+void kprobe_arm_test_cases(void);
+#endif
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c
index 902ca59e8b11..8f96ec778e8d 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/kernel/kprobes-thumb.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -943,6 +944,9 @@ const union decode_item kprobe_decode_thumb32_table[] = {
 	 */
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb32_table);
+#endif
 
 static void __kprobes
 t16_simulate_bxblx(struct kprobe *p, struct pt_regs *regs)
@@ -1423,6 +1427,9 @@ const union decode_item kprobe_decode_thumb16_table[] = {
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb16_table);
+#endif
 
 static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
 {
diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/kernel/kprobes.h
index a6aeda0a6c7f..38945f78f9f1 100644
--- a/arch/arm/kernel/kprobes.h
+++ b/arch/arm/kernel/kprobes.h
@@ -413,6 +413,14 @@ struct decode_reject {
 	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
 
 
+#ifdef CONFIG_THUMB2_KERNEL
+extern const union decode_item kprobe_decode_thumb16_table[];
+extern const union decode_item kprobe_decode_thumb32_table[];
+#else
+extern const union decode_item kprobe_decode_arm_table[];
+#endif
+
+
 int kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
 			const union decode_item *table, bool thumb16);
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 53c9c2610cbc..e6e5d7c84f1a 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -26,16 +27,8 @@
 #include <asm/pmu.h>
 #include <asm/stacktrace.h>
 
-static struct platform_device *pmu_device;
-
-/*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
-static DEFINE_RAW_SPINLOCK(pmu_lock);
-
 /*
- * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
+ * ARMv6 supports a maximum of 3 events, starting from index 0. If we add
  * another platform that supports more, we need to increase this to be the
  * largest of all platforms.
  *
@@ -43,62 +36,24 @@ static DEFINE_RAW_SPINLOCK(pmu_lock);
  *  cycle counter CCNT + 31 events counters CNT0..30.
  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
  */
-#define ARMPMU_MAX_HWEVENTS		33
+#define ARMPMU_MAX_HWEVENTS		32
 
-/* The events for a given CPU. */
-struct cpu_hw_events {
-	/*
-	 * The events that are active on the CPU for the given index. Index 0
-	 * is reserved.
-	 */
-	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
+static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
-	/*
-	 * A 1 bit for an index indicates that the counter is actively being
-	 * used.
-	 */
-	unsigned long		active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
-};
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-struct arm_pmu {
-	enum arm_perf_pmu_ids id;
-	const char	*name;
-	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct hw_perf_event *evt, int idx);
-	void		(*disable)(struct hw_perf_event *evt, int idx);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
-	u32		(*read_counter)(int idx);
-	void		(*write_counter)(int idx, u32 val);
-	void		(*start)(void);
-	void		(*stop)(void);
-	void		(*reset)(void *);
-	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-				    [PERF_COUNT_HW_CACHE_OP_MAX]
-				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
-	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
-	u32		raw_event_mask;
-	int		num_events;
-	u64		max_period;
-};
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
 
 /* Set at runtime when we know what CPU type we are. */
-static const struct arm_pmu *armpmu;
+static struct arm_pmu *cpu_pmu;
 
 enum arm_perf_pmu_ids
 armpmu_get_pmu_id(void)
 {
 	int id = -ENODEV;
 
-	if (armpmu != NULL)
-		id = armpmu->id;
+	if (cpu_pmu != NULL)
+		id = cpu_pmu->id;
 
 	return id;
 }
@@ -109,8 +64,8 @@ armpmu_get_max_events(void)
 {
 	int max_events = 0;
 
-	if (armpmu != NULL)
-		max_events = armpmu->num_events;
+	if (cpu_pmu != NULL)
+		max_events = cpu_pmu->num_events;
 
 	return max_events;
 }
@@ -130,7 +85,11 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
 static int
-armpmu_map_cache_event(u64 config)
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
 {
 	unsigned int cache_type, cache_op, cache_result, ret;
 
@@ -146,7 +105,7 @@ armpmu_map_cache_event(u64 config)
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
 
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
@@ -155,23 +114,46 @@ armpmu_map_cache_event(u64 config)
 }
 
 static int
-armpmu_map_event(u64 config)
+armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*armpmu->event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+	int mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
 static int
-armpmu_map_raw_event(u64 config)
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
 {
-	return (int)(config & armpmu->raw_event_mask);
+	return (int)(config & raw_event_mask);
 }
 
-static int
+static int map_cpu_event(struct perf_event *event,
+			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+			 const unsigned (*cache_map)
+					[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX],
+			 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int
 armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
@@ -202,11 +184,12 @@ armpmu_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static u64
+u64
 armpmu_event_update(struct perf_event *event,
 		    struct hw_perf_event *hwc,
 		    int idx, int overflow)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	u64 delta, prev_raw_count, new_raw_count;
 
 again:
@@ -246,11 +229,9 @@ armpmu_read(struct perf_event *event)
 static void
 armpmu_stop(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -266,11 +247,9 @@ armpmu_stop(struct perf_event *event, int flags)
 static void
 armpmu_start(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
 	 * PERF_EF_RELOAD, see the comment below.
@@ -293,16 +272,16 @@ armpmu_start(struct perf_event *event, int flags)
 static void
 armpmu_del(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
 	WARN_ON(idx < 0);
 
-	clear_bit(idx, cpuc->active_mask);
 	armpmu_stop(event, PERF_EF_UPDATE);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
 
 	perf_event_update_userpage(event);
 }
@@ -310,7 +289,8 @@ armpmu_del(struct perf_event *event, int flags)
 static int
 armpmu_add(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
@@ -318,7 +298,7 @@ armpmu_add(struct perf_event *event, int flags)
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(cpuc, hwc);
+	idx = armpmu->get_event_idx(hw_events, hwc);
 	if (idx < 0) {
 		err = idx;
 		goto out;
@@ -330,8 +310,7 @@ armpmu_add(struct perf_event *event, int flags)
 	 */
 	event->hw.idx = idx;
 	armpmu->disable(hwc, idx);
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+	hw_events->events[idx] = event;
 
 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	if (flags & PERF_EF_START)
@@ -345,25 +324,25 @@ out:
 	return err;
 }
 
-static struct pmu pmu;
-
 static int
-validate_event(struct cpu_hw_events *cpuc,
+validate_event(struct pmu_hw_events *hw_events,
 	       struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event fake_event = event->hw;
+	struct pmu *leader_pmu = event->group_leader->pmu;
 
-	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
-	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
+	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
 }
 
 static int
 validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct pmu_hw_events fake_pmu;
 
 	memset(&fake_pmu, 0, sizeof(fake_pmu));
 
@@ -383,110 +362,119 @@ validate_group(struct perf_event *event)
 
 static irqreturn_t armpmu_platform_irq(int irq, void *dev)
 {
-	struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);
+	struct arm_pmu *armpmu = (struct arm_pmu *) dev;
+	struct platform_device *plat_device = armpmu->plat_device;
+	struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);
 
 	return plat->handle_irq(irq, dev, armpmu->handle_irq);
 }
 
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	for (i = 0; i < irqs; ++i) {
+		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+			continue;
+		irq = platform_get_irq(pmu_device, i);
+		if (irq >= 0)
+			free_irq(irq, armpmu);
+	}
+
+	release_pmu(armpmu->type);
+}
+
 static int
-armpmu_reserve_hardware(void)
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
 {
 	struct arm_pmu_platdata *plat;
 	irq_handler_t handle_irq;
-	int i, err = -ENODEV, irq;
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
 
-	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
-	if (IS_ERR(pmu_device)) {
+	err = reserve_pmu(armpmu->type);
+	if (err) {
 		pr_warning("unable to reserve pmu\n");
-		return PTR_ERR(pmu_device);
+		return err;
 	}
 
-	init_pmu(ARM_PMU_DEVICE_CPU);
-
 	plat = dev_get_platdata(&pmu_device->dev);
 	if (plat && plat->handle_irq)
 		handle_irq = armpmu_platform_irq;
 	else
 		handle_irq = armpmu->handle_irq;
 
-	if (pmu_device->num_resources < 1) {
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
 		pr_err("no irqs for PMUs defined\n");
 		return -ENODEV;
 	}
 
-	for (i = 0; i < pmu_device->num_resources; ++i) {
+	for (i = 0; i < irqs; ++i) {
+		err = 0;
 		irq = platform_get_irq(pmu_device, i);
 		if (irq < 0)
 			continue;
 
+		/*
+		 * If we have a single PMU interrupt that we can't shift,
+		 * assume that we're running on a uniprocessor machine and
+		 * keep using it. Otherwise, skip this interrupt.
+		 */
+		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				    irq, i);
+			continue;
+		}
+
 		err = request_irq(irq, handle_irq,
 				  IRQF_DISABLED | IRQF_NOBALANCING,
-				  "armpmu", NULL);
+				  "arm-pmu", armpmu);
 		if (err) {
-			pr_warning("unable to request IRQ%d for ARM perf "
-				"counters\n", irq);
-			break;
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			armpmu_release_hardware(armpmu);
+			return err;
 		}
-	}
 
-	if (err) {
-		for (i = i - 1; i >= 0; --i) {
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, NULL);
-		}
-		release_pmu(ARM_PMU_DEVICE_CPU);
-		pmu_device = NULL;
+		cpumask_set_cpu(i, &armpmu->active_irqs);
 	}
 
-	return err;
+	return 0;
 }
 
 static void
-armpmu_release_hardware(void)
+hw_perf_event_destroy(struct perf_event *event)
 {
-	int i, irq;
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events	 = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
 
-	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
-		irq = platform_get_irq(pmu_device, i);
-		if (irq >= 0)
-			free_irq(irq, NULL);
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
 	}
-	armpmu->stop();
-
-	release_pmu(ARM_PMU_DEVICE_CPU);
-	pmu_device = NULL;
 }
 
-static atomic_t active_events = ATOMIC_INIT(0);
-static DEFINE_MUTEX(pmu_reserve_mutex);
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
-		armpmu_release_hardware();
-		mutex_unlock(&pmu_reserve_mutex);
-	}
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
 }
 
 static int
 __hw_perf_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping, err;
 
-	/* Decode the generic type into an ARM event identifier. */
-	if (PERF_TYPE_HARDWARE == event->attr.type) {
-		mapping = armpmu_map_event(event->attr.config);
-	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
-		mapping = armpmu_map_cache_event(event->attr.config);
-	} else if (PERF_TYPE_RAW == event->attr.type) {
-		mapping = armpmu_map_raw_event(event->attr.config);
-	} else {
-		pr_debug("event type %x not supported\n", event->attr.type);
-		return -EOPNOTSUPP;
-	}
+	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
 		pr_debug("event %x:%llx not supported\n", event->attr.type,
@@ -495,34 +483,31 @@ __hw_perf_event_init(struct perf_event *event)
 	}
 
 	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has its own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
 	 * Check whether we need to exclude the counter from certain modes.
-	 * The ARM performance counters are on all of the time so if someone
-	 * has asked us for some excludes then we have to fail.
 	 */
-	if (event->attr.exclude_kernel || event->attr.exclude_user ||
-	    event->attr.exclude_hv || event->attr.exclude_idle) {
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
 		pr_debug("ARM performance counters do not support "
 			 "mode exclusion\n");
 		return -EPERM;
 	}
 
 	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
+	 * Store the event encoding into the config_base field.
 	 */
-	hwc->idx = -1;
-
-	/*
-	 * Store the event encoding into the config_base field. config and
-	 * event_base are unused as the only 2 things we need to know are
-	 * the event mapping and the counter to use. The counter to use is
-	 * also the indx and the config_base is the event type.
-	 */
-	hwc->config_base	    = (unsigned long)mapping;
-	hwc->config		    = 0;
-	hwc->event_base		    = 0;
+	hwc->config_base	    |= (unsigned long)mapping;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
@@ -542,32 +527,23 @@ __hw_perf_event_init(struct perf_event *event)
 
 static int armpmu_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
 
-	switch (event->attr.type) {
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-		break;
-
-	default:
+	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
-	}
-
-	if (!armpmu)
-		return -ENODEV;
 
 	event->destroy = hw_perf_event_destroy;
 
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmu_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			err = armpmu_reserve_hardware();
-		}
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
 
 		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmu_reserve_mutex);
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
 	}
 
 	if (err)
@@ -582,22 +558,9 @@ static int armpmu_event_init(struct perf_event *event)
 
 static void armpmu_enable(struct pmu *pmu)
 {
-	/* Enable all of the perf events on hardware. */
-	int idx, enabled = 0;
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
-	if (!armpmu)
-		return;
-
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-
-		if (!event)
-			continue;
-
-		armpmu->enable(&event->hw, idx);
-		enabled = 1;
-	}
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
 	if (enabled)
 		armpmu->start();
@@ -605,20 +568,32 @@ static void armpmu_enable(struct pmu *pmu)
 
 static void armpmu_disable(struct pmu *pmu)
 {
-	if (armpmu)
-		armpmu->stop();
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	armpmu->stop();
 }
 
-static struct pmu pmu = {
-	.pmu_enable	= armpmu_enable,
-	.pmu_disable	= armpmu_disable,
-	.event_init	= armpmu_event_init,
-	.add		= armpmu_add,
-	.del		= armpmu_del,
-	.start		= armpmu_start,
-	.stop		= armpmu_stop,
-	.read		= armpmu_read,
-};
+static void __init armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+	};
+}
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
+{
+	armpmu_init(armpmu);
+	return perf_pmu_register(&armpmu->pmu, name, type);
+}
 
 /* Include the PMU-specific implementations. */
 #include "perf_event_xscale.c"
@@ -630,14 +605,72 @@ static struct pmu pmu = {
  * This requires SMP to be available, so exists as a separate initcall.
  */
 static int __init
-armpmu_reset(void)
+cpu_pmu_reset(void)
+{
+	if (cpu_pmu && cpu_pmu->reset)
+		return on_each_cpu(cpu_pmu->reset, NULL, 1);
+	return 0;
+}
+arch_initcall(cpu_pmu_reset);
+
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static struct of_device_id armpmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a9-pmu"},
+	{.compatible = "arm,cortex-a8-pmu"},
+	{.compatible = "arm,arm1136-pmu"},
+	{.compatible = "arm,arm1176-pmu"},
+	{},
+};
+
+static struct platform_device_id armpmu_plat_device_ids[] = {
+	{.name = "arm-pmu"},
+	{},
+};
+
+static int __devinit armpmu_device_probe(struct platform_device *pdev)
 {
-	if (armpmu && armpmu->reset)
-		return on_each_cpu(armpmu->reset, NULL, 1);
+	cpu_pmu->plat_device = pdev;
 	return 0;
 }
-arch_initcall(armpmu_reset);
 
+static struct platform_driver armpmu_driver = {
+	.driver		= {
+		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
+	},
+	.probe		= armpmu_device_probe,
+	.id_table	= armpmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&armpmu_driver);
+}
+device_initcall(register_pmu_driver);
+
+static struct pmu_hw_events *armpmu_get_cpu_events(void)
+{
+	return &__get_cpu_var(cpu_hw_events);
+}
+
+static void __init cpu_pmu_init(struct arm_pmu *armpmu)
+{
+	int cpu;
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+		events->events = per_cpu(hw_events, cpu);
+		events->used_mask = per_cpu(used_mask, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+	}
+	armpmu->get_hw_events = armpmu_get_cpu_events;
+	armpmu->type = ARM_PMU_DEVICE_CPU;
+}
+
+/*
+ * CPU PMU identification and registration.
+ */
 static int __init
 init_hw_perf_events(void)
 {
@@ -651,22 +684,22 @@ init_hw_perf_events(void)
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			armpmu = armv6pmu_init();
+			cpu_pmu = armv6pmu_init();
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			armpmu = armv6mpcore_pmu_init();
+			cpu_pmu = armv6mpcore_pmu_init();
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			armpmu = armv7_a8_pmu_init();
+			cpu_pmu = armv7_a8_pmu_init();
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			armpmu = armv7_a9_pmu_init();
+			cpu_pmu = armv7_a9_pmu_init();
 			break;
 		case 0xC050:	/* Cortex-A5 */
-			armpmu = armv7_a5_pmu_init();
+			cpu_pmu = armv7_a5_pmu_init();
 			break;
 		case 0xC0F0:	/* Cortex-A15 */
-			armpmu = armv7_a15_pmu_init();
+			cpu_pmu = armv7_a15_pmu_init();
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -674,23 +707,23 @@ init_hw_perf_events(void)
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			armpmu = xscale1pmu_init();
+			cpu_pmu = xscale1pmu_init();
 			break;
 		case 2:
-			armpmu = xscale2pmu_init();
+			cpu_pmu = xscale2pmu_init();
 			break;
 		}
 	}
 
-	if (armpmu) {
+	if (cpu_pmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
+			cpu_pmu->name, cpu_pmu->num_events);
+		cpu_pmu_init(cpu_pmu);
+		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
 	} else {
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-
 	return 0;
 }
 early_initcall(init_hw_perf_events);
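To summarise the reworked registration path: a backend now describes itself
with a struct arm_pmu and hands it to the core, rather than the core poking
file-scope globals. A minimal sketch (illustrative only -- example_pmu,
example_map_event and example_pmu_setup are hypothetical, and a real backend
must also fill in handle_irq/enable/disable/get_event_idx/start/stop):

	static int example_map_event(struct perf_event *event)
	{
		/* A real backend dispatches via map_cpu_event() here. */
		return -ENOENT;
	}

	static struct arm_pmu example_pmu = {
		.name		= "example",
		.map_event	= example_map_event,
		.num_events	= 2,
		.max_period	= (1LLU << 32) - 1,
	};

	static int __init example_pmu_setup(void)
	{
		cpu_pmu = &example_pmu;
		cpu_pmu_init(cpu_pmu);	/* per-cpu hw_events + PMU type */
		return armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
	}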
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index dd7f3b9f4cb3..e63d8115c01b 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -54,7 +54,7 @@ enum armv6_perf_types {
 };
 
 enum armv6_counters {
-	ARMV6_CYCLE_COUNTER = 1,
+	ARMV6_CYCLE_COUNTER = 0,
 	ARMV6_COUNTER0,
 	ARMV6_COUNTER1,
 };
@@ -433,6 +433,7 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
 		      int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= 0;
@@ -454,12 +455,29 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
 	 * Mask out the current event and set the counter to count the event
 	 * that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int counter_is_active(unsigned long pmcr, int idx)
+{
+	unsigned long mask = 0;
+	if (idx == ARMV6_CYCLE_COUNTER)
+		mask = ARMV6_PMCR_CCOUNT_IEN;
+	else if (idx == ARMV6_COUNTER0)
+		mask = ARMV6_PMCR_COUNT0_IEN;
+	else if (idx == ARMV6_COUNTER1)
+		mask = ARMV6_PMCR_COUNT1_IEN;
+
+	if (mask)
+		return pmcr & mask;
+
+	WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+	return 0;
 }
 
 static irqreturn_t
@@ -468,7 +486,7 @@ armv6pmu_handle_irq(int irq_num,
 {
 	unsigned long pmcr = armv6_pmcr_read();
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -487,11 +505,11 @@ armv6pmu_handle_irq(int irq_num,
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
+		if (!counter_is_active(pmcr, idx))
 			continue;
 
 		/*
@@ -508,7 +526,7 @@ armv6pmu_handle_irq(int irq_num,
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -527,28 +545,30 @@ static void
 armv6pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val |= ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 armv6pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
 		       struct hw_perf_event *event)
 {
 	/* Always place a cycle counter into the cycle counter. */
@@ -578,6 +598,7 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 		       int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -598,12 +619,12 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 	 * of ETM bus signal assertion cycles. The external reporting should
 	 * be disabled and so this should never increment.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
@@ -611,6 +632,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 			      int idx)
 {
 	unsigned long val, mask, flags, evt = 0;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -627,15 +649,21 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
 	 * simply disable the interrupt reporting.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int armv6_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6_perf_map,
+				&armv6_perf_cache_map, 0xFF);
 }
 
-static const struct arm_pmu armv6pmu = {
+static struct arm_pmu armv6pmu = {
 	.id			= ARM_PERF_PMU_ID_V6,
 	.name			= "v6",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -646,14 +674,12 @@ static const struct arm_pmu armv6pmu = {
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6_perf_cache_map,
-	.event_map		= &armv6_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return &armv6pmu;
 }
@@ -665,7 +691,14 @@ static const struct arm_pmu *__init armv6pmu_init(void)
  * disable the interrupt reporting and update the event. When unthrottling we
  * reset the period and enable the interrupt reporting.
  */
-static const struct arm_pmu armv6mpcore_pmu = {
+
+static int armv6mpcore_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6mpcore_perf_map,
+				&armv6mpcore_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu armv6mpcore_pmu = {
 	.id			= ARM_PERF_PMU_ID_V6MP,
 	.name			= "v6mpcore",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -676,24 +709,22 @@ static const struct arm_pmu armv6mpcore_pmu = {
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6mpcore_perf_cache_map,
-	.event_map		= &armv6mpcore_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6mpcore_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return &armv6mpcore_pmu;
 }
 #else
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 4c851834f68e..98b75738345e 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -17,6 +17,9 @@
  */
 
 #ifdef CONFIG_CPU_V7
+
+static struct arm_pmu armv7pmu;
+
 /*
  * Common ARMv7 event types
  *
@@ -676,23 +679,24 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 };
 
 /*
- * Perf Events counters
+ * Perf Events' indices
  */
-enum armv7_counters {
-	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
-	ARMV7_COUNTER0			= 2,	/* First event counter */
-};
+#define	ARMV7_IDX_CYCLE_COUNTER	0
+#define	ARMV7_IDX_COUNTER0	1
+#define	ARMV7_IDX_COUNTER_LAST	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+#define	ARMV7_MAX_COUNTERS	32
+#define	ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
 
 /*
- * The cycle counter is ARMV7_CYCLE_COUNTER.
- * The first event counter is ARMV7_COUNTER0.
- * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
+ * ARMv7 low level PMNC access
  */
-#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
 
 /*
- * ARMv7 low level PMNC access
+ * Perf Event to low level counters mapping
  */
+#define	ARMV7_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
 
 /*
  * Per-CPU PMNC: config reg
@@ -708,103 +712,76 @@ enum armv7_counters {
 #define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
 
 /*
- * Available counters
- */
-#define ARMV7_CNT0		0	/* First event counter */
-#define ARMV7_CCNT		31	/* Cycle counter */
-
-/* Perf Event to low level counters mapping */
-#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
-
-/*
- * CNTENS: counters enable reg
- */
-#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * CNTENC: counters disable reg
- */
-#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENS: counters overflow interrupt enable reg
- */
-#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENC: counters overflow interrupt disable reg
- */
-#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * EVTSEL: Event selection reg
+ * FLAG: counters overflow flag status reg
  */
-#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
+#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
 
 /*
- * SELECT: Counter selection reg
+ * PMXEVTYPER: Event selection reg
  */
-#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
+#define	ARMV7_EVTYPE_MASK	0xc00000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
 
 /*
- * FLAG: counters overflow flag status reg
+ * Event filters for PMUv2
  */
-#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
-#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
+#define	ARMV7_EXCLUDE_PL1	(1 << 31)
+#define	ARMV7_EXCLUDE_USER	(1 << 30)
+#define	ARMV7_INCLUDE_HYP	(1 << 27)
 
-static inline unsigned long armv7_pmnc_read(void)
+static inline u32 armv7_pmnc_read(void)
 {
 	u32 val;
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
 	return val;
 }
 
-static inline void armv7_pmnc_write(unsigned long val)
+static inline void armv7_pmnc_write(u32 val)
 {
 	val &= ARMV7_PMNC_MASK;
 	isb();
 	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
 }
 
-static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
 {
 	return pmnc & ARMV7_OVERFLOWED_MASK;
 }
 
-static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
-					enum armv7_counters counter)
+static inline int armv7_pmnc_counter_valid(int idx)
+{
+	return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST;
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
 {
 	int ret = 0;
+	u32 counter;
 
-	if (counter == ARMV7_CYCLE_COUNTER)
-		ret = pmnc & ARMV7_FLAG_C;
-	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
-		ret = pmnc & ARMV7_FLAG_P(counter);
-	else
+	if (!armv7_pmnc_counter_valid(idx)) {
 		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), counter);
+			smp_processor_id(), idx);
+	} else {
+		counter = ARMV7_IDX_TO_COUNTER(idx);
+		ret = pmnc & BIT(counter);
+	}
 
 	return ret;
 }
 
-static inline int armv7_pmnc_select_counter(unsigned int idx)
+static inline int armv7_pmnc_select_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
-		pr_err("CPU%u selecting wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u selecting wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
 	isb();
 
 	return idx;
@@ -812,124 +789,95 @@ static inline int armv7_pmnc_select_counter(unsigned int idx)
 
 static inline u32 armv7pmu_read_counter(int idx)
 {
-	unsigned long value = 0;
+	u32 value = 0;
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mrc p15, 0, %0, c9, c13, 2"
-				     : "=r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
 
 	return value;
 }
 
 static inline void armv7pmu_write_counter(int idx, u32 value)
 {
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mcr p15, 0, %0, c9, c13, 2"
-				     : : "r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
 }
 
-static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
 {
 	if (armv7_pmnc_select_counter(idx) == idx) {
-		val &= ARMV7_EVTSEL_MASK;
+		val &= ARMV7_EVTYPE_MASK;
 		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
 	}
 }
 
-static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
+static inline int armv7_pmnc_enable_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENS_C;
-	else
-		val = ARMV7_CNTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
+static inline int armv7_pmnc_disable_counter(int idx)
 {
-	u32 val;
-
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENC_C;
-	else
-		val = ARMV7_CNTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
+static inline int armv7_pmnc_enable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENS_C;
-	else
-		val = ARMV7_INTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
+static inline int armv7_pmnc_disable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENC_C;
-	else
-		val = ARMV7_INTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
@@ -973,14 +921,14 @@ static void armv7_pmnc_dump_regs(void)
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
 	printk(KERN_INFO "CCNT  =0x%08x\n", val);
 
-	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
+	for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) {
 		armv7_pmnc_select_counter(cnt);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 	}
 }
 #endif
@@ -988,12 +936,13 @@ static void armv7_pmnc_dump_regs(void)
 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1002,9 +951,10 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 
 	/*
 	 * Set event (if destined for PMNx counters)
-	 * We don't need to set the event if it's a cycle count
+	 * We only need to set the event for the cycle counter if we
+	 * have the ability to perform event filtering.
 	 */
-	if (idx != ARMV7_CYCLE_COUNTER)
+	if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
 		armv7_pmnc_write_evtsel(idx, hwc->config_base);
 
 	/*
@@ -1017,17 +967,18 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	 */
 	armv7_pmnc_enable_counter(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Disable counter and interrupt
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1039,14 +990,14 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 	 */
 	armv7_pmnc_disable_intens(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 {
-	unsigned long pmnc;
+	u32 pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -1069,13 +1020,10 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		/*
 		 * We have a single interrupt for all counters. Check that
 		 * each counter has overflowed before we process it.
@@ -1090,7 +1038,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -1108,61 +1056,114 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 static void armv7pmu_start(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_stop(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
 				  struct hw_perf_event *event)
 {
 	int idx;
+	unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
-	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
-		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
+	if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
-		return ARMV7_CYCLE_COUNTER;
-	} else {
-		/*
-		 * For anything other than a cycle counter, try and use
-		 * the events counters
-		 */
-		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
-			if (!test_and_set_bit(idx, cpuc->used_mask))
-				return idx;
-		}
+		return ARMV7_IDX_CYCLE_COUNTER;
+	}
 
-		/* The counters are all in use. */
-		return -EAGAIN;
+	/*
+	 * For anything other than a cycle counter, try to use
+	 * the event counters
+	 */
+	for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
 	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+	if (attr->exclude_user)
+		config_base |= ARMV7_EXCLUDE_USER;
+	if (attr->exclude_kernel)
+		config_base |= ARMV7_EXCLUDE_PL1;
+	if (!attr->exclude_hv)
+		config_base |= ARMV7_INCLUDE_HYP;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
 }
 
 static void armv7pmu_reset(void *info)
 {
-	u32 idx, nb_cnt = armpmu->num_events;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = 1; idx < nb_cnt; ++idx)
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
 		armv7pmu_disable_event(NULL, idx);
 
 	/* Initialize & Reset PMNC: C and P bits */
 	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
 }
 
+static int armv7_a8_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a8_perf_map,
+				&armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a9_perf_map,
+				&armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a5_perf_map,
+				&armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a15_perf_map,
+				&armv7_a15_perf_cache_map, 0xFF);
+}
+
 static struct arm_pmu armv7pmu = {
 	.handle_irq		= armv7pmu_handle_irq,
 	.enable			= armv7pmu_enable_event,
@@ -1173,7 +1174,6 @@ static struct arm_pmu armv7pmu = {
 	.start			= armv7pmu_start,
 	.stop			= armv7pmu_stop,
 	.reset			= armv7pmu_reset,
-	.raw_event_mask		= 0xFF,
 	.max_period		= (1LLU << 32) - 1,
 };
 
@@ -1188,62 +1188,59 @@ static u32 __init armv7_read_num_pmnc_events(void)
 	return nb_cnt + 1;
 }
 
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
-	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a8_perf_map;
+	armv7pmu.map_event	= armv7_a8_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
-	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a9_perf_map;
+	armv7pmu.map_event	= armv7_a9_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
-	armv7pmu.cache_map	= &armv7_a5_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a5_perf_map;
+	armv7pmu.map_event	= armv7_a5_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
-	armv7pmu.cache_map	= &armv7_a15_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a15_perf_map;
+	armv7pmu.map_event	= armv7_a15_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
+	armv7pmu.set_event_filter = armv7pmu_set_event_filter;
 	return &armv7pmu;
 }
 #else
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	return NULL;
 }
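The replacement index scheme is easiest to check by hand (illustrative
aside, not part of the patch):

	/*
	 * ARMV7_IDX_TO_COUNTER(x) == ((x) - ARMV7_IDX_COUNTER0) & 0x1f, so:
	 *
	 *   ARMV7_IDX_COUNTER0      (1) -> counter 0          -> BIT(0)
	 *   ARMV7_IDX_COUNTER0 + n      -> counter n          -> BIT(n)
	 *   ARMV7_IDX_CYCLE_COUNTER (0) -> (0 - 1) & 0x1f, 31 -> BIT(31)
	 *
	 * The cycle counter wraps to bit 31 of the CNTENSET/CNTENCLR/INTEN/
	 * FLAG registers, exactly where the deleted ARMV7_CNTENS_C and
	 * ARMV7_CCNT definitions put it, so the enable/disable paths no
	 * longer need a cycle-counter special case.
	 */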
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 3c4397491d08..e0cca10a8411 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -40,7 +40,7 @@ enum xscale_perf_types {
 };
 
 enum xscale_counters {
-	XSCALE_CYCLE_COUNTER	= 1,
+	XSCALE_CYCLE_COUNTER	= 0,
 	XSCALE_COUNTER0,
 	XSCALE_COUNTER1,
 	XSCALE_COUNTER2,
@@ -222,7 +222,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -249,13 +249,10 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -266,7 +263,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -284,6 +281,7 @@ static void
 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -305,18 +303,19 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -336,16 +335,16 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	if (XSCALE_PERFCTR_CCNT == event->config_base) {
@@ -368,24 +367,26 @@ static void
 xscale1pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val |= XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -424,7 +425,13 @@ xscale1pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static const struct arm_pmu xscale1pmu = {
+static int xscale_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &xscale_perf_map,
+				&xscale_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu xscale1pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE1,
 	.name		= "xscale1",
 	.handle_irq	= xscale1pmu_handle_irq,
@@ -435,14 +442,12 @@ static const struct arm_pmu xscale1pmu = {
 	.get_event_idx	= xscale1pmu_get_event_idx,
 	.start		= xscale1pmu_start,
 	.stop		= xscale1pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 3,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return &xscale1pmu;
 }
@@ -560,7 +565,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -581,13 +586,10 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -598,7 +600,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -616,6 +618,7 @@ static void
 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -649,16 +652,17 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -692,14 +696,14 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	int idx = xscale1pmu_get_event_idx(cpuc, event);
@@ -718,24 +722,26 @@ static void
 xscale2pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
 	val |= XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -786,7 +792,7 @@ xscale2pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static const struct arm_pmu xscale2pmu = {
+static struct arm_pmu xscale2pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE2,
 	.name		= "xscale2",
 	.handle_irq	= xscale2pmu_handle_irq,
@@ -797,24 +803,22 @@ static const struct arm_pmu xscale2pmu = {
 	.get_event_idx	= xscale2pmu_get_event_idx,
 	.start		= xscale2pmu_start,
 	.stop		= xscale2pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 5,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return &xscale2pmu;
 }
 #else
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return NULL;
 }
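
This hunk completes the pattern used throughout the patch: the per-PMU
cache_map/event_map/raw_event_mask triplet becomes a single .map_event
callback.  As a sketch only - the shared helper's name and signature are an
assumption, not shown in this section - the xscale callback plausibly reduces
to a one-line delegation over the same two tables and mask that the deleted
fields carried:

	#include <linux/perf_event.h>

	/* Sketch; map_cpu_event() is an assumed shared helper. */
	static int xscale_map_event(struct perf_event *event)
	{
		return map_cpu_event(event, &xscale_perf_map,
				     &xscale_perf_cache_map, 0xFF);
	}
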
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
index 2b70709376c3..2c3407ee8576 100644
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -10,192 +10,26 @@
  *
  */
 
-#define pr_fmt(fmt) "PMU: " fmt
-
-#include <linux/cpumask.h>
 #include <linux/err.h>
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
 
 #include <asm/pmu.h>
 
-static volatile long pmu_lock;
-
-static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
-
-static int __devinit pmu_register(struct platform_device *pdev,
-					enum arm_pmu_type type)
-{
-	if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
-		pr_warning("received registration request for unknown "
-				"device %d\n", type);
-		return -EINVAL;
-	}
-
-	if (pmu_devices[type]) {
-		pr_warning("rejecting duplicate registration of PMU device "
-			"type %d.", type);
-		return -ENOSPC;
-	}
-
-	pr_info("registered new PMU device of type %d\n", type);
-	pmu_devices[type] = pdev;
-	return 0;
-}
-
-#define OF_MATCH_PMU(_name, _type) {	\
-	.compatible = _name,		\
-	.data = (void *)_type,		\
-}
-
-#define OF_MATCH_CPU(name)	OF_MATCH_PMU(name, ARM_PMU_DEVICE_CPU)
-
-static struct of_device_id armpmu_of_device_ids[] = {
-	OF_MATCH_CPU("arm,cortex-a9-pmu"),
-	OF_MATCH_CPU("arm,cortex-a8-pmu"),
-	OF_MATCH_CPU("arm,arm1136-pmu"),
-	OF_MATCH_CPU("arm,arm1176-pmu"),
-	{},
-};
-
-#define PLAT_MATCH_PMU(_name, _type) {	\
-	.name		= _name,	\
-	.driver_data	= _type,	\
-}
-
-#define PLAT_MATCH_CPU(_name)	PLAT_MATCH_PMU(_name, ARM_PMU_DEVICE_CPU)
-
-static struct platform_device_id armpmu_plat_device_ids[] = {
-	PLAT_MATCH_CPU("arm-pmu"),
-	{},
-};
-
-enum arm_pmu_type armpmu_device_type(struct platform_device *pdev)
-{
-	const struct of_device_id	*of_id;
-	const struct platform_device_id *pdev_id;
-
-	/* provided by of_device_id table */
-	if (pdev->dev.of_node) {
-		of_id = of_match_device(armpmu_of_device_ids, &pdev->dev);
-		BUG_ON(!of_id);
-		return (enum arm_pmu_type)of_id->data;
-	}
-
-	/* Provided by platform_device_id table */
-	pdev_id = platform_get_device_id(pdev);
-	BUG_ON(!pdev_id);
-	return pdev_id->driver_data;
-}
-
-static int __devinit armpmu_device_probe(struct platform_device *pdev)
-{
-	return pmu_register(pdev, armpmu_device_type(pdev));
-}
-
-static struct platform_driver armpmu_driver = {
-	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = armpmu_of_device_ids,
-	},
-	.probe		= armpmu_device_probe,
-	.id_table	= armpmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
-	return platform_driver_register(&armpmu_driver);
-}
-device_initcall(register_pmu_driver);
-
-struct platform_device *
-reserve_pmu(enum arm_pmu_type device)
-{
-	struct platform_device *pdev;
-
-	if (test_and_set_bit_lock(device, &pmu_lock)) {
-		pdev = ERR_PTR(-EBUSY);
-	} else if (pmu_devices[device] == NULL) {
-		clear_bit_unlock(device, &pmu_lock);
-		pdev = ERR_PTR(-ENODEV);
-	} else {
-		pdev = pmu_devices[device];
-	}
-
-	return pdev;
-}
-EXPORT_SYMBOL_GPL(reserve_pmu);
+/*
+ * PMU locking to ensure mutual exclusion between different subsystems.
+ */
+static unsigned long pmu_lock[BITS_TO_LONGS(ARM_NUM_PMU_DEVICES)];
 
 int
-release_pmu(enum arm_pmu_type device)
-{
-	if (WARN_ON(!pmu_devices[device]))
-		return -EINVAL;
-	clear_bit_unlock(device, &pmu_lock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(release_pmu);
-
-static int
-set_irq_affinity(int irq,
-		 unsigned int cpu)
+reserve_pmu(enum arm_pmu_type type)
 {
-#ifdef CONFIG_SMP
-	int err = irq_set_affinity(irq, cpumask_of(cpu));
-	if (err)
-		pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-			   irq, cpu);
-	return err;
-#else
-	return -EINVAL;
-#endif
-}
-
-static int
-init_cpu_pmu(void)
-{
-	int i, irqs, err = 0;
-	struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU];
-
-	if (!pdev)
-		return -ENODEV;
-
-	irqs = pdev->num_resources;
-
-	/*
-	 * If we have a single PMU interrupt that we can't shift, assume that
-	 * we're running on a uniprocessor machine and continue.
-	 */
-	if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0)))
-		return 0;
-
-	for (i = 0; i < irqs; ++i) {
-		err = set_irq_affinity(platform_get_irq(pdev, i), i);
-		if (err)
-			break;
-	}
-
-	return err;
+	return test_and_set_bit_lock(type, pmu_lock) ? -EBUSY : 0;
 }
+EXPORT_SYMBOL_GPL(reserve_pmu);
 
-int
-init_pmu(enum arm_pmu_type device)
+void
+release_pmu(enum arm_pmu_type type)
 {
-	int err = 0;
-
-	switch (device) {
-	case ARM_PMU_DEVICE_CPU:
-		err = init_cpu_pmu();
-		break;
-	default:
-		pr_warning("attempt to initialise unknown device %d\n",
-				device);
-		err = -EINVAL;
-	}
-
-	return err;
+	clear_bit_unlock(type, pmu_lock);
 }
-EXPORT_SYMBOL_GPL(init_pmu);
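
With the platform-device handling moved out, PMU reservation reduces to an
atomic claim on a per-type bitmap: reserve_pmu() returns -EBUSY if another
subsystem already holds the PMU, and release_pmu() simply clears the bit.
A minimal caller sketch (the surrounding driver logic is illustrative):

	#include <asm/pmu.h>

	static int pmu_client_example(void)
	{
		int err = reserve_pmu(ARM_PMU_DEVICE_CPU); /* -EBUSY if held */
		if (err)
			return err;
		/* ... program counters, request PMU IRQs ... */
		release_pmu(ARM_PMU_DEVICE_CPU);           /* drop the claim */
		return 0;
	}
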
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index 9cf4cbf8f95b..d0cdedf4864d 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -57,7 +57,8 @@ relocate_new_kernel:
 	mov r0,#0
 	ldr r1,kexec_mach_type
 	ldr r2,kexec_boot_atags
-	mov pc,lr
+ ARM(	mov pc, lr	)
+ THUMB(	bx lr		)
 
 	.align
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 78d197d6ec34..6136144f8f8d 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -280,18 +280,19 @@ static void __init cacheid_init(void)
 	if (arch >= CPU_ARCH_ARMv6) {
 		if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
+			arch = CPU_ARCH_ARMv7;
 			cacheid = CACHEID_VIPT_NONALIASING;
 			if ((cachetype & (3 << 14)) == 1 << 14)
 				cacheid |= CACHEID_ASID_TAGGED;
-			else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7))
-				cacheid |= CACHEID_VIPT_I_ALIASING;
-		} else if (cachetype & (1 << 23)) {
-			cacheid = CACHEID_VIPT_ALIASING;
 		} else {
-			cacheid = CACHEID_VIPT_NONALIASING;
-			if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6))
-				cacheid |= CACHEID_VIPT_I_ALIASING;
+			arch = CPU_ARCH_ARMv6;
+			if (cachetype & (1 << 23))
+				cacheid = CACHEID_VIPT_ALIASING;
+			else
+				cacheid = CACHEID_VIPT_NONALIASING;
 		}
+		if (cpu_has_aliasing_icache(arch))
+			cacheid |= CACHEID_VIPT_I_ALIASING;
 	} else {
 		cacheid = CACHEID_VIVT;
 	}
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index dc902f2c6845..020e99c845e7 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -8,92 +8,61 @@
 	.text
 
 /*
- * Save CPU state for a suspend
- *  r1 = v:p offset
- *  r2 = suspend function arg0
- *  r3 = suspend function
+ * Save CPU state for a suspend.  This saves the CPU general purpose
+ * registers, and allocates space on the kernel stack to save the CPU
+ * specific registers and some other data for resume.
+ *  r0 = suspend function arg0
+ *  r1 = suspend function
  */
 ENTRY(__cpu_suspend)
 	stmfd	sp!, {r4 - r11, lr}
 #ifdef MULTI_CPU
 	ldr	r10, =processor
-	ldr	r5, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
-	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
+	ldr	r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
 #else
-	ldr	r5, =cpu_suspend_size
-	ldr	ip, =cpu_do_resume
+	ldr	r4, =cpu_suspend_size
 #endif
-	mov	r6, sp			@ current virtual SP
-	sub	sp, sp, r5		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer to CPU save block
-	add	ip, ip, r1		@ convert resume fn to phys
-	stmfd	sp!, {r1, r6, ip}	@ save v:p, virt SP, phys resume fn
-	ldr	r5, =sleep_save_sp
-	add	r6, sp, r1		@ convert SP to phys
-	stmfd	sp!, {r2, r3}		@ save suspend func arg and pointer
+	mov	r5, sp			@ current virtual SP
+	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
+	sub	sp, sp, r4		@ allocate CPU state on stack
+	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
+	add	r0, sp, #8		@ save pointer to save block
+	mov	r1, r4			@ size of save block
+	mov	r2, r5			@ virtual SP
+	ldr	r3, =sleep_save_sp
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_UP(mov lr, #0)
 	and	lr, lr, #15
-	str	r6, [r5, lr, lsl #2]	@ save phys SP
-#else
-	str	r6, [r5]		@ save phys SP
-#endif
-#ifdef MULTI_CPU
-	mov	lr, pc
-	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
-#else
-	bl	cpu_do_suspend
-#endif
-
-	@ flush data cache
-#ifdef MULTI_CACHE
-	ldr	r10, =cpu_cache
-	mov	lr, pc
-	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
-#else
-	bl	__cpuc_flush_kern_all
+	add	r3, r3, lr, lsl #2
 #endif
+	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
 ENDPROC(__cpu_suspend)
 	.ltorg
 
 cpu_suspend_abort:
-	ldmia	sp!, {r1 - r3}		@ pop v:p, virt SP, phys resume fn
+	ldmia	sp!, {r1 - r3}		@ pop phys pgd, virt SP, phys resume fn
+	teq	r0, #0
+	moveq	r0, #1			@ force non-zero value
 	mov	sp, r2
 	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_suspend_abort)
 
 /*
  * r0 = control register value
- * r1 = v:p offset (preserved by cpu_do_resume)
- * r2 = phys page table base
- * r3 = L1 section flags
  */
+	.align	5
 ENTRY(cpu_resume_mmu)
-	adr	r4, cpu_resume_turn_mmu_on
-	mov	r4, r4, lsr #20
-	orr	r3, r3, r4, lsl #20
-	ldr	r5, [r2, r4, lsl #2]	@ save old mapping
-	str	r3, [r2, r4, lsl #2]	@ setup 1:1 mapping for mmu code
-	sub	r2, r2, r1
 	ldr	r3, =cpu_resume_after_mmu
-	bic	r1, r0, #CR_C		@ ensure D-cache is disabled
-	b	cpu_resume_turn_mmu_on
-ENDPROC(cpu_resume_mmu)
-	.ltorg
-	.align	5
-cpu_resume_turn_mmu_on:
-	mcr	p15, 0, r1, c1, c0, 0	@ turn on MMU, I-cache, etc
-	mrc	p15, 0, r1, c0, c0, 0	@ read id reg
-	mov	r1, r1
-	mov	r1, r1
+	mcr	p15, 0, r0, c1, c0, 0	@ turn on MMU, I-cache, etc
+	mrc	p15, 0, r0, c0, c0, 0	@ read id reg
+	mov	r0, r0
+	mov	r0, r0
 	mov	pc, r3			@ jump to virtual address
-ENDPROC(cpu_resume_turn_mmu_on)
+ENDPROC(cpu_resume_mmu)
 cpu_resume_after_mmu:
-	str	r5, [r2, r4, lsl #2]	@ restore old mapping
-	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
 	bl	cpu_init		@ restore the und/abt/irq banked regs
 	mov	r0, #0			@ return zero on success
 	ldmfd	sp!, {r4 - r11, pc}
@@ -119,7 +88,7 @@ ENTRY(cpu_resume)
 	ldr	r0, sleep_save_sp	@ stack phys addr
 #endif
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
-	@ load v:p, stack, resume fn
+	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
 THUMB(	ldmia	r0!, {r1, r2, r3}	)
 THUMB(	mov	sp, r2			)
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 2c277d40cee6..01c186222f3b 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -137,8 +137,8 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 	clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
 	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
 
+	clockevents_register_device(clk);
+
 	/* Make sure our local interrupt controller has this enabled */
 	gic_enable_ppi(clk->irq);
-
-	clockevents_register_device(clk);
 }
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
new file mode 100644
index 000000000000..93a22d282c16
--- /dev/null
+++ b/arch/arm/kernel/suspend.c
@@ -0,0 +1,72 @@
+#include <linux/init.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+static pgd_t *suspend_pgd;
+
+extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
+extern void cpu_resume_mmu(void);
+
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ */
+void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
+{
+	*save_ptr = virt_to_phys(ptr);
+
+	/* This must correspond to the LDM in cpu_resume() assembly */
+	*ptr++ = virt_to_phys(suspend_pgd);
+	*ptr++ = sp;
+	*ptr++ = virt_to_phys(cpu_do_resume);
+
+	cpu_do_suspend(ptr);
+
+	flush_cache_all();
+	outer_clean_range(*save_ptr, *save_ptr + ptrsz);
+	outer_clean_range(virt_to_phys(save_ptr),
+			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
+}
+
+/*
+ * Hide the page-table handling done around __cpu_suspend - it is an
+ * implementation detail which platform code shouldn't have to know about.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+
+	if (!suspend_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+
+static int __init cpu_suspend_init(void)
+{
+	suspend_pgd = pgd_alloc(&init_mm);
+	if (suspend_pgd) {
+		unsigned long addr = virt_to_phys(cpu_resume_mmu);
+		identity_mapping_add(suspend_pgd, addr, addr + SECTION_SIZE);
+	}
+	return suspend_pgd ? 0 : -ENOMEM;
+}
+core_initcall(cpu_suspend_init);
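
The calling convention implied by cpu_suspend_abort above: the finisher
passed to cpu_suspend() either takes the CPU down (so control returns via
cpu_resume with a zero result) or returns, in which case the abort stub
forces a non-zero value back to the caller.  A hedged usage sketch with
hypothetical platform names:

	#include <asm/suspend.h>

	/* my_soc_enter_deep_sleep() is hypothetical platform code. */
	static int my_finish_suspend(unsigned long arg)
	{
		my_soc_enter_deep_sleep(arg); /* no return if we really slept */
		return 1;                     /* reached only on abort */
	}

	static int my_pm_enter(void)
	{
		/* 0 on successful suspend+resume, non-zero if aborted */
		return cpu_suspend(0, my_finish_suspend);
	}
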
diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c
index d522b47e30b5..6c8e3b5f669f 100644
--- a/arch/arm/mach-at91/at91sam9261.c
+++ b/arch/arm/mach-at91/at91sam9261.c
@@ -157,7 +157,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
 	CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk),
 	CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk),
 	CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk),
-	CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc1_clk),
+	CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk),
 	CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
 	CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
 	CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk),
diff --git a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
index 6bd83ed90afe..d87bfc397d39 100644
--- a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
+++ b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
@@ -8,7 +8,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <mach/hardware.h>
 #include <asm/hardware/entry-macro-gic.S>
 
 		.macro	disable_fiq
diff --git a/arch/arm/mach-cns3xxx/include/mach/system.h b/arch/arm/mach-cns3xxx/include/mach/system.h
index 58bb03ae3cf4..4f16c9b79f78 100644
--- a/arch/arm/mach-cns3xxx/include/mach/system.h
+++ b/arch/arm/mach-cns3xxx/include/mach/system.h
@@ -13,7 +13,6 @@
 
 #include <linux/io.h>
 #include <asm/proc-fns.h>
-#include <mach/hardware.h>
 
 static inline void arch_idle(void)
 {
diff --git a/arch/arm/mach-cns3xxx/include/mach/uncompress.h b/arch/arm/mach-cns3xxx/include/mach/uncompress.h
index de8ead9b91f7..a91b6058ab4f 100644
--- a/arch/arm/mach-cns3xxx/include/mach/uncompress.h
+++ b/arch/arm/mach-cns3xxx/include/mach/uncompress.h
@@ -8,7 +8,6 @@
  */
 
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
 #include <mach/cns3xxx.h>
 
 #define AMBA_UART_DR(base)	(*(volatile unsigned char *)((base) + 0x00))
diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c
index 06fd25d70aec..0f8fca48a5ed 100644
--- a/arch/arm/mach-cns3xxx/pcie.c
+++ b/arch/arm/mach-cns3xxx/pcie.c
@@ -49,7 +49,7 @@ static struct cns3xxx_pcie *sysdata_to_cnspci(void *sysdata)
 	return &cns3xxx_pcie[root->domain];
 }
 
-static struct cns3xxx_pcie *pdev_to_cnspci(struct pci_dev *dev)
+static struct cns3xxx_pcie *pdev_to_cnspci(const struct pci_dev *dev)
 {
 	return sysdata_to_cnspci(dev->sysdata);
 }
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 85a2c7e1e684..6e41cb5baeb4 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -115,6 +115,32 @@ static struct spi_board_info da850evm_spi_info[] = {
 	},
 };
 
+#ifdef CONFIG_MTD
+static void da850_evm_m25p80_notify_add(struct mtd_info *mtd)
+{
+	char *mac_addr = davinci_soc_info.emac_pdata->mac_addr;
+	size_t retlen;
+
+	if (!strcmp(mtd->name, "MAC-Address")) {
+		mtd->read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
+		if (retlen == ETH_ALEN)
+			pr_info("Read MAC addr from SPI Flash: %pM\n",
+				mac_addr);
+	}
+}
+
+static struct mtd_notifier da850evm_spi_notifier = {
+	.add	= da850_evm_m25p80_notify_add,
+};
+
+static void da850_evm_setup_mac_addr(void)
+{
+	register_mtd_user(&da850evm_spi_notifier);
+}
+#else
+static void da850_evm_setup_mac_addr(void) { }
+#endif
+
 static struct mtd_partition da850_evm_norflash_partition[] = {
 	{
 		.name           = "bootloaders + env",
@@ -1244,6 +1270,8 @@ static __init void da850_evm_init(void)
 	if (ret)
 		pr_warning("da850_evm_init: sata registration failed: %d\n",
 				ret);
+
+	da850_evm_setup_mac_addr();
 }
 
 #ifdef CONFIG_SERIAL_8250_CONSOLE
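
The board code relies on a property of the MTD notifier interface:
register_mtd_user() invokes the .add hook for every MTD device already
registered and again for each later arrival, so the single call at board
init still sees the "MAC-Address" partition whenever the SPI flash probes.
The contract, sketched with illustrative names (no .remove hook, mirroring
the board code, on the assumption the flash never goes away):

	#include <linux/mtd/mtd.h>

	static void example_mtd_add(struct mtd_info *mtd)
	{
		pr_info("MTD \"%s\" now visible\n", mtd->name);
	}

	static struct mtd_notifier example_notifier = {
		.add	= example_mtd_add,
	};

	/* somewhere in board init: register_mtd_user(&example_notifier); */
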
diff --git a/arch/arm/mach-davinci/include/mach/psc.h b/arch/arm/mach-davinci/include/mach/psc.h
index 47fd0bc3d3e7..fa59c097223d 100644
--- a/arch/arm/mach-davinci/include/mach/psc.h
+++ b/arch/arm/mach-davinci/include/mach/psc.h
@@ -243,7 +243,7 @@
 #define PSC_STATE_DISABLE	2
 #define PSC_STATE_ENABLE	3
 
-#define MDSTAT_STATE_MASK	0x1f
+#define MDSTAT_STATE_MASK	0x3f
 #define MDCTL_FORCE		BIT(31)
 
 #ifndef __ASSEMBLER__
diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S
index 574028995545..d4e9316ecacb 100644
--- a/arch/arm/mach-davinci/sleep.S
+++ b/arch/arm/mach-davinci/sleep.S
@@ -217,7 +217,11 @@ ddr2clk_stop_done:
 ENDPROC(davinci_ddr_psc_config)
 
 CACHE_FLUSH:
-	.word	arm926_flush_kern_cache_all
+#ifdef CONFIG_CPU_V6
+	.word	v6_flush_kern_cache_all
+#else
+	.word   arm926_flush_kern_cache_all
+#endif
 
 ENTRY(davinci_cpu_suspend_sz)
 	.word	. - davinci_cpu_suspend
diff --git a/arch/arm/mach-ep93xx/include/mach/ts72xx.h b/arch/arm/mach-ep93xx/include/mach/ts72xx.h
index 0eabec62cd9d..f1397a13e76b 100644
--- a/arch/arm/mach-ep93xx/include/mach/ts72xx.h
+++ b/arch/arm/mach-ep93xx/include/mach/ts72xx.h
@@ -6,7 +6,7 @@
  * TS72xx memory map:
  *
  * virt		phys		size
- * febff000	22000000	4K	model number register
+ * febff000	22000000	4K	model number register (bits 0-2)
  * febfe000	22400000	4K	options register
  * febfd000	22800000	4K	options register #2
  * febf9000	10800000	4K	TS-5620 RTC index register
@@ -20,6 +20,9 @@
 #define TS72XX_MODEL_TS7200		0x00
 #define TS72XX_MODEL_TS7250		0x01
 #define TS72XX_MODEL_TS7260		0x02
+#define TS72XX_MODEL_TS7300		0x03
+#define TS72XX_MODEL_TS7400		0x04
+#define TS72XX_MODEL_MASK		0x07
 
 
 #define TS72XX_OPTIONS_PHYS_BASE	0x22400000
@@ -51,19 +54,34 @@
 
 #ifndef __ASSEMBLY__
 
+static inline int ts72xx_model(void)
+{
+	return __raw_readb(TS72XX_MODEL_VIRT_BASE) & TS72XX_MODEL_MASK;
+}
+
 static inline int board_is_ts7200(void)
 {
-	return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7200;
+	return ts72xx_model() == TS72XX_MODEL_TS7200;
 }
 
 static inline int board_is_ts7250(void)
 {
-	return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7250;
+	return ts72xx_model() == TS72XX_MODEL_TS7250;
 }
 
 static inline int board_is_ts7260(void)
 {
-	return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7260;
+	return ts72xx_model() == TS72XX_MODEL_TS7260;
+}
+
+static inline int board_is_ts7300(void)
+{
+	return ts72xx_model() == TS72XX_MODEL_TS7300;
+}
+
+static inline int board_is_ts7400(void)
+{
+	return ts72xx_model() == TS72XX_MODEL_TS7400;
 }
 
 static inline int is_max197_installed(void)
diff --git a/arch/arm/mach-exynos4/clock.c b/arch/arm/mach-exynos4/clock.c
index 851dea018578..1561b036a9bf 100644
--- a/arch/arm/mach-exynos4/clock.c
+++ b/arch/arm/mach-exynos4/clock.c
@@ -520,7 +520,7 @@ static struct clk init_clocks_off[] = {
 		.ctrlbit	= (1 << 21),
 	}, {
 		.name		= "ac97",
-		.id		= -1,
+		.devname	= "samsung-ac97",
 		.enable		= exynos4_clk_ip_peril_ctrl,
 		.ctrlbit	= (1 << 27),
 	}, {
diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c
index 2d8a40c9e6e5..746d6fc6d397 100644
--- a/arch/arm/mach-exynos4/cpu.c
+++ b/arch/arm/mach-exynos4/cpu.c
@@ -24,12 +24,13 @@
 #include <plat/exynos4.h>
 #include <plat/adc-core.h>
 #include <plat/sdhci.h>
-#include <plat/devs.h>
 #include <plat/fb-core.h>
 #include <plat/fimc-core.h>
 #include <plat/iic-core.h>
+#include <plat/reset.h>
 
 #include <mach/regs-irq.h>
+#include <mach/regs-pmu.h>
 
 extern int combiner_init(unsigned int combiner_nr, void __iomem *base,
 			 unsigned int irq_start);
@@ -128,6 +129,11 @@ static void exynos4_idle(void)
 	local_irq_enable();
 }
 
+static void exynos4_sw_reset(void)
+{
+	__raw_writel(0x1, S5P_SWRESET);
+}
+
 /*
  * exynos4_map_io
  *
@@ -241,5 +247,8 @@ int __init exynos4_init(void)
 	/* set idle function */
 	pm_idle = exynos4_idle;
 
+	/* set sw_reset function */
+	s5p_reset_hook = exynos4_sw_reset;
+
 	return sysdev_register(&exynos4_sysdev);
 }
diff --git a/arch/arm/mach-exynos4/include/mach/irqs.h b/arch/arm/mach-exynos4/include/mach/irqs.h
index 934d2a493982..f8952f8f3757 100644
--- a/arch/arm/mach-exynos4/include/mach/irqs.h
+++ b/arch/arm/mach-exynos4/include/mach/irqs.h
@@ -80,9 +80,8 @@
 #define IRQ_HSMMC3		IRQ_SPI(76)
 #define IRQ_DWMCI		IRQ_SPI(77)
 
-#define IRQ_MIPICSI0		IRQ_SPI(78)
-
-#define IRQ_MIPICSI1		IRQ_SPI(80)
+#define IRQ_MIPI_CSIS0		IRQ_SPI(78)
+#define IRQ_MIPI_CSIS1		IRQ_SPI(80)
 
 #define IRQ_ONENAND_AUDI	IRQ_SPI(82)
 #define IRQ_ROTATOR		IRQ_SPI(83)
diff --git a/arch/arm/mach-exynos4/include/mach/regs-pmu.h b/arch/arm/mach-exynos4/include/mach/regs-pmu.h
index fa49bbb8e7b0..cdf9b47c303c 100644
--- a/arch/arm/mach-exynos4/include/mach/regs-pmu.h
+++ b/arch/arm/mach-exynos4/include/mach/regs-pmu.h
@@ -29,6 +29,8 @@
 #define S5P_USE_STANDBY_WFE1			(1 << 25)
 #define S5P_USE_MASK				((0x3 << 16) | (0x3 << 24))
 
+#define S5P_SWRESET				S5P_PMUREG(0x0400)
+
 #define S5P_WAKEUP_STAT				S5P_PMUREG(0x0600)
 #define S5P_EINT_WAKEUP_MASK			S5P_PMUREG(0x0604)
 #define S5P_WAKEUP_MASK				S5P_PMUREG(0x0608)
diff --git a/arch/arm/mach-exynos4/irq-eint.c b/arch/arm/mach-exynos4/irq-eint.c
index 9d87d2ac7f68..badb8c66fc9b 100644
--- a/arch/arm/mach-exynos4/irq-eint.c
+++ b/arch/arm/mach-exynos4/irq-eint.c
@@ -23,6 +23,8 @@
 
 #include <mach/regs-gpio.h>
 
+#include <asm/mach/irq.h>
+
 static DEFINE_SPINLOCK(eint_lock);
 
 static unsigned int eint0_15_data[16];
@@ -184,8 +186,11 @@ static inline void exynos4_irq_demux_eint(unsigned int start)
 
 static void exynos4_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc)
 {
+	struct irq_chip *chip = irq_get_chip(irq);
+	chained_irq_enter(chip, desc);
 	exynos4_irq_demux_eint(IRQ_EINT(16));
 	exynos4_irq_demux_eint(IRQ_EINT(24));
+	chained_irq_exit(chip, desc);
 }
 
 static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
@@ -193,6 +198,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
 	u32 *irq_data = irq_get_handler_data(irq);
 	struct irq_chip *chip = irq_get_chip(irq);
 
+	chained_irq_enter(chip, desc);
 	chip->irq_mask(&desc->irq_data);
 
 	if (chip->irq_ack)
@@ -201,6 +207,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
 	generic_handle_irq(*irq_data);
 
 	chip->irq_unmask(&desc->irq_data);
+	chained_irq_exit(chip, desc);
 }
 
 int __init exynos4_init_irq_eint(void)
diff --git a/arch/arm/mach-exynos4/mach-universal_c210.c b/arch/arm/mach-exynos4/mach-universal_c210.c
index d7ec84d586f2..2aac6f755c8e 100644
--- a/arch/arm/mach-exynos4/mach-universal_c210.c
+++ b/arch/arm/mach-exynos4/mach-universal_c210.c
@@ -79,7 +79,7 @@ static struct s3c2410_uartcfg universal_uartcfgs[] __initdata = {
 };
 
 static struct regulator_consumer_supply max8952_consumer =
-	REGULATOR_SUPPLY("vddarm", NULL);
+	REGULATOR_SUPPLY("vdd_arm", NULL);
 
 static struct max8952_platform_data universal_max8952_pdata __initdata = {
 	.gpio_vid0	= EXYNOS4_GPX0(3),
@@ -105,7 +105,7 @@ static struct max8952_platform_data universal_max8952_pdata __initdata = {
 };
 
 static struct regulator_consumer_supply lp3974_buck1_consumer =
-	REGULATOR_SUPPLY("vddint", NULL);
+	REGULATOR_SUPPLY("vdd_int", NULL);
 
 static struct regulator_consumer_supply lp3974_buck2_consumer =
 	REGULATOR_SUPPLY("vddg3d", NULL);
diff --git a/arch/arm/mach-exynos4/setup-usb-phy.c b/arch/arm/mach-exynos4/setup-usb-phy.c
index 0883c1b824b9..39aca045f660 100644
--- a/arch/arm/mach-exynos4/setup-usb-phy.c
+++ b/arch/arm/mach-exynos4/setup-usb-phy.c
@@ -82,7 +82,7 @@ static int exynos4_usb_phy1_init(struct platform_device *pdev)
 
 	rstcon &= ~(HOST_LINK_PORT_SWRST_MASK | PHY1_SWRST_MASK);
 	writel(rstcon, EXYNOS4_RSTCON);
-	udelay(50);
+	udelay(80);
 
 	clk_disable(otg_clk);
 	clk_put(otg_clk);
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig
index dc26fff22cf0..c8e7afcf14ec 100644
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -62,6 +62,7 @@ config ARCH_EBSA285_HOST
 config ARCH_NETWINDER
 	bool "NetWinder"
 	select CLKSRC_I8253
+	select CLKEVT_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 1331fff51ae2..18c32a5541d9 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -18,6 +18,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
+#include <video/vga.h>
 
 #include <asm/irq.h>
 #include <asm/system.h>
diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c
index 881add0fbe5b..b1ec2cf53bb0 100644
--- a/arch/arm/mach-imx/mach-cpuimx27.c
+++ b/arch/arm/mach-imx/mach-cpuimx27.c
@@ -310,7 +310,7 @@ static struct sys_timer eukrea_cpuimx27_timer = {
 	.init = eukrea_cpuimx27_timer_init,
 };
 
-MACHINE_START(CPUIMX27, "EUKREA CPUIMX27")
+MACHINE_START(EUKREA_CPUIMX27, "EUKREA CPUIMX27")
 	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c
index 10b89139da48..470b654b0e6e 100644
--- a/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/arch/arm/mach-imx/mach-cpuimx35.c
@@ -192,7 +192,7 @@ struct sys_timer eukrea_cpuimx35_timer = {
 	.init	= eukrea_cpuimx35_timer_init,
 };
 
-MACHINE_START(EUKREA_CPUIMX35, "Eukrea CPUIMX35")
+MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35")
 	/* Maintainer: Eukrea Electromatique */
 	.atag_offset = 0x100,
 	.map_io = mx35_map_io,
diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
index d8699b54268d..9163318e95a2 100644
--- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
+++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
@@ -161,7 +161,7 @@ static struct sys_timer eukrea_cpuimx25_timer = {
 	.init   = eukrea_cpuimx25_timer_init,
 };
 
-MACHINE_START(EUKREA_CPUIMX25, "Eukrea CPUIMX25")
+MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25")
 	/* Maintainer: Eukrea Electromatique */
 	.atag_offset = 0x100,
 	.map_io = mx25_map_io,
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index eaa95bdadcac..b19ae1805569 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -337,15 +337,15 @@ static unsigned long timer_reload;
 static void integrator_clocksource_init(u32 khz)
 {
 	void __iomem *base = (void __iomem *)TIMER2_VA_BASE;
-	u32 ctrl = TIMER_CTRL_ENABLE;
+	u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
 
 	if (khz >= 1500) {
 		khz /= 16;
-		ctrl = TIMER_CTRL_DIV16;
+		ctrl |= TIMER_CTRL_DIV16;
 	}
 
-	writel(ctrl, base + TIMER_CTRL);
 	writel(0xffff, base + TIMER_LOAD);
+	writel(ctrl, base + TIMER_CTRL);
 
 	clocksource_mmio_init(base + TIMER_VALUE, "timer2",
 		khz * 1000, 200, 16, clocksource_mmio_readl_down);
diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c
index ffd55b1c4396..b9b844683147 100644
--- a/arch/arm/mach-omap2/clock3xxx_data.c
+++ b/arch/arm/mach-omap2/clock3xxx_data.c
@@ -3078,6 +3078,7 @@ static struct clk gpt12_fck = {
 	.name		= "gpt12_fck",
 	.ops		= &clkops_null,
 	.parent		= &secure_32k_fck,
+	.clkdm_name	= "wkup_clkdm",
 	.recalc		= &followparent_recalc,
 };
 
@@ -3085,6 +3086,7 @@ static struct clk wdt1_fck = {
 	.name		= "wdt1_fck",
 	.ops		= &clkops_null,
 	.parent		= &secure_32k_fck,
+	.clkdm_name	= "wkup_clkdm",
 	.recalc		= &followparent_recalc,
 };
 
diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index 2af0e3f00ce1..c0b6fbda3408 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3376,10 +3376,18 @@ int __init omap4xxx_clk_init(void)
 	} else if (cpu_is_omap446x()) {
 		cpu_mask = RATE_IN_4460;
 		cpu_clkflg = CK_446X;
+	} else {
+		return 0;
 	}
 
 	clk_init(&omap2_clk_functions);
-	omap2_clk_disable_clkdm_control();
+
+	/*
+	 * Must stay commented until all OMAP SoC drivers are
+	 * converted to runtime PM, or drivers may start crashing
+	 *
+	 * omap2_clk_disable_clkdm_control();
+	 */
 
 	for (c = omap44xx_clks; c < omap44xx_clks + ARRAY_SIZE(omap44xx_clks);
 									  c++)
diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c
index ab7db083f97f..8f0890685d7b 100644
--- a/arch/arm/mach-omap2/clockdomain.c
+++ b/arch/arm/mach-omap2/clockdomain.c
@@ -747,6 +747,7 @@ int clkdm_wakeup(struct clockdomain *clkdm)
 	spin_lock_irqsave(&clkdm->lock, flags);
 	clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED;
 	ret = arch_clkdm->clkdm_wakeup(clkdm);
+	ret |= pwrdm_state_switch(clkdm->pwrdm.ptr);
 	spin_unlock_irqrestore(&clkdm->lock, flags);
 	return ret;
 }
@@ -818,6 +819,7 @@ void clkdm_deny_idle(struct clockdomain *clkdm)
 	spin_lock_irqsave(&clkdm->lock, flags);
 	clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED;
 	arch_clkdm->clkdm_deny_idle(clkdm);
+	pwrdm_state_switch(clkdm->pwrdm.ptr);
 	spin_unlock_irqrestore(&clkdm->lock, flags);
 }
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index 16743c7d6e8e..408193d8e044 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -192,6 +192,7 @@ static struct omap_hwmod_addr_space omap2430_usbhsotg_addrs[] = {
 		.pa_end		= OMAP243X_HS_BASE + SZ_4K - 1,
 		.flags		= ADDR_TYPE_RT
 	},
+	{ }
 };
 
 /*  l4_core ->usbhsotg  interface */
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 3feb35911a32..472bf22d5e84 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -130,7 +130,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
 		} else {
 			hwsup = clkdm_in_hwsup(pwrdm->pwrdm_clkdms[0]);
 			clkdm_wakeup(pwrdm->pwrdm_clkdms[0]);
-			pwrdm_wait_transition(pwrdm);
 			sleep_switch = FORCEWAKEUP_SWITCH;
 		}
 	}
@@ -156,7 +155,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
 		return ret;
 	}
 
-	pwrdm_wait_transition(pwrdm);
 	pwrdm_state_switch(pwrdm);
 err:
 	return ret;
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
index 9af08473bf10..ef71fdd40fc4 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -195,28 +195,35 @@ static int _pwrdm_post_transition_cb(struct powerdomain *pwrdm, void *unused)
 
 /**
  * pwrdm_init - set up the powerdomain layer
- * @pwrdm_list: array of struct powerdomain pointers to register
+ * @pwrdms: array of struct powerdomain pointers to register
  * @custom_funcs: func pointers for arch specific implementations
  *
- * Loop through the array of powerdomains @pwrdm_list, registering all
- * that are available on the current CPU. If pwrdm_list is supplied
- * and not null, all of the referenced powerdomains will be
- * registered.  No return value.  XXX pwrdm_list is not really a
- * "list"; it is an array.  Rename appropriately.
+ * Loop through the array of powerdomains @pwrdms, registering all
+ * that are available on the current CPU.  Also, program each
+ * powerdomain's target state to ON; this prevents domains from
+ * hitting low-power states (if the bootloader has target states set to
+ * something other than ON) and potentially even losing context while
+ * PM is not fully initialized.  The PM late init code can then program
+ * the desired target state for all the power domains.  No return
+ * value.
  */
-void pwrdm_init(struct powerdomain **pwrdm_list, struct pwrdm_ops *custom_funcs)
+void pwrdm_init(struct powerdomain **pwrdms, struct pwrdm_ops *custom_funcs)
 {
 	struct powerdomain **p = NULL;
+	struct powerdomain *temp_p;
 
 	if (!custom_funcs)
 		WARN(1, "powerdomain: No custom pwrdm functions registered\n");
 	else
 		arch_pwrdm = custom_funcs;
 
-	if (pwrdm_list) {
-		for (p = pwrdm_list; *p; p++)
+	if (pwrdms) {
+		for (p = pwrdms; *p; p++)
 			_pwrdm_register(*p);
 	}
+
+	list_for_each_entry(temp_p, &pwrdm_list, node)
+		pwrdm_set_next_pwrst(temp_p, PWRDM_POWER_ON);
 }
 
 /**
diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c
index e25f60f5db8f..9e5c1663fc4f 100644
--- a/arch/arm/mach-orion5x/dns323-setup.c
+++ b/arch/arm/mach-orion5x/dns323-setup.c
@@ -77,7 +77,7 @@ static int __init dns323_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	/*
 	 * Check for devices with hard-wired IRQs.
 	 */
-	irq = orion5x_pci_map_irq(const dev, slot, pin);
+	irq = orion5x_pci_map_irq(dev, slot, pin);
 	if (irq != -1)
 		return irq;
 
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index 28b8760ab9fa..bc4a920e26ee 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -14,6 +14,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/mbus.h>
+#include <video/vga.h>
 #include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
diff --git a/arch/arm/mach-prima2/clock.c b/arch/arm/mach-prima2/clock.c
index f9a2aaf63f71..615a4e75ceab 100644
--- a/arch/arm/mach-prima2/clock.c
+++ b/arch/arm/mach-prima2/clock.c
@@ -481,6 +481,7 @@ static void __init sirfsoc_clk_init(void)
 
 static struct of_device_id clkc_ids[] = {
 	{ .compatible = "sirf,prima2-clkc" },
+	{},
 };
 
 void __init sirfsoc_of_clk_init(void)
diff --git a/arch/arm/mach-prima2/irq.c b/arch/arm/mach-prima2/irq.c
index c3404cbb6ff7..7af254d046ba 100644
--- a/arch/arm/mach-prima2/irq.c
+++ b/arch/arm/mach-prima2/irq.c
@@ -51,6 +51,7 @@ static __init void sirfsoc_irq_init(void)
 
 static struct of_device_id intc_ids[]  = {
 	{ .compatible = "sirf,prima2-intc" },
+	{},
 };
 
 void __init sirfsoc_of_irq_init(void)
diff --git a/arch/arm/mach-prima2/rstc.c b/arch/arm/mach-prima2/rstc.c
index d074786e83d4..492cfa8d2610 100644
--- a/arch/arm/mach-prima2/rstc.c
+++ b/arch/arm/mach-prima2/rstc.c
@@ -19,6 +19,7 @@ static DEFINE_MUTEX(rstc_lock);
 
 static struct of_device_id rstc_ids[]  = {
 	{ .compatible = "sirf,prima2-rstc" },
+	{},
 };
 
 static int __init sirfsoc_of_rstc_init(void)
diff --git a/arch/arm/mach-prima2/timer.c b/arch/arm/mach-prima2/timer.c
index 44027f34a88a..ed7ec48d11da 100644
--- a/arch/arm/mach-prima2/timer.c
+++ b/arch/arm/mach-prima2/timer.c
@@ -190,6 +190,7 @@ static void __init sirfsoc_timer_init(void)
 
 static struct of_device_id timer_ids[] = {
 	{ .compatible = "sirf,prima2-tick" },
+	{},
 };
 
 static void __init sirfsoc_of_timer_map(void)
diff --git a/arch/arm/mach-realview/include/mach/system.h b/arch/arm/mach-realview/include/mach/system.h
index a30f2e3ec178..6657ff231161 100644
--- a/arch/arm/mach-realview/include/mach/system.h
+++ b/arch/arm/mach-realview/include/mach/system.h
@@ -44,6 +44,7 @@ static inline void arch_reset(char mode, const char *cmd)
 	 */
 	if (realview_reset)
 		realview_reset(mode);
+	dsb();
 }
 
 #endif
diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c
index 8bad64370689..055e2858b0dd 100644
--- a/arch/arm/mach-s3c64xx/pm.c
+++ b/arch/arm/mach-s3c64xx/pm.c
@@ -16,6 +16,7 @@
 #include <linux/suspend.h>
 #include <linux/serial_core.h>
 #include <linux/io.h>
+#include <linux/gpio.h>
 
 #include <mach/map.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-s5p64x0/irq-eint.c b/arch/arm/mach-s5p64x0/irq-eint.c
index 69ed4545112b..fe7380f5c3cd 100644
--- a/arch/arm/mach-s5p64x0/irq-eint.c
+++ b/arch/arm/mach-s5p64x0/irq-eint.c
@@ -129,7 +129,7 @@ static int s5p64x0_alloc_gc(void)
 	}
 
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
 	ct->chip.irq_set_type = s5p64x0_irq_eint_set_type;
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c
index 309e388a8a83..f149d278377b 100644
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -88,7 +88,7 @@ static struct sleep_save s5pv210_core_save[] = {
 	SAVE_ITEM(S3C2410_TCNTO(0)),
 };
 
-void s5pv210_cpu_suspend(unsigned long arg)
+static int s5pv210_cpu_suspend(unsigned long arg)
 {
 	unsigned long tmp;
 
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 167a67c5ca54..5fde49da399a 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -342,6 +342,7 @@ static struct platform_device mipidsi0_device = {
 static struct sh_mobile_sdhi_info sdhi0_info = {
 	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
 	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
+	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT,
 	.tmio_caps	= MMC_CAP_SD_HIGHSPEED,
 	.tmio_ocr_mask	= MMC_VDD_27_28 | MMC_VDD_28_29,
 };
@@ -383,7 +384,7 @@ void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state)
 }
 
 static struct sh_mobile_sdhi_info sh_sdhi1_info = {
-	.tmio_flags	= TMIO_MMC_WRPROTECT_DISABLE,
+	.tmio_flags	= TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
 	.tmio_caps	= MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ,
 	.tmio_ocr_mask	= MMC_VDD_32_33 | MMC_VDD_33_34,
 	.set_pwr	= ag5evm_sdhi1_set_pwr,
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index ec8f9d960e27..b622d8d3ab72 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1415,6 +1415,7 @@ static void __init ap4evb_init(void)
 	fsi_init_pm_clock();
 	sh7372_pm_init();
 	pm_clk_add(&fsi_device.dev, "spu2");
+	pm_clk_add(&lcdc1_device.dev, "hdmi");
 }
 
 static void __init ap4evb_timer_init(void)
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index 671925dcf5f9..de2253d7f157 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -642,6 +642,8 @@ static struct usbhs_private usbhs0_private = {
 		},
 		.driver_param = {
 			.buswait_bwait	= 4,
+			.d0_tx_id	= SHDMA_SLAVE_USB0_TX,
+			.d1_rx_id	= SHDMA_SLAVE_USB0_RX,
 		},
 	},
 };
@@ -811,6 +813,8 @@ static struct usbhs_private usbhs1_private = {
 			.buswait_bwait	= 4,
 			.pipe_type	= usbhs1_pipe_cfg,
 			.pipe_size	= ARRAY_SIZE(usbhs1_pipe_cfg),
+			.d0_tx_id	= SHDMA_SLAVE_USB1_TX,
+			.d1_rx_id	= SHDMA_SLAVE_USB1_RX,
 		},
 	},
 };
@@ -1591,6 +1595,7 @@ static void __init mackerel_init(void)
 	hdmi_init_pm_clock();
 	sh7372_pm_init();
 	pm_clk_add(&fsi_device.dev, "spu2");
+	pm_clk_add(&hdmi_lcdc_device.dev, "hdmi");
 }
 
 static void __init mackerel_timer_init(void)
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 6b1619a65dba..66975921e646 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -503,16 +503,17 @@ static struct clk *late_main_clks[] = {
 	&sh7372_fsidivb_clk,
 };
 
-enum { MSTP001,
+enum { MSTP001, MSTP000,
        MSTP131, MSTP130,
        MSTP129, MSTP128, MSTP127, MSTP126, MSTP125,
        MSTP118, MSTP117, MSTP116, MSTP113,
        MSTP106, MSTP101, MSTP100,
        MSTP223,
-       MSTP218, MSTP217, MSTP216,
-       MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
-       MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312,
-       MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403,
+       MSTP218, MSTP217, MSTP216, MSTP214, MSTP208, MSTP207,
+       MSTP206, MSTP205, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
+       MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312,
+       MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP407, MSTP406,
+       MSTP405, MSTP404, MSTP403, MSTP400,
        MSTP_NR };
 
 #define MSTP(_parent, _reg, _bit, _flags) \
@@ -520,6 +521,7 @@ enum { MSTP001,
 
 static struct clk mstp_clks[MSTP_NR] = {
 	[MSTP001] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR0, 1, 0), /* IIC2 */
+	[MSTP000] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR0, 0, 0), /* MSIOF0 */
 	[MSTP131] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 31, 0), /* VEU3 */
 	[MSTP130] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 30, 0), /* VEU2 */
 	[MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* VEU1 */
@@ -538,14 +540,16 @@ static struct clk mstp_clks[MSTP_NR] = {
 	[MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */
 	[MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */
 	[MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */
+	[MSTP214] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 14, 0), /* USBDMAC */
+	[MSTP208] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 8, 0), /* MSIOF1 */
 	[MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */
 	[MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */
+	[MSTP205] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 5, 0), /* MSIOF2 */
 	[MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */
 	[MSTP203] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 3, 0), /* SCIFA1 */
 	[MSTP202] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 2, 0), /* SCIFA2 */
 	[MSTP201] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 1, 0), /* SCIFA3 */
 	[MSTP200] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 0, 0), /* SCIFA4 */
-	[MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */
 	[MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, 0), /* FSI2 */
 	[MSTP323] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 23, 0), /* IIC1 */
 	[MSTP322] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 22, 0), /* USB0 */
@@ -557,8 +561,12 @@ static struct clk mstp_clks[MSTP_NR] = {
 	[MSTP413] = MSTP(&pllc1_div2_clk, SMSTPCR4, 13, 0), /* HDMI */
 	[MSTP411] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 11, 0), /* IIC3 */
 	[MSTP410] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 10, 0), /* IIC4 */
+	[MSTP407] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 7, 0), /* USB-DMAC1 */
 	[MSTP406] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 6, 0), /* USB1 */
+	[MSTP405] = MSTP(&r_clk, SMSTPCR4, 5, 0), /* CMT4 */
+	[MSTP404] = MSTP(&r_clk, SMSTPCR4, 4, 0), /* CMT3 */
 	[MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */
+	[MSTP400] = MSTP(&r_clk, SMSTPCR4, 0, 0), /* CMT2 */
 };
 
 static struct clk_lookup lookups[] = {
@@ -609,6 +617,7 @@ static struct clk_lookup lookups[] = {
 
 	/* MSTP32 clocks */
 	CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* IIC2 */
+	CLKDEV_DEV_ID("spi_sh_msiof.0", &mstp_clks[MSTP000]), /* MSIOF0 */
 	CLKDEV_DEV_ID("uio_pdrv_genirq.4", &mstp_clks[MSTP131]), /* VEU3 */
 	CLKDEV_DEV_ID("uio_pdrv_genirq.3", &mstp_clks[MSTP130]), /* VEU2 */
 	CLKDEV_DEV_ID("uio_pdrv_genirq.2", &mstp_clks[MSTP129]), /* VEU1 */
@@ -629,14 +638,16 @@ static struct clk_lookup lookups[] = {
 	CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */
 	CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */
 	CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */
+	CLKDEV_DEV_ID("sh-dma-engine.3", &mstp_clks[MSTP214]), /* USB-DMAC0 */
+	CLKDEV_DEV_ID("spi_sh_msiof.1", &mstp_clks[MSTP208]), /* MSIOF1 */
 	CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
 	CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */
+	CLKDEV_DEV_ID("spi_sh_msiof.2", &mstp_clks[MSTP205]), /* MSIOF2 */
 	CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */
 	CLKDEV_DEV_ID("sh-sci.1", &mstp_clks[MSTP203]), /* SCIFA1 */
 	CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP202]), /* SCIFA2 */
 	CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP201]), /* SCIFA3 */
 	CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP200]), /* SCIFA4 */
-	CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */
 	CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */
 	CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */
 	CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */
@@ -650,11 +661,17 @@ static struct clk_lookup lookups[] = {
 	CLKDEV_DEV_ID("sh-mobile-hdmi", &mstp_clks[MSTP413]), /* HDMI */
 	CLKDEV_DEV_ID("i2c-sh_mobile.3", &mstp_clks[MSTP411]), /* IIC3 */
 	CLKDEV_DEV_ID("i2c-sh_mobile.4", &mstp_clks[MSTP410]), /* IIC4 */
+	CLKDEV_DEV_ID("sh-dma-engine.4", &mstp_clks[MSTP407]), /* USB-DMAC1 */
 	CLKDEV_DEV_ID("r8a66597_hcd.1", &mstp_clks[MSTP406]), /* USB1 */
 	CLKDEV_DEV_ID("r8a66597_udc.1", &mstp_clks[MSTP406]), /* USB1 */
 	CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */
+	CLKDEV_DEV_ID("sh_cmt.4", &mstp_clks[MSTP405]), /* CMT4 */
+	CLKDEV_DEV_ID("sh_cmt.3", &mstp_clks[MSTP404]), /* CMT3 */
 	CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */
+	CLKDEV_DEV_ID("sh_cmt.2", &mstp_clks[MSTP400]), /* CMT2 */
 
+	CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1",
+		      &div6_reparent_clks[DIV6_HDMI]),
 	CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]),
 	CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]),
 	CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]),
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index 6db2ccabc2bf..61a846bb30f2 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -365,7 +365,7 @@ void __init sh73a0_clock_init(void)
 	__raw_writel(0x108, SD2CKCR);
 
 	/* detect main clock parent */
-	switch ((__raw_readl(CKSCR) >> 24) & 0x03) {
+	switch ((__raw_readl(CKSCR) >> 28) & 0x03) {
 	case 0:
 		main_clk.parent = &sh73a0_extal1_clk;
 		break;
diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h
index ce595cee86cd..24e63a85e669 100644
--- a/arch/arm/mach-shmobile/include/mach/sh7372.h
+++ b/arch/arm/mach-shmobile/include/mach/sh7372.h
@@ -459,6 +459,10 @@ enum {
 	SHDMA_SLAVE_SDHI2_TX,
 	SHDMA_SLAVE_MMCIF_RX,
 	SHDMA_SLAVE_MMCIF_TX,
+	SHDMA_SLAVE_USB0_TX,
+	SHDMA_SLAVE_USB0_RX,
+	SHDMA_SLAVE_USB1_TX,
+	SHDMA_SLAVE_USB1_RX,
 };
 
 extern struct clk sh7372_extal1_clk;
diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c
index 3b28743c77eb..739315e30eb9 100644
--- a/arch/arm/mach-shmobile/intc-sh7372.c
+++ b/arch/arm/mach-shmobile/intc-sh7372.c
@@ -379,7 +379,7 @@ enum {
 	/* BBIF2 */
 	VPU,
 	TSIF1,
-	_3DG_SGX530,
+	/* 3DG */
 	_2DDMAC,
 	IIC2_ALI2, IIC2_TACKI2, IIC2_WAITI2, IIC2_DTEI2,
 	IPMMU_IPMMUR, IPMMU_IPMMUR2,
@@ -436,7 +436,7 @@ static struct intc_vect intcs_vectors[] = {
 	/* BBIF2 */
 	INTCS_VECT(VPU, 0x980),
 	INTCS_VECT(TSIF1, 0x9a0),
-	INTCS_VECT(_3DG_SGX530, 0x9e0),
+	/* 3DG */
 	INTCS_VECT(_2DDMAC, 0xa00),
 	INTCS_VECT(IIC2_ALI2, 0xa80), INTCS_VECT(IIC2_TACKI2, 0xaa0),
 	INTCS_VECT(IIC2_WAITI2, 0xac0), INTCS_VECT(IIC2_DTEI2, 0xae0),
@@ -521,7 +521,7 @@ static struct intc_mask_reg intcs_mask_registers[] = {
 	    RTDMAC_1_DEI3, RTDMAC_1_DEI2, RTDMAC_1_DEI1, RTDMAC_1_DEI0 } },
 	{ 0xffd20198, 0xffd201d8, 8, /* IMR6SA / IMCR6SA */
 	  { 0, 0, MSIOF, 0,
-	    _3DG_SGX530, 0, 0, 0 } },
+	    0, 0, 0, 0 } },
 	{ 0xffd2019c, 0xffd201dc, 8, /* IMR7SA / IMCR7SA */
 	  { 0, TMU_TUNI2, TMU_TUNI1, TMU_TUNI0,
 	    0, 0, 0, 0 } },
@@ -561,7 +561,6 @@ static struct intc_prio_reg intcs_prio_registers[] = {
 					      TMU_TUNI2, TSIF1 } },
 	{ 0xffd2001c, 0, 16, 4, /* IPRHS */ { 0, 0, VEU, BEU } },
 	{ 0xffd20020, 0, 16, 4, /* IPRIS */ { 0, MSIOF, TSIF0, IIC0 } },
-	{ 0xffd20024, 0, 16, 4, /* IPRJS */ { 0, _3DG_SGX530, 0, 0 } },
 	{ 0xffd20028, 0, 16, 4, /* IPRKS */ { 0, 0, LMB, 0 } },
 	{ 0xffd2002c, 0, 16, 4, /* IPRLS */ { IPMMU, 0, 0, 0 } },
 	{ 0xffd20030, 0, 16, 4, /* IPRMS */ { IIC2, 0, 0, 0 } },
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index 79f0413d8725..2d9b1b1a2538 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -169,35 +169,35 @@ static struct platform_device scif6_device = {
 };
 
 /* CMT */
-static struct sh_timer_config cmt10_platform_data = {
-	.name = "CMT10",
-	.channel_offset = 0x10,
-	.timer_bit = 0,
+static struct sh_timer_config cmt2_platform_data = {
+	.name = "CMT2",
+	.channel_offset = 0x40,
+	.timer_bit = 5,
 	.clockevent_rating = 125,
 	.clocksource_rating = 125,
 };
 
-static struct resource cmt10_resources[] = {
+static struct resource cmt2_resources[] = {
 	[0] = {
-		.name	= "CMT10",
-		.start	= 0xe6138010,
-		.end	= 0xe613801b,
+		.name	= "CMT2",
+		.start	= 0xe6130040,
+		.end	= 0xe613004b,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= evt2irq(0x0b00), /* CMT1_CMT10 */
+		.start	= evt2irq(0x0b80), /* CMT2 */
 		.flags	= IORESOURCE_IRQ,
 	},
 };
 
-static struct platform_device cmt10_device = {
+static struct platform_device cmt2_device = {
 	.name		= "sh_cmt",
-	.id		= 10,
+	.id		= 2,
 	.dev = {
-		.platform_data	= &cmt10_platform_data,
+		.platform_data	= &cmt2_platform_data,
 	},
-	.resource	= cmt10_resources,
-	.num_resources	= ARRAY_SIZE(cmt10_resources),
+	.resource	= cmt2_resources,
+	.num_resources	= ARRAY_SIZE(cmt2_resources),
 };
 
 /* TMU */
@@ -602,6 +602,150 @@ static struct platform_device dma2_device = {
 	},
 };
 
+/*
+ * USB-DMAC
+ */
+
+unsigned int usbts_shift[] = {3, 4, 5};
+
+enum {
+	XMIT_SZ_8BYTE		= 0,
+	XMIT_SZ_16BYTE		= 1,
+	XMIT_SZ_32BYTE		= 2,
+};
+
+#define USBTS_INDEX2VAL(i) (((i) & 3) << 6)
+
+static const struct sh_dmae_channel sh7372_usb_dmae_channels[] = {
+	{
+		.offset = 0,
+	}, {
+		.offset = 0x20,
+	},
+};
+
+/* USB DMAC0 */
+static const struct sh_dmae_slave_config sh7372_usb_dmae0_slaves[] = {
+	{
+		.slave_id	= SHDMA_SLAVE_USB0_TX,
+		.chcr		= USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+	}, {
+		.slave_id	= SHDMA_SLAVE_USB0_RX,
+		.chcr		= USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+	},
+};
+
+static struct sh_dmae_pdata usb_dma0_platform_data = {
+	.slave		= sh7372_usb_dmae0_slaves,
+	.slave_num	= ARRAY_SIZE(sh7372_usb_dmae0_slaves),
+	.channel	= sh7372_usb_dmae_channels,
+	.channel_num	= ARRAY_SIZE(sh7372_usb_dmae_channels),
+	.ts_low_shift	= 6,
+	.ts_low_mask	= 0xc0,
+	.ts_high_shift	= 0,
+	.ts_high_mask	= 0,
+	.ts_shift	= usbts_shift,
+	.ts_shift_num	= ARRAY_SIZE(usbts_shift),
+	.dmaor_init	= DMAOR_DME,
+	.chcr_offset	= 0x14,
+	.chcr_ie_bit	= 1 << 5,
+	.dmaor_is_32bit	= 1,
+	.needs_tend_set	= 1,
+	.no_dmars	= 1,
+};
+
+static struct resource sh7372_usb_dmae0_resources[] = {
+	{
+		/* Channel registers and DMAOR */
+		.start	= 0xe68a0020,
+		.end	= 0xe68a0064 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		/* VCR/SWR/DMICR */
+		.start	= 0xe68a0000,
+		.end	= 0xe68a0014 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		/* IRQ for channels */
+		.start	= evt2irq(0x0a00),
+		.end	= evt2irq(0x0a00),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device usb_dma0_device = {
+	.name		= "sh-dma-engine",
+	.id		= 3,
+	.resource	= sh7372_usb_dmae0_resources,
+	.num_resources	= ARRAY_SIZE(sh7372_usb_dmae0_resources),
+	.dev		= {
+		.platform_data	= &usb_dma0_platform_data,
+	},
+};
+
+/* USB DMAC1 */
+static const struct sh_dmae_slave_config sh7372_usb_dmae1_slaves[] = {
+	{
+		.slave_id	= SHDMA_SLAVE_USB1_TX,
+		.chcr		= USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+	}, {
+		.slave_id	= SHDMA_SLAVE_USB1_RX,
+		.chcr		= USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+	},
+};
+
+static struct sh_dmae_pdata usb_dma1_platform_data = {
+	.slave		= sh7372_usb_dmae1_slaves,
+	.slave_num	= ARRAY_SIZE(sh7372_usb_dmae1_slaves),
+	.channel	= sh7372_usb_dmae_channels,
+	.channel_num	= ARRAY_SIZE(sh7372_usb_dmae_channels),
+	.ts_low_shift	= 6,
+	.ts_low_mask	= 0xc0,
+	.ts_high_shift	= 0,
+	.ts_high_mask	= 0,
+	.ts_shift	= usbts_shift,
+	.ts_shift_num	= ARRAY_SIZE(usbts_shift),
+	.dmaor_init	= DMAOR_DME,
+	.chcr_offset	= 0x14,
+	.chcr_ie_bit	= 1 << 5,
+	.dmaor_is_32bit	= 1,
+	.needs_tend_set	= 1,
+	.no_dmars	= 1,
+};
+
+static struct resource sh7372_usb_dmae1_resources[] = {
+	{
+		/* Channel registers and DMAOR */
+		.start	= 0xe68c0020,
+		.end	= 0xe68c0064 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		/* VCR/SWR/DMICR */
+		.start	= 0xe68c0000,
+		.end	= 0xe68c0014 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		/* IRQ for channels */
+		.start	= evt2irq(0x1d00),
+		.end	= evt2irq(0x1d00),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device usb_dma1_device = {
+	.name		= "sh-dma-engine",
+	.id		= 4,
+	.resource	= sh7372_usb_dmae1_resources,
+	.num_resources	= ARRAY_SIZE(sh7372_usb_dmae1_resources),
+	.dev		= {
+		.platform_data	= &usb_dma1_platform_data,
+	},
+};
+
 /* VPU */
 static struct uio_info vpu_platform_data = {
 	.name = "VPU5HG",
@@ -818,7 +962,7 @@ static struct platform_device *sh7372_early_devices[] __initdata = {
 	&scif4_device,
 	&scif5_device,
 	&scif6_device,
-	&cmt10_device,
+	&cmt2_device,
 	&tmu00_device,
 	&tmu01_device,
 };
@@ -829,6 +973,8 @@ static struct platform_device *sh7372_late_devices[] __initdata = {
 	&dma0_device,
 	&dma1_device,
 	&dma2_device,
+	&usb_dma0_device,
+	&usb_dma1_device,
 	&vpu_device,
 	&veu0_device,
 	&veu1_device,
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index de72058180a6..1fafc3244607 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -318,6 +318,10 @@ static struct clk v2m_sp804_clk = {
 	.rate	= 1000000,
 };
 
+static struct clk v2m_ref_clk = {
+	.rate   = 32768,
+};
+
 static struct clk dummy_apb_pclk;
 
 static struct clk_lookup v2m_lookups[] = {
@@ -348,6 +352,9 @@ static struct clk_lookup v2m_lookups[] = {
 	}, {	/* CLCD */
 		.dev_id		= "mb:clcd",
 		.clk		= &osc1_clk,
+	}, {	/* SP805 WDT */
+		.dev_id		= "mb:wdt",
+		.clk		= &v2m_ref_clk,
 	}, {	/* SP804 timers */
 		.dev_id		= "sp804",
 		.con_id		= "v2m-timer0",
diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S
index 52162d59407a..2cbf68ef0e83 100644
--- a/arch/arm/mm/abort-macro.S
+++ b/arch/arm/mm/abort-macro.S
@@ -17,7 +17,7 @@
 	cmp	\tmp, # 0x5600			@ Is it ldrsb?
 	orreq	\tmp, \tmp, #1 << 11		@ Set L-bit if yes
 	tst	\tmp, #1 << 11			@ L = 0 -> write
-	orreq	\psr, \psr, #1 << 11		@ yes.
+	orreq	\fsr, \fsr, #1 << 11		@ yes.
 	b	do_DataAbort
 not_thumb:
 	.endm
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 44c086710d2b..9ecfdb511951 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -277,6 +277,25 @@ static void l2x0_disable(void)
 	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
+static void __init l2x0_unlock(__u32 cache_id)
+{
+	int lockregs;
+	int i;
+
+	if (cache_id == L2X0_CACHE_ID_PART_L310)
+		lockregs = 8;
+	else
+		/* L210 and unknown types */
+		lockregs = 1;
+
+	for (i = 0; i < lockregs; i++) {
+		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+	}
+}
+
 void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 {
 	__u32 aux;
@@ -328,6 +347,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 	 * accessing the below registers will fault.
 	 */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		/* Make sure that I&D is not locked down when starting */
+		l2x0_unlock(cache_id);
 
 		/* l2x0 controller is disabled */
 		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
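
The unlock loop assumes the usual PL310 layout: eight data/instruction
lockdown register pairs, one per bus master, against a single pair on the
L210.  For reference, the offsets behind the symbolic names - values as in
the PL310 TRM; the companion cache-l2x0.h change is not shown in this
section:

	#define L2X0_LOCKDOWN_WAY_D_BASE	0x900	/* D lockdown, master 0 */
	#define L2X0_LOCKDOWN_WAY_I_BASE	0x904	/* I lockdown, master 0 */
	#define L2X0_LOCKDOWN_STRIDE		0x08	/* next master's pair */
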
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 64ab41348f30..34409a08ba0d 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -298,7 +298,7 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
 #ifdef CONFIG_HAVE_ARCH_PFN_VALID
 int pfn_valid(unsigned long pfn)
 {
-	return memblock_is_memory(pfn << PAGE_SHIFT);
+	return memblock_is_memory(__pfn_to_phys(pfn));
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif
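
The pfn_valid() fix is about integer width: pfn << PAGE_SHIFT is evaluated as unsigned long, which on 32-bit ARM truncates physical addresses at 4GiB, whereas __pfn_to_phys() widens to phys_addr_t first. A standalone illustration of the difference (assuming PAGE_SHIFT = 12, with the 32-bit case simulated by an explicit cast):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pfn = 0x100000;	/* first page at the 4GiB boundary */

	/* what a 32-bit unsigned long shift produces: wraps to 0 */
	uint32_t truncated = (uint32_t)(pfn << 12);
	/* what __pfn_to_phys() produces with a 64-bit phys_addr_t */
	uint64_t widened = pfn << 12;

	printf("%#x vs %#llx\n", truncated, (unsigned long long)widened);
	return 0;
}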
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 594d677b92c8..ea9c9f3e48bf 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -273,6 +273,14 @@ static struct mem_type mem_types[] = {
 		.prot_l1   = PMD_TYPE_TABLE,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_SO] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_MT_UNCACHED,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
+				PMD_SECT_UNCACHED | PMD_SECT_XN,
+		.domain    = DOMAIN_KERNEL,
+	},
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 92bd102e3982..88fb3d9e0640 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -382,28 +382,23 @@ ENTRY(cpu_arm920_set_pte_ext)
 .equ	cpu_arm920_suspend_size, 4 * 3
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_arm920_do_suspend)
-	stmfd	sp!, {r4 - r7, lr}
+	stmfd	sp!, {r4 - r6, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ PID
 	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r6, c2, c0, 0	@ TTB address
-	mrc	p15, 0, r7, c1, c0, 0	@ Control register
-	stmia	r0, {r4 - r7}
-	ldmfd	sp!, {r4 - r7, pc}
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(cpu_arm920_do_suspend)
 
 ENTRY(cpu_arm920_do_resume)
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
-	ldmia	r0, {r4 - r7}
+	ldmia	r0, {r4 - r6}
 	mcr	p15, 0, r4, c13, c0, 0	@ PID
 	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r6, c2, c0, 0	@ TTB address
-	mov	r0, r7			@ control register
-	mov	r2, r6, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_BIT4 | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_arm920_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 2bbcf053dffd..9f8fd91f918a 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -397,28 +397,23 @@ ENTRY(cpu_arm926_set_pte_ext)
 .equ	cpu_arm926_suspend_size, 4 * 3
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_arm926_do_suspend)
-	stmfd	sp!, {r4 - r7, lr}
+	stmfd	sp!, {r4 - r6, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ PID
 	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r6, c2, c0, 0	@ TTB address
-	mrc	p15, 0, r7, c1, c0, 0	@ Control register
-	stmia	r0, {r4 - r7}
-	ldmfd	sp!, {r4 - r7, pc}
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(cpu_arm926_do_suspend)
 
 ENTRY(cpu_arm926_do_resume)
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
-	ldmia	r0, {r4 - r7}
+	ldmia	r0, {r4 - r6}
 	mcr	p15, 0, r4, c13, c0, 0	@ PID
 	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r6, c2, c0, 0	@ TTB address
-	mov	r0, r7			@ control register
-	mov	r2, r6, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_BIT4 | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_arm926_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 07219c2ae114..7d91545d089b 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -168,34 +168,29 @@ ENTRY(cpu_sa1100_set_pte_ext)
 	mov	pc, lr
 
 .globl	cpu_sa1100_suspend_size
-.equ	cpu_sa1100_suspend_size, 4*4
+.equ	cpu_sa1100_suspend_size, 4 * 3
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_sa1100_do_suspend)
-	stmfd	sp!, {r4 - r7, lr}
+	stmfd	sp!, {r4 - r6, lr}
 	mrc	p15, 0, r4, c3, c0, 0		@ domain ID
-	mrc	p15, 0, r5, c2, c0, 0		@ translation table base addr
-	mrc	p15, 0, r6, c13, c0, 0		@ PID
-	mrc	p15, 0, r7, c1, c0, 0		@ control reg
-	stmia	r0, {r4 - r7}			@ store cp regs
-	ldmfd	sp!, {r4 - r7, pc}
+	mrc	p15, 0, r5, c13, c0, 0		@ PID
+	mrc	p15, 0, r6, c1, c0, 0		@ control reg
+	stmia	r0, {r4 - r6}			@ store cp regs
+	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(cpu_sa1100_do_suspend)
 
 ENTRY(cpu_sa1100_do_resume)
-	ldmia	r0, {r4 - r7}			@ load cp regs
-	mov	r1, #0
-	mcr	p15, 0, r1, c8, c7, 0		@ flush I+D TLBs
-	mcr	p15, 0, r1, c7, c7, 0		@ flush I&D cache
-	mcr	p15, 0, r1, c9, c0, 0		@ invalidate RB
-	mcr	p15, 0, r1, c9, c0, 5		@ allow user space to use RB
+	ldmia	r0, {r4 - r6}			@ load cp regs
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0		@ flush I+D TLBs
+	mcr	p15, 0, ip, c7, c7, 0		@ flush I&D cache
+	mcr	p15, 0, ip, c9, c0, 0		@ invalidate RB
+	mcr	p15, 0, ip, c9, c0, 5		@ allow user space to use RB
 
 	mcr	p15, 0, r4, c3, c0, 0		@ domain ID
-	mcr	p15, 0, r5, c2, c0, 0		@ translation table base addr
-	mcr	p15, 0, r6, c13, c0, 0		@ PID
-	mov	r0, r7				@ control register
-	mov	r2, r5, lsr #14			@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0		@ translation table base addr
+	mcr	p15, 0, r5, c13, c0, 0		@ PID
+	mov	r0, r6				@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_sa1100_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 219138d2f158..d061d2fa5506 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -128,20 +128,18 @@ ENTRY(cpu_v6_set_pte_ext)
 
 /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
 .globl	cpu_v6_suspend_size
-.equ	cpu_v6_suspend_size, 4 * 8
+.equ	cpu_v6_suspend_size, 4 * 6
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_v6_do_suspend)
-	stmfd	sp!, {r4 - r11, lr}
+	stmfd	sp!, {r4 - r9, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mrc	p15, 0, r5, c13, c0, 1	@ Context ID
-	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r7, c2, c0, 0	@ Translation table base 0
-	mrc	p15, 0, r8, c2, c0, 1	@ Translation table base 1
-	mrc	p15, 0, r9, c1, c0, 1	@ auxiliary control register
-	mrc	p15, 0, r10, c1, c0, 2	@ co-processor access control
-	mrc	p15, 0, r11, c1, c0, 0	@ control register
-	stmia	r0, {r4 - r11}
-	ldmfd	sp!, {r4- r11, pc}
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+	mrc	p15, 0, r7, c1, c0, 1	@ auxiliary control register
+	mrc	p15, 0, r8, c1, c0, 2	@ co-processor access control
+	mrc	p15, 0, r9, c1, c0, 0	@ control register
+	stmia	r0, {r4 - r9}
+	ldmfd	sp!, {r4 - r9, pc}
 ENDPROC(cpu_v6_do_suspend)
 
 ENTRY(cpu_v6_do_resume)
@@ -150,25 +148,21 @@ ENTRY(cpu_v6_do_resume)
 	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
 	mcr	p15, 0, ip, c7, c15, 0	@ clean+invalidate cache
 	mcr	p15, 0, ip, c7, c10, 4	@ drain write buffer
-	ldmia	r0, {r4 - r11}
+	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
+	ldmia	r0, {r4 - r9}
 	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mcr	p15, 0, r5, c13, c0, 1	@ Context ID
-	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r7, c2, c0, 0	@ Translation table base 0
-	mcr	p15, 0, r8, c2, c0, 1	@ Translation table base 1
-	mcr	p15, 0, r9, c1, c0, 1	@ auxiliary control register
-	mcr	p15, 0, r10, c1, c0, 2	@ co-processor access control
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
+	mcr	p15, 0, r1, c2, c0, 0	@ Translation table base 0
+	mcr	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+	mcr	p15, 0, r7, c1, c0, 1	@ auxiliary control register
+	mcr	p15, 0, r8, c1, c0, 2	@ co-processor access control
 	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
 	mcr	p15, 0, ip, c7, c5, 4	@ ISB
-	mov	r0, r11			@ control register
-	mov	r2, r7, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, cpu_resume_l1_flags
+	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_v6_do_resume)
-cpu_resume_l1_flags:
-	ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_SMP)
-	ALT_UP(.long  PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_UP)
 #endif
 
 	string	cpu_v6_name, "ARMv6-compatible processor"
@@ -223,6 +217,22 @@ __v6_setup:
 	mrc	p15, 0, r0, c1, c0, 0		@ read control register
 	bic	r0, r0, r5			@ clear bits
 	orr	r0, r0, r6			@ set them
+#ifdef CONFIG_ARM_ERRATA_364296
+	/*
+	 * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data
+	 * corruption with hit-under-miss enabled). The conditional code below
+	 * (setting the undocumented bit 31 in the auxiliary control register
+	 * and the FI bit in the control register) disables hit-under-miss
+	 * without putting the processor into full low interrupt latency mode.
+	 */
+	ldr	r6, =0x4107b362			@ id for ARM1136 r0p2
+	mrc	p15, 0, r5, c0, c0, 0		@ get processor id
+	teq	r5, r6				@ check for the faulty core
+	mrceq	p15, 0, r5, c1, c0, 1		@ load aux control reg
+	orreq	r5, r5, #(1 << 31)		@ set the undocumented bit 31
+	mcreq	p15, 0, r5, c1, c0, 1		@ write aux control reg
+	orreq	r0, r0, #(1 << 21)		@ low interrupt latency configuration
+#endif
 	mov	pc, lr				@ return to head.S:__ret
 
 	/*
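
The erratum check above matches the main ID register against 0x4107b362 before applying the workaround. The standard MIDR layout decodes that value as implementer 0x41 (ARM), variant 0 and revision 2 (that is, r0p2), part number 0xB36 (ARM1136). A small decoder, purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int midr = 0x4107b362;	/* the value matched above */

	printf("implementer %#x\n", (midr >> 24) & 0xff);	/* 0x41 = ARM */
	printf("variant     r%u\n", (midr >> 20) & 0xf);	/* r0 */
	printf("part        %#x\n", (midr >> 4) & 0xfff);	/* 0xb36 = ARM1136 */
	printf("revision    p%u\n", midr & 0xf);		/* p2 */
	return 0;
}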
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index a30e78542ccf..6af366ce0165 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -66,6 +66,7 @@ ENDPROC(cpu_v7_proc_fin)
 ENTRY(cpu_v7_reset)
 	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
 	bic	r1, r1, #0x1			@ ...............m
+ THUMB(	bic	r1, r1, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
 	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
 	isb
 	mov	pc, r0
@@ -216,53 +217,50 @@ ENDPROC(cpu_v7_set_pte_ext)
 
 /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
 .globl	cpu_v7_suspend_size
-.equ	cpu_v7_suspend_size, 4 * 9
+.equ	cpu_v7_suspend_size, 4 * 7
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_v7_do_suspend)
-	stmfd	sp!, {r4 - r11, lr}
+	stmfd	sp!, {r4 - r10, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mrc	p15, 0, r5, c13, c0, 1	@ Context ID
-	mrc	p15, 0, r6, c13, c0, 3	@ User r/o thread ID
-	stmia	r0!, {r4 - r6}
+	mrc	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
+	stmia	r0!, {r4 - r5}
 	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r7, c2, c0, 0	@ TTB 0
-	mrc	p15, 0, r8, c2, c0, 1	@ TTB 1
-	mrc	p15, 0, r9, c1, c0, 0	@ Control register
-	mrc	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
-	mrc	p15, 0, r11, c1, c0, 2	@ Co-processor access control
-	stmia	r0, {r6 - r11}
-	ldmfd	sp!, {r4 - r11, pc}
+	mrc	p15, 0, r7, c2, c0, 1	@ TTB 1
+	mrc	p15, 0, r8, c1, c0, 0	@ Control register
+	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register
+	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control
+	stmia	r0, {r6 - r10}
+	ldmfd	sp!, {r4 - r10, pc}
 ENDPROC(cpu_v7_do_suspend)
 
 ENTRY(cpu_v7_do_resume)
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
 	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
-	ldmia	r0!, {r4 - r6}
+	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
+	ldmia	r0!, {r4 - r5}
 	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mcr	p15, 0, r5, c13, c0, 1	@ Context ID
-	mcr	p15, 0, r6, c13, c0, 3	@ User r/o thread ID
-	ldmia	r0, {r6 - r11}
+	mcr	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
+	ldmia	r0, {r6 - r10}
 	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r7, c2, c0, 0	@ TTB 0
-	mcr	p15, 0, r8, c2, c0, 1	@ TTB 1
+	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB 0
+	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1
 	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
-	mcr	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
-	mcr	p15, 0, r11, c1, c0, 2	@ Co-processor access control
+	mrc	p15, 0, r4, c1, c0, 1	@ Read Auxiliary control register
+	teq	r4, r9			@ Is it already set?
+	mcrne	p15, 0, r9, c1, c0, 1	@ No, so write it
+	mcr	p15, 0, r10, c1, c0, 2	@ Co-processor access control
 	ldr	r4, =PRRR		@ PRRR
 	ldr	r5, =NMRR		@ NMRR
 	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
 	mcr	p15, 0, r5, c10, c2, 1	@ write NMRR
 	isb
-	mov	r0, r9			@ control register
-	mov	r2, r7, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, cpu_resume_l1_flags
+	dsb
+	mov	r0, r8			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_v7_do_resume)
-cpu_resume_l1_flags:
-	ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_SMP)
-	ALT_UP(.long  PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_UP)
 #endif
 
 	__CPUINIT
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 28c72a2006a1..abf0507a08ae 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -406,24 +406,23 @@ ENTRY(cpu_xsc3_set_pte_ext)
 	.align
 
 .globl	cpu_xsc3_suspend_size
-.equ	cpu_xsc3_suspend_size, 4 * 8
+.equ	cpu_xsc3_suspend_size, 4 * 6
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_xsc3_do_suspend)
-	stmfd	sp!, {r4 - r10, lr}
+	stmfd	sp!, {r4 - r9, lr}
 	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
 	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mrc	p15, 0, r6, c13, c0, 0	@ PID
 	mrc 	p15, 0, r7, c3, c0, 0	@ domain ID
-	mrc 	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mrc	p15, 0, r9, c1, c0, 1	@ auxiliary control reg
-	mrc 	p15, 0, r10, c1, c0, 0	@ control reg
+	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mrc 	p15, 0, r9, c1, c0, 0	@ control reg
 	bic	r4, r4, #2		@ clear frequency change bit
-	stmia	r0, {r1, r4 - r10}	@ store v:p offset + cp regs
-	ldmia	sp!, {r4 - r10, pc}
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmia	sp!, {r4 - r9, pc}
 ENDPROC(cpu_xsc3_do_suspend)
 
 ENTRY(cpu_xsc3_do_resume)
-	ldmia	r0, {r1, r4 - r10}	@ load v:p offset + cp regs
+	ldmia	r0, {r4 - r9}		@ load cp regs
 	mov	ip, #0
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
 	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
@@ -433,15 +432,10 @@ ENTRY(cpu_xsc3_do_resume)
 	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mcr	p15, 0, r6, c13, c0, 0	@ PID
 	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
-	mcr	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mcr	p15, 0, r9, c1, c0, 1	@ auxiliary control reg
-
-	@ temporarily map resume_turn_on_mmu into the page table,
-	@ otherwise prefetch abort occurs after MMU is turned on
-	mov	r0, r10			@ control register
-	mov	r2, r8, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =0x542e		@ section flags
+	orr	r1, r1, #0x18		@ cache the page table in L2
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_xsc3_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index fbc06e55b87a..3277904bebaf 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -520,24 +520,23 @@ ENTRY(cpu_xscale_set_pte_ext)
 	.align
 
 .globl	cpu_xscale_suspend_size
-.equ	cpu_xscale_suspend_size, 4 * 7
+.equ	cpu_xscale_suspend_size, 4 * 6
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_xscale_do_suspend)
-	stmfd	sp!, {r4 - r10, lr}
+	stmfd	sp!, {r4 - r9, lr}
 	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
 	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mrc	p15, 0, r6, c13, c0, 0	@ PID
 	mrc	p15, 0, r7, c3, c0, 0	@ domain ID
-	mrc	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mrc	p15, 0, r9, c1, c1, 0	@ auxiliary control reg
-	mrc	p15, 0, r10, c1, c0, 0	@ control reg
+	mrc	p15, 0, r8, c1, c1, 0	@ auxiliary control reg
+	mrc	p15, 0, r9, c1, c0, 0	@ control reg
 	bic	r4, r4, #2		@ clear frequency change bit
-	stmia	r0, {r4 - r10}		@ store cp regs
-	ldmfd	sp!, {r4 - r10, pc}
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmfd	sp!, {r4 - r9, pc}
 ENDPROC(cpu_xscale_do_suspend)
 
 ENTRY(cpu_xscale_do_resume)
-	ldmia	r0, {r4 - r10}		@ load cp regs
+	ldmia	r0, {r4 - r9}		@ load cp regs
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
@@ -545,13 +544,9 @@ ENTRY(cpu_xscale_do_resume)
 	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mcr	p15, 0, r6, c13, c0, 0	@ PID
 	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
-	mcr	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mcr	p15, 0, r9, c1, c1, 0	@ auxiliary control reg
-	mov	r0, r10			@ control register
-	mov	r2, r8, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c1, 0	@ auxiliary control reg
+	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_xscale_do_resume)
 #endif
diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c
index b6b409744954..02609eee0562 100644
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -615,6 +615,9 @@ static int _od_resume_noirq(struct device *dev)
 
 	return pm_generic_resume_noirq(dev);
 }
+#else
+#define _od_suspend_noirq NULL
+#define _od_resume_noirq NULL
 #endif
 
 static struct dev_pm_domain omap_device_pm_domain = {
@@ -622,7 +625,8 @@ static struct dev_pm_domain omap_device_pm_domain = {
 		SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume,
 				   _od_runtime_idle)
 		USE_PLATFORM_PM_SLEEP_OPS
-		SET_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, _od_resume_noirq)
+		.suspend_noirq = _od_suspend_noirq,
+		.resume_noirq = _od_resume_noirq,
 	}
 };
 
diff --git a/arch/arm/plat-s5p/clock.c b/arch/arm/plat-s5p/clock.c
index 02af235298e2..5f84a3f13ef9 100644
--- a/arch/arm/plat-s5p/clock.c
+++ b/arch/arm/plat-s5p/clock.c
@@ -192,7 +192,7 @@ unsigned long s5p_spdif_get_rate(struct clk *clk)
 	if (IS_ERR(pclk))
 		return -EINVAL;
 
-	rate = pclk->ops->get_rate(clk);
+	rate = pclk->ops->get_rate(pclk);
 	clk_put(pclk);
 
 	return rate;
diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c
index 327ab9f662e8..f71078ef6bb5 100644
--- a/arch/arm/plat-s5p/irq-gpioint.c
+++ b/arch/arm/plat-s5p/irq-gpioint.c
@@ -23,6 +23,8 @@
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
 
+#include <asm/mach/irq.h>
+
 #define GPIO_BASE(chip)		(((unsigned long)(chip)->base) & 0xFFFFF000u)
 
 #define CON_OFFSET		0x700
@@ -81,6 +83,9 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc)
 	int group, pend_offset, mask_offset;
 	unsigned int pend, mask;
 
+	struct irq_chip *chip = irq_get_chip(irq);
+	chained_irq_enter(chip, desc);
+
 	for (group = 0; group < bank->nr_groups; group++) {
 		struct s3c_gpio_chip *chip = bank->chips[group];
 		if (!chip)
@@ -102,6 +107,7 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc)
 			pend &= ~BIT(offset);
 		}
 	}
+	chained_irq_exit(chip, desc);
 }
 
 static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip)
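
Both this handler and the irq-vic-timer.c one below gain the same chained_irq_enter()/chained_irq_exit() bracketing. A chained handler runs while the parent interrupt is still active on the parent controller; the helpers perform the mask/ack on entry and unmask/eoi on exit that a flow handler would otherwise have done, which matters on fasteoi controllers such as the GIC. The general shape of the pattern, sketched with a hypothetical pending register and child IRQ base:

#include <linux/io.h>
#include <linux/irq.h>
#include <linux/bitops.h>
#include <asm/mach/irq.h>

static void __iomem *pend_reg;		/* assumed: board pending register */
static unsigned int child_irq_base;	/* assumed: first demuxed IRQ */

static void board_demux_handler(unsigned int irq, struct irq_desc *desc)
{
	struct irq_chip *chip = irq_get_chip(irq);
	unsigned long pending;
	int bit;

	chained_irq_enter(chip, desc);		/* mask/ack the parent IRQ */

	pending = readl_relaxed(pend_reg);
	for_each_set_bit(bit, &pending, 32)
		generic_handle_irq(child_irq_base + bit);

	chained_irq_exit(chip, desc);		/* unmask/eoi the parent IRQ */
}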
diff --git a/arch/arm/plat-samsung/include/plat/backlight.h b/arch/arm/plat-samsung/include/plat/backlight.h
index 51d8da846a62..ad530c78fe8c 100644
--- a/arch/arm/plat-samsung/include/plat/backlight.h
+++ b/arch/arm/plat-samsung/include/plat/backlight.h
@@ -20,7 +20,7 @@ struct samsung_bl_gpio_info {
 	int func;
 };
 
-extern void samsung_bl_set(struct samsung_bl_gpio_info *gpio_info,
+extern void __init samsung_bl_set(struct samsung_bl_gpio_info *gpio_info,
 	struct platform_pwm_backlight_data *bl_data);
 
 #endif /* __ASM_PLAT_BACKLIGHT_H */
diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c
index f714d060370d..51583cd30164 100644
--- a/arch/arm/plat-samsung/irq-vic-timer.c
+++ b/arch/arm/plat-samsung/irq-vic-timer.c
@@ -22,9 +22,14 @@
 #include <plat/irq-vic-timer.h>
 #include <plat/regs-timer.h>
 
+#include <asm/mach/irq.h>
+
 static void s3c_irq_demux_vic_timer(unsigned int irq, struct irq_desc *desc)
 {
+	struct irq_chip *chip = irq_get_chip(irq);
+	chained_irq_enter(chip, desc);
 	generic_handle_irq((int)desc->irq_data.handler_data);
+	chained_irq_exit(chip, desc);
 }
 
 /* We assume the IRQ_TIMER0..IRQ_TIMER4 range is continuous. */
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index fff68d0d521b..62cc8f981171 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -351,7 +351,7 @@ centro			MACH_CENTRO		CENTRO			1944
 nokia_rx51		MACH_NOKIA_RX51		NOKIA_RX51		1955
 omap_zoom2		MACH_OMAP_ZOOM2		OMAP_ZOOM2		1967
 cpuat9260		MACH_CPUAT9260		CPUAT9260		1973
-eukrea_cpuimx27		MACH_CPUIMX27		CPUIMX27		1975
+eukrea_cpuimx27		MACH_EUKREA_CPUIMX27	EUKREA_CPUIMX27		1975
 acs5k			MACH_ACS5K		ACS5K			1982
 snapper_9260		MACH_SNAPPER_9260	SNAPPER_9260		1987
 dsm320			MACH_DSM320		DSM320			1988
@@ -476,8 +476,8 @@ cns3420vb		MACH_CNS3420VB		CNS3420VB		2776
 omap4_panda		MACH_OMAP4_PANDA	OMAP4_PANDA		2791
 ti8168evm		MACH_TI8168EVM		TI8168EVM		2800
 teton_bga		MACH_TETON_BGA		TETON_BGA		2816
-eukrea_cpuimx25sd	MACH_EUKREA_CPUIMX25	EUKREA_CPUIMX25		2820
-eukrea_cpuimx35sd	MACH_EUKREA_CPUIMX35	EUKREA_CPUIMX35		2821
+eukrea_cpuimx25sd	MACH_EUKREA_CPUIMX25SD	EUKREA_CPUIMX25SD	2820
+eukrea_cpuimx35sd	MACH_EUKREA_CPUIMX35SD	EUKREA_CPUIMX35SD	2821
 eukrea_cpuimx51sd	MACH_EUKREA_CPUIMX51SD	EUKREA_CPUIMX51SD	2822
 eukrea_cpuimx51		MACH_EUKREA_CPUIMX51	EUKREA_CPUIMX51		2823
 smdkc210		MACH_SMDKC210		SMDKC210		2838
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 79bcb4316930..0cbd5a0a9332 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/notifier.h>
 #include <linux/signal.h>
@@ -68,7 +69,7 @@ static bool vfp_state_in_hw(unsigned int cpu, struct thread_info *thread)
 /*
  * Force a reload of the VFP context from the thread structure.  We do
  * this by ensuring that access to the VFP hardware is disabled, and
- * clear last_VFP_context.  Must be called from non-preemptible context.
+ * clearing vfp_current_hw_state.  Must be called from non-preemptible context.
  */
 static void vfp_force_reload(unsigned int cpu, struct thread_info *thread)
 {
@@ -436,9 +437,7 @@ static void vfp_enable(void *unused)
 	set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11));
 }
 
-#ifdef CONFIG_PM
-#include <linux/syscore_ops.h>
-
+#ifdef CONFIG_CPU_PM
 static int vfp_pm_suspend(void)
 {
 	struct thread_info *ti = current_thread_info();
@@ -468,19 +467,33 @@ static void vfp_pm_resume(void)
 	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
 }
 
-static struct syscore_ops vfp_pm_syscore_ops = {
-	.suspend	= vfp_pm_suspend,
-	.resume		= vfp_pm_resume,
+static int vfp_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd,
+	void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		vfp_pm_suspend();
+		break;
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		vfp_pm_resume();
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vfp_cpu_pm_notifier_block = {
+	.notifier_call = vfp_cpu_pm_notifier,
 };
 
 static void vfp_pm_init(void)
 {
-	register_syscore_ops(&vfp_pm_syscore_ops);
+	cpu_pm_register_notifier(&vfp_cpu_pm_notifier_block);
 }
 
 #else
 static inline void vfp_pm_init(void) { }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_CPU_PM */
 
 /*
  * Ensure that the VFP state stored in 'thread->vfpstate' is up to date
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index c7fd394d28a4..6eba53530d1c 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -158,7 +158,7 @@ sys_call_table:
 	.long	sys_sched_rr_get_interval
 	.long	sys_nanosleep
 	.long	sys_poll
-	.long	sys_nfsservctl		/* 145 */
+	.long	sys_ni_syscall		/* 145 was nfsservctl */
 	.long	sys_setresgid
 	.long	sys_getresgid
 	.long	sys_prctl
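
This entry and the matching ones across every architecture below retire the nfsservctl(2) system call by pointing its slot at sys_ni_syscall instead of renumbering the table, so existing binaries keep their syscall numbers and the retired slot simply returns -ENOSYS. From userspace the change looks like this (145 is the avr32 number shown above; other architectures use their own):

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	long ret = syscall(145, 0, NULL, NULL);	/* old nfsservctl slot */

	if (ret == -1 && errno == ENOSYS)
		printf("nfsservctl: ENOSYS, slot retired\n");
	return 0;
}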
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 225d311c9701..e4137297b790 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1543,7 +1543,7 @@ ENTRY(_sys_call_table)
 	.long _sys_ni_syscall	/* for vm86 */
 	.long _sys_ni_syscall	/* old "query_module" */
 	.long _sys_ni_syscall	/* sys_poll */
-	.long _sys_nfsservctl
+	.long _sys_ni_syscall   /* old nfsservctl */
 	.long _sys_setresgid	/* setresgid16 */	/* 170 */
 	.long _sys_getresgid	/* getresgid16 */
 	.long _sys_prctl
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 1161883eb582..592fbe9dfb62 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -771,7 +771,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* sys_vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall    /* old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 84fed7e91ada..c3ea4694fbaf 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -714,7 +714,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* sys_vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* Old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/cris/include/asm/serial.h b/arch/cris/include/asm/serial.h
new file mode 100644
index 000000000000..af7535a955fb
--- /dev/null
+++ b/arch/cris/include/asm/serial.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_SERIAL_H
+#define _ASM_SERIAL_H
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ */
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* _ASM_SERIAL_H */
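
BASE_BAUD works out to 1843200 / 16 = 115200: an 8250-style UART samples each bit 16 times, so the highest usable baud rate is clock/16 and the divisor latch is programmed with BASE_BAUD / baud. A quick sketch of that arithmetic (the helper is illustrative, not kernel code):

#define BASE_BAUD (1843200 / 16)	/* = 115200 */

static unsigned int uart_divisor(unsigned int baud)
{
	return BASE_BAUD / baud;	/* e.g. 9600 baud -> divisor 12 */
}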
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 017d6d7b784f..5ba23f715ea5 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1358,7 +1358,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* for vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* Old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index f4b2e67bcc34..4be2ea2fbe26 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -183,7 +183,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* for vm86 */
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_query_module */
 	.long SYMBOL_NAME(sys_poll)
-	.long SYMBOL_NAME(sys_nfsservctl)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* old nfsservctl */
 	.long SYMBOL_NAME(sys_setresgid16)	/* 170 */
 	.long SYMBOL_NAME(sys_getresgid16)
 	.long SYMBOL_NAME(sys_prctl)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 97dd2abdeb1a..198c753d1006 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1614,7 +1614,7 @@ sys_call_table:
 	data8 sys_sched_get_priority_min
 	data8 sys_sched_rr_get_interval
 	data8 sys_nanosleep
-	data8 sys_nfsservctl
+	data8 sys_ni_syscall			// old nfsservctl
 	data8 sys_prctl				// 1170
 	data8 sys_getpagesize
 	data8 sys_mmap2
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 528f2e6ad064..f365c19795ef 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
 	.long sys_tas			/* vm86 syscall holder */
 	.long sys_ni_syscall		/* query_module syscall holder */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall		/* was nfsservctl */
 	.long sys_setresgid		/* 170 */
 	.long sys_getresgid
 	.long sys_prctl
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 31d5570d6567..89f201434b5a 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
 	pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn);		\
 })
 #define page_to_pfn(_page) ({						\
-	struct page *__p = (_page);					\
+	const struct page *__p = (_page);				\
 	struct pglist_data *pgdat;					\
 	pgdat = &pg_data_map[page_to_nid(__p)];				\
 	((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn;		\
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 00d1452f9571..c468f2edaa85 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -189,7 +189,7 @@ ENTRY(sys_call_table)
 	.long sys_getpagesize
 	.long sys_ni_syscall		/* old "query_module" */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall		/* old nfsservctl */
 	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index d915a122c865..8789daa2a346 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -173,7 +173,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall		/* sys_vm86 */
 	.long sys_ni_syscall		/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall		/* old nfsservctl */
 	.long sys_setresgid		/* 170 */
 	.long sys_getresgid
 	.long sys_prctl
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index e521420a45a5..865bc7a6f5a1 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -424,7 +424,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_getresuid		3
 	sys	sys_ni_syscall		0	/* was sys_query_module */
 	sys	sys_poll		3
-	sys	sys_nfsservctl		3
+	sys	sys_ni_syscall		0	/* was nfsservctl */
 	sys	sys_setresgid		3	/* 4190 */
 	sys	sys_getresgid		3
 	sys	sys_prctl		5
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 85874d6a8a70..fb7334bea731 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -299,7 +299,7 @@ sys_call_table:
 	PTR	sys_ni_syscall			/* 5170, was get_kernel_syms */
 	PTR	sys_ni_syscall			/* was query_module */
 	PTR	sys_quotactl
-	PTR	sys_nfsservctl
+	PTR	sys_ni_syscall			/* was nfsservctl */
 	PTR	sys_ni_syscall			/* res. for getpmsg */
 	PTR	sys_ni_syscall			/* 5175  for putpmsg */
 	PTR	sys_ni_syscall			/* res. for afs_syscall */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b85842fc87ae..f9296e894e46 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -294,7 +294,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ni_syscall			/* 6170, was get_kernel_syms */
 	PTR	sys_ni_syscall			/* was query_module */
 	PTR	sys_quotactl
-	PTR	compat_sys_nfsservctl
+	PTR	sys_ni_syscall			/* was nfsservctl */
 	PTR	sys_ni_syscall			/* res. for getpmsg */
 	PTR	sys_ni_syscall			/* 6175  for putpmsg */
 	PTR	sys_ni_syscall			/* res. for afs_syscall */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 46c4763edf21..4d7c9827706f 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -392,7 +392,7 @@ sys_call_table:
 	PTR	sys_getresuid
 	PTR	sys_ni_syscall			/* was query_module */
 	PTR	sys_poll
-	PTR	compat_sys_nfsservctl
+	PTR	sys_ni_syscall			/* was nfsservctl */
 	PTR	sys_setresgid			/* 4190 */
 	PTR	sys_getresgid
 	PTR	sys_prctl
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index ae435e1d5669..3e3620d9fc45 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -589,7 +589,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* was nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h
index 052f877b52a5..60b472233900 100644
--- a/arch/openrisc/include/asm/dma-mapping.h
+++ b/arch/openrisc/include/asm/dma-mapping.h
@@ -31,7 +31,6 @@
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
-int dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
@@ -47,6 +46,12 @@ dma_addr_t or1k_map_page(struct device *dev, struct page *page,
 void or1k_unmap_page(struct device *dev, dma_addr_t dma_handle,
 		     size_t size, enum dma_data_direction dir,
 		     struct dma_attrs *attrs);
+int or1k_map_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir,
+		struct dma_attrs *attrs);
+void or1k_unmap_sg(struct device *dev, struct scatterlist *sg,
+		   int nents, enum dma_data_direction dir,
+		   struct dma_attrs *attrs);
 void or1k_sync_single_for_cpu(struct device *dev,
 			      dma_addr_t dma_handle, size_t size,
 			      enum dma_data_direction dir);
@@ -98,6 +103,51 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
 	debug_dma_unmap_page(dev, addr, size, dir, true);
 }
 
+static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,
+				   int nents, enum dma_data_direction dir)
+{
+	int i, ents;
+	struct scatterlist *s;
+
+	for_each_sg(sg, s, nents, i)
+		kmemcheck_mark_initialized(sg_virt(s), s->length);
+	BUG_ON(!valid_dma_direction(dir));
+	ents = or1k_map_sg(dev, sg, nents, dir, NULL);
+	debug_dma_map_sg(dev, sg, nents, ents, dir);
+
+	return ents;
+}
+
+static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+				      int nents, enum dma_data_direction dir)
+{
+	BUG_ON(!valid_dma_direction(dir));
+	debug_dma_unmap_sg(dev, sg, nents, dir);
+	or1k_unmap_sg(dev, sg, nents, dir, NULL);
+}
+
+static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
+				      size_t offset, size_t size,
+				      enum dma_data_direction dir)
+{
+	dma_addr_t addr;
+
+	kmemcheck_mark_initialized(page_address(page) + offset, size);
+	BUG_ON(!valid_dma_direction(dir));
+	addr = or1k_map_page(dev, page, offset, size, dir, NULL);
+	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
+
+	return addr;
+}
+
+static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
+				  size_t size, enum dma_data_direction dir)
+{
+	BUG_ON(!valid_dma_direction(dir));
+	or1k_unmap_page(dev, addr, size, dir, NULL);
+	debug_dma_unmap_page(dev, addr, size, dir, true);
+}
+
 static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
 					   size_t size,
 					   enum dma_data_direction dir)
@@ -119,7 +169,12 @@ static inline void dma_sync_single_for_device(struct device *dev,
 static inline int dma_supported(struct device *dev, u64 dma_mask)
 {
 	/* Support 32 bit DMA mask exclusively */
-	return dma_mask == 0xffffffffULL;
+	return dma_mask == DMA_BIT_MASK(32);
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
 }
 
 static inline int dma_set_mask(struct device *dev, u64 dma_mask)
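
With or1k_map_sg()/or1k_unmap_sg() in place, OpenRISC drivers can use the standard scatter-gather DMA API; the inline wrappers above add the direction checks and debug hooks around the arch hooks. A hypothetical driver fragment showing the intended call sequence:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Map an sg list for a device read of the buffers, then unmap. */
static int example_xfer(struct device *dev, struct scatterlist *sgl, int nents)
{
	int mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);

	if (mapped == 0)
		return -EIO;	/* mapping failed */

	/* ... hand sg_dma_address()/sg_dma_len() of each entry to the device ... */

	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
	return 0;
}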
diff --git a/arch/openrisc/include/asm/sigcontext.h b/arch/openrisc/include/asm/sigcontext.h
index 54a5c50132e3..b79c2b19afbe 100644
--- a/arch/openrisc/include/asm/sigcontext.h
+++ b/arch/openrisc/include/asm/sigcontext.h
@@ -23,16 +23,11 @@
 
 /* This struct is saved by setup_frame in signal.c, to keep the current
    context while a signal handler is executed. It's restored by sys_sigreturn.
-
-   To keep things simple, we use pt_regs here even though normally you just
-   specify the list of regs to save. Then we can use copy_from_user on the
-   entire regs instead of a bunch of get_user's as well...
 */
 
 struct sigcontext {
-	struct pt_regs regs;  /* needs to be first */
+	struct user_regs_struct regs;  /* needs to be first */
 	unsigned long oldmask;
-	unsigned long usp;    /* usp before stacking this gunk on it */
 };
 
 #endif /* __ASM_OPENRISC_SIGCONTEXT_H */
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 968d3ee477e3..f1c8ee2895d0 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -154,6 +154,33 @@ void or1k_unmap_page(struct device *dev, dma_addr_t dma_handle,
 	/* Nothing special to do here... */
 }
 
+int or1k_map_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = or1k_map_page(dev, sg_page(s), s->offset,
+					       s->length, dir, NULL);
+	}
+
+	return nents;
+}
+
+void or1k_unmap_sg(struct device *dev, struct scatterlist *sg,
+		   int nents, enum dma_data_direction dir,
+		   struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		or1k_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, NULL);
+	}
+}
+
 void or1k_sync_single_for_cpu(struct device *dev,
 			      dma_addr_t dma_handle, size_t size,
 			      enum dma_data_direction dir)
@@ -187,5 +214,4 @@ static int __init dma_init(void)
 
 	return 0;
 }
-
 fs_initcall(dma_init);
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 5f759c76834e..95207ab0c99e 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -52,31 +52,25 @@ struct rt_sigframe {
 static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
 {
 	unsigned int err = 0;
-	unsigned long old_usp;
 
 	/* Always make any pending restarted system call return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-	/* restore the regs from &sc->regs (same as sc, since regs is first)
+	/*
+	 * Restore the regs from &sc->regs.
 	 * (sc is already checked for VERIFY_READ since the sigframe was
 	 *  checked in sys_sigreturn previously)
 	 */
-
-	if (__copy_from_user(regs, sc, sizeof(struct pt_regs)))
+	if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)))
+		goto badframe;
+	if (__copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long)))
+		goto badframe;
+	if (__copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long)))
 		goto badframe;
 
 	/* make sure the SM-bit is cleared so user-mode cannot fool us */
 	regs->sr &= ~SPR_SR_SM;
 
-	/* restore the old USP as it was before we stacked the sc etc.
-	 * (we cannot just pop the sigcontext since we aligned the sp and
-	 *  stuff after pushing it)
-	 */
-
-	err |= __get_user(old_usp, &sc->usp);
-
-	regs->sp = old_usp;
-
 	/* TODO: the other ports use regs->orig_XX to disable syscall checks
 	 * after this completes, but we don't use that mechanism. Maybe we can
 	 * use it now?
@@ -137,18 +131,17 @@ static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
 			    unsigned long mask)
 {
 	int err = 0;
-	unsigned long usp = regs->sp;
 
-	/* copy the regs. they are first in sc so we can use sc directly */
+	/* copy the regs */
 
-	err |= __copy_to_user(sc, regs, sizeof(struct pt_regs));
+	err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
+	err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
+	err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
 
 	/* then some other stuff */
 
 	err |= __put_user(mask, &sc->oldmask);
 
-	err |= __put_user(usp, &sc->usp);
-
 	return err;
 }
 
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index e66366fd2abc..3735abd7f8f6 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -259,7 +259,7 @@
 	ENTRY_SAME(ni_syscall)		/* query_module */
 	ENTRY_SAME(poll)
 	/* structs contain pointers and an in_addr... */
-	ENTRY_COMP(nfsservctl)
+	ENTRY_SAME(ni_syscall)		/* was nfsservctl */
 	ENTRY_SAME(setresgid)		/* 170 */
 	ENTRY_SAME(getresgid)
 	ENTRY_SAME(prctl)
diff --git a/arch/powerpc/boot/dts/p1023rds.dts b/arch/powerpc/boot/dts/p1023rds.dts
index bfa96aa8f2ca..d9b776740a67 100644
--- a/arch/powerpc/boot/dts/p1023rds.dts
+++ b/arch/powerpc/boot/dts/p1023rds.dts
@@ -387,7 +387,7 @@
 			#size-cells = <1>;
 			compatible = "cfi-flash";
 			reg = <0x0 0x0 0x02000000>;
-			bank-width = <1>;
+			bank-width = <2>;
 			device-width = <1>;
 			partition@0 {
 				label = "ramdisk";
diff --git a/arch/powerpc/configs/85xx/p1023rds_defconfig b/arch/powerpc/configs/85xx/p1023rds_defconfig
index 980ff8f61fd4..3ff5a81c709f 100644
--- a/arch/powerpc/configs/85xx/p1023rds_defconfig
+++ b/arch/powerpc/configs/85xx/p1023rds_defconfig
@@ -171,3 +171,4 @@ CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index 10562a5c65b9..4311d02a3bfd 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -185,3 +185,4 @@ CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index d32283555b53..c92c204a204b 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -100,5 +100,8 @@ CONFIG_DEBUG_INFO=y
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_VIRQ_DEBUG=y
 CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index fcd85d2c72dc..a3467bfb7671 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -139,6 +139,7 @@ CONFIG_SND=y
 CONFIG_SND_INTEL8X0=y
 # CONFIG_SND_PPC is not set
 # CONFIG_SND_USB is not set
+CONFIG_SND_SOC=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 908c941fc24c..9693f6ed3da0 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -140,6 +140,7 @@ CONFIG_SND=y
 CONFIG_SND_INTEL8X0=y
 # CONFIG_SND_PPC is not set
 # CONFIG_SND_USB is not set
+CONFIG_SND_SOC=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index f6736b7da463..fa0d27a400de 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -171,7 +171,7 @@ SYSCALL_SPU(setresuid)
 SYSCALL_SPU(getresuid)
 SYSCALL(ni_syscall)
 SYSCALL_SPU(poll)
-COMPAT_SYS(nfsservctl)
+SYSCALL(ni_syscall)
 SYSCALL_SPU(setresgid)
 SYSCALL_SPU(getresgid)
 COMPAT_SYS_SPU(prctl)
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 2de8551df40f..c65f75aa7ff7 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -54,6 +54,7 @@
 #define ODSR_CLEAR		0x1c00
 #define LTLEECSR_ENABLE_ALL	0xFFC000FC
 #define ESCSR_CLEAR		0x07120204
+#define IECSR_CLEAR		0x80000000
 
 #define RIO_PORT1_EDCSR		0x0640
 #define RIO_PORT2_EDCSR		0x0680
@@ -1089,11 +1090,11 @@ static void port_error_handler(struct rio_mport *port, int offset)
 
 	if (offset == 0) {
 		out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
-		out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
 		out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
 	} else {
 		out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
-		out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
 		out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
 	}
 }
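
The IECSR fix relies on write-1-to-clear semantics: the error bit at 0x80000000 (assumed to be the retry-error-threshold-exceeded flag) is sticky, so writing 0, as the old code did, left it set; only writing the bit itself clears it. A toy model of such a register:

#include <stdio.h>
#include <stdint.h>

static uint32_t iecsr = 0x80000000;	/* sticky error bit set */

static void reg_write(uint32_t val)
{
	iecsr &= ~val;			/* W1C: only written 1s clear */
}

int main(void)
{
	reg_write(0);			/* old code: no effect */
	printf("after writing 0:   %#x\n", iecsr);
	reg_write(0x80000000);		/* IECSR_CLEAR: bit cleared */
	printf("after writing W1C: %#x\n", iecsr);
	return 0;
}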
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 08ab9aa6a0d5..7526db6bf501 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -665,12 +665,6 @@ ENTRY(sys32_poll_wrapper)
 	lgfr	%r4,%r4			# long
 	jg	sys_poll		# branch to system call
 
-ENTRY(compat_sys_nfsservctl_wrapper)
-	lgfr	%r2,%r2			# int
-	llgtr	%r3,%r3			# struct compat_nfsctl_arg*
-	llgtr	%r4,%r4			# union compat_nfsctl_res*
-	jg	compat_sys_nfsservctl	# branch to system call
-
 ENTRY(sys32_setresgid16_wrapper)
 	llgfr	%r2,%r2			# __kernel_old_gid_emu31_t
 	llgfr	%r3,%r3			# __kernel_old_gid_emu31_t
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 068f8465c4ee..f297456dba7a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void)
 static __init void rescue_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
+	unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
 	/*
-	 * Move the initrd right behind the bss section in case it starts
-	 * within the bss section. So we don't overwrite it when the bss
-	 * section gets cleared.
+	 * Just like in the case of IPL from the VM reader, we make sure there
+	 * is a gap of 4MB between the end of the kernel and the start of the
+	 * initrd. That way we can also be sure that saving an NSS will
+	 * succeed, since that only requires the kernel and the initrd to
+	 * reside in different segments.
 	 */
 	if (!INITRD_START || !INITRD_SIZE)
 		return;
-	if (INITRD_START >= (unsigned long) __bss_stop)
+	if (INITRD_START >= min_initrd_addr)
 		return;
-	memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
-	INITRD_START = (unsigned long) __bss_stop;
+	memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+	INITRD_START = min_initrd_addr;
 #endif
 }
 
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 04361d5a4279..48c710206366 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void)
 	/* sysfs: create fcp kset for mixing attr group and bin attrs */
 	reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
 					     &reipl_kset->kobj);
-	if (!reipl_kset) {
+	if (!reipl_fcp_kset) {
 		free_page((unsigned long) reipl_block_fcp);
 		return -ENOMEM;
 	}
@@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
 
 static void stop_run(struct shutdown_trigger *trigger)
 {
-	if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+	if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+	    strcmp(trigger->name, ON_RESTART_STR) == 0)
 		disabled_wait((unsigned long) __builtin_return_address(0));
 	while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
 		cpu_relax();
@@ -1717,7 +1718,7 @@ static void do_panic(void)
 /* on restart */
 
 static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
-	&reipl_action};
+	&stop_action};
 
 static ssize_t on_restart_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *page)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 6ee39ef8fe4a..73eb08c874fb 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -177,7 +177,7 @@ SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper)	/* 165 old get
 NI_SYSCALL							/* for vm86 */
 NI_SYSCALL							/* old sys_query_module */
 SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
-SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper)
+NI_SYSCALL							/* old nfsservctl */
 SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper)	/* 170 old setresgid16 syscall */
 SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper)	/* old getresgid16 syscall */
 SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index b97baf81a87b..2d3679b2447f 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -123,7 +123,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 struct perf_event;
 struct perf_sample_data;
 
-extern void ptrace_triggered(struct perf_event *bp, int nmi,
+extern void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs);
 
 #define task_pt_regs(task) \
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
index e915deafac89..05559295d2ca 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
@@ -15,6 +15,7 @@
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/dma-mapping.h>
 #include <linux/sh_timer.h>
 #include <linux/sh_dma.h>
 
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 32114e0941ae..db4ecd731a00 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@
 #include <linux/atomic.h>
 #include <asm/smp.h>
 
-static void (*pm_idle)(void);
+void (*pm_idle)(void);
 
 static int hlt_counter;
 
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 39b051de4c7c..293e39c59c00 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -185,7 +185,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* vm86 */
 	.long sys_ni_syscall	/* old "query_module" */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* was nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 089c4d825d08..ceb34b94afa9 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -189,7 +189,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* vm86 */
 	.long sys_ni_syscall	/* old "query_module" */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* was nfsservctl */
 	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index d9006f8ffc14..7bbef95c9d1b 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -316,6 +316,35 @@ static int handle_unaligned_ins(insn_size_t instruction, struct pt_regs *regs,
 			break;
 		}
 		break;
+
+	case 9: /* mov.w @(disp,PC),Rn */
+		srcu = (unsigned char __user *)regs->pc;
+		srcu += 4;
+		srcu += (instruction & 0x00FF) << 1;
+		dst = (unsigned char *)rn;
+		*(unsigned long *)dst = 0;
+
+#if !defined(__LITTLE_ENDIAN__)
+		dst += 2;
+#endif
+
+		if (ma->from(dst, srcu, 2))
+			goto fetch_fault;
+		sign_extend(2, dst);
+		ret = 0;
+		break;
+
+	case 0xd: /* mov.l @(disp,PC),Rn */
+		srcu = (unsigned char __user *)(regs->pc & ~0x3);
+		srcu += 4;
+		srcu += (instruction & 0x00FF) << 2;
+		dst = (unsigned char *)rn;
+		*(unsigned long *)dst = 0;
+
+		if (ma->from(dst, srcu, 4))
+			goto fetch_fault;
+		ret = 0;
+		break;
 	}
 	return ret;
 
@@ -466,6 +495,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 		case 0x0500: /* mov.w @(disp,Rm),R0 */
 			goto simple;
 		case 0x0B00: /* bf   lab - no delayslot*/
+			ret = 0;
 			break;
 		case 0x0F00: /* bf/s lab */
 			ret = handle_delayslot(regs, instruction, ma);
@@ -479,6 +509,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			}
 			break;
 		case 0x0900: /* bt   lab - no delayslot */
+			ret = 0;
 			break;
 		case 0x0D00: /* bt/s lab */
 			ret = handle_delayslot(regs, instruction, ma);
@@ -494,6 +525,9 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 		}
 		break;
 
+	case 0x9000: /* mov.w @(disp,PC),Rn */
+		goto simple;
+
 	case 0xA000: /* bra label */
 		ret = handle_delayslot(regs, instruction, ma);
 		if (ret==0)
@@ -507,6 +541,9 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			regs->pc += SH_PC_12BIT_OFFSET(instruction);
 		}
 		break;
+
+	case 0xD000: /* mov.l @(disp,PC),Rn */
+		goto simple;
 	}
 	return ret;
 
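
The two new PC-relative cases compute the effective address the way the hardware does: the instruction's PC plus 4, plus the 8-bit displacement scaled by the operand size (with PC additionally masked to a longword boundary for mov.l). A worked example for the mov.w case, using an illustrative instruction word:

#include <stdio.h>

int main(void)
{
	unsigned short insn = 0x9103;	/* mov.w @(3,PC),r1 -- illustrative */
	unsigned long pc = 0x8c001000;
	unsigned long ea = pc + 4 + ((insn & 0x00FF) << 1);

	printf("effective address = %#lx\n", ea);	/* 0x8c00100a */
	return 0;
}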
diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h
index a1607d180354..69914d748130 100644
--- a/arch/sparc/include/asm/sigcontext.h
+++ b/arch/sparc/include/asm/sigcontext.h
@@ -45,6 +45,19 @@ typedef struct {
 	int			si_mask;
 } __siginfo32_t;
 
+#define __SIGC_MAXWIN	7
+
+typedef struct {
+	unsigned long locals[8];
+	unsigned long ins[8];
+} __siginfo_reg_window;
+
+typedef struct {
+	int			wsaved;
+	__siginfo_reg_window	reg_window[__SIGC_MAXWIN];
+	unsigned long		rwbuf_stkptrs[__SIGC_MAXWIN];
+} __siginfo_rwin_t;
+
 #ifdef CONFIG_SPARC64
 typedef struct {
 	unsigned   int si_float_regs [64];
@@ -73,6 +86,7 @@ struct sigcontext {
 		unsigned long	ss_size;
 	}			sigc_stack;
 	unsigned long		sigc_mask;
+	__siginfo_rwin_t *	sigc_rwin_save;
 };
 
 #else
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index b90b4a1d070a..cb85458f89d2 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32)   += sun4m_irq.o sun4c_irq.o sun4d_irq.o
 
 obj-y                   += process_$(BITS).o
 obj-y                   += signal_$(BITS).o
+obj-y                   += sigutil_$(BITS).o
 obj-$(CONFIG_SPARC32)   += ioport.o
 obj-y                   += setup_$(BITS).o
 obj-y                   += idprom.o
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h
index 100b9c204e78..42851122bbd9 100644
--- a/arch/sparc/kernel/irq.h
+++ b/arch/sparc/kernel/irq.h
@@ -88,7 +88,7 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
 #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
 
 /* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
-#define SUN4D_IPI_IRQ 14
+#define SUN4D_IPI_IRQ 13
 
 extern void sun4d_ipi_interrupt(void);
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 3e9daea1653d..3c5bb784214f 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -440,8 +440,14 @@ static void __init init_sparc64_elf_hwcap(void)
 			cap |= AV_SPARC_VIS;
 		if (tlb_type == cheetah || tlb_type == cheetah_plus)
 			cap |= AV_SPARC_VIS | AV_SPARC_VIS2;
-		if (tlb_type == cheetah_plus)
-			cap |= AV_SPARC_POPC;
+		if (tlb_type == cheetah_plus) {
+			unsigned long impl, ver;
+
+			__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+			impl = ((ver >> 32) & 0xffff);
+			if (impl == PANTHER_IMPL)
+				cap |= AV_SPARC_POPC;
+		}
 		if (tlb_type == hypervisor) {
 			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1)
 				cap |= AV_SPARC_ASI_BLK_INIT;
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 75fad425e249..1ba95aff5d59 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -29,6 +29,8 @@
 #include <asm/visasm.h>
 #include <asm/compat_signal.h>
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 /* This magic should be in g_upper[0] for all upper parts
@@ -44,14 +46,14 @@ typedef struct {
 struct signal_frame32 {
 	struct sparc_stackf32	ss;
 	__siginfo32_t		info;
-	/* __siginfo_fpu32_t * */ u32 fpu_save;
+	/* __siginfo_fpu_t * */ u32 fpu_save;
 	unsigned int		insns[2];
 	unsigned int		extramask[_COMPAT_NSIG_WORDS - 1];
 	unsigned int		extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
 	/* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
 	siginfo_extra_v8plus_t	v8plus;
-	__siginfo_fpu_t		fpu_state;
-};
+	/* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 typedef struct compat_siginfo{
 	int si_signo;
@@ -110,18 +112,14 @@ struct rt_signal_frame32 {
 	compat_siginfo_t	info;
 	struct pt_regs32	regs;
 	compat_sigset_t		mask;
-	/* __siginfo_fpu32_t * */ u32 fpu_save;
+	/* __siginfo_fpu_t * */ u32 fpu_save;
 	unsigned int		insns[2];
 	stack_t32		stack;
 	unsigned int		extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
 	/* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
 	siginfo_extra_v8plus_t	v8plus;
-	__siginfo_fpu_t		fpu_state;
-};
-
-/* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 15) & (~15)))
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 15) & (~15)))
+	/* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
@@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 	return 0;
 }
 
-static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err;
-	
-	err = __get_user(fprs, &fpu->si_fprs);
-	fprs_write(0);
-	regs->tstate &= ~TSTATE_PEF;
-	if (fprs & FPRS_DL)
-		err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32));
-	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	current_thread_info()->fpsaved[0] |= fprs;
-	return err;
-}
-
 void do_sigreturn32(struct pt_regs *regs)
 {
 	struct signal_frame32 __user *sf;
+	compat_uptr_t fpu_save;
+	compat_uptr_t rwin_save;
 	unsigned int psr;
-	unsigned pc, npc, fpu_save;
+	unsigned pc, npc;
 	sigset_t set;
 	unsigned seta[_COMPAT_NSIG_WORDS];
 	int err, i;
@@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-	if (fpu_save)
-		err |= restore_fpu_state32(regs, &sf->fpu_state);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(compat_ptr(rwin_save)))
+			goto segv;
+	}
 	err |= __get_user(seta[0], &sf->info.si_mask);
 	err |= copy_from_user(seta+1, &sf->extramask,
 			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
@@ -300,7 +286,9 @@ segv:
 asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 {
 	struct rt_signal_frame32 __user *sf;
-	unsigned int psr, pc, npc, fpu_save, u_ss_sp;
+	unsigned int psr, pc, npc, u_ss_sp;
+	compat_uptr_t fpu_save;
+	compat_uptr_t rwin_save;
 	mm_segment_t old_fs;
 	sigset_t set;
 	compat_sigset_t seta;
@@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-	if (fpu_save)
-		err |= restore_fpu_state32(regs, &sf->fpu_state);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, compat_ptr(fpu_save));
 	err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t));
 	err |= __get_user(u_ss_sp, &sf->stack.ss_sp);
 	st.ss_sp = compat_ptr(u_ss_sp);
@@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 	do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf);
 	set_fs(old_fs);
 	
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(compat_ptr(rwin_save)))
+			goto segv;
+	}
+
 	switch (_NSIG_WORDS) {
 		case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32);
 		case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32);
@@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
 	return (void __user *) sp;
 }
 
-static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err = 0;
-	
-	fprs = current_thread_info()->fpsaved[0];
-	if (fprs & FPRS_DL)
-		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-				    (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-				    (sizeof(unsigned int) * 32));
-	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	err |= __put_user(fprs, &fpu->si_fprs);
-
-	return err;
-}
-
 /* The I-cache flush instruction only works in the primary ASI, which
  * right now is the nucleus, aka. kernel space.
  *
@@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			 int signo, sigset_t *oldset)
 {
 	struct signal_frame32 __user *sf;
+	int i, err, wsaved;
+	void __user *tail;
 	int sigframe_size;
 	u32 psr;
-	int i, err;
 	unsigned int seta[_COMPAT_NSIG_WORDS];
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = SF_ALIGNEDSZ;
-	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = get_thread_wsaved();
+
+	sigframe_size = sizeof(*sf);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 
 	sf = (struct signal_frame32 __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
@@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill;
 
-	if (get_thread_wsaved() != 0)
-		goto sigill;
+	tail = (sf + 1);
 
 	/* 2. Save the current process state */
 	if (test_thread_flag(TIF_32BIT)) {
@@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			  &sf->v8plus.asi);
 
 	if (psr & PSR_EF) {
-		err |= save_fpu_state32(regs, &sf->fpu_state);
-		err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user((u64)fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user((u64)rwp, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 
 	switch (_NSIG_WORDS) {
 	case 4: seta[7] = (oldset->sig[3] >> 32);
@@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __copy_to_user(sf->extramask, seta + 1,
 			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
 
-	err |= copy_in_user((u32 __user *)sf,
-			    (u32 __user *)(regs->u_regs[UREG_FP]),
-			    sizeof(struct reg_window32));
-	
+	if (!wsaved) {
+		err |= copy_in_user((u32 __user *)sf,
+				    (u32 __user *)(regs->u_regs[UREG_FP]),
+				    sizeof(struct reg_window32));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		for (i = 0; i < 8; i++)
+			err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+		for (i = 0; i < 6; i++)
+			err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+		err |= __put_user(rp->ins[6], &sf->ss.fp);
+		err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+	}
 	if (err)
 		goto sigsegv;
 
@@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 		err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/
 		if (err)
 			goto sigsegv;
-
 		flush_signal_insns(address);
 	}
 	return 0;
@@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			    siginfo_t *info)
 {
 	struct rt_signal_frame32 __user *sf;
+	int i, err, wsaved;
+	void __user *tail;
 	int sigframe_size;
 	u32 psr;
-	int i, err;
 	compat_sigset_t seta;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = RT_ALIGNEDSZ;
-	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = get_thread_wsaved();
+
+	sigframe_size = sizeof(*sf);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 
 	sf = (struct rt_signal_frame32 __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
@@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill;
 
-	if (get_thread_wsaved() != 0)
-		goto sigill;
+	tail = (sf + 1);
 
 	/* 2. Save the current process state */
 	if (test_thread_flag(TIF_32BIT)) {
@@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			  &sf->v8plus.asi);
 
 	if (psr & PSR_EF) {
-		err |= save_fpu_state32(regs, &sf->fpu_state);
-		err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user((u64)fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user((u64)rwp, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 
 	/* Update the siginfo structure.  */
 	err |= copy_siginfo_to_user32(&sf->info, info);
@@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	}
 	err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t));
 
-	err |= copy_in_user((u32 __user *)sf,
-			    (u32 __user *)(regs->u_regs[UREG_FP]),
-			    sizeof(struct reg_window32));
+	if (!wsaved) {
+		err |= copy_in_user((u32 __user *)sf,
+				    (u32 __user *)(regs->u_regs[UREG_FP]),
+				    sizeof(struct reg_window32));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		for (i = 0; i < 8; i++)
+			err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+		for (i = 0; i < 6; i++)
+			err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+		err |= __put_user(rp->ins[6], &sf->ss.fp);
+		err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+	}
 	if (err)
 		goto sigsegv;
 	
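
The signal32.c rewrite above changes the frame layout: the fixed __siginfo_fpu_t member is gone, and the frame instead stores 32-bit user pointers (fpu_save, rwin_save, or 0 when absent) to optional blobs appended directly after the fixed frame. That is also why the "get_thread_wsaved() != 0 => SIGILL" bail-out could be dropped: spilled register windows are now saved into the frame tail instead of being fatal. A sketch of the scheme using the patch's own names (illustrative, not a compilable excerpt):

	sigframe_size = sizeof(*sf);
	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
		sigframe_size += sizeof(__siginfo_fpu_t);
	if (wsaved)
		sigframe_size += sizeof(__siginfo_rwin_t);

	sf   = get_sigframe(&ka->sa, regs, sigframe_size);
	tail = (void __user *)(sf + 1);		/* first optional blob */
	/* each saver bumps tail and stores the blob's user address in
	 * sf->fpu_save / sf->rwin_save so the sigreturn path can find it */
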
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 5e5c5fd03783..04ede8f04add 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -26,6 +26,8 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>	/* flush_sig_insns */
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
@@ -39,8 +41,8 @@ struct signal_frame {
 	unsigned long		insns[2] __attribute__ ((aligned (8)));
 	unsigned int		extramask[_NSIG_WORDS - 1];
 	unsigned int		extra_size; /* Should be 0 */
-	__siginfo_fpu_t		fpu_state;
-};
+	__siginfo_rwin_t __user	*rwin_save;
+} __attribute__((aligned(8)));
 
 struct rt_signal_frame {
 	struct sparc_stackf	ss;
@@ -51,8 +53,8 @@ struct rt_signal_frame {
 	unsigned int		insns[2];
 	stack_t			stack;
 	unsigned int		extra_size; /* Should be 0 */
-	__siginfo_fpu_t		fpu_state;
-};
+	__siginfo_rwin_t __user	*rwin_save;
+} __attribute__((aligned(8)));
 
 /* Align macros */
 #define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
@@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set)
 	return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	int err;
-#ifdef CONFIG_SMP
-	if (test_tsk_thread_flag(current, TIF_USEDFPU))
-		regs->psr &= ~PSR_EF;
-#else
-	if (current == last_task_used_math) {
-		last_task_used_math = NULL;
-		regs->psr &= ~PSR_EF;
-	}
-#endif
-	set_used_math();
-	clear_tsk_thread_flag(current, TIF_USEDFPU);
-
-	if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
-		return -EFAULT;
-
-	err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
-			       (sizeof(unsigned long) * 32));
-	err |= __get_user(current->thread.fsr, &fpu->si_fsr);
-	err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-	if (current->thread.fpqdepth != 0)
-		err |= __copy_from_user(&current->thread.fpqueue[0],
-					&fpu->si_fpqueue[0],
-					((sizeof(unsigned long) +
-					(sizeof(unsigned long *)))*16));
-	return err;
-}
-
 asmlinkage void do_sigreturn(struct pt_regs *regs)
 {
 	struct signal_frame __user *sf;
 	unsigned long up_psr, pc, npc;
 	sigset_t set;
 	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
 	int err;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-
 	if (fpu_save)
 		err |= restore_fpu_state(regs, fpu_save);
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (rwin_save)
+		err |= restore_rwin_state(rwin_save);
 
 	/* This is pretty much atomic, no amount locking would prevent
 	 * the races which exist anyways.
@@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	struct rt_signal_frame __user *sf;
 	unsigned int psr, pc, npc;
 	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
 	mm_segment_t old_fs;
 	sigset_t set;
 	stack_t st;
@@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-
-	if (fpu_save)
+	if (!err && fpu_save)
 		err |= restore_fpu_state(regs, fpu_save);
 	err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
 	
@@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf);
 	set_fs(old_fs);
 
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(rwin_save))
+			goto segv;
+	}
+
 	sigdelsetmask(&set, ~_BLOCKABLE);
 	spin_lock_irq(&current->sighand->siglock);
 	current->blocked = set;
@@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
 	return (void __user *) sp;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	int err = 0;
-#ifdef CONFIG_SMP
-	if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
-		put_psr(get_psr() | PSR_EF);
-		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
-		regs->psr &= ~(PSR_EF);
-		clear_tsk_thread_flag(current, TIF_USEDFPU);
-	}
-#else
-	if (current == last_task_used_math) {
-		put_psr(get_psr() | PSR_EF);
-		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
-		last_task_used_math = NULL;
-		regs->psr &= ~(PSR_EF);
-	}
-#endif
-	err |= __copy_to_user(&fpu->si_float_regs[0],
-			      &current->thread.float_regs[0],
-			      (sizeof(unsigned long) * 32));
-	err |= __put_user(current->thread.fsr, &fpu->si_fsr);
-	err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-	if (current->thread.fpqdepth != 0)
-		err |= __copy_to_user(&fpu->si_fpqueue[0],
-				      &current->thread.fpqueue[0],
-				      ((sizeof(unsigned long) +
-				      (sizeof(unsigned long *)))*16));
-	clear_used_math();
-	return err;
-}
-
 static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 		       int signo, sigset_t *oldset)
 {
 	struct signal_frame __user *sf;
-	int sigframe_size, err;
+	int sigframe_size, err, wsaved;
+	void __user *tail;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 
-	sigframe_size = SF_ALIGNEDSZ;
-	if (!used_math())
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = current_thread_info()->w_saved;
+
+	sigframe_size = sizeof(*sf);
+	if (used_math())
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 
 	sf = (struct signal_frame __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
@@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill_and_return;
 
-	if (current_thread_info()->w_saved != 0)
-		goto sigill_and_return;
+	tail = sf + 1;
 
 	/* 2. Save the current process state */
 	err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs));
@@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(0, &sf->extra_size);
 
 	if (used_math()) {
-		err |= save_fpu_state(regs, &sf->fpu_state);
-		err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user(fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user(rwp, &sf->rwin_save);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 
 	err |= __put_user(oldset->sig[0], &sf->info.si_mask);
 	err |= __copy_to_user(sf->extramask, &oldset->sig[1],
 			      (_NSIG_WORDS - 1) * sizeof(unsigned int));
-	err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-			      sizeof(struct reg_window32));
+	if (!wsaved) {
+		err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+				      sizeof(struct reg_window32));
+	} else {
+		struct reg_window32 *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+	}
 	if (err)
 		goto sigsegv;
 	
@@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 			  int signo, sigset_t *oldset, siginfo_t *info)
 {
 	struct rt_signal_frame __user *sf;
-	int sigframe_size;
+	int sigframe_size, wsaved;
+	void __user *tail;
 	unsigned int psr;
 	int err;
 
 	synchronize_user_stack();
-	sigframe_size = RT_ALIGNEDSZ;
-	if (!used_math())
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = current_thread_info()->w_saved;
+	sigframe_size = sizeof(*sf);
+	if (used_math())
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 	sf = (struct rt_signal_frame __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill;
-	if (current_thread_info()->w_saved != 0)
-		goto sigill;
 
+	tail = sf + 1;
 	err  = __put_user(regs->pc, &sf->regs.pc);
 	err |= __put_user(regs->npc, &sf->regs.npc);
 	err |= __put_user(regs->y, &sf->regs.y);
@@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(0, &sf->extra_size);
 
 	if (psr & PSR_EF) {
-		err |= save_fpu_state(regs, &sf->fpu_state);
-		err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user(fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user(rwp, &sf->rwin_save);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 	err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t));
 	
 	/* Setup sigaltstack */
@@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
 	
-	err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-			      sizeof(struct reg_window32));
+	if (!wsaved) {
+		err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+				      sizeof(struct reg_window32));
+	} else {
+		struct reg_window32 *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+	}
 
 	err |= copy_siginfo_to_user(&sf->info, info);
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 006fe4515886..47509df3b893 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -34,6 +34,7 @@
 
 #include "entry.h"
 #include "systbls.h"
+#include "sigutil.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -236,7 +237,7 @@ struct rt_signal_frame {
 	__siginfo_fpu_t __user	*fpu_save;
 	stack_t			stack;
 	sigset_t		mask;
-	__siginfo_fpu_t		fpu_state;
+	__siginfo_rwin_t	*rwin_save;
 };
 
 static long _sigpause_common(old_sigset_t set)
@@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set)
 	return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err;
-
-	err = __get_user(fprs, &fpu->si_fprs);
-	fprs_write(0);
-	regs->tstate &= ~TSTATE_PEF;
-	if (fprs & FPRS_DL)
-		err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
-		       	       (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
-		       	       (sizeof(unsigned int) * 32));
-	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	current_thread_info()->fpsaved[0] |= fprs;
-	return err;
-}
-
 void do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_signal_frame __user *sf;
 	unsigned long tpc, tnpc, tstate;
 	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
 	sigset_t set;
 	int err;
 
@@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-	if (fpu_save)
-		err |= restore_fpu_state(regs, &sf->fpu_state);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, fpu_save);
 
 	err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
 	err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf);
@@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	if (err)
 		goto segv;
 
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(rwin_save))
+			goto segv;
+	}
+
 	regs->tpc = tpc;
 	regs->tnpc = tnpc;
 
@@ -351,34 +337,13 @@ segv:
 }
 
 /* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp, int fplen)
+static int invalid_frame_pointer(void __user *fp)
 {
 	if (((unsigned long) fp) & 15)
 		return 1;
 	return 0;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err = 0;
-	
-	fprs = current_thread_info()->fpsaved[0];
-	if (fprs & FPRS_DL)
-		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-				    (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-				    (sizeof(unsigned int) * 32));
-	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	err |= __put_user(fprs, &fpu->si_fprs);
-
-	return err;
-}
-
 static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
 {
 	unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
@@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	       int signo, sigset_t *oldset, siginfo_t *info)
 {
 	struct rt_signal_frame __user *sf;
-	int sigframe_size, err;
+	int wsaved, err, sf_size;
+	void __user *tail;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = sizeof(struct rt_signal_frame);
-	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = get_thread_wsaved();
 
+	sf_size = sizeof(struct rt_signal_frame);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sf_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sf_size += sizeof(__siginfo_rwin_t);
 	sf = (struct rt_signal_frame __user *)
-		get_sigframe(ka, regs, sigframe_size);
-	
-	if (invalid_frame_pointer (sf, sigframe_size))
-		goto sigill;
+		get_sigframe(ka, regs, sf_size);
 
-	if (get_thread_wsaved() != 0)
+	if (invalid_frame_pointer (sf))
 		goto sigill;
 
+	tail = (sf + 1);
+
 	/* 2. Save the current process state */
 	err = copy_to_user(&sf->regs, regs, sizeof (*regs));
 
 	if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
-		err |= save_fpu_state(regs, &sf->fpu_state);
-		err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fpu_save = tail;
+		tail += sizeof(__siginfo_fpu_t);
+		err |= save_fpu_state(regs, fpu_save);
+		err |= __put_user((u64)fpu_save, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwin_save = tail;
+		tail += sizeof(__siginfo_rwin_t);
+		err |= save_rwin_state(wsaved, rwin_save);
+		err |= __put_user((u64)rwin_save, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 	
 	/* Setup sigaltstack */
 	err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp);
@@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 
 	err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t));
 
-	err |= copy_in_user((u64 __user *)sf,
-			    (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS),
-			    sizeof(struct reg_window));
+	if (!wsaved) {
+		err |= copy_in_user((u64 __user *)sf,
+				    (u64 __user *)(regs->u_regs[UREG_FP] +
+						   STACK_BIAS),
+				    sizeof(struct reg_window));
+	} else {
+		struct reg_window *rp;
 
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= copy_to_user(sf, rp, sizeof(struct reg_window));
+	}
 	if (info)
 		err |= copy_siginfo_to_user(&sf->info, info);
 	else {
diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h
new file mode 100644
index 000000000000..d223aa432bb6
--- /dev/null
+++ b/arch/sparc/kernel/sigutil.h
@@ -0,0 +1,9 @@
+#ifndef _SIGUTIL_H
+#define _SIGUTIL_H
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin);
+int restore_rwin_state(__siginfo_rwin_t __user *rp);
+
+#endif /* _SIGUTIL_H */
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
new file mode 100644
index 000000000000..35c7897b009a
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_32.c
@@ -0,0 +1,120 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	int err = 0;
+#ifdef CONFIG_SMP
+	if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		regs->psr &= ~(PSR_EF);
+		clear_tsk_thread_flag(current, TIF_USEDFPU);
+	}
+#else
+	if (current == last_task_used_math) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		last_task_used_math = NULL;
+		regs->psr &= ~(PSR_EF);
+	}
+#endif
+	err |= __copy_to_user(&fpu->si_float_regs[0],
+			      &current->thread.float_regs[0],
+			      (sizeof(unsigned long) * 32));
+	err |= __put_user(current->thread.fsr, &fpu->si_fsr);
+	err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+	if (current->thread.fpqdepth != 0)
+		err |= __copy_to_user(&fpu->si_fpqueue[0],
+				      &current->thread.fpqueue[0],
+				      ((sizeof(unsigned long) +
+				      (sizeof(unsigned long *)))*16));
+	clear_used_math();
+	return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	int err;
+#ifdef CONFIG_SMP
+	if (test_tsk_thread_flag(current, TIF_USEDFPU))
+		regs->psr &= ~PSR_EF;
+#else
+	if (current == last_task_used_math) {
+		last_task_used_math = NULL;
+		regs->psr &= ~PSR_EF;
+	}
+#endif
+	set_used_math();
+	clear_tsk_thread_flag(current, TIF_USEDFPU);
+
+	if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
+		return -EFAULT;
+
+	err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
+			       (sizeof(unsigned long) * 32));
+	err |= __get_user(current->thread.fsr, &fpu->si_fsr);
+	err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+	if (current->thread.fpqdepth != 0)
+		err |= __copy_from_user(&current->thread.fpqueue[0],
+					&fpu->si_fpqueue[0],
+					((sizeof(unsigned long) +
+					(sizeof(unsigned long *)))*16));
+	return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+	int i, err = __put_user(wsaved, &rwin->wsaved);
+
+	for (i = 0; i < wsaved; i++) {
+		struct reg_window32 *rp;
+		unsigned long fp;
+
+		rp = &current_thread_info()->reg_window[i];
+		fp = current_thread_info()->rwbuf_stkptrs[i];
+		err |= copy_to_user(&rwin->reg_window[i], rp,
+				    sizeof(struct reg_window32));
+		err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+	}
+	return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+	struct thread_info *t = current_thread_info();
+	int i, wsaved, err;
+
+	__get_user(wsaved, &rp->wsaved);
+	if (wsaved > NSWINS)
+		return -EFAULT;
+
+	err = 0;
+	for (i = 0; i < wsaved; i++) {
+		err |= copy_from_user(&t->reg_window[i],
+				      &rp->reg_window[i],
+				      sizeof(struct reg_window32));
+		err |= __get_user(t->rwbuf_stkptrs[i],
+				  &rp->rwbuf_stkptrs[i]);
+	}
+	if (err)
+		return err;
+
+	t->w_saved = wsaved;
+	synchronize_user_stack();
+	if (t->w_saved)
+		return -EFAULT;
+	return 0;
+
+}
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
new file mode 100644
index 000000000000..e7dc508c38eb
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_64.c
@@ -0,0 +1,93 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	unsigned long *fpregs = current_thread_info()->fpregs;
+	unsigned long fprs;
+	int err = 0;
+	
+	fprs = current_thread_info()->fpsaved[0];
+	if (fprs & FPRS_DL)
+		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+				    (sizeof(unsigned int) * 32));
+	if (fprs & FPRS_DU)
+		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+				    (sizeof(unsigned int) * 32));
+	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+	err |= __put_user(fprs, &fpu->si_fprs);
+
+	return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	unsigned long *fpregs = current_thread_info()->fpregs;
+	unsigned long fprs;
+	int err;
+
+	err = __get_user(fprs, &fpu->si_fprs);
+	fprs_write(0);
+	regs->tstate &= ~TSTATE_PEF;
+	if (fprs & FPRS_DL)
+		err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+		       	       (sizeof(unsigned int) * 32));
+	if (fprs & FPRS_DU)
+		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+		       	       (sizeof(unsigned int) * 32));
+	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+	current_thread_info()->fpsaved[0] |= fprs;
+	return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+	int i, err = __put_user(wsaved, &rwin->wsaved);
+
+	for (i = 0; i < wsaved; i++) {
+		struct reg_window *rp = &current_thread_info()->reg_window[i];
+		unsigned long fp = current_thread_info()->rwbuf_stkptrs[i];
+
+		err |= copy_to_user(&rwin->reg_window[i], rp,
+				    sizeof(struct reg_window));
+		err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+	}
+	return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+	struct thread_info *t = current_thread_info();
+	int i, wsaved, err;
+
+	__get_user(wsaved, &rp->wsaved);
+	if (wsaved > NSWINS)
+		return -EFAULT;
+
+	err = 0;
+	for (i = 0; i < wsaved; i++) {
+		err |= copy_from_user(&t->reg_window[i],
+				      &rp->reg_window[i],
+				      sizeof(struct reg_window));
+		err |= __get_user(t->rwbuf_stkptrs[i],
+				  &rp->rwbuf_stkptrs[i]);
+	}
+	if (err)
+		return err;
+
+	set_thread_wsaved(wsaved);
+	synchronize_user_stack();
+	if (get_thread_wsaved())
+		return -EFAULT;
+	return 0;
+}
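
Both sigutil implementations pair a saver with a restorer: save_rwin_state() dumps the wsaved buffered windows plus their stack pointers, and restore_rwin_state() refills the kernel-side buffer, then lets synchronize_user_stack() try to spill everything back to the user stack, failing with -EFAULT if any window is left behind. The user-supplied count is bounds-checked against NSWINS before any copy. A standalone illustration of that check (NSWINS is 7 on sparc64 by my reading of asm/ptrace.h; values here are stand-ins):

	#include <stdio.h>

	#define NSWINS 7	/* assumed sparc64 value */

	/* mirrors the wsaved validation in restore_rwin_state() */
	static int wsaved_ok(int wsaved)
	{
		return (wsaved > NSWINS) ? -1 : 0;	/* -EFAULT in-kernel */
	}

	int main(void)
	{
		printf("%d %d\n", wsaved_ok(3), wsaved_ok(42));	/* 0 -1 */
		return 0;
	}
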
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 44e5faf1ad5f..d97f3eb72e06 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -81,7 +81,6 @@ SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
 SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
 SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1)
 SIGN1(sys32_mlockall, sys_mlockall, %o0)
-SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0)
 SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
 SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
 SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 6e492d59f6b1..09d8ec454450 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -67,7 +67,7 @@ sys_call_table:
 /*235*/	.long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/	.long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
 /*245*/	.long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/	.long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/	.long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
 /*255*/	.long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/	.long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
 /*265*/	.long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index f566518483b5..edbec45d4688 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -68,7 +68,7 @@ sys_call_table32:
 	.word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall
 /*240*/	.word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler
 	.word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep
-/*250*/	.word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
+/*250*/	.word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys_nis_syscall
 	.word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
 /*260*/	.word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
 	.word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
@@ -145,7 +145,7 @@ sys_call_table:
 	.word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/	.word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
 	.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/	.word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/	.word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
 	.word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/	.word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
 	.word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a0e866d233ee..54edb207ff3a 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -672,7 +672,7 @@ ia32_sys_call_table:
 	.quad sys32_vm86_warning	/* vm86 */ 
 	.quad quiet_ni_syscall	/* query_module */
 	.quad sys_poll
-	.quad compat_sys_nfsservctl
+	.quad quiet_ni_syscall /* old nfsservctl */
 	.quad sys_setresgid16	/* 170 */
 	.quad sys_getresgid16
 	.quad sys_prctl
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index a518c0a45044..c59cc97fe6c1 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -44,7 +44,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
 #elif defined(__x86_64__)
 	__asm__ (
-		"mul %[mul_frac] ; shrd $32, %[hi], %[lo]"
+		"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
 		: [lo]"=a"(product),
 		  [hi]"=d"(tmp)
 		: "0"(delta),
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d92641cc7acc..201040573444 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall)
 __SYSCALL(__NR_quotactl, sys_quotactl)
 
 #define __NR_nfsservctl				180
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
 
 /* reserved for LiS/STREAMS */
 #define __NR_getpmsg				181
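
The unistd_64.h and table edits in this series all retire nfsservctl the same way: the slot (180 here) keeps its number but is wired to sys_ni_syscall, which simply returns -ENOSYS, so old binaries fail cleanly instead of shifting every later syscall number. The removal is visible from userspace as a plain ENOSYS, e.g. (Linux/x86-64 probe; error handling kept minimal):

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		long ret = syscall(180 /* __NR_nfsservctl on x86-64 */, 0, 0, 0);

		printf("ret=%ld errno=%s\n", ret, strerror(errno));
		/* expected on kernels with this change: ret=-1, ENOSYS */
		return 0;
	}
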
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index adc66c3a1fef..34b18594e724 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
 	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
 	    APIC_DM_INIT;
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-	mdelay(10);
 
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 08119a37e53c..6b96110bb0c3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
  */
 static int mtrr_rendezvous_handler(void *info)
 {
-#ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
 
 	/*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
 	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-#endif
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4ee3abf20ed6..cfa62ec090ec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	perf_callchain_store(entry, regs->ip);
 
+	if (!current->mm)
+		return;
+
 	if (perf_callchain_user32(regs, entry))
 		return;
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a91974918..f3f6f5344001 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
 661:	pushl_cfi $do_general_protection
 662:
 .section .altinstructions,"a"
-	.balign 4
-	.long 661b
-	.long 663f
-	.word X86_FEATURE_XMM
-	.byte 662b-661b
-	.byte 664f-663f
+	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
 .previous
 .section .altinstr_replacement,"ax"
 663:	pushl $do_simd_coprocessor_error
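
The altinstruction_entry macro emits the same record the removed .balign/.long/.word/.byte sequence spelled out by hand. Read back as a C struct, that 32-bit record is roughly (the kernel's real definition is struct alt_instr in asm/alternative.h; this sketch just names the fields visible above):

	#include <stdint.h>

	struct alt_instr_sketch {
		int32_t  instr;		/* .long 661b: original insn address */
		int32_t  repl;		/* .long 663f: replacement address */
		uint16_t cpuid;		/* .word X86_FEATURE_XMM */
		uint8_t  instrlen;	/* .byte 662b-661b */
		uint8_t  replacementlen; /* .byte 664f-663f */
	};
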
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a045a1a2..bc19be332bc9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
 	.long ptregs_vm86
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* Old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index c95330267f08..039d91315bc5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -365,8 +365,13 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
 	 */
 	if (bus) {
 		struct pci_bus *child;
-		list_for_each_entry(child, &bus->children, node)
-			pcie_bus_configure_settings(child, child->self->pcie_mpss);
+		list_for_each_entry(child, &bus->children, node) {
+			struct pci_dev *self = child->self;
+			if (!self)
+				continue;
+
+			pcie_bus_configure_settings(child, self->pcie_mpss);
+		}
 	}
 
 	if (!bus)
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 7000e74b3087..58425adc22c6 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 			irq_attr.trigger = 1;
 			irq_attr.polarity = 1;
 			io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
-		}
+		} else
+			pentry->irq = 0; /* No irq */
+
 		switch (pentry->type) {
 		case SFI_DEV_TYPE_IPC:
 			/* ID as IRQ is a hack that will go away */
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 8b9940e78e2f..7cce722667b8 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -161,13 +161,13 @@ restart:
 	if (inbuf && inlen) {
 		/* write data to EC */
 		for (i = 0; i < inlen; i++) {
+			pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
+			outb(inbuf[i], 0x68);
 			if (wait_on_ibf(0x6c, 0)) {
 				printk(KERN_ERR "olpc-ec:  timeout waiting for"
 						" EC accept data!\n");
 				goto err;
 			}
-			pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
-			outb(inbuf[i], 0x68);
 		}
 	}
 	if (outbuf && outlen) {
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index e2800affa754..e354bceee0e0 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -43,7 +43,7 @@ __kernel_vsyscall:
 	.space 7,0x90
 
 	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-	jmp .Lenter_kernel
+	int $0x80
 	/* 16: System call normal return point is here! */
 VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
 	pop %ebp
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index df118a825f39..c3b8d440873c 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -184,6 +184,19 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
 					PFN_UP(start_pci), PFN_DOWN(last));
 	return identity;
 }
+
+static unsigned long __init xen_get_max_pages(void)
+{
+	unsigned long max_pages = MAX_DOMAIN_PAGES;
+	domid_t domid = DOMID_SELF;
+	int ret;
+
+	ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+	if (ret > 0)
+		max_pages = ret;
+	return min(max_pages, MAX_DOMAIN_PAGES);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
@@ -292,6 +305,12 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
+	extra_limit = xen_get_max_pages();
+	if (extra_limit >= max_pfn)
+		extra_pages = extra_limit - max_pfn;
+	else
+		extra_pages = 0;
+
 	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
 
 	/*
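
xen_get_max_pages() asks the hypervisor for this domain's maximum reservation (XENMEM_maximum_reservation) and clamps the answer to MAX_DOMAIN_PAGES; the second hunk then sizes the balloon area as the gap between that limit and the pages already populated. A worked example with made-up numbers (the constants are illustrative, not Xen's):

	#include <stdio.h>

	int main(void)
	{
		/* made-up numbers, 4 KiB pages */
		unsigned long extra_limit = 1048576;	/* hypervisor cap: 4 GiB */
		unsigned long max_pfn     = 786432;	/* populated: 3 GiB */
		unsigned long extra_pages;

		/* same clamp as the hunk above */
		extra_pages = (extra_limit >= max_pfn) ? extra_limit - max_pfn : 0;
		printf("extra_pages = %lu\n", extra_pages);	/* 262144 = 1 GiB */
		return 0;
	}
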
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e79dbb95482b..d4fc6d454f8d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
 #include <xen/page.h>
 #include <xen/events.h>
 
+#include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
 
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	unsigned cpu;
 	unsigned int i;
 
+	if (skip_ioapic_setup) {
+		char *m = (max_cpus == 0) ?
+			"The nosmp parameter is incompatible with Xen; " \
+			"use Xen dom0_max_vcpus=1 parameter" :
+			"The noapic parameter is incompatible with Xen";
+
+		xen_raw_printk(m);
+		panic(m);
+	}
 	xen_init_lock_cpu(0);
 
 	smp_store_cpu_info(0);
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 22a2093b5862..b040b0e518ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
 
 	/*
 	 * If there's something pending, mask events again so we can
-	 * jump back into xen_hypervisor_callback
+	 * jump back into xen_hypervisor_callback. Otherwise do not
+	 * touch XEN_vcpu_info_mask.
 	 */
-	sete XEN_vcpu_info_mask(%eax)
+	jne 1f
+	movb $1, XEN_vcpu_info_mask(%eax)
 
-	popl %eax
+1:	popl %eax
 
 	/*
 	 * From this point on the registers are restored and the stack
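
The bug the last hunk fixes: sete stores a full 0-or-1 byte into XEN_vcpu_info_mask on both outcomes of the preceding pending-events test, so the "nothing pending" path cleared a mask it was required to leave untouched. The replacement branches around the store and only writes 1 when the code is about to jump back into xen_hypervisor_callback. In C terms the fixed logic is roughly (illustrative; the actual test and field offset live in the surrounding assembly):

	if (events_pending)				/* ZF set in the asm */
		vcpu_info->evtchn_upcall_mask = 1;	/* re-mask first */
	/* else: leave the mask exactly as it was */
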
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index a6f934f37f1a..798ee6d285a1 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -455,7 +455,7 @@ __SYSCALL(203, sys_reboot, 3)
 #define __NR_quotactl 				204
 __SYSCALL(204, sys_quotactl, 4)
 #define __NR_nfsservctl 			205
-__SYSCALL(205, sys_nfsservctl, 3)
+__SYSCALL(205, sys_ni_syscall, 0)
 #define __NR__sysctl 				206
 __SYSCALL(206, sys_sysctl, 1)
 #define __NR_bdflush 				207