summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/arm/Kconfig5
-rw-r--r--arch/arm/boot/dts/Makefile4
-rw-r--r--arch/arm/boot/dts/am335x-evmsk.dts11
-rw-r--r--arch/arm/boot/dts/armada-xp-mv78260.dtsi3
-rw-r--r--arch/arm/boot/dts/at91-sama5d3_xplained.dts229
-rw-r--r--arch/arm/boot/dts/at91sam9263.dtsi2
-rw-r--r--arch/arm/boot/dts/at91sam9n12ek.dts4
-rw-r--r--arch/arm/boot/dts/dove.dtsi11
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dts6
-rw-r--r--arch/arm/boot/dts/omap3-n9.dts2
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts4
-rw-r--r--arch/arm/boot/dts/omap3-n950.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-tobi.dts22
-rw-r--r--arch/arm/boot/dts/omap3-overo-tobi-common.dtsi (renamed from arch/arm/boot/dts/omap3-tobi.dts)3
-rw-r--r--arch/arm/boot/dts/omap3-overo-tobi.dts22
-rw-r--r--arch/arm/boot/dts/omap3-overo.dtsi3
-rw-r--r--arch/arm/boot/dts/sama5d3.dtsi2
-rw-r--r--arch/arm/boot/dts/ste-href.dtsi1
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi4
-rw-r--r--arch/arm/boot/dts/sun5i-a10s.dtsi4
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi4
-rw-r--r--arch/arm/boot/dts/tegra114.dtsi4
-rw-r--r--arch/arm/boot/dts/tegra20.dtsi4
-rw-r--r--arch/arm/boot/dts/tegra30-cardhu.dtsi2
-rw-r--r--arch/arm/boot/dts/tegra30.dtsi4
-rw-r--r--arch/arm/boot/dts/testcases/tests-interrupts.dtsi58
-rw-r--r--arch/arm/boot/dts/testcases/tests-phandle.dtsi39
-rw-r--r--arch/arm/boot/dts/testcases/tests.dtsi2
-rw-r--r--arch/arm/boot/dts/versatile-pb.dts4
-rw-r--r--arch/arm/common/Makefile1
-rw-r--r--arch/arm/common/scoop.c2
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/include/asm/assembler.h50
-rw-r--r--arch/arm/include/asm/atomic.h44
-rw-r--r--arch/arm/include/asm/cmpxchg.h6
-rw-r--r--arch/arm/include/asm/cputype.h20
-rw-r--r--arch/arm/include/asm/floppy.h2
-rw-r--r--arch/arm/include/asm/futex.h9
-rw-r--r--arch/arm/include/asm/hw_breakpoint.h1
-rw-r--r--arch/arm/include/asm/hwcap.h3
-rw-r--r--arch/arm/include/asm/jump_label.h1
-rw-r--r--arch/arm/include/asm/kprobes.h17
-rw-r--r--arch/arm/include/asm/memory.h41
-rw-r--r--arch/arm/include/asm/pgtable-2level.h1
-rw-r--r--arch/arm/include/asm/pgtable.h7
-rw-r--r--arch/arm/include/asm/pmu.h2
-rw-r--r--arch/arm/include/asm/probes.h43
-rw-r--r--arch/arm/include/asm/ptrace.h14
-rw-r--r--arch/arm/include/asm/sync_bitops.h1
-rw-r--r--arch/arm/include/asm/system.h7
-rw-r--r--arch/arm/include/asm/thread_info.h5
-rw-r--r--arch/arm/include/asm/uaccess.h2
-rw-r--r--arch/arm/include/asm/uprobes.h45
-rw-r--r--arch/arm/include/uapi/asm/hwcap.h9
-rw-r--r--arch/arm/kernel/Makefile7
-rw-r--r--arch/arm/kernel/armksyms.c2
-rw-r--r--arch/arm/kernel/bios32.c37
-rw-r--r--arch/arm/kernel/entry-header.S11
-rw-r--r--arch/arm/kernel/head.S17
-rw-r--r--arch/arm/kernel/hw_breakpoint.c3
-rw-r--r--arch/arm/kernel/kprobes-arm.c806
-rw-r--r--arch/arm/kernel/kprobes-common.c469
-rw-r--r--arch/arm/kernel/kprobes-test-arm.c1
-rw-r--r--arch/arm/kernel/kprobes-test.c12
-rw-r--r--arch/arm/kernel/kprobes-thumb.c1145
-rw-r--r--arch/arm/kernel/kprobes.c25
-rw-r--r--arch/arm/kernel/kprobes.h400
-rw-r--r--arch/arm/kernel/perf_event.c27
-rw-r--r--arch/arm/kernel/perf_event_cpu.c113
-rw-r--r--arch/arm/kernel/perf_event_v7.c717
-rw-r--r--arch/arm/kernel/pj4-cp0.c4
-rw-r--r--arch/arm/kernel/probes-arm.c734
-rw-r--r--arch/arm/kernel/probes-arm.h73
-rw-r--r--arch/arm/kernel/probes-thumb.c882
-rw-r--r--arch/arm/kernel/probes-thumb.h97
-rw-r--r--arch/arm/kernel/probes.c455
-rw-r--r--arch/arm/kernel/probes.h407
-rw-r--r--arch/arm/kernel/process.c10
-rw-r--r--arch/arm/kernel/setup.c16
-rw-r--r--arch/arm/kernel/signal.c4
-rw-r--r--arch/arm/kernel/traps.c1
-rw-r--r--arch/arm/kernel/unwind.c137
-rw-r--r--arch/arm/kernel/uprobes-arm.c234
-rw-r--r--arch/arm/kernel/uprobes.c210
-rw-r--r--arch/arm/kernel/uprobes.h35
-rw-r--r--arch/arm/kvm/arm.c3
-rw-r--r--arch/arm/kvm/interrupts.S11
-rw-r--r--arch/arm/lib/bitops.h5
-rw-r--r--arch/arm/lib/copy_template.S36
-rw-r--r--arch/arm/lib/csumpartialcopygeneric.S96
-rw-r--r--arch/arm/lib/io-readsl.S12
-rw-r--r--arch/arm/lib/io-writesl.S12
-rw-r--r--arch/arm/lib/memmove.S36
-rw-r--r--arch/arm/lib/uaccess.S192
-rw-r--r--arch/arm/mach-cns3xxx/core.c2
-rw-r--r--arch/arm/mach-ebsa110/core.c2
-rw-r--r--arch/arm/mach-footbridge/dc21285-timer.c6
-rw-r--r--arch/arm/mach-footbridge/dc21285.c10
-rw-r--r--arch/arm/mach-footbridge/isa-timer.c2
-rw-r--r--arch/arm/mach-gemini/idle.c2
-rw-r--r--arch/arm/mach-hisi/Kconfig2
-rw-r--r--arch/arm/mach-imx/Makefile2
-rw-r--r--arch/arm/mach-imx/clk-imx6q.c3
-rw-r--r--arch/arm/mach-imx/clk-imx6sl.c3
-rw-r--r--arch/arm/mach-imx/common.h4
-rw-r--r--arch/arm/mach-imx/pm-imx6q.c2
-rw-r--r--arch/arm/mach-integrator/integrator_ap.c2
-rw-r--r--arch/arm/mach-ixp4xx/common.c2
-rw-r--r--arch/arm/mach-ixp4xx/dsmg600-setup.c3
-rw-r--r--arch/arm/mach-ixp4xx/fsg-setup.c6
-rw-r--r--arch/arm/mach-ixp4xx/nas100d-setup.c3
-rw-r--r--arch/arm/mach-ixp4xx/nslu2-setup.c6
-rw-r--r--arch/arm/mach-ks8695/time.c2
-rw-r--r--arch/arm/mach-lpc32xx/timer.c2
-rw-r--r--arch/arm/mach-mmp/time.c2
-rw-r--r--arch/arm/mach-moxart/Kconfig1
-rw-r--r--arch/arm/mach-netx/time.c2
-rw-r--r--arch/arm/mach-omap1/board-nokia770.c1
-rw-r--r--arch/arm/mach-omap1/dma.c191
-rw-r--r--arch/arm/mach-omap2/Kconfig10
-rw-r--r--arch/arm/mach-omap2/am35xx-emac.c1
-rw-r--r--arch/arm/mach-omap2/dma.c183
-rw-r--r--arch/arm/mach-omap2/gpmc.c4
-rw-r--r--arch/arm/mach-omap2/io.c9
-rw-r--r--arch/arm/mach-pxa/am300epd.c1
-rw-r--r--arch/arm/mach-pxa/include/mach/balloon3.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/corgi.h1
-rw-r--r--arch/arm/mach-pxa/include/mach/csb726.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/gumstix.h1
-rw-r--r--arch/arm/mach-pxa/include/mach/idp.h1
-rw-r--r--arch/arm/mach-pxa/include/mach/palmld.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/palmt5.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/palmtc.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/palmtx.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/pcm027.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/pcm990_baseboard.h1
-rw-r--r--arch/arm/mach-pxa/include/mach/poodle.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/spitz.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/tosa.h2
-rw-r--r--arch/arm/mach-pxa/include/mach/trizeps4.h2
-rw-r--r--arch/arm/mach-pxa/mioa701.c9
-rw-r--r--arch/arm/mach-rpc/dma.c2
-rw-r--r--arch/arm/mach-rpc/time.c1
-rw-r--r--arch/arm/mach-sa1100/collie.c33
-rw-r--r--arch/arm/mach-sa1100/h3100.c7
-rw-r--r--arch/arm/mach-sa1100/h3600.c7
-rw-r--r--arch/arm/mach-sa1100/h3xxx.c58
-rw-r--r--arch/arm/mach-sa1100/include/mach/collie.h2
-rw-r--r--arch/arm/mach-sa1100/include/mach/h3xxx.h11
-rw-r--r--arch/arm/mach-sa1100/time.c2
-rw-r--r--arch/arm/mach-shmobile/Kconfig2
-rw-r--r--arch/arm/mach-spear/time.c2
-rw-r--r--arch/arm/mach-tegra/pm.c1
-rw-r--r--arch/arm/mach-tegra/tegra.c10
-rw-r--r--arch/arm/mach-u300/timer.c2
-rw-r--r--arch/arm/mach-vexpress/Makefile3
-rw-r--r--arch/arm/mach-w90x900/time.c2
-rw-r--r--arch/arm/mach-zynq/common.c14
-rw-r--r--arch/arm/mm/Kconfig3
-rw-r--r--arch/arm/mm/cache-feroceon-l2.c4
-rw-r--r--arch/arm/mm/dma-mapping.c5
-rw-r--r--arch/arm/mm/mmu.c10
-rw-r--r--arch/arm/mm/proc-macros.S19
-rw-r--r--arch/arm/mm/proc-v7-2level.S7
-rw-r--r--arch/arm/mm/proc-v7.S11
-rw-r--r--arch/arm/plat-iop/time.c2
-rw-r--r--arch/arm/plat-omap/dma.c17
-rw-r--r--arch/arm/vfp/entry.S25
-rw-r--r--arch/arm/vfp/vfphw.S19
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/configs/defconfig18
-rw-r--r--arch/arm64/include/asm/atomic.h53
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h1
-rw-r--r--arch/arm64/include/asm/cmpxchg.h17
-rw-r--r--arch/arm64/include/asm/esr.h2
-rw-r--r--arch/arm64/include/asm/futex.h10
-rw-r--r--arch/arm64/include/asm/kvm_arm.h2
-rw-r--r--arch/arm64/include/asm/percpu.h8
-rw-r--r--arch/arm64/include/asm/pgtable.h10
-rw-r--r--arch/arm64/include/asm/spinlock.h10
-rw-r--r--arch/arm64/include/asm/unistd32.h5
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h9
-rw-r--r--arch/arm64/kernel/kuser32.S6
-rw-r--r--arch/arm64/kernel/stacktrace.c6
-rw-r--r--arch/arm64/kernel/vdso.c4
-rw-r--r--arch/arm64/kernel/vdso/Makefile2
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S7
-rw-r--r--arch/arm64/kvm/hyp.S27
-rw-r--r--arch/arm64/lib/bitops.S3
-rw-r--r--arch/arm64/mm/dma-mapping.c1
-rw-r--r--arch/arm64/mm/mmu.c12
-rw-r--r--arch/arm64/mm/pgd.c11
-rw-r--r--arch/avr32/Makefile2
-rw-r--r--arch/avr32/boards/mimc200/fram.c1
-rw-r--r--arch/avr32/include/asm/Kbuild1
-rw-r--r--arch/avr32/include/asm/io.h2
-rw-r--r--arch/ia64/include/asm/unistd.h2
-rw-r--r--arch/ia64/include/uapi/asm/unistd.h2
-rw-r--r--arch/ia64/kernel/entry.S2
-rw-r--r--arch/m68k/include/asm/Kbuild6
-rw-r--r--arch/m68k/include/asm/barrier.h8
-rw-r--r--arch/m68k/include/asm/unistd.h2
-rw-r--r--arch/m68k/include/uapi/asm/unistd.h2
-rw-r--r--arch/m68k/kernel/syscalltable.S2
-rw-r--r--arch/microblaze/include/asm/delay.h2
-rw-r--r--arch/microblaze/include/asm/io.h6
-rw-r--r--arch/microblaze/kernel/head.S2
-rw-r--r--arch/mips/alchemy/devboards/db1000.c7
-rw-r--r--arch/mips/include/asm/fpu.h2
-rw-r--r--arch/mips/include/uapi/asm/unistd.h18
-rw-r--r--arch/mips/kernel/scall32-o32.S2
-rw-r--r--arch/mips/kernel/scall64-64.S2
-rw-r--r--arch/mips/kernel/scall64-n32.S2
-rw-r--r--arch/mips/kernel/scall64-o32.S2
-rw-r--r--arch/parisc/hpux/fs.c15
-rw-r--r--arch/powerpc/include/asm/compat.h5
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h1
-rw-r--r--arch/powerpc/include/asm/eeh.h21
-rw-r--r--arch/powerpc/include/asm/hugetlb.h2
-rw-r--r--arch/powerpc/include/asm/iommu.h1
-rw-r--r--arch/powerpc/include/asm/opal.h4
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h26
-rw-r--r--arch/powerpc/include/asm/pgtable.h22
-rw-r--r--arch/powerpc/include/asm/ptrace.h16
-rw-r--r--arch/powerpc/include/asm/sections.h12
-rw-r--r--arch/powerpc/include/asm/vdso.h6
-rw-r--r--arch/powerpc/kernel/crash_dump.c8
-rw-r--r--arch/powerpc/kernel/dma.c10
-rw-r--r--arch/powerpc/kernel/eeh.c32
-rw-r--r--arch/powerpc/kernel/eeh_driver.c8
-rw-r--r--arch/powerpc/kernel/ftrace.c1
-rw-r--r--arch/powerpc/kernel/iommu.c12
-rw-r--r--arch/powerpc/kernel/irq.c5
-rw-r--r--arch/powerpc/kernel/machine_kexec.c14
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c6
-rw-r--r--arch/powerpc/kernel/misc_32.S5
-rw-r--r--arch/powerpc/kernel/reloc_64.S4
-rw-r--r--arch/powerpc/kernel/setup_32.c5
-rw-r--r--arch/powerpc/kernel/signal_64.c4
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S2
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S2
-rw-r--r--arch/powerpc/mm/hash_utils_64.c14
-rw-r--r--arch/powerpc/mm/pgtable_64.c12
-rw-r--r--arch/powerpc/mm/subpage-prot.c2
-rw-r--r--arch/powerpc/perf/core-book3s.c5
-rw-r--r--arch/powerpc/perf/power8-pmu.c144
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c128
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c21
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c84
-rw-r--r--arch/powerpc/platforms/powernv/pci.c230
-rw-r--r--arch/powerpc/platforms/powernv/pci.h6
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h8
-rw-r--r--arch/powerpc/platforms/powernv/setup.c9
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig1
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c22
-rw-r--r--arch/powerpc/platforms/pseries/pci.c22
-rw-r--r--arch/powerpc/platforms/pseries/setup.c3
-rw-r--r--arch/powerpc/sysdev/mpic.c38
-rw-r--r--arch/powerpc/xmon/xmon.c24
-rw-r--r--arch/s390/appldata/appldata_base.c1
-rw-r--r--arch/s390/crypto/aes_s390.c65
-rw-r--r--arch/s390/crypto/des_s390.c95
-rw-r--r--arch/s390/kernel/compat_wrapper.S2
-rw-r--r--arch/s390/kernel/head64.S7
-rw-r--r--arch/s390/mm/page-states.c10
-rw-r--r--arch/s390/pci/pci_dma.c8
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/mm/srmmu.c2
-rw-r--r--arch/x86/Kconfig7
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/boot/compressed/aslr.c9
-rw-r--r--arch/x86/include/asm/amd_nb.h2
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/pgtable.h14
-rw-r--r--arch/x86/include/asm/tlbflush.h6
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c10
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_early.c43
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c48
-rw-r--r--arch/x86/kernel/ftrace.c83
-rw-r--r--arch/x86/kernel/irq.c9
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/quirks.c37
-rw-r--r--arch/x86/kernel/tsc.c11
-rw-r--r--arch/x86/kernel/tsc_msr.c30
-rw-r--r--arch/x86/kvm/mmu.c1
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/srat.c16
-rw-r--r--arch/x86/mm/tlb.c52
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c12
-rw-r--r--arch/x86/platform/efi/efi.c5
-rw-r--r--arch/x86/platform/efi/efi_32.c6
-rw-r--r--arch/x86/platform/efi/efi_64.c9
-rw-r--r--arch/x86/xen/enlighten.c12
-rw-r--r--arch/x86/xen/mmu.c4
-rw-r--r--arch/x86/xen/p2m.c17
-rw-r--r--arch/xtensa/Kconfig3
-rw-r--r--arch/xtensa/boot/dts/xtfpga.dtsi12
-rw-r--r--arch/xtensa/include/asm/io.h2
-rw-r--r--arch/xtensa/include/asm/traps.h44
-rw-r--r--arch/xtensa/include/asm/vectors.h2
-rw-r--r--arch/xtensa/include/uapi/asm/unistd.h7
-rw-r--r--arch/xtensa/kernel/entry.S449
-rw-r--r--arch/xtensa/kernel/setup.c2
-rw-r--r--arch/xtensa/kernel/time.c1
-rw-r--r--arch/xtensa/kernel/vectors.S2
-rw-r--r--arch/xtensa/kernel/xtensa_ksyms.c2
-rw-r--r--arch/xtensa/mm/init.c13
-rw-r--r--arch/xtensa/mm/mmu.c2
-rw-r--r--arch/xtensa/platforms/xtfpga/setup.c7
-rw-r--r--arch/xtensa/variants/fsf/include/variant/tie.h9
329 files changed, 7300 insertions, 4416 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 80bbb8ccd0d1..97ff872c7acc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -86,9 +86,7 @@ config KPROBES_ON_FTRACE
 	 optimize on top of function tracing.
 
 config UPROBES
-	bool "Transparent user-space probes (EXPERIMENTAL)"
-	depends on UPROBE_EVENT && PERF_EVENTS
-	default n
+	def_bool n
 	select PERCPU_RWSEM
 	help
 	  Uprobes is the user-space counterpart to kprobes: they
@@ -101,8 +99,6 @@ config UPROBES
 	    managed by the kernel and kept transparent to the probed
 	    application. )
 
-	  If in doubt, say "N".
-
 config HAVE_64BIT_ALIGNED_ACCESS
 	def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
 	help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2a232ce56d9c..19369c87b85a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -207,6 +207,9 @@ config ZONE_DMA
 config NEED_DMA_MAP_STATE
        def_bool y
 
+config ARCH_SUPPORTS_UPROBES
+	def_bool y
+
 config ARCH_HAS_DMA_SET_COHERENT_MASK
 	bool
 
@@ -2272,7 +2275,7 @@ source "kernel/power/Kconfig"
 config ARCH_SUSPEND_POSSIBLE
 	depends on !ARCH_S5PC100
 	depends on CPU_ARM920T || CPU_ARM926T || CPU_FEROCEON || CPU_SA1100 || \
-		CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK
+		CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK
 	def_bool y
 
 config ARM_CPU_SUSPEND
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index b9d6a8b485e0..032030361bef 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -38,6 +38,7 @@ dtb-$(CONFIG_ARCH_AT91) += at91sam9g35ek.dtb
 dtb-$(CONFIG_ARCH_AT91) += at91sam9x25ek.dtb
 dtb-$(CONFIG_ARCH_AT91) += at91sam9x35ek.dtb
 # sama5d3
+dtb-$(CONFIG_ARCH_AT91)	+= at91-sama5d3_xplained.dtb
 dtb-$(CONFIG_ARCH_AT91)	+= sama5d31ek.dtb
 dtb-$(CONFIG_ARCH_AT91)	+= sama5d33ek.dtb
 dtb-$(CONFIG_ARCH_AT91)	+= sama5d34ek.dtb
@@ -208,7 +209,8 @@ dtb-$(CONFIG_ARCH_OMAP2PLUS) += omap2420-h4.dtb \
 	omap3-n900.dtb \
 	omap3-n9.dtb \
 	omap3-n950.dtb \
-	omap3-tobi.dtb \
+	omap3-overo-tobi.dtb \
+	omap3-overo-storm-tobi.dtb \
 	omap3-gta04.dtb \
 	omap3-igep0020.dtb \
 	omap3-igep0030.dtb \
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 4718ec4a4dbf..486880b74831 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -121,7 +121,7 @@
 		ti,model = "AM335x-EVMSK";
 		ti,audio-codec = <&tlv320aic3106>;
 		ti,mcasp-controller = <&mcasp1>;
-		ti,codec-clock-rate = <24576000>;
+		ti,codec-clock-rate = <24000000>;
 		ti,audio-routing =
 			"Headphone Jack",       "HPLOUT",
 			"Headphone Jack",       "HPROUT";
@@ -256,6 +256,12 @@
 		>;
 	};
 
+	mmc1_pins: pinmux_mmc1_pins {
+		pinctrl-single,pins = <
+			0x160 (PIN_INPUT | MUX_MODE7) /* spi0_cs1.gpio0_6 */
+		>;
+	};
+
 	mcasp1_pins: mcasp1_pins {
 		pinctrl-single,pins = <
 			0x10c (PIN_INPUT_PULLDOWN | MUX_MODE4) /* mii1_crs.mcasp1_aclkx */
@@ -456,6 +462,9 @@
 	status = "okay";
 	vmmc-supply = <&vmmc_reg>;
 	bus-width = <4>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc1_pins>;
+	cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
 };
 
 &sham {
diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
index 66609684d41b..9480cf891f8c 100644
--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
@@ -23,6 +23,7 @@
 		gpio0 = &gpio0;
 		gpio1 = &gpio1;
 		gpio2 = &gpio2;
+		eth3 = &eth3;
 	};
 
 	cpus {
@@ -291,7 +292,7 @@
 				interrupts = <91>;
 			};
 
-			ethernet@34000 {
+			eth3: ethernet@34000 {
 				compatible = "marvell,armada-370-neta";
 				reg = <0x34000 0x4000>;
 				interrupts = <14>;
diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
new file mode 100644
index 000000000000..ce1375595e5f
--- /dev/null
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
@@ -0,0 +1,229 @@
+/*
+ * at91-sama5d3_xplained.dts - Device Tree file for the SAMA5D3 Xplained board
+ *
+ *  Copyright (C) 2014 Atmel,
+ *		  2014 Nicolas Ferre <nicolas.ferre@atmel.com>
+ *
+ * Licensed under GPLv2 or later.
+ */
+/dts-v1/;
+#include "sama5d36.dtsi"
+
+/ {
+	model = "SAMA5D3 Xplained";
+	compatible = "atmel,sama5d3-xplained", "atmel,sama5d3", "atmel,sama5";
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+	};
+
+	memory {
+		reg = <0x20000000 0x10000000>;
+	};
+
+	ahb {
+		apb {
+			mmc0: mmc@f0000000 {
+				pinctrl-0 = <&pinctrl_mmc0_clk_cmd_dat0 &pinctrl_mmc0_dat1_3 &pinctrl_mmc0_dat4_7 &pinctrl_mmc0_cd>;
+				status = "okay";
+				slot@0 {
+					reg = <0>;
+					bus-width = <8>;
+					cd-gpios = <&pioE 0 GPIO_ACTIVE_LOW>;
+				};
+			};
+
+			spi0: spi@f0004000 {
+				cs-gpios = <&pioD 13 0>;
+				status = "okay";
+			};
+
+			can0: can@f000c000 {
+				status = "okay";
+			};
+
+			i2c0: i2c@f0014000 {
+				status = "okay";
+			};
+
+			i2c1: i2c@f0018000 {
+				status = "okay";
+			};
+
+			macb0: ethernet@f0028000 {
+				phy-mode = "rgmii";
+				status = "okay";
+			};
+
+			usart0: serial@f001c000 {
+				status = "okay";
+			};
+
+			usart1: serial@f0020000 {
+				pinctrl-0 = <&pinctrl_usart1 &pinctrl_usart1_rts_cts>;
+				status = "okay";
+			};
+
+			uart0: serial@f0024000 {
+				status = "okay";
+			};
+
+			mmc1: mmc@f8000000 {
+				pinctrl-0 = <&pinctrl_mmc1_clk_cmd_dat0 &pinctrl_mmc1_dat1_3 &pinctrl_mmc1_cd>;
+				status = "okay";
+				slot@0 {
+					reg = <0>;
+					bus-width = <4>;
+					cd-gpios = <&pioE 1 GPIO_ACTIVE_HIGH>;
+				};
+			};
+
+			spi1: spi@f8008000 {
+				cs-gpios = <&pioC 25 0>, <0>, <0>, <&pioD 16 0>;
+				status = "okay";
+			};
+
+			adc0: adc@f8018000 {
+				pinctrl-0 = <
+					&pinctrl_adc0_adtrg
+					&pinctrl_adc0_ad0
+					&pinctrl_adc0_ad1
+					&pinctrl_adc0_ad2
+					&pinctrl_adc0_ad3
+					&pinctrl_adc0_ad4
+					&pinctrl_adc0_ad5
+					&pinctrl_adc0_ad6
+					&pinctrl_adc0_ad7
+					&pinctrl_adc0_ad8
+					&pinctrl_adc0_ad9
+					>;
+				status = "okay";
+			};
+
+			i2c2: i2c@f801c000 {
+				dmas = <0>, <0>;	/* Do not use DMA for i2c2 */
+				status = "okay";
+			};
+
+			macb1: ethernet@f802c000 {
+				phy-mode = "rmii";
+				status = "okay";
+			};
+
+			dbgu: serial@ffffee00 {
+				status = "okay";
+			};
+
+			pinctrl@fffff200 {
+				board {
+					pinctrl_mmc0_cd: mmc0_cd {
+						atmel,pins =
+							<AT91_PIOE 0 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+					};
+
+					pinctrl_mmc1_cd: mmc1_cd {
+						atmel,pins =
+							<AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+					};
+
+					pinctrl_usba_vbus: usba_vbus {
+						atmel,pins =
+							<AT91_PIOE 9 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>;	/* PE9, conflicts with A9 */
+					};
+				};
+			};
+
+			pmc: pmc@fffffc00 {
+				main: mainck {
+					clock-frequency = <12000000>;
+				};
+			};
+		};
+
+		nand0: nand@60000000 {
+			nand-bus-width = <8>;
+			nand-ecc-mode = "hw";
+			atmel,has-pmecc;
+			atmel,pmecc-cap = <4>;
+			atmel,pmecc-sector-size = <512>;
+			nand-on-flash-bbt;
+			status = "okay";
+
+			at91bootstrap@0 {
+				label = "at91bootstrap";
+				reg = <0x0 0x40000>;
+			};
+
+			bootloader@40000 {
+				label = "bootloader";
+				reg = <0x40000 0x80000>;
+			};
+
+			bootloaderenv@c0000 {
+				label = "bootloader env";
+				reg = <0xc0000 0xc0000>;
+			};
+
+			dtb@180000 {
+				label = "device tree";
+				reg = <0x180000 0x80000>;
+			};
+
+			kernel@200000 {
+				label = "kernel";
+				reg = <0x200000 0x600000>;
+			};
+
+			rootfs@800000 {
+				label = "rootfs";
+				reg = <0x800000 0x0f800000>;
+			};
+		};
+
+		usb0: gadget@00500000 {
+			atmel,vbus-gpio = <&pioE 9 GPIO_ACTIVE_HIGH>;	/* PE9, conflicts with A9 */
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_usba_vbus>;
+			status = "okay";
+		};
+
+		usb1: ohci@00600000 {
+			num-ports = <3>;
+			atmel,vbus-gpio = <0
+					   &pioE 3 GPIO_ACTIVE_LOW
+					   &pioE 4 GPIO_ACTIVE_LOW
+					  >;
+			status = "okay";
+		};
+
+		usb2: ehci@00700000 {
+			status = "okay";
+		};
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+
+		bp3 {
+			label = "PB_USER";
+			gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
+			linux,code = <0x104>;
+			gpio-key,wakeup;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		d2 {
+			label = "d2";
+			gpios = <&pioE 23 GPIO_ACTIVE_LOW>;	/* PE23, conflicts with A23, CTS2 */
+			linux,default-trigger = "heartbeat";
+		};
+
+		d3 {
+			label = "d3";
+			gpios = <&pioE 24 GPIO_ACTIVE_HIGH>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 0042f73068b0..fece8665fb63 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -523,7 +523,7 @@
 			};
 
 			i2c0: i2c@fff88000 {
-				compatible = "atmel,at91sam9263-i2c";
+				compatible = "atmel,at91sam9260-i2c";
 				reg = <0xfff88000 0x100>;
 				interrupts = <13 IRQ_TYPE_LEVEL_HIGH 6>;
 				#address-cells = <1>;
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index e9487f6f0166..924a6a6ffd0f 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -124,6 +124,10 @@
 			nand-on-flash-bbt;
 			status = "okay";
 		};
+
+		usb0: ohci@00500000 {
+			status = "okay";
+		};
 	};
 
 	leds {
diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi
index 2b76524f4aa7..187fd46b7b5e 100644
--- a/arch/arm/boot/dts/dove.dtsi
+++ b/arch/arm/boot/dts/dove.dtsi
@@ -379,15 +379,6 @@
 				#clock-cells = <1>;
 			};
 
-			pmu_intc: pmu-interrupt-ctrl@d0050 {
-				compatible = "marvell,dove-pmu-intc";
-				interrupt-controller;
-				#interrupt-cells = <1>;
-				reg = <0xd0050 0x8>;
-				interrupts = <33>;
-				marvell,#interrupts = <7>;
-			};
-
 			pinctrl: pin-ctrl@d0200 {
 				compatible = "marvell,dove-pinctrl";
 				reg = <0xd0200 0x10>;
@@ -610,8 +601,6 @@
 			rtc: real-time-clock@d8500 {
 				compatible = "marvell,orion-rtc";
 				reg = <0xd8500 0x20>;
-				interrupt-parent = <&pmu_intc>;
-				interrupts = <5>;
 			};
 
 			gpio2: gpio-ctrl@e8400 {
diff --git a/arch/arm/boot/dts/omap3-gta04.dts b/arch/arm/boot/dts/omap3-gta04.dts
index b9b55c95a566..c551e4af4d83 100644
--- a/arch/arm/boot/dts/omap3-gta04.dts
+++ b/arch/arm/boot/dts/omap3-gta04.dts
@@ -32,7 +32,7 @@
 		aux-button {
 			label = "aux";
 			linux,code = <169>;
-			gpios = <&gpio1 7 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
 			gpio-key,wakeup;
 		};
 	};
@@ -92,6 +92,8 @@
 	bmp085@77 {
 		compatible = "bosch,bmp085";
 		reg = <0x77>;
+		interrupt-parent = <&gpio4>;
+		interrupts = <17 IRQ_TYPE_EDGE_RISING>;
 	};
 
 	/* leds */
@@ -141,8 +143,8 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc1_pins>;
 	vmmc-supply = <&vmmc1>;
-	vmmc_aux-supply = <&vsim>;
 	bus-width = <4>;
+	ti,non-removable;
 };
 
 &mmc2 {
diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts
index 39828ce464ee..9938b5dc1909 100644
--- a/arch/arm/boot/dts/omap3-n9.dts
+++ b/arch/arm/boot/dts/omap3-n9.dts
@@ -14,5 +14,5 @@
 
 / {
 	model = "Nokia N9";
-	compatible = "nokia,omap3-n9", "ti,omap3";
+	compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3";
 };
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 6fc85f963530..0bf40c90faba 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2013 Pavel Machek <pavel@ucw.cz>
- * Copyright 2013 Aaro Koskinen <aaro.koskinen@iki.fi>
+ * Copyright (C) 2013-2014 Aaro Koskinen <aaro.koskinen@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 (or later) as
@@ -13,7 +13,7 @@
 
 / {
 	model = "Nokia N900";
-	compatible = "nokia,omap3-n900", "ti,omap3";
+	compatible = "nokia,omap3-n900", "ti,omap3430", "ti,omap3";
 
 	cpus {
 		cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-n950.dts b/arch/arm/boot/dts/omap3-n950.dts
index b076a526b999..261c5589bfa3 100644
--- a/arch/arm/boot/dts/omap3-n950.dts
+++ b/arch/arm/boot/dts/omap3-n950.dts
@@ -14,5 +14,5 @@
 
 / {
 	model = "Nokia N950";
-	compatible = "nokia,omap3-n950", "ti,omap3";
+	compatible = "nokia,omap3-n950", "ti,omap36xx", "ti,omap3";
 };
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
new file mode 100644
index 000000000000..966b5c9cd96a
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 Florian Vaussard, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Tobi expansion board is manufactured by Gumstix Inc.
+ */
+
+/dts-v1/;
+
+#include "omap36xx.dtsi"
+#include "omap3-overo-tobi-common.dtsi"
+
+/ {
+	model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Tobi";
+	compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+};
+
diff --git a/arch/arm/boot/dts/omap3-tobi.dts b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
index 7e4ad2aec37a..4edc013a91c1 100644
--- a/arch/arm/boot/dts/omap3-tobi.dts
+++ b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
@@ -13,9 +13,6 @@
 #include "omap3-overo.dtsi"
 
 / {
-	model = "TI OMAP3 Gumstix Overo on Tobi";
-	compatible = "ti,omap3-tobi", "ti,omap3-overo", "ti,omap3";
-
 	leds {
 		compatible = "gpio-leds";
 		heartbeat {
diff --git a/arch/arm/boot/dts/omap3-overo-tobi.dts b/arch/arm/boot/dts/omap3-overo-tobi.dts
new file mode 100644
index 000000000000..de5653e1b5ca
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-overo-tobi.dts
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 Florian Vaussard, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Tobi expansion board is manufactured by Gumstix Inc.
+ */
+
+/dts-v1/;
+
+#include "omap34xx.dtsi"
+#include "omap3-overo-tobi-common.dtsi"
+
+/ {
+	model = "OMAP35xx Gumstix Overo on Tobi";
+	compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3";
+};
+
diff --git a/arch/arm/boot/dts/omap3-overo.dtsi b/arch/arm/boot/dts/omap3-overo.dtsi
index a461d2fd1fb0..597099907f8e 100644
--- a/arch/arm/boot/dts/omap3-overo.dtsi
+++ b/arch/arm/boot/dts/omap3-overo.dtsi
@@ -9,9 +9,6 @@
 /*
  * The Gumstix Overo must be combined with an expansion board.
  */
-/dts-v1/;
-
-#include "omap34xx.dtsi"
 
 / {
 	pwmleds {
diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
index 52447c17537a..3d5faf85f51b 100644
--- a/arch/arm/boot/dts/sama5d3.dtsi
+++ b/arch/arm/boot/dts/sama5d3.dtsi
@@ -1228,7 +1228,7 @@
 			compatible = "atmel,at91rm9200-ohci", "usb-ohci";
 			reg = <0x00600000 0x100000>;
 			interrupts = <32 IRQ_TYPE_LEVEL_HIGH 2>;
-			clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>,
+			clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>,
 				 <&uhpck>;
 			clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck";
 			status = "disabled";
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index 0c1e8d871ed1..6cb9b68e2188 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -188,7 +188,6 @@
 		msp2: msp@80117000 {
 			pinctrl-names = "default";
 			pinctrl-0 = <&msp2_default_mode>;
-			status = "okay";
 		};
 
 		msp3: msp@80125000 {
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 040bb0eba152..10666ca8aee1 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -315,7 +315,7 @@
 		ranges;
 
 		emac: ethernet@01c0b000 {
-			compatible = "allwinner,sun4i-emac";
+			compatible = "allwinner,sun4i-a10-emac";
 			reg = <0x01c0b000 0x1000>;
 			interrupts = <55>;
 			clocks = <&ahb_gates 17>;
@@ -323,7 +323,7 @@
 		};
 
 		mdio@01c0b080 {
-			compatible = "allwinner,sun4i-mdio";
+			compatible = "allwinner,sun4i-a10-mdio";
 			reg = <0x01c0b080 0x14>;
 			status = "disabled";
 			#address-cells = <1>;
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index ea16054857a4..64961595e8d6 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -278,7 +278,7 @@
 		ranges;
 
 		emac: ethernet@01c0b000 {
-			compatible = "allwinner,sun4i-emac";
+			compatible = "allwinner,sun4i-a10-emac";
 			reg = <0x01c0b000 0x1000>;
 			interrupts = <55>;
 			clocks = <&ahb_gates 17>;
@@ -286,7 +286,7 @@
 		};
 
 		mdio@01c0b080 {
-			compatible = "allwinner,sun4i-mdio";
+			compatible = "allwinner,sun4i-a10-mdio";
 			reg = <0x01c0b080 0x14>;
 			status = "disabled";
 			#address-cells = <1>;
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 119f066f0d98..9ff09484847b 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -340,7 +340,7 @@
 		ranges;
 
 		emac: ethernet@01c0b000 {
-			compatible = "allwinner,sun4i-emac";
+			compatible = "allwinner,sun4i-a10-emac";
 			reg = <0x01c0b000 0x1000>;
 			interrupts = <0 55 4>;
 			clocks = <&ahb_gates 17>;
@@ -348,7 +348,7 @@
 		};
 
 		mdio@01c0b080 {
-			compatible = "allwinner,sun4i-mdio";
+			compatible = "allwinner,sun4i-a10-mdio";
 			reg = <0x01c0b080 0x14>;
 			status = "disabled";
 			#address-cells = <1>;
diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 389e987ec281..44ec401ec366 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -57,6 +57,8 @@
 			resets = <&tegra_car 27>;
 			reset-names = "dc";
 
+			nvidia,head = <0>;
+
 			rgb {
 				status = "disabled";
 			};
@@ -72,6 +74,8 @@
 			resets = <&tegra_car 26>;
 			reset-names = "dc";
 
+			nvidia,head = <1>;
+
 			rgb {
 				status = "disabled";
 			};
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 480ecda3416b..48d2a7f4d0c0 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -94,6 +94,8 @@
 			resets = <&tegra_car 27>;
 			reset-names = "dc";
 
+			nvidia,head = <0>;
+
 			rgb {
 				status = "disabled";
 			};
@@ -109,6 +111,8 @@
 			resets = <&tegra_car 26>;
 			reset-names = "dc";
 
+			nvidia,head = <1>;
+
 			rgb {
 				status = "disabled";
 			};
diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi
index 9104224124ee..1e156d9d0506 100644
--- a/arch/arm/boot/dts/tegra30-cardhu.dtsi
+++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi
@@ -28,7 +28,7 @@
 	compatible = "nvidia,cardhu", "nvidia,tegra30";
 
 	aliases {
-		rtc0 = "/i2c@7000d000/tps6586x@34";
+		rtc0 = "/i2c@7000d000/tps65911@2d";
 		rtc1 = "/rtc@7000e000";
 	};
 
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index ed8e7700b46d..19a84e933f4e 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -170,6 +170,8 @@
 			resets = <&tegra_car 27>;
 			reset-names = "dc";
 
+			nvidia,head = <0>;
+
 			rgb {
 				status = "disabled";
 			};
@@ -185,6 +187,8 @@
 			resets = <&tegra_car 26>;
 			reset-names = "dc";
 
+			nvidia,head = <1>;
+
 			rgb {
 				status = "disabled";
 			};
diff --git a/arch/arm/boot/dts/testcases/tests-interrupts.dtsi b/arch/arm/boot/dts/testcases/tests-interrupts.dtsi
deleted file mode 100644
index c843720bd3e5..000000000000
--- a/arch/arm/boot/dts/testcases/tests-interrupts.dtsi
+++ /dev/null
@@ -1,58 +0,0 @@
-
-/ {
-	testcase-data {
-		interrupts {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			test_intc0: intc0 {
-				interrupt-controller;
-				#interrupt-cells = <1>;
-			};
-
-			test_intc1: intc1 {
-				interrupt-controller;
-				#interrupt-cells = <3>;
-			};
-
-			test_intc2: intc2 {
-				interrupt-controller;
-				#interrupt-cells = <2>;
-			};
-
-			test_intmap0: intmap0 {
-				#interrupt-cells = <1>;
-				#address-cells = <0>;
-				interrupt-map = <1 &test_intc0 9>,
-						<2 &test_intc1 10 11 12>,
-						<3 &test_intc2 13 14>,
-						<4 &test_intc2 15 16>;
-			};
-
-			test_intmap1: intmap1 {
-				#interrupt-cells = <2>;
-				interrupt-map = <0x5000 1 2 &test_intc0 15>;
-			};
-
-			interrupts0 {
-				interrupt-parent = <&test_intc0>;
-				interrupts = <1>, <2>, <3>, <4>;
-			};
-
-			interrupts1 {
-				interrupt-parent = <&test_intmap0>;
-				interrupts = <1>, <2>, <3>, <4>;
-			};
-
-			interrupts-extended0 {
-				reg = <0x5000 0x100>;
-				interrupts-extended = <&test_intc0 1>,
-						      <&test_intc1 2 3 4>,
-						      <&test_intc2 5 6>,
-						      <&test_intmap0 1>,
-						      <&test_intmap0 2>,
-						      <&test_intmap0 3>,
-						      <&test_intmap1 1 2>;
-			};
-		};
-	};
-};
diff --git a/arch/arm/boot/dts/testcases/tests-phandle.dtsi b/arch/arm/boot/dts/testcases/tests-phandle.dtsi
deleted file mode 100644
index 0007d3cd7dc2..000000000000
--- a/arch/arm/boot/dts/testcases/tests-phandle.dtsi
+++ /dev/null
@@ -1,39 +0,0 @@
-
-/ {
-	testcase-data {
-		phandle-tests {
-			provider0: provider0 {
-				#phandle-cells = <0>;
-			};
-
-			provider1: provider1 {
-				#phandle-cells = <1>;
-			};
-
-			provider2: provider2 {
-				#phandle-cells = <2>;
-			};
-
-			provider3: provider3 {
-				#phandle-cells = <3>;
-			};
-
-			consumer-a {
-				phandle-list =	<&provider1 1>,
-						<&provider2 2 0>,
-						<0>,
-						<&provider3 4 4 3>,
-						<&provider2 5 100>,
-						<&provider0>,
-						<&provider1 7>;
-				phandle-list-names = "first", "second", "third";
-
-				phandle-list-bad-phandle = <12345678 0 0>;
-				phandle-list-bad-args = <&provider2 1 0>,
-							<&provider3 0>;
-				empty-property;
-				unterminated-string = [40 41 42 43];
-			};
-		};
-	};
-};
diff --git a/arch/arm/boot/dts/testcases/tests.dtsi b/arch/arm/boot/dts/testcases/tests.dtsi
deleted file mode 100644
index 3f123ecc9dd7..000000000000
--- a/arch/arm/boot/dts/testcases/tests.dtsi
+++ /dev/null
@@ -1,2 +0,0 @@
-/include/ "tests-phandle.dtsi"
-/include/ "tests-interrupts.dtsi"
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index f43907c40c93..65f657711323 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -1,4 +1,4 @@
-/include/ "versatile-ab.dts"
+#include <versatile-ab.dts>
 
 / {
 	model = "ARM Versatile PB";
@@ -47,4 +47,4 @@
 	};
 };
 
-/include/ "testcases/tests.dtsi"
+#include <testcases.dtsi>
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 4bdc41622c36..70b1eff477b3 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
 obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
+CFLAGS_REMOVE_mcpm_entry.o	= -pg
 AFLAGS_mcpm_head.o		:= -march=armv7-a
 AFLAGS_vlock.o			:= -march=armv7-a
 obj-$(CONFIG_TI_PRIV_EDMA)	+= edma.o
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index a5c3dc38aa18..6ef146edd0cd 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -232,8 +232,6 @@ static int scoop_probe(struct platform_device *pdev)
 
 	return 0;
 
-	if (devptr->gpio.base != -1)
-		temp = gpiochip_remove(&devptr->gpio);
 err_gpio:
 	platform_set_drvdata(pdev, NULL);
 err_ioremap:
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 845bc745706b..ee6982976d66 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -29,6 +29,7 @@ CONFIG_ARCH_OMAP3=y
 CONFIG_ARCH_OMAP4=y
 CONFIG_SOC_OMAP5=y
 CONFIG_SOC_AM33XX=y
+CONFIG_SOC_DRA7XX=y
 CONFIG_SOC_AM43XX=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SOCFPGA=y
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 5c2285160575..b974184f9941 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -23,6 +23,7 @@
 #include <asm/ptrace.h>
 #include <asm/domain.h>
 #include <asm/opcodes-virt.h>
+#include <asm/asm-offsets.h>
 
 #define IOMEM(x)	(x)
 
@@ -30,8 +31,8 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define lspull          lsr
+#define lspush          lsl
 #define get_byte_0      lsl #0
 #define get_byte_1	lsr #8
 #define get_byte_2	lsr #16
@@ -41,8 +42,8 @@
 #define put_byte_2	lsl #16
 #define put_byte_3	lsl #24
 #else
-#define pull            lsl
-#define push            lsr
+#define lspull          lsl
+#define lspush          lsr
 #define get_byte_0	lsr #24
 #define get_byte_1	lsr #16
 #define get_byte_2	lsr #8
@@ -174,6 +175,47 @@
 	restore_irqs_notrace \oldcpsr
 	.endm
 
+/*
+ * Get current thread_info.
+ */
+	.macro	get_thread_info, rd
+ ARM(	mov	\rd, sp, lsr #13	)
+ THUMB(	mov	\rd, sp			)
+ THUMB(	lsr	\rd, \rd, #13		)
+	mov	\rd, \rd, lsl #13
+	.endm
+
+/*
+ * Increment/decrement the preempt count.
+ */
+#ifdef CONFIG_PREEMPT_COUNT
+	.macro	inc_preempt_count, ti, tmp
+	ldr	\tmp, [\ti, #TI_PREEMPT]	@ get preempt count
+	add	\tmp, \tmp, #1			@ increment it
+	str	\tmp, [\ti, #TI_PREEMPT]
+	.endm
+
+	.macro	dec_preempt_count, ti, tmp
+	ldr	\tmp, [\ti, #TI_PREEMPT]	@ get preempt count
+	sub	\tmp, \tmp, #1			@ decrement it
+	str	\tmp, [\ti, #TI_PREEMPT]
+	.endm
+
+	.macro	dec_preempt_count_ti, ti, tmp
+	get_thread_info \ti
+	dec_preempt_count \ti, \tmp
+	.endm
+#else
+	.macro	inc_preempt_count, ti, tmp
+	.endm
+
+	.macro	dec_preempt_count, ti, tmp
+	.endm
+
+	.macro	dec_preempt_count_ti, ti, tmp
+	.endm
+#endif
+
 #define USER(x...)				\
 9999:	x;					\
 	.pushsection __ex_table,"a";		\
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 62d2cb53b069..9a92fd7864a8 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -60,6 +60,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_add_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -99,6 +100,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	int result;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic_sub_return\n"
 "1:	ldrex	%0, [%3]\n"
@@ -121,6 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
@@ -138,6 +141,33 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	return oldval;
 }
 
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int oldval, newval;
+	unsigned long tmp;
+
+	smp_mb();
+	prefetchw(&v->counter);
+
+	__asm__ __volatile__ ("@ atomic_add_unless\n"
+"1:	ldrex	%0, [%4]\n"
+"	teq	%0, %5\n"
+"	beq	2f\n"
+"	add	%1, %0, %6\n"
+"	strex	%2, %1, [%4]\n"
+"	teq	%2, #0\n"
+"	bne	1b\n"
+"2:"
+	: "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
+	: "r" (&v->counter), "r" (u), "r" (a)
+	: "cc");
+
+	if (oldval != u)
+		smp_mb();
+
+	return oldval;
+}
+
 #else /* ARM_ARCH_6 */
 
 #ifdef CONFIG_SMP
@@ -186,10 +216,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-#endif /* __LINUX_ARM_ARCH__ */
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
@@ -200,6 +226,10 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#endif /* __LINUX_ARM_ARCH__ */
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 #define atomic_inc(v)		atomic_add(1, v)
 #define atomic_dec(v)		atomic_sub(1, v)
 
@@ -299,6 +329,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -340,6 +371,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -364,6 +396,7 @@ static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
 	unsigned long res;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	do {
 		__asm__ __volatile__("@ atomic64_cmpxchg\n"
@@ -388,6 +421,7 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&ptr->counter);
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -409,6 +443,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
@@ -436,6 +471,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 	int ret = 1;
 
 	smp_mb();
+	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_add_unless\n"
 "1:	ldrexd	%0, %H0, [%4]\n"
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index df2fbba7efc8..abb2c3769b01 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
 #define __ASM_ARM_CMPXCHG_H
 
 #include <linux/irqflags.h>
+#include <linux/prefetch.h>
 #include <asm/barrier.h>
 
 #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
@@ -35,6 +36,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 #endif
 
 	smp_mb();
+	prefetchw((const void *)ptr);
 
 	switch (size) {
 #if __LINUX_ARM_ARCH__ >= 6
@@ -138,6 +140,8 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 {
 	unsigned long oldval, res;
 
+	prefetchw((const void *)ptr);
+
 	switch (size) {
 #ifndef CONFIG_CPU_V6	/* min ARCH >= ARMv6K */
 	case 1:
@@ -230,6 +234,8 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 	unsigned long long oldval;
 	unsigned long res;
 
+	prefetchw(ptr);
+
 	__asm__ __volatile__(
 "1:	ldrexd		%1, %H1, [%3]\n"
 "	teq		%1, %4\n"
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index acdde76b39bb..c651e3b26ec7 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -71,6 +71,7 @@
 #define ARM_CPU_PART_CORTEX_A5		0xC050
 #define ARM_CPU_PART_CORTEX_A15		0xC0F0
 #define ARM_CPU_PART_CORTEX_A7		0xC070
+#define ARM_CPU_PART_CORTEX_A12		0xC0D0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
@@ -220,4 +221,23 @@ static inline int cpu_is_xsc3(void)
 #define	cpu_is_xscale()	1
 #endif
 
+/*
+ * Marvell's PJ4 core is based on V7 version. It has some modification
+ * for coprocessor setting. For this reason, we need a way to distinguish
+ * it.
+ */
+#ifndef CONFIG_CPU_PJ4
+#define cpu_is_pj4()	0
+#else
+static inline int cpu_is_pj4(void)
+{
+	unsigned int id;
+
+	id = read_cpuid_id();
+	if ((id & 0xfffffff0) == 0x562f5840)
+		return 1;
+
+	return 0;
+}
+#endif
 #endif
diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h
index c9f03eccc9d8..f4882553fbb0 100644
--- a/arch/arm/include/asm/floppy.h
+++ b/arch/arm/include/asm/floppy.h
@@ -25,7 +25,7 @@
 
 #define fd_inb(port)		inb((port))
 #define fd_request_irq()	request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\
-					    IRQF_DISABLED,"floppy",NULL)
+					    0,"floppy",NULL)
 #define fd_free_irq()		free_irq(IRQ_FLOPPYDISK,NULL)
 #define fd_disable_irq()	disable_irq(IRQ_FLOPPYDISK)
 #define fd_enable_irq()		enable_irq(IRQ_FLOPPYDISK)
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index e42cf597f6e6..53e69dae796f 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -3,11 +3,6 @@
 
 #ifdef __KERNEL__
 
-#if defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_SMP)
-/* ARM doesn't provide unprivileged exclusive memory accessors */
-#include <asm-generic/futex.h>
-#else
-
 #include <linux/futex.h>
 #include <linux/uaccess.h>
 #include <asm/errno.h>
@@ -28,6 +23,7 @@
 
 #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg)	\
 	smp_mb();						\
+	prefetchw(uaddr);					\
 	__asm__ __volatile__(					\
 	"1:	ldrex	%1, [%3]\n"				\
 	"	" insn "\n"					\
@@ -51,6 +47,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	smp_mb();
+	/* Prefetching cannot fault */
+	prefetchw(uaddr);
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
 	"1:	ldrex	%1, [%4]\n"
 	"	teq	%1, %2\n"
@@ -164,6 +162,5 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 	return ret;
 }
 
-#endif /* !(CPU_USE_DOMAINS && SMP) */
 #endif /* __KERNEL__ */
 #endif /* _ASM_ARM_FUTEX_H */
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index eef55ea9ef00..8e427c7b4425 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -51,6 +51,7 @@ static inline void decode_ctrl_reg(u32 reg,
 #define ARM_DEBUG_ARCH_V7_ECP14	3
 #define ARM_DEBUG_ARCH_V7_MM	4
 #define ARM_DEBUG_ARCH_V7_1	5
+#define ARM_DEBUG_ARCH_V8	6
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index 6ff56eca3f1f..6e183fd269fb 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -9,6 +9,7 @@
  * instruction set this cpu supports.
  */
 #define ELF_HWCAP	(elf_hwcap)
-extern unsigned int elf_hwcap;
+#define ELF_HWCAP2	(elf_hwcap2)
+extern unsigned int elf_hwcap, elf_hwcap2;
 #endif
 #endif
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 863c892b4aaa..70f9b9bfb1f9 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -4,7 +4,6 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <asm/system.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index f82ec22eeb11..49fa0dfaad33 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -18,7 +18,7 @@
 
 #include <linux/types.h>
 #include <linux/ptrace.h>
-#include <linux/percpu.h>
+#include <linux/notifier.h>
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 #define MAX_INSN_SIZE			2
@@ -28,21 +28,10 @@
 #define kretprobe_blacklist_size	0
 
 typedef u32 kprobe_opcode_t;
-
 struct kprobe;
-typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *);
-typedef unsigned long (kprobe_check_cc)(unsigned long);
-typedef void (kprobe_insn_singlestep_t)(struct kprobe *, struct pt_regs *);
-typedef void (kprobe_insn_fn_t)(void);
+#include <asm/probes.h>
 
-/* Architecture specific copy of original instruction. */
-struct arch_specific_insn {
-	kprobe_opcode_t			*insn;
-	kprobe_insn_handler_t		*insn_handler;
-	kprobe_check_cc			*insn_check_cc;
-	kprobe_insn_singlestep_t	*insn_singlestep;
-	kprobe_insn_fn_t		*insn_fn;
-};
+#define	arch_specific_insn	arch_probes_insn
 
 struct prev_kprobe {
 	struct kprobe *kp;
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4afb376d9c7c..02fa2558f662 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -166,9 +166,17 @@
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ *
+ * PFNs are used to describe any physical page; this means
+ * PFN 0 == physical address 0.
  */
-#ifndef __virt_to_phys
-#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+#if defined(__virt_to_phys)
+#define PHYS_OFFSET	PLAT_PHYS_OFFSET
+#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
+
+#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
+
+#elif defined(CONFIG_ARM_PATCH_PHYS_VIRT)
 
 /*
  * Constants used to force the right instruction encodings and shifts
@@ -177,12 +185,17 @@
 #define __PV_BITS_31_24	0x81000000
 #define __PV_BITS_7_0	0x81
 
-extern u64 __pv_phys_offset;
+extern unsigned long __pv_phys_pfn_offset;
 extern u64 __pv_offset;
 extern void fixup_pv_table(const void *, unsigned long);
 extern const void *__pv_table_begin, *__pv_table_end;
 
-#define PHYS_OFFSET __pv_phys_offset
+#define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
+#define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
+
+#define virt_to_pfn(kaddr) \
+	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
+	 PHYS_PFN_OFFSET)
 
 #define __pv_stub(from,to,instr,type)			\
 	__asm__("@ __pv_stub\n"				\
@@ -243,6 +256,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 #else
 
 #define PHYS_OFFSET	PLAT_PHYS_OFFSET
+#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
 static inline phys_addr_t __virt_to_phys(unsigned long x)
 {
@@ -254,18 +268,11 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 	return x - PHYS_OFFSET + PAGE_OFFSET;
 }
 
-#endif
-#endif
+#define virt_to_pfn(kaddr) \
+	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
+	 PHYS_PFN_OFFSET)
 
-/*
- * PFNs are used to describe any physical page; this means
- * PFN 0 == physical address 0.
- *
- * This is the PFN of the first RAM page in the kernel
- * direct-mapped view.  We assume this is the first page
- * of RAM in the mem_map as well.
- */
-#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
+#endif
 
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
@@ -343,9 +350,9 @@ static inline __deprecated void *bus_to_virt(unsigned long x)
  */
 #define ARCH_PFN_OFFSET		PHYS_PFN_OFFSET
 
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define virt_addr_valid(kaddr)	(((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) \
-					&& pfn_valid(__pa(kaddr) >> PAGE_SHIFT) )
+					&& pfn_valid(virt_to_pfn(kaddr)))
 
 #endif
 
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index dfff709fda3c..219ac88a9542 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -140,6 +140,7 @@
 #define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
 #define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
 #define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
+#define L_PTE_MT_VECTORS	(_AT(pteval_t, 0x0f) << 2)	/* 1111 */
 #define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 7d59b524f2af..5478e5d6ad89 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -216,13 +216,16 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 
 #define pte_none(pte)		(!pte_val(pte))
 #define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
+#define pte_valid(pte)		(pte_val(pte) & L_PTE_VALID)
+#define pte_accessible(mm, pte)	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
 #define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
 #define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
 #define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
 #define pte_special(pte)	(0)
 
-#define pte_present_user(pte)  (pte_present(pte) && (pte_val(pte) & L_PTE_USER))
+#define pte_valid_user(pte)	\
+	(pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
 
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
@@ -237,7 +240,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 {
 	unsigned long ext = 0;
 
-	if (addr < TASK_SIZE && pte_present_user(pteval)) {
+	if (addr < TASK_SIZE && pte_valid_user(pteval)) {
 		__sync_icache_dcache(pteval);
 		ext |= PTE_EXT_NG;
 	}
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad26c70..ae1919be8f98 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -71,6 +71,8 @@ struct arm_pmu {
 	void		(*disable)(struct perf_event *event);
 	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
 					 struct perf_event *event);
+	void		(*clear_event_idx)(struct pmu_hw_events *hw_events,
+					 struct perf_event *event);
 	int		(*set_event_filter)(struct hw_perf_event *evt,
 					    struct perf_event_attr *attr);
 	u32		(*read_counter)(struct perf_event *event);
diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h
new file mode 100644
index 000000000000..806cfe622a9e
--- /dev/null
+++ b/arch/arm/include/asm/probes.h
@@ -0,0 +1,43 @@
+/*
+ * arch/arm/include/asm/probes.h
+ *
+ * Original contents copied from arch/arm/include/asm/kprobes.h
+ * which contains the following notice...
+ *
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ASM_PROBES_H
+#define _ASM_PROBES_H
+
+typedef u32 probes_opcode_t;
+
+struct arch_probes_insn;
+typedef void (probes_insn_handler_t)(probes_opcode_t,
+				     struct arch_probes_insn *,
+				     struct pt_regs *);
+typedef unsigned long (probes_check_cc)(unsigned long);
+typedef void (probes_insn_singlestep_t)(probes_opcode_t,
+					struct arch_probes_insn *,
+					struct pt_regs *);
+typedef void (probes_insn_fn_t)(void);
+
+/* Architecture specific copy of original instruction. */
+struct arch_probes_insn {
+	probes_opcode_t			*insn;
+	probes_insn_handler_t		*insn_handler;
+	probes_check_cc			*insn_check_cc;
+	probes_insn_singlestep_t	*insn_singlestep;
+	probes_insn_fn_t		*insn_fn;
+};
+
+#endif
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 04c99f36ff7f..c877654fe3bf 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -27,9 +27,13 @@ struct pt_regs {
 #define thumb_mode(regs) (0)
 #endif
 
+#ifndef CONFIG_CPU_V7M
 #define isa_mode(regs) \
-	((((regs)->ARM_cpsr & PSR_J_BIT) >> 23) | \
-	 (((regs)->ARM_cpsr & PSR_T_BIT) >> 5))
+	((((regs)->ARM_cpsr & PSR_J_BIT) >> (__ffs(PSR_J_BIT) - 1)) | \
+	 (((regs)->ARM_cpsr & PSR_T_BIT) >> (__ffs(PSR_T_BIT))))
+#else
+#define isa_mode(regs) 1 /* Thumb */
+#endif
 
 #define processor_mode(regs) \
 	((regs)->ARM_cpsr & MODE_MASK)
@@ -80,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs)
 
 #define instruction_pointer(regs)	(regs)->ARM_pc
 
+static inline void instruction_pointer_set(struct pt_regs *regs,
+					   unsigned long val)
+{
+	instruction_pointer(regs) = val;
+}
+
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
 #else
diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
index 63479eecbf76..9732b8e11e63 100644
--- a/arch/arm/include/asm/sync_bitops.h
+++ b/arch/arm/include/asm/sync_bitops.h
@@ -2,7 +2,6 @@
 #define __ASM_SYNC_BITOPS_H__
 
 #include <asm/bitops.h>
-#include <asm/system.h>
 
 /* sync_bitops functions are equivalent to the SMP implementation of the
  * original functions, independently from CONFIG_SMP being defined.
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
deleted file mode 100644
index 368165e33c1c..000000000000
--- a/arch/arm/include/asm/system.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* FILE TO BE DELETED. DO NOT ADD STUFF HERE! */
-#include <asm/barrier.h>
-#include <asm/compiler.h>
-#include <asm/cmpxchg.h>
-#include <asm/switch_to.h>
-#include <asm/system_info.h>
-#include <asm/system_misc.h>
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 71a06b293489..f989d7c22dc5 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -153,6 +153,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_SIGPENDING		0
 #define TIF_NEED_RESCHED	1
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
+#define TIF_UPROBE		7
 #define TIF_SYSCALL_TRACE	8
 #define TIF_SYSCALL_AUDIT	9
 #define TIF_SYSCALL_TRACEPOINT	10
@@ -165,6 +166,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
@@ -178,7 +180,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 /*
  * Change these and you break ASM code in entry-common.S
  */
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_RESUME)
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 72abdc541f38..12c3a5decc60 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -19,7 +19,7 @@
 #include <asm/unified.h>
 #include <asm/compiler.h>
 
-#if __LINUX_ARM_ARCH__ < 6
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #include <asm-generic/uaccess-unaligned.h>
 #else
 #define __get_user_unaligned __get_user
diff --git a/arch/arm/include/asm/uprobes.h b/arch/arm/include/asm/uprobes.h
new file mode 100644
index 000000000000..9472c20b7d49
--- /dev/null
+++ b/arch/arm/include/asm/uprobes.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012 Rabin Vincent <rabin at rab.in>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <asm/probes.h>
+#include <asm/opcodes.h>
+
+typedef u32 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES		4
+#define UPROBE_XOL_SLOT_BYTES	64
+
+#define UPROBE_SWBP_ARM_INSN	0xe7f001f9
+#define UPROBE_SS_ARM_INSN	0xe7f001fa
+#define UPROBE_SWBP_INSN	__opcode_to_mem_arm(UPROBE_SWBP_ARM_INSN)
+#define UPROBE_SWBP_INSN_SIZE	4
+
+struct arch_uprobe_task {
+	u32 backup;
+	unsigned long	saved_trap_no;
+};
+
+struct arch_uprobe {
+	u8 insn[MAX_UINSN_BYTES];
+	unsigned long ixol[2];
+	uprobe_opcode_t bpinsn;
+	bool simulate;
+	u32 pcreg;
+	void (*prehandler)(struct arch_uprobe *auprobe,
+			   struct arch_uprobe_task *autask,
+			   struct pt_regs *regs);
+	void (*posthandler)(struct arch_uprobe *auprobe,
+			    struct arch_uprobe_task *autask,
+			    struct pt_regs *regs);
+	struct arch_probes_insn asi;
+};
+
+#endif
diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h
index 7dcc10d67253..20d12f230a2f 100644
--- a/arch/arm/include/uapi/asm/hwcap.h
+++ b/arch/arm/include/uapi/asm/hwcap.h
@@ -28,4 +28,13 @@
 #define HWCAP_LPAE	(1 << 20)
 #define HWCAP_EVTSTRM	(1 << 21)
 
+/*
+ * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2
+ */
+#define HWCAP2_AES	(1 << 0)
+#define HWCAP2_PMULL	(1 << 1)
+#define HWCAP2_SHA1	(1 << 2)
+#define HWCAP2_SHA2	(1 << 3)
+#define HWCAP2_CRC32	(1 << 4)
+
 #endif /* _UAPI__ASMARM_HWCAP_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index a30fc9be9e9e..a766bcbaf8ad 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -50,11 +50,12 @@ obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
-obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-common.o patch.o
+obj-$(CONFIG_UPROBES)		+= probes.o probes-arm.o uprobes.o uprobes-arm.o
+obj-$(CONFIG_KPROBES)		+= probes.o kprobes.o kprobes-common.o patch.o
 ifdef CONFIG_THUMB2_KERNEL
-obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o
+obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o probes-thumb.o
 else
-obj-$(CONFIG_KPROBES)		+= kprobes-arm.o
+obj-$(CONFIG_KPROBES)		+= kprobes-arm.o probes-arm.o
 endif
 obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
 test-kprobes-objs		:= kprobes-test.o
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 85e664b6a5f1..f7b450f97e68 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -158,6 +158,6 @@ EXPORT_SYMBOL(__gnu_mcount_nc);
 #endif
 
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
-EXPORT_SYMBOL(__pv_phys_offset);
+EXPORT_SYMBOL(__pv_phys_pfn_offset);
 EXPORT_SYMBOL(__pv_offset);
 #endif
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 317da88ae65b..91f48804e3bb 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -608,41 +608,10 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
  */
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx = 0; idx < 6; idx++) {
-		/* Only set up the requested stuff */
-		if (!(mask & (1 << idx)))
-			continue;
-
-		r = dev->resource + idx;
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because"
-			       " of resource collisions\n", pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
+	if (pci_has_flag(PCI_PROBE_ONLY))
+		return 0;
 
-	/*
-	 * Bridges (eg, cardbus bridges) need to be fully enabled
-	 */
-	if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
-		cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
-
-	if (cmd != old_cmd) {
-		printk("PCI: enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
+	return pci_enable_resources(dev, mask);
 }
 
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 39f89fbd5111..1420725142ca 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -236,11 +236,6 @@
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
 
-	.macro	get_thread_info, rd
-	mov	\rd, sp, lsr #13
-	mov	\rd, \rd, lsl #13
-	.endm
-
 	@
 	@ 32-bit wide "mov pc, reg"
 	@
@@ -306,12 +301,6 @@
 	.endm
 #endif	/* ifdef CONFIG_CPU_V7M / else */
 
-	.macro	get_thread_info, rd
-	mov	\rd, sp
-	lsr	\rd, \rd, #13
-	mov	\rd, \rd, lsl #13
-	.endm
-
 	@
 	@ 32-bit wide "mov pc, reg"
 	@
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f5f381d91556..f8c08839edf3 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -584,9 +584,10 @@ __fixup_pv_table:
 	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
 	add	r4, r4, r3	@ adjust table start address
 	add	r5, r5, r3	@ adjust table end address
-	add	r6, r6, r3	@ adjust __pv_phys_offset address
+	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
 	add	r7, r7, r3	@ adjust __pv_offset address
-	str	r8, [r6, #LOW_OFFSET]	@ save computed PHYS_OFFSET to __pv_phys_offset
+	mov	r0, r8, lsr #12	@ convert to PFN
+	str	r0, [r6, #LOW_OFFSET]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
 	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24 @ must be 16MiB aligned
@@ -600,7 +601,7 @@ ENDPROC(__fixup_pv_table)
 1:	.long	.
 	.long	__pv_table_begin
 	.long	__pv_table_end
-2:	.long	__pv_phys_offset
+2:	.long	__pv_phys_pfn_offset
 	.long	__pv_offset
 
 	.text
@@ -688,11 +689,11 @@ ENTRY(fixup_pv_table)
 ENDPROC(fixup_pv_table)
 
 	.data
-	.globl	__pv_phys_offset
-	.type	__pv_phys_offset, %object
-__pv_phys_offset:
-	.quad	0
-	.size	__pv_phys_offset, . -__pv_phys_offset
+	.globl	__pv_phys_pfn_offset
+	.type	__pv_phys_pfn_offset, %object
+__pv_phys_pfn_offset:
+	.word	0
+	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
 
 	.globl	__pv_offset
 	.type	__pv_offset, %object
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 3d446605cbf8..9da35c6d3411 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -167,7 +167,7 @@ static int debug_arch_supported(void)
 /* Can we determine the watchpoint access type from the fsr? */
 static int debug_exception_updates_fsr(void)
 {
-	return 0;
+	return get_debug_arch() >= ARM_DEBUG_ARCH_V8;
 }
 
 /* Determine number of WRP registers available. */
@@ -257,6 +257,7 @@ static int enable_monitor_mode(void)
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
 	case ARM_DEBUG_ARCH_V7_1:
+	case ARM_DEBUG_ARCH_V8:
 		ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		isb();
 		break;
diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/kernel/kprobes-arm.c
index 8a30c89da70e..ac300c60d656 100644
--- a/arch/arm/kernel/kprobes-arm.c
+++ b/arch/arm/kernel/kprobes-arm.c
@@ -60,13 +60,10 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
-#include <linux/module.h>
+#include <linux/ptrace.h>
 
 #include "kprobes.h"
-
-#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit)))))
-
-#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
+#include "probes-arm.h"
 
 #if  __LINUX_ARM_ARCH__ >= 6
 #define BLX(reg)	"blx	"reg"		\n\t"
@@ -75,92 +72,11 @@
 			"mov	pc, "reg"	\n\t"
 #endif
 
-/*
- * To avoid the complications of mimicing single-stepping on a
- * processor without a Next-PC or a single-step mode, and to
- * avoid having to deal with the side-effects of boosting, we
- * simulate or emulate (almost) all ARM instructions.
- *
- * "Simulation" is where the instruction's behavior is duplicated in
- * C code.  "Emulation" is where the original instruction is rewritten
- * and executed, often by altering its registers.
- *
- * By having all behavior of the kprobe'd instruction completed before
- * returning from the kprobe_handler(), all locks (scheduler and
- * interrupt) can safely be released.  There is no need for secondary
- * breakpoints, no race with MP or preemptable kernels, nor having to
- * clean up resources counts at a later time impacting overall system
- * performance.  By rewriting the instruction, only the minimum registers
- * need to be loaded and saved back optimizing performance.
- *
- * Calling the insnslot_*_rwflags version of a function doesn't hurt
- * anything even when the CPSR flags aren't updated by the
- * instruction.  It's just a little slower in return for saving
- * a little space by not having a duplicate function that doesn't
- * update the flags.  (The same optimization can be said for
- * instructions that do or don't perform register writeback)
- * Also, instructions can either read the flags, only write the
- * flags, or read and write the flags.  To save combinations
- * rather than for sheer performance, flag functions just assume
- * read and write of flags.
- */
-
-static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	long iaddr = (long)p->addr;
-	int disp  = branch_displacement(insn);
-
-	if (insn & (1 << 24))
-		regs->ARM_lr = iaddr + 4;
-
-	regs->ARM_pc = iaddr + 8 + disp;
-}
-
-static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	long iaddr = (long)p->addr;
-	int disp = branch_displacement(insn);
-
-	regs->ARM_lr = iaddr + 4;
-	regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2);
-	regs->ARM_cpsr |= PSR_T_BIT;
-}
-
-static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	int rm = insn & 0xf;
-	long rmv = regs->uregs[rm];
-
-	if (insn & (1 << 5))
-		regs->ARM_lr = (long)p->addr + 4;
-
-	regs->ARM_pc = rmv & ~0x1;
-	regs->ARM_cpsr &= ~PSR_T_BIT;
-	if (rmv & 0x1)
-		regs->ARM_cpsr |= PSR_T_BIT;
-}
-
-static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
-	regs->uregs[rd] = regs->ARM_cpsr & mask;
-}
-
-static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
-{
-	regs->uregs[12] = regs->uregs[13];
-}
-
 static void __kprobes
-emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
+emulate_ldrdstrd(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = (unsigned long)p->addr + 8;
+	unsigned long pc = regs->ARM_pc + 4;
 	int rt = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -175,7 +91,7 @@ emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
 		BLX("%[fn]")
 		: "=r" (rtv), "=r" (rt2v), "=r" (rnv)
 		: "0" (rtv), "1" (rt2v), "2" (rnv), "r" (rmv),
-		  [fn] "r" (p->ainsn.insn_fn)
+		  [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -186,10 +102,10 @@ emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_ldr(struct kprobe *p, struct pt_regs *regs)
+emulate_ldr(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = (unsigned long)p->addr + 8;
+	unsigned long pc = regs->ARM_pc + 4;
 	int rt = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -202,7 +118,7 @@ emulate_ldr(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		BLX("%[fn]")
 		: "=r" (rtv), "=r" (rnv)
-		: "1" (rnv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "1" (rnv), "r" (rmv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -216,11 +132,11 @@ emulate_ldr(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_str(struct kprobe *p, struct pt_regs *regs)
+emulate_str(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long rtpc = (unsigned long)p->addr + str_pc_offset;
-	unsigned long rnpc = (unsigned long)p->addr + 8;
+	unsigned long rtpc = regs->ARM_pc - 4 + str_pc_offset;
+	unsigned long rnpc = regs->ARM_pc + 4;
 	int rt = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -234,7 +150,7 @@ emulate_str(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		BLX("%[fn]")
 		: "=r" (rnv)
-		: "r" (rtv), "0" (rnv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "r" (rtv), "0" (rnv), "r" (rmv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -243,10 +159,10 @@ emulate_str(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_rd12rn16rm0rs8_rwflags(struct kprobe *p, struct pt_regs *regs)
+emulate_rd12rn16rm0rs8_rwflags(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = (unsigned long)p->addr + 8;
+	unsigned long pc = regs->ARM_pc + 4;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -266,7 +182,7 @@ emulate_rd12rn16rm0rs8_rwflags(struct kprobe *p, struct pt_regs *regs)
 		"mrs	%[cpsr], cpsr		\n\t"
 		: "=r" (rdv), [cpsr] "=r" (cpsr)
 		: "0" (rdv), "r" (rnv), "r" (rmv), "r" (rsv),
-		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		  "1" (cpsr), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -278,9 +194,9 @@ emulate_rd12rn16rm0rs8_rwflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_rd12rn16rm0_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
+emulate_rd12rn16rm0_rwflags_nopc(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -296,7 +212,7 @@ emulate_rd12rn16rm0_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
 		"mrs	%[cpsr], cpsr		\n\t"
 		: "=r" (rdv), [cpsr] "=r" (cpsr)
 		: "0" (rdv), "r" (rnv), "r" (rmv),
-		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		  "1" (cpsr), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -305,9 +221,10 @@ emulate_rd12rn16rm0_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_rd16rn12rm0rs8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
+emulate_rd16rn12rm0rs8_rwflags_nopc(probes_opcode_t insn,
+	struct arch_probes_insn *asi,
+	struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rd = (insn >> 16) & 0xf;
 	int rn = (insn >> 12) & 0xf;
 	int rm = insn & 0xf;
@@ -325,7 +242,7 @@ emulate_rd16rn12rm0rs8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
 		"mrs	%[cpsr], cpsr		\n\t"
 		: "=r" (rdv), [cpsr] "=r" (cpsr)
 		: "0" (rdv), "r" (rnv), "r" (rmv), "r" (rsv),
-		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		  "1" (cpsr), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -334,9 +251,9 @@ emulate_rd16rn12rm0rs8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_rd12rm0_noflags_nopc(struct kprobe *p, struct pt_regs *regs)
+emulate_rd12rm0_noflags_nopc(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rd = (insn >> 12) & 0xf;
 	int rm = insn & 0xf;
 
@@ -346,7 +263,7 @@ emulate_rd12rm0_noflags_nopc(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		BLX("%[fn]")
 		: "=r" (rdv)
-		: "0" (rdv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "0" (rdv), "r" (rmv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -354,9 +271,10 @@ emulate_rd12rm0_noflags_nopc(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
+emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(probes_opcode_t insn,
+	struct arch_probes_insn *asi,
+	struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rdlo = (insn >> 12) & 0xf;
 	int rdhi = (insn >> 16) & 0xf;
 	int rn = insn & 0xf;
@@ -374,7 +292,7 @@ emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
 		"mrs	%[cpsr], cpsr		\n\t"
 		: "=r" (rdlov), "=r" (rdhiv), [cpsr] "=r" (cpsr)
 		: "0" (rdlov), "1" (rdhiv), "r" (rnv), "r" (rmv),
-		  "2" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		  "2" (cpsr), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -383,623 +301,43 @@ emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
 	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
 }
 
-/*
- * For the instruction masking and comparisons in all the "space_*"
- * functions below, Do _not_ rearrange the order of tests unless
- * you're very, very sure of what you are doing.  For the sake of
- * efficiency, the masks for some tests sometimes assume other test
- * have been done prior to them so the number of patterns to test
- * for an instruction set can be as broad as possible to reduce the
- * number of tests needed.
- */
-
-static const union decode_item arm_1111_table[] = {
-	/* Unconditional instructions					*/
-
-	/* memory hint		1111 0100 x001 xxxx xxxx xxxx xxxx xxxx */
-	/* PLDI (immediate)	1111 0100 x101 xxxx xxxx xxxx xxxx xxxx */
-	/* PLDW (immediate)	1111 0101 x001 xxxx xxxx xxxx xxxx xxxx */
-	/* PLD (immediate)	1111 0101 x101 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_SIMULATE	(0xfe300000, 0xf4100000, kprobe_simulate_nop),
-
-	/* memory hint		1111 0110 x001 xxxx xxxx xxxx xxx0 xxxx */
-	/* PLDI (register)	1111 0110 x101 xxxx xxxx xxxx xxx0 xxxx */
-	/* PLDW (register)	1111 0111 x001 xxxx xxxx xxxx xxx0 xxxx */
-	/* PLD (register)	1111 0111 x101 xxxx xxxx xxxx xxx0 xxxx */
-	DECODE_SIMULATE	(0xfe300010, 0xf6100000, kprobe_simulate_nop),
-
-	/* BLX (immediate)	1111 101x xxxx xxxx xxxx xxxx xxxx xxxx */
-	DECODE_SIMULATE	(0xfe000000, 0xfa000000, simulate_blx1),
-
-	/* CPS			1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
-	/* SETEND		1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
-	/* SRS			1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
-	/* RFE			1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
-
-	/* Coprocessor instructions... */
-	/* MCRR2		1111 1100 0100 xxxx xxxx xxxx xxxx xxxx */
-	/* MRRC2		1111 1100 0101 xxxx xxxx xxxx xxxx xxxx */
-	/* LDC2			1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC2			1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	/* CDP2			1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	/* MCR2			1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC2			1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-
-	/* Other unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_0001_0xx0____0xxx_table[] = {
-	/* Miscellaneous instructions					*/
-
-	/* MRS cpsr		cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
-	DECODE_SIMULATEX(0x0ff000f0, 0x01000000, simulate_mrs,
-						 REGS(0, NOPC, 0, 0, 0)),
-
-	/* BX			cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
-	DECODE_SIMULATE	(0x0ff000f0, 0x01200010, simulate_blx2bx),
-
-	/* BLX (register)	cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
-	DECODE_SIMULATEX(0x0ff000f0, 0x01200030, simulate_blx2bx,
-						 REGS(0, 0, 0, 0, NOPC)),
-
-	/* CLZ			cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
-	DECODE_EMULATEX	(0x0ff000f0, 0x01600010, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, NOPC)),
-
-	/* QADD			cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx */
-	/* QSUB			cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx */
-	/* QDADD		cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx */
-	/* QDSUB		cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx */
-	DECODE_EMULATEX	(0x0f9000f0, 0x01000050, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(NOPC, NOPC, 0, 0, NOPC)),
-
-	/* BXJ			cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
-	/* MSR			cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
-	/* MRS spsr		cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
-	/* BKPT			1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-	/* SMC			cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
-	/* And unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_0001_0xx0____1xx0_table[] = {
-	/* Halfword multiply and multiply-accumulate			*/
-
-	/* SMLALxy		cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
-	DECODE_EMULATEX	(0x0ff00090, 0x01400080, emulate_rdlo12rdhi16rn0rm8_rwflags_nopc,
-						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
-
-	/* SMULWy		cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
-	DECODE_OR	(0x0ff000b0, 0x012000a0),
-	/* SMULxy		cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
-	DECODE_EMULATEX	(0x0ff00090, 0x01600080, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, 0, NOPC, 0, NOPC)),
-
-	/* SMLAxy		cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx */
-	DECODE_OR	(0x0ff00090, 0x01000080),
-	/* SMLAWy		cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx */
-	DECODE_EMULATEX	(0x0ff000b0, 0x01200080, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
-
-	DECODE_END
+const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = {
+	[PROBES_EMULATE_NONE] = {.handler = probes_emulate_none},
+	[PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop},
+	[PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop},
+	[PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop},
+	[PROBES_BRANCH_IMM] = {.handler = simulate_blx1},
+	[PROBES_MRS] = {.handler = simulate_mrs},
+	[PROBES_BRANCH_REG] = {.handler = simulate_blx2bx},
+	[PROBES_CLZ] = {.handler = emulate_rd12rm0_noflags_nopc},
+	[PROBES_SATURATING_ARITHMETIC] = {
+		.handler = emulate_rd12rn16rm0_rwflags_nopc},
+	[PROBES_MUL1] = {.handler = emulate_rdlo12rdhi16rn0rm8_rwflags_nopc},
+	[PROBES_MUL2] = {.handler = emulate_rd16rn12rm0rs8_rwflags_nopc},
+	[PROBES_SWP] = {.handler = emulate_rd12rn16rm0_rwflags_nopc},
+	[PROBES_LDRSTRD] = {.handler = emulate_ldrdstrd},
+	[PROBES_LOAD_EXTRA] = {.handler = emulate_ldr},
+	[PROBES_LOAD] = {.handler = emulate_ldr},
+	[PROBES_STORE_EXTRA] = {.handler = emulate_str},
+	[PROBES_STORE] = {.handler = emulate_str},
+	[PROBES_MOV_IP_SP] = {.handler = simulate_mov_ipsp},
+	[PROBES_DATA_PROCESSING_REG] = {
+		.handler = emulate_rd12rn16rm0rs8_rwflags},
+	[PROBES_DATA_PROCESSING_IMM] = {
+		.handler = emulate_rd12rn16rm0rs8_rwflags},
+	[PROBES_MOV_HALFWORD] = {.handler = emulate_rd12rm0_noflags_nopc},
+	[PROBES_SEV] = {.handler = probes_emulate_none},
+	[PROBES_WFE] = {.handler = probes_simulate_nop},
+	[PROBES_SATURATE] = {.handler = emulate_rd12rn16rm0_rwflags_nopc},
+	[PROBES_REV] = {.handler = emulate_rd12rm0_noflags_nopc},
+	[PROBES_MMI] = {.handler = emulate_rd12rn16rm0_rwflags_nopc},
+	[PROBES_PACK] = {.handler = emulate_rd12rn16rm0_rwflags_nopc},
+	[PROBES_EXTEND] = {.handler = emulate_rd12rm0_noflags_nopc},
+	[PROBES_EXTEND_ADD] = {.handler = emulate_rd12rn16rm0_rwflags_nopc},
+	[PROBES_MUL_ADD_LONG] = {
+		.handler = emulate_rdlo12rdhi16rn0rm8_rwflags_nopc},
+	[PROBES_MUL_ADD] = {.handler = emulate_rd16rn12rm0rs8_rwflags_nopc},
+	[PROBES_BITFIELD] = {.handler = emulate_rd12rm0_noflags_nopc},
+	[PROBES_BRANCH] = {.handler = simulate_bbl},
+	[PROBES_LDMSTM] = {.decoder = kprobe_decode_ldmstm}
 };
-
-static const union decode_item arm_cccc_0000_____1001_table[] = {
-	/* Multiply and multiply-accumulate				*/
-
-	/* MUL			cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx */
-	/* MULS			cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx */
-	DECODE_EMULATEX	(0x0fe000f0, 0x00000090, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, 0, NOPC, 0, NOPC)),
-
-	/* MLA			cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx */
-	/* MLAS			cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx */
-	DECODE_OR	(0x0fe000f0, 0x00200090),
-	/* MLS			cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx */
-	DECODE_EMULATEX	(0x0ff000f0, 0x00600090, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
-
-	/* UMAAL		cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx */
-	DECODE_OR	(0x0ff000f0, 0x00400090),
-	/* UMULL		cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx */
-	/* UMULLS		cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx */
-	/* UMLAL		cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx */
-	/* UMLALS		cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx */
-	/* SMULL		cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx */
-	/* SMULLS		cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx */
-	/* SMLAL		cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx */
-	/* SMLALS		cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx */
-	DECODE_EMULATEX	(0x0f8000f0, 0x00800090, emulate_rdlo12rdhi16rn0rm8_rwflags_nopc,
-						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_0001_____1001_table[] = {
-	/* Synchronization primitives					*/
-
-#if __LINUX_ARM_ARCH__ < 6
-	/* Deprecated on ARMv6 and may be UNDEFINED on v7		*/
-	/* SMP/SWPB		cccc 0001 0x00 xxxx xxxx xxxx 1001 xxxx */
-	DECODE_EMULATEX	(0x0fb000f0, 0x01000090, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(NOPC, NOPC, 0, 0, NOPC)),
-#endif
-	/* LDREX/STREX{,D,B,H}	cccc 0001 1xxx xxxx xxxx xxxx 1001 xxxx */
-	/* And unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_000x_____1xx1_table[] = {
-	/* Extra load/store instructions				*/
-
-	/* STRHT		cccc 0000 xx10 xxxx xxxx xxxx 1011 xxxx */
-	/* ???			cccc 0000 xx10 xxxx xxxx xxxx 11x1 xxxx */
-	/* LDRHT		cccc 0000 xx11 xxxx xxxx xxxx 1011 xxxx */
-	/* LDRSBT		cccc 0000 xx11 xxxx xxxx xxxx 1101 xxxx */
-	/* LDRSHT		cccc 0000 xx11 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_REJECT	(0x0f200090, 0x00200090),
-
-	/* LDRD/STRD lr,pc,{...	cccc 000x x0x0 xxxx 111x xxxx 1101 xxxx */
-	DECODE_REJECT	(0x0e10e0d0, 0x0000e0d0),
-
-	/* LDRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1101 xxxx */
-	/* STRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_EMULATEX	(0x0e5000d0, 0x000000d0, emulate_ldrdstrd,
-						 REGS(NOPCWB, NOPCX, 0, 0, NOPC)),
-
-	/* LDRD (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1101 xxxx */
-	/* STRD (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_EMULATEX	(0x0e5000d0, 0x004000d0, emulate_ldrdstrd,
-						 REGS(NOPCWB, NOPCX, 0, 0, 0)),
-
-	/* STRH (register)	cccc 000x x0x0 xxxx xxxx xxxx 1011 xxxx */
-	DECODE_EMULATEX	(0x0e5000f0, 0x000000b0, emulate_str,
-						 REGS(NOPCWB, NOPC, 0, 0, NOPC)),
-
-	/* LDRH (register)	cccc 000x x0x1 xxxx xxxx xxxx 1011 xxxx */
-	/* LDRSB (register)	cccc 000x x0x1 xxxx xxxx xxxx 1101 xxxx */
-	/* LDRSH (register)	cccc 000x x0x1 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_EMULATEX	(0x0e500090, 0x00100090, emulate_ldr,
-						 REGS(NOPCWB, NOPC, 0, 0, NOPC)),
-
-	/* STRH (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1011 xxxx */
-	DECODE_EMULATEX	(0x0e5000f0, 0x004000b0, emulate_str,
-						 REGS(NOPCWB, NOPC, 0, 0, 0)),
-
-	/* LDRH (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1011 xxxx */
-	/* LDRSB (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1101 xxxx */
-	/* LDRSH (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_EMULATEX	(0x0e500090, 0x00500090, emulate_ldr,
-						 REGS(NOPCWB, NOPC, 0, 0, 0)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_000x_table[] = {
-	/* Data-processing (register)					*/
-
-	/* <op>S PC, ...	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx */
-	DECODE_REJECT	(0x0e10f000, 0x0010f000),
-
-	/* MOV IP, SP		1110 0001 1010 0000 1100 0000 0000 1101 */
-	DECODE_SIMULATE	(0xffffffff, 0xe1a0c00d, simulate_mov_ipsp),
-
-	/* TST (register)	cccc 0001 0001 xxxx xxxx xxxx xxx0 xxxx */
-	/* TEQ (register)	cccc 0001 0011 xxxx xxxx xxxx xxx0 xxxx */
-	/* CMP (register)	cccc 0001 0101 xxxx xxxx xxxx xxx0 xxxx */
-	/* CMN (register)	cccc 0001 0111 xxxx xxxx xxxx xxx0 xxxx */
-	DECODE_EMULATEX	(0x0f900010, 0x01100000, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(ANY, 0, 0, 0, ANY)),
-
-	/* MOV (register)	cccc 0001 101x xxxx xxxx xxxx xxx0 xxxx */
-	/* MVN (register)	cccc 0001 111x xxxx xxxx xxxx xxx0 xxxx */
-	DECODE_EMULATEX	(0x0fa00010, 0x01a00000, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(0, ANY, 0, 0, ANY)),
-
-	/* AND (register)	cccc 0000 000x xxxx xxxx xxxx xxx0 xxxx */
-	/* EOR (register)	cccc 0000 001x xxxx xxxx xxxx xxx0 xxxx */
-	/* SUB (register)	cccc 0000 010x xxxx xxxx xxxx xxx0 xxxx */
-	/* RSB (register)	cccc 0000 011x xxxx xxxx xxxx xxx0 xxxx */
-	/* ADD (register)	cccc 0000 100x xxxx xxxx xxxx xxx0 xxxx */
-	/* ADC (register)	cccc 0000 101x xxxx xxxx xxxx xxx0 xxxx */
-	/* SBC (register)	cccc 0000 110x xxxx xxxx xxxx xxx0 xxxx */
-	/* RSC (register)	cccc 0000 111x xxxx xxxx xxxx xxx0 xxxx */
-	/* ORR (register)	cccc 0001 100x xxxx xxxx xxxx xxx0 xxxx */
-	/* BIC (register)	cccc 0001 110x xxxx xxxx xxxx xxx0 xxxx */
-	DECODE_EMULATEX	(0x0e000010, 0x00000000, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(ANY, ANY, 0, 0, ANY)),
-
-	/* TST (reg-shift reg)	cccc 0001 0001 xxxx xxxx xxxx 0xx1 xxxx */
-	/* TEQ (reg-shift reg)	cccc 0001 0011 xxxx xxxx xxxx 0xx1 xxxx */
-	/* CMP (reg-shift reg)	cccc 0001 0101 xxxx xxxx xxxx 0xx1 xxxx */
-	/* CMN (reg-shift reg)	cccc 0001 0111 xxxx xxxx xxxx 0xx1 xxxx */
-	DECODE_EMULATEX	(0x0f900090, 0x01100010, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(ANY, 0, NOPC, 0, ANY)),
-
-	/* MOV (reg-shift reg)	cccc 0001 101x xxxx xxxx xxxx 0xx1 xxxx */
-	/* MVN (reg-shift reg)	cccc 0001 111x xxxx xxxx xxxx 0xx1 xxxx */
-	DECODE_EMULATEX	(0x0fa00090, 0x01a00010, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(0, ANY, NOPC, 0, ANY)),
-
-	/* AND (reg-shift reg)	cccc 0000 000x xxxx xxxx xxxx 0xx1 xxxx */
-	/* EOR (reg-shift reg)	cccc 0000 001x xxxx xxxx xxxx 0xx1 xxxx */
-	/* SUB (reg-shift reg)	cccc 0000 010x xxxx xxxx xxxx 0xx1 xxxx */
-	/* RSB (reg-shift reg)	cccc 0000 011x xxxx xxxx xxxx 0xx1 xxxx */
-	/* ADD (reg-shift reg)	cccc 0000 100x xxxx xxxx xxxx 0xx1 xxxx */
-	/* ADC (reg-shift reg)	cccc 0000 101x xxxx xxxx xxxx 0xx1 xxxx */
-	/* SBC (reg-shift reg)	cccc 0000 110x xxxx xxxx xxxx 0xx1 xxxx */
-	/* RSC (reg-shift reg)	cccc 0000 111x xxxx xxxx xxxx 0xx1 xxxx */
-	/* ORR (reg-shift reg)	cccc 0001 100x xxxx xxxx xxxx 0xx1 xxxx */
-	/* BIC (reg-shift reg)	cccc 0001 110x xxxx xxxx xxxx 0xx1 xxxx */
-	DECODE_EMULATEX	(0x0e000090, 0x00000010, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(ANY, ANY, NOPC, 0, ANY)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_001x_table[] = {
-	/* Data-processing (immediate)					*/
-
-	/* MOVW			cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
-	/* MOVT			cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0fb00000, 0x03000000, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, 0)),
-
-	/* YIELD		cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
-	DECODE_OR	(0x0fff00ff, 0x03200001),
-	/* SEV			cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
-	DECODE_EMULATE	(0x0fff00ff, 0x03200004, kprobe_emulate_none),
-	/* NOP			cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
-	/* WFE			cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
-	/* WFI			cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
-	DECODE_SIMULATE	(0x0fff00fc, 0x03200000, kprobe_simulate_nop),
-	/* DBG			cccc 0011 0010 0000 xxxx xxxx ffff xxxx */
-	/* unallocated hints	cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
-	/* MSR (immediate)	cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0x0fb00000, 0x03200000),
-
-	/* <op>S PC, ...	cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx */
-	DECODE_REJECT	(0x0e10f000, 0x0210f000),
-
-	/* TST (immediate)	cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx */
-	/* TEQ (immediate)	cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx */
-	/* CMP (immediate)	cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx */
-	/* CMN (immediate)	cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0f900000, 0x03100000, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(ANY, 0, 0, 0, 0)),
-
-	/* MOV (immediate)	cccc 0011 101x xxxx xxxx xxxx xxxx xxxx */
-	/* MVN (immediate)	cccc 0011 111x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0fa00000, 0x03a00000, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(0, ANY, 0, 0, 0)),
-
-	/* AND (immediate)	cccc 0010 000x xxxx xxxx xxxx xxxx xxxx */
-	/* EOR (immediate)	cccc 0010 001x xxxx xxxx xxxx xxxx xxxx */
-	/* SUB (immediate)	cccc 0010 010x xxxx xxxx xxxx xxxx xxxx */
-	/* RSB (immediate)	cccc 0010 011x xxxx xxxx xxxx xxxx xxxx */
-	/* ADD (immediate)	cccc 0010 100x xxxx xxxx xxxx xxxx xxxx */
-	/* ADC (immediate)	cccc 0010 101x xxxx xxxx xxxx xxxx xxxx */
-	/* SBC (immediate)	cccc 0010 110x xxxx xxxx xxxx xxxx xxxx */
-	/* RSC (immediate)	cccc 0010 111x xxxx xxxx xxxx xxxx xxxx */
-	/* ORR (immediate)	cccc 0011 100x xxxx xxxx xxxx xxxx xxxx */
-	/* BIC (immediate)	cccc 0011 110x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0e000000, 0x02000000, emulate_rd12rn16rm0rs8_rwflags,
-						 REGS(ANY, ANY, 0, 0, 0)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_0110_____xxx1_table[] = {
-	/* Media instructions						*/
-
-	/* SEL			cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx */
-	DECODE_EMULATEX	(0x0ff000f0, 0x068000b0, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(NOPC, NOPC, 0, 0, NOPC)),
-
-	/* SSAT			cccc 0110 101x xxxx xxxx xxxx xx01 xxxx */
-	/* USAT			cccc 0110 111x xxxx xxxx xxxx xx01 xxxx */
-	DECODE_OR(0x0fa00030, 0x06a00010),
-	/* SSAT16		cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx */
-	/* USAT16		cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx */
-	DECODE_EMULATEX	(0x0fb000f0, 0x06a00030, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(0, NOPC, 0, 0, NOPC)),
-
-	/* REV			cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
-	/* REV16		cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
-	/* RBIT			cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
-	/* REVSH		cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
-	DECODE_EMULATEX	(0x0fb00070, 0x06b00030, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, NOPC)),
-
-	/* ???			cccc 0110 0x00 xxxx xxxx xxxx xxx1 xxxx */
-	DECODE_REJECT	(0x0fb00010, 0x06000010),
-	/* ???			cccc 0110 0xxx xxxx xxxx xxxx 1011 xxxx */
-	DECODE_REJECT	(0x0f8000f0, 0x060000b0),
-	/* ???			cccc 0110 0xxx xxxx xxxx xxxx 1101 xxxx */
-	DECODE_REJECT	(0x0f8000f0, 0x060000d0),
-	/* SADD16		cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx */
-	/* SADDSUBX		cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx */
-	/* SSUBADDX		cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx */
-	/* SSUB16		cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx */
-	/* SADD8		cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx */
-	/* SSUB8		cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx */
-	/* QADD16		cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx */
-	/* QADDSUBX		cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx */
-	/* QSUBADDX		cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx */
-	/* QSUB16		cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx */
-	/* QADD8		cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx */
-	/* QSUB8		cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx */
-	/* SHADD16		cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx */
-	/* SHADDSUBX		cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx */
-	/* SHSUBADDX		cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx */
-	/* SHSUB16		cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx */
-	/* SHADD8		cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx */
-	/* SHSUB8		cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx */
-	/* UADD16		cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx */
-	/* UADDSUBX		cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx */
-	/* USUBADDX		cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx */
-	/* USUB16		cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx */
-	/* UADD8		cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx */
-	/* USUB8		cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx */
-	/* UQADD16		cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx */
-	/* UQADDSUBX		cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx */
-	/* UQSUBADDX		cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx */
-	/* UQSUB16		cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx */
-	/* UQADD8		cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx */
-	/* UQSUB8		cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx */
-	/* UHADD16		cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx */
-	/* UHADDSUBX		cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx */
-	/* UHSUBADDX		cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx */
-	/* UHSUB16		cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx */
-	/* UHADD8		cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx */
-	/* UHSUB8		cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_EMULATEX	(0x0f800010, 0x06000010, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(NOPC, NOPC, 0, 0, NOPC)),
-
-	/* PKHBT		cccc 0110 1000 xxxx xxxx xxxx x001 xxxx */
-	/* PKHTB		cccc 0110 1000 xxxx xxxx xxxx x101 xxxx */
-	DECODE_EMULATEX	(0x0ff00030, 0x06800010, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(NOPC, NOPC, 0, 0, NOPC)),
-
-	/* ???			cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx */
-	/* ???			cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx */
-	DECODE_REJECT	(0x0fb000f0, 0x06900070),
-
-	/* SXTB16		cccc 0110 1000 1111 xxxx xxxx 0111 xxxx */
-	/* SXTB			cccc 0110 1010 1111 xxxx xxxx 0111 xxxx */
-	/* SXTH			cccc 0110 1011 1111 xxxx xxxx 0111 xxxx */
-	/* UXTB16		cccc 0110 1100 1111 xxxx xxxx 0111 xxxx */
-	/* UXTB			cccc 0110 1110 1111 xxxx xxxx 0111 xxxx */
-	/* UXTH			cccc 0110 1111 1111 xxxx xxxx 0111 xxxx */
-	DECODE_EMULATEX	(0x0f8f00f0, 0x068f0070, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, NOPC)),
-
-	/* SXTAB16		cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx */
-	/* SXTAB		cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx */
-	/* SXTAH		cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx */
-	/* UXTAB16		cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx */
-	/* UXTAB		cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx */
-	/* UXTAH		cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx */
-	DECODE_EMULATEX	(0x0f8000f0, 0x06800070, emulate_rd12rn16rm0_rwflags_nopc,
-						 REGS(NOPCX, NOPC, 0, 0, NOPC)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_0111_____xxx1_table[] = {
-	/* Media instructions						*/
-
-	/* UNDEFINED		cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */
-	DECODE_REJECT	(0x0ff000f0, 0x07f000f0),
-
-	/* SMLALD		cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
-	/* SMLSLD		cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
-	DECODE_EMULATEX	(0x0ff00090, 0x07400010, emulate_rdlo12rdhi16rn0rm8_rwflags_nopc,
-						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
-
-	/* SMUAD		cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx */
-	/* SMUSD		cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx */
-	DECODE_OR	(0x0ff0f090, 0x0700f010),
-	/* SMMUL		cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx */
-	DECODE_OR	(0x0ff0f0d0, 0x0750f010),
-	/* USAD8		cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */
-	DECODE_EMULATEX	(0x0ff0f0f0, 0x0780f010, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, 0, NOPC, 0, NOPC)),
-
-	/* SMLAD		cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx */
-	/* SMLSD		cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx */
-	DECODE_OR	(0x0ff00090, 0x07000010),
-	/* SMMLA		cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx */
-	DECODE_OR	(0x0ff000d0, 0x07500010),
-	/* USADA8		cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */
-	DECODE_EMULATEX	(0x0ff000f0, 0x07800010, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, NOPCX, NOPC, 0, NOPC)),
-
-	/* SMMLS		cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx */
-	DECODE_EMULATEX	(0x0ff000d0, 0x075000d0, emulate_rd16rn12rm0rs8_rwflags_nopc,
-						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
-
-	/* SBFX			cccc 0111 101x xxxx xxxx xxxx x101 xxxx */
-	/* UBFX			cccc 0111 111x xxxx xxxx xxxx x101 xxxx */
-	DECODE_EMULATEX	(0x0fa00070, 0x07a00050, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, NOPC)),
-
-	/* BFC			cccc 0111 110x xxxx xxxx xxxx x001 1111 */
-	DECODE_EMULATEX	(0x0fe0007f, 0x07c0001f, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, 0)),
-
-	/* BFI			cccc 0111 110x xxxx xxxx xxxx x001 xxxx */
-	DECODE_EMULATEX	(0x0fe00070, 0x07c00010, emulate_rd12rm0_noflags_nopc,
-						 REGS(0, NOPC, 0, 0, NOPCX)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_01xx_table[] = {
-	/* Load/store word and unsigned byte				*/
-
-	/* LDRB/STRB pc,[...]	cccc 01xx x0xx xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0x0c40f000, 0x0440f000),
-
-	/* STRT			cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRT			cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */
-	/* STRBT		cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRBT		cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0x0d200000, 0x04200000),
-
-	/* STR (immediate)	cccc 010x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	/* STRB (immediate)	cccc 010x x1x0 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0e100000, 0x04000000, emulate_str,
-						 REGS(NOPCWB, ANY, 0, 0, 0)),
-
-	/* LDR (immediate)	cccc 010x x0x1 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRB (immediate)	cccc 010x x1x1 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0e100000, 0x04100000, emulate_ldr,
-						 REGS(NOPCWB, ANY, 0, 0, 0)),
-
-	/* STR (register)	cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	/* STRB (register)	cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0e100000, 0x06000000, emulate_str,
-						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
-
-	/* LDR (register)	cccc 011x x0x1 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRB (register)	cccc 011x x1x1 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0x0e100000, 0x06100000, emulate_ldr,
-						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
-
-	DECODE_END
-};
-
-static const union decode_item arm_cccc_100x_table[] = {
-	/* Block data transfer instructions				*/
-
-	/* LDM			cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
-	/* STM			cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_CUSTOM	(0x0e400000, 0x08000000, kprobe_decode_ldmstm),
-
-	/* STM (user registers)	cccc 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
-	/* LDM (user registers)	cccc 100x x1x1 xxxx 0xxx xxxx xxxx xxxx */
-	/* LDM (exception ret)	cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */
-	DECODE_END
-};
-
-const union decode_item kprobe_decode_arm_table[] = {
-	/*
-	 * Unconditional instructions
-	 *			1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xf0000000, 0xf0000000, arm_1111_table),
-
-	/*
-	 * Miscellaneous instructions
-	 *			cccc 0001 0xx0 xxxx xxxx xxxx 0xxx xxxx
-	 */
-	DECODE_TABLE	(0x0f900080, 0x01000000, arm_cccc_0001_0xx0____0xxx_table),
-
-	/*
-	 * Halfword multiply and multiply-accumulate
-	 *			cccc 0001 0xx0 xxxx xxxx xxxx 1xx0 xxxx
-	 */
-	DECODE_TABLE	(0x0f900090, 0x01000080, arm_cccc_0001_0xx0____1xx0_table),
-
-	/*
-	 * Multiply and multiply-accumulate
-	 *			cccc 0000 xxxx xxxx xxxx xxxx 1001 xxxx
-	 */
-	DECODE_TABLE	(0x0f0000f0, 0x00000090, arm_cccc_0000_____1001_table),
-
-	/*
-	 * Synchronization primitives
-	 *			cccc 0001 xxxx xxxx xxxx xxxx 1001 xxxx
-	 */
-	DECODE_TABLE	(0x0f0000f0, 0x01000090, arm_cccc_0001_____1001_table),
-
-	/*
-	 * Extra load/store instructions
-	 *			cccc 000x xxxx xxxx xxxx xxxx 1xx1 xxxx
-	 */
-	DECODE_TABLE	(0x0e000090, 0x00000090, arm_cccc_000x_____1xx1_table),
-
-	/*
-	 * Data-processing (register)
-	 *			cccc 000x xxxx xxxx xxxx xxxx xxx0 xxxx
-	 * Data-processing (register-shifted register)
-	 *			cccc 000x xxxx xxxx xxxx xxxx 0xx1 xxxx
-	 */
-	DECODE_TABLE	(0x0e000000, 0x00000000, arm_cccc_000x_table),
-
-	/*
-	 * Data-processing (immediate)
-	 *			cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0x0e000000, 0x02000000, arm_cccc_001x_table),
-
-	/*
-	 * Media instructions
-	 *			cccc 011x xxxx xxxx xxxx xxxx xxx1 xxxx
-	 */
-	DECODE_TABLE	(0x0f000010, 0x06000010, arm_cccc_0110_____xxx1_table),
-	DECODE_TABLE	(0x0f000010, 0x07000010, arm_cccc_0111_____xxx1_table),
-
-	/*
-	 * Load/store word and unsigned byte
-	 *			cccc 01xx xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0x0c000000, 0x04000000, arm_cccc_01xx_table),
-
-	/*
-	 * Block data transfer instructions
-	 *			cccc 100x xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0x0e000000, 0x08000000, arm_cccc_100x_table),
-
-	/* B			cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
-	/* BL			cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
-	DECODE_SIMULATE	(0x0e000000, 0x0a000000, simulate_bbl),
-
-	/*
-	 * Supervisor Call, and coprocessor instructions
-	 */
-
-	/* MCRR			cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx */
-	/* MRRC			cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx */
-	/* LDC			cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC			cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	/* CDP			cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	/* MCR			cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC			cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	/* SVC			cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0x0c000000, 0x0c000000),
-
-	DECODE_END
-};
-#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
-EXPORT_SYMBOL_GPL(kprobe_decode_arm_table);
-#endif
-
-static void __kprobes arm_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	regs->ARM_pc += 4;
-	p->ainsn.insn_handler(p, regs);
-}
-
-/* Return:
- *   INSN_REJECTED     If instruction is one not allowed to kprobe,
- *   INSN_GOOD         If instruction is supported and uses instruction slot,
- *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
- *
- * For instructions we don't want to kprobe (INSN_REJECTED return result):
- *   These are generally ones that modify the processor state making
- *   them "hard" to simulate such as switches processor modes or
- *   make accesses in alternate modes.  Any of these could be simulated
- *   if the work was put into it, but low return considering they
- *   should also be very rare.
- */
-enum kprobe_insn __kprobes
-arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	asi->insn_singlestep = arm_singlestep;
-	asi->insn_check_cc = kprobe_condition_checks[insn>>28];
-	return kprobe_decode_insn(insn, asi, kprobe_decode_arm_table, false);
-}
diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c
index 18a76282970e..c311ed94ff1c 100644
--- a/arch/arm/kernel/kprobes-common.c
+++ b/arch/arm/kernel/kprobes-common.c
@@ -13,178 +13,14 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
-#include <asm/system_info.h>
 
 #include "kprobes.h"
 
 
-#ifndef find_str_pc_offset
-
-/*
- * For STR and STM instructions, an ARM core may choose to use either
- * a +8 or a +12 displacement from the current instruction's address.
- * Whichever value is chosen for a given core, it must be the same for
- * both instructions and may not change.  This function measures it.
- */
-
-int str_pc_offset;
-
-void __init find_str_pc_offset(void)
-{
-	int addr, scratch, ret;
-
-	__asm__ (
-		"sub	%[ret], pc, #4		\n\t"
-		"str	pc, %[addr]		\n\t"
-		"ldr	%[scr], %[addr]		\n\t"
-		"sub	%[ret], %[scr], %[ret]	\n\t"
-		: [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr));
-
-	str_pc_offset = ret;
-}
-
-#endif /* !find_str_pc_offset */
-
-
-#ifndef test_load_write_pc_interworking
-
-bool load_write_pc_interworks;
-
-void __init test_load_write_pc_interworking(void)
-{
-	int arch = cpu_architecture();
-	BUG_ON(arch == CPU_ARCH_UNKNOWN);
-	load_write_pc_interworks = arch >= CPU_ARCH_ARMv5T;
-}
-
-#endif /* !test_load_write_pc_interworking */
-
-
-#ifndef test_alu_write_pc_interworking
-
-bool alu_write_pc_interworks;
-
-void __init test_alu_write_pc_interworking(void)
-{
-	int arch = cpu_architecture();
-	BUG_ON(arch == CPU_ARCH_UNKNOWN);
-	alu_write_pc_interworks = arch >= CPU_ARCH_ARMv7;
-}
-
-#endif /* !test_alu_write_pc_interworking */
-
-
-void __init arm_kprobe_decode_init(void)
-{
-	find_str_pc_offset();
-	test_load_write_pc_interworking();
-	test_alu_write_pc_interworking();
-}
-
-
-static unsigned long __kprobes __check_eq(unsigned long cpsr)
-{
-	return cpsr & PSR_Z_BIT;
-}
-
-static unsigned long __kprobes __check_ne(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_Z_BIT;
-}
-
-static unsigned long __kprobes __check_cs(unsigned long cpsr)
-{
-	return cpsr & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_cc(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_mi(unsigned long cpsr)
-{
-	return cpsr & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_pl(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_vs(unsigned long cpsr)
-{
-	return cpsr & PSR_V_BIT;
-}
-
-static unsigned long __kprobes __check_vc(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_V_BIT;
-}
-
-static unsigned long __kprobes __check_hi(unsigned long cpsr)
-{
-	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
-	return cpsr & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_ls(unsigned long cpsr)
-{
-	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
-	return (~cpsr) & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_ge(unsigned long cpsr)
-{
-	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	return (~cpsr) & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_lt(unsigned long cpsr)
-{
-	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	return cpsr & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_gt(unsigned long cpsr)
-{
-	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
-	return (~temp) & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_le(unsigned long cpsr)
-{
-	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
-	return temp & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_al(unsigned long cpsr)
-{
-	return true;
-}
-
-kprobe_check_cc * const kprobe_condition_checks[16] = {
-	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
-	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
-	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
-	&__check_gt, &__check_le, &__check_al, &__check_al
-};
-
-
-void __kprobes kprobe_simulate_nop(struct kprobe *p, struct pt_regs *regs)
-{
-}
-
-void __kprobes kprobe_emulate_none(struct kprobe *p, struct pt_regs *regs)
-{
-	p->ainsn.insn_fn();
-}
-
-static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes simulate_ldm1stm1(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rn = (insn >> 16) & 0xf;
 	int lbit = insn & (1 << 20);
 	int wbit = insn & (1 << 21);
@@ -223,24 +59,31 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 	}
 }
 
-static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes simulate_stm1_pc(probes_opcode_t insn,
+	struct arch_probes_insn *asi,
+	struct pt_regs *regs)
 {
-	regs->ARM_pc = (long)p->addr + str_pc_offset;
-	simulate_ldm1stm1(p, regs);
-	regs->ARM_pc = (long)p->addr + 4;
+	unsigned long addr = regs->ARM_pc - 4;
+
+	regs->ARM_pc = (long)addr + str_pc_offset;
+	simulate_ldm1stm1(insn, asi, regs);
+	regs->ARM_pc = (long)addr + 4;
 }
 
-static void __kprobes simulate_ldm1_pc(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes simulate_ldm1_pc(probes_opcode_t insn,
+	struct arch_probes_insn *asi,
+	struct pt_regs *regs)
 {
-	simulate_ldm1stm1(p, regs);
+	simulate_ldm1stm1(insn, asi, regs);
 	load_write_pc(regs->ARM_pc, regs);
 }
 
 static void __kprobes
-emulate_generic_r0_12_noflags(struct kprobe *p, struct pt_regs *regs)
+emulate_generic_r0_12_noflags(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	register void *rregs asm("r1") = regs;
-	register void *rfn asm("lr") = p->ainsn.insn_fn;
+	register void *rfn asm("lr") = asi->insn_fn;
 
 	__asm__ __volatile__ (
 		"stmdb	sp!, {%[regs], r11}	\n\t"
@@ -264,22 +107,27 @@ emulate_generic_r0_12_noflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-emulate_generic_r2_14_noflags(struct kprobe *p, struct pt_regs *regs)
+emulate_generic_r2_14_noflags(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+2));
+	emulate_generic_r0_12_noflags(insn, asi,
+		(struct pt_regs *)(regs->uregs+2));
 }
 
 static void __kprobes
-emulate_ldm_r3_15(struct kprobe *p, struct pt_regs *regs)
+emulate_ldm_r3_15(probes_opcode_t insn,
+	struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+3));
+	emulate_generic_r0_12_noflags(insn, asi,
+		(struct pt_regs *)(regs->uregs+3));
 	load_write_pc(regs->ARM_pc, regs);
 }
 
-enum kprobe_insn __kprobes
-kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+enum probes_insn __kprobes
+kprobe_decode_ldmstm(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *h)
 {
-	kprobe_insn_handler_t *handler = 0;
+	probes_insn_handler_t *handler = 0;
 	unsigned reglist = insn & 0xffff;
 	int is_ldm = insn & 0x100000;
 	int rn = (insn >> 16) & 0xf;
@@ -319,260 +167,3 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	return INSN_GOOD_NO_SLOT;
 }
 
-
-/*
- * Prepare an instruction slot to receive an instruction for emulating.
- * This is done by placing a subroutine return after the location where the
- * instruction will be placed. We also modify ARM instructions to be
- * unconditional as the condition code will already be checked before any
- * emulation handler is called.
- */
-static kprobe_opcode_t __kprobes
-prepare_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
-								bool thumb)
-{
-#ifdef CONFIG_THUMB2_KERNEL
-	if (thumb) {
-		u16 *thumb_insn = (u16 *)asi->insn;
-		thumb_insn[1] = 0x4770; /* Thumb bx lr */
-		thumb_insn[2] = 0x4770; /* Thumb bx lr */
-		return insn;
-	}
-	asi->insn[1] = 0xe12fff1e; /* ARM bx lr */
-#else
-	asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */
-#endif
-	/* Make an ARM instruction unconditional */
-	if (insn < 0xe0000000)
-		insn = (insn | 0xe0000000) & ~0x10000000;
-	return insn;
-}
-
-/*
- * Write a (probably modified) instruction into the slot previously prepared by
- * prepare_emulated_insn
- */
-static void  __kprobes
-set_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
-								bool thumb)
-{
-#ifdef CONFIG_THUMB2_KERNEL
-	if (thumb) {
-		u16 *ip = (u16 *)asi->insn;
-		if (is_wide_instruction(insn))
-			*ip++ = insn >> 16;
-		*ip++ = insn;
-		return;
-	}
-#endif
-	asi->insn[0] = insn;
-}
-
-/*
- * When we modify the register numbers encoded in an instruction to be emulated,
- * the new values come from this define. For ARM and 32-bit Thumb instructions
- * this gives...
- *
- *	bit position	  16  12   8   4   0
- *	---------------+---+---+---+---+---+
- *	register	 r2  r0  r1  --  r3
- */
-#define INSN_NEW_BITS		0x00020103
-
-/* Each nibble has same value as that at INSN_NEW_BITS bit 16 */
-#define INSN_SAMEAS16_BITS	0x22222222
-
-/*
- * Validate and modify each of the registers encoded in an instruction.
- *
- * Each nibble in regs contains a value from enum decode_reg_type. For each
- * non-zero value, the corresponding nibble in pinsn is validated and modified
- * according to the type.
- */
-static bool __kprobes decode_regs(kprobe_opcode_t* pinsn, u32 regs)
-{
-	kprobe_opcode_t insn = *pinsn;
-	kprobe_opcode_t mask = 0xf; /* Start at least significant nibble */
-
-	for (; regs != 0; regs >>= 4, mask <<= 4) {
-
-		kprobe_opcode_t new_bits = INSN_NEW_BITS;
-
-		switch (regs & 0xf) {
-
-		case REG_TYPE_NONE:
-			/* Nibble not a register, skip to next */
-			continue;
-
-		case REG_TYPE_ANY:
-			/* Any register is allowed */
-			break;
-
-		case REG_TYPE_SAMEAS16:
-			/* Replace register with same as at bit position 16 */
-			new_bits = INSN_SAMEAS16_BITS;
-			break;
-
-		case REG_TYPE_SP:
-			/* Only allow SP (R13) */
-			if ((insn ^ 0xdddddddd) & mask)
-				goto reject;
-			break;
-
-		case REG_TYPE_PC:
-			/* Only allow PC (R15) */
-			if ((insn ^ 0xffffffff) & mask)
-				goto reject;
-			break;
-
-		case REG_TYPE_NOSP:
-			/* Reject SP (R13) */
-			if (((insn ^ 0xdddddddd) & mask) == 0)
-				goto reject;
-			break;
-
-		case REG_TYPE_NOSPPC:
-		case REG_TYPE_NOSPPCX:
-			/* Reject SP and PC (R13 and R15) */
-			if (((insn ^ 0xdddddddd) & 0xdddddddd & mask) == 0)
-				goto reject;
-			break;
-
-		case REG_TYPE_NOPCWB:
-			if (!is_writeback(insn))
-				break; /* No writeback, so any register is OK */
-			/* fall through... */
-		case REG_TYPE_NOPC:
-		case REG_TYPE_NOPCX:
-			/* Reject PC (R15) */
-			if (((insn ^ 0xffffffff) & mask) == 0)
-				goto reject;
-			break;
-		}
-
-		/* Replace value of nibble with new register number... */
-		insn &= ~mask;
-		insn |= new_bits & mask;
-	}
-
-	*pinsn = insn;
-	return true;
-
-reject:
-	return false;
-}
-
-static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
-	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
-	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
-	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
-	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
-	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
-	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
-};
-
-/*
- * kprobe_decode_insn operates on data tables in order to decode an ARM
- * architecture instruction onto which a kprobe has been placed.
- *
- * These instruction decoding tables are a concatenation of entries each
- * of which consist of one of the following structs:
- *
- *	decode_table
- *	decode_custom
- *	decode_simulate
- *	decode_emulate
- *	decode_or
- *	decode_reject
- *
- * Each of these starts with a struct decode_header which has the following
- * fields:
- *
- *	type_regs
- *	mask
- *	value
- *
- * The least significant DECODE_TYPE_BITS of type_regs contains a value
- * from enum decode_type, this indicates which of the decode_* structs
- * the entry contains. The value DECODE_TYPE_END indicates the end of the
- * table.
- *
- * When the table is parsed, each entry is checked in turn to see if it
- * matches the instruction to be decoded using the test:
- *
- *	(insn & mask) == value
- *
- * If no match is found before the end of the table is reached then decoding
- * fails with INSN_REJECTED.
- *
- * When a match is found, decode_regs() is called to validate and modify each
- * of the registers encoded in the instruction; the data it uses to do this
- * is (type_regs >> DECODE_TYPE_BITS). A validation failure will cause decoding
- * to fail with INSN_REJECTED.
- *
- * Once the instruction has passed the above tests, further processing
- * depends on the type of the table entry's decode struct.
- *
- */
-int __kprobes
-kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
-				const union decode_item *table, bool thumb)
-{
-	const struct decode_header *h = (struct decode_header *)table;
-	const struct decode_header *next;
-	bool matched = false;
-
-	insn = prepare_emulated_insn(insn, asi, thumb);
-
-	for (;; h = next) {
-		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
-		u32 regs = h->type_regs.bits >> DECODE_TYPE_BITS;
-
-		if (type == DECODE_TYPE_END)
-			return INSN_REJECTED;
-
-		next = (struct decode_header *)
-				((uintptr_t)h + decode_struct_sizes[type]);
-
-		if (!matched && (insn & h->mask.bits) != h->value.bits)
-			continue;
-
-		if (!decode_regs(&insn, regs))
-			return INSN_REJECTED;
-
-		switch (type) {
-
-		case DECODE_TYPE_TABLE: {
-			struct decode_table *d = (struct decode_table *)h;
-			next = (struct decode_header *)d->table.table;
-			break;
-		}
-
-		case DECODE_TYPE_CUSTOM: {
-			struct decode_custom *d = (struct decode_custom *)h;
-			return (*d->decoder.decoder)(insn, asi);
-		}
-
-		case DECODE_TYPE_SIMULATE: {
-			struct decode_simulate *d = (struct decode_simulate *)h;
-			asi->insn_handler = d->handler.handler;
-			return INSN_GOOD_NO_SLOT;
-		}
-
-		case DECODE_TYPE_EMULATE: {
-			struct decode_emulate *d = (struct decode_emulate *)h;
-			asi->insn_handler = d->handler.handler;
-			set_emulated_insn(insn, asi, thumb);
-			return INSN_GOOD;
-		}
-
-		case DECODE_TYPE_OR:
-			matched = true;
-			break;
-
-		case DECODE_TYPE_REJECT:
-		default:
-			return INSN_REJECTED;
-		}
-		}
-	}
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c
index 839312905067..87839de77e5f 100644
--- a/arch/arm/kernel/kprobes-test-arm.c
+++ b/arch/arm/kernel/kprobes-test-arm.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <asm/system_info.h>
 
 #include "kprobes-test.h"
 
diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c
index 0cd63d080c7b..c2fd06b4c389 100644
--- a/arch/arm/kernel/kprobes-test.c
+++ b/arch/arm/kernel/kprobes-test.c
@@ -201,10 +201,14 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
-
+#include <linux/errno.h>
+#include <linux/stddef.h>
+#include <linux/bug.h>
 #include <asm/opcodes.h>
 
 #include "kprobes.h"
+#include "probes-arm.h"
+#include "probes-thumb.h"
 #include "kprobes-test.h"
 
 
@@ -1608,7 +1612,7 @@ static int __init run_all_tests(void)
 		goto out;
 
 	pr_info("ARM instruction simulation\n");
-	ret = run_test_cases(kprobe_arm_test_cases, kprobe_decode_arm_table);
+	ret = run_test_cases(kprobe_arm_test_cases, probes_decode_arm_table);
 	if (ret)
 		goto out;
 
@@ -1631,13 +1635,13 @@ static int __init run_all_tests(void)
 
 	pr_info("16-bit Thumb instruction simulation\n");
 	ret = run_test_cases(kprobe_thumb16_test_cases,
-				kprobe_decode_thumb16_table);
+				probes_decode_thumb16_table);
 	if (ret)
 		goto out;
 
 	pr_info("32-bit Thumb instruction simulation\n");
 	ret = run_test_cases(kprobe_thumb32_test_cases,
-				kprobe_decode_thumb32_table);
+				probes_decode_thumb32_table);
 	if (ret)
 		goto out;
 #endif
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c
index 6123daf397a7..6619188619ae 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/kernel/kprobes-thumb.c
@@ -8,41 +8,25 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/ptrace.h>
 #include <linux/kprobes.h>
-#include <linux/module.h>
 
 #include "kprobes.h"
+#include "probes-thumb.h"
 
+/* These emulation encodings are functionally equivalent... */
+#define t32_emulate_rd8rn16rm0ra12_noflags \
+		t32_emulate_rdlo12rdhi8rn16rm0_noflags
 
-/*
- * True if current instruction is in an IT block.
- */
-#define in_it_block(cpsr)	((cpsr & 0x06000c00) != 0x00000000)
-
-/*
- * Return the condition code to check for the currently executing instruction.
- * This is in ITSTATE<7:4> which is in CPSR<15:12> but is only valid if
- * in_it_block returns true.
- */
-#define current_cond(cpsr)	((cpsr >> 12) & 0xf)
-
-/*
- * Return the PC value for a probe in thumb code.
- * This is the address of the probed instruction plus 4.
- * We subtract one because the address will have bit zero set to indicate
- * a pointer to thumb code.
- */
-static inline unsigned long __kprobes thumb_probe_pc(struct kprobe *p)
-{
-	return (unsigned long)p->addr - 1 + 4;
-}
+/* t32 thumb actions */
 
 static void __kprobes
-t32_simulate_table_branch(struct kprobe *p, struct pt_regs *regs)
+t32_simulate_table_branch(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
 
@@ -59,19 +43,19 @@ t32_simulate_table_branch(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+t32_simulate_mrs(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rd = (insn >> 8) & 0xf;
 	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
 	regs->uregs[rd] = regs->ARM_cpsr & mask;
 }
 
 static void __kprobes
-t32_simulate_cond_branch(struct kprobe *p, struct pt_regs *regs)
+t32_simulate_cond_branch(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc;
 
 	long offset = insn & 0x7ff;		/* imm11 */
 	offset += (insn & 0x003f0000) >> 5;	/* imm6 */
@@ -82,20 +66,21 @@ t32_simulate_cond_branch(struct kprobe *p, struct pt_regs *regs)
 	regs->ARM_pc = pc + (offset * 2);
 }
 
-static enum kprobe_insn __kprobes
-t32_decode_cond_branch(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t32_decode_cond_branch(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
 	int cc = (insn >> 22) & 0xf;
-	asi->insn_check_cc = kprobe_condition_checks[cc];
+	asi->insn_check_cc = probes_condition_checks[cc];
 	asi->insn_handler = t32_simulate_cond_branch;
 	return INSN_GOOD_NO_SLOT;
 }
 
 static void __kprobes
-t32_simulate_branch(struct kprobe *p, struct pt_regs *regs)
+t32_simulate_branch(probes_opcode_t insn,
+		    struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc;
 
 	long offset = insn & 0x7ff;		/* imm11 */
 	offset += (insn & 0x03ff0000) >> 5;	/* imm10 */
@@ -108,7 +93,7 @@ t32_simulate_branch(struct kprobe *p, struct pt_regs *regs)
 
 	if (insn & (1 << 14)) {
 		/* BL or BLX */
-		regs->ARM_lr = (unsigned long)p->addr + 4;
+		regs->ARM_lr = regs->ARM_pc | 1;
 		if (!(insn & (1 << 12))) {
 			/* BLX so switch to ARM mode */
 			regs->ARM_cpsr &= ~PSR_T_BIT;
@@ -120,10 +105,10 @@ t32_simulate_branch(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_simulate_ldr_literal(struct kprobe *p, struct pt_regs *regs)
+t32_simulate_ldr_literal(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long addr = thumb_probe_pc(p) & ~3;
+	unsigned long addr = regs->ARM_pc & ~3;
 	int rt = (insn >> 12) & 0xf;
 	unsigned long rtv;
 
@@ -157,10 +142,11 @@ t32_simulate_ldr_literal(struct kprobe *p, struct pt_regs *regs)
 	regs->uregs[rt] = rtv;
 }
 
-static enum kprobe_insn __kprobes
-t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t32_decode_ldmstm(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
-	enum kprobe_insn ret = kprobe_decode_ldmstm(insn, asi);
+	enum probes_insn ret = kprobe_decode_ldmstm(insn, asi, d);
 
 	/* Fixup modified instruction to have halfwords in correct order...*/
 	insn = asi->insn[0];
@@ -171,10 +157,10 @@ t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 }
 
 static void __kprobes
-t32_emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
+t32_emulate_ldrdstrd(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p) & ~3;
+	unsigned long pc = regs->ARM_pc & ~3;
 	int rt1 = (insn >> 12) & 0xf;
 	int rt2 = (insn >> 8) & 0xf;
 	int rn = (insn >> 16) & 0xf;
@@ -187,7 +173,7 @@ t32_emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		"blx    %[fn]"
 		: "=r" (rt1v), "=r" (rt2v), "=r" (rnv)
-		: "0" (rt1v), "1" (rt2v), "2" (rnv), [fn] "r" (p->ainsn.insn_fn)
+		: "0" (rt1v), "1" (rt2v), "2" (rnv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -198,9 +184,9 @@ t32_emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_emulate_ldrstr(struct kprobe *p, struct pt_regs *regs)
+t32_emulate_ldrstr(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rt = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -212,7 +198,7 @@ t32_emulate_ldrstr(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		"blx    %[fn]"
 		: "=r" (rtv), "=r" (rnv)
-		: "0" (rtv), "1" (rnv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "0" (rtv), "1" (rnv), "r" (rmv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -224,9 +210,9 @@ t32_emulate_ldrstr(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_emulate_rd8rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
+t32_emulate_rd8rn16rm0_rwflags(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rd = (insn >> 8) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;
@@ -242,7 +228,7 @@ t32_emulate_rd8rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
 		"mrs	%[cpsr], cpsr		\n\t"
 		: "=r" (rdv), [cpsr] "=r" (cpsr)
 		: "0" (rdv), "r" (rnv), "r" (rmv),
-		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		  "1" (cpsr), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -251,10 +237,10 @@ t32_emulate_rd8rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_emulate_rd8pc16_noflags(struct kprobe *p, struct pt_regs *regs)
+t32_emulate_rd8pc16_noflags(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc;
 	int rd = (insn >> 8) & 0xf;
 
 	register unsigned long rdv asm("r1") = regs->uregs[rd];
@@ -263,7 +249,7 @@ t32_emulate_rd8pc16_noflags(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		"blx    %[fn]"
 		: "=r" (rdv)
-		: "0" (rdv), "r" (rnv), [fn] "r" (p->ainsn.insn_fn)
+		: "0" (rdv), "r" (rnv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -271,9 +257,9 @@ t32_emulate_rd8pc16_noflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_emulate_rd8rn16_noflags(struct kprobe *p, struct pt_regs *regs)
+t32_emulate_rd8rn16_noflags(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rd = (insn >> 8) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 
@@ -283,7 +269,7 @@ t32_emulate_rd8rn16_noflags(struct kprobe *p, struct pt_regs *regs)
 	__asm__ __volatile__ (
 		"blx    %[fn]"
 		: "=r" (rdv)
-		: "0" (rdv), "r" (rnv), [fn] "r" (p->ainsn.insn_fn)
+		: "0" (rdv), "r" (rnv), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -291,9 +277,10 @@ t32_emulate_rd8rn16_noflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t32_emulate_rdlo12rdhi8rn16rm0_noflags(struct kprobe *p, struct pt_regs *regs)
+t32_emulate_rdlo12rdhi8rn16rm0_noflags(probes_opcode_t insn,
+		struct arch_probes_insn *asi,
+		struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rdlo = (insn >> 12) & 0xf;
 	int rdhi = (insn >> 8) & 0xf;
 	int rn = (insn >> 16) & 0xf;
@@ -308,674 +295,43 @@ t32_emulate_rdlo12rdhi8rn16rm0_noflags(struct kprobe *p, struct pt_regs *regs)
 		"blx    %[fn]"
 		: "=r" (rdlov), "=r" (rdhiv)
 		: "0" (rdlov), "1" (rdhiv), "r" (rnv), "r" (rmv),
-		  [fn] "r" (p->ainsn.insn_fn)
+		  [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
 	regs->uregs[rdlo] = rdlov;
 	regs->uregs[rdhi] = rdhiv;
 }
-
-/* These emulation encodings are functionally equivalent... */
-#define t32_emulate_rd8rn16rm0ra12_noflags \
-		t32_emulate_rdlo12rdhi8rn16rm0_noflags
-
-static const union decode_item t32_table_1110_100x_x0xx[] = {
-	/* Load/store multiple instructions */
-
-	/* Rn is PC		1110 100x x0xx 1111 xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfe4f0000, 0xe80f0000),
-
-	/* SRS			1110 1000 00x0 xxxx xxxx xxxx xxxx xxxx */
-	/* RFE			1110 1000 00x1 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xffc00000, 0xe8000000),
-	/* SRS			1110 1001 10x0 xxxx xxxx xxxx xxxx xxxx */
-	/* RFE			1110 1001 10x1 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xffc00000, 0xe9800000),
-
-	/* STM Rn, {...pc}	1110 100x x0x0 xxxx 1xxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfe508000, 0xe8008000),
-	/* LDM Rn, {...lr,pc}	1110 100x x0x1 xxxx 11xx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfe50c000, 0xe810c000),
-	/* LDM/STM Rn, {...sp}	1110 100x x0xx xxxx xx1x xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfe402000, 0xe8002000),
-
-	/* STMIA		1110 1000 10x0 xxxx xxxx xxxx xxxx xxxx */
-	/* LDMIA		1110 1000 10x1 xxxx xxxx xxxx xxxx xxxx */
-	/* STMDB		1110 1001 00x0 xxxx xxxx xxxx xxxx xxxx */
-	/* LDMDB		1110 1001 00x1 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_CUSTOM	(0xfe400000, 0xe8000000, t32_decode_ldmstm),
-
-	DECODE_END
-};
-
-static const union decode_item t32_table_1110_100x_x1xx[] = {
-	/* Load/store dual, load/store exclusive, table branch */
-
-	/* STRD (immediate)	1110 1000 x110 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRD (immediate)	1110 1000 x111 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_OR	(0xff600000, 0xe8600000),
-	/* STRD (immediate)	1110 1001 x1x0 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRD (immediate)	1110 1001 x1x1 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xff400000, 0xe9400000, t32_emulate_ldrdstrd,
-						 REGS(NOPCWB, NOSPPC, NOSPPC, 0, 0)),
-
-	/* TBB			1110 1000 1101 xxxx xxxx xxxx 0000 xxxx */
-	/* TBH			1110 1000 1101 xxxx xxxx xxxx 0001 xxxx */
-	DECODE_SIMULATEX(0xfff000e0, 0xe8d00000, t32_simulate_table_branch,
-						 REGS(NOSP, 0, 0, 0, NOSPPC)),
-
-	/* STREX		1110 1000 0100 xxxx xxxx xxxx xxxx xxxx */
-	/* LDREX		1110 1000 0101 xxxx xxxx xxxx xxxx xxxx */
-	/* STREXB		1110 1000 1100 xxxx xxxx xxxx 0100 xxxx */
-	/* STREXH		1110 1000 1100 xxxx xxxx xxxx 0101 xxxx */
-	/* STREXD		1110 1000 1100 xxxx xxxx xxxx 0111 xxxx */
-	/* LDREXB		1110 1000 1101 xxxx xxxx xxxx 0100 xxxx */
-	/* LDREXH		1110 1000 1101 xxxx xxxx xxxx 0101 xxxx */
-	/* LDREXD		1110 1000 1101 xxxx xxxx xxxx 0111 xxxx */
-	/* And unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item t32_table_1110_101x[] = {
-	/* Data-processing (shifted register)				*/
-
-	/* TST			1110 1010 0001 xxxx xxxx 1111 xxxx xxxx */
-	/* TEQ			1110 1010 1001 xxxx xxxx 1111 xxxx xxxx */
-	DECODE_EMULATEX	(0xff700f00, 0xea100f00, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, 0, 0, NOSPPC)),
-
-	/* CMN			1110 1011 0001 xxxx xxxx 1111 xxxx xxxx */
-	DECODE_OR	(0xfff00f00, 0xeb100f00),
-	/* CMP			1110 1011 1011 xxxx xxxx 1111 xxxx xxxx */
-	DECODE_EMULATEX	(0xfff00f00, 0xebb00f00, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOPC, 0, 0, 0, NOSPPC)),
-
-	/* MOV			1110 1010 010x 1111 xxxx xxxx xxxx xxxx */
-	/* MVN			1110 1010 011x 1111 xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xffcf0000, 0xea4f0000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(0, 0, NOSPPC, 0, NOSPPC)),
-
-	/* ???			1110 1010 101x xxxx xxxx xxxx xxxx xxxx */
-	/* ???			1110 1010 111x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xffa00000, 0xeaa00000),
-	/* ???			1110 1011 001x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xffe00000, 0xeb200000),
-	/* ???			1110 1011 100x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xffe00000, 0xeb800000),
-	/* ???			1110 1011 111x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xffe00000, 0xebe00000),
-
-	/* ADD/SUB SP, SP, Rm, LSL #0..3				*/
-	/*			1110 1011 x0xx 1101 x000 1101 xx00 xxxx */
-	DECODE_EMULATEX	(0xff4f7f30, 0xeb0d0d00, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(SP, 0, SP, 0, NOSPPC)),
-
-	/* ADD/SUB SP, SP, Rm, shift					*/
-	/*			1110 1011 x0xx 1101 xxxx 1101 xxxx xxxx */
-	DECODE_REJECT	(0xff4f0f00, 0xeb0d0d00),
-
-	/* ADD/SUB Rd, SP, Rm, shift					*/
-	/*			1110 1011 x0xx 1101 xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xff4f0000, 0xeb0d0000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(SP, 0, NOPC, 0, NOSPPC)),
-
-	/* AND			1110 1010 000x xxxx xxxx xxxx xxxx xxxx */
-	/* BIC			1110 1010 001x xxxx xxxx xxxx xxxx xxxx */
-	/* ORR			1110 1010 010x xxxx xxxx xxxx xxxx xxxx */
-	/* ORN			1110 1010 011x xxxx xxxx xxxx xxxx xxxx */
-	/* EOR			1110 1010 100x xxxx xxxx xxxx xxxx xxxx */
-	/* PKH			1110 1010 110x xxxx xxxx xxxx xxxx xxxx */
-	/* ADD			1110 1011 000x xxxx xxxx xxxx xxxx xxxx */
-	/* ADC			1110 1011 010x xxxx xxxx xxxx xxxx xxxx */
-	/* SBC			1110 1011 011x xxxx xxxx xxxx xxxx xxxx */
-	/* SUB			1110 1011 101x xxxx xxxx xxxx xxxx xxxx */
-	/* RSB			1110 1011 110x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfe000000, 0xea000000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
-
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_0x0x___0[] = {
-	/* Data-processing (modified immediate)				*/
-
-	/* TST			1111 0x00 0001 xxxx 0xxx 1111 xxxx xxxx */
-	/* TEQ			1111 0x00 1001 xxxx 0xxx 1111 xxxx xxxx */
-	DECODE_EMULATEX	(0xfb708f00, 0xf0100f00, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, 0, 0, 0)),
-
-	/* CMN			1111 0x01 0001 xxxx 0xxx 1111 xxxx xxxx */
-	DECODE_OR	(0xfbf08f00, 0xf1100f00),
-	/* CMP			1111 0x01 1011 xxxx 0xxx 1111 xxxx xxxx */
-	DECODE_EMULATEX	(0xfbf08f00, 0xf1b00f00, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOPC, 0, 0, 0, 0)),
-
-	/* MOV			1111 0x00 010x 1111 0xxx xxxx xxxx xxxx */
-	/* MVN			1111 0x00 011x 1111 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfbcf8000, 0xf04f0000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(0, 0, NOSPPC, 0, 0)),
-
-	/* ???			1111 0x00 101x xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfbe08000, 0xf0a00000),
-	/* ???			1111 0x00 110x xxxx 0xxx xxxx xxxx xxxx */
-	/* ???			1111 0x00 111x xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfbc08000, 0xf0c00000),
-	/* ???			1111 0x01 001x xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfbe08000, 0xf1200000),
-	/* ???			1111 0x01 100x xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfbe08000, 0xf1800000),
-	/* ???			1111 0x01 111x xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfbe08000, 0xf1e00000),
-
-	/* ADD Rd, SP, #imm	1111 0x01 000x 1101 0xxx xxxx xxxx xxxx */
-	/* SUB Rd, SP, #imm	1111 0x01 101x 1101 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfb4f8000, 0xf10d0000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(SP, 0, NOPC, 0, 0)),
-
-	/* AND			1111 0x00 000x xxxx 0xxx xxxx xxxx xxxx */
-	/* BIC			1111 0x00 001x xxxx 0xxx xxxx xxxx xxxx */
-	/* ORR			1111 0x00 010x xxxx 0xxx xxxx xxxx xxxx */
-	/* ORN			1111 0x00 011x xxxx 0xxx xxxx xxxx xxxx */
-	/* EOR			1111 0x00 100x xxxx 0xxx xxxx xxxx xxxx */
-	/* ADD			1111 0x01 000x xxxx 0xxx xxxx xxxx xxxx */
-	/* ADC			1111 0x01 010x xxxx 0xxx xxxx xxxx xxxx */
-	/* SBC			1111 0x01 011x xxxx 0xxx xxxx xxxx xxxx */
-	/* SUB			1111 0x01 101x xxxx 0xxx xxxx xxxx xxxx */
-	/* RSB			1111 0x01 110x xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfa008000, 0xf0000000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
-
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_0x1x___0[] = {
-	/* Data-processing (plain binary immediate)			*/
-
-	/* ADDW Rd, PC, #imm	1111 0x10 0000 1111 0xxx xxxx xxxx xxxx */
-	DECODE_OR	(0xfbff8000, 0xf20f0000),
-	/* SUBW	Rd, PC, #imm	1111 0x10 1010 1111 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfbff8000, 0xf2af0000, t32_emulate_rd8pc16_noflags,
-						 REGS(PC, 0, NOSPPC, 0, 0)),
-
-	/* ADDW SP, SP, #imm	1111 0x10 0000 1101 0xxx 1101 xxxx xxxx */
-	DECODE_OR	(0xfbff8f00, 0xf20d0d00),
-	/* SUBW	SP, SP, #imm	1111 0x10 1010 1101 0xxx 1101 xxxx xxxx */
-	DECODE_EMULATEX	(0xfbff8f00, 0xf2ad0d00, t32_emulate_rd8rn16_noflags,
-						 REGS(SP, 0, SP, 0, 0)),
-
-	/* ADDW			1111 0x10 0000 xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_OR	(0xfbf08000, 0xf2000000),
-	/* SUBW			1111 0x10 1010 xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfbf08000, 0xf2a00000, t32_emulate_rd8rn16_noflags,
-						 REGS(NOPCX, 0, NOSPPC, 0, 0)),
-
-	/* MOVW			1111 0x10 0100 xxxx 0xxx xxxx xxxx xxxx */
-	/* MOVT			1111 0x10 1100 xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfb708000, 0xf2400000, t32_emulate_rd8rn16_noflags,
-						 REGS(0, 0, NOSPPC, 0, 0)),
-
-	/* SSAT16		1111 0x11 0010 xxxx 0000 xxxx 00xx xxxx */
-	/* SSAT			1111 0x11 00x0 xxxx 0xxx xxxx xxxx xxxx */
-	/* USAT16		1111 0x11 1010 xxxx 0000 xxxx 00xx xxxx */
-	/* USAT			1111 0x11 10x0 xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfb508000, 0xf3000000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
-
-	/* SFBX			1111 0x11 0100 xxxx 0xxx xxxx xxxx xxxx */
-	/* UFBX			1111 0x11 1100 xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfb708000, 0xf3400000, t32_emulate_rd8rn16_noflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
-
-	/* BFC			1111 0x11 0110 1111 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfbff8000, 0xf36f0000, t32_emulate_rd8rn16_noflags,
-						 REGS(0, 0, NOSPPC, 0, 0)),
-
-	/* BFI			1111 0x11 0110 xxxx 0xxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfbf08000, 0xf3600000, t32_emulate_rd8rn16_noflags,
-						 REGS(NOSPPCX, 0, NOSPPC, 0, 0)),
-
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_0xxx___1[] = {
-	/* Branches and miscellaneous control				*/
-
-	/* YIELD		1111 0011 1010 xxxx 10x0 x000 0000 0001 */
-	DECODE_OR	(0xfff0d7ff, 0xf3a08001),
-	/* SEV			1111 0011 1010 xxxx 10x0 x000 0000 0100 */
-	DECODE_EMULATE	(0xfff0d7ff, 0xf3a08004, kprobe_emulate_none),
-	/* NOP			1111 0011 1010 xxxx 10x0 x000 0000 0000 */
-	/* WFE			1111 0011 1010 xxxx 10x0 x000 0000 0010 */
-	/* WFI			1111 0011 1010 xxxx 10x0 x000 0000 0011 */
-	DECODE_SIMULATE	(0xfff0d7fc, 0xf3a08000, kprobe_simulate_nop),
-
-	/* MRS Rd, CPSR		1111 0011 1110 xxxx 10x0 xxxx xxxx xxxx */
-	DECODE_SIMULATEX(0xfff0d000, 0xf3e08000, t32_simulate_mrs,
-						 REGS(0, 0, NOSPPC, 0, 0)),
-
-	/*
-	 * Unsupported instructions
-	 *			1111 0x11 1xxx xxxx 10x0 xxxx xxxx xxxx
-	 *
-	 * MSR			1111 0011 100x xxxx 10x0 xxxx xxxx xxxx
-	 * DBG hint		1111 0011 1010 xxxx 10x0 x000 1111 xxxx
-	 * Unallocated hints	1111 0011 1010 xxxx 10x0 x000 xxxx xxxx
-	 * CPS			1111 0011 1010 xxxx 10x0 xxxx xxxx xxxx
-	 * CLREX/DSB/DMB/ISB	1111 0011 1011 xxxx 10x0 xxxx xxxx xxxx
-	 * BXJ			1111 0011 1100 xxxx 10x0 xxxx xxxx xxxx
-	 * SUBS PC,LR,#<imm8>	1111 0011 1101 xxxx 10x0 xxxx xxxx xxxx
-	 * MRS Rd, SPSR		1111 0011 1111 xxxx 10x0 xxxx xxxx xxxx
-	 * SMC			1111 0111 1111 xxxx 1000 xxxx xxxx xxxx
-	 * UNDEFINED		1111 0111 1111 xxxx 1010 xxxx xxxx xxxx
-	 * ???			1111 0111 1xxx xxxx 1010 xxxx xxxx xxxx
-	 */
-	DECODE_REJECT	(0xfb80d000, 0xf3808000),
-
-	/* Bcc			1111 0xxx xxxx xxxx 10x0 xxxx xxxx xxxx */
-	DECODE_CUSTOM	(0xf800d000, 0xf0008000, t32_decode_cond_branch),
-
-	/* BLX			1111 0xxx xxxx xxxx 11x0 xxxx xxxx xxx0 */
-	DECODE_OR	(0xf800d001, 0xf000c000),
-	/* B			1111 0xxx xxxx xxxx 10x1 xxxx xxxx xxxx */
-	/* BL			1111 0xxx xxxx xxxx 11x1 xxxx xxxx xxxx */
-	DECODE_SIMULATE	(0xf8009000, 0xf0009000, t32_simulate_branch),
-
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_100x_x0x1__1111[] = {
-	/* Memory hints							*/
-
-	/* PLD (literal)	1111 1000 x001 1111 1111 xxxx xxxx xxxx */
-	/* PLI (literal)	1111 1001 x001 1111 1111 xxxx xxxx xxxx */
-	DECODE_SIMULATE	(0xfe7ff000, 0xf81ff000, kprobe_simulate_nop),
-
-	/* PLD{W} (immediate)	1111 1000 10x1 xxxx 1111 xxxx xxxx xxxx */
-	DECODE_OR	(0xffd0f000, 0xf890f000),
-	/* PLD{W} (immediate)	1111 1000 00x1 xxxx 1111 1100 xxxx xxxx */
-	DECODE_OR	(0xffd0ff00, 0xf810fc00),
-	/* PLI (immediate)	1111 1001 1001 xxxx 1111 xxxx xxxx xxxx */
-	DECODE_OR	(0xfff0f000, 0xf990f000),
-	/* PLI (immediate)	1111 1001 0001 xxxx 1111 1100 xxxx xxxx */
-	DECODE_SIMULATEX(0xfff0ff00, 0xf910fc00, kprobe_simulate_nop,
-						 REGS(NOPCX, 0, 0, 0, 0)),
-
-	/* PLD{W} (register)	1111 1000 00x1 xxxx 1111 0000 00xx xxxx */
-	DECODE_OR	(0xffd0ffc0, 0xf810f000),
-	/* PLI (register)	1111 1001 0001 xxxx 1111 0000 00xx xxxx */
-	DECODE_SIMULATEX(0xfff0ffc0, 0xf910f000, kprobe_simulate_nop,
-						 REGS(NOPCX, 0, 0, 0, NOSPPC)),
-
-	/* Other unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_100x[] = {
-	/* Store/Load single data item					*/
-
-	/* ???			1111 100x x11x xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfe600000, 0xf8600000),
-
-	/* ???			1111 1001 0101 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xfff00000, 0xf9500000),
-
-	/* ???			1111 100x 0xxx xxxx xxxx 10x0 xxxx xxxx */
-	DECODE_REJECT	(0xfe800d00, 0xf8000800),
-
-	/* STRBT		1111 1000 0000 xxxx xxxx 1110 xxxx xxxx */
-	/* STRHT		1111 1000 0010 xxxx xxxx 1110 xxxx xxxx */
-	/* STRT			1111 1000 0100 xxxx xxxx 1110 xxxx xxxx */
-	/* LDRBT		1111 1000 0001 xxxx xxxx 1110 xxxx xxxx */
-	/* LDRSBT		1111 1001 0001 xxxx xxxx 1110 xxxx xxxx */
-	/* LDRHT		1111 1000 0011 xxxx xxxx 1110 xxxx xxxx */
-	/* LDRSHT		1111 1001 0011 xxxx xxxx 1110 xxxx xxxx */
-	/* LDRT			1111 1000 0101 xxxx xxxx 1110 xxxx xxxx */
-	DECODE_REJECT	(0xfe800f00, 0xf8000e00),
-
-	/* STR{,B,H} Rn,[PC...]	1111 1000 xxx0 1111 xxxx xxxx xxxx xxxx */
-	DECODE_REJECT	(0xff1f0000, 0xf80f0000),
-
-	/* STR{,B,H} PC,[Rn...]	1111 1000 xxx0 xxxx 1111 xxxx xxxx xxxx */
-	DECODE_REJECT	(0xff10f000, 0xf800f000),
-
-	/* LDR (literal)	1111 1000 x101 1111 xxxx xxxx xxxx xxxx */
-	DECODE_SIMULATEX(0xff7f0000, 0xf85f0000, t32_simulate_ldr_literal,
-						 REGS(PC, ANY, 0, 0, 0)),
-
-	/* STR (immediate)	1111 1000 0100 xxxx xxxx 1xxx xxxx xxxx */
-	/* LDR (immediate)	1111 1000 0101 xxxx xxxx 1xxx xxxx xxxx */
-	DECODE_OR	(0xffe00800, 0xf8400800),
-	/* STR (immediate)	1111 1000 1100 xxxx xxxx xxxx xxxx xxxx */
-	/* LDR (immediate)	1111 1000 1101 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xffe00000, 0xf8c00000, t32_emulate_ldrstr,
-						 REGS(NOPCX, ANY, 0, 0, 0)),
-
-	/* STR (register)	1111 1000 0100 xxxx xxxx 0000 00xx xxxx */
-	/* LDR (register)	1111 1000 0101 xxxx xxxx 0000 00xx xxxx */
-	DECODE_EMULATEX	(0xffe00fc0, 0xf8400000, t32_emulate_ldrstr,
-						 REGS(NOPCX, ANY, 0, 0, NOSPPC)),
-
-	/* LDRB (literal)	1111 1000 x001 1111 xxxx xxxx xxxx xxxx */
-	/* LDRSB (literal)	1111 1001 x001 1111 xxxx xxxx xxxx xxxx */
-	/* LDRH (literal)	1111 1000 x011 1111 xxxx xxxx xxxx xxxx */
-	/* LDRSH (literal)	1111 1001 x011 1111 xxxx xxxx xxxx xxxx */
-	DECODE_SIMULATEX(0xfe5f0000, 0xf81f0000, t32_simulate_ldr_literal,
-						 REGS(PC, NOSPPCX, 0, 0, 0)),
-
-	/* STRB (immediate)	1111 1000 0000 xxxx xxxx 1xxx xxxx xxxx */
-	/* STRH (immediate)	1111 1000 0010 xxxx xxxx 1xxx xxxx xxxx */
-	/* LDRB (immediate)	1111 1000 0001 xxxx xxxx 1xxx xxxx xxxx */
-	/* LDRSB (immediate)	1111 1001 0001 xxxx xxxx 1xxx xxxx xxxx */
-	/* LDRH (immediate)	1111 1000 0011 xxxx xxxx 1xxx xxxx xxxx */
-	/* LDRSH (immediate)	1111 1001 0011 xxxx xxxx 1xxx xxxx xxxx */
-	DECODE_OR	(0xfec00800, 0xf8000800),
-	/* STRB (immediate)	1111 1000 1000 xxxx xxxx xxxx xxxx xxxx */
-	/* STRH (immediate)	1111 1000 1010 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRB (immediate)	1111 1000 1001 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRSB (immediate)	1111 1001 1001 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRH (immediate)	1111 1000 1011 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRSH (immediate)	1111 1001 1011 xxxx xxxx xxxx xxxx xxxx */
-	DECODE_EMULATEX	(0xfec00000, 0xf8800000, t32_emulate_ldrstr,
-						 REGS(NOPCX, NOSPPCX, 0, 0, 0)),
-
-	/* STRB (register)	1111 1000 0000 xxxx xxxx 0000 00xx xxxx */
-	/* STRH (register)	1111 1000 0010 xxxx xxxx 0000 00xx xxxx */
-	/* LDRB (register)	1111 1000 0001 xxxx xxxx 0000 00xx xxxx */
-	/* LDRSB (register)	1111 1001 0001 xxxx xxxx 0000 00xx xxxx */
-	/* LDRH (register)	1111 1000 0011 xxxx xxxx 0000 00xx xxxx */
-	/* LDRSH (register)	1111 1001 0011 xxxx xxxx 0000 00xx xxxx */
-	DECODE_EMULATEX	(0xfe800fc0, 0xf8000000, t32_emulate_ldrstr,
-						 REGS(NOPCX, NOSPPCX, 0, 0, NOSPPC)),
-
-	/* Other unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_1010___1111[] = {
-	/* Data-processing (register)					*/
-
-	/* ???			1111 1010 011x xxxx 1111 xxxx 1xxx xxxx */
-	DECODE_REJECT	(0xffe0f080, 0xfa60f080),
-
-	/* SXTH			1111 1010 0000 1111 1111 xxxx 1xxx xxxx */
-	/* UXTH			1111 1010 0001 1111 1111 xxxx 1xxx xxxx */
-	/* SXTB16		1111 1010 0010 1111 1111 xxxx 1xxx xxxx */
-	/* UXTB16		1111 1010 0011 1111 1111 xxxx 1xxx xxxx */
-	/* SXTB			1111 1010 0100 1111 1111 xxxx 1xxx xxxx */
-	/* UXTB			1111 1010 0101 1111 1111 xxxx 1xxx xxxx */
-	DECODE_EMULATEX	(0xff8ff080, 0xfa0ff080, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(0, 0, NOSPPC, 0, NOSPPC)),
-
-
-	/* ???			1111 1010 1xxx xxxx 1111 xxxx 0x11 xxxx */
-	DECODE_REJECT	(0xff80f0b0, 0xfa80f030),
-	/* ???			1111 1010 1x11 xxxx 1111 xxxx 0xxx xxxx */
-	DECODE_REJECT	(0xffb0f080, 0xfab0f000),
-
-	/* SADD16		1111 1010 1001 xxxx 1111 xxxx 0000 xxxx */
-	/* SASX			1111 1010 1010 xxxx 1111 xxxx 0000 xxxx */
-	/* SSAX			1111 1010 1110 xxxx 1111 xxxx 0000 xxxx */
-	/* SSUB16		1111 1010 1101 xxxx 1111 xxxx 0000 xxxx */
-	/* SADD8		1111 1010 1000 xxxx 1111 xxxx 0000 xxxx */
-	/* SSUB8		1111 1010 1100 xxxx 1111 xxxx 0000 xxxx */
-
-	/* QADD16		1111 1010 1001 xxxx 1111 xxxx 0001 xxxx */
-	/* QASX			1111 1010 1010 xxxx 1111 xxxx 0001 xxxx */
-	/* QSAX			1111 1010 1110 xxxx 1111 xxxx 0001 xxxx */
-	/* QSUB16		1111 1010 1101 xxxx 1111 xxxx 0001 xxxx */
-	/* QADD8		1111 1010 1000 xxxx 1111 xxxx 0001 xxxx */
-	/* QSUB8		1111 1010 1100 xxxx 1111 xxxx 0001 xxxx */
-
-	/* SHADD16		1111 1010 1001 xxxx 1111 xxxx 0010 xxxx */
-	/* SHASX		1111 1010 1010 xxxx 1111 xxxx 0010 xxxx */
-	/* SHSAX		1111 1010 1110 xxxx 1111 xxxx 0010 xxxx */
-	/* SHSUB16		1111 1010 1101 xxxx 1111 xxxx 0010 xxxx */
-	/* SHADD8		1111 1010 1000 xxxx 1111 xxxx 0010 xxxx */
-	/* SHSUB8		1111 1010 1100 xxxx 1111 xxxx 0010 xxxx */
-
-	/* UADD16		1111 1010 1001 xxxx 1111 xxxx 0100 xxxx */
-	/* UASX			1111 1010 1010 xxxx 1111 xxxx 0100 xxxx */
-	/* USAX			1111 1010 1110 xxxx 1111 xxxx 0100 xxxx */
-	/* USUB16		1111 1010 1101 xxxx 1111 xxxx 0100 xxxx */
-	/* UADD8		1111 1010 1000 xxxx 1111 xxxx 0100 xxxx */
-	/* USUB8		1111 1010 1100 xxxx 1111 xxxx 0100 xxxx */
-
-	/* UQADD16		1111 1010 1001 xxxx 1111 xxxx 0101 xxxx */
-	/* UQASX		1111 1010 1010 xxxx 1111 xxxx 0101 xxxx */
-	/* UQSAX		1111 1010 1110 xxxx 1111 xxxx 0101 xxxx */
-	/* UQSUB16		1111 1010 1101 xxxx 1111 xxxx 0101 xxxx */
-	/* UQADD8		1111 1010 1000 xxxx 1111 xxxx 0101 xxxx */
-	/* UQSUB8		1111 1010 1100 xxxx 1111 xxxx 0101 xxxx */
-
-	/* UHADD16		1111 1010 1001 xxxx 1111 xxxx 0110 xxxx */
-	/* UHASX		1111 1010 1010 xxxx 1111 xxxx 0110 xxxx */
-	/* UHSAX		1111 1010 1110 xxxx 1111 xxxx 0110 xxxx */
-	/* UHSUB16		1111 1010 1101 xxxx 1111 xxxx 0110 xxxx */
-	/* UHADD8		1111 1010 1000 xxxx 1111 xxxx 0110 xxxx */
-	/* UHSUB8		1111 1010 1100 xxxx 1111 xxxx 0110 xxxx */
-	DECODE_OR	(0xff80f080, 0xfa80f000),
-
-	/* SXTAH		1111 1010 0000 xxxx 1111 xxxx 1xxx xxxx */
-	/* UXTAH		1111 1010 0001 xxxx 1111 xxxx 1xxx xxxx */
-	/* SXTAB16		1111 1010 0010 xxxx 1111 xxxx 1xxx xxxx */
-	/* UXTAB16		1111 1010 0011 xxxx 1111 xxxx 1xxx xxxx */
-	/* SXTAB		1111 1010 0100 xxxx 1111 xxxx 1xxx xxxx */
-	/* UXTAB		1111 1010 0101 xxxx 1111 xxxx 1xxx xxxx */
-	DECODE_OR	(0xff80f080, 0xfa00f080),
-
-	/* QADD			1111 1010 1000 xxxx 1111 xxxx 1000 xxxx */
-	/* QDADD		1111 1010 1000 xxxx 1111 xxxx 1001 xxxx */
-	/* QSUB			1111 1010 1000 xxxx 1111 xxxx 1010 xxxx */
-	/* QDSUB		1111 1010 1000 xxxx 1111 xxxx 1011 xxxx */
-	DECODE_OR	(0xfff0f0c0, 0xfa80f080),
-
-	/* SEL			1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */
-	DECODE_OR	(0xfff0f0f0, 0xfaa0f080),
-
-	/* LSL			1111 1010 000x xxxx 1111 xxxx 0000 xxxx */
-	/* LSR			1111 1010 001x xxxx 1111 xxxx 0000 xxxx */
-	/* ASR			1111 1010 010x xxxx 1111 xxxx 0000 xxxx */
-	/* ROR			1111 1010 011x xxxx 1111 xxxx 0000 xxxx */
-	DECODE_EMULATEX	(0xff80f0f0, 0xfa00f000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
-
-	/* CLZ			1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */
-	DECODE_OR	(0xfff0f0f0, 0xfab0f080),
-
-	/* REV			1111 1010 1001 xxxx 1111 xxxx 1000 xxxx */
-	/* REV16		1111 1010 1001 xxxx 1111 xxxx 1001 xxxx */
-	/* RBIT			1111 1010 1001 xxxx 1111 xxxx 1010 xxxx */
-	/* REVSH		1111 1010 1001 xxxx 1111 xxxx 1011 xxxx */
-	DECODE_EMULATEX	(0xfff0f0c0, 0xfa90f080, t32_emulate_rd8rn16_noflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, SAMEAS16)),
-
-	/* Other unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_1011_0[] = {
-	/* Multiply, multiply accumulate, and absolute difference	*/
-
-	/* ???			1111 1011 0000 xxxx 1111 xxxx 0001 xxxx */
-	DECODE_REJECT	(0xfff0f0f0, 0xfb00f010),
-	/* ???			1111 1011 0111 xxxx 1111 xxxx 0001 xxxx */
-	DECODE_REJECT	(0xfff0f0f0, 0xfb70f010),
-
-	/* SMULxy		1111 1011 0001 xxxx 1111 xxxx 00xx xxxx */
-	DECODE_OR	(0xfff0f0c0, 0xfb10f000),
-	/* MUL			1111 1011 0000 xxxx 1111 xxxx 0000 xxxx */
-	/* SMUAD{X}		1111 1011 0010 xxxx 1111 xxxx 000x xxxx */
-	/* SMULWy		1111 1011 0011 xxxx 1111 xxxx 000x xxxx */
-	/* SMUSD{X}		1111 1011 0100 xxxx 1111 xxxx 000x xxxx */
-	/* SMMUL{R}		1111 1011 0101 xxxx 1111 xxxx 000x xxxx */
-	/* USAD8		1111 1011 0111 xxxx 1111 xxxx 0000 xxxx */
-	DECODE_EMULATEX	(0xff80f0e0, 0xfb00f000, t32_emulate_rd8rn16rm0_rwflags,
-						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
-
-	/* ???			1111 1011 0111 xxxx xxxx xxxx 0001 xxxx */
-	DECODE_REJECT	(0xfff000f0, 0xfb700010),
-
-	/* SMLAxy		1111 1011 0001 xxxx xxxx xxxx 00xx xxxx */
-	DECODE_OR	(0xfff000c0, 0xfb100000),
-	/* MLA			1111 1011 0000 xxxx xxxx xxxx 0000 xxxx */
-	/* MLS			1111 1011 0000 xxxx xxxx xxxx 0001 xxxx */
-	/* SMLAD{X}		1111 1011 0010 xxxx xxxx xxxx 000x xxxx */
-	/* SMLAWy		1111 1011 0011 xxxx xxxx xxxx 000x xxxx */
-	/* SMLSD{X}		1111 1011 0100 xxxx xxxx xxxx 000x xxxx */
-	/* SMMLA{R}		1111 1011 0101 xxxx xxxx xxxx 000x xxxx */
-	/* SMMLS{R}		1111 1011 0110 xxxx xxxx xxxx 000x xxxx */
-	/* USADA8		1111 1011 0111 xxxx xxxx xxxx 0000 xxxx */
-	DECODE_EMULATEX	(0xff8000c0, 0xfb000000, t32_emulate_rd8rn16rm0ra12_noflags,
-						 REGS(NOSPPC, NOSPPCX, NOSPPC, 0, NOSPPC)),
-
-	/* Other unallocated instructions...				*/
-	DECODE_END
-};
-
-static const union decode_item t32_table_1111_1011_1[] = {
-	/* Long multiply, long multiply accumulate, and divide		*/
-
-	/* UMAAL		1111 1011 1110 xxxx xxxx xxxx 0110 xxxx */
-	DECODE_OR	(0xfff000f0, 0xfbe00060),
-	/* SMLALxy		1111 1011 1100 xxxx xxxx xxxx 10xx xxxx */
-	DECODE_OR	(0xfff000c0, 0xfbc00080),
-	/* SMLALD{X}		1111 1011 1100 xxxx xxxx xxxx 110x xxxx */
-	/* SMLSLD{X}		1111 1011 1101 xxxx xxxx xxxx 110x xxxx */
-	DECODE_OR	(0xffe000e0, 0xfbc000c0),
-	/* SMULL		1111 1011 1000 xxxx xxxx xxxx 0000 xxxx */
-	/* UMULL		1111 1011 1010 xxxx xxxx xxxx 0000 xxxx */
-	/* SMLAL		1111 1011 1100 xxxx xxxx xxxx 0000 xxxx */
-	/* UMLAL		1111 1011 1110 xxxx xxxx xxxx 0000 xxxx */
-	DECODE_EMULATEX	(0xff9000f0, 0xfb800000, t32_emulate_rdlo12rdhi8rn16rm0_noflags,
-						 REGS(NOSPPC, NOSPPC, NOSPPC, 0, NOSPPC)),
-
-	/* SDIV			1111 1011 1001 xxxx xxxx xxxx 1111 xxxx */
-	/* UDIV			1111 1011 1011 xxxx xxxx xxxx 1111 xxxx */
-	/* Other unallocated instructions...				*/
-	DECODE_END
-};
-
-const union decode_item kprobe_decode_thumb32_table[] = {
-
-	/*
-	 * Load/store multiple instructions
-	 *			1110 100x x0xx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfe400000, 0xe8000000, t32_table_1110_100x_x0xx),
-
-	/*
-	 * Load/store dual, load/store exclusive, table branch
-	 *			1110 100x x1xx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfe400000, 0xe8400000, t32_table_1110_100x_x1xx),
-
-	/*
-	 * Data-processing (shifted register)
-	 *			1110 101x xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfe000000, 0xea000000, t32_table_1110_101x),
-
-	/*
-	 * Coprocessor instructions
-	 *			1110 11xx xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_REJECT	(0xfc000000, 0xec000000),
-
-	/*
-	 * Data-processing (modified immediate)
-	 *			1111 0x0x xxxx xxxx 0xxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfa008000, 0xf0000000, t32_table_1111_0x0x___0),
-
-	/*
-	 * Data-processing (plain binary immediate)
-	 *			1111 0x1x xxxx xxxx 0xxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfa008000, 0xf2000000, t32_table_1111_0x1x___0),
-
-	/*
-	 * Branches and miscellaneous control
-	 *			1111 0xxx xxxx xxxx 1xxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xf8008000, 0xf0008000, t32_table_1111_0xxx___1),
-
-	/*
-	 * Advanced SIMD element or structure load/store instructions
-	 *			1111 1001 xxx0 xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_REJECT	(0xff100000, 0xf9000000),
-
-	/*
-	 * Memory hints
-	 *			1111 100x x0x1 xxxx 1111 xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfe50f000, 0xf810f000, t32_table_1111_100x_x0x1__1111),
-
-	/*
-	 * Store single data item
-	 *			1111 1000 xxx0 xxxx xxxx xxxx xxxx xxxx
-	 * Load single data items
-	 *			1111 100x xxx1 xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xfe000000, 0xf8000000, t32_table_1111_100x),
-
-	/*
-	 * Data-processing (register)
-	 *			1111 1010 xxxx xxxx 1111 xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xff00f000, 0xfa00f000, t32_table_1111_1010___1111),
-
-	/*
-	 * Multiply, multiply accumulate, and absolute difference
-	 *			1111 1011 0xxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xff800000, 0xfb000000, t32_table_1111_1011_0),
-
-	/*
-	 * Long multiply, long multiply accumulate, and divide
-	 *			1111 1011 1xxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xff800000, 0xfb800000, t32_table_1111_1011_1),
-
-	/*
-	 * Coprocessor instructions
-	 *			1111 11xx xxxx xxxx xxxx xxxx xxxx xxxx
-	 */
-	DECODE_END
-};
-#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
-EXPORT_SYMBOL_GPL(kprobe_decode_thumb32_table);
-#endif
+/* t16 thumb actions */
 
 static void __kprobes
-t16_simulate_bxblx(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_bxblx(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc + 2;
 	int rm = (insn >> 3) & 0xf;
 	unsigned long rmv = (rm == 15) ? pc : regs->uregs[rm];
 
 	if (insn & (1 << 7)) /* BLX ? */
-		regs->ARM_lr = (unsigned long)p->addr + 2;
+		regs->ARM_lr = regs->ARM_pc | 1;
 
 	bx_write_pc(rmv, regs);
 }
 
 static void __kprobes
-t16_simulate_ldr_literal(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_ldr_literal(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long* base = (unsigned long *)(thumb_probe_pc(p) & ~3);
+	unsigned long *base = (unsigned long *)((regs->ARM_pc + 2) & ~3);
 	long index = insn & 0xff;
 	int rt = (insn >> 8) & 0x7;
 	regs->uregs[rt] = base[index];
 }
 
 static void __kprobes
-t16_simulate_ldrstr_sp_relative(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_ldrstr_sp_relative(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	unsigned long* base = (unsigned long *)regs->ARM_sp;
 	long index = insn & 0xff;
 	int rt = (insn >> 8) & 0x7;
@@ -986,20 +342,20 @@ t16_simulate_ldrstr_sp_relative(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t16_simulate_reladr(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_reladr(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	unsigned long base = (insn & 0x800) ? regs->ARM_sp
-					    : (thumb_probe_pc(p) & ~3);
+					    : ((regs->ARM_pc + 2) & ~3);
 	long offset = insn & 0xff;
 	int rt = (insn >> 8) & 0x7;
 	regs->uregs[rt] = base + offset * 4;
 }
 
 static void __kprobes
-t16_simulate_add_sp_imm(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_add_sp_imm(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	long imm = insn & 0x7f;
 	if (insn & 0x80) /* SUB */
 		regs->ARM_sp -= imm * 4;
@@ -1008,21 +364,22 @@ t16_simulate_add_sp_imm(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t16_simulate_cbz(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_cbz(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
 	int rn = insn & 0x7;
-	kprobe_opcode_t nonzero = regs->uregs[rn] ? insn : ~insn;
+	probes_opcode_t nonzero = regs->uregs[rn] ? insn : ~insn;
 	if (nonzero & 0x800) {
 		long i = insn & 0x200;
 		long imm5 = insn & 0xf8;
-		unsigned long pc = thumb_probe_pc(p);
+		unsigned long pc = regs->ARM_pc + 2;
 		regs->ARM_pc = pc + (i >> 3) + (imm5 >> 2);
 	}
 }
 
 static void __kprobes
-t16_simulate_it(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_it(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	/*
 	 * The 8 IT state bits are split into two parts in CPSR:
@@ -1030,7 +387,6 @@ t16_simulate_it(struct kprobe *p, struct pt_regs *regs)
 	 *	ITSTATE<7:2> are in CPSR<15:10>
 	 * The new IT state is in the lower byte of insn.
 	 */
-	kprobe_opcode_t insn = p->opcode;
 	unsigned long cpsr = regs->ARM_cpsr;
 	cpsr &= ~PSR_IT_MASK;
 	cpsr |= (insn & 0xfc) << 8;
@@ -1039,50 +395,54 @@ t16_simulate_it(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t16_singlestep_it(struct kprobe *p, struct pt_regs *regs)
+t16_singlestep_it(probes_opcode_t insn,
+		  struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	regs->ARM_pc += 2;
-	t16_simulate_it(p, regs);
+	t16_simulate_it(insn, asi, regs);
 }
 
-static enum kprobe_insn __kprobes
-t16_decode_it(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t16_decode_it(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
 	asi->insn_singlestep = t16_singlestep_it;
 	return INSN_GOOD_NO_SLOT;
 }
 
 static void __kprobes
-t16_simulate_cond_branch(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_cond_branch(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc + 2;
 	long offset = insn & 0x7f;
 	offset -= insn & 0x80; /* Apply sign bit */
 	regs->ARM_pc = pc + (offset * 2);
 }
 
-static enum kprobe_insn __kprobes
-t16_decode_cond_branch(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t16_decode_cond_branch(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
 	int cc = (insn >> 8) & 0xf;
-	asi->insn_check_cc = kprobe_condition_checks[cc];
+	asi->insn_check_cc = probes_condition_checks[cc];
 	asi->insn_handler = t16_simulate_cond_branch;
 	return INSN_GOOD_NO_SLOT;
 }
 
 static void __kprobes
-t16_simulate_branch(struct kprobe *p, struct pt_regs *regs)
+t16_simulate_branch(probes_opcode_t insn,
+		   struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc + 2;
 	long offset = insn & 0x3ff;
 	offset -= insn & 0x400; /* Apply sign bit */
 	regs->ARM_pc = pc + (offset * 2);
 }
 
 static unsigned long __kprobes
-t16_emulate_loregs(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_loregs(probes_opcode_t insn,
+		   struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	unsigned long oldcpsr = regs->ARM_cpsr;
 	unsigned long newcpsr;
@@ -1095,7 +455,7 @@ t16_emulate_loregs(struct kprobe *p, struct pt_regs *regs)
 		"mrs	%[newcpsr], cpsr	\n\t"
 		: [newcpsr] "=r" (newcpsr)
 		: [oldcpsr] "r" (oldcpsr), [regs] "r" (regs),
-		  [fn] "r" (p->ainsn.insn_fn)
+		  [fn] "r" (asi->insn_fn)
 		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
 		  "lr", "memory", "cc"
 		);
@@ -1104,24 +464,26 @@ t16_emulate_loregs(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
-t16_emulate_loregs_rwflags(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_loregs_rwflags(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	regs->ARM_cpsr = t16_emulate_loregs(p, regs);
+	regs->ARM_cpsr = t16_emulate_loregs(insn, asi, regs);
 }
 
 static void __kprobes
-t16_emulate_loregs_noitrwflags(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_loregs_noitrwflags(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	unsigned long cpsr = t16_emulate_loregs(p, regs);
+	unsigned long cpsr = t16_emulate_loregs(insn, asi, regs);
 	if (!in_it_block(cpsr))
 		regs->ARM_cpsr = cpsr;
 }
 
 static void __kprobes
-t16_emulate_hiregs(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_hiregs(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = p->opcode;
-	unsigned long pc = thumb_probe_pc(p);
+	unsigned long pc = regs->ARM_pc + 2;
 	int rdn = (insn & 0x7) | ((insn & 0x80) >> 4);
 	int rm = (insn >> 3) & 0xf;
 
@@ -1137,7 +499,7 @@ t16_emulate_hiregs(struct kprobe *p, struct pt_regs *regs)
 		"blx    %[fn]			\n\t"
 		"mrs	%[cpsr], cpsr		\n\t"
 		: "=r" (rdnv), [cpsr] "=r" (cpsr)
-		: "0" (rdnv), "r" (rmv), "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "0" (rdnv), "r" (rmv), "1" (cpsr), [fn] "r" (asi->insn_fn)
 		: "lr", "memory", "cc"
 	);
 
@@ -1148,8 +510,9 @@ t16_emulate_hiregs(struct kprobe *p, struct pt_regs *regs)
 	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
 }
 
-static enum kprobe_insn __kprobes
-t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t16_decode_hiregs(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
 	insn &= ~0x00ff;
 	insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */
@@ -1159,7 +522,8 @@ t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 }
 
 static void __kprobes
-t16_emulate_push(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_push(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	__asm__ __volatile__ (
 		"ldr	r9, [%[regs], #13*4]	\n\t"
@@ -1168,14 +532,15 @@ t16_emulate_push(struct kprobe *p, struct pt_regs *regs)
 		"blx	%[fn]			\n\t"
 		"str	r9, [%[regs], #13*4]	\n\t"
 		:
-		: [regs] "r" (regs), [fn] "r" (p->ainsn.insn_fn)
+		: [regs] "r" (regs), [fn] "r" (asi->insn_fn)
 		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
 		  "lr", "memory", "cc"
 		);
 }
 
-static enum kprobe_insn __kprobes
-t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t16_decode_push(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
 	/*
 	 * To simulate a PUSH we use a Thumb-2 "STMDB R9!, {registers}"
@@ -1189,7 +554,8 @@ t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 }
 
 static void __kprobes
-t16_emulate_pop_nopc(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_pop_nopc(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	__asm__ __volatile__ (
 		"ldr	r9, [%[regs], #13*4]	\n\t"
@@ -1198,14 +564,15 @@ t16_emulate_pop_nopc(struct kprobe *p, struct pt_regs *regs)
 		"stmia	%[regs], {r0-r7}	\n\t"
 		"str	r9, [%[regs], #13*4]	\n\t"
 		:
-		: [regs] "r" (regs), [fn] "r" (p->ainsn.insn_fn)
+		: [regs] "r" (regs), [fn] "r" (asi->insn_fn)
 		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9",
 		  "lr", "memory", "cc"
 		);
 }
 
 static void __kprobes
-t16_emulate_pop_pc(struct kprobe *p, struct pt_regs *regs)
+t16_emulate_pop_pc(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
 {
 	register unsigned long pc asm("r8");
 
@@ -1216,7 +583,7 @@ t16_emulate_pop_pc(struct kprobe *p, struct pt_regs *regs)
 		"stmia	%[regs], {r0-r7}	\n\t"
 		"str	r9, [%[regs], #13*4]	\n\t"
 		: "=r" (pc)
-		: [regs] "r" (regs), [fn] "r" (p->ainsn.insn_fn)
+		: [regs] "r" (regs), [fn] "r" (asi->insn_fn)
 		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9",
 		  "lr", "memory", "cc"
 		);
@@ -1224,8 +591,9 @@ t16_emulate_pop_pc(struct kprobe *p, struct pt_regs *regs)
 	bx_write_pc(pc, regs);
 }
 
-static enum kprobe_insn __kprobes
-t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static enum probes_insn __kprobes
+t16_decode_pop(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *d)
 {
 	/*
 	 * To simulate a POP we use a Thumb-2 "LDMDB R9!, {registers}"
@@ -1239,231 +607,56 @@ t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	return INSN_GOOD;
 }
 
-static const union decode_item t16_table_1011[] = {
-	/* Miscellaneous 16-bit instructions		    */
-
-	/* ADD (SP plus immediate)	1011 0000 0xxx xxxx */
-	/* SUB (SP minus immediate)	1011 0000 1xxx xxxx */
-	DECODE_SIMULATE	(0xff00, 0xb000, t16_simulate_add_sp_imm),
-
-	/* CBZ				1011 00x1 xxxx xxxx */
-	/* CBNZ				1011 10x1 xxxx xxxx */
-	DECODE_SIMULATE	(0xf500, 0xb100, t16_simulate_cbz),
-
-	/* SXTH				1011 0010 00xx xxxx */
-	/* SXTB				1011 0010 01xx xxxx */
-	/* UXTH				1011 0010 10xx xxxx */
-	/* UXTB				1011 0010 11xx xxxx */
-	/* REV				1011 1010 00xx xxxx */
-	/* REV16			1011 1010 01xx xxxx */
-	/* ???				1011 1010 10xx xxxx */
-	/* REVSH			1011 1010 11xx xxxx */
-	DECODE_REJECT	(0xffc0, 0xba80),
-	DECODE_EMULATE	(0xf500, 0xb000, t16_emulate_loregs_rwflags),
-
-	/* PUSH				1011 010x xxxx xxxx */
-	DECODE_CUSTOM	(0xfe00, 0xb400, t16_decode_push),
-	/* POP				1011 110x xxxx xxxx */
-	DECODE_CUSTOM	(0xfe00, 0xbc00, t16_decode_pop),
-
-	/*
-	 * If-Then, and hints
-	 *				1011 1111 xxxx xxxx
-	 */
-
-	/* YIELD			1011 1111 0001 0000 */
-	DECODE_OR	(0xffff, 0xbf10),
-	/* SEV				1011 1111 0100 0000 */
-	DECODE_EMULATE	(0xffff, 0xbf40, kprobe_emulate_none),
-	/* NOP				1011 1111 0000 0000 */
-	/* WFE				1011 1111 0010 0000 */
-	/* WFI				1011 1111 0011 0000 */
-	DECODE_SIMULATE	(0xffcf, 0xbf00, kprobe_simulate_nop),
-	/* Unassigned hints		1011 1111 xxxx 0000 */
-	DECODE_REJECT	(0xff0f, 0xbf00),
-	/* IT				1011 1111 xxxx xxxx */
-	DECODE_CUSTOM	(0xff00, 0xbf00, t16_decode_it),
-
-	/* SETEND			1011 0110 010x xxxx */
-	/* CPS				1011 0110 011x xxxx */
-	/* BKPT				1011 1110 xxxx xxxx */
-	/* And unallocated instructions...		    */
-	DECODE_END
+const union decode_action kprobes_t16_actions[NUM_PROBES_T16_ACTIONS] = {
+	[PROBES_T16_ADD_SP] = {.handler = t16_simulate_add_sp_imm},
+	[PROBES_T16_CBZ] = {.handler = t16_simulate_cbz},
+	[PROBES_T16_SIGN_EXTEND] = {.handler = t16_emulate_loregs_rwflags},
+	[PROBES_T16_PUSH] = {.decoder = t16_decode_push},
+	[PROBES_T16_POP] = {.decoder = t16_decode_pop},
+	[PROBES_T16_SEV] = {.handler = probes_emulate_none},
+	[PROBES_T16_WFE] = {.handler = probes_simulate_nop},
+	[PROBES_T16_IT] = {.decoder = t16_decode_it},
+	[PROBES_T16_CMP] = {.handler = t16_emulate_loregs_rwflags},
+	[PROBES_T16_ADDSUB] = {.handler = t16_emulate_loregs_noitrwflags},
+	[PROBES_T16_LOGICAL] = {.handler = t16_emulate_loregs_noitrwflags},
+	[PROBES_T16_LDR_LIT] = {.handler = t16_simulate_ldr_literal},
+	[PROBES_T16_BLX] = {.handler = t16_simulate_bxblx},
+	[PROBES_T16_HIREGOPS] = {.decoder = t16_decode_hiregs},
+	[PROBES_T16_LDRHSTRH] = {.handler = t16_emulate_loregs_rwflags},
+	[PROBES_T16_LDRSTR] = {.handler = t16_simulate_ldrstr_sp_relative},
+	[PROBES_T16_ADR] = {.handler = t16_simulate_reladr},
+	[PROBES_T16_LDMSTM] = {.handler = t16_emulate_loregs_rwflags},
+	[PROBES_T16_BRANCH_COND] = {.decoder = t16_decode_cond_branch},
+	[PROBES_T16_BRANCH] = {.handler = t16_simulate_branch},
 };
 
-const union decode_item kprobe_decode_thumb16_table[] = {
-
-	/*
-	 * Shift (immediate), add, subtract, move, and compare
-	 *				00xx xxxx xxxx xxxx
-	 */
-
-	/* CMP (immediate)		0010 1xxx xxxx xxxx */
-	DECODE_EMULATE	(0xf800, 0x2800, t16_emulate_loregs_rwflags),
-
-	/* ADD (register)		0001 100x xxxx xxxx */
-	/* SUB (register)		0001 101x xxxx xxxx */
-	/* LSL (immediate)		0000 0xxx xxxx xxxx */
-	/* LSR (immediate)		0000 1xxx xxxx xxxx */
-	/* ASR (immediate)		0001 0xxx xxxx xxxx */
-	/* ADD (immediate, Thumb)	0001 110x xxxx xxxx */
-	/* SUB (immediate, Thumb)	0001 111x xxxx xxxx */
-	/* MOV (immediate)		0010 0xxx xxxx xxxx */
-	/* ADD (immediate, Thumb)	0011 0xxx xxxx xxxx */
-	/* SUB (immediate, Thumb)	0011 1xxx xxxx xxxx */
-	DECODE_EMULATE	(0xc000, 0x0000, t16_emulate_loregs_noitrwflags),
-
-	/*
-	 * 16-bit Thumb data-processing instructions
-	 *				0100 00xx xxxx xxxx
-	 */
-
-	/* TST (register)		0100 0010 00xx xxxx */
-	DECODE_EMULATE	(0xffc0, 0x4200, t16_emulate_loregs_rwflags),
-	/* CMP (register)		0100 0010 10xx xxxx */
-	/* CMN (register)		0100 0010 11xx xxxx */
-	DECODE_EMULATE	(0xff80, 0x4280, t16_emulate_loregs_rwflags),
-	/* AND (register)		0100 0000 00xx xxxx */
-	/* EOR (register)		0100 0000 01xx xxxx */
-	/* LSL (register)		0100 0000 10xx xxxx */
-	/* LSR (register)		0100 0000 11xx xxxx */
-	/* ASR (register)		0100 0001 00xx xxxx */
-	/* ADC (register)		0100 0001 01xx xxxx */
-	/* SBC (register)		0100 0001 10xx xxxx */
-	/* ROR (register)		0100 0001 11xx xxxx */
-	/* RSB (immediate)		0100 0010 01xx xxxx */
-	/* ORR (register)		0100 0011 00xx xxxx */
-	/* MUL				0100 0011 00xx xxxx */
-	/* BIC (register)		0100 0011 10xx xxxx */
-	/* MVN (register)		0100 0011 10xx xxxx */
-	DECODE_EMULATE	(0xfc00, 0x4000, t16_emulate_loregs_noitrwflags),
-
-	/*
-	 * Special data instructions and branch and exchange
-	 *				0100 01xx xxxx xxxx
-	 */
-
-	/* BLX pc			0100 0111 1111 1xxx */
-	DECODE_REJECT	(0xfff8, 0x47f8),
-
-	/* BX (register)		0100 0111 0xxx xxxx */
-	/* BLX (register)		0100 0111 1xxx xxxx */
-	DECODE_SIMULATE (0xff00, 0x4700, t16_simulate_bxblx),
-
-	/* ADD pc, pc			0100 0100 1111 1111 */
-	DECODE_REJECT	(0xffff, 0x44ff),
-
-	/* ADD (register)		0100 0100 xxxx xxxx */
-	/* CMP (register)		0100 0101 xxxx xxxx */
-	/* MOV (register)		0100 0110 xxxx xxxx */
-	DECODE_CUSTOM	(0xfc00, 0x4400, t16_decode_hiregs),
-
-	/*
-	 * Load from Literal Pool
-	 * LDR (literal)		0100 1xxx xxxx xxxx
-	 */
-	DECODE_SIMULATE	(0xf800, 0x4800, t16_simulate_ldr_literal),
-
-	/*
-	 * 16-bit Thumb Load/store instructions
-	 *				0101 xxxx xxxx xxxx
-	 *				011x xxxx xxxx xxxx
-	 *				100x xxxx xxxx xxxx
-	 */
-
-	/* STR (register)		0101 000x xxxx xxxx */
-	/* STRH (register)		0101 001x xxxx xxxx */
-	/* STRB (register)		0101 010x xxxx xxxx */
-	/* LDRSB (register)		0101 011x xxxx xxxx */
-	/* LDR (register)		0101 100x xxxx xxxx */
-	/* LDRH (register)		0101 101x xxxx xxxx */
-	/* LDRB (register)		0101 110x xxxx xxxx */
-	/* LDRSH (register)		0101 111x xxxx xxxx */
-	/* STR (immediate, Thumb)	0110 0xxx xxxx xxxx */
-	/* LDR (immediate, Thumb)	0110 1xxx xxxx xxxx */
-	/* STRB (immediate, Thumb)	0111 0xxx xxxx xxxx */
-	/* LDRB (immediate, Thumb)	0111 1xxx xxxx xxxx */
-	DECODE_EMULATE	(0xc000, 0x4000, t16_emulate_loregs_rwflags),
-	/* STRH (immediate, Thumb)	1000 0xxx xxxx xxxx */
-	/* LDRH (immediate, Thumb)	1000 1xxx xxxx xxxx */
-	DECODE_EMULATE	(0xf000, 0x8000, t16_emulate_loregs_rwflags),
-	/* STR (immediate, Thumb)	1001 0xxx xxxx xxxx */
-	/* LDR (immediate, Thumb)	1001 1xxx xxxx xxxx */
-	DECODE_SIMULATE	(0xf000, 0x9000, t16_simulate_ldrstr_sp_relative),
-
-	/*
-	 * Generate PC-/SP-relative address
-	 * ADR (literal)		1010 0xxx xxxx xxxx
-	 * ADD (SP plus immediate)	1010 1xxx xxxx xxxx
-	 */
-	DECODE_SIMULATE	(0xf000, 0xa000, t16_simulate_reladr),
-
-	/*
-	 * Miscellaneous 16-bit instructions
-	 *				1011 xxxx xxxx xxxx
-	 */
-	DECODE_TABLE	(0xf000, 0xb000, t16_table_1011),
-
-	/* STM				1100 0xxx xxxx xxxx */
-	/* LDM				1100 1xxx xxxx xxxx */
-	DECODE_EMULATE	(0xf000, 0xc000, t16_emulate_loregs_rwflags),
-
-	/*
-	 * Conditional branch, and Supervisor Call
-	 */
-
-	/* Permanently UNDEFINED	1101 1110 xxxx xxxx */
-	/* SVC				1101 1111 xxxx xxxx */
-	DECODE_REJECT	(0xfe00, 0xde00),
-
-	/* Conditional branch		1101 xxxx xxxx xxxx */
-	DECODE_CUSTOM	(0xf000, 0xd000, t16_decode_cond_branch),
-
-	/*
-	 * Unconditional branch
-	 * B				1110 0xxx xxxx xxxx
-	 */
-	DECODE_SIMULATE	(0xf800, 0xe000, t16_simulate_branch),
-
-	DECODE_END
+const union decode_action kprobes_t32_actions[NUM_PROBES_T32_ACTIONS] = {
+	[PROBES_T32_LDMSTM] = {.decoder = t32_decode_ldmstm},
+	[PROBES_T32_LDRDSTRD] = {.handler = t32_emulate_ldrdstrd},
+	[PROBES_T32_TABLE_BRANCH] = {.handler = t32_simulate_table_branch},
+	[PROBES_T32_TST] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_MOV] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_ADDSUB] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_LOGICAL] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_CMP] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_ADDWSUBW_PC] = {.handler = t32_emulate_rd8pc16_noflags,},
+	[PROBES_T32_ADDWSUBW] = {.handler = t32_emulate_rd8rn16_noflags},
+	[PROBES_T32_MOVW] = {.handler = t32_emulate_rd8rn16_noflags},
+	[PROBES_T32_SAT] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_BITFIELD] = {.handler = t32_emulate_rd8rn16_noflags},
+	[PROBES_T32_SEV] = {.handler = probes_emulate_none},
+	[PROBES_T32_WFE] = {.handler = probes_simulate_nop},
+	[PROBES_T32_MRS] = {.handler = t32_simulate_mrs},
+	[PROBES_T32_BRANCH_COND] = {.decoder = t32_decode_cond_branch},
+	[PROBES_T32_BRANCH] = {.handler = t32_simulate_branch},
+	[PROBES_T32_PLDI] = {.handler = probes_simulate_nop},
+	[PROBES_T32_LDR_LIT] = {.handler = t32_simulate_ldr_literal},
+	[PROBES_T32_LDRSTR] = {.handler = t32_emulate_ldrstr},
+	[PROBES_T32_SIGN_EXTEND] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_MEDIA] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_REVERSE] = {.handler = t32_emulate_rd8rn16_noflags},
+	[PROBES_T32_MUL_ADD] = {.handler = t32_emulate_rd8rn16rm0_rwflags},
+	[PROBES_T32_MUL_ADD2] = {.handler = t32_emulate_rd8rn16rm0ra12_noflags},
+	[PROBES_T32_MUL_ADD_LONG] = {
+		.handler = t32_emulate_rdlo12rdhi8rn16rm0_noflags},
 };
-#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
-EXPORT_SYMBOL_GPL(kprobe_decode_thumb16_table);
-#endif
-
-static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
-{
-	if (unlikely(in_it_block(cpsr)))
-		return kprobe_condition_checks[current_cond(cpsr)](cpsr);
-	return true;
-}
-
-static void __kprobes thumb16_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	regs->ARM_pc += 2;
-	p->ainsn.insn_handler(p, regs);
-	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
-}
-
-static void __kprobes thumb32_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	regs->ARM_pc += 4;
-	p->ainsn.insn_handler(p, regs);
-	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
-}
-
-enum kprobe_insn __kprobes
-thumb16_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	asi->insn_singlestep = thumb16_singlestep;
-	asi->insn_check_cc = thumb_check_cc;
-	return kprobe_decode_insn(insn, asi, kprobe_decode_thumb16_table, true);
-}
-
-enum kprobe_insn __kprobes
-thumb32_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	asi->insn_singlestep = thumb32_singlestep;
-	asi->insn_check_cc = thumb_check_cc;
-	return kprobe_decode_insn(insn, asi, kprobe_decode_thumb32_table, true);
-}
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index a7b621ece23d..8795f9f819d5 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -27,8 +27,12 @@
 #include <linux/stringify.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
+#include <linux/percpu.h>
+#include <linux/bug.h>
 
 #include "kprobes.h"
+#include "probes-arm.h"
+#include "probes-thumb.h"
 #include "patch.h"
 
 #define MIN_STACK_SIZE(addr) 				\
@@ -54,6 +58,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	unsigned long addr = (unsigned long)p->addr;
 	bool thumb;
 	kprobe_decode_insn_t *decode_insn;
+	const union decode_action *actions;
 	int is;
 
 	if (in_exception_text(addr))
@@ -66,21 +71,25 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	if (is_wide_instruction(insn)) {
 		insn <<= 16;
 		insn |= ((u16 *)addr)[1];
-		decode_insn = thumb32_kprobe_decode_insn;
-	} else
-		decode_insn = thumb16_kprobe_decode_insn;
+		decode_insn = thumb32_probes_decode_insn;
+		actions = kprobes_t32_actions;
+	} else {
+		decode_insn = thumb16_probes_decode_insn;
+		actions = kprobes_t16_actions;
+	}
 #else /* !CONFIG_THUMB2_KERNEL */
 	thumb = false;
 	if (addr & 0x3)
 		return -EINVAL;
 	insn = *p->addr;
-	decode_insn = arm_kprobe_decode_insn;
+	decode_insn = arm_probes_decode_insn;
+	actions = kprobes_arm_actions;
 #endif
 
 	p->opcode = insn;
 	p->ainsn.insn = tmp_insn;
 
-	switch ((*decode_insn)(insn, &p->ainsn)) {
+	switch ((*decode_insn)(insn, &p->ainsn, true, actions)) {
 	case INSN_REJECTED:	/* not supported */
 		return -EINVAL;
 
@@ -92,7 +101,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 			p->ainsn.insn[is] = tmp_insn[is];
 		flush_insns(p->ainsn.insn,
 				sizeof(p->ainsn.insn[0]) * MAX_INSN_SIZE);
-		p->ainsn.insn_fn = (kprobe_insn_fn_t *)
+		p->ainsn.insn_fn = (probes_insn_fn_t *)
 					((uintptr_t)p->ainsn.insn | thumb);
 		break;
 
@@ -197,7 +206,7 @@ singlestep_skip(struct kprobe *p, struct pt_regs *regs)
 static inline void __kprobes
 singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
-	p->ainsn.insn_singlestep(p, regs);
+	p->ainsn.insn_singlestep(p->opcode, &p->ainsn, regs);
 }
 
 /*
@@ -607,7 +616,7 @@ static struct undef_hook kprobes_arm_break_hook = {
 
 int __init arch_init_kprobes()
 {
-	arm_kprobe_decode_init();
+	arm_probes_decode_init();
 #ifdef CONFIG_THUMB2_KERNEL
 	register_undef_hook(&kprobes_thumb16_break_hook);
 	register_undef_hook(&kprobes_thumb32_break_hook);
diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/kernel/kprobes.h
index 38945f78f9f1..9a2712ecefc3 100644
--- a/arch/arm/kernel/kprobes.h
+++ b/arch/arm/kernel/kprobes.h
@@ -19,6 +19,8 @@
 #ifndef _ARM_KERNEL_KPROBES_H
 #define _ARM_KERNEL_KPROBES_H
 
+#include "probes.h"
+
 /*
  * These undefined instructions must be unique and
  * reserved solely for kprobes' use.
@@ -27,402 +29,24 @@
 #define KPROBE_THUMB16_BREAKPOINT_INSTRUCTION	0xde18
 #define KPROBE_THUMB32_BREAKPOINT_INSTRUCTION	0xf7f0a018
 
+enum probes_insn __kprobes
+kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_probes_insn *asi,
+		const struct decode_header *h);
 
-enum kprobe_insn {
-	INSN_REJECTED,
-	INSN_GOOD,
-	INSN_GOOD_NO_SLOT
-};
-
-typedef enum kprobe_insn (kprobe_decode_insn_t)(kprobe_opcode_t,
-						struct arch_specific_insn *);
+typedef enum probes_insn (kprobe_decode_insn_t)(probes_opcode_t,
+						struct arch_probes_insn *,
+						bool,
+						const union decode_action *);
 
 #ifdef CONFIG_THUMB2_KERNEL
 
-enum kprobe_insn thumb16_kprobe_decode_insn(kprobe_opcode_t,
-						struct arch_specific_insn *);
-enum kprobe_insn thumb32_kprobe_decode_insn(kprobe_opcode_t,
-						struct arch_specific_insn *);
+extern const union decode_action kprobes_t32_actions[];
+extern const union decode_action kprobes_t16_actions[];
 
 #else /* !CONFIG_THUMB2_KERNEL */
 
-enum kprobe_insn arm_kprobe_decode_insn(kprobe_opcode_t,
-					struct arch_specific_insn *);
-#endif
-
-void __init arm_kprobe_decode_init(void);
-
-extern kprobe_check_cc * const kprobe_condition_checks[16];
-
-
-#if __LINUX_ARM_ARCH__ >= 7
-
-/* str_pc_offset is architecturally defined from ARMv7 onwards */
-#define str_pc_offset 8
-#define find_str_pc_offset()
-
-#else /* __LINUX_ARM_ARCH__ < 7 */
-
-/* We need a run-time check to determine str_pc_offset */
-extern int str_pc_offset;
-void __init find_str_pc_offset(void);
+extern const union decode_action kprobes_arm_actions[];
 
 #endif
 
-
-/*
- * Update ITSTATE after normal execution of an IT block instruction.
- *
- * The 8 IT state bits are split into two parts in CPSR:
- *	ITSTATE<1:0> are in CPSR<26:25>
- *	ITSTATE<7:2> are in CPSR<15:10>
- */
-static inline unsigned long it_advance(unsigned long cpsr)
-	{
-	if ((cpsr & 0x06000400) == 0) {
-		/* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
-		cpsr &= ~PSR_IT_MASK;
-	} else {
-		/* We need to shift left ITSTATE<4:0> */
-		const unsigned long mask = 0x06001c00;  /* Mask ITSTATE<4:0> */
-		unsigned long it = cpsr & mask;
-		it <<= 1;
-		it |= it >> (27 - 10);  /* Carry ITSTATE<2> to correct place */
-		it &= mask;
-		cpsr &= ~mask;
-		cpsr |= it;
-	}
-	return cpsr;
-}
-
-static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
-{
-	long cpsr = regs->ARM_cpsr;
-	if (pcv & 0x1) {
-		cpsr |= PSR_T_BIT;
-		pcv &= ~0x1;
-	} else {
-		cpsr &= ~PSR_T_BIT;
-		pcv &= ~0x2;	/* Avoid UNPREDICTABLE address allignment */
-	}
-	regs->ARM_cpsr = cpsr;
-	regs->ARM_pc = pcv;
-}
-
-
-#if __LINUX_ARM_ARCH__ >= 6
-
-/* Kernels built for >= ARMv6 should never run on <= ARMv5 hardware, so... */
-#define load_write_pc_interworks true
-#define test_load_write_pc_interworking()
-
-#else /* __LINUX_ARM_ARCH__ < 6 */
-
-/* We need run-time testing to determine if load_write_pc() should interwork. */
-extern bool load_write_pc_interworks;
-void __init test_load_write_pc_interworking(void);
-
-#endif
-
-static inline void __kprobes load_write_pc(long pcv, struct pt_regs *regs)
-{
-	if (load_write_pc_interworks)
-		bx_write_pc(pcv, regs);
-	else
-		regs->ARM_pc = pcv;
-}
-
-
-#if __LINUX_ARM_ARCH__ >= 7
-
-#define alu_write_pc_interworks true
-#define test_alu_write_pc_interworking()
-
-#elif __LINUX_ARM_ARCH__ <= 5
-
-/* Kernels built for <= ARMv5 should never run on >= ARMv6 hardware, so... */
-#define alu_write_pc_interworks false
-#define test_alu_write_pc_interworking()
-
-#else /* __LINUX_ARM_ARCH__ == 6 */
-
-/* We could be an ARMv6 binary on ARMv7 hardware so we need a run-time check. */
-extern bool alu_write_pc_interworks;
-void __init test_alu_write_pc_interworking(void);
-
-#endif /* __LINUX_ARM_ARCH__ == 6 */
-
-static inline void __kprobes alu_write_pc(long pcv, struct pt_regs *regs)
-{
-	if (alu_write_pc_interworks)
-		bx_write_pc(pcv, regs);
-	else
-		regs->ARM_pc = pcv;
-}
-
-
-void __kprobes kprobe_simulate_nop(struct kprobe *p, struct pt_regs *regs);
-void __kprobes kprobe_emulate_none(struct kprobe *p, struct pt_regs *regs);
-
-enum kprobe_insn __kprobes
-kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi);
-
-/*
- * Test if load/store instructions writeback the address register.
- * if P (bit 24) == 0 or W (bit 21) == 1
- */
-#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
-
-/*
- * The following definitions and macros are used to build instruction
- * decoding tables for use by kprobe_decode_insn.
- *
- * These tables are a concatenation of entries each of which consist of one of
- * the decode_* structs. All of the fields in every type of decode structure
- * are of the union type decode_item, therefore the entire decode table can be
- * viewed as an array of these and declared like:
- *
- *	static const union decode_item table_name[] = {};
- *
- * In order to construct each entry in the table, macros are used to
- * initialise a number of sequential decode_item values in a layout which
- * matches the relevant struct. E.g. DECODE_SIMULATE initialise a struct
- * decode_simulate by initialising four decode_item objects like this...
- *
- *	{.bits = _type},
- *	{.bits = _mask},
- *	{.bits = _value},
- *	{.handler = _handler},
- *
- * Initialising a specified member of the union means that the compiler
- * will produce a warning if the argument is of an incorrect type.
- *
- * Below is a list of each of the macros used to initialise entries and a
- * description of the action performed when that entry is matched to an
- * instruction. A match is found when (instruction & mask) == value.
- *
- * DECODE_TABLE(mask, value, table)
- *	Instruction decoding jumps to parsing the new sub-table 'table'.
- *
- * DECODE_CUSTOM(mask, value, decoder)
- *	The custom function 'decoder' is called to the complete decoding
- *	of an instruction.
- *
- * DECODE_SIMULATE(mask, value, handler)
- *	Set the probes instruction handler to 'handler', this will be used
- *	to simulate the instruction when the probe is hit. Decoding returns
- *	with INSN_GOOD_NO_SLOT.
- *
- * DECODE_EMULATE(mask, value, handler)
- *	Set the probes instruction handler to 'handler', this will be used
- *	to emulate the instruction when the probe is hit. The modified
- *	instruction (see below) is placed in the probes instruction slot so it
- *	may be called by the emulation code. Decoding returns with INSN_GOOD.
- *
- * DECODE_REJECT(mask, value)
- *	Instruction decoding fails with INSN_REJECTED
- *
- * DECODE_OR(mask, value)
- *	This allows the mask/value test of multiple table entries to be
- *	logically ORed. Once an 'or' entry is matched the decoding action to
- *	be performed is that of the next entry which isn't an 'or'. E.g.
- *
- *		DECODE_OR	(mask1, value1)
- *		DECODE_OR	(mask2, value2)
- *		DECODE_SIMULATE	(mask3, value3, simulation_handler)
- *
- *	This means that if any of the three mask/value pairs match the
- *	instruction being decoded, then 'simulation_handler' will be used
- *	for it.
- *
- * Both the SIMULATE and EMULATE macros have a second form which take an
- * additional 'regs' argument.
- *
- *	DECODE_SIMULATEX(mask, value, handler, regs)
- *	DECODE_EMULATEX	(mask, value, handler, regs)
- *
- * These are used to specify what kind of CPU register is encoded in each of the
- * least significant 5 nibbles of the instruction being decoded. The regs value
- * is specified using the REGS macro, this takes any of the REG_TYPE_* values
- * from enum decode_reg_type as arguments; only the '*' part of the name is
- * given. E.g.
- *
- *	REGS(0, ANY, NOPC, 0, ANY)
- *
- * This indicates an instruction is encoded like:
- *
- *	bits 19..16	ignore
- *	bits 15..12	any register allowed here
- *	bits 11.. 8	any register except PC allowed here
- *	bits  7.. 4	ignore
- *	bits  3.. 0	any register allowed here
- *
- * This register specification is checked after a decode table entry is found to
- * match an instruction (through the mask/value test). Any invalid register then
- * found in the instruction will cause decoding to fail with INSN_REJECTED. In
- * the above example this would happen if bits 11..8 of the instruction were
- * 1111, indicating R15 or PC.
- *
- * As well as checking for legal combinations of registers, this data is also
- * used to modify the registers encoded in the instructions so that an
- * emulation routines can use it. (See decode_regs() and INSN_NEW_BITS.)
- *
- * Here is a real example which matches ARM instructions of the form
- * "AND <Rd>,<Rn>,<Rm>,<shift> <Rs>"
- *
- *	DECODE_EMULATEX	(0x0e000090, 0x00000010, emulate_rd12rn16rm0rs8_rwflags,
- *						 REGS(ANY, ANY, NOPC, 0, ANY)),
- *						      ^    ^    ^        ^
- *						      Rn   Rd   Rs       Rm
- *
- * Decoding the instruction "AND R4, R5, R6, ASL R15" will be rejected because
- * Rs == R15
- *
- * Decoding the instruction "AND R4, R5, R6, ASL R7" will be accepted and the
- * instruction will be modified to "AND R0, R2, R3, ASL R1" and then placed into
- * the kprobes instruction slot. This can then be called later by the handler
- * function emulate_rd12rn16rm0rs8_rwflags in order to simulate the instruction.
- */
-
-enum decode_type {
-	DECODE_TYPE_END,
-	DECODE_TYPE_TABLE,
-	DECODE_TYPE_CUSTOM,
-	DECODE_TYPE_SIMULATE,
-	DECODE_TYPE_EMULATE,
-	DECODE_TYPE_OR,
-	DECODE_TYPE_REJECT,
-	NUM_DECODE_TYPES /* Must be last enum */
-};
-
-#define DECODE_TYPE_BITS	4
-#define DECODE_TYPE_MASK	((1 << DECODE_TYPE_BITS) - 1)
-
-enum decode_reg_type {
-	REG_TYPE_NONE = 0, /* Not a register, ignore */
-	REG_TYPE_ANY,	   /* Any register allowed */
-	REG_TYPE_SAMEAS16, /* Register should be same as that at bits 19..16 */
-	REG_TYPE_SP,	   /* Register must be SP */
-	REG_TYPE_PC,	   /* Register must be PC */
-	REG_TYPE_NOSP,	   /* Register must not be SP */
-	REG_TYPE_NOSPPC,   /* Register must not be SP or PC */
-	REG_TYPE_NOPC,	   /* Register must not be PC */
-	REG_TYPE_NOPCWB,   /* No PC if load/store write-back flag also set */
-
-	/* The following types are used when the encoding for PC indicates
-	 * another instruction form. This distiction only matters for test
-	 * case coverage checks.
-	 */
-	REG_TYPE_NOPCX,	   /* Register must not be PC */
-	REG_TYPE_NOSPPCX,  /* Register must not be SP or PC */
-
-	/* Alias to allow '0' arg to be used in REGS macro. */
-	REG_TYPE_0 = REG_TYPE_NONE
-};
-
-#define REGS(r16, r12, r8, r4, r0)	\
-	((REG_TYPE_##r16) << 16) +	\
-	((REG_TYPE_##r12) << 12) +	\
-	((REG_TYPE_##r8) << 8) +	\
-	((REG_TYPE_##r4) << 4) +	\
-	(REG_TYPE_##r0)
-
-union decode_item {
-	u32			bits;
-	const union decode_item	*table;
-	kprobe_insn_handler_t	*handler;
-	kprobe_decode_insn_t	*decoder;
-};
-
-
-#define DECODE_END			\
-	{.bits = DECODE_TYPE_END}
-
-
-struct decode_header {
-	union decode_item	type_regs;
-	union decode_item	mask;
-	union decode_item	value;
-};
-
-#define DECODE_HEADER(_type, _mask, _value, _regs)		\
-	{.bits = (_type) | ((_regs) << DECODE_TYPE_BITS)},	\
-	{.bits = (_mask)},					\
-	{.bits = (_value)}
-
-
-struct decode_table {
-	struct decode_header	header;
-	union decode_item	table;
-};
-
-#define DECODE_TABLE(_mask, _value, _table)			\
-	DECODE_HEADER(DECODE_TYPE_TABLE, _mask, _value, 0),	\
-	{.table = (_table)}
-
-
-struct decode_custom {
-	struct decode_header	header;
-	union decode_item	decoder;
-};
-
-#define DECODE_CUSTOM(_mask, _value, _decoder)			\
-	DECODE_HEADER(DECODE_TYPE_CUSTOM, _mask, _value, 0),	\
-	{.decoder = (_decoder)}
-
-
-struct decode_simulate {
-	struct decode_header	header;
-	union decode_item	handler;
-};
-
-#define DECODE_SIMULATEX(_mask, _value, _handler, _regs)		\
-	DECODE_HEADER(DECODE_TYPE_SIMULATE, _mask, _value, _regs),	\
-	{.handler = (_handler)}
-
-#define DECODE_SIMULATE(_mask, _value, _handler)	\
-	DECODE_SIMULATEX(_mask, _value, _handler, 0)
-
-
-struct decode_emulate {
-	struct decode_header	header;
-	union decode_item	handler;
-};
-
-#define DECODE_EMULATEX(_mask, _value, _handler, _regs)			\
-	DECODE_HEADER(DECODE_TYPE_EMULATE, _mask, _value, _regs),	\
-	{.handler = (_handler)}
-
-#define DECODE_EMULATE(_mask, _value, _handler)		\
-	DECODE_EMULATEX(_mask, _value, _handler, 0)
-
-
-struct decode_or {
-	struct decode_header	header;
-};
-
-#define DECODE_OR(_mask, _value)				\
-	DECODE_HEADER(DECODE_TYPE_OR, _mask, _value, 0)
-
-
-struct decode_reject {
-	struct decode_header	header;
-};
-
-#define DECODE_REJECT(_mask, _value)				\
-	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
-
-
-#ifdef CONFIG_THUMB2_KERNEL
-extern const union decode_item kprobe_decode_thumb16_table[];
-extern const union decode_item kprobe_decode_thumb32_table[];
-#else
-extern const union decode_item kprobe_decode_arm_table[];
-#endif
-
-
-int kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
-			const union decode_item *table, bool thumb16);
-
-
 #endif /* _ARM_KERNEL_KPROBES_H */
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 789d846a9184..a6bc431cde70 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -16,6 +16,8 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/uaccess.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
 
 #include <asm/irq_regs.h>
 #include <asm/pmu.h>
@@ -205,6 +207,8 @@ armpmu_del(struct perf_event *event, int flags)
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
 	clear_bit(idx, hw_events->used_mask);
+	if (armpmu->clear_event_idx)
+		armpmu->clear_event_idx(hw_events, event);
 
 	perf_event_update_userpage(event);
 }
@@ -295,14 +299,27 @@ validate_group(struct perf_event *event)
 
 static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 {
-	struct arm_pmu *armpmu = (struct arm_pmu *) dev;
-	struct platform_device *plat_device = armpmu->plat_device;
-	struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);
+	struct arm_pmu *armpmu;
+	struct platform_device *plat_device;
+	struct arm_pmu_platdata *plat;
+	int ret;
+	u64 start_clock, finish_clock;
 
+	if (irq_is_percpu(irq))
+		dev = *(void **)dev;
+	armpmu = dev;
+	plat_device = armpmu->plat_device;
+	plat = dev_get_platdata(&plat_device->dev);
+
+	start_clock = sched_clock();
 	if (plat && plat->handle_irq)
-		return plat->handle_irq(irq, dev, armpmu->handle_irq);
+		ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
 	else
-		return armpmu->handle_irq(irq, dev);
+		ret = armpmu->handle_irq(irq, dev);
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
 }
 
 static void
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 20d553c9f5e2..51798d7854ac 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -25,6 +25,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
 
 #include <asm/cputype.h>
 #include <asm/irq_regs.h>
@@ -33,6 +35,7 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
+static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu);
 static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
 static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
@@ -71,6 +74,26 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
 	return this_cpu_ptr(&cpu_hw_events);
 }
 
+static void cpu_pmu_enable_percpu_irq(void *data)
+{
+	struct arm_pmu *cpu_pmu = data;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	int irq = platform_get_irq(pmu_device, 0);
+
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+	cpumask_set_cpu(smp_processor_id(), &cpu_pmu->active_irqs);
+}
+
+static void cpu_pmu_disable_percpu_irq(void *data)
+{
+	struct arm_pmu *cpu_pmu = data;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	int irq = platform_get_irq(pmu_device, 0);
+
+	cpumask_clear_cpu(smp_processor_id(), &cpu_pmu->active_irqs);
+	disable_percpu_irq(irq);
+}
+
 static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
 	int i, irq, irqs;
@@ -78,12 +101,18 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 
-	for (i = 0; i < irqs; ++i) {
-		if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
-			continue;
-		irq = platform_get_irq(pmu_device, i);
-		if (irq >= 0)
-			free_irq(irq, cpu_pmu);
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		on_each_cpu(cpu_pmu_disable_percpu_irq, cpu_pmu, 1);
+		free_percpu_irq(irq, &percpu_pmu);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+				continue;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq >= 0)
+				free_irq(irq, cpu_pmu);
+		}
 	}
 }
 
@@ -101,33 +130,44 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 		return -ENODEV;
 	}
 
-	for (i = 0; i < irqs; ++i) {
-		err = 0;
-		irq = platform_get_irq(pmu_device, i);
-		if (irq < 0)
-			continue;
-
-		/*
-		 * If we have a single PMU interrupt that we can't shift,
-		 * assume that we're running on a uniprocessor machine and
-		 * continue. Otherwise, continue without this interrupt.
-		 */
-		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
-			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-				    irq, i);
-			continue;
-		}
-
-		err = request_irq(irq, handler,
-				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-				  cpu_pmu);
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu);
 		if (err) {
 			pr_err("unable to request IRQ%d for ARM PMU counters\n",
 				irq);
 			return err;
 		}
-
-		cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+		on_each_cpu(cpu_pmu_enable_percpu_irq, cpu_pmu, 1);
+	} else {
+		for (i = 0; i < irqs; ++i) {
+			err = 0;
+			irq = platform_get_irq(pmu_device, i);
+			if (irq < 0)
+				continue;
+
+			/*
+			 * If we have a single PMU interrupt that we can't shift,
+			 * assume that we're running on a uniprocessor machine and
+			 * continue. Otherwise, continue without this interrupt.
+			 */
+			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+					    irq, i);
+				continue;
+			}
+
+			err = request_irq(irq, handler,
+					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+					  cpu_pmu);
+			if (err) {
+				pr_err("unable to request IRQ%d for ARM PMU counters\n",
+					irq);
+				return err;
+			}
+
+			cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+		}
 	}
 
 	return 0;
@@ -141,6 +181,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 		events->events = per_cpu(hw_events, cpu);
 		events->used_mask = per_cpu(used_mask, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
+		per_cpu(percpu_pmu, cpu) = cpu_pmu;
 	}
 
 	cpu_pmu->get_hw_events	= cpu_pmu_get_cpu_events;
@@ -181,6 +222,7 @@ static struct notifier_block cpu_pmu_hotplug_notifier = {
  */
 static struct of_device_id cpu_pmu_of_device_ids[] = {
 	{.compatible = "arm,cortex-a15-pmu",	.data = armv7_a15_pmu_init},
+	{.compatible = "arm,cortex-a12-pmu",	.data = armv7_a12_pmu_init},
 	{.compatible = "arm,cortex-a9-pmu",	.data = armv7_a9_pmu_init},
 	{.compatible = "arm,cortex-a8-pmu",	.data = armv7_a8_pmu_init},
 	{.compatible = "arm,cortex-a7-pmu",	.data = armv7_a7_pmu_init},
@@ -188,6 +230,7 @@ static struct of_device_id cpu_pmu_of_device_ids[] = {
 	{.compatible = "arm,arm11mpcore-pmu",	.data = armv6mpcore_pmu_init},
 	{.compatible = "arm,arm1176-pmu",	.data = armv6pmu_init},
 	{.compatible = "arm,arm1136-pmu",	.data = armv6pmu_init},
+	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
 	{},
 };
 
@@ -225,15 +268,6 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 		case ARM_CPU_PART_CORTEX_A9:
 			ret = armv7_a9_pmu_init(pmu);
 			break;
-		case ARM_CPU_PART_CORTEX_A5:
-			ret = armv7_a5_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A15:
-			ret = armv7_a15_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A7:
-			ret = armv7_a7_pmu_init(pmu);
-			break;
 		}
 	/* Intel CPUs [xscale]. */
 	} else if (implementor == ARM_CPU_IMP_INTEL) {
@@ -270,6 +304,9 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	cpu_pmu = pmu;
+	cpu_pmu->plat_device = pdev;
+
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
 		init_fn = of_id->data;
 		ret = init_fn(pmu);
@@ -282,8 +319,6 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		goto out_free;
 	}
 
-	cpu_pmu = pmu;
-	cpu_pmu->plat_device = pdev;
 	cpu_pmu_init(cpu_pmu);
 	ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
 
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 039cffb053a7..f4ef3981ed02 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -18,6 +18,10 @@
 
 #ifdef CONFIG_CPU_V7
 
+#include <asm/cp15.h>
+#include <asm/vfp.h>
+#include "../vfp/vfpinstr.h"
+
 /*
  * Common ARMv7 event types
  *
@@ -109,6 +113,33 @@ enum armv7_a15_perf_types {
 	ARMV7_A15_PERFCTR_PC_WRITE_SPEC			= 0x76,
 };
 
+/* ARMv7 Cortex-A12 specific event types */
+enum armv7_a12_perf_types {
+	ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ		= 0x40,
+	ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE	= 0x41,
+
+	ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ		= 0x50,
+	ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE		= 0x51,
+
+	ARMV7_A12_PERFCTR_PC_WRITE_SPEC			= 0x76,
+
+	ARMV7_A12_PERFCTR_PF_TLB_REFILL			= 0xe7,
+};
+
+/* ARMv7 Krait specific event types */
+enum krait_perf_types {
+	KRAIT_PMRESR0_GROUP0				= 0xcc,
+	KRAIT_PMRESR1_GROUP0				= 0xd0,
+	KRAIT_PMRESR2_GROUP0				= 0xd4,
+	KRAIT_VPMRESR0_GROUP0				= 0xd8,
+
+	KRAIT_PERFCTR_L1_ICACHE_ACCESS			= 0x10011,
+	KRAIT_PERFCTR_L1_ICACHE_MISS			= 0x10010,
+
+	KRAIT_PERFCTR_L1_ITLB_ACCESS			= 0x12222,
+	KRAIT_PERFCTR_L1_DTLB_ACCESS			= 0x12210,
+};
+
 /*
  * Cortex-A8 HW events mapping
  *
@@ -732,6 +763,262 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 };
 
 /*
+ * Cortex-A12 HW events mapping
+ */
+static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV7_PERFCTR_BUS_CYCLES,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		/*
+		 * Not all performance counters differentiate between read
+		 * and write accesses/misses so we're not always strictly
+		 * correct, but it's the best we can do. Writes and reads get
+		 * combined in these cases.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_ICACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Krait HW events mapping
+ */
+static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+			[C(RESULT_MISS)]	= KRAIT_PERFCTR_L1_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_DTLB_ACCESS,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_DTLB_ACCESS,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_ITLB_ACCESS,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= KRAIT_PERFCTR_L1_ITLB_ACCESS,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
  * Perf Events' indices
  */
 #define	ARMV7_IDX_CYCLE_COUNTER	0
@@ -1212,6 +1499,24 @@ static int armv7_a7_map_event(struct perf_event *event)
 				&armv7_a7_perf_cache_map, 0xFF);
 }
 
+static int armv7_a12_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv7_a12_perf_map,
+				&armv7_a12_perf_cache_map, 0xFF);
+}
+
+static int krait_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &krait_perf_map,
+				&krait_perf_cache_map, 0xFFFFF);
+}
+
+static int krait_map_event_no_branch(struct perf_event *event)
+{
+	return armpmu_map_event(event, &krait_perf_map_no_branch,
+				&krait_perf_cache_map, 0xFFFFF);
+}
+
 static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->handle_irq	= armv7pmu_handle_irq;
@@ -1283,6 +1588,408 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
 	return 0;
 }
+
+static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Cortex-A12";
+	cpu_pmu->map_event	= armv7_a12_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	return 0;
+}
+
+/*
+ * Krait Performance Monitor Region Event Selection Register (PMRESRn)
+ *
+ *            31   30     24     16     8      0
+ *            +--------------------------------+
+ *  PMRESR0   | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 0
+ *            +--------------------------------+
+ *  PMRESR1   | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 1
+ *            +--------------------------------+
+ *  PMRESR2   | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 2
+ *            +--------------------------------+
+ *  VPMRESR0  | EN |  CC  |  CC  |  CC  |  CC  |   N = 2, R = ?
+ *            +--------------------------------+
+ *              EN | G=3  | G=2  | G=1  | G=0
+ *
+ *  Event Encoding:
+ *
+ *      hwc->config_base = 0xNRCCG
+ *
+ *      N  = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR)
+ *      R  = region register
+ *      CC = class of events the group G is choosing from
+ *      G  = group or particular event
+ *
+ *  Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2
+ *
+ *  A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ *  unit, etc.) while the event code (CC) corresponds to a particular class of
+ *  events (interrupts for example). An event code is broken down into
+ *  groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ *  example).
+ */
+
+#define KRAIT_EVENT		(1 << 16)
+#define VENUM_EVENT		(2 << 16)
+#define KRAIT_EVENT_MASK	(KRAIT_EVENT | VENUM_EVENT)
+#define PMRESRn_EN		BIT(31)
+
+static u32 krait_read_pmresrn(int n)
+{
+	u32 val;
+
+	switch (n) {
+	case 0:
+		asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val));
+		break;
+	case 1:
+		asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val));
+		break;
+	case 2:
+		asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+	}
+
+	return val;
+}
+
+static void krait_write_pmresrn(int n, u32 val)
+{
+	switch (n) {
+	case 0:
+		asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val));
+		break;
+	case 1:
+		asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val));
+		break;
+	case 2:
+		asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in krait_pmu_get_event_idx() */
+	}
+}
+
+static u32 krait_read_vpmresr0(void)
+{
+	u32 val;
+	asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+	return val;
+}
+
+static void krait_write_vpmresr0(u32 val)
+{
+	asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val)
+{
+	u32 venum_new_val;
+	u32 fp_new_val;
+
+	BUG_ON(preemptible());
+	/* CPACR Enable CP10 and CP11 access */
+	*venum_orig_val = get_copro_access();
+	venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11);
+	set_copro_access(venum_new_val);
+
+	/* Enable FPEXC */
+	*fp_orig_val = fmrx(FPEXC);
+	fp_new_val = *fp_orig_val | FPEXC_EN;
+	fmxr(FPEXC, fp_new_val);
+}
+
+static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val)
+{
+	BUG_ON(preemptible());
+	/* Restore FPEXC */
+	fmxr(FPEXC, fp_orig_val);
+	isb();
+	/* Restore CPACR */
+	set_copro_access(venum_orig_val);
+}
+
+static u32 krait_get_pmresrn_event(unsigned int region)
+{
+	static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0,
+					     KRAIT_PMRESR1_GROUP0,
+					     KRAIT_PMRESR2_GROUP0 };
+	return pmresrn_table[region];
+}
+
+static void krait_evt_setup(int idx, u32 config_base)
+{
+	u32 val;
+	u32 mask;
+	u32 vval, fval;
+	unsigned int region;
+	unsigned int group;
+	unsigned int code;
+	unsigned int group_shift;
+	bool venum_event;
+
+	venum_event = !!(config_base & VENUM_EVENT);
+	region = (config_base >> 12) & 0xf;
+	code   = (config_base >> 4) & 0xff;
+	group  = (config_base >> 0)  & 0xf;
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+
+	/* Configure evtsel for the region and group */
+	if (venum_event)
+		val = KRAIT_VPMRESR0_GROUP0;
+	else
+		val = krait_get_pmresrn_event(region);
+	val += group;
+	/* Mix in mode-exclusion bits */
+	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+	armv7_pmnc_write_evtsel(idx, val);
+
+	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+	if (venum_event) {
+		krait_pre_vpmresr0(&vval, &fval);
+		val = krait_read_vpmresr0();
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		krait_write_vpmresr0(val);
+		krait_post_vpmresr0(vval, fval);
+	} else {
+		val = krait_read_pmresrn(region);
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		krait_write_pmresrn(region, val);
+	}
+}
+
+static u32 krait_clear_pmresrn_group(u32 val, int group)
+{
+	u32 mask;
+	int group_shift;
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+	val &= ~mask;
+
+	/* Don't clear enable bit if entire region isn't disabled */
+	if (val & ~PMRESRn_EN)
+		return val |= PMRESRn_EN;
+
+	return 0;
+}
+
+static void krait_clearpmu(u32 config_base)
+{
+	u32 val;
+	u32 vval, fval;
+	unsigned int region;
+	unsigned int group;
+	bool venum_event;
+
+	venum_event = !!(config_base & VENUM_EVENT);
+	region = (config_base >> 12) & 0xf;
+	group  = (config_base >> 0)  & 0xf;
+
+	if (venum_event) {
+		krait_pre_vpmresr0(&vval, &fval);
+		val = krait_read_vpmresr0();
+		val = krait_clear_pmresrn_group(val, group);
+		krait_write_vpmresr0(val);
+		krait_post_vpmresr0(vval, fval);
+	} else {
+		val = krait_read_pmresrn(region);
+		val = krait_clear_pmresrn_group(val, group);
+		krait_write_pmresrn(region, val);
+	}
+}
+
+static void krait_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	/* Disable counter and interrupt */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Clear pmresr code (if destined for PMNx counters)
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		krait_clearpmu(hwc->config_base);
+
+	/* Disable interrupt for this counter */
+	armv7_pmnc_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void krait_pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We set the event for the cycle counter because we
+	 * have the ability to perform event filtering.
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		krait_evt_setup(idx, hwc->config_base);
+	else
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/* Enable interrupt for this counter */
+	armv7_pmnc_enable_intens(idx);
+
+	/* Enable counter */
+	armv7_pmnc_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void krait_pmu_reset(void *info)
+{
+	u32 vval, fval;
+
+	armv7pmu_reset(info);
+
+	/* Clear all pmresrs */
+	krait_write_pmresrn(0, 0);
+	krait_write_pmresrn(1, 0);
+	krait_write_pmresrn(2, 0);
+
+	krait_pre_vpmresr0(&vval, &fval);
+	krait_write_vpmresr0(0);
+	krait_post_vpmresr0(vval, fval);
+}
+
+static int krait_event_to_bit(struct perf_event *event, unsigned int region,
+			      unsigned int group)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	if (hwc->config_base & VENUM_EVENT)
+		bit = KRAIT_VPMRESR0_GROUP0;
+	else
+		bit = krait_get_pmresrn_event(region);
+	bit -= krait_get_pmresrn_event(0);
+	bit += group;
+	/*
+	 * Lower bits are reserved for use by the counters (see
+	 * armv7pmu_get_event_idx() for more info)
+	 */
+	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+	return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	int idx;
+	int bit;
+	unsigned int prefix;
+	unsigned int region;
+	unsigned int code;
+	unsigned int group;
+	bool krait_event;
+	struct hw_perf_event *hwc = &event->hw;
+
+	region = (hwc->config_base >> 12) & 0xf;
+	code   = (hwc->config_base >> 4) & 0xff;
+	group  = (hwc->config_base >> 0) & 0xf;
+	krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK);
+
+	if (krait_event) {
+		/* Ignore invalid events */
+		if (group > 3 || region > 2)
+			return -EINVAL;
+		prefix = hwc->config_base & KRAIT_EVENT_MASK;
+		if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT)
+			return -EINVAL;
+		if (prefix == VENUM_EVENT && (code & 0xe0))
+			return -EINVAL;
+
+		bit = krait_event_to_bit(event, region, group);
+		if (test_and_set_bit(bit, cpuc->used_mask))
+			return -EAGAIN;
+	}
+
+	idx = armv7pmu_get_event_idx(cpuc, event);
+	if (idx < 0 && krait_event)
+		clear_bit(bit, cpuc->used_mask);
+
+	return idx;
+}
+
+static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region;
+	unsigned int group;
+	bool krait_event;
+
+	region = (hwc->config_base >> 12) & 0xf;
+	group  = (hwc->config_base >> 0) & 0xf;
+	krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK);
+
+	if (krait_event) {
+		bit = krait_event_to_bit(event, region, group);
+		clear_bit(bit, cpuc->used_mask);
+	}
+}
+
+static int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "ARMv7 Krait";
+	/* Some early versions of Krait don't support PC write events */
+	if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
+				  "qcom,no-pc-write"))
+		cpu_pmu->map_event = krait_map_event_no_branch;
+	else
+		cpu_pmu->map_event = krait_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
+	cpu_pmu->reset		= krait_pmu_reset;
+	cpu_pmu->enable		= krait_pmu_enable_event;
+	cpu_pmu->disable	= krait_pmu_disable_event;
+	cpu_pmu->get_event_idx	= krait_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
+	return 0;
+}
 #else
 static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
@@ -1308,4 +2015,14 @@ static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	return -ENODEV;
 }
+
+static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
 #endif	/* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
index 679cf4d18c08..fc7208636284 100644
--- a/arch/arm/kernel/pj4-cp0.c
+++ b/arch/arm/kernel/pj4-cp0.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <asm/thread_notify.h>
+#include <asm/cputype.h>
 
 static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
 {
@@ -80,6 +81,9 @@ static int __init pj4_cp0_init(void)
 {
 	u32 cp_access;
 
+	if (!cpu_is_pj4())
+		return 0;
+
 	cp_access = pj4_cp_access_read() & ~0xf;
 	pj4_cp_access_write(cp_access);
 
diff --git a/arch/arm/kernel/probes-arm.c b/arch/arm/kernel/probes-arm.c
new file mode 100644
index 000000000000..51a13a027989
--- /dev/null
+++ b/arch/arm/kernel/probes-arm.c
@@ -0,0 +1,734 @@
+/*
+ * arch/arm/kernel/probes-arm.c
+ *
+ * Some code moved here from arch/arm/kernel/kprobes-arm.c
+ *
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/ptrace.h>
+
+#include "probes.h"
+#include "probes-arm.h"
+
+#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit)))))
+
+#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
+
+/*
+ * To avoid the complications of mimicing single-stepping on a
+ * processor without a Next-PC or a single-step mode, and to
+ * avoid having to deal with the side-effects of boosting, we
+ * simulate or emulate (almost) all ARM instructions.
+ *
+ * "Simulation" is where the instruction's behavior is duplicated in
+ * C code.  "Emulation" is where the original instruction is rewritten
+ * and executed, often by altering its registers.
+ *
+ * By having all behavior of the kprobe'd instruction completed before
+ * returning from the kprobe_handler(), all locks (scheduler and
+ * interrupt) can safely be released.  There is no need for secondary
+ * breakpoints, no race with MP or preemptable kernels, nor having to
+ * clean up resources counts at a later time impacting overall system
+ * performance.  By rewriting the instruction, only the minimum registers
+ * need to be loaded and saved back optimizing performance.
+ *
+ * Calling the insnslot_*_rwflags version of a function doesn't hurt
+ * anything even when the CPSR flags aren't updated by the
+ * instruction.  It's just a little slower in return for saving
+ * a little space by not having a duplicate function that doesn't
+ * update the flags.  (The same optimization can be said for
+ * instructions that do or don't perform register writeback)
+ * Also, instructions can either read the flags, only write the
+ * flags, or read and write the flags.  To save combinations
+ * rather than for sheer performance, flag functions just assume
+ * read and write of flags.
+ */
+
+void __kprobes simulate_bbl(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
+{
+	long iaddr = (long) regs->ARM_pc - 4;
+	int disp  = branch_displacement(insn);
+
+	if (insn & (1 << 24))
+		regs->ARM_lr = iaddr + 4;
+
+	regs->ARM_pc = iaddr + 8 + disp;
+}
+
+void __kprobes simulate_blx1(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
+{
+	long iaddr = (long) regs->ARM_pc - 4;
+	int disp = branch_displacement(insn);
+
+	regs->ARM_lr = iaddr + 4;
+	regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2);
+	regs->ARM_cpsr |= PSR_T_BIT;
+}
+
+void __kprobes simulate_blx2bx(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
+{
+	int rm = insn & 0xf;
+	long rmv = regs->uregs[rm];
+
+	if (insn & (1 << 5))
+		regs->ARM_lr = (long) regs->ARM_pc;
+
+	regs->ARM_pc = rmv & ~0x1;
+	regs->ARM_cpsr &= ~PSR_T_BIT;
+	if (rmv & 0x1)
+		regs->ARM_cpsr |= PSR_T_BIT;
+}
+
+void __kprobes simulate_mrs(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
+{
+	int rd = (insn >> 12) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
+void __kprobes simulate_mov_ipsp(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
+{
+	regs->uregs[12] = regs->uregs[13];
+}
+
+/*
+ * For the instruction masking and comparisons in all the "space_*"
+ * functions below, Do _not_ rearrange the order of tests unless
+ * you're very, very sure of what you are doing.  For the sake of
+ * efficiency, the masks for some tests sometimes assume other test
+ * have been done prior to them so the number of patterns to test
+ * for an instruction set can be as broad as possible to reduce the
+ * number of tests needed.
+ */
+
+static const union decode_item arm_1111_table[] = {
+	/* Unconditional instructions					*/
+
+	/* memory hint		1111 0100 x001 xxxx xxxx xxxx xxxx xxxx */
+	/* PLDI (immediate)	1111 0100 x101 xxxx xxxx xxxx xxxx xxxx */
+	/* PLDW (immediate)	1111 0101 x001 xxxx xxxx xxxx xxxx xxxx */
+	/* PLD (immediate)	1111 0101 x101 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xfe300000, 0xf4100000, PROBES_PRELOAD_IMM),
+
+	/* memory hint		1111 0110 x001 xxxx xxxx xxxx xxx0 xxxx */
+	/* PLDI (register)	1111 0110 x101 xxxx xxxx xxxx xxx0 xxxx */
+	/* PLDW (register)	1111 0111 x001 xxxx xxxx xxxx xxx0 xxxx */
+	/* PLD (register)	1111 0111 x101 xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_SIMULATE	(0xfe300010, 0xf6100000, PROBES_PRELOAD_REG),
+
+	/* BLX (immediate)	1111 101x xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xfe000000, 0xfa000000, PROBES_BRANCH_IMM),
+
+	/* CPS			1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
+	/* SETEND		1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
+	/* SRS			1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE			1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
+
+	/* Coprocessor instructions... */
+	/* MCRR2		1111 1100 0100 xxxx xxxx xxxx xxxx xxxx */
+	/* MRRC2		1111 1100 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* LDC2			1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC2			1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP2			1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR2			1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC2			1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0001_0xx0____0xxx_table[] = {
+	/* Miscellaneous instructions					*/
+
+	/* MRS cpsr		cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
+	DECODE_SIMULATEX(0x0ff000f0, 0x01000000, PROBES_MRS,
+						 REGS(0, NOPC, 0, 0, 0)),
+
+	/* BX			cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_SIMULATE	(0x0ff000f0, 0x01200010, PROBES_BRANCH_REG),
+
+	/* BLX (register)	cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
+	DECODE_SIMULATEX(0x0ff000f0, 0x01200030, PROBES_BRANCH_REG,
+						 REGS(0, 0, 0, 0, NOPC)),
+
+	/* CLZ			cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x01600010, PROBES_CLZ,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* QADD			cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx */
+	/* QSUB			cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx */
+	/* QDADD		cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx */
+	/* QDSUB		cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx */
+	DECODE_EMULATEX	(0x0f9000f0, 0x01000050, PROBES_SATURATING_ARITHMETIC,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* BXJ			cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
+	/* MSR			cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+	/* MRS spsr		cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
+	/* BKPT			1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
+	/* SMC			cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
+	/* And unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0001_0xx0____1xx0_table[] = {
+	/* Halfword multiply and multiply-accumulate			*/
+
+	/* SMLALxy		cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
+	DECODE_EMULATEX	(0x0ff00090, 0x01400080, PROBES_MUL1,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* SMULWy		cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
+	DECODE_OR	(0x0ff000b0, 0x012000a0),
+	/* SMULxy		cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
+	DECODE_EMULATEX	(0x0ff00090, 0x01600080, PROBES_MUL2,
+						 REGS(NOPC, 0, NOPC, 0, NOPC)),
+
+	/* SMLAxy		cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx */
+	DECODE_OR	(0x0ff00090, 0x01000080),
+	/* SMLAWy		cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx */
+	DECODE_EMULATEX	(0x0ff000b0, 0x01200080, PROBES_MUL2,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0000_____1001_table[] = {
+	/* Multiply and multiply-accumulate				*/
+
+	/* MUL			cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx */
+	/* MULS			cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0fe000f0, 0x00000090, PROBES_MUL2,
+						 REGS(NOPC, 0, NOPC, 0, NOPC)),
+
+	/* MLA			cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx */
+	/* MLAS			cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_OR	(0x0fe000f0, 0x00200090),
+	/* MLS			cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x00600090, PROBES_MUL2,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* UMAAL		cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_OR	(0x0ff000f0, 0x00400090),
+	/* UMULL		cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx */
+	/* UMULLS		cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx */
+	/* UMLAL		cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx */
+	/* UMLALS		cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx */
+	/* SMULL		cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx */
+	/* SMULLS		cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx */
+	/* SMLAL		cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx */
+	/* SMLALS		cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0f8000f0, 0x00800090, PROBES_MUL1,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0001_____1001_table[] = {
+	/* Synchronization primitives					*/
+
+#if __LINUX_ARM_ARCH__ < 6
+	/* Deprecated on ARMv6 and may be UNDEFINED on v7		*/
+	/* SMP/SWPB		cccc 0001 0x00 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0fb000f0, 0x01000090, PROBES_SWP,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+#endif
+	/* LDREX/STREX{,D,B,H}	cccc 0001 1xxx xxxx xxxx xxxx 1001 xxxx */
+	/* And unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_000x_____1xx1_table[] = {
+	/* Extra load/store instructions				*/
+
+	/* STRHT		cccc 0000 xx10 xxxx xxxx xxxx 1011 xxxx */
+	/* ???			cccc 0000 xx10 xxxx xxxx xxxx 11x1 xxxx */
+	/* LDRHT		cccc 0000 xx11 xxxx xxxx xxxx 1011 xxxx */
+	/* LDRSBT		cccc 0000 xx11 xxxx xxxx xxxx 1101 xxxx */
+	/* LDRSHT		cccc 0000 xx11 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_REJECT	(0x0f200090, 0x00200090),
+
+	/* LDRD/STRD lr,pc,{...	cccc 000x x0x0 xxxx 111x xxxx 1101 xxxx */
+	DECODE_REJECT	(0x0e10e0d0, 0x0000e0d0),
+
+	/* LDRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1101 xxxx */
+	/* STRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e5000d0, 0x000000d0, PROBES_LDRSTRD,
+						 REGS(NOPCWB, NOPCX, 0, 0, NOPC)),
+
+	/* LDRD (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1101 xxxx */
+	/* STRD (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e5000d0, 0x004000d0, PROBES_LDRSTRD,
+						 REGS(NOPCWB, NOPCX, 0, 0, 0)),
+
+	/* STRH (register)	cccc 000x x0x0 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0e5000f0, 0x000000b0, PROBES_STORE_EXTRA,
+						 REGS(NOPCWB, NOPC, 0, 0, NOPC)),
+
+	/* LDRH (register)	cccc 000x x0x1 xxxx xxxx xxxx 1011 xxxx */
+	/* LDRSB (register)	cccc 000x x0x1 xxxx xxxx xxxx 1101 xxxx */
+	/* LDRSH (register)	cccc 000x x0x1 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e500090, 0x00100090, PROBES_LOAD_EXTRA,
+						 REGS(NOPCWB, NOPC, 0, 0, NOPC)),
+
+	/* STRH (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0e5000f0, 0x004000b0, PROBES_STORE_EXTRA,
+						 REGS(NOPCWB, NOPC, 0, 0, 0)),
+
+	/* LDRH (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1011 xxxx */
+	/* LDRSB (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1101 xxxx */
+	/* LDRSH (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e500090, 0x00500090, PROBES_LOAD_EXTRA,
+						 REGS(NOPCWB, NOPC, 0, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_000x_table[] = {
+	/* Data-processing (register)					*/
+
+	/* <op>S PC, ...	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0e10f000, 0x0010f000),
+
+	/* MOV IP, SP		1110 0001 1010 0000 1100 0000 0000 1101 */
+	DECODE_SIMULATE	(0xffffffff, 0xe1a0c00d, PROBES_MOV_IP_SP),
+
+	/* TST (register)	cccc 0001 0001 xxxx xxxx xxxx xxx0 xxxx */
+	/* TEQ (register)	cccc 0001 0011 xxxx xxxx xxxx xxx0 xxxx */
+	/* CMP (register)	cccc 0001 0101 xxxx xxxx xxxx xxx0 xxxx */
+	/* CMN (register)	cccc 0001 0111 xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_EMULATEX	(0x0f900010, 0x01100000, PROBES_DATA_PROCESSING_REG,
+						 REGS(ANY, 0, 0, 0, ANY)),
+
+	/* MOV (register)	cccc 0001 101x xxxx xxxx xxxx xxx0 xxxx */
+	/* MVN (register)	cccc 0001 111x xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_EMULATEX	(0x0fa00010, 0x01a00000, PROBES_DATA_PROCESSING_REG,
+						 REGS(0, ANY, 0, 0, ANY)),
+
+	/* AND (register)	cccc 0000 000x xxxx xxxx xxxx xxx0 xxxx */
+	/* EOR (register)	cccc 0000 001x xxxx xxxx xxxx xxx0 xxxx */
+	/* SUB (register)	cccc 0000 010x xxxx xxxx xxxx xxx0 xxxx */
+	/* RSB (register)	cccc 0000 011x xxxx xxxx xxxx xxx0 xxxx */
+	/* ADD (register)	cccc 0000 100x xxxx xxxx xxxx xxx0 xxxx */
+	/* ADC (register)	cccc 0000 101x xxxx xxxx xxxx xxx0 xxxx */
+	/* SBC (register)	cccc 0000 110x xxxx xxxx xxxx xxx0 xxxx */
+	/* RSC (register)	cccc 0000 111x xxxx xxxx xxxx xxx0 xxxx */
+	/* ORR (register)	cccc 0001 100x xxxx xxxx xxxx xxx0 xxxx */
+	/* BIC (register)	cccc 0001 110x xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_EMULATEX	(0x0e000010, 0x00000000, PROBES_DATA_PROCESSING_REG,
+						 REGS(ANY, ANY, 0, 0, ANY)),
+
+	/* TST (reg-shift reg)	cccc 0001 0001 xxxx xxxx xxxx 0xx1 xxxx */
+	/* TEQ (reg-shift reg)	cccc 0001 0011 xxxx xxxx xxxx 0xx1 xxxx */
+	/* CMP (reg-shift reg)	cccc 0001 0101 xxxx xxxx xxxx 0xx1 xxxx */
+	/* CMN (reg-shift reg)	cccc 0001 0111 xxxx xxxx xxxx 0xx1 xxxx */
+	DECODE_EMULATEX	(0x0f900090, 0x01100010, PROBES_DATA_PROCESSING_REG,
+						 REGS(ANY, 0, NOPC, 0, ANY)),
+
+	/* MOV (reg-shift reg)	cccc 0001 101x xxxx xxxx xxxx 0xx1 xxxx */
+	/* MVN (reg-shift reg)	cccc 0001 111x xxxx xxxx xxxx 0xx1 xxxx */
+	DECODE_EMULATEX	(0x0fa00090, 0x01a00010, PROBES_DATA_PROCESSING_REG,
+						 REGS(0, ANY, NOPC, 0, ANY)),
+
+	/* AND (reg-shift reg)	cccc 0000 000x xxxx xxxx xxxx 0xx1 xxxx */
+	/* EOR (reg-shift reg)	cccc 0000 001x xxxx xxxx xxxx 0xx1 xxxx */
+	/* SUB (reg-shift reg)	cccc 0000 010x xxxx xxxx xxxx 0xx1 xxxx */
+	/* RSB (reg-shift reg)	cccc 0000 011x xxxx xxxx xxxx 0xx1 xxxx */
+	/* ADD (reg-shift reg)	cccc 0000 100x xxxx xxxx xxxx 0xx1 xxxx */
+	/* ADC (reg-shift reg)	cccc 0000 101x xxxx xxxx xxxx 0xx1 xxxx */
+	/* SBC (reg-shift reg)	cccc 0000 110x xxxx xxxx xxxx 0xx1 xxxx */
+	/* RSC (reg-shift reg)	cccc 0000 111x xxxx xxxx xxxx 0xx1 xxxx */
+	/* ORR (reg-shift reg)	cccc 0001 100x xxxx xxxx xxxx 0xx1 xxxx */
+	/* BIC (reg-shift reg)	cccc 0001 110x xxxx xxxx xxxx 0xx1 xxxx */
+	DECODE_EMULATEX	(0x0e000090, 0x00000010, PROBES_DATA_PROCESSING_REG,
+						 REGS(ANY, ANY, NOPC, 0, ANY)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_001x_table[] = {
+	/* Data-processing (immediate)					*/
+
+	/* MOVW			cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
+	/* MOVT			cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0fb00000, 0x03000000, PROBES_DATA_PROCESSING_IMM,
+						 REGS(0, NOPC, 0, 0, 0)),
+
+	/* YIELD		cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
+	DECODE_OR	(0x0fff00ff, 0x03200001),
+	/* SEV			cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
+	DECODE_EMULATE	(0x0fff00ff, 0x03200004, PROBES_EMULATE_NONE),
+	/* NOP			cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
+	/* WFE			cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
+	/* WFI			cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
+	DECODE_SIMULATE	(0x0fff00fc, 0x03200000, PROBES_SIMULATE_NOP),
+	/* DBG			cccc 0011 0010 0000 xxxx xxxx ffff xxxx */
+	/* unallocated hints	cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
+	/* MSR (immediate)	cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0fb00000, 0x03200000),
+
+	/* <op>S PC, ...	cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0e10f000, 0x0210f000),
+
+	/* TST (immediate)	cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx */
+	/* TEQ (immediate)	cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx */
+	/* CMP (immediate)	cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* CMN (immediate)	cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0f900000, 0x03100000, PROBES_DATA_PROCESSING_IMM,
+						 REGS(ANY, 0, 0, 0, 0)),
+
+	/* MOV (immediate)	cccc 0011 101x xxxx xxxx xxxx xxxx xxxx */
+	/* MVN (immediate)	cccc 0011 111x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0fa00000, 0x03a00000, PROBES_DATA_PROCESSING_IMM,
+						 REGS(0, ANY, 0, 0, 0)),
+
+	/* AND (immediate)	cccc 0010 000x xxxx xxxx xxxx xxxx xxxx */
+	/* EOR (immediate)	cccc 0010 001x xxxx xxxx xxxx xxxx xxxx */
+	/* SUB (immediate)	cccc 0010 010x xxxx xxxx xxxx xxxx xxxx */
+	/* RSB (immediate)	cccc 0010 011x xxxx xxxx xxxx xxxx xxxx */
+	/* ADD (immediate)	cccc 0010 100x xxxx xxxx xxxx xxxx xxxx */
+	/* ADC (immediate)	cccc 0010 101x xxxx xxxx xxxx xxxx xxxx */
+	/* SBC (immediate)	cccc 0010 110x xxxx xxxx xxxx xxxx xxxx */
+	/* RSC (immediate)	cccc 0010 111x xxxx xxxx xxxx xxxx xxxx */
+	/* ORR (immediate)	cccc 0011 100x xxxx xxxx xxxx xxxx xxxx */
+	/* BIC (immediate)	cccc 0011 110x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e000000, 0x02000000, PROBES_DATA_PROCESSING_IMM,
+						 REGS(ANY, ANY, 0, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0110_____xxx1_table[] = {
+	/* Media instructions						*/
+
+	/* SEL			cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x068000b0, PROBES_SATURATE,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* SSAT			cccc 0110 101x xxxx xxxx xxxx xx01 xxxx */
+	/* USAT			cccc 0110 111x xxxx xxxx xxxx xx01 xxxx */
+	DECODE_OR(0x0fa00030, 0x06a00010),
+	/* SSAT16		cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx */
+	/* USAT16		cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx */
+	DECODE_EMULATEX	(0x0fb000f0, 0x06a00030, PROBES_SATURATE,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* REV			cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
+	/* REV16		cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
+	/* RBIT			cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
+	/* REVSH		cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0fb00070, 0x06b00030, PROBES_REV,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* ???			cccc 0110 0x00 xxxx xxxx xxxx xxx1 xxxx */
+	DECODE_REJECT	(0x0fb00010, 0x06000010),
+	/* ???			cccc 0110 0xxx xxxx xxxx xxxx 1011 xxxx */
+	DECODE_REJECT	(0x0f8000f0, 0x060000b0),
+	/* ???			cccc 0110 0xxx xxxx xxxx xxxx 1101 xxxx */
+	DECODE_REJECT	(0x0f8000f0, 0x060000d0),
+	/* SADD16		cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx */
+	/* SADDSUBX		cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx */
+	/* SSUBADDX		cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx */
+	/* SSUB16		cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx */
+	/* SADD8		cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx */
+	/* SSUB8		cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx */
+	/* QADD16		cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx */
+	/* QADDSUBX		cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx */
+	/* QSUBADDX		cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx */
+	/* QSUB16		cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx */
+	/* QADD8		cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx */
+	/* QSUB8		cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx */
+	/* SHADD16		cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx */
+	/* SHADDSUBX		cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx */
+	/* SHSUBADDX		cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx */
+	/* SHSUB16		cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx */
+	/* SHADD8		cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx */
+	/* SHSUB8		cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx */
+	/* UADD16		cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx */
+	/* UADDSUBX		cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx */
+	/* USUBADDX		cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx */
+	/* USUB16		cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx */
+	/* UADD8		cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx */
+	/* USUB8		cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx */
+	/* UQADD16		cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx */
+	/* UQADDSUBX		cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx */
+	/* UQSUBADDX		cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx */
+	/* UQSUB16		cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx */
+	/* UQADD8		cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx */
+	/* UQSUB8		cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx */
+	/* UHADD16		cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx */
+	/* UHADDSUBX		cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx */
+	/* UHSUBADDX		cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx */
+	/* UHSUB16		cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx */
+	/* UHADD8		cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx */
+	/* UHSUB8		cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0f800010, 0x06000010, PROBES_MMI,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* PKHBT		cccc 0110 1000 xxxx xxxx xxxx x001 xxxx */
+	/* PKHTB		cccc 0110 1000 xxxx xxxx xxxx x101 xxxx */
+	DECODE_EMULATEX	(0x0ff00030, 0x06800010, PROBES_PACK,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* ???			cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx */
+	/* ???			cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx */
+	DECODE_REJECT	(0x0fb000f0, 0x06900070),
+
+	/* SXTB16		cccc 0110 1000 1111 xxxx xxxx 0111 xxxx */
+	/* SXTB			cccc 0110 1010 1111 xxxx xxxx 0111 xxxx */
+	/* SXTH			cccc 0110 1011 1111 xxxx xxxx 0111 xxxx */
+	/* UXTB16		cccc 0110 1100 1111 xxxx xxxx 0111 xxxx */
+	/* UXTB			cccc 0110 1110 1111 xxxx xxxx 0111 xxxx */
+	/* UXTH			cccc 0110 1111 1111 xxxx xxxx 0111 xxxx */
+	DECODE_EMULATEX	(0x0f8f00f0, 0x068f0070, PROBES_EXTEND,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* SXTAB16		cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx */
+	/* SXTAB		cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx */
+	/* SXTAH		cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx */
+	/* UXTAB16		cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx */
+	/* UXTAB		cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx */
+	/* UXTAH		cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx */
+	DECODE_EMULATEX	(0x0f8000f0, 0x06800070, PROBES_EXTEND_ADD,
+						 REGS(NOPCX, NOPC, 0, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0111_____xxx1_table[] = {
+	/* Media instructions						*/
+
+	/* UNDEFINED		cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_REJECT	(0x0ff000f0, 0x07f000f0),
+
+	/* SMLALD		cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
+	/* SMLSLD		cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
+	DECODE_EMULATEX	(0x0ff00090, 0x07400010, PROBES_MUL_ADD_LONG,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* SMUAD		cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx */
+	/* SMUSD		cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx */
+	DECODE_OR	(0x0ff0f090, 0x0700f010),
+	/* SMMUL		cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx */
+	DECODE_OR	(0x0ff0f0d0, 0x0750f010),
+	/* USAD8		cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */
+	DECODE_EMULATEX	(0x0ff0f0f0, 0x0780f010, PROBES_MUL_ADD,
+						 REGS(NOPC, 0, NOPC, 0, NOPC)),
+
+	/* SMLAD		cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx */
+	/* SMLSD		cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx */
+	DECODE_OR	(0x0ff00090, 0x07000010),
+	/* SMMLA		cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx */
+	DECODE_OR	(0x0ff000d0, 0x07500010),
+	/* USADA8		cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x07800010, PROBES_MUL_ADD,
+						 REGS(NOPC, NOPCX, NOPC, 0, NOPC)),
+
+	/* SMMLS		cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx */
+	DECODE_EMULATEX	(0x0ff000d0, 0x075000d0, PROBES_MUL_ADD,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* SBFX			cccc 0111 101x xxxx xxxx xxxx x101 xxxx */
+	/* UBFX			cccc 0111 111x xxxx xxxx xxxx x101 xxxx */
+	DECODE_EMULATEX	(0x0fa00070, 0x07a00050, PROBES_BITFIELD,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* BFC			cccc 0111 110x xxxx xxxx xxxx x001 1111 */
+	DECODE_EMULATEX	(0x0fe0007f, 0x07c0001f, PROBES_BITFIELD,
+						 REGS(0, NOPC, 0, 0, 0)),
+
+	/* BFI			cccc 0111 110x xxxx xxxx xxxx x001 xxxx */
+	DECODE_EMULATEX	(0x0fe00070, 0x07c00010, PROBES_BITFIELD,
+						 REGS(0, NOPC, 0, 0, NOPCX)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_01xx_table[] = {
+	/* Load/store word and unsigned byte				*/
+
+	/* LDRB/STRB pc,[...]	cccc 01xx x0xx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0c40f000, 0x0440f000),
+
+	/* STRT			cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRT			cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */
+	/* STRBT		cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRBT		cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0d200000, 0x04200000),
+
+	/* STR (immediate)	cccc 010x x0x0 xxxx xxxx xxxx xxxx xxxx */
+	/* STRB (immediate)	cccc 010x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x04000000, PROBES_STORE,
+						 REGS(NOPCWB, ANY, 0, 0, 0)),
+
+	/* LDR (immediate)	cccc 010x x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB (immediate)	cccc 010x x1x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x04100000, PROBES_LOAD,
+						 REGS(NOPCWB, ANY, 0, 0, 0)),
+
+	/* STR (register)	cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx */
+	/* STRB (register)	cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x06000000, PROBES_STORE,
+						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
+
+	/* LDR (register)	cccc 011x x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB (register)	cccc 011x x1x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x06100000, PROBES_LOAD,
+						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_100x_table[] = {
+	/* Block data transfer instructions				*/
+
+	/* LDM			cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* STM			cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0x0e400000, 0x08000000, PROBES_LDMSTM),
+
+	/* STM (user registers)	cccc 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDM (user registers)	cccc 100x x1x1 xxxx 0xxx xxxx xxxx xxxx */
+	/* LDM (exception ret)	cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */
+	DECODE_END
+};
+
+const union decode_item probes_decode_arm_table[] = {
+	/*
+	 * Unconditional instructions
+	 *			1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xf0000000, 0xf0000000, arm_1111_table),
+
+	/*
+	 * Miscellaneous instructions
+	 *			cccc 0001 0xx0 xxxx xxxx xxxx 0xxx xxxx
+	 */
+	DECODE_TABLE	(0x0f900080, 0x01000000, arm_cccc_0001_0xx0____0xxx_table),
+
+	/*
+	 * Halfword multiply and multiply-accumulate
+	 *			cccc 0001 0xx0 xxxx xxxx xxxx 1xx0 xxxx
+	 */
+	DECODE_TABLE	(0x0f900090, 0x01000080, arm_cccc_0001_0xx0____1xx0_table),
+
+	/*
+	 * Multiply and multiply-accumulate
+	 *			cccc 0000 xxxx xxxx xxxx xxxx 1001 xxxx
+	 */
+	DECODE_TABLE	(0x0f0000f0, 0x00000090, arm_cccc_0000_____1001_table),
+
+	/*
+	 * Synchronization primitives
+	 *			cccc 0001 xxxx xxxx xxxx xxxx 1001 xxxx
+	 */
+	DECODE_TABLE	(0x0f0000f0, 0x01000090, arm_cccc_0001_____1001_table),
+
+	/*
+	 * Extra load/store instructions
+	 *			cccc 000x xxxx xxxx xxxx xxxx 1xx1 xxxx
+	 */
+	DECODE_TABLE	(0x0e000090, 0x00000090, arm_cccc_000x_____1xx1_table),
+
+	/*
+	 * Data-processing (register)
+	 *			cccc 000x xxxx xxxx xxxx xxxx xxx0 xxxx
+	 * Data-processing (register-shifted register)
+	 *			cccc 000x xxxx xxxx xxxx xxxx 0xx1 xxxx
+	 */
+	DECODE_TABLE	(0x0e000000, 0x00000000, arm_cccc_000x_table),
+
+	/*
+	 * Data-processing (immediate)
+	 *			cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0x0e000000, 0x02000000, arm_cccc_001x_table),
+
+	/*
+	 * Media instructions
+	 *			cccc 011x xxxx xxxx xxxx xxxx xxx1 xxxx
+	 */
+	DECODE_TABLE	(0x0f000010, 0x06000010, arm_cccc_0110_____xxx1_table),
+	DECODE_TABLE	(0x0f000010, 0x07000010, arm_cccc_0111_____xxx1_table),
+
+	/*
+	 * Load/store word and unsigned byte
+	 *			cccc 01xx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0x0c000000, 0x04000000, arm_cccc_01xx_table),
+
+	/*
+	 * Block data transfer instructions
+	 *			cccc 100x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0x0e000000, 0x08000000, arm_cccc_100x_table),
+
+	/* B			cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
+	/* BL			cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0x0e000000, 0x0a000000, PROBES_BRANCH),
+
+	/*
+	 * Supervisor Call, and coprocessor instructions
+	 */
+
+	/* MCRR			cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx */
+	/* MRRC			cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* LDC			cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC			cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP			cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR			cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC			cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
+	/* SVC			cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0c000000, 0x0c000000),
+
+	DECODE_END
+};
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(probes_decode_arm_table);
+#endif
+
+static void __kprobes arm_singlestep(probes_opcode_t insn,
+		struct arch_probes_insn *asi, struct pt_regs *regs)
+{
+	regs->ARM_pc += 4;
+	asi->insn_handler(insn, asi, regs);
+}
+
+/* Return:
+ *   INSN_REJECTED     If instruction is one not allowed to kprobe,
+ *   INSN_GOOD         If instruction is supported and uses instruction slot,
+ *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
+ *
+ * For instructions we don't want to kprobe (INSN_REJECTED return result):
+ *   These are generally ones that modify the processor state making
+ *   them "hard" to simulate such as switches processor modes or
+ *   make accesses in alternate modes.  Any of these could be simulated
+ *   if the work was put into it, but low return considering they
+ *   should also be very rare.
+ */
+enum probes_insn __kprobes
+arm_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		       bool emulate, const union decode_action *actions)
+{
+	asi->insn_singlestep = arm_singlestep;
+	asi->insn_check_cc = probes_condition_checks[insn>>28];
+	return probes_decode_insn(insn, asi, probes_decode_arm_table, false,
+				  emulate, actions);
+}
diff --git a/arch/arm/kernel/probes-arm.h b/arch/arm/kernel/probes-arm.h
new file mode 100644
index 000000000000..ace6572f6e26
--- /dev/null
+++ b/arch/arm/kernel/probes-arm.h
@@ -0,0 +1,73 @@
+/*
+ * arch/arm/kernel/probes-arm.h
+ *
+ * Copyright 2013 Linaro Ltd.
+ * Written by: David A. Long
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#ifndef _ARM_KERNEL_PROBES_ARM_H
+#define  _ARM_KERNEL_PROBES_ARM_H
+
+enum probes_arm_action {
+	PROBES_EMULATE_NONE,
+	PROBES_SIMULATE_NOP,
+	PROBES_PRELOAD_IMM,
+	PROBES_PRELOAD_REG,
+	PROBES_BRANCH_IMM,
+	PROBES_BRANCH_REG,
+	PROBES_MRS,
+	PROBES_CLZ,
+	PROBES_SATURATING_ARITHMETIC,
+	PROBES_MUL1,
+	PROBES_MUL2,
+	PROBES_SWP,
+	PROBES_LDRSTRD,
+	PROBES_LOAD,
+	PROBES_STORE,
+	PROBES_LOAD_EXTRA,
+	PROBES_STORE_EXTRA,
+	PROBES_MOV_IP_SP,
+	PROBES_DATA_PROCESSING_REG,
+	PROBES_DATA_PROCESSING_IMM,
+	PROBES_MOV_HALFWORD,
+	PROBES_SEV,
+	PROBES_WFE,
+	PROBES_SATURATE,
+	PROBES_REV,
+	PROBES_MMI,
+	PROBES_PACK,
+	PROBES_EXTEND,
+	PROBES_EXTEND_ADD,
+	PROBES_MUL_ADD_LONG,
+	PROBES_MUL_ADD,
+	PROBES_BITFIELD,
+	PROBES_BRANCH,
+	PROBES_LDMSTM,
+	NUM_PROBES_ARM_ACTIONS
+};
+
+void __kprobes simulate_bbl(probes_opcode_t opcode,
+	struct arch_probes_insn *asi, struct pt_regs *regs);
+void __kprobes simulate_blx1(probes_opcode_t opcode,
+	struct arch_probes_insn *asi, struct pt_regs *regs);
+void __kprobes simulate_blx2bx(probes_opcode_t opcode,
+	struct arch_probes_insn *asi, struct pt_regs *regs);
+void __kprobes simulate_mrs(probes_opcode_t opcode,
+	struct arch_probes_insn *asi, struct pt_regs *regs);
+void __kprobes simulate_mov_ipsp(probes_opcode_t opcode,
+	struct arch_probes_insn *asi, struct pt_regs *regs);
+
+extern const union decode_item probes_decode_arm_table[];
+
+enum probes_insn arm_probes_decode_insn(probes_opcode_t,
+		struct arch_probes_insn *, bool emulate,
+		const union decode_action *actions);
+
+#endif
diff --git a/arch/arm/kernel/probes-thumb.c b/arch/arm/kernel/probes-thumb.c
new file mode 100644
index 000000000000..4131351e812f
--- /dev/null
+++ b/arch/arm/kernel/probes-thumb.c
@@ -0,0 +1,882 @@
+/*
+ * arch/arm/kernel/probes-thumb.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "probes.h"
+#include "probes-thumb.h"
+
+
+static const union decode_item t32_table_1110_100x_x0xx[] = {
+	/* Load/store multiple instructions */
+
+	/* Rn is PC		1110 100x x0xx 1111 xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe4f0000, 0xe80f0000),
+
+	/* SRS			1110 1000 00x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE			1110 1000 00x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffc00000, 0xe8000000),
+	/* SRS			1110 1001 10x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE			1110 1001 10x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffc00000, 0xe9800000),
+
+	/* STM Rn, {...pc}	1110 100x x0x0 xxxx 1xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe508000, 0xe8008000),
+	/* LDM Rn, {...lr,pc}	1110 100x x0x1 xxxx 11xx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe50c000, 0xe810c000),
+	/* LDM/STM Rn, {...sp}	1110 100x x0xx xxxx xx1x xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe402000, 0xe8002000),
+
+	/* STMIA		1110 1000 10x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDMIA		1110 1000 10x1 xxxx xxxx xxxx xxxx xxxx */
+	/* STMDB		1110 1001 00x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDMDB		1110 1001 00x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0xfe400000, 0xe8000000, PROBES_T32_LDMSTM),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1110_100x_x1xx[] = {
+	/* Load/store dual, load/store exclusive, table branch */
+
+	/* STRD (immediate)	1110 1000 x110 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRD (immediate)	1110 1000 x111 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_OR	(0xff600000, 0xe8600000),
+	/* STRD (immediate)	1110 1001 x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRD (immediate)	1110 1001 x1x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xff400000, 0xe9400000, PROBES_T32_LDRDSTRD,
+						 REGS(NOPCWB, NOSPPC, NOSPPC, 0, 0)),
+
+	/* TBB			1110 1000 1101 xxxx xxxx xxxx 0000 xxxx */
+	/* TBH			1110 1000 1101 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_SIMULATEX(0xfff000e0, 0xe8d00000, PROBES_T32_TABLE_BRANCH,
+						 REGS(NOSP, 0, 0, 0, NOSPPC)),
+
+	/* STREX		1110 1000 0100 xxxx xxxx xxxx xxxx xxxx */
+	/* LDREX		1110 1000 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* STREXB		1110 1000 1100 xxxx xxxx xxxx 0100 xxxx */
+	/* STREXH		1110 1000 1100 xxxx xxxx xxxx 0101 xxxx */
+	/* STREXD		1110 1000 1100 xxxx xxxx xxxx 0111 xxxx */
+	/* LDREXB		1110 1000 1101 xxxx xxxx xxxx 0100 xxxx */
+	/* LDREXH		1110 1000 1101 xxxx xxxx xxxx 0101 xxxx */
+	/* LDREXD		1110 1000 1101 xxxx xxxx xxxx 0111 xxxx */
+	/* And unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1110_101x[] = {
+	/* Data-processing (shifted register)				*/
+
+	/* TST			1110 1010 0001 xxxx xxxx 1111 xxxx xxxx */
+	/* TEQ			1110 1010 1001 xxxx xxxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xff700f00, 0xea100f00, PROBES_T32_TST,
+						 REGS(NOSPPC, 0, 0, 0, NOSPPC)),
+
+	/* CMN			1110 1011 0001 xxxx xxxx 1111 xxxx xxxx */
+	DECODE_OR	(0xfff00f00, 0xeb100f00),
+	/* CMP			1110 1011 1011 xxxx xxxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xfff00f00, 0xebb00f00, PROBES_T32_TST,
+						 REGS(NOPC, 0, 0, 0, NOSPPC)),
+
+	/* MOV			1110 1010 010x 1111 xxxx xxxx xxxx xxxx */
+	/* MVN			1110 1010 011x 1111 xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xffcf0000, 0xea4f0000, PROBES_T32_MOV,
+						 REGS(0, 0, NOSPPC, 0, NOSPPC)),
+
+	/* ???			1110 1010 101x xxxx xxxx xxxx xxxx xxxx */
+	/* ???			1110 1010 111x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffa00000, 0xeaa00000),
+	/* ???			1110 1011 001x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffe00000, 0xeb200000),
+	/* ???			1110 1011 100x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffe00000, 0xeb800000),
+	/* ???			1110 1011 111x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffe00000, 0xebe00000),
+
+	/* ADD/SUB SP, SP, Rm, LSL #0..3				*/
+	/*			1110 1011 x0xx 1101 x000 1101 xx00 xxxx */
+	DECODE_EMULATEX	(0xff4f7f30, 0xeb0d0d00, PROBES_T32_ADDSUB,
+						 REGS(SP, 0, SP, 0, NOSPPC)),
+
+	/* ADD/SUB SP, SP, Rm, shift					*/
+	/*			1110 1011 x0xx 1101 xxxx 1101 xxxx xxxx */
+	DECODE_REJECT	(0xff4f0f00, 0xeb0d0d00),
+
+	/* ADD/SUB Rd, SP, Rm, shift					*/
+	/*			1110 1011 x0xx 1101 xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xff4f0000, 0xeb0d0000, PROBES_T32_ADDSUB,
+						 REGS(SP, 0, NOPC, 0, NOSPPC)),
+
+	/* AND			1110 1010 000x xxxx xxxx xxxx xxxx xxxx */
+	/* BIC			1110 1010 001x xxxx xxxx xxxx xxxx xxxx */
+	/* ORR			1110 1010 010x xxxx xxxx xxxx xxxx xxxx */
+	/* ORN			1110 1010 011x xxxx xxxx xxxx xxxx xxxx */
+	/* EOR			1110 1010 100x xxxx xxxx xxxx xxxx xxxx */
+	/* PKH			1110 1010 110x xxxx xxxx xxxx xxxx xxxx */
+	/* ADD			1110 1011 000x xxxx xxxx xxxx xxxx xxxx */
+	/* ADC			1110 1011 010x xxxx xxxx xxxx xxxx xxxx */
+	/* SBC			1110 1011 011x xxxx xxxx xxxx xxxx xxxx */
+	/* SUB			1110 1011 101x xxxx xxxx xxxx xxxx xxxx */
+	/* RSB			1110 1011 110x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfe000000, 0xea000000, PROBES_T32_LOGICAL,
+						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_0x0x___0[] = {
+	/* Data-processing (modified immediate)				*/
+
+	/* TST			1111 0x00 0001 xxxx 0xxx 1111 xxxx xxxx */
+	/* TEQ			1111 0x00 1001 xxxx 0xxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xfb708f00, 0xf0100f00, PROBES_T32_TST,
+						 REGS(NOSPPC, 0, 0, 0, 0)),
+
+	/* CMN			1111 0x01 0001 xxxx 0xxx 1111 xxxx xxxx */
+	DECODE_OR	(0xfbf08f00, 0xf1100f00),
+	/* CMP			1111 0x01 1011 xxxx 0xxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xfbf08f00, 0xf1b00f00, PROBES_T32_CMP,
+						 REGS(NOPC, 0, 0, 0, 0)),
+
+	/* MOV			1111 0x00 010x 1111 0xxx xxxx xxxx xxxx */
+	/* MVN			1111 0x00 011x 1111 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbcf8000, 0xf04f0000, PROBES_T32_MOV,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/* ???			1111 0x00 101x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf0a00000),
+	/* ???			1111 0x00 110x xxxx 0xxx xxxx xxxx xxxx */
+	/* ???			1111 0x00 111x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbc08000, 0xf0c00000),
+	/* ???			1111 0x01 001x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf1200000),
+	/* ???			1111 0x01 100x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf1800000),
+	/* ???			1111 0x01 111x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf1e00000),
+
+	/* ADD Rd, SP, #imm	1111 0x01 000x 1101 0xxx xxxx xxxx xxxx */
+	/* SUB Rd, SP, #imm	1111 0x01 101x 1101 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb4f8000, 0xf10d0000, PROBES_T32_ADDSUB,
+						 REGS(SP, 0, NOPC, 0, 0)),
+
+	/* AND			1111 0x00 000x xxxx 0xxx xxxx xxxx xxxx */
+	/* BIC			1111 0x00 001x xxxx 0xxx xxxx xxxx xxxx */
+	/* ORR			1111 0x00 010x xxxx 0xxx xxxx xxxx xxxx */
+	/* ORN			1111 0x00 011x xxxx 0xxx xxxx xxxx xxxx */
+	/* EOR			1111 0x00 100x xxxx 0xxx xxxx xxxx xxxx */
+	/* ADD			1111 0x01 000x xxxx 0xxx xxxx xxxx xxxx */
+	/* ADC			1111 0x01 010x xxxx 0xxx xxxx xxxx xxxx */
+	/* SBC			1111 0x01 011x xxxx 0xxx xxxx xxxx xxxx */
+	/* SUB			1111 0x01 101x xxxx 0xxx xxxx xxxx xxxx */
+	/* RSB			1111 0x01 110x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfa008000, 0xf0000000, PROBES_T32_LOGICAL,
+						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_0x1x___0[] = {
+	/* Data-processing (plain binary immediate)			*/
+
+	/* ADDW Rd, PC, #imm	1111 0x10 0000 1111 0xxx xxxx xxxx xxxx */
+	DECODE_OR	(0xfbff8000, 0xf20f0000),
+	/* SUBW	Rd, PC, #imm	1111 0x10 1010 1111 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbff8000, 0xf2af0000, PROBES_T32_ADDWSUBW_PC,
+						 REGS(PC, 0, NOSPPC, 0, 0)),
+
+	/* ADDW SP, SP, #imm	1111 0x10 0000 1101 0xxx 1101 xxxx xxxx */
+	DECODE_OR	(0xfbff8f00, 0xf20d0d00),
+	/* SUBW	SP, SP, #imm	1111 0x10 1010 1101 0xxx 1101 xxxx xxxx */
+	DECODE_EMULATEX	(0xfbff8f00, 0xf2ad0d00, PROBES_T32_ADDWSUBW,
+						 REGS(SP, 0, SP, 0, 0)),
+
+	/* ADDW			1111 0x10 0000 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_OR	(0xfbf08000, 0xf2000000),
+	/* SUBW			1111 0x10 1010 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbf08000, 0xf2a00000, PROBES_T32_ADDWSUBW,
+						 REGS(NOPCX, 0, NOSPPC, 0, 0)),
+
+	/* MOVW			1111 0x10 0100 xxxx 0xxx xxxx xxxx xxxx */
+	/* MOVT			1111 0x10 1100 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb708000, 0xf2400000, PROBES_T32_MOVW,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/* SSAT16		1111 0x11 0010 xxxx 0000 xxxx 00xx xxxx */
+	/* SSAT			1111 0x11 00x0 xxxx 0xxx xxxx xxxx xxxx */
+	/* USAT16		1111 0x11 1010 xxxx 0000 xxxx 00xx xxxx */
+	/* USAT			1111 0x11 10x0 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb508000, 0xf3000000, PROBES_T32_SAT,
+						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
+
+	/* SFBX			1111 0x11 0100 xxxx 0xxx xxxx xxxx xxxx */
+	/* UFBX			1111 0x11 1100 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb708000, 0xf3400000, PROBES_T32_BITFIELD,
+						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
+
+	/* BFC			1111 0x11 0110 1111 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbff8000, 0xf36f0000, PROBES_T32_BITFIELD,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/* BFI			1111 0x11 0110 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbf08000, 0xf3600000, PROBES_T32_BITFIELD,
+						 REGS(NOSPPCX, 0, NOSPPC, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_0xxx___1[] = {
+	/* Branches and miscellaneous control				*/
+
+	/* YIELD		1111 0011 1010 xxxx 10x0 x000 0000 0001 */
+	DECODE_OR	(0xfff0d7ff, 0xf3a08001),
+	/* SEV			1111 0011 1010 xxxx 10x0 x000 0000 0100 */
+	DECODE_EMULATE	(0xfff0d7ff, 0xf3a08004, PROBES_T32_SEV),
+	/* NOP			1111 0011 1010 xxxx 10x0 x000 0000 0000 */
+	/* WFE			1111 0011 1010 xxxx 10x0 x000 0000 0010 */
+	/* WFI			1111 0011 1010 xxxx 10x0 x000 0000 0011 */
+	DECODE_SIMULATE	(0xfff0d7fc, 0xf3a08000, PROBES_T32_WFE),
+
+	/* MRS Rd, CPSR		1111 0011 1110 xxxx 10x0 xxxx xxxx xxxx */
+	DECODE_SIMULATEX(0xfff0d000, 0xf3e08000, PROBES_T32_MRS,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/*
+	 * Unsupported instructions
+	 *			1111 0x11 1xxx xxxx 10x0 xxxx xxxx xxxx
+	 *
+	 * MSR			1111 0011 100x xxxx 10x0 xxxx xxxx xxxx
+	 * DBG hint		1111 0011 1010 xxxx 10x0 x000 1111 xxxx
+	 * Unallocated hints	1111 0011 1010 xxxx 10x0 x000 xxxx xxxx
+	 * CPS			1111 0011 1010 xxxx 10x0 xxxx xxxx xxxx
+	 * CLREX/DSB/DMB/ISB	1111 0011 1011 xxxx 10x0 xxxx xxxx xxxx
+	 * BXJ			1111 0011 1100 xxxx 10x0 xxxx xxxx xxxx
+	 * SUBS PC,LR,#<imm8>	1111 0011 1101 xxxx 10x0 xxxx xxxx xxxx
+	 * MRS Rd, SPSR		1111 0011 1111 xxxx 10x0 xxxx xxxx xxxx
+	 * SMC			1111 0111 1111 xxxx 1000 xxxx xxxx xxxx
+	 * UNDEFINED		1111 0111 1111 xxxx 1010 xxxx xxxx xxxx
+	 * ???			1111 0111 1xxx xxxx 1010 xxxx xxxx xxxx
+	 */
+	DECODE_REJECT	(0xfb80d000, 0xf3808000),
+
+	/* Bcc			1111 0xxx xxxx xxxx 10x0 xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0xf800d000, 0xf0008000, PROBES_T32_BRANCH_COND),
+
+	/* BLX			1111 0xxx xxxx xxxx 11x0 xxxx xxxx xxx0 */
+	DECODE_OR	(0xf800d001, 0xf000c000),
+	/* B			1111 0xxx xxxx xxxx 10x1 xxxx xxxx xxxx */
+	/* BL			1111 0xxx xxxx xxxx 11x1 xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xf8009000, 0xf0009000, PROBES_T32_BRANCH),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_100x_x0x1__1111[] = {
+	/* Memory hints							*/
+
+	/* PLD (literal)	1111 1000 x001 1111 1111 xxxx xxxx xxxx */
+	/* PLI (literal)	1111 1001 x001 1111 1111 xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xfe7ff000, 0xf81ff000, PROBES_T32_PLDI),
+
+	/* PLD{W} (immediate)	1111 1000 10x1 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_OR	(0xffd0f000, 0xf890f000),
+	/* PLD{W} (immediate)	1111 1000 00x1 xxxx 1111 1100 xxxx xxxx */
+	DECODE_OR	(0xffd0ff00, 0xf810fc00),
+	/* PLI (immediate)	1111 1001 1001 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_OR	(0xfff0f000, 0xf990f000),
+	/* PLI (immediate)	1111 1001 0001 xxxx 1111 1100 xxxx xxxx */
+	DECODE_SIMULATEX(0xfff0ff00, 0xf910fc00, PROBES_T32_PLDI,
+						 REGS(NOPCX, 0, 0, 0, 0)),
+
+	/* PLD{W} (register)	1111 1000 00x1 xxxx 1111 0000 00xx xxxx */
+	DECODE_OR	(0xffd0ffc0, 0xf810f000),
+	/* PLI (register)	1111 1001 0001 xxxx 1111 0000 00xx xxxx */
+	DECODE_SIMULATEX(0xfff0ffc0, 0xf910f000, PROBES_T32_PLDI,
+						 REGS(NOPCX, 0, 0, 0, NOSPPC)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_100x[] = {
+	/* Store/Load single data item					*/
+
+	/* ???			1111 100x x11x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe600000, 0xf8600000),
+
+	/* ???			1111 1001 0101 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfff00000, 0xf9500000),
+
+	/* ???			1111 100x 0xxx xxxx xxxx 10x0 xxxx xxxx */
+	DECODE_REJECT	(0xfe800d00, 0xf8000800),
+
+	/* STRBT		1111 1000 0000 xxxx xxxx 1110 xxxx xxxx */
+	/* STRHT		1111 1000 0010 xxxx xxxx 1110 xxxx xxxx */
+	/* STRT			1111 1000 0100 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRBT		1111 1000 0001 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRSBT		1111 1001 0001 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRHT		1111 1000 0011 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRSHT		1111 1001 0011 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRT			1111 1000 0101 xxxx xxxx 1110 xxxx xxxx */
+	DECODE_REJECT	(0xfe800f00, 0xf8000e00),
+
+	/* STR{,B,H} Rn,[PC...]	1111 1000 xxx0 1111 xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xff1f0000, 0xf80f0000),
+
+	/* STR{,B,H} PC,[Rn...]	1111 1000 xxx0 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_REJECT	(0xff10f000, 0xf800f000),
+
+	/* LDR (literal)	1111 1000 x101 1111 xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATEX(0xff7f0000, 0xf85f0000, PROBES_T32_LDR_LIT,
+						 REGS(PC, ANY, 0, 0, 0)),
+
+	/* STR (immediate)	1111 1000 0100 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDR (immediate)	1111 1000 0101 xxxx xxxx 1xxx xxxx xxxx */
+	DECODE_OR	(0xffe00800, 0xf8400800),
+	/* STR (immediate)	1111 1000 1100 xxxx xxxx xxxx xxxx xxxx */
+	/* LDR (immediate)	1111 1000 1101 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xffe00000, 0xf8c00000, PROBES_T32_LDRSTR,
+						 REGS(NOPCX, ANY, 0, 0, 0)),
+
+	/* STR (register)	1111 1000 0100 xxxx xxxx 0000 00xx xxxx */
+	/* LDR (register)	1111 1000 0101 xxxx xxxx 0000 00xx xxxx */
+	DECODE_EMULATEX	(0xffe00fc0, 0xf8400000, PROBES_T32_LDRSTR,
+						 REGS(NOPCX, ANY, 0, 0, NOSPPC)),
+
+	/* LDRB (literal)	1111 1000 x001 1111 xxxx xxxx xxxx xxxx */
+	/* LDRSB (literal)	1111 1001 x001 1111 xxxx xxxx xxxx xxxx */
+	/* LDRH (literal)	1111 1000 x011 1111 xxxx xxxx xxxx xxxx */
+	/* LDRSH (literal)	1111 1001 x011 1111 xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATEX(0xfe5f0000, 0xf81f0000, PROBES_T32_LDR_LIT,
+						 REGS(PC, NOSPPCX, 0, 0, 0)),
+
+	/* STRB (immediate)	1111 1000 0000 xxxx xxxx 1xxx xxxx xxxx */
+	/* STRH (immediate)	1111 1000 0010 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRB (immediate)	1111 1000 0001 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRSB (immediate)	1111 1001 0001 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRH (immediate)	1111 1000 0011 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRSH (immediate)	1111 1001 0011 xxxx xxxx 1xxx xxxx xxxx */
+	DECODE_OR	(0xfec00800, 0xf8000800),
+	/* STRB (immediate)	1111 1000 1000 xxxx xxxx xxxx xxxx xxxx */
+	/* STRH (immediate)	1111 1000 1010 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB (immediate)	1111 1000 1001 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRSB (immediate)	1111 1001 1001 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRH (immediate)	1111 1000 1011 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRSH (immediate)	1111 1001 1011 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfec00000, 0xf8800000, PROBES_T32_LDRSTR,
+						 REGS(NOPCX, NOSPPCX, 0, 0, 0)),
+
+	/* STRB (register)	1111 1000 0000 xxxx xxxx 0000 00xx xxxx */
+	/* STRH (register)	1111 1000 0010 xxxx xxxx 0000 00xx xxxx */
+	/* LDRB (register)	1111 1000 0001 xxxx xxxx 0000 00xx xxxx */
+	/* LDRSB (register)	1111 1001 0001 xxxx xxxx 0000 00xx xxxx */
+	/* LDRH (register)	1111 1000 0011 xxxx xxxx 0000 00xx xxxx */
+	/* LDRSH (register)	1111 1001 0011 xxxx xxxx 0000 00xx xxxx */
+	DECODE_EMULATEX	(0xfe800fc0, 0xf8000000, PROBES_T32_LDRSTR,
+						 REGS(NOPCX, NOSPPCX, 0, 0, NOSPPC)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_1010___1111[] = {
+	/* Data-processing (register)					*/
+
+	/* ???			1111 1010 011x xxxx 1111 xxxx 1xxx xxxx */
+	DECODE_REJECT	(0xffe0f080, 0xfa60f080),
+
+	/* SXTH			1111 1010 0000 1111 1111 xxxx 1xxx xxxx */
+	/* UXTH			1111 1010 0001 1111 1111 xxxx 1xxx xxxx */
+	/* SXTB16		1111 1010 0010 1111 1111 xxxx 1xxx xxxx */
+	/* UXTB16		1111 1010 0011 1111 1111 xxxx 1xxx xxxx */
+	/* SXTB			1111 1010 0100 1111 1111 xxxx 1xxx xxxx */
+	/* UXTB			1111 1010 0101 1111 1111 xxxx 1xxx xxxx */
+	DECODE_EMULATEX	(0xff8ff080, 0xfa0ff080, PROBES_T32_SIGN_EXTEND,
+						 REGS(0, 0, NOSPPC, 0, NOSPPC)),
+
+
+	/* ???			1111 1010 1xxx xxxx 1111 xxxx 0x11 xxxx */
+	DECODE_REJECT	(0xff80f0b0, 0xfa80f030),
+	/* ???			1111 1010 1x11 xxxx 1111 xxxx 0xxx xxxx */
+	DECODE_REJECT	(0xffb0f080, 0xfab0f000),
+
+	/* SADD16		1111 1010 1001 xxxx 1111 xxxx 0000 xxxx */
+	/* SASX			1111 1010 1010 xxxx 1111 xxxx 0000 xxxx */
+	/* SSAX			1111 1010 1110 xxxx 1111 xxxx 0000 xxxx */
+	/* SSUB16		1111 1010 1101 xxxx 1111 xxxx 0000 xxxx */
+	/* SADD8		1111 1010 1000 xxxx 1111 xxxx 0000 xxxx */
+	/* SSUB8		1111 1010 1100 xxxx 1111 xxxx 0000 xxxx */
+
+	/* QADD16		1111 1010 1001 xxxx 1111 xxxx 0001 xxxx */
+	/* QASX			1111 1010 1010 xxxx 1111 xxxx 0001 xxxx */
+	/* QSAX			1111 1010 1110 xxxx 1111 xxxx 0001 xxxx */
+	/* QSUB16		1111 1010 1101 xxxx 1111 xxxx 0001 xxxx */
+	/* QADD8		1111 1010 1000 xxxx 1111 xxxx 0001 xxxx */
+	/* QSUB8		1111 1010 1100 xxxx 1111 xxxx 0001 xxxx */
+
+	/* SHADD16		1111 1010 1001 xxxx 1111 xxxx 0010 xxxx */
+	/* SHASX		1111 1010 1010 xxxx 1111 xxxx 0010 xxxx */
+	/* SHSAX		1111 1010 1110 xxxx 1111 xxxx 0010 xxxx */
+	/* SHSUB16		1111 1010 1101 xxxx 1111 xxxx 0010 xxxx */
+	/* SHADD8		1111 1010 1000 xxxx 1111 xxxx 0010 xxxx */
+	/* SHSUB8		1111 1010 1100 xxxx 1111 xxxx 0010 xxxx */
+
+	/* UADD16		1111 1010 1001 xxxx 1111 xxxx 0100 xxxx */
+	/* UASX			1111 1010 1010 xxxx 1111 xxxx 0100 xxxx */
+	/* USAX			1111 1010 1110 xxxx 1111 xxxx 0100 xxxx */
+	/* USUB16		1111 1010 1101 xxxx 1111 xxxx 0100 xxxx */
+	/* UADD8		1111 1010 1000 xxxx 1111 xxxx 0100 xxxx */
+	/* USUB8		1111 1010 1100 xxxx 1111 xxxx 0100 xxxx */
+
+	/* UQADD16		1111 1010 1001 xxxx 1111 xxxx 0101 xxxx */
+	/* UQASX		1111 1010 1010 xxxx 1111 xxxx 0101 xxxx */
+	/* UQSAX		1111 1010 1110 xxxx 1111 xxxx 0101 xxxx */
+	/* UQSUB16		1111 1010 1101 xxxx 1111 xxxx 0101 xxxx */
+	/* UQADD8		1111 1010 1000 xxxx 1111 xxxx 0101 xxxx */
+	/* UQSUB8		1111 1010 1100 xxxx 1111 xxxx 0101 xxxx */
+
+	/* UHADD16		1111 1010 1001 xxxx 1111 xxxx 0110 xxxx */
+	/* UHASX		1111 1010 1010 xxxx 1111 xxxx 0110 xxxx */
+	/* UHSAX		1111 1010 1110 xxxx 1111 xxxx 0110 xxxx */
+	/* UHSUB16		1111 1010 1101 xxxx 1111 xxxx 0110 xxxx */
+	/* UHADD8		1111 1010 1000 xxxx 1111 xxxx 0110 xxxx */
+	/* UHSUB8		1111 1010 1100 xxxx 1111 xxxx 0110 xxxx */
+	DECODE_OR	(0xff80f080, 0xfa80f000),
+
+	/* SXTAH		1111 1010 0000 xxxx 1111 xxxx 1xxx xxxx */
+	/* UXTAH		1111 1010 0001 xxxx 1111 xxxx 1xxx xxxx */
+	/* SXTAB16		1111 1010 0010 xxxx 1111 xxxx 1xxx xxxx */
+	/* UXTAB16		1111 1010 0011 xxxx 1111 xxxx 1xxx xxxx */
+	/* SXTAB		1111 1010 0100 xxxx 1111 xxxx 1xxx xxxx */
+	/* UXTAB		1111 1010 0101 xxxx 1111 xxxx 1xxx xxxx */
+	DECODE_OR	(0xff80f080, 0xfa00f080),
+
+	/* QADD			1111 1010 1000 xxxx 1111 xxxx 1000 xxxx */
+	/* QDADD		1111 1010 1000 xxxx 1111 xxxx 1001 xxxx */
+	/* QSUB			1111 1010 1000 xxxx 1111 xxxx 1010 xxxx */
+	/* QDSUB		1111 1010 1000 xxxx 1111 xxxx 1011 xxxx */
+	DECODE_OR	(0xfff0f0c0, 0xfa80f080),
+
+	/* SEL			1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */
+	DECODE_OR	(0xfff0f0f0, 0xfaa0f080),
+
+	/* LSL			1111 1010 000x xxxx 1111 xxxx 0000 xxxx */
+	/* LSR			1111 1010 001x xxxx 1111 xxxx 0000 xxxx */
+	/* ASR			1111 1010 010x xxxx 1111 xxxx 0000 xxxx */
+	/* ROR			1111 1010 011x xxxx 1111 xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff80f0f0, 0xfa00f000, PROBES_T32_MEDIA,
+						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
+
+	/* CLZ			1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */
+	DECODE_OR	(0xfff0f0f0, 0xfab0f080),
+
+	/* REV			1111 1010 1001 xxxx 1111 xxxx 1000 xxxx */
+	/* REV16		1111 1010 1001 xxxx 1111 xxxx 1001 xxxx */
+	/* RBIT			1111 1010 1001 xxxx 1111 xxxx 1010 xxxx */
+	/* REVSH		1111 1010 1001 xxxx 1111 xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0xfff0f0c0, 0xfa90f080, PROBES_T32_REVERSE,
+						 REGS(NOSPPC, 0, NOSPPC, 0, SAMEAS16)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_1011_0[] = {
+	/* Multiply, multiply accumulate, and absolute difference	*/
+
+	/* ???			1111 1011 0000 xxxx 1111 xxxx 0001 xxxx */
+	DECODE_REJECT	(0xfff0f0f0, 0xfb00f010),
+	/* ???			1111 1011 0111 xxxx 1111 xxxx 0001 xxxx */
+	DECODE_REJECT	(0xfff0f0f0, 0xfb70f010),
+
+	/* SMULxy		1111 1011 0001 xxxx 1111 xxxx 00xx xxxx */
+	DECODE_OR	(0xfff0f0c0, 0xfb10f000),
+	/* MUL			1111 1011 0000 xxxx 1111 xxxx 0000 xxxx */
+	/* SMUAD{X}		1111 1011 0010 xxxx 1111 xxxx 000x xxxx */
+	/* SMULWy		1111 1011 0011 xxxx 1111 xxxx 000x xxxx */
+	/* SMUSD{X}		1111 1011 0100 xxxx 1111 xxxx 000x xxxx */
+	/* SMMUL{R}		1111 1011 0101 xxxx 1111 xxxx 000x xxxx */
+	/* USAD8		1111 1011 0111 xxxx 1111 xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff80f0e0, 0xfb00f000, PROBES_T32_MUL_ADD,
+						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
+
+	/* ???			1111 1011 0111 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_REJECT	(0xfff000f0, 0xfb700010),
+
+	/* SMLAxy		1111 1011 0001 xxxx xxxx xxxx 00xx xxxx */
+	DECODE_OR	(0xfff000c0, 0xfb100000),
+	/* MLA			1111 1011 0000 xxxx xxxx xxxx 0000 xxxx */
+	/* MLS			1111 1011 0000 xxxx xxxx xxxx 0001 xxxx */
+	/* SMLAD{X}		1111 1011 0010 xxxx xxxx xxxx 000x xxxx */
+	/* SMLAWy		1111 1011 0011 xxxx xxxx xxxx 000x xxxx */
+	/* SMLSD{X}		1111 1011 0100 xxxx xxxx xxxx 000x xxxx */
+	/* SMMLA{R}		1111 1011 0101 xxxx xxxx xxxx 000x xxxx */
+	/* SMMLS{R}		1111 1011 0110 xxxx xxxx xxxx 000x xxxx */
+	/* USADA8		1111 1011 0111 xxxx xxxx xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff8000c0, 0xfb000000,  PROBES_T32_MUL_ADD2,
+						 REGS(NOSPPC, NOSPPCX, NOSPPC, 0, NOSPPC)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_1011_1[] = {
+	/* Long multiply, long multiply accumulate, and divide		*/
+
+	/* UMAAL		1111 1011 1110 xxxx xxxx xxxx 0110 xxxx */
+	DECODE_OR	(0xfff000f0, 0xfbe00060),
+	/* SMLALxy		1111 1011 1100 xxxx xxxx xxxx 10xx xxxx */
+	DECODE_OR	(0xfff000c0, 0xfbc00080),
+	/* SMLALD{X}		1111 1011 1100 xxxx xxxx xxxx 110x xxxx */
+	/* SMLSLD{X}		1111 1011 1101 xxxx xxxx xxxx 110x xxxx */
+	DECODE_OR	(0xffe000e0, 0xfbc000c0),
+	/* SMULL		1111 1011 1000 xxxx xxxx xxxx 0000 xxxx */
+	/* UMULL		1111 1011 1010 xxxx xxxx xxxx 0000 xxxx */
+	/* SMLAL		1111 1011 1100 xxxx xxxx xxxx 0000 xxxx */
+	/* UMLAL		1111 1011 1110 xxxx xxxx xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff9000f0, 0xfb800000, PROBES_T32_MUL_ADD_LONG,
+						 REGS(NOSPPC, NOSPPC, NOSPPC, 0, NOSPPC)),
+
+	/* SDIV			1111 1011 1001 xxxx xxxx xxxx 1111 xxxx */
+	/* UDIV			1111 1011 1011 xxxx xxxx xxxx 1111 xxxx */
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+const union decode_item probes_decode_thumb32_table[] = {
+
+	/*
+	 * Load/store multiple instructions
+	 *			1110 100x x0xx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe400000, 0xe8000000, t32_table_1110_100x_x0xx),
+
+	/*
+	 * Load/store dual, load/store exclusive, table branch
+	 *			1110 100x x1xx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe400000, 0xe8400000, t32_table_1110_100x_x1xx),
+
+	/*
+	 * Data-processing (shifted register)
+	 *			1110 101x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe000000, 0xea000000, t32_table_1110_101x),
+
+	/*
+	 * Coprocessor instructions
+	 *			1110 11xx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_REJECT	(0xfc000000, 0xec000000),
+
+	/*
+	 * Data-processing (modified immediate)
+	 *			1111 0x0x xxxx xxxx 0xxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfa008000, 0xf0000000, t32_table_1111_0x0x___0),
+
+	/*
+	 * Data-processing (plain binary immediate)
+	 *			1111 0x1x xxxx xxxx 0xxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfa008000, 0xf2000000, t32_table_1111_0x1x___0),
+
+	/*
+	 * Branches and miscellaneous control
+	 *			1111 0xxx xxxx xxxx 1xxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xf8008000, 0xf0008000, t32_table_1111_0xxx___1),
+
+	/*
+	 * Advanced SIMD element or structure load/store instructions
+	 *			1111 1001 xxx0 xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_REJECT	(0xff100000, 0xf9000000),
+
+	/*
+	 * Memory hints
+	 *			1111 100x x0x1 xxxx 1111 xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe50f000, 0xf810f000, t32_table_1111_100x_x0x1__1111),
+
+	/*
+	 * Store single data item
+	 *			1111 1000 xxx0 xxxx xxxx xxxx xxxx xxxx
+	 * Load single data items
+	 *			1111 100x xxx1 xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe000000, 0xf8000000, t32_table_1111_100x),
+
+	/*
+	 * Data-processing (register)
+	 *			1111 1010 xxxx xxxx 1111 xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xff00f000, 0xfa00f000, t32_table_1111_1010___1111),
+
+	/*
+	 * Multiply, multiply accumulate, and absolute difference
+	 *			1111 1011 0xxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xff800000, 0xfb000000, t32_table_1111_1011_0),
+
+	/*
+	 * Long multiply, long multiply accumulate, and divide
+	 *			1111 1011 1xxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xff800000, 0xfb800000, t32_table_1111_1011_1),
+
+	/*
+	 * Coprocessor instructions
+	 *			1111 11xx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_END
+};
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(probes_decode_thumb32_table);
+#endif
+
+static const union decode_item t16_table_1011[] = {
+	/* Miscellaneous 16-bit instructions		    */
+
+	/* ADD (SP plus immediate)	1011 0000 0xxx xxxx */
+	/* SUB (SP minus immediate)	1011 0000 1xxx xxxx */
+	DECODE_SIMULATE	(0xff00, 0xb000, PROBES_T16_ADD_SP),
+
+	/* CBZ				1011 00x1 xxxx xxxx */
+	/* CBNZ				1011 10x1 xxxx xxxx */
+	DECODE_SIMULATE	(0xf500, 0xb100, PROBES_T16_CBZ),
+
+	/* SXTH				1011 0010 00xx xxxx */
+	/* SXTB				1011 0010 01xx xxxx */
+	/* UXTH				1011 0010 10xx xxxx */
+	/* UXTB				1011 0010 11xx xxxx */
+	/* REV				1011 1010 00xx xxxx */
+	/* REV16			1011 1010 01xx xxxx */
+	/* ???				1011 1010 10xx xxxx */
+	/* REVSH			1011 1010 11xx xxxx */
+	DECODE_REJECT	(0xffc0, 0xba80),
+	DECODE_EMULATE	(0xf500, 0xb000, PROBES_T16_SIGN_EXTEND),
+
+	/* PUSH				1011 010x xxxx xxxx */
+	DECODE_CUSTOM	(0xfe00, 0xb400, PROBES_T16_PUSH),
+	/* POP				1011 110x xxxx xxxx */
+	DECODE_CUSTOM	(0xfe00, 0xbc00, PROBES_T16_POP),
+
+	/*
+	 * If-Then, and hints
+	 *				1011 1111 xxxx xxxx
+	 */
+
+	/* YIELD			1011 1111 0001 0000 */
+	DECODE_OR	(0xffff, 0xbf10),
+	/* SEV				1011 1111 0100 0000 */
+	DECODE_EMULATE	(0xffff, 0xbf40, PROBES_T16_SEV),
+	/* NOP				1011 1111 0000 0000 */
+	/* WFE				1011 1111 0010 0000 */
+	/* WFI				1011 1111 0011 0000 */
+	DECODE_SIMULATE	(0xffcf, 0xbf00, PROBES_T16_WFE),
+	/* Unassigned hints		1011 1111 xxxx 0000 */
+	DECODE_REJECT	(0xff0f, 0xbf00),
+	/* IT				1011 1111 xxxx xxxx */
+	DECODE_CUSTOM	(0xff00, 0xbf00, PROBES_T16_IT),
+
+	/* SETEND			1011 0110 010x xxxx */
+	/* CPS				1011 0110 011x xxxx */
+	/* BKPT				1011 1110 xxxx xxxx */
+	/* And unallocated instructions...		    */
+	DECODE_END
+};
+
+const union decode_item probes_decode_thumb16_table[] = {
+
+	/*
+	 * Shift (immediate), add, subtract, move, and compare
+	 *				00xx xxxx xxxx xxxx
+	 */
+
+	/* CMP (immediate)		0010 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xf800, 0x2800, PROBES_T16_CMP),
+
+	/* ADD (register)		0001 100x xxxx xxxx */
+	/* SUB (register)		0001 101x xxxx xxxx */
+	/* LSL (immediate)		0000 0xxx xxxx xxxx */
+	/* LSR (immediate)		0000 1xxx xxxx xxxx */
+	/* ASR (immediate)		0001 0xxx xxxx xxxx */
+	/* ADD (immediate, Thumb)	0001 110x xxxx xxxx */
+	/* SUB (immediate, Thumb)	0001 111x xxxx xxxx */
+	/* MOV (immediate)		0010 0xxx xxxx xxxx */
+	/* ADD (immediate, Thumb)	0011 0xxx xxxx xxxx */
+	/* SUB (immediate, Thumb)	0011 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xc000, 0x0000, PROBES_T16_ADDSUB),
+
+	/*
+	 * 16-bit Thumb data-processing instructions
+	 *				0100 00xx xxxx xxxx
+	 */
+
+	/* TST (register)		0100 0010 00xx xxxx */
+	DECODE_EMULATE	(0xffc0, 0x4200, PROBES_T16_CMP),
+	/* CMP (register)		0100 0010 10xx xxxx */
+	/* CMN (register)		0100 0010 11xx xxxx */
+	DECODE_EMULATE	(0xff80, 0x4280, PROBES_T16_CMP),
+	/* AND (register)		0100 0000 00xx xxxx */
+	/* EOR (register)		0100 0000 01xx xxxx */
+	/* LSL (register)		0100 0000 10xx xxxx */
+	/* LSR (register)		0100 0000 11xx xxxx */
+	/* ASR (register)		0100 0001 00xx xxxx */
+	/* ADC (register)		0100 0001 01xx xxxx */
+	/* SBC (register)		0100 0001 10xx xxxx */
+	/* ROR (register)		0100 0001 11xx xxxx */
+	/* RSB (immediate)		0100 0010 01xx xxxx */
+	/* ORR (register)		0100 0011 00xx xxxx */
+	/* MUL				0100 0011 00xx xxxx */
+	/* BIC (register)		0100 0011 10xx xxxx */
+	/* MVN (register)		0100 0011 10xx xxxx */
+	DECODE_EMULATE	(0xfc00, 0x4000, PROBES_T16_LOGICAL),
+
+	/*
+	 * Special data instructions and branch and exchange
+	 *				0100 01xx xxxx xxxx
+	 */
+
+	/* BLX pc			0100 0111 1111 1xxx */
+	DECODE_REJECT	(0xfff8, 0x47f8),
+
+	/* BX (register)		0100 0111 0xxx xxxx */
+	/* BLX (register)		0100 0111 1xxx xxxx */
+	DECODE_SIMULATE (0xff00, 0x4700, PROBES_T16_BLX),
+
+	/* ADD pc, pc			0100 0100 1111 1111 */
+	DECODE_REJECT	(0xffff, 0x44ff),
+
+	/* ADD (register)		0100 0100 xxxx xxxx */
+	/* CMP (register)		0100 0101 xxxx xxxx */
+	/* MOV (register)		0100 0110 xxxx xxxx */
+	DECODE_CUSTOM	(0xfc00, 0x4400, PROBES_T16_HIREGOPS),
+
+	/*
+	 * Load from Literal Pool
+	 * LDR (literal)		0100 1xxx xxxx xxxx
+	 */
+	DECODE_SIMULATE	(0xf800, 0x4800, PROBES_T16_LDR_LIT),
+
+	/*
+	 * 16-bit Thumb Load/store instructions
+	 *				0101 xxxx xxxx xxxx
+	 *				011x xxxx xxxx xxxx
+	 *				100x xxxx xxxx xxxx
+	 */
+
+	/* STR (register)		0101 000x xxxx xxxx */
+	/* STRH (register)		0101 001x xxxx xxxx */
+	/* STRB (register)		0101 010x xxxx xxxx */
+	/* LDRSB (register)		0101 011x xxxx xxxx */
+	/* LDR (register)		0101 100x xxxx xxxx */
+	/* LDRH (register)		0101 101x xxxx xxxx */
+	/* LDRB (register)		0101 110x xxxx xxxx */
+	/* LDRSH (register)		0101 111x xxxx xxxx */
+	/* STR (immediate, Thumb)	0110 0xxx xxxx xxxx */
+	/* LDR (immediate, Thumb)	0110 1xxx xxxx xxxx */
+	/* STRB (immediate, Thumb)	0111 0xxx xxxx xxxx */
+	/* LDRB (immediate, Thumb)	0111 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xc000, 0x4000, PROBES_T16_LDRHSTRH),
+	/* STRH (immediate, Thumb)	1000 0xxx xxxx xxxx */
+	/* LDRH (immediate, Thumb)	1000 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xf000, 0x8000, PROBES_T16_LDRHSTRH),
+	/* STR (immediate, Thumb)	1001 0xxx xxxx xxxx */
+	/* LDR (immediate, Thumb)	1001 1xxx xxxx xxxx */
+	DECODE_SIMULATE	(0xf000, 0x9000, PROBES_T16_LDRSTR),
+
+	/*
+	 * Generate PC-/SP-relative address
+	 * ADR (literal)		1010 0xxx xxxx xxxx
+	 * ADD (SP plus immediate)	1010 1xxx xxxx xxxx
+	 */
+	DECODE_SIMULATE	(0xf000, 0xa000, PROBES_T16_ADR),
+
+	/*
+	 * Miscellaneous 16-bit instructions
+	 *				1011 xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xf000, 0xb000, t16_table_1011),
+
+	/* STM				1100 0xxx xxxx xxxx */
+	/* LDM				1100 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xf000, 0xc000, PROBES_T16_LDMSTM),
+
+	/*
+	 * Conditional branch, and Supervisor Call
+	 */
+
+	/* Permanently UNDEFINED	1101 1110 xxxx xxxx */
+	/* SVC				1101 1111 xxxx xxxx */
+	DECODE_REJECT	(0xfe00, 0xde00),
+
+	/* Conditional branch		1101 xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0xf000, 0xd000, PROBES_T16_BRANCH_COND),
+
+	/*
+	 * Unconditional branch
+	 * B				1110 0xxx xxxx xxxx
+	 */
+	DECODE_SIMULATE	(0xf800, 0xe000, PROBES_T16_BRANCH),
+
+	DECODE_END
+};
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(probes_decode_thumb16_table);
+#endif
+
+static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
+{
+	if (unlikely(in_it_block(cpsr)))
+		return probes_condition_checks[current_cond(cpsr)](cpsr);
+	return true;
+}
+
+static void __kprobes thumb16_singlestep(probes_opcode_t opcode,
+		struct arch_probes_insn *asi,
+		struct pt_regs *regs)
+{
+	regs->ARM_pc += 2;
+	asi->insn_handler(opcode, asi, regs);
+	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+}
+
+static void __kprobes thumb32_singlestep(probes_opcode_t opcode,
+		struct arch_probes_insn *asi,
+		struct pt_regs *regs)
+{
+	regs->ARM_pc += 4;
+	asi->insn_handler(opcode, asi, regs);
+	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+}
+
+enum probes_insn __kprobes
+thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+			   bool emulate, const union decode_action *actions)
+{
+	asi->insn_singlestep = thumb16_singlestep;
+	asi->insn_check_cc = thumb_check_cc;
+	return probes_decode_insn(insn, asi, probes_decode_thumb16_table, true,
+				  emulate, actions);
+}
+
+enum probes_insn __kprobes
+thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+			   bool emulate, const union decode_action *actions)
+{
+	asi->insn_singlestep = thumb32_singlestep;
+	asi->insn_check_cc = thumb_check_cc;
+	return probes_decode_insn(insn, asi, probes_decode_thumb32_table, true,
+				  emulate, actions);
+}
diff --git a/arch/arm/kernel/probes-thumb.h b/arch/arm/kernel/probes-thumb.h
new file mode 100644
index 000000000000..7c6f6ebe514f
--- /dev/null
+++ b/arch/arm/kernel/probes-thumb.h
@@ -0,0 +1,97 @@
+/*
+ * arch/arm/kernel/probes-thumb.h
+ *
+ * Copyright 2013 Linaro Ltd.
+ * Written by: David A. Long
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#ifndef _ARM_KERNEL_PROBES_THUMB_H
+#define  _ARM_KERNEL_PROBES_THUMB_H
+
+/*
+ * True if current instruction is in an IT block.
+ */
+#define in_it_block(cpsr)	((cpsr & 0x06000c00) != 0x00000000)
+
+/*
+ * Return the condition code to check for the currently executing instruction.
+ * This is in ITSTATE<7:4> which is in CPSR<15:12> but is only valid if
+ * in_it_block returns true.
+ */
+#define current_cond(cpsr)	((cpsr >> 12) & 0xf)
+
+enum probes_t32_action {
+	PROBES_T32_EMULATE_NONE,
+	PROBES_T32_SIMULATE_NOP,
+	PROBES_T32_LDMSTM,
+	PROBES_T32_LDRDSTRD,
+	PROBES_T32_TABLE_BRANCH,
+	PROBES_T32_TST,
+	PROBES_T32_CMP,
+	PROBES_T32_MOV,
+	PROBES_T32_ADDSUB,
+	PROBES_T32_LOGICAL,
+	PROBES_T32_ADDWSUBW_PC,
+	PROBES_T32_ADDWSUBW,
+	PROBES_T32_MOVW,
+	PROBES_T32_SAT,
+	PROBES_T32_BITFIELD,
+	PROBES_T32_SEV,
+	PROBES_T32_WFE,
+	PROBES_T32_MRS,
+	PROBES_T32_BRANCH_COND,
+	PROBES_T32_BRANCH,
+	PROBES_T32_PLDI,
+	PROBES_T32_LDR_LIT,
+	PROBES_T32_LDRSTR,
+	PROBES_T32_SIGN_EXTEND,
+	PROBES_T32_MEDIA,
+	PROBES_T32_REVERSE,
+	PROBES_T32_MUL_ADD,
+	PROBES_T32_MUL_ADD2,
+	PROBES_T32_MUL_ADD_LONG,
+	NUM_PROBES_T32_ACTIONS
+};
+
+enum probes_t16_action {
+	PROBES_T16_ADD_SP,
+	PROBES_T16_CBZ,
+	PROBES_T16_SIGN_EXTEND,
+	PROBES_T16_PUSH,
+	PROBES_T16_POP,
+	PROBES_T16_SEV,
+	PROBES_T16_WFE,
+	PROBES_T16_IT,
+	PROBES_T16_CMP,
+	PROBES_T16_ADDSUB,
+	PROBES_T16_LOGICAL,
+	PROBES_T16_BLX,
+	PROBES_T16_HIREGOPS,
+	PROBES_T16_LDR_LIT,
+	PROBES_T16_LDRHSTRH,
+	PROBES_T16_LDRSTR,
+	PROBES_T16_ADR,
+	PROBES_T16_LDMSTM,
+	PROBES_T16_BRANCH_COND,
+	PROBES_T16_BRANCH,
+	NUM_PROBES_T16_ACTIONS
+};
+
+extern const union decode_item probes_decode_thumb32_table[];
+extern const union decode_item probes_decode_thumb16_table[];
+
+enum probes_insn __kprobes
+thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		bool emulate, const union decode_action *actions);
+enum probes_insn __kprobes
+thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		bool emulate, const union decode_action *actions);
+
+#endif
diff --git a/arch/arm/kernel/probes.c b/arch/arm/kernel/probes.c
new file mode 100644
index 000000000000..b41873f33e69
--- /dev/null
+++ b/arch/arm/kernel/probes.c
@@ -0,0 +1,455 @@
+/*
+ * arch/arm/kernel/probes.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * Some contents moved here from arch/arm/include/asm/kprobes-arm.c which is
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/system_info.h>
+#include <asm/ptrace.h>
+#include <linux/bug.h>
+
+#include "probes.h"
+
+
+#ifndef find_str_pc_offset
+
+/*
+ * For STR and STM instructions, an ARM core may choose to use either
+ * a +8 or a +12 displacement from the current instruction's address.
+ * Whichever value is chosen for a given core, it must be the same for
+ * both instructions and may not change.  This function measures it.
+ */
+
+int str_pc_offset;
+
+void __init find_str_pc_offset(void)
+{
+	int addr, scratch, ret;
+
+	__asm__ (
+		"sub	%[ret], pc, #4		\n\t"
+		"str	pc, %[addr]		\n\t"
+		"ldr	%[scr], %[addr]		\n\t"
+		"sub	%[ret], %[scr], %[ret]	\n\t"
+		: [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr));
+
+	str_pc_offset = ret;
+}
+
+#endif /* !find_str_pc_offset */
+
+
+#ifndef test_load_write_pc_interworking
+
+bool load_write_pc_interworks;
+
+void __init test_load_write_pc_interworking(void)
+{
+	int arch = cpu_architecture();
+	BUG_ON(arch == CPU_ARCH_UNKNOWN);
+	load_write_pc_interworks = arch >= CPU_ARCH_ARMv5T;
+}
+
+#endif /* !test_load_write_pc_interworking */
+
+
+#ifndef test_alu_write_pc_interworking
+
+bool alu_write_pc_interworks;
+
+void __init test_alu_write_pc_interworking(void)
+{
+	int arch = cpu_architecture();
+	BUG_ON(arch == CPU_ARCH_UNKNOWN);
+	alu_write_pc_interworks = arch >= CPU_ARCH_ARMv7;
+}
+
+#endif /* !test_alu_write_pc_interworking */
+
+
+void __init arm_probes_decode_init(void)
+{
+	find_str_pc_offset();
+	test_load_write_pc_interworking();
+	test_alu_write_pc_interworking();
+}
+
+
+static unsigned long __kprobes __check_eq(unsigned long cpsr)
+{
+	return cpsr & PSR_Z_BIT;
+}
+
+static unsigned long __kprobes __check_ne(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_Z_BIT;
+}
+
+static unsigned long __kprobes __check_cs(unsigned long cpsr)
+{
+	return cpsr & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_cc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_mi(unsigned long cpsr)
+{
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_pl(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_vs(unsigned long cpsr)
+{
+	return cpsr & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_vc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_hi(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return cpsr & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ls(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ge(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_lt(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_gt(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return (~temp) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_le(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return temp & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_al(unsigned long cpsr)
+{
+	return true;
+}
+
+probes_check_cc * const probes_condition_checks[16] = {
+	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
+	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
+	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
+	&__check_gt, &__check_le, &__check_al, &__check_al
+};
+
+
+void __kprobes probes_simulate_nop(probes_opcode_t opcode,
+	struct arch_probes_insn *asi,
+	struct pt_regs *regs)
+{
+}
+
+void __kprobes probes_emulate_none(probes_opcode_t opcode,
+	struct arch_probes_insn *asi,
+	struct pt_regs *regs)
+{
+	asi->insn_fn();
+}
+
+/*
+ * Prepare an instruction slot to receive an instruction for emulating.
+ * This is done by placing a subroutine return after the location where the
+ * instruction will be placed. We also modify ARM instructions to be
+ * unconditional as the condition code will already be checked before any
+ * emulation handler is called.
+ */
+static probes_opcode_t __kprobes
+prepare_emulated_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		      bool thumb)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if (thumb) {
+		u16 *thumb_insn = (u16 *)asi->insn;
+		thumb_insn[1] = 0x4770; /* Thumb bx lr */
+		thumb_insn[2] = 0x4770; /* Thumb bx lr */
+		return insn;
+	}
+	asi->insn[1] = 0xe12fff1e; /* ARM bx lr */
+#else
+	asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */
+#endif
+	/* Make an ARM instruction unconditional */
+	if (insn < 0xe0000000)
+		insn = (insn | 0xe0000000) & ~0x10000000;
+	return insn;
+}
+
+/*
+ * Write a (probably modified) instruction into the slot previously prepared by
+ * prepare_emulated_insn
+ */
+static void  __kprobes
+set_emulated_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		  bool thumb)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if (thumb) {
+		u16 *ip = (u16 *)asi->insn;
+		if (is_wide_instruction(insn))
+			*ip++ = insn >> 16;
+		*ip++ = insn;
+		return;
+	}
+#endif
+	asi->insn[0] = insn;
+}
+
+/*
+ * When we modify the register numbers encoded in an instruction to be emulated,
+ * the new values come from this define. For ARM and 32-bit Thumb instructions
+ * this gives...
+ *
+ *	bit position	  16  12   8   4   0
+ *	---------------+---+---+---+---+---+
+ *	register	 r2  r0  r1  --  r3
+ */
+#define INSN_NEW_BITS		0x00020103
+
+/* Each nibble has same value as that at INSN_NEW_BITS bit 16 */
+#define INSN_SAMEAS16_BITS	0x22222222
+
+/*
+ * Validate and modify each of the registers encoded in an instruction.
+ *
+ * Each nibble in regs contains a value from enum decode_reg_type. For each
+ * non-zero value, the corresponding nibble in pinsn is validated and modified
+ * according to the type.
+ */
+static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify)
+{
+	probes_opcode_t insn = *pinsn;
+	probes_opcode_t mask = 0xf; /* Start at least significant nibble */
+
+	for (; regs != 0; regs >>= 4, mask <<= 4) {
+
+		probes_opcode_t new_bits = INSN_NEW_BITS;
+
+		switch (regs & 0xf) {
+
+		case REG_TYPE_NONE:
+			/* Nibble not a register, skip to next */
+			continue;
+
+		case REG_TYPE_ANY:
+			/* Any register is allowed */
+			break;
+
+		case REG_TYPE_SAMEAS16:
+			/* Replace register with same as at bit position 16 */
+			new_bits = INSN_SAMEAS16_BITS;
+			break;
+
+		case REG_TYPE_SP:
+			/* Only allow SP (R13) */
+			if ((insn ^ 0xdddddddd) & mask)
+				goto reject;
+			break;
+
+		case REG_TYPE_PC:
+			/* Only allow PC (R15) */
+			if ((insn ^ 0xffffffff) & mask)
+				goto reject;
+			break;
+
+		case REG_TYPE_NOSP:
+			/* Reject SP (R13) */
+			if (((insn ^ 0xdddddddd) & mask) == 0)
+				goto reject;
+			break;
+
+		case REG_TYPE_NOSPPC:
+		case REG_TYPE_NOSPPCX:
+			/* Reject SP and PC (R13 and R15) */
+			if (((insn ^ 0xdddddddd) & 0xdddddddd & mask) == 0)
+				goto reject;
+			break;
+
+		case REG_TYPE_NOPCWB:
+			if (!is_writeback(insn))
+				break; /* No writeback, so any register is OK */
+			/* fall through... */
+		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX:
+			/* Reject PC (R15) */
+			if (((insn ^ 0xffffffff) & mask) == 0)
+				goto reject;
+			break;
+		}
+
+		/* Replace value of nibble with new register number... */
+		insn &= ~mask;
+		insn |= new_bits & mask;
+	}
+
+	if (modify)
+		*pinsn = insn;
+
+	return true;
+
+reject:
+	return false;
+}
+
+static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
+	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
+	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
+	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
+	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
+	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
+	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
+};
+
+/*
+ * probes_decode_insn operates on data tables in order to decode an ARM
+ * architecture instruction onto which a kprobe has been placed.
+ *
+ * These instruction decoding tables are a concatenation of entries each
+ * of which consist of one of the following structs:
+ *
+ *	decode_table
+ *	decode_custom
+ *	decode_simulate
+ *	decode_emulate
+ *	decode_or
+ *	decode_reject
+ *
+ * Each of these starts with a struct decode_header which has the following
+ * fields:
+ *
+ *	type_regs
+ *	mask
+ *	value
+ *
+ * The least significant DECODE_TYPE_BITS of type_regs contains a value
+ * from enum decode_type, this indicates which of the decode_* structs
+ * the entry contains. The value DECODE_TYPE_END indicates the end of the
+ * table.
+ *
+ * When the table is parsed, each entry is checked in turn to see if it
+ * matches the instruction to be decoded using the test:
+ *
+ *	(insn & mask) == value
+ *
+ * If no match is found before the end of the table is reached then decoding
+ * fails with INSN_REJECTED.
+ *
+ * When a match is found, decode_regs() is called to validate and modify each
+ * of the registers encoded in the instruction; the data it uses to do this
+ * is (type_regs >> DECODE_TYPE_BITS). A validation failure will cause decoding
+ * to fail with INSN_REJECTED.
+ *
+ * Once the instruction has passed the above tests, further processing
+ * depends on the type of the table entry's decode struct.
+ *
+ */
+int __kprobes
+probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		   const union decode_item *table, bool thumb,
+		   bool emulate, const union decode_action *actions)
+{
+	const struct decode_header *h = (struct decode_header *)table;
+	const struct decode_header *next;
+	bool matched = false;
+
+	if (emulate)
+		insn = prepare_emulated_insn(insn, asi, thumb);
+
+	for (;; h = next) {
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+		u32 regs = h->type_regs.bits >> DECODE_TYPE_BITS;
+
+		if (type == DECODE_TYPE_END)
+			return INSN_REJECTED;
+
+		next = (struct decode_header *)
+				((uintptr_t)h + decode_struct_sizes[type]);
+
+		if (!matched && (insn & h->mask.bits) != h->value.bits)
+			continue;
+
+		if (!decode_regs(&insn, regs, emulate))
+			return INSN_REJECTED;
+
+		switch (type) {
+
+		case DECODE_TYPE_TABLE: {
+			struct decode_table *d = (struct decode_table *)h;
+			next = (struct decode_header *)d->table.table;
+			break;
+		}
+
+		case DECODE_TYPE_CUSTOM: {
+			struct decode_custom *d = (struct decode_custom *)h;
+			return actions[d->decoder.action].decoder(insn, asi, h);
+		}
+
+		case DECODE_TYPE_SIMULATE: {
+			struct decode_simulate *d = (struct decode_simulate *)h;
+			asi->insn_handler = actions[d->handler.action].handler;
+			return INSN_GOOD_NO_SLOT;
+		}
+
+		case DECODE_TYPE_EMULATE: {
+			struct decode_emulate *d = (struct decode_emulate *)h;
+
+			if (!emulate)
+				return actions[d->handler.action].decoder(insn,
+					asi, h);
+
+			asi->insn_handler = actions[d->handler.action].handler;
+			set_emulated_insn(insn, asi, thumb);
+			return INSN_GOOD;
+		}
+
+		case DECODE_TYPE_OR:
+			matched = true;
+			break;
+
+		case DECODE_TYPE_REJECT:
+		default:
+			return INSN_REJECTED;
+		}
+	}
+}
diff --git a/arch/arm/kernel/probes.h b/arch/arm/kernel/probes.h
new file mode 100644
index 000000000000..dba9f2466a93
--- /dev/null
+++ b/arch/arm/kernel/probes.h
@@ -0,0 +1,407 @@
+/*
+ * arch/arm/kernel/probes.h
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * Some contents moved here from arch/arm/include/asm/kprobes.h which is
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_PROBES_H
+#define  _ARM_KERNEL_PROBES_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/probes.h>
+
+void __init arm_probes_decode_init(void);
+
+extern probes_check_cc * const probes_condition_checks[16];
+
+#if __LINUX_ARM_ARCH__ >= 7
+
+/* str_pc_offset is architecturally defined from ARMv7 onwards */
+#define str_pc_offset 8
+#define find_str_pc_offset()
+
+#else /* __LINUX_ARM_ARCH__ < 7 */
+
+/* We need a run-time check to determine str_pc_offset */
+extern int str_pc_offset;
+void __init find_str_pc_offset(void);
+
+#endif
+
+
+/*
+ * Update ITSTATE after normal execution of an IT block instruction.
+ *
+ * The 8 IT state bits are split into two parts in CPSR:
+ *	ITSTATE<1:0> are in CPSR<26:25>
+ *	ITSTATE<7:2> are in CPSR<15:10>
+ */
+static inline unsigned long it_advance(unsigned long cpsr)
+	{
+	if ((cpsr & 0x06000400) == 0) {
+		/* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
+		cpsr &= ~PSR_IT_MASK;
+	} else {
+		/* We need to shift left ITSTATE<4:0> */
+		const unsigned long mask = 0x06001c00;  /* Mask ITSTATE<4:0> */
+		unsigned long it = cpsr & mask;
+		it <<= 1;
+		it |= it >> (27 - 10);  /* Carry ITSTATE<2> to correct place */
+		it &= mask;
+		cpsr &= ~mask;
+		cpsr |= it;
+	}
+	return cpsr;
+}
+
+static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
+{
+	long cpsr = regs->ARM_cpsr;
+	if (pcv & 0x1) {
+		cpsr |= PSR_T_BIT;
+		pcv &= ~0x1;
+	} else {
+		cpsr &= ~PSR_T_BIT;
+		pcv &= ~0x2;	/* Avoid UNPREDICTABLE address allignment */
+	}
+	regs->ARM_cpsr = cpsr;
+	regs->ARM_pc = pcv;
+}
+
+
+#if __LINUX_ARM_ARCH__ >= 6
+
+/* Kernels built for >= ARMv6 should never run on <= ARMv5 hardware, so... */
+#define load_write_pc_interworks true
+#define test_load_write_pc_interworking()
+
+#else /* __LINUX_ARM_ARCH__ < 6 */
+
+/* We need run-time testing to determine if load_write_pc() should interwork. */
+extern bool load_write_pc_interworks;
+void __init test_load_write_pc_interworking(void);
+
+#endif
+
+static inline void __kprobes load_write_pc(long pcv, struct pt_regs *regs)
+{
+	if (load_write_pc_interworks)
+		bx_write_pc(pcv, regs);
+	else
+		regs->ARM_pc = pcv;
+}
+
+
+#if __LINUX_ARM_ARCH__ >= 7
+
+#define alu_write_pc_interworks true
+#define test_alu_write_pc_interworking()
+
+#elif __LINUX_ARM_ARCH__ <= 5
+
+/* Kernels built for <= ARMv5 should never run on >= ARMv6 hardware, so... */
+#define alu_write_pc_interworks false
+#define test_alu_write_pc_interworking()
+
+#else /* __LINUX_ARM_ARCH__ == 6 */
+
+/* We could be an ARMv6 binary on ARMv7 hardware so we need a run-time check. */
+extern bool alu_write_pc_interworks;
+void __init test_alu_write_pc_interworking(void);
+
+#endif /* __LINUX_ARM_ARCH__ == 6 */
+
+static inline void __kprobes alu_write_pc(long pcv, struct pt_regs *regs)
+{
+	if (alu_write_pc_interworks)
+		bx_write_pc(pcv, regs);
+	else
+		regs->ARM_pc = pcv;
+}
+
+
+/*
+ * Test if load/store instructions writeback the address register.
+ * if P (bit 24) == 0 or W (bit 21) == 1
+ */
+#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
+
+/*
+ * The following definitions and macros are used to build instruction
+ * decoding tables for use by probes_decode_insn.
+ *
+ * These tables are a concatenation of entries each of which consist of one of
+ * the decode_* structs. All of the fields in every type of decode structure
+ * are of the union type decode_item, therefore the entire decode table can be
+ * viewed as an array of these and declared like:
+ *
+ *	static const union decode_item table_name[] = {};
+ *
+ * In order to construct each entry in the table, macros are used to
+ * initialise a number of sequential decode_item values in a layout which
+ * matches the relevant struct. E.g. DECODE_SIMULATE initialise a struct
+ * decode_simulate by initialising four decode_item objects like this...
+ *
+ *	{.bits = _type},
+ *	{.bits = _mask},
+ *	{.bits = _value},
+ *	{.action = _handler},
+ *
+ * Initialising a specified member of the union means that the compiler
+ * will produce a warning if the argument is of an incorrect type.
+ *
+ * Below is a list of each of the macros used to initialise entries and a
+ * description of the action performed when that entry is matched to an
+ * instruction. A match is found when (instruction & mask) == value.
+ *
+ * DECODE_TABLE(mask, value, table)
+ *	Instruction decoding jumps to parsing the new sub-table 'table'.
+ *
+ * DECODE_CUSTOM(mask, value, decoder)
+ *	The value of 'decoder' is used as an index into the array of
+ *	action functions, and the retrieved decoder function is invoked
+ *	to complete decoding of the instruction.
+ *
+ * DECODE_SIMULATE(mask, value, handler)
+ *	The probes instruction handler is set to the value found by
+ *	indexing into the action array using the value of 'handler'. This
+ *	will be used to simulate the instruction when the probe is hit.
+ *	Decoding returns with INSN_GOOD_NO_SLOT.
+ *
+ * DECODE_EMULATE(mask, value, handler)
+ *	The probes instruction handler is set to the value found by
+ *	indexing into the action array using the value of 'handler'. This
+ *	will be used to emulate the instruction when the probe is hit. The
+ *	modified instruction (see below) is placed in the probes instruction
+ *	slot so it may be called by the emulation code. Decoding returns
+ *	with INSN_GOOD.
+ *
+ * DECODE_REJECT(mask, value)
+ *	Instruction decoding fails with INSN_REJECTED
+ *
+ * DECODE_OR(mask, value)
+ *	This allows the mask/value test of multiple table entries to be
+ *	logically ORed. Once an 'or' entry is matched the decoding action to
+ *	be performed is that of the next entry which isn't an 'or'. E.g.
+ *
+ *		DECODE_OR	(mask1, value1)
+ *		DECODE_OR	(mask2, value2)
+ *		DECODE_SIMULATE	(mask3, value3, simulation_handler)
+ *
+ *	This means that if any of the three mask/value pairs match the
+ *	instruction being decoded, then 'simulation_handler' will be used
+ *	for it.
+ *
+ * Both the SIMULATE and EMULATE macros have a second form which take an
+ * additional 'regs' argument.
+ *
+ *	DECODE_SIMULATEX(mask, value, handler, regs)
+ *	DECODE_EMULATEX	(mask, value, handler, regs)
+ *
+ * These are used to specify what kind of CPU register is encoded in each of the
+ * least significant 5 nibbles of the instruction being decoded. The regs value
+ * is specified using the REGS macro, this takes any of the REG_TYPE_* values
+ * from enum decode_reg_type as arguments; only the '*' part of the name is
+ * given. E.g.
+ *
+ *	REGS(0, ANY, NOPC, 0, ANY)
+ *
+ * This indicates an instruction is encoded like:
+ *
+ *	bits 19..16	ignore
+ *	bits 15..12	any register allowed here
+ *	bits 11.. 8	any register except PC allowed here
+ *	bits  7.. 4	ignore
+ *	bits  3.. 0	any register allowed here
+ *
+ * This register specification is checked after a decode table entry is found to
+ * match an instruction (through the mask/value test). Any invalid register then
+ * found in the instruction will cause decoding to fail with INSN_REJECTED. In
+ * the above example this would happen if bits 11..8 of the instruction were
+ * 1111, indicating R15 or PC.
+ *
+ * As well as checking for legal combinations of registers, this data is also
+ * used to modify the registers encoded in the instructions so that an
+ * emulation routines can use it. (See decode_regs() and INSN_NEW_BITS.)
+ *
+ * Here is a real example which matches ARM instructions of the form
+ * "AND <Rd>,<Rn>,<Rm>,<shift> <Rs>"
+ *
+ *	DECODE_EMULATEX	(0x0e000090, 0x00000010, PROBES_DATA_PROCESSING_REG,
+ *						 REGS(ANY, ANY, NOPC, 0, ANY)),
+ *						      ^    ^    ^        ^
+ *						      Rn   Rd   Rs       Rm
+ *
+ * Decoding the instruction "AND R4, R5, R6, ASL R15" will be rejected because
+ * Rs == R15
+ *
+ * Decoding the instruction "AND R4, R5, R6, ASL R7" will be accepted and the
+ * instruction will be modified to "AND R0, R2, R3, ASL R1" and then placed into
+ * the kprobes instruction slot. This can then be called later by the handler
+ * function emulate_rd12rn16rm0rs8_rwflags (a pointer to which is retrieved from
+ * the indicated slot in the action array), in order to simulate the instruction.
+ */
+
+enum decode_type {
+	DECODE_TYPE_END,
+	DECODE_TYPE_TABLE,
+	DECODE_TYPE_CUSTOM,
+	DECODE_TYPE_SIMULATE,
+	DECODE_TYPE_EMULATE,
+	DECODE_TYPE_OR,
+	DECODE_TYPE_REJECT,
+	NUM_DECODE_TYPES /* Must be last enum */
+};
+
+#define DECODE_TYPE_BITS	4
+#define DECODE_TYPE_MASK	((1 << DECODE_TYPE_BITS) - 1)
+
+enum decode_reg_type {
+	REG_TYPE_NONE = 0, /* Not a register, ignore */
+	REG_TYPE_ANY,	   /* Any register allowed */
+	REG_TYPE_SAMEAS16, /* Register should be same as that at bits 19..16 */
+	REG_TYPE_SP,	   /* Register must be SP */
+	REG_TYPE_PC,	   /* Register must be PC */
+	REG_TYPE_NOSP,	   /* Register must not be SP */
+	REG_TYPE_NOSPPC,   /* Register must not be SP or PC */
+	REG_TYPE_NOPC,	   /* Register must not be PC */
+	REG_TYPE_NOPCWB,   /* No PC if load/store write-back flag also set */
+
+	/* The following types are used when the encoding for PC indicates
+	 * another instruction form. This distiction only matters for test
+	 * case coverage checks.
+	 */
+	REG_TYPE_NOPCX,	   /* Register must not be PC */
+	REG_TYPE_NOSPPCX,  /* Register must not be SP or PC */
+
+	/* Alias to allow '0' arg to be used in REGS macro. */
+	REG_TYPE_0 = REG_TYPE_NONE
+};
+
+#define REGS(r16, r12, r8, r4, r0)	\
+	(((REG_TYPE_##r16) << 16) +	\
+	((REG_TYPE_##r12) << 12) +	\
+	((REG_TYPE_##r8) << 8) +	\
+	((REG_TYPE_##r4) << 4) +	\
+	(REG_TYPE_##r0))
+
+union decode_item {
+	u32			bits;
+	const union decode_item	*table;
+	int			action;
+};
+
+struct decode_header;
+typedef enum probes_insn (probes_custom_decode_t)(probes_opcode_t,
+						  struct arch_probes_insn *,
+						  const struct decode_header *);
+
+union decode_action {
+	probes_insn_handler_t	*handler;
+	probes_custom_decode_t	*decoder;
+};
+
+#define DECODE_END			\
+	{.bits = DECODE_TYPE_END}
+
+
+struct decode_header {
+	union decode_item	type_regs;
+	union decode_item	mask;
+	union decode_item	value;
+};
+
+#define DECODE_HEADER(_type, _mask, _value, _regs)		\
+	{.bits = (_type) | ((_regs) << DECODE_TYPE_BITS)},	\
+	{.bits = (_mask)},					\
+	{.bits = (_value)}
+
+
+struct decode_table {
+	struct decode_header	header;
+	union decode_item	table;
+};
+
+#define DECODE_TABLE(_mask, _value, _table)			\
+	DECODE_HEADER(DECODE_TYPE_TABLE, _mask, _value, 0),	\
+	{.table = (_table)}
+
+
+struct decode_custom {
+	struct decode_header	header;
+	union decode_item	decoder;
+};
+
+#define DECODE_CUSTOM(_mask, _value, _decoder)			\
+	DECODE_HEADER(DECODE_TYPE_CUSTOM, _mask, _value, 0),	\
+	{.action = (_decoder)}
+
+
+struct decode_simulate {
+	struct decode_header	header;
+	union decode_item	handler;
+};
+
+#define DECODE_SIMULATEX(_mask, _value, _handler, _regs)		\
+	DECODE_HEADER(DECODE_TYPE_SIMULATE, _mask, _value, _regs),	\
+	{.action = (_handler)}
+
+#define DECODE_SIMULATE(_mask, _value, _handler)	\
+	DECODE_SIMULATEX(_mask, _value, _handler, 0)
+
+
+struct decode_emulate {
+	struct decode_header	header;
+	union decode_item	handler;
+};
+
+#define DECODE_EMULATEX(_mask, _value, _handler, _regs)			\
+	DECODE_HEADER(DECODE_TYPE_EMULATE, _mask, _value, _regs),	\
+	{.action = (_handler)}
+
+#define DECODE_EMULATE(_mask, _value, _handler)		\
+	DECODE_EMULATEX(_mask, _value, _handler, 0)
+
+
+struct decode_or {
+	struct decode_header	header;
+};
+
+#define DECODE_OR(_mask, _value)				\
+	DECODE_HEADER(DECODE_TYPE_OR, _mask, _value, 0)
+
+enum probes_insn {
+	INSN_REJECTED,
+	INSN_GOOD,
+	INSN_GOOD_NO_SLOT
+};
+
+struct decode_reject {
+	struct decode_header	header;
+};
+
+#define DECODE_REJECT(_mask, _value)				\
+	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
+
+probes_insn_handler_t probes_simulate_nop;
+probes_insn_handler_t probes_emulate_none;
+
+int __kprobes
+probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi,
+		const union decode_item *table, bool thumb, bool emulate,
+		const union decode_action *actions);
+
+#endif
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index f58b723bec00..639bf32689dd 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/processor.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
+#include <asm/system_misc.h>
 #include <asm/mach/time.h>
 #include <asm/tls.h>
 
@@ -48,14 +49,14 @@ unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
 
-static const char *processor_modes[] = {
+static const char *processor_modes[] __maybe_unused = {
   "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
   "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
   "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "UK6_32" , "ABT_32" ,
   "UK8_32" , "UK9_32" , "UK10_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
 };
 
-static const char *isa_modes[] = {
+static const char *isa_modes[] __maybe_unused = {
   "ARM" , "Thumb" , "Jazelle", "ThumbEE"
 };
 
@@ -276,12 +277,17 @@ void __show_regs(struct pt_regs *regs)
 	buf[3] = flags & PSR_V_BIT ? 'V' : 'v';
 	buf[4] = '\0';
 
+#ifndef CONFIG_CPU_V7M
 	printk("Flags: %s  IRQs o%s  FIQs o%s  Mode %s  ISA %s  Segment %s\n",
 		buf, interrupts_enabled(regs) ? "n" : "ff",
 		fast_interrupts_enabled(regs) ? "n" : "ff",
 		processor_modes[processor_mode(regs)],
 		isa_modes[isa_mode(regs)],
 		get_fs() == get_ds() ? "kernel" : "user");
+#else
+	printk("xPSR: %08lx\n", regs->ARM_cpsr);
+#endif
+
 #ifdef CONFIG_CPU_CP15
 	{
 		unsigned int ctrl;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1e8b030dbefd..50e198c1e9c8 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -100,6 +100,9 @@ EXPORT_SYMBOL(system_serial_high);
 unsigned int elf_hwcap __read_mostly;
 EXPORT_SYMBOL(elf_hwcap);
 
+unsigned int elf_hwcap2 __read_mostly;
+EXPORT_SYMBOL(elf_hwcap2);
+
 
 #ifdef MULTI_CPU
 struct processor processor __read_mostly;
@@ -1005,6 +1008,15 @@ static const char *hwcap_str[] = {
 	NULL
 };
 
+static const char *hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+
 static int c_show(struct seq_file *m, void *v)
 {
 	int i, j;
@@ -1028,6 +1040,10 @@ static int c_show(struct seq_file *m, void *v)
 			if (elf_hwcap & (1 << j))
 				seq_printf(m, "%s ", hwcap_str[j]);
 
+		for (j = 0; hwcap2_str[j]; j++)
+			if (elf_hwcap2 & (1 << j))
+				seq_printf(m, "%s ", hwcap2_str[j]);
+
 		seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24);
 		seq_printf(m, "CPU architecture: %s\n",
 			   proc_arch[cpu_architecture()]);
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 04d63880037f..bd1983437205 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -13,6 +13,7 @@
 #include <linux/personality.h>
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
+#include <linux/uprobes.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -590,6 +591,9 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 					return restart;
 				}
 				syscall = 0;
+			} else if (thread_flags & _TIF_UPROBE) {
+				clear_thread_flag(TIF_UPROBE);
+				uprobe_notify_resume(regs);
 			} else {
 				clear_thread_flag(TIF_NOTIFY_RESUME);
 				tracehook_notify_resume(regs);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 172ee18ff124..abd2fc067736 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -445,6 +445,7 @@ die_sig:
 	if (user_debug & UDBG_UNDEFINED) {
 		printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
 			current->comm, task_pid_nr(current), pc);
+		__show_regs(regs);
 		dump_instr(KERN_INFO, regs);
 	}
 #endif
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 00df012c4678..3c217694ebec 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -68,6 +68,12 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2);
 struct unwind_ctrl_block {
 	unsigned long vrs[16];		/* virtual register set */
 	const unsigned long *insn;	/* pointer to the current instructions word */
+	unsigned long sp_high;		/* highest value of sp allowed */
+	/*
+	 * 1 : check for stack overflow for each register pop.
+	 * 0 : save overhead if there is plenty of stack remaining.
+	 */
+	int check_each_pop;
 	int entries;			/* number of entries left to interpret */
 	int byte;			/* current byte number in the instructions word */
 };
@@ -235,12 +241,85 @@ static unsigned long unwind_get_byte(struct unwind_ctrl_block *ctrl)
 	return ret;
 }
 
+/* Before poping a register check whether it is feasible or not */
+static int unwind_pop_register(struct unwind_ctrl_block *ctrl,
+				unsigned long **vsp, unsigned int reg)
+{
+	if (unlikely(ctrl->check_each_pop))
+		if (*vsp >= (unsigned long *)ctrl->sp_high)
+			return -URC_FAILURE;
+
+	ctrl->vrs[reg] = *(*vsp)++;
+	return URC_OK;
+}
+
+/* Helper functions to execute the instructions */
+static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl,
+						unsigned long mask)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int load_sp, reg = 4;
+
+	load_sp = mask & (1 << (13 - 4));
+	while (mask) {
+		if (mask & 1)
+			if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+		mask >>= 1;
+		reg++;
+	}
+	if (!load_sp)
+		ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+static int unwind_exec_pop_r4_to_rN(struct unwind_ctrl_block *ctrl,
+					unsigned long insn)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int reg;
+
+	/* pop R4-R[4+bbb] */
+	for (reg = 4; reg <= 4 + (insn & 7); reg++)
+		if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+
+	if (insn & 0x80)
+		if (unwind_pop_register(ctrl, &vsp, 14))
+				return -URC_FAILURE;
+
+	ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
+static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
+						unsigned long mask)
+{
+	unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
+	int reg = 0;
+
+	/* pop R0-R3 according to mask */
+	while (mask) {
+		if (mask & 1)
+			if (unwind_pop_register(ctrl, &vsp, reg))
+				return -URC_FAILURE;
+		mask >>= 1;
+		reg++;
+	}
+	ctrl->vrs[SP] = (unsigned long)vsp;
+
+	return URC_OK;
+}
+
 /*
  * Execute the current unwind instruction.
  */
 static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 {
 	unsigned long insn = unwind_get_byte(ctrl);
+	int ret = URC_OK;
 
 	pr_debug("%s: insn = %08lx\n", __func__, insn);
 
@@ -250,8 +329,6 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 		ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4;
 	else if ((insn & 0xf0) == 0x80) {
 		unsigned long mask;
-		unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
-		int load_sp, reg = 4;
 
 		insn = (insn << 8) | unwind_get_byte(ctrl);
 		mask = insn & 0x0fff;
@@ -261,29 +338,16 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 			return -URC_FAILURE;
 		}
 
-		/* pop R4-R15 according to mask */
-		load_sp = mask & (1 << (13 - 4));
-		while (mask) {
-			if (mask & 1)
-				ctrl->vrs[reg] = *vsp++;
-			mask >>= 1;
-			reg++;
-		}
-		if (!load_sp)
-			ctrl->vrs[SP] = (unsigned long)vsp;
+		ret = unwind_exec_pop_subset_r4_to_r13(ctrl, mask);
+		if (ret)
+			goto error;
 	} else if ((insn & 0xf0) == 0x90 &&
 		   (insn & 0x0d) != 0x0d)
 		ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f];
 	else if ((insn & 0xf0) == 0xa0) {
-		unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
-		int reg;
-
-		/* pop R4-R[4+bbb] */
-		for (reg = 4; reg <= 4 + (insn & 7); reg++)
-			ctrl->vrs[reg] = *vsp++;
-		if (insn & 0x80)
-			ctrl->vrs[14] = *vsp++;
-		ctrl->vrs[SP] = (unsigned long)vsp;
+		ret = unwind_exec_pop_r4_to_rN(ctrl, insn);
+		if (ret)
+			goto error;
 	} else if (insn == 0xb0) {
 		if (ctrl->vrs[PC] == 0)
 			ctrl->vrs[PC] = ctrl->vrs[LR];
@@ -291,8 +355,6 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 		ctrl->entries = 0;
 	} else if (insn == 0xb1) {
 		unsigned long mask = unwind_get_byte(ctrl);
-		unsigned long *vsp = (unsigned long *)ctrl->vrs[SP];
-		int reg = 0;
 
 		if (mask == 0 || mask & 0xf0) {
 			pr_warning("unwind: Spare encoding %04lx\n",
@@ -300,14 +362,9 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 			return -URC_FAILURE;
 		}
 
-		/* pop R0-R3 according to mask */
-		while (mask) {
-			if (mask & 1)
-				ctrl->vrs[reg] = *vsp++;
-			mask >>= 1;
-			reg++;
-		}
-		ctrl->vrs[SP] = (unsigned long)vsp;
+		ret = unwind_exec_pop_subset_r0_to_r3(ctrl, mask);
+		if (ret)
+			goto error;
 	} else if (insn == 0xb2) {
 		unsigned long uleb128 = unwind_get_byte(ctrl);
 
@@ -320,7 +377,8 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 	pr_debug("%s: fp = %08lx sp = %08lx lr = %08lx pc = %08lx\n", __func__,
 		 ctrl->vrs[FP], ctrl->vrs[SP], ctrl->vrs[LR], ctrl->vrs[PC]);
 
-	return URC_OK;
+error:
+	return ret;
 }
 
 /*
@@ -329,13 +387,13 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
  */
 int unwind_frame(struct stackframe *frame)
 {
-	unsigned long high, low;
+	unsigned long low;
 	const struct unwind_idx *idx;
 	struct unwind_ctrl_block ctrl;
 
-	/* only go to a higher address on the stack */
+	/* store the highest address on the stack to avoid crossing it*/
 	low = frame->sp;
-	high = ALIGN(low, THREAD_SIZE);
+	ctrl.sp_high = ALIGN(low, THREAD_SIZE);
 
 	pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__,
 		 frame->pc, frame->lr, frame->sp);
@@ -382,11 +440,16 @@ int unwind_frame(struct stackframe *frame)
 		return -URC_FAILURE;
 	}
 
+	ctrl.check_each_pop = 0;
+
 	while (ctrl.entries > 0) {
-		int urc = unwind_exec_insn(&ctrl);
+		int urc;
+		if ((ctrl.sp_high - ctrl.vrs[SP]) < sizeof(ctrl.vrs))
+			ctrl.check_each_pop = 1;
+		urc = unwind_exec_insn(&ctrl);
 		if (urc < 0)
 			return urc;
-		if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= high)
+		if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= ctrl.sp_high)
 			return -URC_FAILURE;
 	}
 
diff --git a/arch/arm/kernel/uprobes-arm.c b/arch/arm/kernel/uprobes-arm.c
new file mode 100644
index 000000000000..d3b655ff17da
--- /dev/null
+++ b/arch/arm/kernel/uprobes-arm.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2012 Rabin Vincent <rabin at rab.in>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/wait.h>
+#include <linux/uprobes.h>
+#include <linux/module.h>
+
+#include "probes.h"
+#include "probes-arm.h"
+#include "uprobes.h"
+
+static int uprobes_substitute_pc(unsigned long *pinsn, u32 oregs)
+{
+	probes_opcode_t insn = __mem_to_opcode_arm(*pinsn);
+	probes_opcode_t temp;
+	probes_opcode_t mask;
+	int freereg;
+	u32 free = 0xffff;
+	u32 regs;
+
+	for (regs = oregs; regs; regs >>= 4, insn >>= 4) {
+		if ((regs & 0xf) == REG_TYPE_NONE)
+			continue;
+
+		free &= ~(1 << (insn & 0xf));
+	}
+
+	/* No PC, no problem */
+	if (free & (1 << 15))
+		return 15;
+
+	if (!free)
+		return -1;
+
+	/*
+	 * fls instead of ffs ensures that for "ldrd r0, r1, [pc]" we would
+	 * pick LR instead of R1.
+	 */
+	freereg = free = fls(free) - 1;
+
+	temp = __mem_to_opcode_arm(*pinsn);
+	insn = temp;
+	regs = oregs;
+	mask = 0xf;
+
+	for (; regs; regs >>= 4, mask <<= 4, free <<= 4, temp >>= 4) {
+		if ((regs & 0xf) == REG_TYPE_NONE)
+			continue;
+
+		if ((temp & 0xf) != 15)
+			continue;
+
+		insn &= ~mask;
+		insn |= free & mask;
+	}
+
+	*pinsn = __opcode_to_mem_arm(insn);
+	return freereg;
+}
+
+static void uprobe_set_pc(struct arch_uprobe *auprobe,
+			  struct arch_uprobe_task *autask,
+			  struct pt_regs *regs)
+{
+	u32 pcreg = auprobe->pcreg;
+
+	autask->backup = regs->uregs[pcreg];
+	regs->uregs[pcreg] = regs->ARM_pc + 8;
+}
+
+static void uprobe_unset_pc(struct arch_uprobe *auprobe,
+			    struct arch_uprobe_task *autask,
+			    struct pt_regs *regs)
+{
+	/* PC will be taken care of by common code */
+	regs->uregs[auprobe->pcreg] = autask->backup;
+}
+
+static void uprobe_aluwrite_pc(struct arch_uprobe *auprobe,
+			       struct arch_uprobe_task *autask,
+			       struct pt_regs *regs)
+{
+	u32 pcreg = auprobe->pcreg;
+
+	alu_write_pc(regs->uregs[pcreg], regs);
+	regs->uregs[pcreg] = autask->backup;
+}
+
+static void uprobe_write_pc(struct arch_uprobe *auprobe,
+			    struct arch_uprobe_task *autask,
+			    struct pt_regs *regs)
+{
+	u32 pcreg = auprobe->pcreg;
+
+	load_write_pc(regs->uregs[pcreg], regs);
+	regs->uregs[pcreg] = autask->backup;
+}
+
+enum probes_insn
+decode_pc_ro(probes_opcode_t insn, struct arch_probes_insn *asi,
+	     const struct decode_header *d)
+{
+	struct arch_uprobe *auprobe = container_of(asi, struct arch_uprobe,
+						   asi);
+	struct decode_emulate *decode = (struct decode_emulate *) d;
+	u32 regs = decode->header.type_regs.bits >> DECODE_TYPE_BITS;
+	int reg;
+
+	reg = uprobes_substitute_pc(&auprobe->ixol[0], regs);
+	if (reg == 15)
+		return INSN_GOOD;
+
+	if (reg == -1)
+		return INSN_REJECTED;
+
+	auprobe->pcreg = reg;
+	auprobe->prehandler = uprobe_set_pc;
+	auprobe->posthandler = uprobe_unset_pc;
+
+	return INSN_GOOD;
+}
+
+enum probes_insn
+decode_wb_pc(probes_opcode_t insn, struct arch_probes_insn *asi,
+	     const struct decode_header *d, bool alu)
+{
+	struct arch_uprobe *auprobe = container_of(asi, struct arch_uprobe,
+						   asi);
+	enum probes_insn ret = decode_pc_ro(insn, asi, d);
+
+	if (((insn >> 12) & 0xf) == 15)
+		auprobe->posthandler = alu ? uprobe_aluwrite_pc
+					   : uprobe_write_pc;
+
+	return ret;
+}
+
+enum probes_insn
+decode_rd12rn16rm0rs8_rwflags(probes_opcode_t insn,
+			      struct arch_probes_insn *asi,
+			      const struct decode_header *d)
+{
+	return decode_wb_pc(insn, asi, d, true);
+}
+
+enum probes_insn
+decode_ldr(probes_opcode_t insn, struct arch_probes_insn *asi,
+	   const struct decode_header *d)
+{
+	return decode_wb_pc(insn, asi, d, false);
+}
+
+enum probes_insn
+uprobe_decode_ldmstm(probes_opcode_t insn,
+		     struct arch_probes_insn *asi,
+		     const struct decode_header *d)
+{
+	struct arch_uprobe *auprobe = container_of(asi, struct arch_uprobe,
+						   asi);
+	unsigned reglist = insn & 0xffff;
+	int rn = (insn >> 16) & 0xf;
+	int lbit = insn & (1 << 20);
+	unsigned used = reglist | (1 << rn);
+
+	if (rn == 15)
+		return INSN_REJECTED;
+
+	if (!(used & (1 << 15)))
+		return INSN_GOOD;
+
+	if (used & (1 << 14))
+		return INSN_REJECTED;
+
+	/* Use LR instead of PC */
+	insn ^= 0xc000;
+
+	auprobe->pcreg = 14;
+	auprobe->ixol[0] = __opcode_to_mem_arm(insn);
+
+	auprobe->prehandler = uprobe_set_pc;
+	if (lbit)
+		auprobe->posthandler = uprobe_write_pc;
+	else
+		auprobe->posthandler = uprobe_unset_pc;
+
+	return INSN_GOOD;
+}
+
+const union decode_action uprobes_probes_actions[] = {
+	[PROBES_EMULATE_NONE] = {.handler = probes_simulate_nop},
+	[PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop},
+	[PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop},
+	[PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop},
+	[PROBES_BRANCH_IMM] = {.handler = simulate_blx1},
+	[PROBES_MRS] = {.handler = simulate_mrs},
+	[PROBES_BRANCH_REG] = {.handler = simulate_blx2bx},
+	[PROBES_CLZ] = {.handler = probes_simulate_nop},
+	[PROBES_SATURATING_ARITHMETIC] = {.handler = probes_simulate_nop},
+	[PROBES_MUL1] = {.handler = probes_simulate_nop},
+	[PROBES_MUL2] = {.handler = probes_simulate_nop},
+	[PROBES_SWP] = {.handler = probes_simulate_nop},
+	[PROBES_LDRSTRD] = {.decoder = decode_pc_ro},
+	[PROBES_LOAD_EXTRA] = {.decoder = decode_pc_ro},
+	[PROBES_LOAD] = {.decoder = decode_ldr},
+	[PROBES_STORE_EXTRA] = {.decoder = decode_pc_ro},
+	[PROBES_STORE] = {.decoder = decode_pc_ro},
+	[PROBES_MOV_IP_SP] = {.handler = simulate_mov_ipsp},
+	[PROBES_DATA_PROCESSING_REG] = {
+		.decoder = decode_rd12rn16rm0rs8_rwflags},
+	[PROBES_DATA_PROCESSING_IMM] = {
+		.decoder = decode_rd12rn16rm0rs8_rwflags},
+	[PROBES_MOV_HALFWORD] = {.handler = probes_simulate_nop},
+	[PROBES_SEV] = {.handler = probes_simulate_nop},
+	[PROBES_WFE] = {.handler = probes_simulate_nop},
+	[PROBES_SATURATE] = {.handler = probes_simulate_nop},
+	[PROBES_REV] = {.handler = probes_simulate_nop},
+	[PROBES_MMI] = {.handler = probes_simulate_nop},
+	[PROBES_PACK] = {.handler = probes_simulate_nop},
+	[PROBES_EXTEND] = {.handler = probes_simulate_nop},
+	[PROBES_EXTEND_ADD] = {.handler = probes_simulate_nop},
+	[PROBES_MUL_ADD_LONG] = {.handler = probes_simulate_nop},
+	[PROBES_MUL_ADD] = {.handler = probes_simulate_nop},
+	[PROBES_BITFIELD] = {.handler = probes_simulate_nop},
+	[PROBES_BRANCH] = {.handler = simulate_bbl},
+	[PROBES_LDMSTM] = {.decoder = uprobe_decode_ldmstm}
+};
diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/kernel/uprobes.c
new file mode 100644
index 000000000000..f9bacee973bf
--- /dev/null
+++ b/arch/arm/kernel/uprobes.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2012 Rabin Vincent <rabin at rab.in>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/uprobes.h>
+#include <linux/notifier.h>
+
+#include <asm/opcodes.h>
+#include <asm/traps.h>
+
+#include "probes.h"
+#include "probes-arm.h"
+#include "uprobes.h"
+
+#define UPROBE_TRAP_NR	UINT_MAX
+
+bool is_swbp_insn(uprobe_opcode_t *insn)
+{
+	return (__mem_to_opcode_arm(*insn) & 0x0fffffff) ==
+		(UPROBE_SWBP_ARM_INSN & 0x0fffffff);
+}
+
+int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
+	     unsigned long vaddr)
+{
+	return uprobe_write_opcode(mm, vaddr,
+		   __opcode_to_mem_arm(auprobe->bpinsn));
+}
+
+bool arch_uprobe_ignore(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if (!auprobe->asi.insn_check_cc(regs->ARM_cpsr)) {
+		regs->ARM_pc += 4;
+		return true;
+	}
+
+	return false;
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	probes_opcode_t opcode;
+
+	if (!auprobe->simulate)
+		return false;
+
+	opcode = __mem_to_opcode_arm(*(unsigned int *) auprobe->insn);
+
+	auprobe->asi.insn_singlestep(opcode, &auprobe->asi, regs);
+
+	return true;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+				  struct pt_regs *regs)
+{
+	unsigned long orig_ret_vaddr;
+
+	orig_ret_vaddr = regs->ARM_lr;
+	/* Replace the return addr with trampoline addr */
+	regs->ARM_lr = trampoline_vaddr;
+	return orig_ret_vaddr;
+}
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+			     unsigned long addr)
+{
+	unsigned int insn;
+	unsigned int bpinsn;
+	enum probes_insn ret;
+
+	/* Thumb not yet support */
+	if (addr & 0x3)
+		return -EINVAL;
+
+	insn = __mem_to_opcode_arm(*(unsigned int *)auprobe->insn);
+	auprobe->ixol[0] = __opcode_to_mem_arm(insn);
+	auprobe->ixol[1] = __opcode_to_mem_arm(UPROBE_SS_ARM_INSN);
+
+	ret = arm_probes_decode_insn(insn, &auprobe->asi, false,
+				     uprobes_probes_actions);
+	switch (ret) {
+	case INSN_REJECTED:
+		return -EINVAL;
+
+	case INSN_GOOD_NO_SLOT:
+		auprobe->simulate = true;
+		break;
+
+	case INSN_GOOD:
+	default:
+		break;
+	}
+
+	bpinsn = UPROBE_SWBP_ARM_INSN & 0x0fffffff;
+	if (insn >= 0xe0000000)
+		bpinsn |= 0xe0000000;  /* Unconditional instruction */
+	else
+		bpinsn |= insn & 0xf0000000;  /* Copy condition from insn */
+
+	auprobe->bpinsn = bpinsn;
+
+	return 0;
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	if (auprobe->prehandler)
+		auprobe->prehandler(auprobe, &utask->autask, regs);
+
+	utask->autask.saved_trap_no = current->thread.trap_no;
+	current->thread.trap_no = UPROBE_TRAP_NR;
+	regs->ARM_pc = utask->xol_vaddr;
+
+	return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	WARN_ON_ONCE(current->thread.trap_no != UPROBE_TRAP_NR);
+
+	current->thread.trap_no = utask->autask.saved_trap_no;
+	regs->ARM_pc = utask->vaddr + 4;
+
+	if (auprobe->posthandler)
+		auprobe->posthandler(auprobe, &utask->autask, regs);
+
+	return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+	if (t->thread.trap_no != UPROBE_TRAP_NR)
+		return true;
+
+	return false;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	current->thread.trap_no = utask->autask.saved_trap_no;
+	instruction_pointer_set(regs, utask->vaddr);
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self,
+				 unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
+
+static int uprobe_trap_handler(struct pt_regs *regs, unsigned int instr)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	instr &= 0x0fffffff;
+	if (instr == (UPROBE_SWBP_ARM_INSN & 0x0fffffff))
+		uprobe_pre_sstep_notifier(regs);
+	else if (instr == (UPROBE_SS_ARM_INSN & 0x0fffffff))
+		uprobe_post_sstep_notifier(regs);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+	return instruction_pointer(regs);
+}
+
+static struct undef_hook uprobes_arm_break_hook = {
+	.instr_mask	= 0x0fffffff,
+	.instr_val	= (UPROBE_SWBP_ARM_INSN & 0x0fffffff),
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= USR_MODE,
+	.fn		= uprobe_trap_handler,
+};
+
+static struct undef_hook uprobes_arm_ss_hook = {
+	.instr_mask	= 0x0fffffff,
+	.instr_val	= (UPROBE_SS_ARM_INSN & 0x0fffffff),
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= USR_MODE,
+	.fn		= uprobe_trap_handler,
+};
+
+static int arch_uprobes_init(void)
+{
+	register_undef_hook(&uprobes_arm_break_hook);
+	register_undef_hook(&uprobes_arm_ss_hook);
+
+	return 0;
+}
+device_initcall(arch_uprobes_init);
diff --git a/arch/arm/kernel/uprobes.h b/arch/arm/kernel/uprobes.h
new file mode 100644
index 000000000000..1d0c12dfbd03
--- /dev/null
+++ b/arch/arm/kernel/uprobes.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012 Rabin Vincent <rabin at rab.in>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ARM_KERNEL_UPROBES_H
+#define __ARM_KERNEL_UPROBES_H
+
+enum probes_insn uprobe_decode_ldmstm(probes_opcode_t insn,
+				      struct arch_probes_insn *asi,
+				      const struct decode_header *d);
+
+enum probes_insn decode_ldr(probes_opcode_t insn,
+			    struct arch_probes_insn *asi,
+			    const struct decode_header *d);
+
+enum probes_insn
+decode_rd12rn16rm0rs8_rwflags(probes_opcode_t insn,
+			      struct arch_probes_insn *asi,
+			      const struct decode_header *d);
+
+enum probes_insn
+decode_wb_pc(probes_opcode_t insn, struct arch_probes_insn *asi,
+	     const struct decode_header *d, bool alu);
+
+enum probes_insn
+decode_pc_ro(probes_opcode_t insn, struct arch_probes_insn *asi,
+	     const struct decode_header *d);
+
+extern const union decode_action uprobes_probes_actions[];
+
+#endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 1d8248ea5669..bd18bb8b2770 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -878,7 +878,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT) {
+	if (cmd == CPU_PM_EXIT &&
+	    __hyp_get_vectors() == hyp_default_vectors) {
 		cpu_init_hyp_mode(NULL);
 		return NOTIFY_OK;
 	}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index ddc15539bad2..0d68d4073068 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -220,6 +220,10 @@ after_vfp_restore:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
+ * A function pointer with a value of 0xffffffff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
+ * arch/arm/kernel/hyp_stub.S.
+ *
  * The calling convention follows the standard AAPCS:
  *   r0 - r3: caller save
  *   r12:     caller save
@@ -363,6 +367,11 @@ hyp_hvc:
 host_switch_to_hyp:
 	pop	{r0, r1, r2}
 
+	/* Check for __hyp_get_vectors */
+	cmp	r0, #-1
+	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
+	beq	1f
+
 	push	{lr}
 	mrs	lr, SPSR
 	push	{lr}
@@ -378,7 +387,7 @@ THUMB(	orr	lr, #1)
 	pop	{lr}
 	msr	SPSR_csxf, lr
 	pop	{lr}
-	eret
+1:	eret
 
 guest_trap:
 	load_vcpu			@ Load VCPU pointer to r0
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 52886b89706c..9f12ed1eea86 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -37,6 +37,11 @@ UNWIND(	.fnstart	)
 	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
 	smp_dmb
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 1:	ldrex	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
 	\instr	r2, r2, r3		@ toggle bit
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..3bc8eb811a73 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
 		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r7, lspush #\push
+		mov	r7, r7, lspull #\pull
+		orr	r7, r7, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
 		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -225,10 +225,10 @@
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..d6e742d24007 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
 		tst	len, #2
 		mov	r5, r4, get_byte_0
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
 		cmp	ip, #2
 		beq	.Lsrc2_aligned
 		bhi	.Lsrc3_aligned
-		mov	r4, r5, pull #8		@ C = 0
+		mov	r4, r5, lspull #8		@ C = 0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		mov	r7, r7, pull #8
-		orr	r7, r7, r8, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
+		mov	r7, r7, lspull #8
+		orr	r7, r7, r8, lspush #24
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #8
+		mov	r4, r8, lspull #8
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -196,50 +196,50 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #8
+		mov	r4, r6, lspull #8
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #24
+		orr	r4, r4, r5, lspush #24
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #8
+		mov	r4, r5, lspull #8
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
 		tst	len, #2
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 		b	.Lexit
 
-.Lsrc2_aligned:	mov	r4, r5, pull #16
+.Lsrc2_aligned:	mov	r4, r5, lspull #16
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		mov	r7, r7, pull #16
-		orr	r7, r7, r8, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
+		mov	r7, r7, lspull #16
+		orr	r7, r7, r8, lspush #16
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #16
+		mov	r4, r8, lspull #16
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -248,20 +248,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #16
+		mov	r4, r6, lspull #16
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #16
+		orr	r4, r4, r5, lspush #16
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #16
+		mov	r4, r5, lspull #16
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
 		load1b	r5
 		b	.Lexit
 
-.Lsrc3_aligned:	mov	r4, r5, pull #24
+.Lsrc3_aligned:	mov	r4, r5, lspull #24
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		mov	r7, r7, pull #24
-		orr	r7, r7, r8, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
+		mov	r7, r7, lspull #24
+		orr	r7, r7, r8, lspush #8
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #24
+		mov	r4, r8, lspull #24
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -302,20 +302,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #24
+		mov	r4, r6, lspull #24
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #8
+		orr	r4, r4, r5, lspush #8
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #24
+		mov	r4, r5, lspull #24
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
 		load1l	r4
 		mov	r5, r4, get_byte_0
 		strb	r5, [dst], #1
-		adcs	sum, sum, r4, push #24
+		adcs	sum, sum, r4, lspush #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
 FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..7a7430950c79 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
 		strb	ip, [r1], #1
 
 4:		subs	r2, r2, #1
-		mov	ip, r3, pull #24
+		mov	ip, r3, lspull #24
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #8
+		orrne	ip, ip, r3, lspush #8
 		strne	ip, [r1], #4
 		bne	4b
 		b	8f
 
 5:		subs	r2, r2, #1
-		mov	ip, r3, pull #16
+		mov	ip, r3, lspull #16
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #16
+		orrne	ip, ip, r3, lspush #16
 		strne	ip, [r1], #4
 		bne	5b
 		b	7f
 
 6:		subs	r2, r2, #1
-		mov	ip, r3, pull #8
+		mov	ip, r3, lspull #8
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #24
+		orrne	ip, ip, r3, lspush #24
 		strne	ip, [r1], #4
 		bne	6b
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..d0d104a0dd11 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
 		blt	5f
 		bgt	6f
 
-4:		mov	ip, r3, pull #16
+4:		mov	ip, r3, lspull #16
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #16
+		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
 		mov	pc, lr
 
-5:		mov	ip, r3, pull #8
+5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #24
+		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
 		mov	pc, lr
 
-6:		mov	ip, r3, pull #24
+6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #8
+		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
 		mov	pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..d1fc0c0c342c 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, lspush #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, lspull #\pull
+		mov     ip, ip, lspush #\push
+		orr     ip, ip, r9, lspull #\pull
+		mov     r9, r9, lspush #\push
+		orr     r9, r9, r8, lspull #\pull
+		mov     r8, r8, lspush #\push
+		orr     r8, r8, r7, lspull #\pull
+		mov     r7, r7, lspush #\push
+		orr     r7, r7, r6, lspull #\pull
+		mov     r6, r6, lspush #\push
+		orr     r6, r6, r5, lspull #\pull
+		mov     r5, r5, lspush #\push
+		orr     r5, r5, r4, lspull #\pull
+		mov     r4, r4, lspush #\push
+		orr     r4, r4, r3, lspull #\pull
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -175,10 +175,10 @@ ENTRY(memmove)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, lspush #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, lspull #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1cb8ed..e50520904b76 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -131,30 +131,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_1rem8lp
 
-.Lc2u_1cpy8lp:	mov	r3, r7, pull #8
+.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_1cpy8lp
 
 .Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
@@ -172,9 +172,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -186,30 +186,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_2rem8lp
 
-.Lc2u_2cpy8lp:	mov	r3, r7, pull #16
+.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_2cpy8lp
 
 .Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
@@ -227,9 +227,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -241,30 +241,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_3rem8lp
 
-.Lc2u_3cpy8lp:	mov	r3, r7, pull #24
+.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_3cpy8lp
 
 .Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
@@ -382,9 +382,9 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 .Lcfu_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -396,30 +396,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_1rem8lp
 
-.Lcfu_1cpy8lp:	mov	r3, r7, pull #8
+.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_1cpy8lp
 
 .Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_1fupi
@@ -437,9 +437,9 @@ USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
 .Lcfu_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -452,30 +452,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		blt	.Lcfu_2rem8lp
 
 
-.Lcfu_2cpy8lp:	mov	r3, r7, pull #16
+.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_2cpy8lp
 
 .Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_2fupi
@@ -493,9 +493,9 @@ USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
 .Lcfu_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -507,30 +507,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_3rem8lp
 
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24
+.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}
 		subs	ip, ip, #16
 		bpl	.Lcfu_3cpy8lp
 
 .Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_3fupi
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index e38b279f402c..384dc859e6c6 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -155,7 +155,7 @@ static irqreturn_t cns3xxx_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction cns3xxx_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= cns3xxx_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c
index 68ac934d4565..8254e716b095 100644
--- a/arch/arm/mach-ebsa110/core.c
+++ b/arch/arm/mach-ebsa110/core.c
@@ -206,7 +206,7 @@ ebsa110_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction ebsa110_timer_irq = {
 	.name		= "EBSA110 Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= ebsa110_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index 3971104d32d4..bf7aa7d298e7 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -105,7 +105,7 @@ static irqreturn_t timer1_interrupt(int irq, void *dev_id)
 static struct irqaction footbridge_timer_irq = {
 	.name		= "dc21285_timer1",
 	.handler	= timer1_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.dev_id		= &ckevt_dc21285,
 };
 
@@ -125,7 +125,7 @@ void __init footbridge_timer_init(void)
 	clockevents_config_and_register(ce, rate, 0x4, 0xffffff);
 }
 
-static u32 notrace footbridge_read_sched_clock(void)
+static u64 notrace footbridge_read_sched_clock(void)
 {
 	return ~*CSR_TIMER3_VALUE;
 }
@@ -138,5 +138,5 @@ void __init footbridge_sched_clock(void)
 	*CSR_TIMER3_CLR = 0;
 	*CSR_TIMER3_CNTL = TIMER_CNTL_ENABLE | TIMER_CNTL_DIV16;
 
-	setup_sched_clock(footbridge_read_sched_clock, 24, rate);
+	sched_clock_register(footbridge_read_sched_clock, 24, rate);
 }
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 7c2fdae9a38b..96a3d73ef4bf 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -334,15 +334,15 @@ void __init dc21285_preinit(void)
 	/*
 	 * We don't care if these fail.
 	 */
-	dc21285_request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_SERR, dc21285_serr_irq, 0,
 			    "PCI system error", &serr_timer);
-	dc21285_request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_PERR, dc21285_parity_irq, 0,
 			    "PCI parity error", &perr_timer);
-	dc21285_request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, 0,
 			    "PCI abort", NULL);
-	dc21285_request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, 0,
 			    "Discard timer", NULL);
-	dc21285_request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, 0,
 			    "PCI data parity", NULL);
 
 	if (cfn_mode) {
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index d9301dd56354..b73f52e196b9 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -27,7 +27,7 @@ static irqreturn_t pit_timer_interrupt(int irq, void *dev_id)
 static struct irqaction pit_timer_irq = {
 	.name		= "pit",
 	.handler	= pit_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.dev_id		= &i8253_clockevent,
 };
 
diff --git a/arch/arm/mach-gemini/idle.c b/arch/arm/mach-gemini/idle.c
index 87dff4f5059e..ddf8ec9d203b 100644
--- a/arch/arm/mach-gemini/idle.c
+++ b/arch/arm/mach-gemini/idle.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/init.h>
-#include <asm/system.h>
+#include <asm/system_misc.h>
 #include <asm/proc-fns.h>
 
 static void gemini_idle(void)
diff --git a/arch/arm/mach-hisi/Kconfig b/arch/arm/mach-hisi/Kconfig
index 8f4649b301b2..1abae5f6a418 100644
--- a/arch/arm/mach-hisi/Kconfig
+++ b/arch/arm/mach-hisi/Kconfig
@@ -8,7 +8,7 @@ config ARCH_HI3xxx
 	select CLKSRC_OF
 	select GENERIC_CLOCKEVENTS
 	select HAVE_ARM_SCU
-	select HAVE_ARM_TWD
+	select HAVE_ARM_TWD if SMP
 	select HAVE_SMP
 	select PINCTRL
 	select PINCTRL_SINGLE
diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
index befcaf5d0574..ec419649320f 100644
--- a/arch/arm/mach-imx/Makefile
+++ b/arch/arm/mach-imx/Makefile
@@ -101,11 +101,9 @@ obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
 obj-$(CONFIG_SOC_IMX6Q) += clk-imx6q.o mach-imx6q.o
 obj-$(CONFIG_SOC_IMX6SL) += clk-imx6sl.o mach-imx6sl.o
 
-ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_SOC_IMX6Q) += pm-imx6q.o headsmp.o
 # i.MX6SL reuses i.MX6Q code
 obj-$(CONFIG_SOC_IMX6SL) += pm-imx6q.o headsmp.o
-endif
 
 # i.MX5 based machines
 obj-$(CONFIG_MACH_MX51_BABBAGE) += mach-mx51_babbage.o
diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index af2e582d2b74..4d677f442539 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -482,6 +482,9 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	if (IS_ENABLED(CONFIG_PCI_IMX6))
 		clk_set_parent(clk[lvds1_sel], clk[sata_ref]);
 
+	/* Set initial power mode */
+	imx6q_set_lpm(WAIT_CLOCKED);
+
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpt");
 	base = of_iomap(np, 0);
 	WARN_ON(!base);
diff --git a/arch/arm/mach-imx/clk-imx6sl.c b/arch/arm/mach-imx/clk-imx6sl.c
index 3781a1853998..4c86f3035205 100644
--- a/arch/arm/mach-imx/clk-imx6sl.c
+++ b/arch/arm/mach-imx/clk-imx6sl.c
@@ -266,6 +266,9 @@ static void __init imx6sl_clocks_init(struct device_node *ccm_node)
 	/* Audio-related clocks configuration */
 	clk_set_parent(clks[IMX6SL_CLK_SPDIF0_SEL], clks[IMX6SL_CLK_PLL3_PFD3]);
 
+	/* Set initial power mode */
+	imx6q_set_lpm(WAIT_CLOCKED);
+
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6sl-gpt");
 	base = of_iomap(np, 0);
 	WARN_ON(!base);
diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h
index 59c3b9b26bb4..baf439dc22d8 100644
--- a/arch/arm/mach-imx/common.h
+++ b/arch/arm/mach-imx/common.h
@@ -144,13 +144,11 @@ void imx6q_set_chicken_bit(void);
 void imx_cpu_die(unsigned int cpu);
 int imx_cpu_kill(unsigned int cpu);
 
-#ifdef CONFIG_PM
 void imx6q_pm_init(void);
 void imx6q_pm_set_ccm_base(void __iomem *base);
+#ifdef CONFIG_PM
 void imx5_pm_init(void);
 #else
-static inline void imx6q_pm_init(void) {}
-static inline void imx6q_pm_set_ccm_base(void __iomem *base) {}
 static inline void imx5_pm_init(void) {}
 #endif
 
diff --git a/arch/arm/mach-imx/pm-imx6q.c b/arch/arm/mach-imx/pm-imx6q.c
index 9d47adc078aa..7a9b98589db7 100644
--- a/arch/arm/mach-imx/pm-imx6q.c
+++ b/arch/arm/mach-imx/pm-imx6q.c
@@ -236,8 +236,6 @@ void __init imx6q_pm_init(void)
 		regmap_update_bits(gpr, IOMUXC_GPR1, IMX6Q_GPR1_GINT,
 				   IMX6Q_GPR1_GINT);
 
-	/* Set initial power mode */
-	imx6q_set_lpm(WAIT_CLOCKED);
 
 	suspend_set_ops(&imx6q_pm_ops);
 }
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 17c0fe627435..e4f27f0e56ac 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -358,7 +358,7 @@ static struct clock_event_device integrator_clockevent = {
 
 static struct irqaction integrator_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= integrator_timer_interrupt,
 	.dev_id		= &integrator_clockevent,
 };
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 6d68aed6548a..a465f27bc263 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -312,7 +312,7 @@ static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction ixp4xx_timer_irq = {
 	.name		= "timer1",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= ixp4xx_timer_interrupt,
 	.dev_id		= &clockevent_ixp4xx,
 };
diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c
index 736dc692d540..43ee06d3abe5 100644
--- a/arch/arm/mach-ixp4xx/dsmg600-setup.c
+++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c
@@ -233,8 +233,7 @@ static int __init dsmg600_gpio_init(void)
 
 	gpio_request(DSMG600_RB_GPIO, "reset button");
 	if (request_irq(gpio_to_irq(DSMG600_RB_GPIO), &dsmg600_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW,
-		"DSM-G600 reset button", NULL) < 0) {
+		IRQF_TRIGGER_LOW, "DSM-G600 reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(DSMG600_RB_GPIO));
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c
index 429966b756ed..5c4b0c4a1b37 100644
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -208,16 +208,14 @@ static void __init fsg_init(void)
 	platform_add_devices(fsg_devices, ARRAY_SIZE(fsg_devices));
 
 	if (request_irq(gpio_to_irq(FSG_RB_GPIO), &fsg_reset_handler,
-			IRQF_DISABLED | IRQF_TRIGGER_LOW,
-			"FSG reset button", NULL) < 0) {
+			IRQF_TRIGGER_LOW, "FSG reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(FSG_RB_GPIO));
 	}
 
 	if (request_irq(gpio_to_irq(FSG_SB_GPIO), &fsg_power_handler,
-			IRQF_DISABLED | IRQF_TRIGGER_LOW,
-			"FSG power button", NULL) < 0) {
+			IRQF_TRIGGER_LOW, "FSG power button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Power Button IRQ %d not available\n",
 			gpio_to_irq(FSG_SB_GPIO));
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index 507cb5233537..4e0f762bc651 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -295,8 +295,7 @@ static void __init nas100d_init(void)
 	pm_power_off = nas100d_power_off;
 
 	if (request_irq(gpio_to_irq(NAS100D_RB_GPIO), &nas100d_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW,
-		"NAS100D reset button", NULL) < 0) {
+		IRQF_TRIGGER_LOW, "NAS100D reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(NAS100D_RB_GPIO));
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index ba5f1cda2a9d..88c025f52d8d 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -265,16 +265,14 @@ static void __init nslu2_init(void)
 	pm_power_off = nslu2_power_off;
 
 	if (request_irq(gpio_to_irq(NSLU2_RB_GPIO), &nslu2_reset_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_LOW,
-		"NSLU2 reset button", NULL) < 0) {
+		IRQF_TRIGGER_LOW, "NSLU2 reset button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
 			gpio_to_irq(NSLU2_RB_GPIO));
 	}
 
 	if (request_irq(gpio_to_irq(NSLU2_PB_GPIO), &nslu2_power_handler,
-		IRQF_DISABLED | IRQF_TRIGGER_HIGH,
-		"NSLU2 power button", NULL) < 0) {
+		IRQF_TRIGGER_HIGH, "NSLU2 power button", NULL) < 0) {
 
 		printk(KERN_DEBUG "Power Button IRQ %d not available\n",
 			gpio_to_irq(NSLU2_PB_GPIO));
diff --git a/arch/arm/mach-ks8695/time.c b/arch/arm/mach-ks8695/time.c
index 426c97662f5b..a197874bf382 100644
--- a/arch/arm/mach-ks8695/time.c
+++ b/arch/arm/mach-ks8695/time.c
@@ -122,7 +122,7 @@ static irqreturn_t ks8695_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction ks8695_timer_irq = {
 	.name		= "ks8695_tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
+	.flags		= IRQF_TIMER,
 	.handler	= ks8695_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c
index 20eab63d10ba..4e5837299c04 100644
--- a/arch/arm/mach-lpc32xx/timer.c
+++ b/arch/arm/mach-lpc32xx/timer.c
@@ -90,7 +90,7 @@ static irqreturn_t lpc32xx_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction lpc32xx_timer_irq = {
 	.name		= "LPC32XX Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= lpc32xx_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 024022d91fe3..bbcd2322fd27 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -186,7 +186,7 @@ static void __init timer_config(void)
 
 static struct irqaction timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= timer_interrupt,
 	.dev_id		= &ckevt,
 };
diff --git a/arch/arm/mach-moxart/Kconfig b/arch/arm/mach-moxart/Kconfig
index ba470d64493b..3795ae28a613 100644
--- a/arch/arm/mach-moxart/Kconfig
+++ b/arch/arm/mach-moxart/Kconfig
@@ -2,7 +2,6 @@ config ARCH_MOXART
 	bool "MOXA ART SoC" if ARCH_MULTI_V4T
 	select CPU_FA526
 	select ARM_DMA_MEM_BUFFERABLE
-	select DMA_OF
 	select USE_OF
 	select CLKSRC_OF
 	select CLKSRC_MMIO
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index 6df42e643031..3177c7a40930 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -99,7 +99,7 @@ netx_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction netx_timer_irq = {
 	.name		= "NetX Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= netx_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 91449c5cb70f..85089d821982 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -156,6 +156,7 @@ static struct omap_usb_config nokia770_usb_config __initdata = {
 	.register_dev	= 1,
 	.hmc_mode	= 16,
 	.pins[0]	= 6,
+	.extcon		= "tahvo-usb",
 };
 
 #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE)
diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c
index 5bb8ce86d54b..4be601b638d7 100644
--- a/arch/arm/mach-omap1/dma.c
+++ b/arch/arm/mach-omap1/dma.c
@@ -32,55 +32,51 @@
 
 #define OMAP1_DMA_BASE			(0xfffed800)
 #define OMAP1_LOGICAL_DMA_CH_COUNT	17
-#define OMAP1_DMA_STRIDE		0x40
 
-static u32 errata;
 static u32 enable_1510_mode;
-static u8 dma_stride;
-static enum omap_reg_offsets dma_common_ch_start, dma_common_ch_end;
-
-static u16 reg_map[] = {
-	[GCR]		= 0x400,
-	[GSCR]		= 0x404,
-	[GRST1]		= 0x408,
-	[HW_ID]		= 0x442,
-	[PCH2_ID]	= 0x444,
-	[PCH0_ID]	= 0x446,
-	[PCH1_ID]	= 0x448,
-	[PCHG_ID]	= 0x44a,
-	[PCHD_ID]	= 0x44c,
-	[CAPS_0]	= 0x44e,
-	[CAPS_1]	= 0x452,
-	[CAPS_2]	= 0x456,
-	[CAPS_3]	= 0x458,
-	[CAPS_4]	= 0x45a,
-	[PCH2_SR]	= 0x460,
-	[PCH0_SR]	= 0x480,
-	[PCH1_SR]	= 0x482,
-	[PCHD_SR]	= 0x4c0,
+
+static const struct omap_dma_reg reg_map[] = {
+	[GCR]		= { 0x0400, 0x00, OMAP_DMA_REG_16BIT },
+	[GSCR]		= { 0x0404, 0x00, OMAP_DMA_REG_16BIT },
+	[GRST1]		= { 0x0408, 0x00, OMAP_DMA_REG_16BIT },
+	[HW_ID]		= { 0x0442, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH2_ID]	= { 0x0444, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH0_ID]	= { 0x0446, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH1_ID]	= { 0x0448, 0x00, OMAP_DMA_REG_16BIT },
+	[PCHG_ID]	= { 0x044a, 0x00, OMAP_DMA_REG_16BIT },
+	[PCHD_ID]	= { 0x044c, 0x00, OMAP_DMA_REG_16BIT },
+	[CAPS_0]	= { 0x044e, 0x00, OMAP_DMA_REG_2X16BIT },
+	[CAPS_1]	= { 0x0452, 0x00, OMAP_DMA_REG_2X16BIT },
+	[CAPS_2]	= { 0x0456, 0x00, OMAP_DMA_REG_16BIT },
+	[CAPS_3]	= { 0x0458, 0x00, OMAP_DMA_REG_16BIT },
+	[CAPS_4]	= { 0x045a, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH2_SR]	= { 0x0460, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH0_SR]	= { 0x0480, 0x00, OMAP_DMA_REG_16BIT },
+	[PCH1_SR]	= { 0x0482, 0x00, OMAP_DMA_REG_16BIT },
+	[PCHD_SR]	= { 0x04c0, 0x00, OMAP_DMA_REG_16BIT },
 
 	/* Common Registers */
-	[CSDP]		= 0x00,
-	[CCR]		= 0x02,
-	[CICR]		= 0x04,
-	[CSR]		= 0x06,
-	[CEN]		= 0x10,
-	[CFN]		= 0x12,
-	[CSFI]		= 0x14,
-	[CSEI]		= 0x16,
-	[CPC]		= 0x18,	/* 15xx only */
-	[CSAC]		= 0x18,
-	[CDAC]		= 0x1a,
-	[CDEI]		= 0x1c,
-	[CDFI]		= 0x1e,
-	[CLNK_CTRL]	= 0x28,
+	[CSDP]		= { 0x0000, 0x40, OMAP_DMA_REG_16BIT },
+	[CCR]		= { 0x0002, 0x40, OMAP_DMA_REG_16BIT },
+	[CICR]		= { 0x0004, 0x40, OMAP_DMA_REG_16BIT },
+	[CSR]		= { 0x0006, 0x40, OMAP_DMA_REG_16BIT },
+	[CEN]		= { 0x0010, 0x40, OMAP_DMA_REG_16BIT },
+	[CFN]		= { 0x0012, 0x40, OMAP_DMA_REG_16BIT },
+	[CSFI]		= { 0x0014, 0x40, OMAP_DMA_REG_16BIT },
+	[CSEI]		= { 0x0016, 0x40, OMAP_DMA_REG_16BIT },
+	[CPC]		= { 0x0018, 0x40, OMAP_DMA_REG_16BIT },	/* 15xx only */
+	[CSAC]		= { 0x0018, 0x40, OMAP_DMA_REG_16BIT },
+	[CDAC]		= { 0x001a, 0x40, OMAP_DMA_REG_16BIT },
+	[CDEI]		= { 0x001c, 0x40, OMAP_DMA_REG_16BIT },
+	[CDFI]		= { 0x001e, 0x40, OMAP_DMA_REG_16BIT },
+	[CLNK_CTRL]	= { 0x0028, 0x40, OMAP_DMA_REG_16BIT },
 
 	/* Channel specific register offsets */
-	[CSSA]		= 0x08,
-	[CDSA]		= 0x0c,
-	[COLOR]		= 0x20,
-	[CCR2]		= 0x24,
-	[LCH_CTRL]	= 0x2a,
+	[CSSA]		= { 0x0008, 0x40, OMAP_DMA_REG_2X16BIT },
+	[CDSA]		= { 0x000c, 0x40, OMAP_DMA_REG_2X16BIT },
+	[COLOR]		= { 0x0020, 0x40, OMAP_DMA_REG_2X16BIT },
+	[CCR2]		= { 0x0024, 0x40, OMAP_DMA_REG_16BIT },
+	[LCH_CTRL]	= { 0x002a, 0x40, OMAP_DMA_REG_16BIT },
 };
 
 static struct resource res[] __initdata = {
@@ -181,44 +177,36 @@ static struct resource res[] __initdata = {
 static void __iomem *dma_base;
 static inline void dma_write(u32 val, int reg, int lch)
 {
-	u8  stride;
-	u32 offset;
+	void __iomem *addr = dma_base;
 
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
 
-	__raw_writew(val, dma_base + offset);
-	if ((reg > CLNK_CTRL && reg < CCEN) ||
-			(reg > PCHD_ID && reg < CAPS_2)) {
-		u32 offset2 = reg_map[reg] + 2 + (stride * lch);
-		__raw_writew(val >> 16, dma_base + offset2);
-	}
+	__raw_writew(val, addr);
+	if (reg_map[reg].type == OMAP_DMA_REG_2X16BIT)
+		__raw_writew(val >> 16, addr + 2);
 }
 
 static inline u32 dma_read(int reg, int lch)
 {
-	u8 stride;
-	u32 offset, val;
-
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
-
-	val = __raw_readw(dma_base + offset);
-	if ((reg > CLNK_CTRL && reg < CCEN) ||
-			(reg > PCHD_ID && reg < CAPS_2)) {
-		u16 upper;
-		u32 offset2 = reg_map[reg] + 2 + (stride * lch);
-		upper = __raw_readw(dma_base + offset2);
-		val |= (upper << 16);
-	}
+	void __iomem *addr = dma_base;
+	uint32_t val;
+
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
+
+	val = __raw_readw(addr);
+	if (reg_map[reg].type == OMAP_DMA_REG_2X16BIT)
+		val |= __raw_readw(addr + 2) << 16;
+
 	return val;
 }
 
 static void omap1_clear_lch_regs(int lch)
 {
-	int i = dma_common_ch_start;
+	int i;
 
-	for (; i <= dma_common_ch_end; i += 1)
+	for (i = CPC; i <= COLOR; i += 1)
 		dma_write(0, i, lch);
 }
 
@@ -255,8 +243,9 @@ static void omap1_show_dma_caps(void)
 	return;
 }
 
-static u32 configure_dma_errata(void)
+static unsigned configure_dma_errata(void)
 {
+	unsigned errata = 0;
 
 	/*
 	 * Erratum 3.2/3.3: sometimes 0 is returned if CSAC/CDAC is
@@ -272,11 +261,23 @@ static const struct platform_device_info omap_dma_dev_info = {
 	.name = "omap-dma-engine",
 	.id = -1,
 	.dma_mask = DMA_BIT_MASK(32),
+	.res = res,
+	.num_res = 1,
+};
+
+static struct omap_system_dma_plat_info dma_plat_info __initdata = {
+	.reg_map	= reg_map,
+	.channel_stride	= 0x40,
+	.show_dma_caps	= omap1_show_dma_caps,
+	.clear_lch_regs	= omap1_clear_lch_regs,
+	.clear_dma	= omap1_clear_dma,
+	.dma_write	= dma_write,
+	.dma_read	= dma_read,
 };
 
 static int __init omap1_system_dma_init(void)
 {
-	struct omap_system_dma_plat_info	*p;
+	struct omap_system_dma_plat_info	p;
 	struct omap_dma_dev_attr		*d;
 	struct platform_device			*pdev, *dma_pdev;
 	int ret;
@@ -302,20 +303,12 @@ static int __init omap1_system_dma_init(void)
 		goto exit_iounmap;
 	}
 
-	p = kzalloc(sizeof(struct omap_system_dma_plat_info), GFP_KERNEL);
-	if (!p) {
-		dev_err(&pdev->dev, "%s: Unable to allocate 'p' for %s\n",
-			__func__, pdev->name);
-		ret = -ENOMEM;
-		goto exit_iounmap;
-	}
-
 	d = kzalloc(sizeof(struct omap_dma_dev_attr), GFP_KERNEL);
 	if (!d) {
 		dev_err(&pdev->dev, "%s: Unable to allocate 'd' for %s\n",
 			__func__, pdev->name);
 		ret = -ENOMEM;
-		goto exit_release_p;
+		goto exit_iounmap;
 	}
 
 	d->lch_count		= OMAP1_LOGICAL_DMA_CH_COUNT;
@@ -336,17 +329,6 @@ static int __init omap1_system_dma_init(void)
 	d->dev_caps		|= CLEAR_CSR_ON_READ;
 	d->dev_caps		|= IS_WORD_16;
 
-
-	d->chan = kzalloc(sizeof(struct omap_dma_lch) *
-					(d->lch_count), GFP_KERNEL);
-	if (!d->chan) {
-		dev_err(&pdev->dev,
-			"%s: Memory allocation failed for d->chan!\n",
-			__func__);
-		ret = -ENOMEM;
-		goto exit_release_d;
-	}
-
 	if (cpu_is_omap15xx())
 		d->chan_count = 9;
 	else if (cpu_is_omap16xx() || cpu_is_omap7xx()) {
@@ -356,35 +338,24 @@ static int __init omap1_system_dma_init(void)
 			d->chan_count = 9;
 	}
 
-	p->dma_attr = d;
-
-	p->show_dma_caps	= omap1_show_dma_caps;
-	p->clear_lch_regs	= omap1_clear_lch_regs;
-	p->clear_dma		= omap1_clear_dma;
-	p->dma_write		= dma_write;
-	p->dma_read		= dma_read;
-	p->disable_irq_lch	= NULL;
-
-	p->errata = configure_dma_errata();
+	p = dma_plat_info;
+	p.dma_attr = d;
+	p.errata = configure_dma_errata();
 
-	ret = platform_device_add_data(pdev, p, sizeof(*p));
+	ret = platform_device_add_data(pdev, &p, sizeof(p));
 	if (ret) {
 		dev_err(&pdev->dev, "%s: Unable to add resources for %s%d\n",
 			__func__, pdev->name, pdev->id);
-		goto exit_release_chan;
+		goto exit_release_d;
 	}
 
 	ret = platform_device_add(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: Unable to add resources for %s%d\n",
 			__func__, pdev->name, pdev->id);
-		goto exit_release_chan;
+		goto exit_release_d;
 	}
 
-	dma_stride		= OMAP1_DMA_STRIDE;
-	dma_common_ch_start	= CPC;
-	dma_common_ch_end	= COLOR;
-
 	dma_pdev = platform_device_register_full(&omap_dma_dev_info);
 	if (IS_ERR(dma_pdev)) {
 		ret = PTR_ERR(dma_pdev);
@@ -395,12 +366,8 @@ static int __init omap1_system_dma_init(void)
 
 exit_release_pdev:
 	platform_device_del(pdev);
-exit_release_chan:
-	kfree(d->chan);
 exit_release_d:
 	kfree(d);
-exit_release_p:
-	kfree(p);
 exit_iounmap:
 	iounmap(dma_base);
 exit_device_put:
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 653b489479e0..0af7ca02314d 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -50,11 +50,12 @@ config SOC_OMAP5
 	bool "TI OMAP5"
 	depends on ARCH_MULTI_V7
 	select ARCH_OMAP2PLUS
+	select ARCH_HAS_OPP
 	select ARM_CPU_SUSPEND if PM
 	select ARM_GIC
 	select CPU_V7
 	select HAVE_ARM_SCU if SMP
-	select HAVE_ARM_TWD if LOCAL_TIMERS
+	select HAVE_ARM_TWD if SMP
 	select HAVE_SMP
 	select HAVE_ARM_ARCH_TIMER
 	select ARM_ERRATA_798181 if SMP
@@ -63,6 +64,7 @@ config SOC_AM33XX
 	bool "TI AM33XX"
 	depends on ARCH_MULTI_V7
 	select ARCH_OMAP2PLUS
+	select ARCH_HAS_OPP
 	select ARM_CPU_SUSPEND if PM
 	select CPU_V7
 	select MULTI_IRQ_HANDLER
@@ -72,6 +74,7 @@ config SOC_AM43XX
 	depends on ARCH_MULTI_V7
 	select CPU_V7
 	select ARCH_OMAP2PLUS
+	select ARCH_HAS_OPP
 	select MULTI_IRQ_HANDLER
 	select ARM_GIC
 	select MACH_OMAP_GENERIC
@@ -80,6 +83,7 @@ config SOC_DRA7XX
 	bool "TI DRA7XX"
 	depends on ARCH_MULTI_V7
 	select ARCH_OMAP2PLUS
+	select ARCH_HAS_OPP
 	select ARM_CPU_SUSPEND if PM
 	select ARM_GIC
 	select CPU_V7
@@ -268,9 +272,6 @@ config MACH_OMAP_3430SDP
 	default y
 	select OMAP_PACKAGE_CBB
 
-config MACH_NOKIA_N800
-       bool
-
 config MACH_NOKIA_N810
        bool
 
@@ -281,7 +282,6 @@ config MACH_NOKIA_N8X0
 	bool "Nokia N800/N810"
 	depends on SOC_OMAP2420
 	default y
-	select MACH_NOKIA_N800
 	select MACH_NOKIA_N810
 	select MACH_NOKIA_N810_WIMAX
 	select OMAP_PACKAGE_ZAC
diff --git a/arch/arm/mach-omap2/am35xx-emac.c b/arch/arm/mach-omap2/am35xx-emac.c
index 25b79a297365..6a6935caac1e 100644
--- a/arch/arm/mach-omap2/am35xx-emac.c
+++ b/arch/arm/mach-omap2/am35xx-emac.c
@@ -17,7 +17,6 @@
 
 #include <linux/err.h>
 #include <linux/davinci_emac.h>
-#include <asm/system.h>
 #include "omap_device.h"
 #include "am35xx.h"
 #include "control.h"
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index 49fd0d501c9b..5689c88d986d 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -35,97 +35,80 @@
 #include "omap_hwmod.h"
 #include "omap_device.h"
 
-#define OMAP2_DMA_STRIDE	0x60
-
-static u32 errata;
-static u8 dma_stride;
-
-static struct omap_dma_dev_attr *d;
-
-static enum omap_reg_offsets dma_common_ch_start, dma_common_ch_end;
-
-static u16 reg_map[] = {
-	[REVISION]		= 0x00,
-	[GCR]			= 0x78,
-	[IRQSTATUS_L0]		= 0x08,
-	[IRQSTATUS_L1]		= 0x0c,
-	[IRQSTATUS_L2]		= 0x10,
-	[IRQSTATUS_L3]		= 0x14,
-	[IRQENABLE_L0]		= 0x18,
-	[IRQENABLE_L1]		= 0x1c,
-	[IRQENABLE_L2]		= 0x20,
-	[IRQENABLE_L3]		= 0x24,
-	[SYSSTATUS]		= 0x28,
-	[OCP_SYSCONFIG]		= 0x2c,
-	[CAPS_0]		= 0x64,
-	[CAPS_2]		= 0x6c,
-	[CAPS_3]		= 0x70,
-	[CAPS_4]		= 0x74,
+static enum omap_reg_offsets dma_common_ch_end;
+
+static const struct omap_dma_reg reg_map[] = {
+	[REVISION]	= { 0x0000, 0x00, OMAP_DMA_REG_32BIT },
+	[GCR]		= { 0x0078, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L0]	= { 0x0008, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L1]	= { 0x000c, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L2]	= { 0x0010, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQSTATUS_L3]	= { 0x0014, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L0]	= { 0x0018, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L1]	= { 0x001c, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L2]	= { 0x0020, 0x00, OMAP_DMA_REG_32BIT },
+	[IRQENABLE_L3]	= { 0x0024, 0x00, OMAP_DMA_REG_32BIT },
+	[SYSSTATUS]	= { 0x0028, 0x00, OMAP_DMA_REG_32BIT },
+	[OCP_SYSCONFIG]	= { 0x002c, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_0]	= { 0x0064, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_2]	= { 0x006c, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_3]	= { 0x0070, 0x00, OMAP_DMA_REG_32BIT },
+	[CAPS_4]	= { 0x0074, 0x00, OMAP_DMA_REG_32BIT },
 
 	/* Common register offsets */
-	[CCR]			= 0x80,
-	[CLNK_CTRL]		= 0x84,
-	[CICR]			= 0x88,
-	[CSR]			= 0x8c,
-	[CSDP]			= 0x90,
-	[CEN]			= 0x94,
-	[CFN]			= 0x98,
-	[CSEI]			= 0xa4,
-	[CSFI]			= 0xa8,
-	[CDEI]			= 0xac,
-	[CDFI]			= 0xb0,
-	[CSAC]			= 0xb4,
-	[CDAC]			= 0xb8,
+	[CCR]		= { 0x0080, 0x60, OMAP_DMA_REG_32BIT },
+	[CLNK_CTRL]	= { 0x0084, 0x60, OMAP_DMA_REG_32BIT },
+	[CICR]		= { 0x0088, 0x60, OMAP_DMA_REG_32BIT },
+	[CSR]		= { 0x008c, 0x60, OMAP_DMA_REG_32BIT },
+	[CSDP]		= { 0x0090, 0x60, OMAP_DMA_REG_32BIT },
+	[CEN]		= { 0x0094, 0x60, OMAP_DMA_REG_32BIT },
+	[CFN]		= { 0x0098, 0x60, OMAP_DMA_REG_32BIT },
+	[CSEI]		= { 0x00a4, 0x60, OMAP_DMA_REG_32BIT },
+	[CSFI]		= { 0x00a8, 0x60, OMAP_DMA_REG_32BIT },
+	[CDEI]		= { 0x00ac, 0x60, OMAP_DMA_REG_32BIT },
+	[CDFI]		= { 0x00b0, 0x60, OMAP_DMA_REG_32BIT },
+	[CSAC]		= { 0x00b4, 0x60, OMAP_DMA_REG_32BIT },
+	[CDAC]		= { 0x00b8, 0x60, OMAP_DMA_REG_32BIT },
 
 	/* Channel specific register offsets */
-	[CSSA]			= 0x9c,
-	[CDSA]			= 0xa0,
-	[CCEN]			= 0xbc,
-	[CCFN]			= 0xc0,
-	[COLOR]			= 0xc4,
+	[CSSA]		= { 0x009c, 0x60, OMAP_DMA_REG_32BIT },
+	[CDSA]		= { 0x00a0, 0x60, OMAP_DMA_REG_32BIT },
+	[CCEN]		= { 0x00bc, 0x60, OMAP_DMA_REG_32BIT },
+	[CCFN]		= { 0x00c0, 0x60, OMAP_DMA_REG_32BIT },
+	[COLOR]		= { 0x00c4, 0x60, OMAP_DMA_REG_32BIT },
 
 	/* OMAP4 specific registers */
-	[CDP]			= 0xd0,
-	[CNDP]			= 0xd4,
-	[CCDN]			= 0xd8,
+	[CDP]		= { 0x00d0, 0x60, OMAP_DMA_REG_32BIT },
+	[CNDP]		= { 0x00d4, 0x60, OMAP_DMA_REG_32BIT },
+	[CCDN]		= { 0x00d8, 0x60, OMAP_DMA_REG_32BIT },
 };
 
 static void __iomem *dma_base;
 static inline void dma_write(u32 val, int reg, int lch)
 {
-	u8  stride;
-	u32 offset;
+	void __iomem *addr = dma_base;
 
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
-	__raw_writel(val, dma_base + offset);
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
+
+	__raw_writel(val, addr);
 }
 
 static inline u32 dma_read(int reg, int lch)
 {
-	u8 stride;
-	u32 offset, val;
-
-	stride = (reg >= dma_common_ch_start) ? dma_stride : 0;
-	offset = reg_map[reg] + (stride * lch);
-	val = __raw_readl(dma_base + offset);
-	return val;
-}
+	void __iomem *addr = dma_base;
 
-static inline void omap2_disable_irq_lch(int lch)
-{
-	u32 val;
+	addr += reg_map[reg].offset;
+	addr += reg_map[reg].stride * lch;
 
-	val = dma_read(IRQENABLE_L0, lch);
-	val &= ~(1 << lch);
-	dma_write(val, IRQENABLE_L0, lch);
+	return __raw_readl(addr);
 }
 
 static void omap2_clear_dma(int lch)
 {
-	int i = dma_common_ch_start;
+	int i;
 
-	for (; i <= dma_common_ch_end; i += 1)
+	for (i = CSDP; i <= dma_common_ch_end; i += 1)
 		dma_write(0, i, lch);
 }
 
@@ -137,8 +120,9 @@ static void omap2_show_dma_caps(void)
 	return;
 }
 
-static u32 configure_dma_errata(void)
+static unsigned configure_dma_errata(void)
 {
+	unsigned errata = 0;
 
 	/*
 	 * Errata applicable for OMAP2430ES1.0 and all omap2420
@@ -220,48 +204,50 @@ static u32 configure_dma_errata(void)
 	return errata;
 }
 
+static struct omap_system_dma_plat_info dma_plat_info __initdata = {
+	.reg_map	= reg_map,
+	.channel_stride	= 0x60,
+	.show_dma_caps	= omap2_show_dma_caps,
+	.clear_dma	= omap2_clear_dma,
+	.dma_write	= dma_write,
+	.dma_read	= dma_read,
+};
+
+static struct platform_device_info omap_dma_dev_info = {
+	.name = "omap-dma-engine",
+	.id = -1,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
 /* One time initializations */
 static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused)
 {
 	struct platform_device			*pdev;
-	struct omap_system_dma_plat_info	*p;
+	struct omap_system_dma_plat_info	p;
+	struct omap_dma_dev_attr		*d;
 	struct resource				*mem;
 	char					*name = "omap_dma_system";
 
-	dma_stride		= OMAP2_DMA_STRIDE;
-	dma_common_ch_start	= CSDP;
-
-	p = kzalloc(sizeof(struct omap_system_dma_plat_info), GFP_KERNEL);
-	if (!p) {
-		pr_err("%s: Unable to allocate pdata for %s:%s\n",
-			__func__, name, oh->name);
-		return -ENOMEM;
-	}
-
-	p->dma_attr		= (struct omap_dma_dev_attr *)oh->dev_attr;
-	p->disable_irq_lch	= omap2_disable_irq_lch;
-	p->show_dma_caps	= omap2_show_dma_caps;
-	p->clear_dma		= omap2_clear_dma;
-	p->dma_write		= dma_write;
-	p->dma_read		= dma_read;
-
-	p->clear_lch_regs	= NULL;
-
-	p->errata		= configure_dma_errata();
+	p = dma_plat_info;
+	p.dma_attr = (struct omap_dma_dev_attr *)oh->dev_attr;
+	p.errata = configure_dma_errata();
 
-	pdev = omap_device_build(name, 0, oh, p, sizeof(*p));
-	kfree(p);
+	pdev = omap_device_build(name, 0, oh, &p, sizeof(p));
 	if (IS_ERR(pdev)) {
 		pr_err("%s: Can't build omap_device for %s:%s.\n",
 			__func__, name, oh->name);
 		return PTR_ERR(pdev);
 	}
 
+	omap_dma_dev_info.res = pdev->resource;
+	omap_dma_dev_info.num_res = pdev->num_resources;
+
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem) {
 		dev_err(&pdev->dev, "%s: no mem resource\n", __func__);
 		return -EINVAL;
 	}
+
 	dma_base = ioremap(mem->start, resource_size(mem));
 	if (!dma_base) {
 		dev_err(&pdev->dev, "%s: ioremap fail\n", __func__);
@@ -269,13 +255,6 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused)
 	}
 
 	d = oh->dev_attr;
-	d->chan = kzalloc(sizeof(struct omap_dma_lch) *
-					(d->lch_count), GFP_KERNEL);
-
-	if (!d->chan) {
-		dev_err(&pdev->dev, "%s: kzalloc fail\n", __func__);
-		return -ENOMEM;
-	}
 
 	if (cpu_is_omap34xx() && (omap_type() != OMAP2_DEVICE_TYPE_GP))
 		d->dev_caps |= HS_CHANNELS_RESERVED;
@@ -289,12 +268,6 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused)
 	return 0;
 }
 
-static const struct platform_device_info omap_dma_dev_info = {
-	.name = "omap-dma-engine",
-	.id = -1,
-	.dma_mask = DMA_BIT_MASK(32),
-};
-
 static int __init omap2_system_dma_init(void)
 {
 	struct platform_device *pdev;
diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index d24926e6340f..ab43755364f5 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -1339,7 +1339,7 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np,
 		of_property_read_bool(np, "gpmc,time-para-granularity");
 }
 
-#ifdef CONFIG_MTD_NAND
+#if IS_ENABLED(CONFIG_MTD_NAND)
 
 static const char * const nand_xfer_types[] = {
 	[NAND_OMAP_PREFETCH_POLLED]		= "prefetch-polled",
@@ -1429,7 +1429,7 @@ static int gpmc_probe_nand_child(struct platform_device *pdev,
 }
 #endif
 
-#ifdef CONFIG_MTD_ONENAND
+#if IS_ENABLED(CONFIG_MTD_ONENAND)
 static int gpmc_probe_onenand_child(struct platform_device *pdev,
 				 struct device_node *child)
 {
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index d408b15b4fbf..af432b191255 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -179,15 +179,6 @@ static struct map_desc omap34xx_io_desc[] __initdata = {
 		.length		= L4_EMU_34XX_SIZE,
 		.type		= MT_DEVICE
 	},
-#if defined(CONFIG_DEBUG_LL) &&							\
-	(defined(CONFIG_MACH_OMAP_ZOOM2) || defined(CONFIG_MACH_OMAP_ZOOM3))
-	{
-		.virtual	= ZOOM_UART_VIRT,
-		.pfn		= __phys_to_pfn(ZOOM_UART_BASE),
-		.length		= SZ_1M,
-		.type		= MT_DEVICE
-	},
-#endif
 };
 #endif
 
diff --git a/arch/arm/mach-pxa/am300epd.c b/arch/arm/mach-pxa/am300epd.c
index c9f309ae88c5..8b90c4f2d430 100644
--- a/arch/arm/mach-pxa/am300epd.c
+++ b/arch/arm/mach-pxa/am300epd.c
@@ -30,6 +30,7 @@
 
 #include <mach/gumstix.h>
 #include <mach/mfp-pxa25x.h>
+#include <mach/irqs.h>
 #include <linux/platform_data/video-pxafb.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-pxa/include/mach/balloon3.h b/arch/arm/mach-pxa/include/mach/balloon3.h
index 954641e6c8b1..1b0825911e62 100644
--- a/arch/arm/mach-pxa/include/mach/balloon3.h
+++ b/arch/arm/mach-pxa/include/mach/balloon3.h
@@ -14,6 +14,8 @@
 #ifndef ASM_ARCH_BALLOON3_H
 #define ASM_ARCH_BALLOON3_H
 
+#include "irqs.h" /* PXA_NR_BUILTIN_GPIO */
+
 enum balloon3_features {
 	BALLOON3_FEATURE_OHCI,
 	BALLOON3_FEATURE_MMC,
diff --git a/arch/arm/mach-pxa/include/mach/corgi.h b/arch/arm/mach-pxa/include/mach/corgi.h
index f3c3493b468d..c030d955bbd7 100644
--- a/arch/arm/mach-pxa/include/mach/corgi.h
+++ b/arch/arm/mach-pxa/include/mach/corgi.h
@@ -13,6 +13,7 @@
 #ifndef __ASM_ARCH_CORGI_H
 #define __ASM_ARCH_CORGI_H  1
 
+#include "irqs.h" /* PXA_NR_BUILTIN_GPIO */
 
 /*
  * Corgi (Non Standard) GPIO Definitions
diff --git a/arch/arm/mach-pxa/include/mach/csb726.h b/arch/arm/mach-pxa/include/mach/csb726.h
index 2628e7b72116..00cfbbbf73f7 100644
--- a/arch/arm/mach-pxa/include/mach/csb726.h
+++ b/arch/arm/mach-pxa/include/mach/csb726.h
@@ -11,6 +11,8 @@
 #ifndef CSB726_H
 #define CSB726_H
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 #define CSB726_GPIO_IRQ_LAN	52
 #define CSB726_GPIO_IRQ_SM501	53
 #define CSB726_GPIO_MMC_DETECT	100
diff --git a/arch/arm/mach-pxa/include/mach/gumstix.h b/arch/arm/mach-pxa/include/mach/gumstix.h
index dba14b6503ad..f7df27bbb42e 100644
--- a/arch/arm/mach-pxa/include/mach/gumstix.h
+++ b/arch/arm/mach-pxa/include/mach/gumstix.h
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
 
 /* BTRESET - Reset line to Bluetooth module, active low signal. */
 #define GPIO_GUMSTIX_BTRESET          7
diff --git a/arch/arm/mach-pxa/include/mach/idp.h b/arch/arm/mach-pxa/include/mach/idp.h
index 22a96f87232b..7e63f4680271 100644
--- a/arch/arm/mach-pxa/include/mach/idp.h
+++ b/arch/arm/mach-pxa/include/mach/idp.h
@@ -23,6 +23,7 @@
  * IDP hardware.
  */
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
 
 #define IDP_FLASH_PHYS		(PXA_CS0_PHYS)
 #define IDP_ALT_FLASH_PHYS	(PXA_CS1_PHYS)
diff --git a/arch/arm/mach-pxa/include/mach/palmld.h b/arch/arm/mach-pxa/include/mach/palmld.h
index 2c4471336570..b184f296023b 100644
--- a/arch/arm/mach-pxa/include/mach/palmld.h
+++ b/arch/arm/mach-pxa/include/mach/palmld.h
@@ -13,6 +13,8 @@
 #ifndef _INCLUDE_PALMLD_H_
 #define _INCLUDE_PALMLD_H_
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /** HERE ARE GPIOs **/
 
 /* GPIOs */
diff --git a/arch/arm/mach-pxa/include/mach/palmt5.h b/arch/arm/mach-pxa/include/mach/palmt5.h
index 0bd4f036c72f..e342c5921405 100644
--- a/arch/arm/mach-pxa/include/mach/palmt5.h
+++ b/arch/arm/mach-pxa/include/mach/palmt5.h
@@ -15,6 +15,8 @@
 #ifndef _INCLUDE_PALMT5_H_
 #define _INCLUDE_PALMT5_H_
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /** HERE ARE GPIOs **/
 
 /* GPIOs */
diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h
index c383a21680b6..81c727b3cfd2 100644
--- a/arch/arm/mach-pxa/include/mach/palmtc.h
+++ b/arch/arm/mach-pxa/include/mach/palmtc.h
@@ -16,6 +16,8 @@
 #ifndef _INCLUDE_PALMTC_H_
 #define _INCLUDE_PALMTC_H_
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /** HERE ARE GPIOs **/
 
 /* GPIOs */
diff --git a/arch/arm/mach-pxa/include/mach/palmtx.h b/arch/arm/mach-pxa/include/mach/palmtx.h
index f2e530380253..92bc1f05300d 100644
--- a/arch/arm/mach-pxa/include/mach/palmtx.h
+++ b/arch/arm/mach-pxa/include/mach/palmtx.h
@@ -16,6 +16,8 @@
 #ifndef _INCLUDE_PALMTX_H_
 #define _INCLUDE_PALMTX_H_
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /** HERE ARE GPIOs **/
 
 /* GPIOs */
diff --git a/arch/arm/mach-pxa/include/mach/pcm027.h b/arch/arm/mach-pxa/include/mach/pcm027.h
index 6bf28de228bd..86ebd7b6c960 100644
--- a/arch/arm/mach-pxa/include/mach/pcm027.h
+++ b/arch/arm/mach-pxa/include/mach/pcm027.h
@@ -23,6 +23,8 @@
  * Definitions of CPU card resources only
  */
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /* phyCORE-PXA270 (PCM027) Interrupts */
 #define PCM027_IRQ(x)          (IRQ_BOARD_START + (x))
 #define PCM027_BTDET_IRQ       PCM027_IRQ(0)
diff --git a/arch/arm/mach-pxa/include/mach/pcm990_baseboard.h b/arch/arm/mach-pxa/include/mach/pcm990_baseboard.h
index 0260aaa2fc17..7e544c14967e 100644
--- a/arch/arm/mach-pxa/include/mach/pcm990_baseboard.h
+++ b/arch/arm/mach-pxa/include/mach/pcm990_baseboard.h
@@ -20,6 +20,7 @@
  */
 
 #include <mach/pcm027.h>
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
 
 /*
  * definitions relevant only when the PCM-990
diff --git a/arch/arm/mach-pxa/include/mach/poodle.h b/arch/arm/mach-pxa/include/mach/poodle.h
index f32ff75dcca8..b56b19351a03 100644
--- a/arch/arm/mach-pxa/include/mach/poodle.h
+++ b/arch/arm/mach-pxa/include/mach/poodle.h
@@ -15,6 +15,8 @@
 #ifndef __ASM_ARCH_POODLE_H
 #define __ASM_ARCH_POODLE_H  1
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /*
  * GPIOs
  */
diff --git a/arch/arm/mach-pxa/include/mach/spitz.h b/arch/arm/mach-pxa/include/mach/spitz.h
index 0bfe6507c95d..25c9f62e46aa 100644
--- a/arch/arm/mach-pxa/include/mach/spitz.h
+++ b/arch/arm/mach-pxa/include/mach/spitz.h
@@ -15,8 +15,8 @@
 #define __ASM_ARCH_SPITZ_H  1
 #endif
 
+#include "irqs.h" /* PXA_NR_BUILTIN_GPIO, PXA_GPIO_TO_IRQ */
 #include <linux/fb.h>
-#include <linux/gpio.h>
 
 /* Spitz/Akita GPIOs */
 
diff --git a/arch/arm/mach-pxa/include/mach/tosa.h b/arch/arm/mach-pxa/include/mach/tosa.h
index 2bb0e862598c..0497d95cef25 100644
--- a/arch/arm/mach-pxa/include/mach/tosa.h
+++ b/arch/arm/mach-pxa/include/mach/tosa.h
@@ -13,6 +13,8 @@
 #ifndef _ASM_ARCH_TOSA_H_
 #define _ASM_ARCH_TOSA_H_ 1
 
+#include "irqs.h" /* PXA_NR_BUILTIN_GPIO */
+
 /*  TOSA Chip selects  */
 #define TOSA_LCDC_PHYS		PXA_CS4_PHYS
 /* Internel Scoop */
diff --git a/arch/arm/mach-pxa/include/mach/trizeps4.h b/arch/arm/mach-pxa/include/mach/trizeps4.h
index d2ca01053f69..ae3ca013afab 100644
--- a/arch/arm/mach-pxa/include/mach/trizeps4.h
+++ b/arch/arm/mach-pxa/include/mach/trizeps4.h
@@ -10,6 +10,8 @@
 #ifndef _TRIPEPS4_H_
 #define _TRIPEPS4_H_
 
+#include "irqs.h" /* PXA_GPIO_TO_IRQ */
+
 /* physical memory regions */
 #define TRIZEPS4_FLASH_PHYS	(PXA_CS0_PHYS)  /* Flash region */
 #define TRIZEPS4_DISK_PHYS	(PXA_CS1_PHYS)  /* Disk On Chip region */
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index f70583fee59f..29997bde277d 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -38,6 +38,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/usb/gpio_vbus.h>
 #include <linux/reboot.h>
+#include <linux/regulator/fixed.h>
 #include <linux/regulator/max1586.h>
 #include <linux/slab.h>
 #include <linux/i2c/pxa-i2c.h>
@@ -714,6 +715,10 @@ static struct gpio global_gpios[] = {
 	{ GPIO56_MT9M111_nOE, GPIOF_OUT_INIT_LOW, "Camera nOE" },
 };
 
+static struct regulator_consumer_supply fixed_5v0_consumers[] = {
+	REGULATOR_SUPPLY("power", "pwm-backlight"),
+};
+
 static void __init mioa701_machine_init(void)
 {
 	int rc;
@@ -753,6 +758,10 @@ static void __init mioa701_machine_init(void)
 	pxa_set_i2c_info(&i2c_pdata);
 	pxa27x_set_i2c_power_info(NULL);
 	pxa_set_camera_info(&mioa701_pxacamera_platform_data);
+
+	regulator_register_always_on(0, "fixed-5.0V", fixed_5v0_consumers,
+				     ARRAY_SIZE(fixed_5v0_consumers),
+				     5000000);
 }
 
 static void mioa701_machine_exit(void)
diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c
index 85883b2e0e49..6d3517dc4772 100644
--- a/arch/arm/mach-rpc/dma.c
+++ b/arch/arm/mach-rpc/dma.c
@@ -141,7 +141,7 @@ static int iomd_request_dma(unsigned int chan, dma_t *dma)
 	struct iomd_dma *idma = container_of(dma, struct iomd_dma, dma);
 
 	return request_irq(idma->irq, iomd_dma_handle,
-			   IRQF_DISABLED, idma->dma.device_id, idma);
+			   0, idma->dma.device_id, idma);
 }
 
 static void iomd_free_dma(unsigned int chan, dma_t *dma)
diff --git a/arch/arm/mach-rpc/time.c b/arch/arm/mach-rpc/time.c
index 9a6def14df01..9a5158861ca9 100644
--- a/arch/arm/mach-rpc/time.c
+++ b/arch/arm/mach-rpc/time.c
@@ -75,7 +75,6 @@ ioc_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction ioc_timer_irq = {
 	.name		= "timer",
-	.flags		= IRQF_DISABLED,
 	.handler	= ioc_timer_interrupt
 };
 
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 831a15824ec8..f9874ba60cc8 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -43,6 +43,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
+#include <asm/mach/irda.h>
 
 #include <asm/hardware/scoop.h>
 #include <asm/mach/sharpsl_param.h>
@@ -96,6 +97,37 @@ static struct mcp_plat_data collie_mcp_data = {
 	.codec_pdata	= &collie_ucb1x00_data,
 };
 
+static int collie_ir_startup(struct device *dev)
+{
+	int rc = gpio_request(COLLIE_GPIO_IR_ON, "IrDA");
+	if (rc)
+		return rc;
+	rc = gpio_direction_output(COLLIE_GPIO_IR_ON, 1);
+
+	if (!rc)
+		return 0;
+
+	gpio_free(COLLIE_GPIO_IR_ON);
+	return rc;
+}
+
+static void collie_ir_shutdown(struct device *dev)
+{
+	gpio_free(COLLIE_GPIO_IR_ON);
+}
+
+static int collie_ir_set_power(struct device *dev, unsigned int state)
+{
+	gpio_set_value(COLLIE_GPIO_IR_ON, !state);
+	return 0;
+}
+
+static struct irda_platform_data collie_ir_data = {
+	.startup = collie_ir_startup,
+	.shutdown = collie_ir_shutdown,
+	.set_power = collie_ir_set_power,
+};
+
 /*
  * Collie AC IN
  */
@@ -400,6 +432,7 @@ static void __init collie_init(void)
 	sa11x0_register_mtd(&collie_flash_data, collie_flash_resources,
 			    ARRAY_SIZE(collie_flash_resources));
 	sa11x0_register_mcp(&collie_mcp_data);
+	sa11x0_register_irda(&collie_ir_data);
 
 	sharpsl_save_param();
 }
diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c
index daa27c474c13..3c43219bc881 100644
--- a/arch/arm/mach-sa1100/h3100.c
+++ b/arch/arm/mach-sa1100/h3100.c
@@ -122,15 +122,8 @@ static struct irda_platform_data h3100_irda_data = {
 	.shutdown	= h3100_irda_shutdown,
 };
 
-static struct gpio_default_state h3100_default_gpio[] = {
-	{ H3XXX_GPIO_COM_DCD,	GPIO_MODE_IN,	"COM DCD" },
-	{ H3XXX_GPIO_COM_CTS,	GPIO_MODE_IN,	"COM CTS" },
-	{ H3XXX_GPIO_COM_RTS,	GPIO_MODE_OUT0,	"COM RTS" },
-};
-
 static void __init h3100_mach_init(void)
 {
-	h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio));
 	h3xxx_mach_init();
 
 	sa11x0_register_lcd(&h3100_lcd_info);
diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c
index a663e7230141..5be54c214c7c 100644
--- a/arch/arm/mach-sa1100/h3600.c
+++ b/arch/arm/mach-sa1100/h3600.c
@@ -130,15 +130,8 @@ static struct irda_platform_data h3600_irda_data = {
 	.shutdown	= h3600_irda_shutdown,
 };
 
-static struct gpio_default_state h3600_default_gpio[] = {
-	{ H3XXX_GPIO_COM_DCD,	GPIO_MODE_IN,	"COM DCD" },
-	{ H3XXX_GPIO_COM_CTS,	GPIO_MODE_IN,	"COM CTS" },
-	{ H3XXX_GPIO_COM_RTS,	GPIO_MODE_OUT0,	"COM RTS" },
-};
-
 static void __init h3600_mach_init(void)
 {
-	h3xxx_init_gpio(h3600_default_gpio, ARRAY_SIZE(h3600_default_gpio));
 	h3xxx_mach_init();
 
 	sa11x0_register_lcd(&h3600_lcd_info);
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index f17e7382242a..c79bf467fb7f 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -28,37 +28,6 @@
 
 #include "generic.h"
 
-void h3xxx_init_gpio(struct gpio_default_state *s, size_t n)
-{
-	while (n--) {
-		const char *name = s->name;
-		int err;
-
-		if (!name)
-			name = "[init]";
-		err = gpio_request(s->gpio, name);
-		if (err) {
-			printk(KERN_ERR "gpio%u: unable to request: %d\n",
-				s->gpio, err);
-			continue;
-		}
-		if (s->mode >= 0) {
-			err = gpio_direction_output(s->gpio, s->mode);
-		} else {
-			err = gpio_direction_input(s->gpio);
-		}
-		if (err) {
-			printk(KERN_ERR "gpio%u: unable to set direction: %d\n",
-				s->gpio, err);
-			continue;
-		}
-		if (!s->name)
-			gpio_free(s->gpio);
-		s++;
-	}
-}
-
-
 /*
  * H3xxx flash support
  */
@@ -116,9 +85,34 @@ static struct resource h3xxx_flash_resource =
 /*
  * H3xxx uart support
  */
+static struct gpio h3xxx_uart_gpio[] = {
+	{ H3XXX_GPIO_COM_DCD,	GPIOF_IN,		"COM DCD" },
+	{ H3XXX_GPIO_COM_CTS,	GPIOF_IN,		"COM CTS" },
+	{ H3XXX_GPIO_COM_RTS,	GPIOF_OUT_INIT_LOW,	"COM RTS" },
+};
+
+static bool h3xxx_uart_request_gpios(void)
+{
+	static bool h3xxx_uart_gpio_ok;
+	int rc;
+
+	if (h3xxx_uart_gpio_ok)
+		return true;
+
+	rc = gpio_request_array(h3xxx_uart_gpio, ARRAY_SIZE(h3xxx_uart_gpio));
+	if (rc)
+		pr_err("h3xxx_uart_request_gpios: error %d\n", rc);
+	else
+		h3xxx_uart_gpio_ok = true;
+
+	return h3xxx_uart_gpio_ok;
+}
+
 static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl)
 {
 	if (port->mapbase == _Ser3UTCR0) {
+		if (!h3xxx_uart_request_gpios())
+			return;
 		gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS));
 	}
 }
@@ -128,6 +122,8 @@ static u_int h3xxx_uart_get_mctrl(struct uart_port *port)
 	u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR;
 
 	if (port->mapbase == _Ser3UTCR0) {
+		if (!h3xxx_uart_request_gpios())
+			return ret;
 		/*
 		 * DCD and CTS bits are inverted in GPLR by RS232 transceiver
 		 */
diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h
index 50e1d850ee2e..b478ca180c19 100644
--- a/arch/arm/mach-sa1100/include/mach/collie.h
+++ b/arch/arm/mach-sa1100/include/mach/collie.h
@@ -80,7 +80,7 @@ extern void locomolcd_power(int on);
 #define COLLIE_TC35143_GPIO_VERSION0    UCB_IO_0
 #define COLLIE_TC35143_GPIO_TBL_CHK     UCB_IO_1
 #define COLLIE_TC35143_GPIO_VPEN_ON     UCB_IO_2
-#define COLLIE_TC35143_GPIO_IR_ON       UCB_IO_3
+#define COLLIE_GPIO_IR_ON		(COLLIE_TC35143_GPIO_BASE + 3)
 #define COLLIE_TC35143_GPIO_AMP_ON      UCB_IO_4
 #define COLLIE_TC35143_GPIO_VERSION1    UCB_IO_5
 #define COLLIE_TC35143_GPIO_FS8KLPF     UCB_IO_5
diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h
index c810620db53d..603d4343f7f6 100644
--- a/arch/arm/mach-sa1100/include/mach/h3xxx.h
+++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h
@@ -79,17 +79,6 @@
 #define H3600_EGPIO_LCD_5V_ON		(H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */
 #define H3600_EGPIO_LVDD_ON		(H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */
 
-struct gpio_default_state {
-	int gpio;
-	int mode;
-	const char *name;
-};
-
-#define GPIO_MODE_IN	-1
-#define GPIO_MODE_OUT0	0
-#define GPIO_MODE_OUT1	1
-
-void h3xxx_init_gpio(struct gpio_default_state *s, size_t n);
 void __init h3xxx_map_io(void);
 void __init h3xxx_mach_init(void);
 
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 6fd4acb8f187..4852c08cb526 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -112,7 +112,7 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
 
 static struct irqaction sa1100_timer_irq = {
 	.name		= "ost0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= sa1100_ost0_interrupt,
 	.dev_id		= &ckevt_sa1100_osmr0,
 };
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 338640631e08..05fa505df585 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -8,7 +8,7 @@ config ARCH_SHMOBILE_MULTI
 	select CPU_V7
 	select GENERIC_CLOCKEVENTS
 	select HAVE_ARM_SCU if SMP
-	select HAVE_ARM_TWD if LOCAL_TIMERS
+	select HAVE_ARM_TWD if SMP
 	select HAVE_SMP
 	select ARM_GIC
 	select MIGHT_HAVE_CACHE_L2X0
diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
index d449673e40f7..218ba5b67d92 100644
--- a/arch/arm/mach-spear/time.c
+++ b/arch/arm/mach-spear/time.c
@@ -172,7 +172,7 @@ static irqreturn_t spear_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction spear_timer_irq = {
 	.name = "timer",
-	.flags = IRQF_DISABLED | IRQF_TIMER,
+	.flags = IRQF_TIMER,
 	.handler = spear_timer_interrupt
 };
 
diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
index 4ae0286b468d..f55b05a29b55 100644
--- a/arch/arm/mach-tegra/pm.c
+++ b/arch/arm/mach-tegra/pm.c
@@ -24,6 +24,7 @@
 #include <linux/cpu_pm.h>
 #include <linux/suspend.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 #include <linux/clk/tegra.h>
 
 #include <asm/smp_plat.h>
diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c
index 303a285d80fd..6191603379e1 100644
--- a/arch/arm/mach-tegra/tegra.c
+++ b/arch/arm/mach-tegra/tegra.c
@@ -73,10 +73,20 @@ u32 tegra_uart_config[3] = {
 static void __init tegra_init_cache(void)
 {
 #ifdef CONFIG_CACHE_L2X0
+	static const struct of_device_id pl310_ids[] __initconst = {
+		{ .compatible = "arm,pl310-cache",  },
+		{}
+	};
+
+	struct device_node *np;
 	int ret;
 	void __iomem *p = IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x3000;
 	u32 aux_ctrl, cache_type;
 
+	np = of_find_matching_node(NULL, pl310_ids);
+	if (!np)
+		return;
+
 	cache_type = readl(p + L2X0_CACHE_TYPE);
 	aux_ctrl = (cache_type & 0x700) << (17-8);
 	aux_ctrl |= 0x7C400001;
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index fe08fd34c0ce..de52cb37c479 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -337,7 +337,7 @@ static irqreturn_t u300_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction u300_timer_irq = {
 	.name		= "U300 Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= u300_timer_interrupt,
 };
 
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 0997e0b7494c..fc649bc09d0c 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -8,8 +8,11 @@ obj-y					:= v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
 obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o	dcscb_setup.o
 CFLAGS_dcscb.o				+= -march=armv7-a
+CFLAGS_REMOVE_dcscb.o			= -pg
 obj-$(CONFIG_ARCH_VEXPRESS_SPC)		+= spc.o
+CFLAGS_REMOVE_spc.o			= -pg
 obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM)	+= tc2_pm.o
 CFLAGS_tc2_pm.o				+= -march=armv7-a
+CFLAGS_REMOVE_tc2_pm.o			= -pg
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c
index 30fbca844575..9230d3725599 100644
--- a/arch/arm/mach-w90x900/time.c
+++ b/arch/arm/mach-w90x900/time.c
@@ -111,7 +111,7 @@ static irqreturn_t nuc900_timer0_interrupt(int irq, void *dev_id)
 
 static struct irqaction nuc900_timer0_irq = {
 	.name		= "nuc900-timer0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= nuc900_timer0_interrupt,
 };
 
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 1db2a5ca9ab8..8c09a8393fb6 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -25,6 +25,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/of.h>
+#include <linux/memblock.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-gic.h>
 
@@ -41,6 +42,18 @@
 
 void __iomem *zynq_scu_base;
 
+/**
+ * zynq_memory_init - Initialize special memory
+ *
+ * We need to stop things allocating the low memory as DMA can't work in
+ * the 1st 512K of memory.
+ */
+static void __init zynq_memory_init(void)
+{
+	if (!__pa(PAGE_OFFSET))
+		memblock_reserve(__pa(PAGE_OFFSET), __pa(swapper_pg_dir));
+}
+
 static struct platform_device zynq_cpuidle_device = {
 	.name = "cpuidle-zynq",
 };
@@ -117,5 +130,6 @@ DT_MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
 	.init_machine	= zynq_init_machine,
 	.init_time	= zynq_timer_init,
 	.dt_compat	= zynq_dt_match,
+	.reserve	= zynq_memory_init,
 	.restart	= zynq_system_reset,
 MACHINE_END
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 1f8fed94c2a4..ca8ecdee47d8 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -446,7 +446,6 @@ config CPU_32v5
 
 config CPU_32v6
 	bool
-	select CPU_USE_DOMAINS if CPU_V6 && MMU
 	select TLS_REG_EMUL if !CPU_32v6K && !MMU
 
 config CPU_32v6K
@@ -671,7 +670,7 @@ config ARM_VIRT_EXT
 
 config SWP_EMULATE
 	bool "Emulate SWP/SWPB instructions"
-	depends on !CPU_USE_DOMAINS && CPU_V7
+	depends on CPU_V7
 	default y if SMP
 	select HAVE_PROC_CPU if PROC_FS
 	help
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 48bc3c0a87ce..aae891820f8f 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -331,7 +331,9 @@ static void __init enable_l2(void)
 			enable_icache();
 		if (d)
 			enable_dcache();
-	}
+	} else
+		pr_err(FW_BUG
+		       "Feroceon L2: bootloader left the L2 cache on!\n");
 }
 
 void __init feroceon_l2_init(int __l2_wt_override)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1a77450e728a..c9c6acdf90cc 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -284,9 +284,6 @@ static void __dma_free_buffer(struct page *page, size_t size)
 }
 
 #ifdef CONFIG_MMU
-#ifdef CONFIG_HUGETLB_PAGE
-#warning ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
@@ -1358,7 +1355,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	*handle = DMA_ERROR_CODE;
 	size = PAGE_ALIGN(size);
 
-	if (gfp & GFP_ATOMIC)
+	if (!(gfp & __GFP_WAIT))
 		return __iommu_alloc_atomic(dev, size, handle);
 
 	/*
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a623cb3ad012..b68c6b22e1c8 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -516,6 +516,16 @@ static void __init build_mem_type_table(void)
 	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;
 
 	/*
+	 * We don't use domains on ARMv6 (since this causes problems with
+	 * v6/v7 kernels), so we must use a separate memory type for user
+	 * r/o, kernel r/w to map the vectors page.
+	 */
+#ifndef CONFIG_ARM_LPAE
+	if (cpu_arch == CPU_ARCH_ARMv6)
+		vecs_pgprot |= L_PTE_MT_VECTORS;
+#endif
+
+	/*
 	 * ARMv6 and above have extended page tables.
 	 */
 	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e3c48a3fe063..ee1d80593958 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -112,13 +112,9 @@
  *  100x   1   0   1	r/o	no acc
  *  10x0   1   0   1	r/o	no acc
  *  1011   0   0   1	r/w	no acc
- *  110x   0   1   0	r/w	r/o
- *  11x0   0   1   0	r/w	r/o
- *  1111   0   1   1	r/w	r/w
- *
- * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed:
  *  110x   1   1   1	r/o	r/o
  *  11x0   1   1   1	r/o	r/o
+ *  1111   0   1   1	r/w	r/w
  */
 	.macro	armv6_mt_table pfx
 \pfx\()_mt_table:
@@ -137,7 +133,7 @@
 	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
 	.long	0x00						@ unused
 	.long	0x00						@ unused
-	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX	@ L_PTE_MT_VECTORS
 	.endm
 
 	.macro	armv6_set_pte_ext pfx
@@ -158,24 +154,21 @@
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
-#ifdef CONFIG_CPU_USE_DOMAINS
-	@ allow kernel read/write access to read-only user pages
 	tstne	r3, #PTE_EXT_APX
-	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
-#endif
+
+	@ user read-only -> kernel read-only
+	bicne	r3, r3, #PTE_EXT_AP0
 
 	tst	r1, #L_PTE_XN
 	orrne	r3, r3, #PTE_EXT_XN
 
-	orr	r3, r3, r2
+	eor	r3, r3, r2
 
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_PRESENT
 	moveq	r3, #0
-#ifndef CONFIG_CPU_USE_DOMAINS
 	tstne	r1, #L_PTE_NONE
 	movne	r3, #0
-#endif
 
 	str	r3, [r0]
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index bdd3be4be77a..1f52915f2b28 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -90,21 +90,14 @@ ENTRY(cpu_v7_set_pte_ext)
 
 	tst	r1, #L_PTE_USER
 	orrne	r3, r3, #PTE_EXT_AP1
-#ifdef CONFIG_CPU_USE_DOMAINS
-	@ allow kernel read/write access to read-only user pages
-	tstne	r3, #PTE_EXT_APX
-	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
-#endif
 
 	tst	r1, #L_PTE_XN
 	orrne	r3, r3, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_YOUNG
 	tstne	r1, #L_PTE_VALID
-#ifndef CONFIG_CPU_USE_DOMAINS
 	eorne	r1, r1, #L_PTE_NONE
 	tstne	r1, #L_PTE_NONE
-#endif
 	moveq	r3, #0
 
  ARM(	str	r3, [r0, #2048]! )
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 74f6033e76dd..195731d3813b 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -192,6 +192,7 @@ __v7_cr7mp_setup:
 	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting
 	b	1f
 __v7_ca7mp_setup:
+__v7_ca12mp_setup:
 __v7_ca15mp_setup:
 	mov	r10, #0
 1:
@@ -484,6 +485,16 @@ __v7_ca7mp_proc_info:
 	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
 
 	/*
+	 * ARM Ltd. Cortex A12 processor.
+	 */
+	.type	__v7_ca12mp_proc_info, #object
+__v7_ca12mp_proc_info:
+	.long	0x410fc0d0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca12mp_setup
+	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
+
+	/*
 	 * ARM Ltd. Cortex A15 processor.
 	 */
 	.type	__v7_ca15mp_proc_info, #object
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index d70b73364a3f..6ad65d8ae237 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -127,7 +127,7 @@ iop_timer_interrupt(int irq, void *dev_id)
 static struct irqaction iop_timer_irq = {
 	.name		= "IOP Timer Tick",
 	.handler	= iop_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
 	.dev_id		= &iop_clockevent,
 };
 
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index 01619c2910e3..5f5b975887fc 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -2000,6 +2000,12 @@ void omap_dma_global_context_restore(void)
 			omap_clear_dma(ch);
 }
 
+struct omap_system_dma_plat_info *omap_get_plat_info(void)
+{
+	return p;
+}
+EXPORT_SYMBOL_GPL(omap_get_plat_info);
+
 static int omap_system_dma_probe(struct platform_device *pdev)
 {
 	int ch, ret = 0;
@@ -2024,9 +2030,16 @@ static int omap_system_dma_probe(struct platform_device *pdev)
 
 	dma_lch_count		= d->lch_count;
 	dma_chan_count		= dma_lch_count;
-	dma_chan		= d->chan;
 	enable_1510_mode	= d->dev_caps & ENABLE_1510_MODE;
 
+	dma_chan = devm_kcalloc(&pdev->dev, dma_lch_count,
+				sizeof(struct omap_dma_lch), GFP_KERNEL);
+	if (!dma_chan) {
+		dev_err(&pdev->dev, "%s: kzalloc fail\n", __func__);
+		return -ENOMEM;
+	}
+
+
 	if (dma_omap2plus()) {
 		dma_linked_lch = kzalloc(sizeof(struct dma_link_info) *
 						dma_lch_count, GFP_KERNEL);
@@ -2111,7 +2124,6 @@ exit_dma_irq_fail:
 	}
 
 exit_dma_lch_fail:
-	kfree(dma_chan);
 	return ret;
 }
 
@@ -2131,7 +2143,6 @@ static int omap_system_dma_remove(struct platform_device *pdev)
 			free_irq(dma_irq, (void *)(irq_rel + 1));
 		}
 	}
-	kfree(dma_chan);
 	return 0;
 }
 
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 46e17492fd1f..f0759e70fb86 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -8,9 +8,12 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/init.h>
+#include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/vfpmacros.h>
-#include "../kernel/entry-header.S"
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
 
 @ VFP entry point.
 @
@@ -22,11 +25,7 @@
 @  IRQs disabled.
 @
 ENTRY(do_vfp)
-#ifdef CONFIG_PREEMPT_COUNT
-	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
-	add	r11, r4, #1		@ increment it
-	str	r11, [r10, #TI_PREEMPT]
-#endif
+	inc_preempt_count r10, r4
 	enable_irq
  	ldr	r4, .LCvfp
 	ldr	r11, [r10, #TI_CPU]	@ CPU number
@@ -35,12 +34,7 @@ ENTRY(do_vfp)
 ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
-#ifdef CONFIG_PREEMPT_COUNT
-	get_thread_info	r10
-	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
-	sub	r11, r4, #1		@ decrement it
-	str	r11, [r10, #TI_PREEMPT]
-#endif
+	dec_preempt_count_ti r10, r4
 	mov	pc, lr
 ENDPROC(vfp_null_entry)
 
@@ -53,12 +47,7 @@ ENDPROC(vfp_null_entry)
 
 	__INIT
 ENTRY(vfp_testing_entry)
-#ifdef CONFIG_PREEMPT_COUNT
-	get_thread_info	r10
-	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
-	sub	r11, r4, #1		@ decrement it
-	str	r11, [r10, #TI_PREEMPT]
-#endif
+	dec_preempt_count_ti r10, r4
 	ldr	r0, VFP_arch_address
 	str	r0, [r0]		@ set to non-zero value
 	mov	pc, r9			@ we have handled the fault
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 3e5d3115a2a6..be807625ed8c 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -14,10 +14,13 @@
  * r10 points at the start of the private FP workspace in the thread structure
  * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
  */
+#include <linux/init.h>
+#include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/vfpmacros.h>
 #include <linux/kern_levels.h>
-#include "../kernel/entry-header.S"
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
 
 	.macro	DBGSTR, str
 #ifdef DEBUG
@@ -179,12 +182,7 @@ vfp_hw_state_valid:
 					@ else it's one 32-bit instruction, so
 					@ always subtract 4 from the following
 					@ instruction address.
-#ifdef CONFIG_PREEMPT_COUNT
-	get_thread_info	r10
-	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
-	sub	r11, r4, #1		@ decrement it
-	str	r11, [r10, #TI_PREEMPT]
-#endif
+	dec_preempt_count_ti r10, r4
 	mov	pc, r9			@ we think we have handled things
 
 
@@ -203,12 +201,7 @@ look_for_VFP_exceptions:
 	@ not recognised by VFP
 
 	DBGSTR	"not VFP"
-#ifdef CONFIG_PREEMPT_COUNT
-	get_thread_info	r10
-	ldr	r4, [r10, #TI_PREEMPT]	@ get preempt count
-	sub	r11, r4, #1		@ decrement it
-	str	r11, [r10, #TI_PREEMPT]
-#endif
+	dec_preempt_count_ti r10, r4
 	mov	pc, lr
 
 process_exception:
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dd4327f09ba4..27bbcfc7202a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -36,6 +36,7 @@ config ARM64
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_MEMBLOCK
+	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 84139be62ae6..7959dd0ca5d5 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
@@ -19,6 +18,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
+CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
@@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y
 CONFIG_ARCH_XGENE=y
 CONFIG_SMP=y
 CONFIG_PREEMPT=y
+CONFIG_CMA=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -42,14 +43,17 @@ CONFIG_IP_PNP_BOOTP=y
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
-CONFIG_BLK_DEV=y
+CONFIG_DMA_CMA=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
-CONFIG_MII=y
 CONFIG_SMC91X=y
+CONFIG_SMSC911X=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO_I8042 is not set
@@ -62,13 +66,19 @@ CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_FB=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
-# CONFIG_USB_SUPPORT is not set
+CONFIG_USB=y
+CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_ARMMMCI=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 01de5aaa3edc..0237f0867e37 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v)
 "	stxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
-	: "Ir" (i)
-	: "cc");
+	: "Ir" (i));
 }
 
 static inline int atomic_add_return(int i, atomic_t *v)
@@ -64,14 +63,15 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	int result;
 
 	asm volatile("// atomic_add_return\n"
-"1:	ldaxr	%w0, %2\n"
+"1:	ldxr	%w0, %2\n"
 "	add	%w0, %w0, %w3\n"
 "	stlxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	: "Ir" (i)
-	: "cc", "memory");
+	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -86,8 +86,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 "	stxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
-	: "Ir" (i)
-	: "cc");
+	: "Ir" (i));
 }
 
 static inline int atomic_sub_return(int i, atomic_t *v)
@@ -96,14 +95,15 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	int result;
 
 	asm volatile("// atomic_sub_return\n"
-"1:	ldaxr	%w0, %2\n"
+"1:	ldxr	%w0, %2\n"
 "	sub	%w0, %w0, %w3\n"
 "	stlxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	: "Ir" (i)
-	: "cc", "memory");
+	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -112,17 +112,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	unsigned long tmp;
 	int oldval;
 
+	smp_mb();
+
 	asm volatile("// atomic_cmpxchg\n"
-"1:	ldaxr	%w1, %2\n"
+"1:	ldxr	%w1, %2\n"
 "	cmp	%w1, %w3\n"
 "	b.ne	2f\n"
-"	stlxr	%w0, %w4, %2\n"
+"	stxr	%w0, %w4, %2\n"
 "	cbnz	%w0, 1b\n"
 "2:"
 	: "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter)
 	: "Ir" (old), "r" (new)
-	: "cc", "memory");
+	: "cc");
 
+	smp_mb();
 	return oldval;
 }
 
@@ -173,8 +176,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 "	stxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
-	: "Ir" (i)
-	: "cc");
+	: "Ir" (i));
 }
 
 static inline long atomic64_add_return(long i, atomic64_t *v)
@@ -183,14 +185,15 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_add_return\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	add	%0, %0, %3\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	: "Ir" (i)
-	: "cc", "memory");
+	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -205,8 +208,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 "	stxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
-	: "Ir" (i)
-	: "cc");
+	: "Ir" (i));
 }
 
 static inline long atomic64_sub_return(long i, atomic64_t *v)
@@ -215,14 +217,15 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_sub_return\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	sub	%0, %0, %3\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	: "Ir" (i)
-	: "cc", "memory");
+	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -231,17 +234,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 	long oldval;
 	unsigned long res;
 
+	smp_mb();
+
 	asm volatile("// atomic64_cmpxchg\n"
-"1:	ldaxr	%1, %2\n"
+"1:	ldxr	%1, %2\n"
 "	cmp	%1, %3\n"
 "	b.ne	2f\n"
-"	stlxr	%w0, %4, %2\n"
+"	stxr	%w0, %4, %2\n"
 "	cbnz	%w0, 1b\n"
 "2:"
 	: "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter)
 	: "Ir" (old), "r" (new)
-	: "cc", "memory");
+	: "cc");
 
+	smp_mb();
 	return oldval;
 }
 
@@ -253,11 +259,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_dec_if_positive\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	subs	%0, %0, #1\n"
 "	b.mi	2f\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b\n"
+"	dmb	ish\n"
 "2:"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	:
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 78e20ba8806b..409ca370cfe2 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,7 +25,7 @@
 #define wfi()		asm volatile("wfi" : : : "memory")
 
 #define isb()		asm volatile("isb" : : : "memory")
-#define dsb()		asm volatile("dsb sy" : : : "memory")
+#define dsb(opt)	asm volatile("dsb sy" : : : "memory")
 
 #define mb()		dsb()
 #define rmb()		asm volatile("dsb ld" : : : "memory")
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index fea9ee327206..889324981aa4 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -116,6 +116,7 @@ extern void flush_dcache_page(struct page *);
 static inline void __flush_icache_all(void)
 {
 	asm("ic	ialluis");
+	dsb();
 }
 
 #define flush_dcache_mmap_lock(mapping) \
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 56166d7f4a25..57c0fa7bf711 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -29,44 +29,45 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	switch (size) {
 	case 1:
 		asm volatile("//	__xchg1\n"
-		"1:	ldaxrb	%w0, %2\n"
+		"1:	ldxrb	%w0, %2\n"
 		"	stlxrb	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
 			: "r" (x)
-			: "cc", "memory");
+			: "memory");
 		break;
 	case 2:
 		asm volatile("//	__xchg2\n"
-		"1:	ldaxrh	%w0, %2\n"
+		"1:	ldxrh	%w0, %2\n"
 		"	stlxrh	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
 			: "r" (x)
-			: "cc", "memory");
+			: "memory");
 		break;
 	case 4:
 		asm volatile("//	__xchg4\n"
-		"1:	ldaxr	%w0, %2\n"
+		"1:	ldxr	%w0, %2\n"
 		"	stlxr	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
 			: "r" (x)
-			: "cc", "memory");
+			: "memory");
 		break;
 	case 8:
 		asm volatile("//	__xchg8\n"
-		"1:	ldaxr	%0, %2\n"
+		"1:	ldxr	%0, %2\n"
 		"	stlxr	%w1, %3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
 			: "r" (x)
-			: "cc", "memory");
+			: "memory");
 		break;
 	default:
 		BUILD_BUG();
 	}
 
+	smp_mb();
 	return ret;
 }
 
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 78834123a32e..c4a7f940b387 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -42,7 +42,7 @@
 #define ESR_EL1_EC_SP_ALIGN	(0x26)
 #define ESR_EL1_EC_FP_EXC32	(0x28)
 #define ESR_EL1_EC_FP_EXC64	(0x2C)
-#define ESR_EL1_EC_SERRROR	(0x2F)
+#define ESR_EL1_EC_SERROR	(0x2F)
 #define ESR_EL1_EC_BREAKPT_EL0	(0x30)
 #define ESR_EL1_EC_BREAKPT_EL1	(0x31)
 #define ESR_EL1_EC_SOFTSTP_EL0	(0x32)
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 78cc3aba5d69..5f750dc96e0f 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -24,10 +24,11 @@
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)		\
 	asm volatile(							\
-"1:	ldaxr	%w1, %2\n"						\
+"1:	ldxr	%w1, %2\n"						\
 	insn "\n"							\
 "2:	stlxr	%w3, %w0, %2\n"						\
 "	cbnz	%w3, 1b\n"						\
+"	dmb	ish\n"							\
 "3:\n"									\
 "	.pushsection .fixup,\"ax\"\n"					\
 "	.align	2\n"							\
@@ -40,7 +41,7 @@
 "	.popsection\n"							\
 	: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)	\
 	: "r" (oparg), "Ir" (-EFAULT)					\
-	: "cc", "memory")
+	: "memory")
 
 static inline int
 futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
@@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-"1:	ldaxr	%w1, %2\n"
+"1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
 "	cbnz	%w3, 3f\n"
 "2:	stlxr	%w3, %w5, %2\n"
 "	cbnz	%w3, 1b\n"
+"	dmb	ish\n"
 "3:\n"
 "	.pushsection .fixup,\"ax\"\n"
 "4:	mov	%w0, %w6\n"
@@ -127,7 +129,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "	.popsection\n"
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
-	: "cc", "memory");
+	: "memory");
 
 	*uval = val;
 	return ret;
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c98ef4771c73..0eb398655378 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -231,7 +231,7 @@
 #define ESR_EL2_EC_SP_ALIGN	(0x26)
 #define ESR_EL2_EC_FP_EXC32	(0x28)
 #define ESR_EL2_EC_FP_EXC64	(0x2C)
-#define ESR_EL2_EC_SERRROR	(0x2F)
+#define ESR_EL2_EC_SERROR	(0x2F)
 #define ESR_EL2_EC_BREAKPT	(0x30)
 #define ESR_EL2_EC_BREAKPT_HYP	(0x31)
 #define ESR_EL2_EC_SOFTSTP	(0x32)
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 13fb0b3efc5f..453a179469a3 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,6 +16,8 @@
 #ifndef __ASM_PERCPU_H
 #define __ASM_PERCPU_H
 
+#ifdef CONFIG_SMP
+
 static inline void set_my_cpu_offset(unsigned long off)
 {
 	asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
@@ -36,6 +38,12 @@ static inline unsigned long __my_cpu_offset(void)
 }
 #define __my_cpu_offset __my_cpu_offset()
 
+#else	/* !CONFIG_SMP */
+
+#define set_my_cpu_offset(x)	do { } while (0)
+
+#endif /* CONFIG_SMP */
+
 #include <asm-generic/percpu.h>
 
 #endif /* __ASM_PERCPU_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b524dcd17243..aa3917c8b623 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -136,11 +136,11 @@ extern struct page *empty_zero_page;
 /*
  * The following only work if pte_present(). Undefined behaviour otherwise.
  */
-#define pte_present(pte)	(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))
-#define pte_dirty(pte)		(pte_val(pte) & PTE_DIRTY)
-#define pte_young(pte)		(pte_val(pte) & PTE_AF)
-#define pte_special(pte)	(pte_val(pte) & PTE_SPECIAL)
-#define pte_write(pte)		(pte_val(pte) & PTE_WRITE)
+#define pte_present(pte)	(!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
+#define pte_dirty(pte)		(!!(pte_val(pte) & PTE_DIRTY))
+#define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
+#define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
+#define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 
 #define pte_valid_user(pte) \
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 3d5cf064d7a1..c45b7b1b7197 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -132,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 	"	cbnz	%w0, 2b\n"
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
-	: "cc", "memory");
+	: "memory");
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *rw)
@@ -146,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	"1:\n"
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
-	: "cc", "memory");
+	: "memory");
 
 	return !tmp;
 }
@@ -187,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 	"	cbnz	%w1, 2b\n"
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
-	: "cc", "memory");
+	: "memory");
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
@@ -201,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 	"	cbnz	%w1, 1b\n"
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
-	: "cc", "memory");
+	: "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
@@ -216,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	"1:\n"
 	: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
 	:
-	: "cc", "memory");
+	: "memory");
 
 	return !tmp2;
 }
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 58125bf008d3..bb8eb8a78e67 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -399,7 +399,10 @@ __SYSCALL(374, compat_sys_sendmmsg)
 __SYSCALL(375, sys_setns)
 __SYSCALL(376, compat_sys_process_vm_readv)
 __SYSCALL(377, compat_sys_process_vm_writev)
-__SYSCALL(378, sys_ni_syscall)			/* 378 for kcmp */
+__SYSCALL(378, sys_kcmp)
+__SYSCALL(379, sys_finit_module)
+__SYSCALL(380, sys_sched_setattr)
+__SYSCALL(381, sys_sched_getattr)
 
 #define __NR_compat_syscalls		379
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 495ab6f84a61..eaf54a30bedc 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -148,6 +148,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 63c48ffdf230..7787208e8cc6 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -38,12 +38,13 @@ __kuser_cmpxchg64:			// 0xffff0f60
 	.inst	0xe92d00f0		//	push		{r4, r5, r6, r7}
 	.inst	0xe1c040d0		//	ldrd		r4, r5, [r0]
 	.inst	0xe1c160d0		//	ldrd		r6, r7, [r1]
-	.inst	0xe1b20e9f		// 1:	ldaexd		r0, r1, [r2]
+	.inst	0xe1b20f9f		// 1:	ldrexd		r0, r1, [r2]
 	.inst	0xe0303004		//	eors		r3, r0, r4
 	.inst	0x00313005		//	eoreqs		r3, r1, r5
 	.inst	0x01a23e96		//	stlexdeq	r3, r6, [r2]
 	.inst	0x03330001		//	teqeq		r3, #1
 	.inst	0x0afffff9		//	beq		1b
+	.inst	0xf57ff05b		//	dmb		ish
 	.inst	0xe2730000		//	rsbs		r0, r3, #0
 	.inst	0xe8bd00f0		//	pop		{r4, r5, r6, r7}
 	.inst	0xe12fff1e		//	bx		lr
@@ -55,11 +56,12 @@ __kuser_memory_barrier:			// 0xffff0fa0
 
 	.align	5
 __kuser_cmpxchg:			// 0xffff0fc0
-	.inst	0xe1923e9f		// 1:	ldaex		r3, [r2]
+	.inst	0xe1923f9f		// 1:	ldrex		r3, [r2]
 	.inst	0xe0533000		//	subs		r3, r3, r0
 	.inst	0x01823e91		//	stlexeq		r3, r1, [r2]
 	.inst	0x03330001		//	teqeq		r3, #1
 	.inst	0x0afffffa		//	beq		1b
+	.inst	0xf57ff05b		//	dmb		ish
 	.inst	0xe2730000		//	rsbs		r0, r3, #0
 	.inst	0xe12fff1e		//	bx		lr
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c3b6c63ea5fb..38f0558f0c0a 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -48,7 +48,11 @@ int unwind_frame(struct stackframe *frame)
 
 	frame->sp = fp + 0x10;
 	frame->fp = *(unsigned long *)(fp);
-	frame->pc = *(unsigned long *)(fp + 8);
+	/*
+	 * -4 here because we care about the PC at time of bl,
+	 * not where the return will go.
+	 */
+	frame->pc = *(unsigned long *)(fp + 8) - 4;
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 65d40cf6945a..a7149cae1615 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -238,6 +238,8 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->use_syscall			= use_syscall;
 	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
 	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec;
+	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
 
 	if (!use_syscall) {
 		vdso_data->cs_cycle_last	= tk->clock->cycle_last;
@@ -245,8 +247,6 @@ void update_vsyscall(struct timekeeper *tk)
 		vdso_data->xtime_clock_nsec	= tk->xtime_nsec;
 		vdso_data->cs_mult		= tk->mult;
 		vdso_data->cs_shift		= tk->shift;
-		vdso_data->wtm_clock_sec	= tk->wall_to_monotonic.tv_sec;
-		vdso_data->wtm_clock_nsec	= tk->wall_to_monotonic.tv_nsec;
 	}
 
 	smp_wmb();
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d8064af42e62..6d20b7d162d8 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S
 
 # Actual build commands
 quiet_cmd_vdsold = VDSOL $@
-      cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
 quiet_cmd_vdsoas = VDSOA $@
       cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
 
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index f0a6d10b5211..fe652ffd34c2 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime)
 	bl	__do_get_tspec
 	seqcnt_check w9, 1b
 
+	mov	x30, x2
+
 	cmp	w0, #CLOCK_MONOTONIC
 	b.ne	6f
 
@@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime)
 	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
 	b.ne	8f
 
+	/* xtime_coarse_nsec is already right-shifted */
+	mov	x12, #0
+
 	/* Get coarse timespec. */
 	adr	vdso_data, _vdso_data
 3:	seqcnt_acquire
@@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime)
 	lsr	x11, x11, x12
 	stp	x10, x11, [x1, #TSPEC_TV_SEC]
 	mov	x0, xzr
-	ret	x2
+	ret
 7:
 	mov	x30, x2
 8:	/* Syscall fallback. */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 3b47c36e10ff..2c56012cb2d2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -694,6 +694,24 @@ __hyp_panic_str:
 
 	.align	2
 
+/*
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed).  The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
+ * passed in r0 and r1.
+ *
+ * A function pointer with a value of 0 has a special meaning, and is
+ * used to implement __hyp_get_vectors in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
+ */
 ENTRY(kvm_call_hyp)
 	hvc	#0
 	ret
@@ -737,7 +755,12 @@ el1_sync:					// Guest trapped into EL2
 	pop	x2, x3
 	pop	x0, x1
 
-	push	lr, xzr
+	/* Check for __hyp_get_vectors */
+	cbnz	x0, 1f
+	mrs	x0, vbar_el2
+	b	2f
+
+1:	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
@@ -750,7 +773,7 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
-	eret
+2:	eret
 
 el1_trap:
 	/*
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
index e5db797790d3..7dac371cc9a2 100644
--- a/arch/arm64/lib/bitops.S
+++ b/arch/arm64/lib/bitops.S
@@ -46,11 +46,12 @@ ENTRY(	\name	)
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
 	lsl	x4, x2, x3		// Create mask
-1:	ldaxr	x2, [x1]
+1:	ldxr	x2, [x1]
 	lsr	x0, x2, x3		// Save old value of bit
 	\instr	x2, x2, x4		// toggle bit
 	stlxr	w5, x2, [x1]
 	cbnz	w5, 1b
+	dmb	ish
 	and	x0, x0, #1
 3:	ret
 ENDPROC(\name	)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 45b5ab54c9ee..fbd76785c5db 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -45,6 +45,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_DMA_CMA)) {
 		struct page *page;
 
+		size = PAGE_ALIGN(size);
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
 							get_order(size));
 		if (!page)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f557ebbe7013..f8dc7e8fce6f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -203,10 +203,18 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 	do {
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
-		if (((addr | next | phys) & ~SECTION_MASK) == 0)
+		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+			pmd_t old_pmd =*pmd;
 			set_pmd(pmd, __pmd(phys | prot_sect_kernel));
-		else
+			/*
+			 * Check for previous table entries created during
+			 * boot (__create_page_tables) and flush them.
+			 */
+			if (!pmd_none(old_pmd))
+				flush_tlb_all();
+		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys));
+		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
 }
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 7083cdada657..62c6101df260 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -32,17 +32,10 @@
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *new_pgd;
-
 	if (PGD_SIZE == PAGE_SIZE)
-		new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+		return (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	else
-		new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL);
-
-	if (!new_pgd)
-		return NULL;
-
-	return new_pgd;
+		return kzalloc(PGD_SIZE, GFP_KERNEL);
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile
index 22fb66590dcd..dba48a5d5bb9 100644
--- a/arch/avr32/Makefile
+++ b/arch/avr32/Makefile
@@ -11,7 +11,7 @@ all: uImage vmlinux.elf
 
 KBUILD_DEFCONFIG	:= atstk1002_defconfig
 
-KBUILD_CFLAGS	+= -pipe -fno-builtin -mno-pic
+KBUILD_CFLAGS	+= -pipe -fno-builtin -mno-pic -D__linux__
 KBUILD_AFLAGS	+= -mrelax -mno-pic
 KBUILD_CFLAGS_MODULE += -mno-relax
 LDFLAGS_vmlinux	+= --relax
diff --git a/arch/avr32/boards/mimc200/fram.c b/arch/avr32/boards/mimc200/fram.c
index 9764a1a1073e..c1466a872b9c 100644
--- a/arch/avr32/boards/mimc200/fram.c
+++ b/arch/avr32/boards/mimc200/fram.c
@@ -11,6 +11,7 @@
 #define FRAM_VERSION	"1.0"
 
 #include <linux/miscdevice.h>
+#include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/mm.h>
 #include <linux/io.h>
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index cfb9fe1b8df9..c7c64a63c29f 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -17,5 +17,6 @@ generic-y       += scatterlist.h
 generic-y       += sections.h
 generic-y       += topology.h
 generic-y	+= trace_clock.h
+generic-y += vga.h
 generic-y       += xor.h
 generic-y	+= hash.h
diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h
index fc6483f83ccc..4f5ec2bb7172 100644
--- a/arch/avr32/include/asm/io.h
+++ b/arch/avr32/include/asm/io.h
@@ -295,6 +295,8 @@ extern void __iounmap(void __iomem *addr);
 #define iounmap(addr)				\
 	__iounmap(addr)
 
+#define ioremap_wc ioremap_nocache
+
 #define cached(addr) P1SEGADDR(addr)
 #define uncached(addr) P2SEGADDR(addr)
 
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index afd45e0d552e..ae763d8bf55a 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls			312 /* length of syscall table */
+#define NR_syscalls			314 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h
index 34fd6fe46da1..715e85f858de 100644
--- a/arch/ia64/include/uapi/asm/unistd.h
+++ b/arch/ia64/include/uapi/asm/unistd.h
@@ -325,5 +325,7 @@
 #define __NR_process_vm_writev		1333
 #define __NR_accept4			1334
 #define __NR_finit_module		1335
+#define __NR_sched_setattr		1336
+#define __NR_sched_getattr		1337
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index ddea607f948a..fa8d61a312a7 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1773,6 +1773,8 @@ sys_call_table:
 	data8 sys_process_vm_writev
 	data8 sys_accept4
 	data8 sys_finit_module			// 1335
+	data8 sys_sched_setattr
+	data8 sys_sched_getattr
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 7cc8c364924d..6fb9e813a910 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -1,4 +1,4 @@
-
+generic-y += barrier.h
 generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += cputime.h
@@ -6,6 +6,7 @@ generic-y += device.h
 generic-y += emergency-restart.h
 generic-y += errno.h
 generic-y += exec.h
+generic-y += hash.h
 generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
@@ -18,6 +19,7 @@ generic-y += local.h
 generic-y += mman.h
 generic-y += mutex.h
 generic-y += percpu.h
+generic-y += preempt.h
 generic-y += resource.h
 generic-y += scatterlist.h
 generic-y += sections.h
@@ -31,5 +33,3 @@ generic-y += trace_clock.h
 generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += preempt.h
-generic-y += hash.h
diff --git a/arch/m68k/include/asm/barrier.h b/arch/m68k/include/asm/barrier.h
deleted file mode 100644
index 15c5f77c1614..000000000000
--- a/arch/m68k/include/asm/barrier.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _M68K_BARRIER_H
-#define _M68K_BARRIER_H
-
-#define nop()		do { asm volatile ("nop"); barrier(); } while (0)
-
-#include <asm-generic/barrier.h>
-
-#endif /* _M68K_BARRIER_H */
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 014f288fc813..9d38b73989eb 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		349
+#define NR_syscalls		351
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h
index 625f321001dc..b932dd470041 100644
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -354,5 +354,7 @@
 #define __NR_process_vm_writev	346
 #define __NR_kcmp		347
 #define __NR_finit_module	348
+#define __NR_sched_setattr	349
+#define __NR_sched_getattr	350
 
 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 3f04ea0ab802..b6223dc41d82 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -369,4 +369,6 @@ ENTRY(sys_call_table)
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_setattr
+	.long sys_sched_getattr		/* 350 */
 
diff --git a/arch/microblaze/include/asm/delay.h b/arch/microblaze/include/asm/delay.h
index 05b7d39e4391..66fc24c24238 100644
--- a/arch/microblaze/include/asm/delay.h
+++ b/arch/microblaze/include/asm/delay.h
@@ -13,6 +13,8 @@
 #ifndef _ASM_MICROBLAZE_DELAY_H
 #define _ASM_MICROBLAZE_DELAY_H
 
+#include <linux/param.h>
+
 extern inline void __delay(unsigned long loops)
 {
 	asm volatile ("# __delay		\n\t"		\
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index a2cea7206077..3fbb7f1db3bc 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -89,6 +89,11 @@ static inline unsigned int readl(const volatile void __iomem *addr)
 {
 	return le32_to_cpu(*(volatile unsigned int __force *)addr);
 }
+#define readq readq
+static inline u64 readq(const volatile void __iomem *addr)
+{
+	return le64_to_cpu(__raw_readq(addr));
+}
 static inline void writeb(unsigned char v, volatile void __iomem *addr)
 {
 	*(volatile unsigned char __force *)addr = v;
@@ -101,6 +106,7 @@ static inline void writel(unsigned int v, volatile void __iomem *addr)
 {
 	*(volatile unsigned int __force *)addr = cpu_to_le32(v);
 }
+#define writeq(b, addr) __raw_writeq(cpu_to_le64(b), addr)
 
 /* ioread and iowrite variants. thease are for now same as __raw_
  * variants of accessors. we might check for endianess in the feature
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index b7fb0438458c..17645b2e2f07 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -66,7 +66,7 @@ real_start:
 	mts	rmsr, r0
 /* Disable stack protection from bootloader */
 	mts	rslr, r0
-	addi	r8, r0, 0xFFFFFFF
+	addi	r8, r0, 0xFFFFFFFF
 	mts	rshr, r8
 /*
  * According to Xilinx, msrclr instruction behaves like 'mfs rX,rpc'
diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c
index 11f3ad20321c..5483906e0f86 100644
--- a/arch/mips/alchemy/devboards/db1000.c
+++ b/arch/mips/alchemy/devboards/db1000.c
@@ -534,13 +534,10 @@ static int __init db1000_dev_init(void)
 		s0 = AU1100_GPIO1_INT;
 		s1 = AU1100_GPIO4_INT;
 
+		gpio_request(19, "sd0_cd");
+		gpio_request(20, "sd1_cd");
 		gpio_direction_input(19);	/* sd0 cd# */
 		gpio_direction_input(20);	/* sd1 cd# */
-		gpio_direction_input(21);	/* touch pendown# */
-		gpio_direction_input(207);	/* SPI MISO */
-		gpio_direction_output(208, 0);	/* SPI MOSI */
-		gpio_direction_output(209, 1);	/* SPI SCK */
-		gpio_direction_output(210, 1);	/* SPI CS# */
 
 		/* spi_gpio on SSI0 pins */
 		pfc = __raw_readl((void __iomem *)SYS_PINFUNC);
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index cfe092fc720d..6b9749540edf 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -74,6 +74,8 @@ static inline int __enable_fpu(enum fpu_mode mode)
 	default:
 		BUG();
 	}
+
+	return SIGFPE;
 }
 
 #define __disable_fpu()							\
diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 1dee279f9665..d6e154a9e6a5 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -369,16 +369,18 @@
 #define __NR_process_vm_writev		(__NR_Linux + 346)
 #define __NR_kcmp			(__NR_Linux + 347)
 #define __NR_finit_module		(__NR_Linux + 348)
+#define __NR_sched_setattr		(__NR_Linux + 349)
+#define __NR_sched_getattr		(__NR_Linux + 350)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		348
+#define __NR_Linux_syscalls		350
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		348
+#define __NR_O32_Linux_syscalls		350
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -695,16 +697,18 @@
 #define __NR_kcmp			(__NR_Linux + 306)
 #define __NR_finit_module		(__NR_Linux + 307)
 #define __NR_getdents64			(__NR_Linux + 308)
+#define __NR_sched_setattr		(__NR_Linux + 309)
+#define __NR_sched_getattr		(__NR_Linux + 310)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		308
+#define __NR_Linux_syscalls		310
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		308
+#define __NR_64_Linux_syscalls		310
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1025,15 +1029,17 @@
 #define __NR_process_vm_writev		(__NR_Linux + 310)
 #define __NR_kcmp			(__NR_Linux + 311)
 #define __NR_finit_module		(__NR_Linux + 312)
+#define __NR_sched_setattr		(__NR_Linux + 313)
+#define __NR_sched_getattr		(__NR_Linux + 314)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		312
+#define __NR_Linux_syscalls		314
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		312
+#define __NR_N32_Linux_syscalls		314
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index e8e541b40d86..a5b14f48e1af 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -563,3 +563,5 @@ EXPORT(sys_call_table)
 	PTR	sys_process_vm_writev
 	PTR	sys_kcmp
 	PTR	sys_finit_module
+	PTR	sys_sched_setattr
+	PTR	sys_sched_getattr		/* 4350 */
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 57e3742fec59..b56e254beb15 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -425,4 +425,6 @@ EXPORT(sys_call_table)
 	PTR	sys_kcmp
 	PTR	sys_finit_module
 	PTR	sys_getdents64
+	PTR	sys_sched_setattr
+	PTR	sys_sched_getattr		/* 5310 */
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 2f48f5934399..f7e5b72cf481 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -418,4 +418,6 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_process_vm_writev	/* 6310 */
 	PTR	sys_kcmp
 	PTR	sys_finit_module
+	PTR	sys_sched_setattr
+	PTR	sys_sched_getattr
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f1acdb429f4f..6788727d91af 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -541,4 +541,6 @@ EXPORT(sys32_call_table)
 	PTR	compat_sys_process_vm_writev
 	PTR	sys_kcmp
 	PTR	sys_finit_module
+	PTR	sys_sched_setattr
+	PTR	sys_sched_getattr		/* 4350 */
 	.size	sys32_call_table,.-sys32_call_table
diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c
index 88d0962de65a..2bedafea3d94 100644
--- a/arch/parisc/hpux/fs.c
+++ b/arch/parisc/hpux/fs.c
@@ -33,22 +33,9 @@
 
 int hpux_execve(struct pt_regs *regs)
 {
-	int error;
-	struct filename *filename;
-
-	filename = getname((const char __user *) regs->gr[26]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name,
+	return  do_execve(getname((const char __user *) regs->gr[26]),
 			  (const char __user *const __user *) regs->gr[25],
 			  (const char __user *const __user *) regs->gr[24]);
-
-	putname(filename);
-
-out:
-	return error;
 }
 
 struct hpux_dirent {
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 84fdf6857c31..a613d2c82fd9 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -200,10 +200,11 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 
 	/*
 	 * We can't access below the stack pointer in the 32bit ABI and
-	 * can access 288 bytes in the 64bit ABI
+	 * can access 288 bytes in the 64bit big-endian ABI,
+	 * or 512 bytes with the new ELFv2 little-endian ABI.
 	 */
 	if (!is_32bit_task())
-		usp -= 288;
+		usp -= USER_REDZONE_SIZE;
 
 	return (void __user *) (usp - len);
 }
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index e27e9ad6818e..150866b2a3fe 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -134,6 +134,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 }
 
 extern int dma_set_mask(struct device *dev, u64 dma_mask);
+extern int __dma_set_mask(struct device *dev, u64 dma_mask);
 
 #define dma_alloc_coherent(d,s,h,f)	dma_alloc_attrs(d,s,h,f,NULL)
 
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9e39ceb1d19f..d4dd41fb951b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -172,10 +172,20 @@ struct eeh_ops {
 };
 
 extern struct eeh_ops *eeh_ops;
-extern int eeh_subsystem_enabled;
+extern bool eeh_subsystem_enabled;
 extern raw_spinlock_t confirm_error_lock;
 extern int eeh_probe_mode;
 
+static inline bool eeh_enabled(void)
+{
+	return eeh_subsystem_enabled;
+}
+
+static inline void eeh_set_enable(bool mode)
+{
+	eeh_subsystem_enabled = mode;
+}
+
 #define EEH_PROBE_MODE_DEV	(1<<0)	/* From PCI device	*/
 #define EEH_PROBE_MODE_DEVTREE	(1<<1)	/* From device tree	*/
 
@@ -246,7 +256,7 @@ void eeh_remove_device(struct pci_dev *);
  * If this macro yields TRUE, the caller relays to eeh_check_failure()
  * which does further tests out of line.
  */
-#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0 && eeh_subsystem_enabled)
+#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0 && eeh_enabled())
 
 /*
  * Reads from a device which has been isolated by EEH will return
@@ -257,6 +267,13 @@ void eeh_remove_device(struct pci_dev *);
 
 #else /* !CONFIG_EEH */
 
+static inline bool eeh_enabled(void)
+{
+        return false;
+}
+
+static inline void eeh_set_enable(bool mode) { }
+
 static inline int eeh_init(void)
 {
 	return 0;
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index d750336b171d..623f2971ce0e 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -127,7 +127,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 {
 #ifdef CONFIG_PPC64
-	return __pte(pte_update(mm, addr, ptep, ~0UL, 1));
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
 #else
 	return __pte(pte_update(ptep, ~0UL, 0));
 #endif
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index f7a8036579b5..42632c7a2a4e 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -77,6 +77,7 @@ struct iommu_table {
 #ifdef CONFIG_IOMMU_API
 	struct iommu_group *it_group;
 #endif
+	void (*set_bypass)(struct iommu_table *tbl, bool enable);
 };
 
 /* Pure 2^n version of get_order */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 40157e2ca691..ed82142a3251 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -816,8 +816,8 @@ int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
 int64_t opal_pci_poll(uint64_t phb_id);
 int64_t opal_return_cpu(void);
 
-int64_t opal_xscom_read(uint32_t gcid, uint32_t pcb_addr, __be64 *val);
-int64_t opal_xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val);
+int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val);
+int64_t opal_xscom_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val);
 
 int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 		       uint32_t addr, uint32_t data, uint32_t sz);
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index bc141c950b1e..eb9261024f51 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -195,6 +195,7 @@ extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 static inline unsigned long pte_update(struct mm_struct *mm,
 				       unsigned long addr,
 				       pte_t *ptep, unsigned long clr,
+				       unsigned long set,
 				       int huge)
 {
 #ifdef PTE_ATOMIC_UPDATES
@@ -205,14 +206,15 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	andi.	%1,%0,%6\n\
 	bne-	1b \n\
 	andc	%1,%0,%4 \n\
+	or	%1,%1,%7\n\
 	stdcx.	%1,0,%3 \n\
 	bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
 	: "cc" );
 #else
 	unsigned long old = pte_val(*ptep);
-	*ptep = __pte(old & ~clr);
+	*ptep = __pte((old & ~clr) | set);
 #endif
 	/* huge pages use the old page table lock */
 	if (!huge)
@@ -231,9 +233,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 {
 	unsigned long old;
 
-       	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
 		return 0;
-	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0);
+	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
 	return (old & _PAGE_ACCESSED) != 0;
 }
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
@@ -252,7 +254,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	if ((pte_val(*ptep) & _PAGE_RW) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_RW, 0);
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
@@ -261,7 +263,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	if ((pte_val(*ptep) & _PAGE_RW) == 0)
 		return;
 
-	pte_update(mm, addr, ptep, _PAGE_RW, 1);
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
 }
 
 /*
@@ -284,14 +286,14 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long addr, pte_t *ptep)
 {
-	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0);
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
 	return __pte(old);
 }
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 			     pte_t * ptep)
 {
-	pte_update(mm, addr, ptep, ~0UL, 0);
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
 }
 
 
@@ -506,7 +508,9 @@ extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 
 extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
 					 unsigned long addr,
-					 pmd_t *pmdp, unsigned long clr);
+					 pmd_t *pmdp,
+					 unsigned long clr,
+					 unsigned long set);
 
 static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 					      unsigned long addr, pmd_t *pmdp)
@@ -515,7 +519,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 
 	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
 		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED);
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
 	return ((old & _PAGE_ACCESSED) != 0);
 }
 
@@ -542,7 +546,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
 		return;
 
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW);
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
 }
 
 #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index f83b6f3e1b39..3ebb188c3ff5 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -75,12 +75,34 @@ static inline pte_t pte_mknuma(pte_t pte)
 	return pte;
 }
 
+#define ptep_set_numa ptep_set_numa
+static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
+				 pte_t *ptep)
+{
+	if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
+		VM_BUG_ON(1);
+
+	pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
+	return;
+}
+
 #define pmd_numa pmd_numa
 static inline int pmd_numa(pmd_t pmd)
 {
 	return pte_numa(pmd_pte(pmd));
 }
 
+#define pmdp_set_numa pmdp_set_numa
+static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
+				 pmd_t *pmdp)
+{
+	if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
+		VM_BUG_ON(1);
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
+	return;
+}
+
 #define pmd_mknonnuma pmd_mknonnuma
 static inline pmd_t pmd_mknonnuma(pmd_t pmd)
 {
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index becc08e6a65c..279b80f3bb29 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -28,11 +28,23 @@
 
 #ifdef __powerpc64__
 
+/*
+ * Size of redzone that userspace is allowed to use below the stack
+ * pointer.  This is 288 in the 64-bit big-endian ELF ABI, and 512 in
+ * the new ELFv2 little-endian ABI, so we allow the larger amount.
+ *
+ * For kernel code we allow a 288-byte redzone, in order to conserve
+ * kernel stack space; gcc currently only uses 288 bytes, and will
+ * hopefully allow explicit control of the redzone size in future.
+ */
+#define USER_REDZONE_SIZE	512
+#define KERNEL_REDZONE_SIZE	288
+
 #define STACK_FRAME_OVERHEAD	112	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
 #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x7265677368657265)
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + \
-					STACK_FRAME_OVERHEAD + 288)
+				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
 #define STACK_FRAME_MARKER	12
 
 /* Size of dummy stack frame allocated when calling signal handler. */
@@ -41,6 +53,8 @@
 
 #else /* __powerpc64__ */
 
+#define USER_REDZONE_SIZE	0
+#define KERNEL_REDZONE_SIZE	0
 #define STACK_FRAME_OVERHEAD	16	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
 #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x72656773)
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 4ee06fe15de4..d0e784e0ff48 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -8,6 +8,7 @@
 
 #ifdef __powerpc64__
 
+extern char __start_interrupts[];
 extern char __end_interrupts[];
 
 extern char __prom_init_toc_start[];
@@ -21,6 +22,17 @@ static inline int in_kernel_text(unsigned long addr)
 	return 0;
 }
 
+static inline int overlaps_interrupt_vector_text(unsigned long start,
+							unsigned long end)
+{
+	unsigned long real_start, real_end;
+	real_start = __start_interrupts - _stext;
+	real_end = __end_interrupts - _stext;
+
+	return start < (unsigned long)__va(real_end) &&
+		(unsigned long)__va(real_start) < end;
+}
+
 static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
 {
 	return start < (unsigned long)__init_end &&
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index 0d9cecddf8a4..c53f5f6d1761 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -4,11 +4,11 @@
 #ifdef __KERNEL__
 
 /* Default link addresses for the vDSOs */
-#define VDSO32_LBASE	0x100000
-#define VDSO64_LBASE	0x100000
+#define VDSO32_LBASE	0x0
+#define VDSO64_LBASE	0x0
 
 /* Default map addresses for 32bit vDSO */
-#define VDSO32_MBASE	VDSO32_LBASE
+#define VDSO32_MBASE	0x100000
 
 #define VDSO_VERSION_STRING	LINUX_2.6.15
 
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 11c1d069d920..7a13f378ca2c 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 			size_t csize, unsigned long offset, int userbuf)
 {
 	void  *vaddr;
+	phys_addr_t paddr;
 
 	if (!csize)
 		return 0;
 
 	csize = min_t(size_t, csize, PAGE_SIZE);
+	paddr = pfn << PAGE_SHIFT;
 
-	if ((min_low_pfn < pfn) && (pfn < max_pfn)) {
-		vaddr = __va(pfn << PAGE_SHIFT);
+	if (memblock_is_region_memory(paddr, csize)) {
+		vaddr = __va(paddr);
 		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
 	} else {
-		vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0);
+		vaddr = __ioremap(paddr, PAGE_SIZE, 0);
 		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
 		iounmap(vaddr);
 	}
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 8032b97ccdcb..ee78f6e49d64 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -191,12 +191,10 @@ EXPORT_SYMBOL(dma_direct_ops);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
 
-int dma_set_mask(struct device *dev, u64 dma_mask)
+int __dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
-	if (ppc_md.dma_set_mask)
-		return ppc_md.dma_set_mask(dev, dma_mask);
 	if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
 		return dma_ops->set_dma_mask(dev, dma_mask);
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
@@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (ppc_md.dma_set_mask)
+		return ppc_md.dma_set_mask(dev, dma_mask);
+	return __dma_set_mask(dev, dma_mask);
+}
 EXPORT_SYMBOL(dma_set_mask);
 
 u64 dma_get_required_mask(struct device *dev)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 148db72a8c43..e7b76a6bf150 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/proc_fs.h>
 #include <linux/rbtree.h>
+#include <linux/reboot.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/export.h>
@@ -89,7 +90,7 @@
 /* Platform dependent EEH operations */
 struct eeh_ops *eeh_ops = NULL;
 
-int eeh_subsystem_enabled;
+bool eeh_subsystem_enabled = false;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
 
 /*
@@ -364,7 +365,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
 	eeh_stats.total_mmio_ffs++;
 
-	if (!eeh_subsystem_enabled)
+	if (!eeh_enabled())
 		return 0;
 
 	if (!edev) {
@@ -747,6 +748,17 @@ int __exit eeh_ops_unregister(const char *name)
 	return -EEXIST;
 }
 
+static int eeh_reboot_notifier(struct notifier_block *nb,
+			       unsigned long action, void *unused)
+{
+	eeh_set_enable(false);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_reboot_nb = {
+	.notifier_call = eeh_reboot_notifier,
+};
+
 /**
  * eeh_init - EEH initialization
  *
@@ -778,6 +790,14 @@ int eeh_init(void)
 	if (machine_is(powernv) && cnt++ <= 0)
 		return ret;
 
+	/* Register reboot notifier */
+	ret = register_reboot_notifier(&eeh_reboot_nb);
+	if (ret) {
+		pr_warn("%s: Failed to register notifier (%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
 	/* call platform initialization function */
 	if (!eeh_ops) {
 		pr_warning("%s: Platform EEH operation not found\n",
@@ -822,7 +842,7 @@ int eeh_init(void)
 			return ret;
 	}
 
-	if (eeh_subsystem_enabled)
+	if (eeh_enabled())
 		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
 	else
 		pr_warning("EEH: No capable adapters found\n");
@@ -897,7 +917,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 	struct device_node *dn;
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_subsystem_enabled)
+	if (!dev || !eeh_enabled())
 		return;
 
 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
@@ -1005,7 +1025,7 @@ void eeh_remove_device(struct pci_dev *dev)
 {
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_subsystem_enabled)
+	if (!dev || !eeh_enabled())
 		return;
 	edev = pci_dev_to_eeh_dev(dev);
 
@@ -1045,7 +1065,7 @@ void eeh_remove_device(struct pci_dev *dev)
 
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
-	if (0 == eeh_subsystem_enabled) {
+	if (!eeh_enabled()) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
 		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
 	} else {
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7bb30dca4e19..fdc679d309ec 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -362,9 +362,13 @@ static void *eeh_rmv_device(void *data, void *userdata)
 	 */
 	if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
 		return NULL;
+
 	driver = eeh_pcid_get(dev);
-	if (driver && driver->err_handler)
-		return NULL;
+	if (driver) {
+		eeh_pcid_put(dev);
+		if (driver->err_handler)
+			return NULL;
+	}
 
 	/* Remove it from PCI subsystem */
 	pr_debug("EEH: Removing %s without EEH sensitive driver\n",
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 9b27b293a922..b0ded97ee4e1 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -74,6 +74,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
  */
 static int test_24bit_addr(unsigned long ip, unsigned long addr)
 {
+	addr = ppc_function_entry((void *)addr);
 
 	/* use the create_branch to verify that this offset can be branched */
 	return create_branch((unsigned int *)ip, addr, 0);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index d773dd440a45..88e3ec6e1d96 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
 	memset(tbl->it_map, 0xff, sz);
 	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
 
+	/*
+	 * Disable iommu bypass, otherwise the user can DMA to all of
+	 * our physical memory via the bypass window instead of just
+	 * the pages that has been explicitly mapped into the iommu
+	 */
+	if (tbl->set_bypass)
+		tbl->set_bypass(tbl, false);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iommu_take_ownership);
@@ -1102,6 +1110,10 @@ void iommu_release_ownership(struct iommu_table *tbl)
 	/* Restore bit#0 set by iommu_init_table() */
 	if (tbl->it_offset == 0)
 		set_bit(0, tbl->it_map);
+
+	/* The kernel owns the device now, we can restore the iommu bypass */
+	if (tbl->set_bypass)
+		tbl->set_bypass(tbl, true);
 }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9729b23bfb0a..1d0848bba049 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -559,8 +559,13 @@ void exc_lvl_ctx_init(void)
 #ifdef CONFIG_PPC64
 		cpu_nr = i;
 #else
+#ifdef CONFIG_SMP
 		cpu_nr = get_hard_smp_processor_id(i);
+#else
+		cpu_nr = 0;
 #endif
+#endif
+
 		memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
 		tp = critirq_ctx[cpu_nr];
 		tp->cpu = cpu_nr;
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 75d4f7340da8..015ae55c1868 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -196,7 +196,9 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)
 
 /* Values we need to export to the second kernel via the device tree. */
 static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
 static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
 
 static struct property kernel_end_prop = {
 	.name = "linux,kernel-end",
@@ -207,7 +209,7 @@ static struct property kernel_end_prop = {
 static struct property crashk_base_prop = {
 	.name = "linux,crashkernel-base",
 	.length = sizeof(phys_addr_t),
-	.value = &crashk_res.start,
+	.value = &crashk_base
 };
 
 static struct property crashk_size_prop = {
@@ -219,9 +221,11 @@ static struct property crashk_size_prop = {
 static struct property memory_limit_prop = {
 	.name = "linux,memory-limit",
 	.length = sizeof(unsigned long long),
-	.value = &memory_limit,
+	.value = &mem_limit,
 };
 
+#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG)
+
 static void __init export_crashk_values(struct device_node *node)
 {
 	struct property *prop;
@@ -237,8 +241,9 @@ static void __init export_crashk_values(struct device_node *node)
 		of_remove_property(node, prop);
 
 	if (crashk_res.start != 0) {
+		crashk_base = cpu_to_be_ulong(crashk_res.start),
 		of_add_property(node, &crashk_base_prop);
-		crashk_size = resource_size(&crashk_res);
+		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
 		of_add_property(node, &crashk_size_prop);
 	}
 
@@ -246,6 +251,7 @@ static void __init export_crashk_values(struct device_node *node)
 	 * memory_limit is required by the kexec-tools to limit the
 	 * crash regions to the actual memory used.
 	 */
+	mem_limit = cpu_to_be_ulong(memory_limit);
 	of_update_property(node, &memory_limit_prop);
 }
 
@@ -264,7 +270,7 @@ static int __init kexec_setup(void)
 		of_remove_property(node, prop);
 
 	/* information needed by userspace when using default_machine_kexec */
-	kernel_end = __pa(_end);
+	kernel_end = cpu_to_be_ulong(__pa(_end));
 	of_add_property(node, &kernel_end_prop);
 
 	export_crashk_values(node);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index be4e6d648f60..59d229a2a3e0 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -369,6 +369,7 @@ void default_machine_kexec(struct kimage *image)
 
 /* Values we need to export to the second kernel via the device tree. */
 static unsigned long htab_base;
+static unsigned long htab_size;
 
 static struct property htab_base_prop = {
 	.name = "linux,htab-base",
@@ -379,7 +380,7 @@ static struct property htab_base_prop = {
 static struct property htab_size_prop = {
 	.name = "linux,htab-size",
 	.length = sizeof(unsigned long),
-	.value = &htab_size_bytes,
+	.value = &htab_size,
 };
 
 static int __init export_htab_values(void)
@@ -403,8 +404,9 @@ static int __init export_htab_values(void)
 	if (prop)
 		of_remove_property(node, prop);
 
-	htab_base = __pa(htab_address);
+	htab_base = cpu_to_be64(__pa(htab_address));
 	of_add_property(node, &htab_base_prop);
+	htab_size = cpu_to_be64(htab_size_bytes);
 	of_add_property(node, &htab_size_prop);
 
 	of_node_put(node);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 879f09620f83..7c6bb4b17b49 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -57,11 +57,14 @@ _GLOBAL(call_do_softirq)
 	mtlr	r0
 	blr
 
+/*
+ * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
+ */
 _GLOBAL(call_do_irq)
 	mflr	r0
 	stw	r0,4(r1)
 	lwz	r10,THREAD+KSP_LIMIT(r2)
-	addi	r11,r3,THREAD_INFO_GAP
+	addi	r11,r4,THREAD_INFO_GAP
 	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
 	mr	r1,r4
 	stw	r10,8(r1)
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index b47a0e1ab001..1482327cfeba 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -69,8 +69,8 @@ _GLOBAL(relocate)
 	 * R_PPC64_RELATIVE ones.
 	 */
 	mtctr	r8
-5:	lwz	r0,12(9)	/* ELF64_R_TYPE(reloc->r_info) */
-	cmpwi	r0,R_PPC64_RELATIVE
+5:	ld	r0,8(9)		/* ELF64_R_TYPE(reloc->r_info) */
+	cmpdi	r0,R_PPC64_RELATIVE
 	bne	6f
 	ld	r6,0(r9)	/* reloc->r_offset */
 	ld	r0,16(r9)	/* reloc->r_addend */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 2b0da27eaee4..04cc4fcca78b 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -247,7 +247,12 @@ static void __init exc_lvl_early_init(void)
 	/* interrupt stacks must be in lowmem, we get that for free on ppc32
 	 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
 	for_each_possible_cpu(i) {
+#ifdef CONFIG_SMP
 		hw_cpu = get_hard_smp_processor_id(i);
+#else
+		hw_cpu = 0;
+#endif
+
 		critirq_ctx[hw_cpu] = (struct thread_info *)
 			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 #ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index e35bf773df7a..8d253c29649b 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -65,8 +65,8 @@ struct rt_sigframe {
 	struct siginfo __user *pinfo;
 	void __user *puc;
 	struct siginfo info;
-	/* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
-	char abigap[288];
+	/* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
+	char abigap[USER_REDZONE_SIZE];
 } __attribute__ ((aligned (16)));
 
 static const char fmt32[] = KERN_INFO \
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
index 79683d0393f5..6ac107ac402a 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -6,7 +6,7 @@
 	.globl vdso32_start, vdso32_end
 	.balign PAGE_SIZE
 vdso32_start:
-	.incbin "arch/powerpc/kernel/vdso32/vdso32.so"
+	.incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg"
 	.balign PAGE_SIZE
 vdso32_end:
 
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
index 8df9e2463007..df60fca6a13d 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -6,7 +6,7 @@
 	.globl vdso64_start, vdso64_end
 	.balign PAGE_SIZE
 vdso64_start:
-	.incbin "arch/powerpc/kernel/vdso64/vdso64.so"
+	.incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg"
 	.balign PAGE_SIZE
 vdso64_end:
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index de6881259aef..d766d6ee33fe 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -207,6 +207,20 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 		if (overlaps_kernel_text(vaddr, vaddr + step))
 			tprot &= ~HPTE_R_N;
 
+		/*
+		 * If relocatable, check if it overlaps interrupt vectors that
+		 * are copied down to real 0. For relocatable kernel
+		 * (e.g. kdump case) we copy interrupt vectors down to real
+		 * address 0. Mark that region as executable. This is
+		 * because on p8 system with relocation on exception feature
+		 * enabled, exceptions are raised with MMU (IR=DR=1) ON. Hence
+		 * in order to execute the interrupt handlers in virtual
+		 * mode the vector region need to be marked as executable.
+		 */
+		if ((PHYSICAL_START > MEMORY_START) &&
+			overlaps_interrupt_vector_text(vaddr, vaddr + step))
+				tprot &= ~HPTE_R_N;
+
 		hash = hpt_hash(vpn, shift, ssize);
 		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 65b7b65e8708..62bf5e8e78da 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -510,7 +510,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 }
 
 unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
-				  pmd_t *pmdp, unsigned long clr)
+				  pmd_t *pmdp, unsigned long clr,
+				  unsigned long set)
 {
 
 	unsigned long old, tmp;
@@ -526,14 +527,15 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 		andi.	%1,%0,%6\n\
 		bne-	1b \n\
 		andc	%1,%0,%4 \n\
+		or	%1,%1,%7\n\
 		stdcx.	%1,0,%3 \n\
 		bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
-	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY)
+	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
 	: "cc" );
 #else
 	old = pmd_val(*pmdp);
-	*pmdp = __pmd(old & ~clr);
+	*pmdp = __pmd((old & ~clr) | set);
 #endif
 	if (old & _PAGE_HASHPTE)
 		hpte_do_hugepage_flush(mm, addr, pmdp);
@@ -708,7 +710,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT);
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
 }
 
 /*
@@ -835,7 +837,7 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 	unsigned long old;
 	pgtable_t *pgtable_slot;
 
-	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
+	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
 	old_pmd = __pmd(old);
 	/*
 	 * We have pmd == none and we are holding page_table_lock.
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index a770df2dae70..6c0b1f5f8d2c 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -78,7 +78,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	for (; npages > 0; --npages) {
-		pte_update(mm, addr, pte, 0, 0);
+		pte_update(mm, addr, pte, 0, 0, 0);
 		addr += PAGE_SIZE;
 		++pte;
 	}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 29b89e863d7c..67cf22083f4c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1147,6 +1147,9 @@ static void power_pmu_enable(struct pmu *pmu)
 	mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]);
 
 	mb();
+	if (cpuhw->bhrb_users)
+		ppmu->config_bhrb(cpuhw->bhrb_filter);
+
 	write_mmcr0(cpuhw, mmcr0);
 
 	/*
@@ -1158,8 +1161,6 @@ static void power_pmu_enable(struct pmu *pmu)
 	}
 
  out:
-	if (cpuhw->bhrb_users)
-		ppmu->config_bhrb(cpuhw->bhrb_filter);
 
 	local_irq_restore(flags);
 }
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index a3f7abd2f13f..96cee20dcd34 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -25,6 +25,37 @@
 #define PM_BRU_FIN			0x10068
 #define PM_BR_MPRED_CMPL		0x400f6
 
+/* All L1 D cache load references counted at finish, gated by reject */
+#define PM_LD_REF_L1			0x100ee
+/* Load Missed L1 */
+#define PM_LD_MISS_L1			0x3e054
+/* Store Missed L1 */
+#define PM_ST_MISS_L1			0x300f0
+/* L1 cache data prefetches */
+#define PM_L1_PREF			0x0d8b8
+/* Instruction fetches from L1 */
+#define PM_INST_FROM_L1			0x04080
+/* Demand iCache Miss */
+#define PM_L1_ICACHE_MISS		0x200fd
+/* Instruction Demand sectors wriittent into IL1 */
+#define PM_L1_DEMAND_WRITE		0x0408c
+/* Instruction prefetch written into IL1 */
+#define PM_IC_PREF_WRITE		0x0408e
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define PM_DATA_FROM_L3			0x4c042
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+#define PM_DATA_FROM_L3MISS		0x300fe
+/* All successful D-side store dispatches for this thread */
+#define PM_L2_ST			0x17080
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define PM_L2_ST_MISS			0x17082
+/* Total HW L3 prefetches(Load+store) */
+#define PM_L3_PREF_ALL			0x4e052
+/* Data PTEG reload */
+#define PM_DTLB_MISS			0x300fc
+/* ITLB Reloaded */
+#define PM_ITLB_MISS			0x400fc
+
 
 /*
  * Raw event encoding for POWER8:
@@ -557,6 +588,8 @@ static int power8_generic_events[] = {
 	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_FIN,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
 };
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
@@ -596,6 +629,116 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter)
 	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
 }
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
+			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_PREF,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
+			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
+			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L2_ST,
+			[ C(RESULT_MISS)   ] = PM_L2_ST_MISS,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_DTLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ITLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_BRU_FIN,
+			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(NODE) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+#undef C
+
 static struct power_pmu power8_pmu = {
 	.name			= "POWER8",
 	.n_counter		= 6,
@@ -611,6 +754,7 @@ static struct power_pmu power8_pmu = {
 	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB,
 	.n_generic		= ARRAY_SIZE(power8_generic_events),
 	.generic_events		= power8_generic_events,
+	.cache_events		= &power8_cache_events,
 	.attr_groups		= power8_pmu_attr_groups,
 	.bhrb_nr		= 32,
 };
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e1e71618b70c..253fefe3d1a0 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -44,7 +44,8 @@ static int ioda_eeh_event(struct notifier_block *nb,
 
 	/* We simply send special EEH event */
 	if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
-	    (events & OPAL_EVENT_PCI_ERROR))
+	    (events & OPAL_EVENT_PCI_ERROR) &&
+	    eeh_enabled())
 		eeh_send_failure_event(NULL);
 
 	return 0;
@@ -113,6 +114,7 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
 			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
 #endif /* CONFIG_DEBUG_FS */
 
+
 /**
  * ioda_eeh_post_init - Chip dependent post initialization
  * @hose: PCI controller
@@ -220,6 +222,22 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 	return ret;
 }
 
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	long rc;
+
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
+	if (rc != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+			    __func__, hose->global_number, rc);
+		return;
+	}
+
+	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
+}
+
 /**
  * ioda_eeh_get_state - Retrieve the state of PE
  * @pe: EEH PE
@@ -271,6 +289,9 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
 			result |= EEH_STATE_DMA_ACTIVE;
 			result |= EEH_STATE_MMIO_ENABLED;
 			result |= EEH_STATE_DMA_ENABLED;
+		} else if (!(pe->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
 		}
 
 		return result;
@@ -314,6 +335,15 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
 			   __func__, fstate, hose->global_number, pe_no);
 	}
 
+	/* Dump PHB diag-data for frozen PE */
+	if (result != EEH_STATE_NOT_SUPPORT &&
+	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(hose);
+	}
+
 	return result;
 }
 
@@ -489,8 +519,7 @@ static int ioda_eeh_bridge_reset(struct pci_controller *hose,
 static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 {
 	struct pci_controller *hose = pe->phb;
-	struct eeh_dev *edev;
-	struct pci_dev *dev;
+	struct pci_bus *bus;
 	int ret;
 
 	/*
@@ -519,73 +548,17 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 	if (pe->type & EEH_PE_PHB) {
 		ret = ioda_eeh_phb_reset(hose, option);
 	} else {
-		if (pe->type & EEH_PE_DEVICE) {
-			/*
-			 * If it's device PE, we didn't refer to the parent
-			 * PCI bus yet. So we have to figure it out indirectly.
-			 */
-			edev = list_first_entry(&pe->edevs,
-					struct eeh_dev, list);
-			dev = eeh_dev_to_pci_dev(edev);
-			dev = dev->bus->self;
-		} else {
-			/*
-			 * If it's bus PE, the parent PCI bus is already there
-			 * and just pick it up.
-			 */
-			dev = pe->bus->self;
-		}
-
-		/*
-		 * Do reset based on the fact that the direct upstream bridge
-		 * is root bridge (port) or not.
-		 */
-		if (dev->bus->number == 0)
+		bus = eeh_pe_bus_get(pe);
+		if (pci_is_root_bus(bus))
 			ret = ioda_eeh_root_reset(hose, option);
 		else
-			ret = ioda_eeh_bridge_reset(hose, dev, option);
+			ret = ioda_eeh_bridge_reset(hose, bus->self, option);
 	}
 
 	return ret;
 }
 
 /**
- * ioda_eeh_get_log - Retrieve error log
- * @pe: EEH PE
- * @severity: Severity level of the log
- * @drv_log: buffer to store the log
- * @len: space of the log buffer
- *
- * The function is used to retrieve error log from P7IOC.
- */
-static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
-			    char *drv_log, unsigned long len)
-{
-	s64 ret;
-	unsigned long flags;
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	ret = opal_pci_get_phb_diag_data2(phb->opal_id,
-			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
-	if (ret) {
-		spin_unlock_irqrestore(&phb->lock, flags);
-		pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
-			   __func__, hose->global_number, pe->addr, ret);
-		return -EIO;
-	}
-
-	/* The PHB diag-data is always indicative */
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-
-	spin_unlock_irqrestore(&phb->lock, flags);
-
-	return 0;
-}
-
-/**
  * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
  * @pe: EEH PE
  *
@@ -666,22 +639,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 	}
 }
 
-static void ioda_eeh_phb_diag(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	long rc;
-
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
-					 PNV_PCI_DIAG_BUF_SIZE);
-	if (rc != OPAL_SUCCESS) {
-		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			    __func__, hose->global_number, rc);
-		return;
-	}
-
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-}
-
 static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
 			       struct eeh_pe **pe)
 {
@@ -855,6 +812,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 		}
 
 		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
+		}
+
+		/*
 		 * If we have no errors on the specific PHB or only
 		 * informative error there, we continue poking it.
 		 * Otherwise, we need actions to be taken by upper
@@ -872,7 +843,6 @@ struct pnv_eeh_ops ioda_eeh_ops = {
 	.set_option		= ioda_eeh_set_option,
 	.get_state		= ioda_eeh_get_state,
 	.reset			= ioda_eeh_reset,
-	.get_log		= ioda_eeh_get_log,
 	.configure_bridge	= ioda_eeh_configure_bridge,
 	.next_error		= ioda_eeh_next_error
 };
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a79fddc5e74e..a59788e83b8b 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -145,7 +145,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	 * Enable EEH explicitly so that we will do EEH check
 	 * while accessing I/O stuff
 	 */
-	eeh_subsystem_enabled = 1;
+	eeh_set_enable(true);
 
 	/* Save memory bars */
 	eeh_save_bars(edev);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 4fbf276ac99e..4cd2ea6c0dbe 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -71,11 +71,11 @@ static int opal_xscom_err_xlate(int64_t rc)
 	}
 }
 
-static u64 opal_scom_unmangle(u64 reg)
+static u64 opal_scom_unmangle(u64 addr)
 {
 	/*
 	 * XSCOM indirect addresses have the top bit set. Additionally
-	 * the reset of the top 3 nibbles is always 0.
+	 * the rest of the top 3 nibbles is always 0.
 	 *
 	 * Because the debugfs interface uses signed offsets and shifts
 	 * the address left by 3, we basically cannot use the top 4 bits
@@ -86,10 +86,13 @@ static u64 opal_scom_unmangle(u64 reg)
 	 * conversion here. To leave room for further xscom address
 	 * expansion, we only clear out the top byte
 	 *
+	 * For in-kernel use, we also support the real indirect bit, so
+	 * we test for any of the top 5 bits
+	 *
 	 */
-	if (reg & (1ull << 59))
-		reg = (reg & ~(0xffull << 56)) | (1ull << 63);
-	return reg;
+	if (addr & (0x1full << 59))
+		addr = (addr & ~(0xffull << 56)) | (1ull << 63);
+	return addr;
 }
 
 static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
@@ -98,8 +101,8 @@ static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
 	int64_t rc;
 	__be64 v;
 
-	reg = opal_scom_unmangle(reg);
-	rc = opal_xscom_read(m->chip, m->addr + reg, (__be64 *)__pa(&v));
+	reg = opal_scom_unmangle(m->addr + reg);
+	rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v));
 	*value = be64_to_cpu(v);
 	return opal_xscom_err_xlate(rc);
 }
@@ -109,8 +112,8 @@ static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
 	struct opal_scom_map *m = map;
 	int64_t rc;
 
-	reg = opal_scom_unmangle(reg);
-	rc = opal_xscom_write(m->chip, m->addr + reg, value);
+	reg = opal_scom_unmangle(m->addr + reg);
+	rc = opal_xscom_write(m->chip, reg, value);
 	return opal_xscom_err_xlate(rc);
 }
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7d6dcc6d5fa9..3b2b4fb3585b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -21,6 +21,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/msi.h>
+#include <linux/memblock.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -460,9 +461,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 		return;
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
+	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
 	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
 }
 
+static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
+				     struct pci_dev *pdev, u64 dma_mask)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+	uint64_t top;
+	bool bypass = false;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return -ENODEV;;
+
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+	if (pe->tce_bypass_enabled) {
+		top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+		bypass = (dma_mask >= top);
+	}
+
+	if (bypass) {
+		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
+		set_dma_ops(&pdev->dev, &dma_direct_ops);
+		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+	} else {
+		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+		set_dma_ops(&pdev->dev, &dma_iommu_ops);
+		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+	}
+	return 0;
+}
+
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 {
 	struct pci_dev *dev;
@@ -657,6 +688,56 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
 }
 
+static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+{
+	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+					      tce32_table);
+	uint16_t window_id = (pe->pe_number << 1 ) + 1;
+	int64_t rc;
+
+	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
+	if (enable) {
+		phys_addr_t top = memblock_end_of_DRAM();
+
+		top = roundup_pow_of_two(top);
+		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+						     pe->pe_number,
+						     window_id,
+						     pe->tce_bypass_base,
+						     top);
+	} else {
+		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+						     pe->pe_number,
+						     window_id,
+						     pe->tce_bypass_base,
+						     0);
+
+		/*
+		 * We might want to reset the DMA ops of all devices on
+		 * this PE. However in theory, that shouldn't be necessary
+		 * as this is used for VFIO/KVM pass-through and the device
+		 * hasn't yet been returned to its kernel driver
+		 */
+	}
+	if (rc)
+		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
+	else
+		pe->tce_bypass_enabled = enable;
+}
+
+static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
+					  struct pnv_ioda_pe *pe)
+{
+	/* TVE #1 is selected by PCI address bit 59 */
+	pe->tce_bypass_base = 1ull << 59;
+
+	/* Install set_bypass callback for VFIO */
+	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
+
+	/* Enable bypass by default */
+	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
+}
+
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe)
 {
@@ -727,6 +808,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	else
 		pnv_ioda_setup_bus_dma(pe, pe->pbus);
 
+	/* Also create a bypass window */
+	pnv_pci_ioda2_setup_bypass_pe(phb, pe);
 	return;
 fail:
 	if (pe->tce32_seg >= 0)
@@ -1286,6 +1369,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
+	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
 
 	/* Setup shutdown function for kexec */
 	phb->shutdown = pnv_pci_ioda_shutdown;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b555ebc57ef5..8518817dcdfd 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -134,57 +134,72 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
 	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
 
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-	pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
-	pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->p7iocPlssr || data->p7iocCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->p7iocPlssr, data->p7iocCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
@@ -197,62 +212,77 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
 	data = (struct OpalIoPhb3ErrorData*)common;
 	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
-
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-
-	pr_info("  nFir:                 %016llx\n", data->nFir);
-	pr_info("  nFirMask:             %016llx\n", data->nFirMask);
-	pr_info("  nFirWOF:              %016llx\n", data->nFirWOF);
-	pr_info("  PhbPlssr:             %016llx\n", data->phbPlssr);
-	pr_info("  PhbCsr:               %016llx\n", data->phbCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->nFir || data->nFirMask ||
+	    data->nFirWOF)
+		pr_info("  nFir:        %016llx %016llx %016llx\n",
+			data->nFir, data->nFirMask,
+			data->nFirWOF);
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->phbPlssr, data->phbCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
@@ -634,6 +664,16 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 		pnv_pci_dma_fallback_setup(hose, pdev);
 }
 
+int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+
+	if (phb && phb->dma_set_mask)
+		return phb->dma_set_mask(phb, pdev, dma_mask);
+	return __dma_set_mask(&pdev->dev, dma_mask);
+}
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 13f1942a9a5f..cde169442775 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -54,7 +54,9 @@ struct pnv_ioda_pe {
 	struct iommu_table	tce32_table;
 	phys_addr_t		tce_inval_reg_phys;
 
-	/* XXX TODO: Add support for additional 64-bit iommus */
+	/* 64-bit TCE bypass region */
+	bool			tce_bypass_enabled;
+	uint64_t		tce_bypass_base;
 
 	/* MSIs. MVE index is identical for for 32 and 64 bit MSI
 	 * and -1 if not supported. (It's actually identical to the
@@ -113,6 +115,8 @@ struct pnv_phb {
 			 unsigned int hwirq, unsigned int virq,
 			 unsigned int is_64, struct msi_msg *msg);
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
+	int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
+			    u64 dma_mask);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	void (*shutdown)(struct pnv_phb *phb);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index de6819be1f95..0051e108ef0f 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,12 +7,20 @@ extern void pnv_smp_init(void);
 static inline void pnv_smp_init(void) { }
 #endif
 
+struct pci_dev;
+
 #ifdef CONFIG_PCI
 extern void pnv_pci_init(void);
 extern void pnv_pci_shutdown(void);
+extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
 #else
 static inline void pnv_pci_init(void) { }
 static inline void pnv_pci_shutdown(void) { }
+
+static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	return -ENODEV;
+}
 #endif
 
 extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 21166f65c97c..110f4fbd319f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -27,6 +27,7 @@
 #include <linux/interrupt.h>
 #include <linux/bug.h>
 #include <linux/cpuidle.h>
+#include <linux/pci.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -141,6 +142,13 @@ static void pnv_progress(char *s, unsigned short hex)
 {
 }
 
+static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (dev_is_pci(dev))
+		return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
+	return __dma_set_mask(dev, dma_mask);
+}
+
 static void pnv_shutdown(void)
 {
 	/* Let the PCI code clear up IODA tables */
@@ -238,6 +246,7 @@ define_machine(powernv) {
 	.machine_shutdown	= pnv_shutdown,
 	.power_save             = powernv_idle,
 	.calibrate_decr		= generic_calibrate_decr,
+	.dma_set_mask		= pnv_dma_set_mask,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 37300f6ee244..80b1d57c306a 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -20,6 +20,7 @@ config PPC_PSERIES
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
 	select HOTPLUG_CPU if SMP
+	select ARCH_RANDOM
 	default y
 
 config PPC_SPLPAR
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 9ef3cc8ebc11..8a8f0472d98f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -265,7 +265,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 			enable = 1;
 
 		if (enable) {
-			eeh_subsystem_enabled = 1;
+			eeh_set_enable(true);
 			eeh_add_to_parent_pe(edev);
 
 			pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 82789e79e539..0ea99e3d4815 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -35,12 +35,7 @@
 #include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
-static struct rtas_args rtas_stop_self_args = {
-	.token = RTAS_UNKNOWN_SERVICE,
-	.nargs = 0,
-	.nret = 1,
-	.rets = &rtas_stop_self_args.args[0],
-};
+static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
 static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
 							CPU_STATE_OFFLINE;
@@ -93,15 +88,20 @@ void set_default_offline_state(int cpu)
 
 static void rtas_stop_self(void)
 {
-	struct rtas_args *args = &rtas_stop_self_args;
+	struct rtas_args args = {
+		.token = cpu_to_be32(rtas_stop_self_token),
+		.nargs = 0,
+		.nret = 1,
+		.rets = &args.args[0],
+	};
 
 	local_irq_disable();
 
-	BUG_ON(args->token == RTAS_UNKNOWN_SERVICE);
+	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
 
 	printk("cpu %u (hwid %u) Ready to die...\n",
 	       smp_processor_id(), hard_smp_processor_id());
-	enter_rtas(__pa(args));
+	enter_rtas(__pa(&args));
 
 	panic("Alas, I survived.\n");
 }
@@ -392,10 +392,10 @@ static int __init pseries_cpu_hotplug_init(void)
 		}
 	}
 
-	rtas_stop_self_args.token = rtas_token("stop-self");
+	rtas_stop_self_token = rtas_token("stop-self");
 	qcss_tok = rtas_token("query-cpu-stopped-state");
 
-	if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE ||
+	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
 			qcss_tok == RTAS_UNKNOWN_SERVICE) {
 		printk(KERN_INFO "CPU Hotplug not supported by firmware "
 				"- disabling.\n");
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 70670a2d9cf2..c413ec158ff5 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -113,7 +113,8 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
 	struct device_node *dn, *pdn;
 	struct pci_bus *bus;
-	const __be32 *pcie_link_speed_stats;
+	u32 pcie_link_speed_stats[2];
+	int rc;
 
 	bus = bridge->bus;
 
@@ -122,38 +123,45 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 		return 0;
 
 	for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
-		pcie_link_speed_stats = of_get_property(pdn,
-			"ibm,pcie-link-speed-stats", NULL);
-		if (pcie_link_speed_stats)
+		rc = of_property_read_u32_array(pdn,
+				"ibm,pcie-link-speed-stats",
+				&pcie_link_speed_stats[0], 2);
+		if (!rc)
 			break;
 	}
 
 	of_node_put(pdn);
 
-	if (!pcie_link_speed_stats) {
+	if (rc) {
 		pr_err("no ibm,pcie-link-speed-stats property\n");
 		return 0;
 	}
 
-	switch (be32_to_cpup(pcie_link_speed_stats)) {
+	switch (pcie_link_speed_stats[0]) {
 	case 0x01:
 		bus->max_bus_speed = PCIE_SPEED_2_5GT;
 		break;
 	case 0x02:
 		bus->max_bus_speed = PCIE_SPEED_5_0GT;
 		break;
+	case 0x04:
+		bus->max_bus_speed = PCIE_SPEED_8_0GT;
+		break;
 	default:
 		bus->max_bus_speed = PCI_SPEED_UNKNOWN;
 		break;
 	}
 
-	switch (be32_to_cpup(pcie_link_speed_stats)) {
+	switch (pcie_link_speed_stats[1]) {
 	case 0x01:
 		bus->cur_bus_speed = PCIE_SPEED_2_5GT;
 		break;
 	case 0x02:
 		bus->cur_bus_speed = PCIE_SPEED_5_0GT;
 		break;
+	case 0x04:
+		bus->cur_bus_speed = PCIE_SPEED_8_0GT;
+		break;
 	default:
 		bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
 		break;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8e639d7cbda7..972df0ffd4dc 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -430,8 +430,7 @@ static void pSeries_machine_kexec(struct kimage *image)
 {
 	long rc;
 
-	if (firmware_has_feature(FW_FEATURE_SET_MODE) &&
-	    (image->type != KEXEC_TYPE_CRASH)) {
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		rc = pSeries_disable_reloc_on_exc();
 		if (rc != H_SUCCESS)
 			pr_warning("Warning: Failed to disable relocation on "
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 0e166ed4cd16..8209744b2829 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -886,25 +886,25 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 
 	/* Default: read HW settings */
 	if (flow_type == IRQ_TYPE_DEFAULT) {
-		switch(vold & (MPIC_INFO(VECPRI_POLARITY_MASK) |
-			       MPIC_INFO(VECPRI_SENSE_MASK))) {
-			case MPIC_INFO(VECPRI_SENSE_EDGE) |
-			     MPIC_INFO(VECPRI_POLARITY_POSITIVE):
-				flow_type = IRQ_TYPE_EDGE_RISING;
-				break;
-			case MPIC_INFO(VECPRI_SENSE_EDGE) |
-			     MPIC_INFO(VECPRI_POLARITY_NEGATIVE):
-				flow_type = IRQ_TYPE_EDGE_FALLING;
-				break;
-			case MPIC_INFO(VECPRI_SENSE_LEVEL) |
-			     MPIC_INFO(VECPRI_POLARITY_POSITIVE):
-				flow_type = IRQ_TYPE_LEVEL_HIGH;
-				break;
-			case MPIC_INFO(VECPRI_SENSE_LEVEL) |
-			     MPIC_INFO(VECPRI_POLARITY_NEGATIVE):
-				flow_type = IRQ_TYPE_LEVEL_LOW;
-				break;
-		}
+		int vold_ps;
+
+		vold_ps = vold & (MPIC_INFO(VECPRI_POLARITY_MASK) |
+				  MPIC_INFO(VECPRI_SENSE_MASK));
+
+		if (vold_ps == (MPIC_INFO(VECPRI_SENSE_EDGE) |
+				MPIC_INFO(VECPRI_POLARITY_POSITIVE)))
+			flow_type = IRQ_TYPE_EDGE_RISING;
+		else if	(vold_ps == (MPIC_INFO(VECPRI_SENSE_EDGE) |
+				     MPIC_INFO(VECPRI_POLARITY_NEGATIVE)))
+			flow_type = IRQ_TYPE_EDGE_FALLING;
+		else if (vold_ps == (MPIC_INFO(VECPRI_SENSE_LEVEL) |
+				     MPIC_INFO(VECPRI_POLARITY_POSITIVE)))
+			flow_type = IRQ_TYPE_LEVEL_HIGH;
+		else if (vold_ps == (MPIC_INFO(VECPRI_SENSE_LEVEL) |
+				     MPIC_INFO(VECPRI_POLARITY_NEGATIVE)))
+			flow_type = IRQ_TYPE_LEVEL_LOW;
+		else
+			WARN_ONCE(1, "mpic: unknown IRQ type %d\n", vold);
 	}
 
 	/* Apply to irq desc */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a90731b3d44a..b07909850f77 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -309,16 +309,23 @@ static void get_output_lock(void)
 
 	if (xmon_speaker == me)
 		return;
+
 	for (;;) {
-		if (xmon_speaker == 0) {
-			last_speaker = cmpxchg(&xmon_speaker, 0, me);
-			if (last_speaker == 0)
-				return;
-		}
-		timeout = 10000000;
+		last_speaker = cmpxchg(&xmon_speaker, 0, me);
+		if (last_speaker == 0)
+			return;
+
+		/*
+		 * Wait a full second for the lock, we might be on a slow
+		 * console, but check every 100us.
+		 */
+		timeout = 10000;
 		while (xmon_speaker == last_speaker) {
-			if (--timeout > 0)
+			if (--timeout > 0) {
+				udelay(100);
 				continue;
+			}
+
 			/* hostile takeover */
 			prev = cmpxchg(&xmon_speaker, last_speaker, me);
 			if (prev == last_speaker)
@@ -397,7 +404,6 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	}
 
 	xmon_fault_jmp[cpu] = recurse_jmp;
-	cpumask_set_cpu(cpu, &cpus_in_xmon);
 
 	bp = NULL;
 	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
@@ -419,6 +425,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		release_output_lock();
 	}
 
+	cpumask_set_cpu(cpu, &cpus_in_xmon);
+
  waiting:
 	secondary = 1;
 	while (secondary && !xmon_gate) {
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 4c4a1cef5208..47c8630c93cd 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -529,6 +529,7 @@ static int __init appldata_init(void)
 {
 	int rc;
 
+	init_virt_timer(&appldata_timer);
 	appldata_timer.function = appldata_timer_function;
 	appldata_timer.data = (unsigned long) &appldata_work;
 
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index b3feabd39f31..cf3c0089bef2 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -25,6 +25,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/spinlock.h>
 #include "crypt_s390.h"
 
 #define AES_KEYLEN_128		1
@@ -32,6 +33,7 @@
 #define AES_KEYLEN_256		4
 
 static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
 static char keylen_flag;
 
 struct s390_aes_ctx {
@@ -758,43 +760,67 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return aes_set_key(tfm, in_key, key_len);
 }
 
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* only use complete blocks, max. PAGE_SIZE */
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+	for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
+		memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE,
+		       AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+	}
+	return n;
+}
+
 static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
 			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
-	unsigned int i, n, nbytes;
-	u8 buf[AES_BLOCK_SIZE];
-	u8 *out, *in;
+	unsigned int n, nbytes;
+	u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
+	u8 *out, *in, *ctrptr = ctrbuf;
 
 	if (!walk->nbytes)
 		return ret;
 
-	memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE);
+	if (spin_trylock(&ctrblk_lock))
+		ctrptr = ctrblk;
+
+	memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		while (nbytes >= AES_BLOCK_SIZE) {
-			/* only use complete blocks, max. PAGE_SIZE */
-			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
-						 nbytes & ~(AES_BLOCK_SIZE - 1);
-			for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
-				memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE,
-				       AES_BLOCK_SIZE);
-				crypto_inc(ctrblk + i, AES_BLOCK_SIZE);
-			}
-			ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk);
-			if (ret < 0 || ret != n)
+			if (ctrptr == ctrblk)
+				n = __ctrblk_init(ctrptr, nbytes);
+			else
+				n = AES_BLOCK_SIZE;
+			ret = crypt_s390_kmctr(func, sctx->key, out, in,
+					       n, ctrptr);
+			if (ret < 0 || ret != n) {
+				if (ctrptr == ctrblk)
+					spin_unlock(&ctrblk_lock);
 				return -EIO;
+			}
 			if (n > AES_BLOCK_SIZE)
-				memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE,
+				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
 				       AES_BLOCK_SIZE);
-			crypto_inc(ctrblk, AES_BLOCK_SIZE);
+			crypto_inc(ctrptr, AES_BLOCK_SIZE);
 			out += n;
 			in += n;
 			nbytes -= n;
 		}
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	}
+	if (ctrptr == ctrblk) {
+		if (nbytes)
+			memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
+		else
+			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+		spin_unlock(&ctrblk_lock);
+	}
 	/*
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
@@ -802,14 +828,15 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		ret = crypt_s390_kmctr(func, sctx->key, buf, in,
-				       AES_BLOCK_SIZE, ctrblk);
+				       AES_BLOCK_SIZE, ctrbuf);
 		if (ret < 0 || ret != AES_BLOCK_SIZE)
 			return -EIO;
 		memcpy(out, buf, nbytes);
-		crypto_inc(ctrblk, AES_BLOCK_SIZE);
+		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
 	}
-	memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE);
+
 	return ret;
 }
 
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 200f2a1b599d..0a5aac8a9412 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -25,6 +25,7 @@
 #define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
 
 static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
 
 struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
@@ -105,29 +106,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 }
 
 static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
-			    u8 *iv, struct blkcipher_walk *walk)
+			    struct blkcipher_walk *walk)
 {
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes = walk->nbytes;
+	struct {
+		u8 iv[DES_BLOCK_SIZE];
+		u8 key[DES3_KEY_SIZE];
+	} param;
 
 	if (!nbytes)
 		goto out;
 
-	memcpy(iv, walk->iv, DES_BLOCK_SIZE);
+	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
 	do {
 		/* only use complete blocks */
 		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = crypt_s390_kmc(func, iv, out, in, n);
+		ret = crypt_s390_kmc(func, &param, out, in, n);
 		if (ret < 0 || ret != n)
 			return -EIO;
 
 		nbytes &= DES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	} while ((nbytes = walk->nbytes));
-	memcpy(walk->iv, iv, DES_BLOCK_SIZE);
+	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
 
 out:
 	return ret;
@@ -179,22 +186,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -327,22 +332,20 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk);
 }
 
 static int cbc_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des3_alg = {
@@ -366,54 +369,80 @@ static struct crypto_alg cbc_des3_alg = {
 	}
 };
 
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* align to block size, max. PAGE_SIZE */
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
+	for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+		memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
+	}
+	return n;
+}
+
 static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
-			    struct s390_des_ctx *ctx, struct blkcipher_walk *walk)
+			    struct s390_des_ctx *ctx,
+			    struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
-	unsigned int i, n, nbytes;
-	u8 buf[DES_BLOCK_SIZE];
-	u8 *out, *in;
+	unsigned int n, nbytes;
+	u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
+	u8 *out, *in, *ctrptr = ctrbuf;
+
+	if (!walk->nbytes)
+		return ret;
 
-	memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE);
+	if (spin_trylock(&ctrblk_lock))
+		ctrptr = ctrblk;
+
+	memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		while (nbytes >= DES_BLOCK_SIZE) {
-			/* align to block size, max. PAGE_SIZE */
-			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
-				nbytes & ~(DES_BLOCK_SIZE - 1);
-			for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
-				memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE,
-				       DES_BLOCK_SIZE);
-				crypto_inc(ctrblk + i, DES_BLOCK_SIZE);
-			}
-			ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk);
-			if (ret < 0 || ret != n)
+			if (ctrptr == ctrblk)
+				n = __ctrblk_init(ctrptr, nbytes);
+			else
+				n = DES_BLOCK_SIZE;
+			ret = crypt_s390_kmctr(func, ctx->key, out, in,
+					       n, ctrptr);
+			if (ret < 0 || ret != n) {
+				if (ctrptr == ctrblk)
+					spin_unlock(&ctrblk_lock);
 				return -EIO;
+			}
 			if (n > DES_BLOCK_SIZE)
-				memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE,
+				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
 				       DES_BLOCK_SIZE);
-			crypto_inc(ctrblk, DES_BLOCK_SIZE);
+			crypto_inc(ctrptr, DES_BLOCK_SIZE);
 			out += n;
 			in += n;
 			nbytes -= n;
 		}
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	}
-
+	if (ctrptr == ctrblk) {
+		if (nbytes)
+			memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
+		else
+			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+		spin_unlock(&ctrblk_lock);
+	}
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		ret = crypt_s390_kmctr(func, ctx->key, buf, in,
-				       DES_BLOCK_SIZE, ctrblk);
+				       DES_BLOCK_SIZE, ctrbuf);
 		if (ret < 0 || ret != DES_BLOCK_SIZE)
 			return -EIO;
 		memcpy(out, buf, nbytes);
-		crypto_inc(ctrblk, DES_BLOCK_SIZE);
+		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
 	}
-	memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE);
 	return ret;
 }
 
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 59c8efce1b99..0248949a756d 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1421,5 +1421,5 @@ ENTRY(sys_sched_setattr_wrapper)
 ENTRY(sys_sched_getattr_wrapper)
 	lgfr	%r2,%r2			# pid_t
 	llgtr	%r3,%r3			# const char __user *
-	llgfr	%r3,%r3			# unsigned int
+	llgfr	%r4,%r4			# unsigned int
 	jg	sys_sched_getattr
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index b9e25ae2579c..d7c00507568a 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -59,7 +59,7 @@ ENTRY(startup_continue)
 	.quad	0			# cr12: tracing off
 	.quad	0			# cr13: home space segment table
 	.quad	0xc0000000		# cr14: machine check handling off
-	.quad	0			# cr15: linkage stack operations
+	.quad	.Llinkage_stack		# cr15: linkage stack operations
 .Lpcmsk:.quad	0x0000000180000000
 .L4malign:.quad 0xffffffffffc00000
 .Lscan2g:.quad	0x80000000 + 0x20000 - 8	# 2GB + 128K - 8
@@ -67,12 +67,15 @@ ENTRY(startup_continue)
 .Lparmaddr:
 	.quad	PARMAREA
 	.align	64
-.Lduct: .long	0,0,0,0,.Lduald,0,0,0
+.Lduct: .long	0,.Laste,.Laste,0,.Lduald,0,0,0
 	.long	0,0,0,0,0,0,0,0
+.Laste:	.quad	0,0xffffffffffffffff,0,0,0,0,0,0
 	.align	128
 .Lduald:.rept	8
 	.long	0x80000000,0,0,0	# invalid access-list entries
 	.endr
+.Llinkage_stack:
+	.long	0,0,0x89000000,0,0,0,0x8a000000,0
 
 ENTRY(_ehead)
 
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index a90d45e9dfb0..27c50f4d90cb 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -12,6 +12,8 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
+#include <asm/setup.h>
+#include <asm/ipl.h>
 
 #define ESSA_SET_STABLE		1
 #define ESSA_SET_UNUSED		2
@@ -41,6 +43,14 @@ void __init cmma_init(void)
 
 	if (!cmma_flag)
 		return;
+	/*
+	 * Disable CMM for dump, otherwise  the tprot based memory
+	 * detection can fail because of unstable pages.
+	 */
+	if (OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP) {
+		cmma_flag = 0;
+		return;
+	}
 	asm volatile(
 		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
 		"0:     la      %0,0\n"
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 60c11a629d96..f91c03119804 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -206,11 +206,13 @@ static void dma_cleanup_tables(struct zpci_dev *zdev)
 	zdev->dma_table = NULL;
 }
 
-static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, unsigned long start,
-				   int size)
+static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+				       unsigned long start, int size)
 {
-	unsigned long boundary_size = 0x1000000;
+	unsigned long boundary_size;
 
+	boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+			      PAGE_SIZE) >> PAGE_SHIFT;
 	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
 				start, size, 0, boundary_size, 0);
 }
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index c51efdcd07a2..7d8b7e94b93b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -27,7 +27,7 @@ config SPARC
 	select RTC_DRV_M48T59
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
-	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_JUMP_LABEL if SPARC64
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_PCI_IOMAP
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 869023abe5a4..cfbe53c17b0d 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/kdebug.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/log2.h>
@@ -62,6 +63,7 @@ extern unsigned long last_valid_pfn;
 static pgd_t *srmmu_swapper_pg_dir;
 
 const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops;
+EXPORT_SYMBOL(sparc32_cachetlb_ops);
 
 #ifdef CONFIG_SMP
 const struct sparc32_cachetlb_ops *local_ops;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 940e50ebfafa..0af5250d914f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -444,6 +444,7 @@ config X86_INTEL_MID
 	bool "Intel MID platform support"
 	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
+	depends on X86_PLATFORM_DEVICES
 	depends on PCI
 	depends on PCI_GOANY
 	depends on X86_IO_APIC
@@ -1051,9 +1052,9 @@ config MICROCODE_INTEL
 	  This options enables microcode patch loading support for Intel
 	  processors.
 
-	  For latest news and information on obtaining all the required
-	  Intel ingredients for this driver, check:
-	  <http://www.urbanmyth.org/microcode/>.
+	  For the current Intel microcode data package go to
+	  <https://downloadcenter.intel.com> and search for
+	  'Linux Processor Microcode Data File'.
 
 config MICROCODE_AMD
 	bool "AMD microcode loading support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0f3621ed1db6..321a52ccf63a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -184,6 +184,7 @@ config HAVE_MMIOTRACE_SUPPORT
 config X86_DECODER_SELFTEST
 	bool "x86 instruction decoder selftest"
 	depends on DEBUG_KERNEL && KPROBES
+	depends on !COMPILE_TEST
 	---help---
 	 Perform x86 instruction decoder selftests at build time.
 	 This option is useful for checking the sanity of x86 instruction
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 90a21f430117..4dbf967da50d 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -111,7 +111,7 @@ struct mem_vector {
 };
 
 #define MEM_AVOID_MAX 5
-struct mem_vector mem_avoid[MEM_AVOID_MAX];
+static struct mem_vector mem_avoid[MEM_AVOID_MAX];
 
 static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
 {
@@ -180,7 +180,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 }
 
 /* Does this memory vector overlap a known avoided area? */
-bool mem_avoid_overlap(struct mem_vector *img)
+static bool mem_avoid_overlap(struct mem_vector *img)
 {
 	int i;
 
@@ -192,8 +192,9 @@ bool mem_avoid_overlap(struct mem_vector *img)
 	return false;
 }
 
-unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN];
-unsigned long slot_max = 0;
+static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
+			   CONFIG_PHYSICAL_ALIGN];
+static unsigned long slot_max;
 
 static void slots_append(unsigned long addr)
 {
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index a54ee1d054d9..aaac3b2fb746 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void);
 extern void amd_flush_garts(void);
 extern int amd_numa_init(void);
 extern int amd_get_subcaches(int);
-extern int amd_set_subcaches(int, int);
+extern int amd_set_subcaches(int, unsigned long);
 
 struct amd_l3_cache {
 	unsigned indices;
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 3b978c472d08..3d6b9f81cc68 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -132,6 +132,8 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
 extern void efi_setup_page_tables(void);
 extern void __init old_map_region(efi_memory_desc_t *md);
+extern void __init runtime_code_page_mkexec(void);
+extern void __init efi_runtime_mkexec(void);
 
 struct efi_setup_data {
 	u64 fw_vendor;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index bbc8b12fa443..5ad38ad07890 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -445,10 +445,20 @@ static inline int pte_same(pte_t a, pte_t b)
 	return a.pte == b.pte;
 }
 
+static inline int pteval_present(pteval_t pteval)
+{
+	/*
+	 * Yes Linus, _PAGE_PROTNONE == _PAGE_NUMA. Expressing it this
+	 * way clearly states that the intent is that protnone and numa
+	 * hinting ptes are considered present for the purposes of
+	 * pagetable operations like zapping, protection changes, gup etc.
+	 */
+	return pteval & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_NUMA);
+}
+
 static inline int pte_present(pte_t a)
 {
-	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
-			       _PAGE_NUMA);
+	return pteval_present(pte_flags(a));
 }
 
 #define pte_accessible pte_accessible
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e6d90babc245..04905bfc508b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
 
 static inline void __flush_tlb_one(unsigned long addr)
 {
-	count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
+	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 	__flush_tlb_single(addr);
 }
 
@@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr)
  */
 static inline void __flush_tlb_up(void)
 {
-	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	__flush_tlb();
 }
 
 static inline void flush_tlb_all(void)
 {
-	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	__flush_tlb_all();
 }
 
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 57ae63cd6ee2..94605c0e9cee 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -66,6 +66,6 @@ extern void tsc_save_sched_clock_state(void);
 extern void tsc_restore_sched_clock_state(void);
 
 /* MSR based TSC calibration for Intel Atom SoC platforms */
-int try_msr_calibrate_tsc(unsigned long *fast_calibrate);
+unsigned long try_msr_calibrate_tsc(void);
 
 #endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 787e1bb5aafc..3e276eb23d1b 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -52,8 +52,7 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 extern int m2p_add_override(unsigned long mfn, struct page *page,
 			    struct gnttab_map_grant_ref *kmap_op);
 extern int m2p_remove_override(struct page *page,
-			       struct gnttab_map_grant_ref *kmap_op,
-			       unsigned long mfn);
+				struct gnttab_map_grant_ref *kmap_op);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
@@ -122,7 +121,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 		pfn = m2p_find_override_pfn(mfn, ~0);
 	}
 
-	/*
+	/* 
 	 * pfn is ~0 if there are no entries in the m2p for mfn or if the
 	 * entry doesn't map back to the mfn and m2p_override doesn't have a
 	 * valid entry for it.
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 59554dca96ec..dec8de4e1663 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -179,7 +179,7 @@ int amd_get_subcaches(int cpu)
 	return (mask >> (4 * cuid)) & 0xf;
 }
 
-int amd_set_subcaches(int cpu, int mask)
+int amd_set_subcaches(int cpu, unsigned long mask)
 {
 	static unsigned int reset, ban;
 	struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d3153e281d72..c67ffa686064 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -767,10 +767,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 
 static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
 {
-	tlb_flushall_shift = 5;
-
-	if (c->x86 <= 0x11)
-		tlb_flushall_shift = 4;
+	tlb_flushall_shift = 6;
 }
 
 static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 24b6fd10625a..8e28bf2fc3ef 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -284,8 +284,13 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	raw_local_save_flags(eflags);
 	BUG_ON(eflags & X86_EFLAGS_AC);
 
-	if (cpu_has(c, X86_FEATURE_SMAP))
+	if (cpu_has(c, X86_FEATURE_SMAP)) {
+#ifdef CONFIG_X86_SMAP
 		set_in_cr4(X86_CR4_SMAP);
+#else
+		clear_in_cr4(X86_CR4_SMAP);
+#endif
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3db61c644e44..5cd9bfabd645 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -640,21 +640,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
 	case 0x61d: /* six-core 45 nm xeon "Dunnington" */
 		tlb_flushall_shift = -1;
 		break;
+	case 0x63a: /* Ivybridge */
+		tlb_flushall_shift = 2;
+		break;
 	case 0x61a: /* 45 nm nehalem, "Bloomfield" */
 	case 0x61e: /* 45 nm nehalem, "Lynnfield" */
 	case 0x625: /* 32 nm nehalem, "Clarkdale" */
 	case 0x62c: /* 32 nm nehalem, "Gulftown" */
 	case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
 	case 0x62f: /* 32 nm Xeon E7 */
-		tlb_flushall_shift = 6;
-		break;
 	case 0x62a: /* SandyBridge */
 	case 0x62d: /* SandyBridge, "Romely-EP" */
-		tlb_flushall_shift = 5;
-		break;
-	case 0x63a: /* Ivybridge */
-		tlb_flushall_shift = 1;
-		break;
 	default:
 		tlb_flushall_shift = 6;
 	}
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 8384c0fa206f..617a9e284245 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -285,6 +285,15 @@ static void __init collect_cpu_sig_on_bsp(void *arg)
 
 	uci->cpu_sig.sig = cpuid_eax(0x00000001);
 }
+
+static void __init get_bsp_sig(void)
+{
+	unsigned int bsp = boot_cpu_data.cpu_index;
+	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+	if (!uci->cpu_sig.sig)
+		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+}
 #else
 void load_ucode_amd_ap(void)
 {
@@ -337,31 +346,37 @@ void load_ucode_amd_ap(void)
 
 int __init save_microcode_in_initrd_amd(void)
 {
+	unsigned long cont;
 	enum ucode_state ret;
 	u32 eax;
 
-#ifdef CONFIG_X86_32
-	unsigned int bsp = boot_cpu_data.cpu_index;
-	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
-
-	if (!uci->cpu_sig.sig)
-		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+	if (!container)
+		return -EINVAL;
 
+#ifdef CONFIG_X86_32
+	get_bsp_sig();
+	cont = (unsigned long)container;
+#else
 	/*
-	 * Take into account the fact that the ramdisk might get relocated
-	 * and therefore we need to recompute the container's position in
-	 * virtual memory space.
+	 * We need the physical address of the container for both bitness since
+	 * boot_params.hdr.ramdisk_image is a physical address.
 	 */
-	container = (u8 *)(__va((u32)relocated_ramdisk) +
-			   ((u32)container - boot_params.hdr.ramdisk_image));
+	cont = __pa(container);
 #endif
+
+	/*
+	 * Take into account the fact that the ramdisk might get relocated and
+	 * therefore we need to recompute the container's position in virtual
+	 * memory space.
+	 */
+	if (relocated_ramdisk)
+		container = (u8 *)(__va(relocated_ramdisk) +
+			     (cont - boot_params.hdr.ramdisk_image));
+
 	if (ucode_new_rev)
 		pr_info("microcode: updated early to new patch_level=0x%08x\n",
 			ucode_new_rev);
 
-	if (!container)
-		return -EINVAL;
-
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index ce2d0a2c3e4f..0e25a1bc5ab5 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	}
 
 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
-	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	__flush_tlb();
 
 	/* Save MTRR state */
@@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 static void post_set(void) __releases(set_atomicity_lock)
 {
 	/* Flush TLBs (no need to flush caches - they are disabled) */
-	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	__flush_tlb();
 
 	/* Intel (P6) standard MTRRs */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b88645191fe5..79f9f848bee4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i]) {
 
+			if (i >= cpuc->n_events - cpuc->n_added)
+				--cpuc->n_added;
+
 			if (x86_pmu.put_event_constraints)
 				x86_pmu.put_event_constraints(cpuc, event);
 
@@ -1521,6 +1524,8 @@ static int __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
+	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
 	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
 		quirk->func();
 
@@ -1534,7 +1539,6 @@ static int __init init_hw_perf_events(void)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
 				   0, x86_pmu.num_counters, 0, 0);
 
-	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
 	if (x86_pmu.event_attrs)
@@ -1820,9 +1824,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (ret)
 		return ret;
 
+	if (x86_pmu.attr_rdpmc_broken)
+		return -ENOTSUPP;
+
 	if (!!val != !!x86_pmu.attr_rdpmc) {
 		x86_pmu.attr_rdpmc = !!val;
-		smp_call_function(change_rdpmc, (void *)val, 1);
+		on_each_cpu(change_rdpmc, (void *)val, 1);
 	}
 
 	return count;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c1a861829d81..4972c244d0bc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -409,6 +409,7 @@ struct x86_pmu {
 	/*
 	 * sysfs attrs
 	 */
+	int		attr_rdpmc_broken;
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
 	struct attribute **event_attrs;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0fa4f242f050..aa333d966886 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1361,10 +1361,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
-	if (!status) {
-		intel_pmu_enable_all(0);
-		return handled;
-	}
+	if (!status)
+		goto done;
 
 	loops = 0;
 again:
@@ -2310,10 +2308,7 @@ __init int intel_pmu_init(void)
 	if (version > 1)
 		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
-	/*
-	 * v2 and above have a perf capabilities MSR
-	 */
-	if (version > 1) {
+	if (boot_cpu_has(X86_FEATURE_PDCM)) {
 		u64 capabilities;
 
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 29c248799ced..c88f7f4b03ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -501,8 +501,11 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
 				  SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
@@ -1178,10 +1181,15 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
 				  SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index b1e2fe115323..7c1a0c07b607 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -231,31 +231,49 @@ static __initconst const struct x86_pmu p6_pmu = {
 
 };
 
+static __init void p6_pmu_rdpmc_quirk(void)
+{
+	if (boot_cpu_data.x86_mask < 9) {
+		/*
+		 * PPro erratum 26; fixed in stepping 9 and above.
+		 */
+		pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
+		x86_pmu.attr_rdpmc_broken = 1;
+		x86_pmu.attr_rdpmc = 0;
+	}
+}
+
 __init int p6_pmu_init(void)
 {
+	x86_pmu = p6_pmu;
+
 	switch (boot_cpu_data.x86_model) {
-	case 1:
-	case 3:  /* Pentium Pro */
-	case 5:
-	case 6:  /* Pentium II */
-	case 7:
-	case 8:
-	case 11: /* Pentium III */
-	case 9:
-	case 13:
-		/* Pentium M */
+	case  1: /* Pentium Pro */
+		x86_add_quirk(p6_pmu_rdpmc_quirk);
+		break;
+
+	case  3: /* Pentium II - Klamath */
+	case  5: /* Pentium II - Deschutes */
+	case  6: /* Pentium II - Mendocino */
 		break;
+
+	case  7: /* Pentium III - Katmai */
+	case  8: /* Pentium III - Coppermine */
+	case 10: /* Pentium III Xeon */
+	case 11: /* Pentium III - Tualatin */
+		break;
+
+	case  9: /* Pentium M - Banias */
+	case 13: /* Pentium M - Dothan */
+		break;
+
 	default:
-		pr_cont("unsupported p6 CPU model %d ",
-			boot_cpu_data.x86_model);
+		pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
 		return -ENODEV;
 	}
 
-	x86_pmu = p6_pmu;
-
 	memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
 		sizeof(hw_cache_event_ids));
 
-
 	return 0;
 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d4bdd253fea7..e6253195a301 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -77,8 +77,7 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 	return addr >= start && addr < end;
 }
 
-static int
-do_ftrace_mod_code(unsigned long ip, const void *new_code)
+static unsigned long text_ip_addr(unsigned long ip)
 {
 	/*
 	 * On x86_64, kernel text mappings are mapped read-only with
@@ -91,7 +90,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
 		ip = (unsigned long)__va(__pa_symbol(ip));
 
-	return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
+	return ip;
 }
 
 static const unsigned char *ftrace_nop_replace(void)
@@ -123,8 +122,10 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
 		return -EINVAL;
 
+	ip = text_ip_addr(ip);
+
 	/* replace the text with the new text */
-	if (do_ftrace_mod_code(ip, new_code))
+	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
 		return -EPERM;
 
 	sync_core();
@@ -221,37 +222,51 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 	return -EINVAL;
 }
 
-int ftrace_update_ftrace_func(ftrace_func_t func)
+static unsigned long ftrace_update_func;
+
+static int update_ftrace_func(unsigned long ip, void *new)
 {
-	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
+	unsigned char old[MCOUNT_INSN_SIZE];
 	int ret;
 
-	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(ip, (unsigned long)func);
+	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
+
+	ftrace_update_func = ip;
+	/* Make sure the breakpoints see the ftrace_update_func update */
+	smp_wmb();
 
 	/* See comment above by declaration of modifying_ftrace_code */
 	atomic_inc(&modifying_ftrace_code);
 
 	ret = ftrace_modify_code(ip, old, new);
 
+	atomic_dec(&modifying_ftrace_code);
+
+	return ret;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip = (unsigned long)(&ftrace_call);
+	unsigned char *new;
+	int ret;
+
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	ret = update_ftrace_func(ip, new);
+
 	/* Also update the regs callback function */
 	if (!ret) {
 		ip = (unsigned long)(&ftrace_regs_call);
-		memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
 		new = ftrace_call_replace(ip, (unsigned long)func);
-		ret = ftrace_modify_code(ip, old, new);
+		ret = update_ftrace_func(ip, new);
 	}
 
-	atomic_dec(&modifying_ftrace_code);
-
 	return ret;
 }
 
 static int is_ftrace_caller(unsigned long ip)
 {
-	if (ip == (unsigned long)(&ftrace_call) ||
-		ip == (unsigned long)(&ftrace_regs_call))
+	if (ip == ftrace_update_func)
 		return 1;
 
 	return 0;
@@ -677,45 +692,41 @@ int __init ftrace_dyn_arch_init(void *data)
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_graph_call(void);
 
-static int ftrace_mod_jmp(unsigned long ip,
-			  int old_offset, int new_offset)
+static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
 {
-	unsigned char code[MCOUNT_INSN_SIZE];
+	static union ftrace_code_union calc;
 
-	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
-		return -EFAULT;
+	/* Jmp not a call (ignore the .e8) */
+	calc.e8		= 0xe9;
+	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
 
-	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
-		return -EINVAL;
+	/*
+	 * ftrace external locks synchronize the access to the static variable.
+	 */
+	return calc.code;
+}
 
-	*(int *)(&code[1]) = new_offset;
+static int ftrace_mod_jmp(unsigned long ip, void *func)
+{
+	unsigned char *new;
 
-	if (do_ftrace_mod_code(ip, &code))
-		return -EPERM;
+	new = ftrace_jmp_replace(ip, (unsigned long)func);
 
-	return 0;
+	return update_ftrace_func(ip, new);
 }
 
 int ftrace_enable_ftrace_graph_caller(void)
 {
 	unsigned long ip = (unsigned long)(&ftrace_graph_call);
-	int old_offset, new_offset;
 
-	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
-	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
-
-	return ftrace_mod_jmp(ip, old_offset, new_offset);
+	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
 	unsigned long ip = (unsigned long)(&ftrace_graph_call);
-	int old_offset, new_offset;
-
-	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
-	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
 
-	return ftrace_mod_jmp(ip, old_offset, new_offset);
+	return ftrace_mod_jmp(ip, &ftrace_stub);
 }
 
 #endif /* !CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index dbb60878b744..d99f31d9a750 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -266,6 +266,14 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+/* These two declarations are only used in check_irq_vectors_for_cpu_disable()
+ * below, which is protected by stop_machine().  Putting them on the stack
+ * results in a stack frame overflow.  Dynamically allocating could result in a
+ * failure so declare these two cpumasks as global.
+ */
+static struct cpumask affinity_new, online_new;
+
 /*
  * This cpu is going to be removed and its vectors migrated to the remaining
  * online cpus.  Check to see if there are enough vectors in the remaining cpus.
@@ -277,7 +285,6 @@ int check_irq_vectors_for_cpu_disable(void)
 	unsigned int this_cpu, vector, this_count, count;
 	struct irq_desc *desc;
 	struct irq_data *data;
-	struct cpumask affinity_new, online_new;
 
 	this_cpu = smp_processor_id();
 	cpumask_copy(&online_new, cpu_online_mask);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4eabc160696f..679cef0791cd 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -279,5 +279,7 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(node_data);
 	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
 #endif
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
+			      (unsigned long)&_text - __START_KERNEL);
 }
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 872079a67e4d..f7d0672481fd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,8 +100,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 	flag |= __GFP_ZERO;
 again:
 	page = NULL;
-	if (!(flag & GFP_ATOMIC))
+	/* CMA can be used only in the context which permits sleeping */
+	if (flag & __GFP_WAIT)
 		page = dma_alloc_from_contiguous(dev, count, get_order(size));
+	/* fallback */
 	if (!page)
 		page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
 	if (!page)
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 04ee1e2e4c02..7c6acd4b8995 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -571,3 +571,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
 			quirk_amd_nb_node);
 
 #endif
+
+#ifdef CONFIG_PCI
+/*
+ * Processor does not ensure DRAM scrub read/write sequence
+ * is atomic wrt accesses to CC6 save state area. Therefore
+ * if a concurrent scrub read/write access is to same address
+ * the entry may appear as if it is not written. This quirk
+ * applies to Fam16h models 00h-0Fh
+ *
+ * See "Revision Guide" for AMD F16h models 00h-0fh,
+ * document 51810 rev. 3.04, Nov 2013
+ */
+static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
+{
+	u32 val;
+
+	/*
+	 * Suggested workaround:
+	 * set D18F3x58[4:0] = 00h and set D18F3x5C[0] = 0b
+	 */
+	pci_read_config_dword(dev, 0x58, &val);
+	if (val & 0x1F) {
+		val &= ~(0x1F);
+		pci_write_config_dword(dev, 0x58, val);
+	}
+
+	pci_read_config_dword(dev, 0x5C, &val);
+	if (val & BIT(0)) {
+		val &= ~BIT(0);
+		pci_write_config_dword(dev, 0x5c, val);
+	}
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
+			amd_disable_seq_and_redirect_scrub);
+
+#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 19e5adb49a27..cfbe99f88830 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -209,7 +209,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 	 * dance when its actually needed.
 	 */
 
-	preempt_disable();
+	preempt_disable_notrace();
 	data = this_cpu_read(cyc2ns.head);
 	tail = this_cpu_read(cyc2ns.tail);
 
@@ -229,7 +229,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 		if (!--data->__count)
 			this_cpu_write(cyc2ns.tail, data);
 	}
-	preempt_enable();
+	preempt_enable_notrace();
 
 	return ns;
 }
@@ -653,13 +653,10 @@ unsigned long native_calibrate_tsc(void)
 
 	/* Calibrate TSC using MSR for Intel Atom SoCs */
 	local_irq_save(flags);
-	i = try_msr_calibrate_tsc(&fast_calibrate);
+	fast_calibrate = try_msr_calibrate_tsc();
 	local_irq_restore(flags);
-	if (i >= 0) {
-		if (i == 0)
-			pr_warn("Fast TSC calibration using MSR failed\n");
+	if (fast_calibrate)
 		return fast_calibrate;
-	}
 
 	local_irq_save(flags);
 	fast_calibrate = quick_pit_calibrate();
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 8b5434f4389f..92ae6acac8a7 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -53,7 +53,7 @@ static struct freq_desc freq_desc_tables[] = {
 	/* TNG */
 	{ 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } },
 	/* VLV2 */
-	{ 6, 0x37, 1, { 0, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
+	{ 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
 	/* ANN */
 	{ 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } },
 };
@@ -77,21 +77,18 @@ static int match_cpu(u8 family, u8 model)
 
 /*
  * Do MSR calibration only for known/supported CPUs.
- * Return values:
- * -1: CPU is unknown/unsupported for MSR based calibration
- *  0: CPU is known/supported, but calibration failed
- *  1: CPU is known/supported, and calibration succeeded
+ *
+ * Returns the calibration value or 0 if MSR calibration failed.
  */
-int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
+unsigned long try_msr_calibrate_tsc(void)
 {
-	int cpu_index;
 	u32 lo, hi, ratio, freq_id, freq;
+	unsigned long res;
+	int cpu_index;
 
 	cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model);
 	if (cpu_index < 0)
-		return -1;
-
-	*fast_calibrate = 0;
+		return 0;
 
 	if (freq_desc_tables[cpu_index].msr_plat) {
 		rdmsr(MSR_PLATFORM_INFO, lo, hi);
@@ -103,7 +100,7 @@ int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
 	pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio);
 
 	if (!ratio)
-		return 0;
+		goto fail;
 
 	/* Get FSB FREQ ID */
 	rdmsr(MSR_FSB_FREQ, lo, hi);
@@ -112,16 +109,19 @@ int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
 	pr_info("Resolved frequency ID: %u, frequency: %u KHz\n",
 				freq_id, freq);
 	if (!freq)
-		return 0;
+		goto fail;
 
 	/* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
-	*fast_calibrate = freq * ratio;
-	pr_info("TSC runs at %lu KHz\n", *fast_calibrate);
+	res = freq * ratio;
+	pr_info("TSC runs at %lu KHz\n", res);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	lapic_timer_frequency = (freq * 1000) / HZ;
 	pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency);
 #endif
+	return res;
 
-	return 1;
+fail:
+	pr_warn("Fast TSC calibration using MSR failed\n");
+	return 0;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e50425d0f5f7..9b531351a587 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2672,6 +2672,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			break;
 		}
 
+		drop_large_spte(vcpu, iterator.sptep);
 		if (!is_shadow_present_pte(*iterator.sptep)) {
 			u64 base_addr = iterator.addr;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a06f101ef64b..392752834751 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6688,7 +6688,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		else if (is_page_fault(intr_info))
 			return enable_ept;
 		else if (is_no_device(intr_info) &&
-			 !(nested_read_cr0(vmcs12) & X86_CR0_TS))
+			 !(vmcs12->guest_cr0 & X86_CR0_TS))
 			return 0;
 		return vmcs12->exception_bitmap &
 				(1u << (intr_info & INTR_INFO_VECTOR_MASK));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 39c28f09dfd5..2b8578432d5b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6186,7 +6186,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 		frag->len -= len;
 	}
 
-	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+	if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
 		vcpu->mmio_needed = 0;
 
 		/* FIXME: return into emulator if single-stepping.  */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9d591c895803..6dea040cc3a1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1001,6 +1001,12 @@ static int fault_in_kernel_space(unsigned long address)
 
 static inline bool smap_violation(int error_code, struct pt_regs *regs)
 {
+	if (!IS_ENABLED(CONFIG_X86_SMAP))
+		return false;
+
+	if (!static_cpu_has(X86_FEATURE_SMAP))
+		return false;
+
 	if (error_code & PF_USER)
 		return false;
 
@@ -1087,11 +1093,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	if (static_cpu_has(X86_FEATURE_SMAP)) {
-		if (unlikely(smap_violation(error_code, regs))) {
-			bad_area_nosemaphore(regs, error_code, address);
-			return;
-		}
+	if (unlikely(smap_violation(error_code, regs))) {
+		bad_area_nosemaphore(regs, error_code, address);
+		return;
 	}
 
 	/*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 81b2750f3666..27aa0455fab3 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 		struct numa_memblk *mb = &mi->blk[i];
 		memblock_set_node(mb->start, mb->end - mb->start,
 				  &memblock.memory, mb->nid);
-
-		/*
-		 * At this time, all memory regions reserved by memblock are
-		 * used by the kernel. Set the nid in memblock.reserved will
-		 * mark out all the nodes the kernel resides in.
-		 */
-		memblock_set_node(mb->start, mb->end - mb->start,
-				  &memblock.reserved, mb->nid);
 	}
 
 	/*
@@ -565,10 +557,21 @@ static void __init numa_init_array(void)
 static void __init numa_clear_kernel_node_hotplug(void)
 {
 	int i, nid;
-	nodemask_t numa_kernel_nodes;
+	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
 	unsigned long start, end;
 	struct memblock_type *type = &memblock.reserved;
 
+	/*
+	 * At this time, all memory regions reserved by memblock are
+	 * used by the kernel. Set the nid in memblock.reserved will
+	 * mark out all the nodes the kernel resides in.
+	 */
+	for (i = 0; i < numa_meminfo.nr_blks; i++) {
+		struct numa_memblk *mb = &numa_meminfo.blk[i];
+		memblock_set_node(mb->start, mb->end - mb->start,
+				  &memblock.reserved, mb->nid);
+	}
+
 	/* Mark all kernel nodes. */
 	for (i = 0; i < type->cnt; i++)
 		node_set(type->regions[i].nid, numa_kernel_nodes);
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 0342d27ca798..47b6436e41c2 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -52,6 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end)
 			nid, start, end);
 	printk(KERN_DEBUG "  Setting physnode_map array to node %d for pfns:\n", nid);
 	printk(KERN_DEBUG "  ");
+	start = round_down(start, PAGES_PER_SECTION);
+	end = round_up(end, PAGES_PER_SECTION);
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		physnode_map[pfn / PAGES_PER_SECTION] = nid;
 		printk(KERN_CONT "%lx ", pfn);
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 1a25187e151e..1953e9c9391a 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -42,15 +42,25 @@ static __init inline int srat_disabled(void)
 	return acpi_numa < 0;
 }
 
-/* Callback for SLIT parsing */
+/*
+ * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
+ * I/O localities since SRAT does not list them.  I/O localities are
+ * not supported at this point.
+ */
 void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
 	int i, j;
 
-	for (i = 0; i < slit->locality_count; i++)
-		for (j = 0; j < slit->locality_count; j++)
+	for (i = 0; i < slit->locality_count; i++) {
+		if (pxm_to_node(i) == NUMA_NO_NODE)
+			continue;
+		for (j = 0; j < slit->locality_count; j++) {
+			if (pxm_to_node(j) == NUMA_NO_NODE)
+				continue;
 			numa_set_distance(pxm_to_node(i), pxm_to_node(j),
 				slit->entry[slit->locality_count * i + j]);
+		}
+	}
 }
 
 /* Callback for Proximity Domain -> x2APIC mapping */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ae699b3bbac8..dd8dda167a24 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -103,7 +103,7 @@ static void flush_tlb_func(void *info)
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
 
-	count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 		if (f->flush_end == TLB_FLUSH_ALL)
 			local_flush_tlb();
@@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 	info.flush_start = start;
 	info.flush_end = end;
 
-	count_vm_event(NR_TLB_REMOTE_FLUSH);
+	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
 	if (is_uv_system()) {
 		unsigned int cpu;
 
@@ -151,44 +151,19 @@ void flush_tlb_current_task(void)
 
 	preempt_disable();
 
-	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	local_flush_tlb();
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
-/*
- * It can find out the THP large page, or
- * HUGETLB page in tlb_flush when THP disabled
- */
-static inline unsigned long has_large_page(struct mm_struct *mm,
-				 unsigned long start, unsigned long end)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	unsigned long addr = ALIGN(start, HPAGE_SIZE);
-	for (; addr < end; addr += HPAGE_SIZE) {
-		pgd = pgd_offset(mm, addr);
-		if (likely(!pgd_none(*pgd))) {
-			pud = pud_offset(pgd, addr);
-			if (likely(!pud_none(*pud))) {
-				pmd = pmd_offset(pud, addr);
-				if (likely(!pmd_none(*pmd)))
-					if (pmd_large(*pmd))
-						return addr;
-			}
-		}
-	}
-	return 0;
-}
-
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag)
 {
 	unsigned long addr;
 	unsigned act_entries, tlb_entries = 0;
+	unsigned long nr_base_pages;
 
 	preempt_disable();
 	if (current->active_mm != mm)
@@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 		tlb_entries = tlb_lli_4k[ENTRIES];
 	else
 		tlb_entries = tlb_lld_4k[ENTRIES];
+
 	/* Assume all of TLB entries was occupied by this task */
-	act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
+	act_entries = tlb_entries >> tlb_flushall_shift;
+	act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
+	nr_base_pages = (end - start) >> PAGE_SHIFT;
 
 	/* tlb_flushall_shift is on balance point, details in commit log */
-	if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) {
-		count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+	if (nr_base_pages > act_entries) {
+		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 		local_flush_tlb();
 	} else {
-		if (has_large_page(mm, start, end)) {
-			local_flush_tlb();
-			goto flush_all;
-		}
 		/* flush range by one by one 'invlpg' */
 		for (addr = start; addr < end;	addr += PAGE_SIZE) {
-			count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
+			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 			__flush_tlb_single(addr);
 		}
 
@@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 
 static void do_flush_tlb_all(void *info)
 {
-	count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	__flush_tlb_all();
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(smp_processor_id());
@@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-	count_vm_event(NR_TLB_REMOTE_FLUSH);
+	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
 	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index 7145ec63c520..f15103dff4b4 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -42,14 +42,15 @@ void __init efi_bgrt_init(void)
 
 	if (bgrt_tab->header.length < sizeof(*bgrt_tab))
 		return;
-	if (bgrt_tab->version != 1)
+	if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
 		return;
 	if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
 		return;
 
 	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
 	if (!image) {
-		image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
+		image = early_memremap(bgrt_tab->image_address,
+				       sizeof(bmp_header));
 		ioremapped = true;
 		if (!image)
 			return;
@@ -57,7 +58,7 @@ void __init efi_bgrt_init(void)
 
 	memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
 	if (ioremapped)
-		iounmap(image);
+		early_iounmap(image, sizeof(bmp_header));
 	bgrt_image_size = bmp_header.size;
 
 	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
@@ -65,7 +66,8 @@ void __init efi_bgrt_init(void)
 		return;
 
 	if (ioremapped) {
-		image = ioremap(bgrt_tab->image_address, bmp_header.size);
+		image = early_memremap(bgrt_tab->image_address,
+				       bmp_header.size);
 		if (!image) {
 			kfree(bgrt_image);
 			bgrt_image = NULL;
@@ -75,5 +77,5 @@ void __init efi_bgrt_init(void)
 
 	memcpy_fromio(bgrt_image, image, bgrt_image_size);
 	if (ioremapped)
-		iounmap(image);
+		early_iounmap(image, bmp_header.size);
 }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d62ec87a2b26..1a201ac7cef8 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -792,7 +792,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
 		set_memory_nx(addr, npages);
 }
 
-static void __init runtime_code_page_mkexec(void)
+void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
 	void *p;
@@ -1069,8 +1069,7 @@ void __init efi_enter_virtual_mode(void)
 	efi.update_capsule = virt_efi_update_capsule;
 	efi.query_capsule_caps = virt_efi_query_capsule_caps;
 
-	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
-		runtime_code_page_mkexec();
+	efi_runtime_mkexec();
 
 	kfree(new_memmap);
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 249b183cf417..0b74cdf7f816 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -77,3 +77,9 @@ void efi_call_phys_epilog(void)
 
 	local_irq_restore(efi_rt_eflags);
 }
+
+void __init efi_runtime_mkexec(void)
+{
+	if (__supported_pte_mask & _PAGE_NX)
+		runtime_code_page_mkexec();
+}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6284f158a47d..0c2a234fef1e 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -233,3 +233,12 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len)
 {
 	efi_setup = phys_addr + sizeof(struct setup_data);
 }
+
+void __init efi_runtime_mkexec(void)
+{
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
+	if (__supported_pte_mask & _PAGE_NX)
+		runtime_code_page_mkexec();
+}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a4d7b647867f..201d09a7c46b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1473,6 +1473,18 @@ static void xen_pvh_set_cr_flags(int cpu)
 	 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
 	 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
 	write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
+
+	if (!cpu)
+		return;
+	/*
+	 * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
+	 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init.
+	*/
+	if (cpu_has_pse)
+		set_in_cr4(X86_CR4_PSE);
+
+	if (cpu_has_pge)
+		set_in_cr4(X86_CR4_PGE);
 }
 
 /*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2423ef04ffea..256282e7888b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -365,7 +365,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 /* Assume pteval_t is equivalent to all the other *val_t types. */
 static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
-	if (val & _PAGE_PRESENT) {
+	if (pteval_present(val)) {
 		unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
 		unsigned long pfn = mfn_to_pfn(mfn);
 
@@ -381,7 +381,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)
 
 static pteval_t pte_pfn_to_mfn(pteval_t val)
 {
-	if (val & _PAGE_PRESENT) {
+	if (pteval_present(val)) {
 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
 		pteval_t flags = val & PTE_FLAGS_MASK;
 		unsigned long mfn;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8009acbe41e4..696c694986d0 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -899,6 +899,13 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 					"m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
 	}
+	WARN_ON(PagePrivate(page));
+	SetPagePrivate(page);
+	set_page_private(page, mfn);
+	page->index = pfn_to_mfn(pfn);
+
+	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
+		return -ENOMEM;
 
 	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
@@ -937,16 +944,19 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 }
 EXPORT_SYMBOL_GPL(m2p_add_override);
 int m2p_remove_override(struct page *page,
-			struct gnttab_map_grant_ref *kmap_op,
-			unsigned long mfn)
+		struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
+	unsigned long mfn;
 	unsigned long pfn;
 	unsigned long uninitialized_var(address);
 	unsigned level;
 	pte_t *ptep = NULL;
 
 	pfn = page_to_pfn(page);
+	mfn = get_phys_to_machine(pfn);
+	if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
+		return -EINVAL;
 
 	if (!PageHighMem(page)) {
 		address = (unsigned long)__va(pfn << PAGE_SHIFT);
@@ -960,7 +970,10 @@ int m2p_remove_override(struct page *page,
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_del(&page->lru);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
+	WARN_ON(!PagePrivate(page));
+	ClearPagePrivate(page);
 
+	set_phys_to_machine(pfn, page->index);
 	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index ba56e11cbf77..c87ae7c6e5f9 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -20,6 +20,7 @@ config XTENSA
 	select HAVE_FUNCTION_TRACER
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_PERF_EVENTS
+	select COMMON_CLK
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
@@ -80,7 +81,6 @@ choice
 config XTENSA_VARIANT_FSF
 	bool "fsf - default (not generic) configuration"
 	select MMU
-	select HAVE_XTENSA_GPIO32
 
 config XTENSA_VARIANT_DC232B
 	bool "dc232b - Diamond 232L Standard Core Rev.B (LE)"
@@ -135,7 +135,6 @@ config HAVE_SMP
 config SMP
 	bool "Enable Symmetric multi-processing support"
 	depends on HAVE_SMP
-	select USE_GENERIC_SMP_HELPERS
 	select GENERIC_SMP_IDLE_THREAD
 	help
 	  Enabled SMP Software; allows more than one CPU/CORE
diff --git a/arch/xtensa/boot/dts/xtfpga.dtsi b/arch/xtensa/boot/dts/xtfpga.dtsi
index 46b4f5eab421..e7370b11348e 100644
--- a/arch/xtensa/boot/dts/xtfpga.dtsi
+++ b/arch/xtensa/boot/dts/xtfpga.dtsi
@@ -35,6 +35,13 @@
 		interrupt-controller;
 	};
 
+	clocks {
+		osc: main-oscillator {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+		};
+	};
+
 	serial0: serial@fd050020 {
 		device_type = "serial";
 		compatible = "ns16550a";
@@ -42,9 +49,7 @@
 		reg = <0xfd050020 0x20>;
 		reg-shift = <2>;
 		interrupts = <0 1>; /* external irq 0 */
-		/* Filled in by platform_setup from FPGA register
-		 * clock-frequency = <100000000>;
-		 */
+		clocks = <&osc>;
 	};
 
 	enet0: ethoc@fd030000 {
@@ -52,5 +57,6 @@
 		reg = <0xfd030000 0x4000 0xfd800000 0x4000>;
 		interrupts = <1 1>; /* external irq 1 */
 		local-mac-address = [00 50 c2 13 6f 00];
+		clocks = <&osc>;
 	};
 };
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index 2a042d430c25..74944207167e 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -25,7 +25,7 @@
 
 #ifdef CONFIG_MMU
 
-#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && CONFIG_OF
+#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF)
 extern unsigned long xtensa_kio_paddr;
 
 static inline unsigned long xtensa_get_kio_paddr(void)
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 8c194f6af45e..677bfcf4ee5d 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -23,25 +23,37 @@ void secondary_trap_init(void);
 
 static inline void spill_registers(void)
 {
-
+#if XCHAL_NUM_AREGS > 16
 	__asm__ __volatile__ (
-		"movi	a14, "__stringify((1 << PS_EXCM_BIT) | LOCKLEVEL)"\n\t"
-		"mov	a12, a0\n\t"
-		"rsr	a13, sar\n\t"
-		"xsr	a14, ps\n\t"
-		"movi	a0, _spill_registers\n\t"
-		"rsync\n\t"
-		"callx0 a0\n\t"
-		"mov	a0, a12\n\t"
-		"wsr	a13, sar\n\t"
-		"wsr	a14, ps\n\t"
-		: :
-#if defined(CONFIG_FRAME_POINTER)
-		: "a2", "a3", "a4",       "a11", "a12", "a13", "a14", "a15",
+		"	call12	1f\n"
+		"	_j	2f\n"
+		"	retw\n"
+		"	.align	4\n"
+		"1:\n"
+		"	_entry	a1, 48\n"
+		"	addi	a12, a0, 3\n"
+#if XCHAL_NUM_AREGS > 32
+		"	.rept	(" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n"
+		"	_entry	a1, 48\n"
+		"	mov	a12, a0\n"
+		"	.endr\n"
+#endif
+		"	_entry	a1, 48\n"
+#if XCHAL_NUM_AREGS % 12 == 0
+		"	mov	a8, a8\n"
+#elif XCHAL_NUM_AREGS % 12 == 4
+		"	mov	a12, a12\n"
+#elif XCHAL_NUM_AREGS % 12 == 8
+		"	mov	a4, a4\n"
+#endif
+		"	retw\n"
+		"2:\n"
+		: : : "a12", "a13", "memory");
 #else
-		: "a2", "a3", "a4", "a7", "a11", "a12", "a13", "a14", "a15",
+	__asm__ __volatile__ (
+		"	mov	a12, a12\n"
+		: : : "memory");
 #endif
-		  "memory");
 }
 
 #endif /* _XTENSA_TRAPS_H */
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
index 5791b45d5a5d..f74ddfbb92ef 100644
--- a/arch/xtensa/include/asm/vectors.h
+++ b/arch/xtensa/include/asm/vectors.h
@@ -25,7 +25,7 @@
 #define XCHAL_KIO_DEFAULT_PADDR		0xf0000000
 #define XCHAL_KIO_SIZE			0x10000000
 
-#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && CONFIG_OF
+#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF)
 #define XCHAL_KIO_PADDR			xtensa_get_kio_paddr()
 #else
 #define XCHAL_KIO_PADDR			XCHAL_KIO_DEFAULT_PADDR
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index 51940fec6990..b9395529f02d 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -734,7 +734,12 @@ __SYSCALL(332, sys_finit_module, 3)
 #define __NR_accept4				333
 __SYSCALL(333, sys_accept4, 4)
 
-#define __NR_syscall_count			334
+#define __NR_sched_setattr			334
+__SYSCALL(334, sys_sched_setattr, 2)
+#define __NR_sched_getattr			335
+__SYSCALL(335, sys_sched_getattr, 3)
+
+#define __NR_syscall_count			336
 
 /*
  * sysxtensa syscall handler
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 21dbe6bdb8ed..ef7f4990722b 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1081,196 +1081,53 @@ ENTRY(fast_syscall_spill_registers)
 
 	rsr	a0, sar
 	s32i	a3, a2, PT_AREG3
-	s32i	a4, a2, PT_AREG4
-	s32i	a0, a2, PT_AREG5	# store SAR to PT_AREG5
+	s32i	a0, a2, PT_SAR
 
-	/* The spill routine might clobber a7, a11, and a15. */
+	/* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */
 
+	s32i	a4, a2, PT_AREG4
 	s32i	a7, a2, PT_AREG7
+	s32i	a8, a2, PT_AREG8
 	s32i	a11, a2, PT_AREG11
+	s32i	a12, a2, PT_AREG12
 	s32i	a15, a2, PT_AREG15
 
-	call0	_spill_registers	# destroys a3, a4, and SAR
-
-	/* Advance PC, restore registers and SAR, and return from exception. */
-
-	l32i	a3, a2, PT_AREG5
-	l32i	a4, a2, PT_AREG4
-	l32i	a0, a2, PT_AREG0
-	wsr	a3, sar
-	l32i	a3, a2, PT_AREG3
-
-	/* Restore clobbered registers. */
-
-	l32i	a7, a2, PT_AREG7
-	l32i	a11, a2, PT_AREG11
-	l32i	a15, a2, PT_AREG15
-
-	movi	a2, 0
-	rfe
-
-ENDPROC(fast_syscall_spill_registers)
-
-/* Fixup handler.
- *
- * We get here if the spill routine causes an exception, e.g. tlb miss.
- * We basically restore WINDOWBASE and WINDOWSTART to the condition when
- * we entered the spill routine and jump to the user exception handler.
- *
- * a0: value of depc, original value in depc
- * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE
- * a3: exctable, original value in excsave1
- */
-
-ENTRY(fast_syscall_spill_registers_fixup)
-
-	rsr	a2, windowbase	# get current windowbase (a2 is saved)
-	xsr	a0, depc	# restore depc and a0
-	ssl	a2		# set shift (32 - WB)
-
-	/* We need to make sure the current registers (a0-a3) are preserved.
-	 * To do this, we simply set the bit for the current window frame
-	 * in WS, so that the exception handlers save them to the task stack.
-	 */
-
-	xsr	a3, excsave1	# get spill-mask
-	slli	a3, a3, 1	# shift left by one
-
-	slli	a2, a3, 32-WSBITS
-	src	a2, a3, a2	# a2 = xxwww1yyxxxwww1yy......
-	wsr	a2, windowstart	# set corrected windowstart
-
-	srli	a3, a3, 1
-	rsr	a2, excsave1
-	l32i	a2, a2, EXC_TABLE_DOUBLE_SAVE	# restore a2
-	xsr	a2, excsave1
-	s32i	a3, a2, EXC_TABLE_DOUBLE_SAVE	# save a3
-	l32i	a3, a2, EXC_TABLE_PARAM	# original WB (in user task)
-	xsr	a2, excsave1
-
-	/* Return to the original (user task) WINDOWBASE.
-	 * We leave the following frame behind:
-	 * a0, a1, a2	same
-	 * a3:		trashed (saved in EXC_TABLE_DOUBLE_SAVE)
-	 * depc:	depc (we have to return to that address)
-	 * excsave_1:	exctable
-	 */
-
-	wsr	a3, windowbase
-	rsync
-
-	/* We are now in the original frame when we entered _spill_registers:
-	 *  a0: return address
-	 *  a1: used, stack pointer
-	 *  a2: kernel stack pointer
-	 *  a3: available
-	 *  depc: exception address
-	 *  excsave: exctable
-	 * Note: This frame might be the same as above.
-	 */
-
-	/* Setup stack pointer. */
-
-	addi	a2, a2, -PT_USER_SIZE
-	s32i	a0, a2, PT_AREG0
-
-	/* Make sure we return to this fixup handler. */
-
-	movi	a3, fast_syscall_spill_registers_fixup_return
-	s32i	a3, a2, PT_DEPC		# setup depc
-
-	/* Jump to the exception handler. */
-
-	rsr	a3, excsave1
-	rsr	a0, exccause
-	addx4	a0, a0, a3              	# find entry in table
-	l32i	a0, a0, EXC_TABLE_FAST_USER     # load handler
-	l32i	a3, a3, EXC_TABLE_DOUBLE_SAVE
-	jx	a0
-
-ENDPROC(fast_syscall_spill_registers_fixup)
-
-ENTRY(fast_syscall_spill_registers_fixup_return)
-
-	/* When we return here, all registers have been restored (a2: DEPC) */
-
-	wsr	a2, depc		# exception address
-
-	/* Restore fixup handler. */
-
-	rsr	a2, excsave1
-	s32i	a3, a2, EXC_TABLE_DOUBLE_SAVE
-	movi	a3, fast_syscall_spill_registers_fixup
-	s32i	a3, a2, EXC_TABLE_FIXUP
-	rsr	a3, windowbase
-	s32i	a3, a2, EXC_TABLE_PARAM
-	l32i	a2, a2, EXC_TABLE_KSTK
-
-	/* Load WB at the time the exception occurred. */
-
-	rsr	a3, sar			# WB is still in SAR
-	neg	a3, a3
-	wsr	a3, windowbase
-	rsync
-
-	rsr	a3, excsave1
-	l32i	a3, a3, EXC_TABLE_DOUBLE_SAVE
-
-	rfde
-
-ENDPROC(fast_syscall_spill_registers_fixup_return)
-
-/*
- * spill all registers.
- *
- * This is not a real function. The following conditions must be met:
- *
- *  - must be called with call0.
- *  - uses a3, a4 and SAR.
- *  - the last 'valid' register of each frame are clobbered.
- *  - the caller must have registered a fixup handler
- *    (or be inside a critical section)
- *  - PS_EXCM must be set (PS_WOE cleared?)
- */
-
-ENTRY(_spill_registers)
-
 	/*
 	 * Rotate ws so that the current windowbase is at bit 0.
 	 * Assume ws = xxxwww1yy (www1 current window frame).
 	 * Rotate ws right so that a4 = yyxxxwww1.
 	 */
 
-	rsr	a4, windowbase
+	rsr	a0, windowbase
 	rsr	a3, windowstart		# a3 = xxxwww1yy
-	ssr	a4			# holds WB
-	slli	a4, a3, WSBITS
-	or	a3, a3, a4		# a3 = xxxwww1yyxxxwww1yy
+	ssr	a0			# holds WB
+	slli	a0, a3, WSBITS
+	or	a3, a3, a0		# a3 = xxxwww1yyxxxwww1yy
 	srl	a3, a3			# a3 = 00xxxwww1yyxxxwww1
 
 	/* We are done if there are no more than the current register frame. */
 
 	extui	a3, a3, 1, WSBITS-1	# a3 = 0yyxxxwww
-	movi	a4, (1 << (WSBITS-1))
+	movi	a0, (1 << (WSBITS-1))
 	_beqz	a3, .Lnospill		# only one active frame? jump
 
 	/* We want 1 at the top, so that we return to the current windowbase */
 
-	or	a3, a3, a4		# 1yyxxxwww
+	or	a3, a3, a0		# 1yyxxxwww
 
 	/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
 
 	wsr	a3, windowstart		# save shifted windowstart
-	neg	a4, a3
-	and	a3, a4, a3		# first bit set from right: 000010000
+	neg	a0, a3
+	and	a3, a0, a3		# first bit set from right: 000010000
 
-	ffs_ws	a4, a3			# a4: shifts to skip empty frames
+	ffs_ws	a0, a3			# a0: shifts to skip empty frames
 	movi	a3, WSBITS
-	sub	a4, a3, a4		# WSBITS-a4:number of 0-bits from right
-	ssr	a4			# save in SAR for later.
+	sub	a0, a3, a0		# WSBITS-a0:number of 0-bits from right
+	ssr	a0			# save in SAR for later.
 
 	rsr	a3, windowbase
-	add	a3, a3, a4
+	add	a3, a3, a0
 	wsr	a3, windowbase
 	rsync
 
@@ -1285,22 +1142,6 @@ ENTRY(_spill_registers)
 	 * we have to save 4,8. or 12 registers.
 	 */
 
-	_bbsi.l	a3, 1, .Lc4
-	_bbsi.l	a3, 2, .Lc8
-
-	/* Special case: we have a call12-frame starting at a4. */
-
-	_bbci.l	a3, 3, .Lc12	# bit 3 shouldn't be zero! (Jump to Lc12 first)
-
-	s32e	a4, a1, -16	# a1 is valid with an empty spill area
-	l32e	a4, a5, -12
-	s32e	a8, a4, -48
-	mov	a8, a4
-	l32e	a4, a1, -16
-	j	.Lc12c
-
-.Lnospill:
-	ret
 
 .Lloop: _bbsi.l	a3, 1, .Lc4
 	_bbci.l	a3, 2, .Lc12
@@ -1314,20 +1155,10 @@ ENTRY(_spill_registers)
 	s32e	a9, a4, -28
 	s32e	a10, a4, -24
 	s32e	a11, a4, -20
-
 	srli	a11, a3, 2		# shift windowbase by 2
 	rotw	2
 	_bnei	a3, 1, .Lloop
-
-.Lexit: /* Done. Do the final rotation, set WS, and return. */
-
-	rotw	1
-	rsr	a3, windowbase
-	ssl	a3
-	movi	a3, 1
-	sll	a3, a3
-	wsr	a3, windowstart
-	ret
+	j	.Lexit
 
 .Lc4:	s32e	a4, a9, -16
 	s32e	a5, a9, -12
@@ -1343,11 +1174,11 @@ ENTRY(_spill_registers)
 
 	/* 12-register frame (call12) */
 
-	l32e	a2, a5, -12
-	s32e	a8, a2, -48
-	mov	a8, a2
+	l32e	a0, a5, -12
+	s32e	a8, a0, -48
+	mov	a8, a0
 
-.Lc12c: s32e	a9, a8, -44
+	s32e	a9, a8, -44
 	s32e	a10, a8, -40
 	s32e	a11, a8, -36
 	s32e	a12, a8, -32
@@ -1367,30 +1198,54 @@ ENTRY(_spill_registers)
 	 */
 
 	rotw	1
-	mov	a5, a13
+	mov	a4, a13
 	rotw	-1
 
-	s32e	a4, a9, -16
-	s32e	a5, a9, -12
-	s32e	a6, a9, -8
-	s32e	a7, a9, -4
+	s32e	a4, a8, -16
+	s32e	a5, a8, -12
+	s32e	a6, a8, -8
+	s32e	a7, a8, -4
 
 	rotw	3
 
 	_beqi	a3, 1, .Lexit
 	j	.Lloop
 
-.Linvalid_mask:
+.Lexit:
 
-	/* We get here because of an unrecoverable error in the window
-	 * registers. If we are in user space, we kill the application,
-	 * however, this condition is unrecoverable in kernel space.
-	 */
+	/* Done. Do the final rotation and set WS */
+
+	rotw	1
+	rsr	a3, windowbase
+	ssl	a3
+	movi	a3, 1
+	sll	a3, a3
+	wsr	a3, windowstart
+.Lnospill:
+
+	/* Advance PC, restore registers and SAR, and return from exception. */
+
+	l32i	a3, a2, PT_SAR
+	l32i	a0, a2, PT_AREG0
+	wsr	a3, sar
+	l32i	a3, a2, PT_AREG3
 
-	rsr	a0, ps
-	_bbci.l	a0, PS_UM_BIT, 1f
+	/* Restore clobbered registers. */
 
-	/* User space: Setup a dummy frame and kill application.
+	l32i	a4, a2, PT_AREG4
+	l32i	a7, a2, PT_AREG7
+	l32i	a8, a2, PT_AREG8
+	l32i	a11, a2, PT_AREG11
+	l32i	a12, a2, PT_AREG12
+	l32i	a15, a2, PT_AREG15
+
+	movi	a2, 0
+	rfe
+
+.Linvalid_mask:
+
+	/* We get here because of an unrecoverable error in the window
+	 * registers, so set up a dummy frame and kill the user application.
 	 * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
 	 */
 
@@ -1414,14 +1269,136 @@ ENTRY(_spill_registers)
 	movi	a4, do_exit
 	callx4	a4
 
-1:	/* Kernel space: PANIC! */
+	/* shouldn't return, so panic */
 
 	wsr	a0, excsave1
 	movi	a0, unrecoverable_exception
 	callx0	a0		# should not return
 1:	j	1b
 
-ENDPROC(_spill_registers)
+
+ENDPROC(fast_syscall_spill_registers)
+
+/* Fixup handler.
+ *
+ * We get here if the spill routine causes an exception, e.g. tlb miss.
+ * We basically restore WINDOWBASE and WINDOWSTART to the condition when
+ * we entered the spill routine and jump to the user exception handler.
+ *
+ * Note that we only need to restore the bits in windowstart that have not
+ * been spilled yet by the _spill_register routine. Luckily, a3 contains a
+ * rotated windowstart with only those bits set for frames that haven't been
+ * spilled yet. Because a3 is rotated such that bit 0 represents the register
+ * frame for the current windowbase - 1, we need to rotate a3 left by the
+ * value of the current windowbase + 1 and move it to windowstart.
+ *
+ * a0: value of depc, original value in depc
+ * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE
+ * a3: exctable, original value in excsave1
+ */
+
+ENTRY(fast_syscall_spill_registers_fixup)
+
+	rsr	a2, windowbase	# get current windowbase (a2 is saved)
+	xsr	a0, depc	# restore depc and a0
+	ssl	a2		# set shift (32 - WB)
+
+	/* We need to make sure the current registers (a0-a3) are preserved.
+	 * To do this, we simply set the bit for the current window frame
+	 * in WS, so that the exception handlers save them to the task stack.
+	 *
+	 * Note: we use a3 to set the windowbase, so we take a special care
+	 * of it, saving it in the original _spill_registers frame across
+	 * the exception handler call.
+	 */
+
+	xsr	a3, excsave1	# get spill-mask
+	slli	a3, a3, 1	# shift left by one
+	addi	a3, a3, 1	# set the bit for the current window frame
+
+	slli	a2, a3, 32-WSBITS
+	src	a2, a3, a2	# a2 = xxwww1yyxxxwww1yy......
+	wsr	a2, windowstart	# set corrected windowstart
+
+	srli	a3, a3, 1
+	rsr	a2, excsave1
+	l32i	a2, a2, EXC_TABLE_DOUBLE_SAVE	# restore a2
+	xsr	a2, excsave1
+	s32i	a3, a2, EXC_TABLE_DOUBLE_SAVE	# save a3
+	l32i	a3, a2, EXC_TABLE_PARAM	# original WB (in user task)
+	xsr	a2, excsave1
+
+	/* Return to the original (user task) WINDOWBASE.
+	 * We leave the following frame behind:
+	 * a0, a1, a2	same
+	 * a3:		trashed (saved in EXC_TABLE_DOUBLE_SAVE)
+	 * depc:	depc (we have to return to that address)
+	 * excsave_1:	exctable
+	 */
+
+	wsr	a3, windowbase
+	rsync
+
+	/* We are now in the original frame when we entered _spill_registers:
+	 *  a0: return address
+	 *  a1: used, stack pointer
+	 *  a2: kernel stack pointer
+	 *  a3: available
+	 *  depc: exception address
+	 *  excsave: exctable
+	 * Note: This frame might be the same as above.
+	 */
+
+	/* Setup stack pointer. */
+
+	addi	a2, a2, -PT_USER_SIZE
+	s32i	a0, a2, PT_AREG0
+
+	/* Make sure we return to this fixup handler. */
+
+	movi	a3, fast_syscall_spill_registers_fixup_return
+	s32i	a3, a2, PT_DEPC		# setup depc
+
+	/* Jump to the exception handler. */
+
+	rsr	a3, excsave1
+	rsr	a0, exccause
+	addx4	a0, a0, a3              	# find entry in table
+	l32i	a0, a0, EXC_TABLE_FAST_USER     # load handler
+	l32i	a3, a3, EXC_TABLE_DOUBLE_SAVE
+	jx	a0
+
+ENDPROC(fast_syscall_spill_registers_fixup)
+
+ENTRY(fast_syscall_spill_registers_fixup_return)
+
+	/* When we return here, all registers have been restored (a2: DEPC) */
+
+	wsr	a2, depc		# exception address
+
+	/* Restore fixup handler. */
+
+	rsr	a2, excsave1
+	s32i	a3, a2, EXC_TABLE_DOUBLE_SAVE
+	movi	a3, fast_syscall_spill_registers_fixup
+	s32i	a3, a2, EXC_TABLE_FIXUP
+	rsr	a3, windowbase
+	s32i	a3, a2, EXC_TABLE_PARAM
+	l32i	a2, a2, EXC_TABLE_KSTK
+
+	/* Load WB at the time the exception occurred. */
+
+	rsr	a3, sar			# WB is still in SAR
+	neg	a3, a3
+	wsr	a3, windowbase
+	rsync
+
+	rsr	a3, excsave1
+	l32i	a3, a3, EXC_TABLE_DOUBLE_SAVE
+
+	rfde
+
+ENDPROC(fast_syscall_spill_registers_fixup_return)
 
 #ifdef CONFIG_MMU
 /*
@@ -1794,6 +1771,43 @@ ENTRY(system_call)
 
 ENDPROC(system_call)
 
+/*
+ * Spill live registers on the kernel stack macro.
+ *
+ * Entry condition: ps.woe is set, ps.excm is cleared
+ * Exit condition: windowstart has single bit set
+ * May clobber: a12, a13
+ */
+	.macro	spill_registers_kernel
+
+#if XCHAL_NUM_AREGS > 16
+	call12	1f
+	_j	2f
+	retw
+	.align	4
+1:
+	_entry	a1, 48
+	addi	a12, a0, 3
+#if XCHAL_NUM_AREGS > 32
+	.rept	(XCHAL_NUM_AREGS - 32) / 12
+	_entry	a1, 48
+	mov	a12, a0
+	.endr
+#endif
+	_entry	a1, 48
+#if XCHAL_NUM_AREGS % 12 == 0
+	mov	a8, a8
+#elif XCHAL_NUM_AREGS % 12 == 4
+	mov	a12, a12
+#elif XCHAL_NUM_AREGS % 12 == 8
+	mov	a4, a4
+#endif
+	retw
+2:
+#else
+	mov	a12, a12
+#endif
+	.endm
 
 /*
  * Task switch.
@@ -1806,21 +1820,20 @@ ENTRY(_switch_to)
 
 	entry	a1, 16
 
-	mov	a12, a2			# preserve 'prev' (a2)
-	mov	a13, a3			# and 'next' (a3)
+	mov	a10, a2			# preserve 'prev' (a2)
+	mov	a11, a3			# and 'next' (a3)
 
 	l32i	a4, a2, TASK_THREAD_INFO
 	l32i	a5, a3, TASK_THREAD_INFO
 
-	save_xtregs_user a4 a6 a8 a9 a10 a11 THREAD_XTREGS_USER
+	save_xtregs_user a4 a6 a8 a9 a12 a13 THREAD_XTREGS_USER
 
-	s32i	a0, a12, THREAD_RA	# save return address
-	s32i	a1, a12, THREAD_SP	# save stack pointer
+	s32i	a0, a10, THREAD_RA	# save return address
+	s32i	a1, a10, THREAD_SP	# save stack pointer
 
 	/* Disable ints while we manipulate the stack pointer. */
 
-	movi	a14, (1 << PS_EXCM_BIT) | LOCKLEVEL
-	xsr	a14, ps
+	rsil	a14, LOCKLEVEL
 	rsr	a3, excsave1
 	rsync
 	s32i	a3, a3, EXC_TABLE_FIXUP	/* enter critical section */
@@ -1835,7 +1848,7 @@ ENTRY(_switch_to)
 
 	/* Flush register file. */
 
-	call0	_spill_registers	# destroys a3, a4, and SAR
+	spill_registers_kernel
 
 	/* Set kernel stack (and leave critical section)
 	 * Note: It's save to set it here. The stack will not be overwritten
@@ -1851,13 +1864,13 @@ ENTRY(_switch_to)
 
 	/* restore context of the task 'next' */
 
-	l32i	a0, a13, THREAD_RA	# restore return address
-	l32i	a1, a13, THREAD_SP	# restore stack pointer
+	l32i	a0, a11, THREAD_RA	# restore return address
+	l32i	a1, a11, THREAD_SP	# restore stack pointer
 
-	load_xtregs_user a5 a6 a8 a9 a10 a11 THREAD_XTREGS_USER
+	load_xtregs_user a5 a6 a8 a9 a12 a13 THREAD_XTREGS_USER
 
 	wsr	a14, ps
-	mov	a2, a12			# return 'prev'
+	mov	a2, a10			# return 'prev'
 	rsync
 
 	retw
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 7d12af1317f1..84fe931bb60e 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -22,6 +22,7 @@
 #include <linux/bootmem.h>
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/clk-provider.h>
 #include <linux/cpu.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
@@ -276,6 +277,7 @@ void __init early_init_devtree(void *params)
 
 static int __init xtensa_device_probe(void)
 {
+	of_clk_init(NULL);
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 	return 0;
 }
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 08b769d3b3a1..2a1823de69cc 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -30,6 +30,7 @@
 #include <asm/platform.h>
 
 unsigned long ccount_freq;		/* ccount Hz */
+EXPORT_SYMBOL(ccount_freq);
 
 static cycle_t ccount_read(struct clocksource *cs)
 {
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index cb8fd44caabc..f9e1ec346e35 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -235,7 +235,7 @@ ENTRY(_DoubleExceptionVector)
 
 	/* Check for overflow/underflow exception, jump if overflow. */
 
-	_bbci.l	a0, 6, _DoubleExceptionVector_WindowOverflow
+	bbci.l	a0, 6, _DoubleExceptionVector_WindowOverflow
 
 	/*
 	 * Restart window underflow exception.
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 74a60c7e085e..80b33ed51f31 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -122,9 +122,7 @@ EXPORT_SYMBOL(insw);
 EXPORT_SYMBOL(insl);
 
 extern long common_exception_return;
-extern long _spill_registers;
 EXPORT_SYMBOL(common_exception_return);
-EXPORT_SYMBOL(_spill_registers);
 
 #ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 479d7537a32a..aff108df92d3 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -90,7 +90,7 @@ int __init mem_reserve(unsigned long start, unsigned long end, int must_exist)
 
 
 /*
- * Initialize the bootmem system and give it all the memory we have available.
+ * Initialize the bootmem system and give it all low memory we have available.
  */
 
 void __init bootmem_init(void)
@@ -142,9 +142,14 @@ void __init bootmem_init(void)
 
 	/* Add all remaining memory pieces into the bootmem map */
 
-	for (i=0; i<sysmem.nr_banks; i++)
-		free_bootmem(sysmem.bank[i].start,
-			     sysmem.bank[i].end - sysmem.bank[i].start);
+	for (i = 0; i < sysmem.nr_banks; i++) {
+		if (sysmem.bank[i].start >> PAGE_SHIFT < max_low_pfn) {
+			unsigned long end = min(max_low_pfn << PAGE_SHIFT,
+						sysmem.bank[i].end);
+			free_bootmem(sysmem.bank[i].start,
+				     end - sysmem.bank[i].start);
+		}
+	}
 
 }
 
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 36ec171698b8..861203e958da 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -39,7 +39,7 @@ void init_mmu(void)
 	set_itlbcfg_register(0);
 	set_dtlbcfg_register(0);
 #endif
-#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && CONFIG_OF
+#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF)
 	/*
 	 * Update the IO area mapping in case xtensa_kio_paddr has changed
 	 */
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 800227862fe8..57fd08b36f51 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -135,11 +135,11 @@ static void __init update_local_mac(struct device_node *node)
 
 static int __init machine_setup(void)
 {
-	struct device_node *serial;
+	struct device_node *clock;
 	struct device_node *eth = NULL;
 
-	for_each_compatible_node(serial, NULL, "ns16550a")
-		update_clock_frequency(serial);
+	for_each_node_by_name(clock, "main-oscillator")
+		update_clock_frequency(clock);
 
 	if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc")))
 		update_local_mac(eth);
@@ -290,6 +290,7 @@ static int __init xtavnet_init(void)
 	 * knows whether they set it correctly on the DIP switches.
 	 */
 	pr_info("XTFPGA: Ethernet MAC %pM\n", ethoc_pdata.hwaddr);
+	ethoc_pdata.eth_clkfreq = *(long *)XTFPGA_CLKFRQ_VADDR;
 
 	return 0;
 }
diff --git a/arch/xtensa/variants/fsf/include/variant/tie.h b/arch/xtensa/variants/fsf/include/variant/tie.h
index bf4020116df5..244cdea4dee5 100644
--- a/arch/xtensa/variants/fsf/include/variant/tie.h
+++ b/arch/xtensa/variants/fsf/include/variant/tie.h
@@ -18,13 +18,6 @@
 #define XCHAL_CP_MASK			0x00	/* bitmask of all CPs by ID */
 #define XCHAL_CP_PORT_MASK		0x00	/* bitmask of only port CPs */
 
-/*  Basic parameters of each coprocessor:  */
-#define XCHAL_CP7_NAME			"XTIOP"
-#define XCHAL_CP7_IDENT			XTIOP
-#define XCHAL_CP7_SA_SIZE		0	/* size of state save area */
-#define XCHAL_CP7_SA_ALIGN		1	/* min alignment of save area */
-#define XCHAL_CP_ID_XTIOP		7	/* coprocessor ID (0..7) */
-
 /*  Filler info for unassigned coprocessors, to simplify arrays etc:  */
 #define XCHAL_NCP_SA_SIZE		0
 #define XCHAL_NCP_SA_ALIGN		1
@@ -42,6 +35,8 @@
 #define XCHAL_CP5_SA_ALIGN		1
 #define XCHAL_CP6_SA_SIZE		0
 #define XCHAL_CP6_SA_ALIGN		1
+#define XCHAL_CP7_SA_SIZE		0
+#define XCHAL_CP7_SA_ALIGN		1
 
 /*  Save area for non-coprocessor optional and custom (TIE) state:  */
 #define XCHAL_NCP_SA_SIZE		0