summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig17
-rw-r--r--arch/alpha/Kconfig4
-rw-r--r--arch/alpha/include/asm/processor.h1
-rw-r--r--arch/alpha/include/asm/thread_info.h2
-rw-r--r--arch/alpha/kernel/pci.c7
-rw-r--r--arch/alpha/kernel/rtc.c8
-rw-r--r--arch/alpha/kernel/sys_nautilus.c4
-rw-r--r--arch/arc/include/asm/thread_info.h2
-rw-r--r--arch/arc/kernel/process.c2
-rw-r--r--arch/arc/kernel/signal.c38
-rw-r--r--arch/arc/kernel/troubleshoot.c3
-rw-r--r--arch/arc/kernel/unwind.c2
-rw-r--r--arch/arm/Kconfig27
-rw-r--r--arch/arm/Kconfig.debug55
-rw-r--r--arch/arm/Makefile11
-rw-r--r--arch/arm/boot/Makefile2
-rw-r--r--arch/arm/boot/compressed/head.S52
-rw-r--r--arch/arm/boot/dts/am335x-bone-common.dtsi8
-rw-r--r--arch/arm/boot/dts/am335x-bone.dts8
-rw-r--r--arch/arm/boot/dts/am335x-lxm.dts4
-rw-r--r--arch/arm/boot/dts/am33xx-clocks.dtsi6
-rw-r--r--arch/arm/boot/dts/am4372.dtsi15
-rw-r--r--arch/arm/boot/dts/am437x-gp-evm.dts1
-rw-r--r--arch/arm/boot/dts/am437x-sk-evm.dts1
-rw-r--r--arch/arm/boot/dts/am43x-epos-evm.dts1
-rw-r--r--arch/arm/boot/dts/am43xx-clocks.dtsi12
-rw-r--r--arch/arm/boot/dts/am57xx-beagle-x15.dts52
-rw-r--r--arch/arm/boot/dts/at91-sama5d3_xplained.dts6
-rw-r--r--arch/arm/boot/dts/at91sam9260.dtsi2
-rw-r--r--arch/arm/boot/dts/at91sam9263.dtsi2
-rw-r--r--arch/arm/boot/dts/at91sam9g45.dtsi2
-rw-r--r--arch/arm/boot/dts/at91sam9x5_macb0.dtsi2
-rw-r--r--arch/arm/boot/dts/at91sam9x5_macb1.dtsi2
-rw-r--r--arch/arm/boot/dts/dm8168-evm.dts19
-rw-r--r--arch/arm/boot/dts/dm816x.dtsi18
-rw-r--r--arch/arm/boot/dts/dra7-evm.dts12
-rw-r--r--arch/arm/boot/dts/dra7.dtsi68
-rw-r--r--arch/arm/boot/dts/dra72-evm.dts11
-rw-r--r--arch/arm/boot/dts/dra72x.dtsi8
-rw-r--r--arch/arm/boot/dts/dra74x.dtsi10
-rw-r--r--arch/arm/boot/dts/dra7xx-clocks.dtsi90
-rw-r--r--arch/arm/boot/dts/exynos3250.dtsi6
-rw-r--r--arch/arm/boot/dts/exynos4-cpu-thermal.dtsi52
-rw-r--r--arch/arm/boot/dts/exynos4.dtsi49
-rw-r--r--arch/arm/boot/dts/exynos4210-trats.dts19
-rw-r--r--arch/arm/boot/dts/exynos4210-universal_c210.dts57
-rw-r--r--arch/arm/boot/dts/exynos4210.dtsi38
-rw-r--r--arch/arm/boot/dts/exynos4212.dtsi5
-rw-r--r--arch/arm/boot/dts/exynos4412-odroid-common.dtsi64
-rw-r--r--arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi24
-rw-r--r--arch/arm/boot/dts/exynos4412-trats2.dts15
-rw-r--r--arch/arm/boot/dts/exynos4412.dtsi5
-rw-r--r--arch/arm/boot/dts/exynos4x12.dtsi12
-rw-r--r--arch/arm/boot/dts/exynos5250-spring.dts20
-rw-r--r--arch/arm/boot/dts/exynos5250.dtsi48
-rw-r--r--arch/arm/boot/dts/exynos5420-trip-points.dtsi35
-rw-r--r--arch/arm/boot/dts/exynos5420.dtsi37
-rw-r--r--arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi24
-rw-r--r--arch/arm/boot/dts/exynos5440-trip-points.dtsi25
-rw-r--r--arch/arm/boot/dts/exynos5440.dtsi18
-rw-r--r--arch/arm/boot/dts/hip04.dtsi1
-rw-r--r--arch/arm/boot/dts/imx6qdl-sabresd.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6sl-evk.dts2
-rw-r--r--arch/arm/boot/dts/omap3-beagle-xm.dts1
-rw-r--r--arch/arm/boot/dts/omap3-beagle.dts1
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts52
-rw-r--r--arch/arm/boot/dts/omap3.dtsi4
-rw-r--r--arch/arm/boot/dts/omap4-cpu-thermal.dtsi4
-rw-r--r--arch/arm/boot/dts/omap4-duovero.dtsi2
-rw-r--r--arch/arm/boot/dts/omap4-panda-common.dtsi8
-rw-r--r--arch/arm/boot/dts/omap4-sdp.dts8
-rw-r--r--arch/arm/boot/dts/omap4-var-som-om44.dtsi2
-rw-r--r--arch/arm/boot/dts/omap4.dtsi18
-rw-r--r--arch/arm/boot/dts/omap5-cm-t54.dts1
-rw-r--r--arch/arm/boot/dts/omap5-core-thermal.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-gpu-thermal.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-uevm.dts2
-rw-r--r--arch/arm/boot/dts/omap5.dtsi30
-rw-r--r--arch/arm/boot/dts/omap54xx-clocks.dtsi41
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi1
-rw-r--r--arch/arm/boot/dts/sama5d3_emac.dtsi2
-rw-r--r--arch/arm/boot/dts/socfpga.dtsi8
-rw-r--r--arch/arm/boot/dts/stih416.dtsi4
-rw-r--r--arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts16
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi3
-rw-r--r--arch/arm/boot/dts/sun5i-a13.dtsi3
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi3
-rw-r--r--arch/arm/boot/dts/tegra114.dtsi16
-rw-r--r--arch/arm/boot/dts/tegra124.dtsi16
-rw-r--r--arch/arm/boot/dts/tegra20.dtsi15
-rw-r--r--arch/arm/boot/dts/tegra30.dtsi16
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts1
-rw-r--r--arch/arm/common/bL_switcher.c16
-rw-r--r--arch/arm/configs/badge4_defconfig1
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--arch/arm/configs/omap2plus_defconfig1
-rw-r--r--arch/arm/configs/sunxi_defconfig1
-rw-r--r--arch/arm/configs/vexpress_defconfig2
-rw-r--r--arch/arm/crypto/Kconfig130
-rw-r--r--arch/arm/crypto/Makefile27
-rw-r--r--arch/arm/crypto/aes-ce-core.S518
-rw-r--r--arch/arm/crypto/aes-ce-glue.c524
-rw-r--r--arch/arm/crypto/aesbs-core.S_shipped12
-rw-r--r--arch/arm/crypto/aesbs-glue.c9
-rw-r--r--arch/arm/crypto/bsaes-armv7.pl12
-rw-r--r--arch/arm/crypto/ghash-ce-core.S94
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c320
-rw-r--r--arch/arm/crypto/sha1-ce-core.S125
-rw-r--r--arch/arm/crypto/sha1-ce-glue.c96
-rw-r--r--arch/arm/crypto/sha1.h (renamed from arch/arm/include/asm/crypto/sha1.h)3
-rw-r--r--arch/arm/crypto/sha1_glue.c112
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c137
-rw-r--r--arch/arm/crypto/sha2-ce-core.S125
-rw-r--r--arch/arm/crypto/sha2-ce-glue.c114
-rw-r--r--arch/arm/crypto/sha256-armv4.pl716
-rw-r--r--arch/arm/crypto/sha256-core.S_shipped2808
-rw-r--r--arch/arm/crypto/sha256_glue.c128
-rw-r--r--arch/arm/crypto/sha256_glue.h14
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c101
-rw-r--r--arch/arm/include/asm/Kbuild2
-rw-r--r--arch/arm/include/asm/assembler.h3
-rw-r--r--arch/arm/include/asm/auxvec.h1
-rw-r--r--arch/arm/include/asm/cpuidle.h23
-rw-r--r--arch/arm/include/asm/cputype.h16
-rw-r--r--arch/arm/include/asm/elf.h15
-rw-r--r--arch/arm/include/asm/futex.h2
-rw-r--r--arch/arm/include/asm/jump_label.h5
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_host.h15
-rw-r--r--arch/arm/include/asm/kvm_mmio.h22
-rw-r--r--arch/arm/include/asm/kvm_mmu.h23
-rw-r--r--arch/arm/include/asm/mach/time.h3
-rw-r--r--arch/arm/include/asm/mmu.h3
-rw-r--r--arch/arm/include/asm/pmu.h1
-rw-r--r--arch/arm/include/asm/seccomp.h11
-rw-r--r--arch/arm/include/asm/smp_plat.h1
-rw-r--r--arch/arm/include/asm/thread_info.h3
-rw-r--r--arch/arm/include/asm/uaccess.h10
-rw-r--r--arch/arm/include/asm/unified.h8
-rw-r--r--arch/arm/include/asm/vdso.h32
-rw-r--r--arch/arm/include/asm/vdso_datapage.h60
-rw-r--r--arch/arm/include/asm/word-at-a-time.h2
-rw-r--r--arch/arm/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/include/uapi/asm/auxvec.h7
-rw-r--r--arch/arm/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm/kernel/Makefile6
-rw-r--r--arch/arm/kernel/arthur.c94
-rw-r--r--arch/arm/kernel/asm-offsets.c10
-rw-r--r--arch/arm/kernel/bios32.c10
-rw-r--r--arch/arm/kernel/cpuidle.c133
-rw-r--r--arch/arm/kernel/entry-armv.S2
-rw-r--r--arch/arm/kernel/head.S14
-rw-r--r--arch/arm/kernel/hibernate.c6
-rw-r--r--arch/arm/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm/kernel/machine_kexec.c3
-rw-r--r--arch/arm/kernel/module.c38
-rw-r--r--arch/arm/kernel/perf_event.c21
-rw-r--r--arch/arm/kernel/perf_event_cpu.c71
-rw-r--r--arch/arm/kernel/perf_event_v7.c525
-rw-r--r--arch/arm/kernel/process.c159
-rw-r--r--arch/arm/kernel/psci-call.S31
-rw-r--r--arch/arm/kernel/psci.c39
-rw-r--r--arch/arm/kernel/reboot.c155
-rw-r--r--arch/arm/kernel/reboot.h7
-rw-r--r--arch/arm/kernel/return_address.c4
-rw-r--r--arch/arm/kernel/setup.c49
-rw-r--r--arch/arm/kernel/signal.c13
-rw-r--r--arch/arm/kernel/sleep.S15
-rw-r--r--arch/arm/kernel/smp.c5
-rw-r--r--arch/arm/kernel/swp_emulate.c2
-rw-r--r--arch/arm/kernel/time.c6
-rw-r--r--arch/arm/kernel/traps.c6
-rw-r--r--arch/arm/kernel/vdso.c337
-rw-r--r--arch/arm/kernel/vmlinux.lds.S18
-rw-r--r--arch/arm/kvm/Kconfig30
-rw-r--r--arch/arm/kvm/Makefile12
-rw-r--r--arch/arm/kvm/arm.c45
-rw-r--r--arch/arm/kvm/guest.c18
-rw-r--r--arch/arm/kvm/interrupts_head.S8
-rw-r--r--arch/arm/kvm/mmio.c64
-rw-r--r--arch/arm/kvm/mmu.c278
-rw-r--r--arch/arm/kvm/trace.h48
-rw-r--r--arch/arm/lib/clear_user.S2
-rw-r--r--arch/arm/lib/copy_to_user.S2
-rw-r--r--arch/arm/lib/csumpartialcopyuser.S2
-rw-r--r--arch/arm/lib/delay.c6
-rw-r--r--arch/arm/mach-cns3xxx/pm.c1
-rw-r--r--arch/arm/mach-davinci/cpuidle.c1
-rw-r--r--arch/arm/mach-dove/pcie.c12
-rw-r--r--arch/arm/mach-exynos/exynos.c17
-rw-r--r--arch/arm/mach-exynos/platsmp.c3
-rw-r--r--arch/arm/mach-exynos/pm.c2
-rw-r--r--arch/arm/mach-exynos/pm_domains.c28
-rw-r--r--arch/arm/mach-exynos/sleep.S31
-rw-r--r--arch/arm/mach-exynos/suspend.c139
-rw-r--r--arch/arm/mach-imx/Kconfig1
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6q.c1
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6sl.c1
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6sx.c1
-rw-r--r--arch/arm/mach-imx/mach-imx6q.c5
-rw-r--r--arch/arm/mach-mv78xx0/pcie.c12
-rw-r--r--arch/arm/mach-mvebu/pmsu.c16
-rw-r--r--arch/arm/mach-omap1/pm.c51
-rw-r--r--arch/arm/mach-omap2/Kconfig22
-rw-r--r--arch/arm/mach-omap2/board-cm-t35.c57
-rw-r--r--arch/arm/mach-omap2/common.c1
-rw-r--r--arch/arm/mach-omap2/common.h3
-rw-r--r--arch/arm/mach-omap2/cpuidle44xx.c11
-rw-r--r--arch/arm/mach-omap2/devices.c76
-rw-r--r--arch/arm/mach-omap2/hsmmc.c33
-rw-r--r--arch/arm/mach-omap2/id.c2
-rw-r--r--arch/arm/mach-omap2/io.c2
-rw-r--r--arch/arm/mach-omap2/mux.c2
-rw-r--r--arch/arm/mach-omap2/omap-secure.h7
-rw-r--r--arch/arm/mach-omap2/omap-wakeupgen.c128
-rw-r--r--arch/arm/mach-omap2/omap-wakeupgen.h1
-rw-r--r--arch/arm/mach-omap2/omap34xx.h36
-rw-r--r--arch/arm/mach-omap2/omap4-common.c96
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c10
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.h1
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_7xx_data.c103
-rw-r--r--arch/arm/mach-omap2/pdata-quirks.c1
-rw-r--r--arch/arm/mach-omap2/prm44xx.c4
-rw-r--r--arch/arm/mach-omap2/sleep44xx.S2
-rw-r--r--arch/arm/mach-orion5x/pci.c32
-rw-r--r--arch/arm/mach-pxa/idp.c1
-rw-r--r--arch/arm/mach-pxa/irq.c111
-rw-r--r--arch/arm/mach-pxa/lpd270.c2
-rw-r--r--arch/arm/mach-pxa/raumfeld.c4
-rw-r--r--arch/arm/mach-pxa/zeus.c2
-rw-r--r--arch/arm/mach-s3c24xx/Kconfig19
-rw-r--r--arch/arm/mach-s3c24xx/Makefile3
-rw-r--r--arch/arm/mach-s3c24xx/include/mach/pm-core.h24
-rw-r--r--arch/arm/mach-s3c24xx/pm-s3c2416.c3
-rw-r--r--arch/arm/mach-s3c24xx/pm.c6
-rw-r--r--arch/arm/mach-s3c24xx/s3c2410.c2
-rw-r--r--arch/arm/mach-s3c24xx/s3c2412.c2
-rw-r--r--arch/arm/mach-s3c24xx/s3c2416.c2
-rw-r--r--arch/arm/mach-s3c24xx/s3c2440.c4
-rw-r--r--arch/arm/mach-s3c24xx/s3c2442.c4
-rw-r--r--arch/arm/mach-s3c24xx/s3c244x.c7
-rw-r--r--arch/arm/mach-s3c64xx/Kconfig4
-rw-r--r--arch/arm/mach-s3c64xx/Makefile3
-rw-r--r--arch/arm/mach-s3c64xx/cpuidle.c2
-rw-r--r--arch/arm/mach-s3c64xx/mach-smdk6410.c2
-rw-r--r--arch/arm/mach-s3c64xx/pm.c2
-rw-r--r--arch/arm/mach-s5pv210/sleep.S2
-rw-r--r--arch/arm/mach-sa1100/neponset.c4
-rw-r--r--arch/arm/mach-sa1100/pleb.c2
-rw-r--r--arch/arm/mach-shmobile/board-armadillo800eva.c3
-rw-r--r--arch/arm/mach-shmobile/intc-sh73a0.c7
-rw-r--r--arch/arm/mach-shmobile/setup-r8a7779.c7
-rw-r--r--arch/arm/mach-socfpga/core.h2
-rw-r--r--arch/arm/mach-socfpga/socfpga.c5
-rw-r--r--arch/arm/mach-sti/board-dt.c1
-rw-r--r--arch/arm/mach-sunxi/Kconfig8
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra114.c6
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra20.c11
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra30.c11
-rw-r--r--arch/arm/mach-tegra/iomap.h15
-rw-r--r--arch/arm/mach-tegra/irq.c209
-rw-r--r--arch/arm/mach-tegra/irq.h6
-rw-r--r--arch/arm/mach-tegra/tegra.c1
-rw-r--r--arch/arm/mach-ux500/cpu.c2
-rw-r--r--arch/arm/mach-vexpress/Kconfig1
-rw-r--r--arch/arm/mach-zynq/common.c2
-rw-r--r--arch/arm/mm/Kconfig16
-rw-r--r--arch/arm/mm/alignment.c6
-rw-r--r--arch/arm/mm/cache-l2x0.c40
-rw-r--r--arch/arm/mm/cache-v7.S38
-rw-r--r--arch/arm/mm/dma-mapping.c125
-rw-r--r--arch/arm/mm/fault.c1
-rw-r--r--arch/arm/mm/init.c52
-rw-r--r--arch/arm/mm/mmap.c16
-rw-r--r--arch/arm/mm/pageattr.c5
-rw-r--r--arch/arm/mm/proc-arm1020.S4
-rw-r--r--arch/arm/mm/proc-arm1020e.S4
-rw-r--r--arch/arm/mm/proc-arm1022.S4
-rw-r--r--arch/arm/mm/proc-arm1026.S4
-rw-r--r--arch/arm/mm/proc-arm720.S4
-rw-r--r--arch/arm/mm/proc-arm740.S4
-rw-r--r--arch/arm/mm/proc-arm7tdmi.S4
-rw-r--r--arch/arm/mm/proc-arm920.S4
-rw-r--r--arch/arm/mm/proc-arm922.S4
-rw-r--r--arch/arm/mm/proc-arm925.S4
-rw-r--r--arch/arm/mm/proc-arm926.S4
-rw-r--r--arch/arm/mm/proc-arm940.S30
-rw-r--r--arch/arm/mm/proc-arm946.S26
-rw-r--r--arch/arm/mm/proc-arm9tdmi.S4
-rw-r--r--arch/arm/mm/proc-fa526.S4
-rw-r--r--arch/arm/mm/proc-feroceon.S5
-rw-r--r--arch/arm/mm/proc-macros.S28
-rw-r--r--arch/arm/mm/proc-mohawk.S4
-rw-r--r--arch/arm/mm/proc-sa110.S4
-rw-r--r--arch/arm/mm/proc-sa1100.S4
-rw-r--r--arch/arm/mm/proc-v6.S4
-rw-r--r--arch/arm/mm/proc-v7-2level.S12
-rw-r--r--arch/arm/mm/proc-v7.S56
-rw-r--r--arch/arm/mm/proc-v7m.S4
-rw-r--r--arch/arm/mm/proc-xsc3.S4
-rw-r--r--arch/arm/mm/proc-xscale.S4
-rw-r--r--arch/arm/nwfpe/entry.S2
-rw-r--r--arch/arm/plat-omap/counter_32k.c20
-rw-r--r--arch/arm/plat-omap/dmtimer.c15
-rw-r--r--arch/arm/plat-pxa/dma.c111
-rw-r--r--arch/arm/plat-samsung/include/plat/pm.h14
-rw-r--r--arch/arm/plat-samsung/pm-debug.c1
-rw-r--r--arch/arm/plat-samsung/pm.c20
-rw-r--r--arch/arm/vdso/.gitignore1
-rw-r--r--arch/arm/vdso/Makefile74
-rw-r--r--arch/arm/vdso/datapage.S15
-rw-r--r--arch/arm/vdso/vdso.S (renamed from arch/arm64/kernel/cputable.c)30
-rw-r--r--arch/arm/vdso/vdso.lds.S87
-rw-r--r--arch/arm/vdso/vdsomunge.c201
-rw-r--r--arch/arm/vdso/vgettimeofday.c282
-rw-r--r--arch/arm/xen/enlighten.c104
-rw-r--r--arch/arm64/Kconfig51
-rw-r--r--arch/arm64/Kconfig.debug2
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/dts/apm/apm-mustang.dts4
-rw-r--r--arch/arm64/boot/dts/apm/apm-storm.dtsi36
-rw-r--r--arch/arm64/boot/dts/arm/juno-clocks.dtsi2
-rw-r--r--arch/arm64/configs/defconfig16
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S12
-rw-r--r--arch/arm64/crypto/aes-ce.S10
-rw-r--r--arch/arm64/crypto/aes-glue.c12
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S33
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c151
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S29
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c227
-rw-r--r--arch/arm64/include/asm/assembler.h48
-rw-r--r--arch/arm64/include/asm/cmpxchg.h32
-rw-r--r--arch/arm64/include/asm/cpufeature.h18
-rw-r--r--arch/arm64/include/asm/cpuidle.h9
-rw-r--r--arch/arm64/include/asm/cputable.h30
-rw-r--r--arch/arm64/include/asm/dma-mapping.h2
-rw-r--r--arch/arm64/include/asm/elf.h5
-rw-r--r--arch/arm64/include/asm/esr.h1
-rw-r--r--arch/arm64/include/asm/fixmap.h2
-rw-r--r--arch/arm64/include/asm/insn.h1
-rw-r--r--arch/arm64/include/asm/jump_label.h8
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_host.h15
-rw-r--r--arch/arm64/include/asm/kvm_mmio.h22
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h85
-rw-r--r--arch/arm64/include/asm/mmu_context.h52
-rw-r--r--arch/arm64/include/asm/page.h10
-rw-r--r--arch/arm64/include/asm/percpu.h44
-rw-r--r--arch/arm64/include/asm/pgalloc.h8
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h13
-rw-r--r--arch/arm64/include/asm/pgtable-types.h12
-rw-r--r--arch/arm64/include/asm/pgtable.h8
-rw-r--r--arch/arm64/include/asm/pmu.h1
-rw-r--r--arch/arm64/include/asm/proc-fns.h15
-rw-r--r--arch/arm64/include/asm/processor.h6
-rw-r--r--arch/arm64/include/asm/smp_plat.h2
-rw-r--r--arch/arm64/include/asm/thread_info.h3
-rw-r--r--arch/arm64/include/asm/tlb.h7
-rw-r--r--arch/arm64/include/asm/tlbflush.h13
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c55
-rw-r--r--arch/arm64/kernel/asm-offsets.c5
-rw-r--r--arch/arm64/kernel/cpu_errata.c47
-rw-r--r--arch/arm64/kernel/cpufeature.c47
-rw-r--r--arch/arm64/kernel/cpuidle.c2
-rw-r--r--arch/arm64/kernel/cpuinfo.c1
-rw-r--r--arch/arm64/kernel/efi.c15
-rw-r--r--arch/arm64/kernel/entry.S20
-rw-r--r--arch/arm64/kernel/entry32.S18
-rw-r--r--arch/arm64/kernel/head.S263
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/insn.c81
-rw-r--r--arch/arm64/kernel/perf_event.c78
-rw-r--r--arch/arm64/kernel/process.c8
-rw-r--r--arch/arm64/kernel/setup.c55
-rw-r--r--arch/arm64/kernel/signal.c7
-rw-r--r--arch/arm64/kernel/smp.c5
-rw-r--r--arch/arm64/kernel/sys32.c1
-rw-r--r--arch/arm64/kernel/vdso.c10
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S17
-rw-r--r--arch/arm64/kvm/Kconfig18
-rw-r--r--arch/arm64/kvm/Makefile20
-rw-r--r--arch/arm64/kvm/hyp-init.S25
-rw-r--r--arch/arm64/mm/dma-mapping.c12
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/mm/mmap.c20
-rw-r--r--arch/arm64/mm/mmu.c16
-rw-r--r--arch/arm64/mm/pageattr.c2
-rw-r--r--arch/arm64/mm/proc-macros.S10
-rw-r--r--arch/arm64/mm/proc.S3
-rw-r--r--arch/avr32/include/asm/elf.h2
-rw-r--r--arch/avr32/include/asm/thread_info.h3
-rw-r--r--arch/avr32/kernel/asm-offsets.c1
-rw-r--r--arch/blackfin/include/asm/thread_info.h11
-rw-r--r--arch/blackfin/kernel/asm-offsets.c6
-rw-r--r--arch/blackfin/kernel/signal.c6
-rw-r--r--arch/blackfin/kernel/traps.c1
-rw-r--r--arch/blackfin/mach-bf561/smp.c2
-rw-r--r--arch/blackfin/mach-common/smp.c6
-rw-r--r--arch/c6x/Makefile2
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/asm/dma-mapping.h8
-rw-r--r--arch/c6x/include/asm/flat.h12
-rw-r--r--arch/c6x/include/asm/pgtable.h5
-rw-r--r--arch/c6x/include/asm/setup.h1
-rw-r--r--arch/c6x/include/asm/thread_info.h2
-rw-r--r--arch/c6x/kernel/process.c1
-rw-r--r--arch/c6x/kernel/setup.c10
-rw-r--r--arch/c6x/kernel/time.c2
-rw-r--r--arch/c6x/platforms/cache.c2
-rw-r--r--arch/cris/arch-v10/kernel/fasttimer.c85
-rw-r--r--arch/cris/arch-v10/kernel/setup.c58
-rw-r--r--arch/cris/arch-v10/kernel/signal.c2
-rw-r--r--arch/cris/arch-v32/kernel/fasttimer.c85
-rw-r--r--arch/cris/arch-v32/kernel/setup.c62
-rw-r--r--arch/cris/arch-v32/kernel/signal.c2
-rw-r--r--arch/cris/include/asm/thread_info.h2
-rw-r--r--arch/frv/include/asm/thread_info.h2
-rw-r--r--arch/frv/kernel/asm-offsets.c1
-rw-r--r--arch/frv/kernel/signal.c24
-rw-r--r--arch/frv/mb93090-mb00/pci-vdk.c6
-rw-r--r--arch/hexagon/include/asm/thread_info.h2
-rw-r--r--arch/hexagon/kernel/process.c2
-rw-r--r--arch/ia64/Kconfig18
-rw-r--r--arch/ia64/include/asm/acpi.h6
-rw-r--r--arch/ia64/include/asm/page.h4
-rw-r--r--arch/ia64/include/asm/pgalloc.h4
-rw-r--r--arch/ia64/include/asm/pgtable.h12
-rw-r--r--arch/ia64/include/asm/thread_info.h2
-rw-r--r--arch/ia64/kernel/Makefile2
-rw-r--r--arch/ia64/kernel/acpi.c2
-rw-r--r--arch/ia64/kernel/iosapic.c2
-rw-r--r--arch/ia64/kernel/irq_ia64.c36
-rw-r--r--arch/ia64/kernel/ivt.S12
-rw-r--r--arch/ia64/kernel/machine_kexec.c4
-rw-r--r--arch/ia64/kernel/mca.c10
-rw-r--r--arch/ia64/kernel/msi_ia64.c10
-rw-r--r--arch/ia64/kernel/numa.c10
-rw-r--r--arch/ia64/kernel/salinfo.c24
-rw-r--r--arch/ia64/kernel/setup.c11
-rw-r--r--arch/ia64/kernel/smp.c6
-rw-r--r--arch/ia64/kernel/smpboot.c42
-rw-r--r--arch/ia64/kernel/topology.c6
-rw-r--r--arch/ia64/mm/init.c25
-rw-r--r--arch/ia64/sn/kernel/io_init.c2
-rw-r--r--arch/m32r/include/asm/asm-offsets.h1
-rw-r--r--arch/m32r/include/asm/io.h1
-rw-r--r--arch/m32r/include/asm/thread_info.h15
-rw-r--r--arch/m32r/kernel/asm-offsets.c15
-rw-r--r--arch/m32r/kernel/entry.S1
-rw-r--r--arch/m32r/kernel/signal.c14
-rw-r--r--arch/m32r/kernel/smpboot.c2
-rw-r--r--arch/m68k/Kconfig4
-rw-r--r--arch/m68k/coldfire/m527x.c3
-rw-r--r--arch/m68k/coldfire/pci.c4
-rw-r--r--arch/m68k/configs/amiga_defconfig3
-rw-r--r--arch/m68k/configs/apollo_defconfig3
-rw-r--r--arch/m68k/configs/atari_defconfig3
-rw-r--r--arch/m68k/configs/bvme6000_defconfig3
-rw-r--r--arch/m68k/configs/hp300_defconfig3
-rw-r--r--arch/m68k/configs/mac_defconfig3
-rw-r--r--arch/m68k/configs/multi_defconfig3
-rw-r--r--arch/m68k/configs/mvme147_defconfig3
-rw-r--r--arch/m68k/configs/mvme16x_defconfig3
-rw-r--r--arch/m68k/configs/q40_defconfig5
-rw-r--r--arch/m68k/configs/sun3_defconfig3
-rw-r--r--arch/m68k/configs/sun3x_defconfig3
-rw-r--r--arch/m68k/include/asm/m527xsim.h2
-rw-r--r--arch/m68k/include/asm/m68360_pram.h4
-rw-r--r--arch/m68k/include/asm/mcfqspi.h5
-rw-r--r--arch/m68k/include/asm/thread_info.h2
-rw-r--r--arch/m68k/kernel/pcibios.c2
-rw-r--r--arch/m68k/kernel/signal.c14
-rw-r--r--arch/m68k/lib/ashldi3.c7
-rw-r--r--arch/m68k/lib/ashrdi3.c7
-rw-r--r--arch/m68k/lib/divsi3.S7
-rw-r--r--arch/m68k/lib/lshrdi3.c7
-rw-r--r--arch/m68k/lib/modsi3.S7
-rw-r--r--arch/m68k/lib/muldi3.c7
-rw-r--r--arch/m68k/lib/mulsi3.S7
-rw-r--r--arch/m68k/lib/udivsi3.S7
-rw-r--r--arch/m68k/lib/umodsi3.S7
-rw-r--r--arch/m68k/mac/oss.c3
-rw-r--r--arch/metag/include/asm/io.h1
-rw-r--r--arch/metag/include/asm/pgtable-bits.h104
-rw-r--r--arch/metag/include/asm/pgtable.h95
-rw-r--r--arch/metag/include/asm/processor.h1
-rw-r--r--arch/metag/include/asm/thread_info.h2
-rw-r--r--arch/metag/kernel/irq.c2
-rw-r--r--arch/metag/kernel/smp.c5
-rw-r--r--arch/microblaze/include/asm/seccomp.h8
-rw-r--r--arch/microblaze/include/asm/thread_info.h2
-rw-r--r--arch/microblaze/kernel/cpu/mb.c149
-rw-r--r--arch/microblaze/kernel/entry.S7
-rw-r--r--arch/microblaze/kernel/signal.c11
-rw-r--r--arch/microblaze/pci/pci-common.c4
-rw-r--r--arch/mips/Kbuild.platforms3
-rw-r--r--arch/mips/Kconfig124
-rw-r--r--arch/mips/Makefile23
-rw-r--r--arch/mips/ar7/platform.c5
-rw-r--r--arch/mips/ath79/common.h2
-rw-r--r--arch/mips/bcm3384/Platform7
-rw-r--r--arch/mips/bcm3384/dma.c81
-rw-r--r--arch/mips/bcm3384/irq.c193
-rw-r--r--arch/mips/bcm3384/setup.c97
-rw-r--r--arch/mips/bcm47xx/bcm47xx_private.h4
-rw-r--r--arch/mips/bcm47xx/board.c61
-rw-r--r--arch/mips/bcm47xx/buttons.c18
-rw-r--r--arch/mips/bcm47xx/leds.c10
-rw-r--r--arch/mips/bcm47xx/nvram.c77
-rw-r--r--arch/mips/bcm47xx/prom.c3
-rw-r--r--arch/mips/bcm47xx/serial.c8
-rw-r--r--arch/mips/bcm47xx/setup.c13
-rw-r--r--arch/mips/bcm47xx/sprom.c675
-rw-r--r--arch/mips/bcm47xx/time.c2
-rw-r--r--arch/mips/bcm63xx/irq.c4
-rw-r--r--arch/mips/bcm63xx/prom.c4
-rw-r--r--arch/mips/bcm63xx/setup.c4
-rw-r--r--arch/mips/bmips/Kconfig62
-rw-r--r--arch/mips/bmips/Makefile (renamed from arch/mips/bcm3384/Makefile)0
-rw-r--r--arch/mips/bmips/Platform7
-rw-r--r--arch/mips/bmips/dma.c117
-rw-r--r--arch/mips/bmips/irq.c38
-rw-r--r--arch/mips/bmips/setup.c194
-rw-r--r--arch/mips/boot/compressed/Makefile9
-rw-r--r--arch/mips/boot/compressed/decompress.c5
-rw-r--r--arch/mips/boot/dts/Makefile33
-rw-r--r--arch/mips/boot/dts/bcm3384.dtsi109
-rw-r--r--arch/mips/boot/dts/brcm/Makefile19
-rw-r--r--arch/mips/boot/dts/brcm/bcm3384_viper.dtsi108
-rw-r--r--arch/mips/boot/dts/brcm/bcm3384_zephyr.dtsi126
-rw-r--r--arch/mips/boot/dts/brcm/bcm6328.dtsi86
-rw-r--r--arch/mips/boot/dts/brcm/bcm6368.dtsi93
-rw-r--r--arch/mips/boot/dts/brcm/bcm7125.dtsi139
-rw-r--r--arch/mips/boot/dts/brcm/bcm7346.dtsi224
-rw-r--r--arch/mips/boot/dts/brcm/bcm7358.dtsi161
-rw-r--r--arch/mips/boot/dts/brcm/bcm7360.dtsi161
-rw-r--r--arch/mips/boot/dts/brcm/bcm7362.dtsi167
-rw-r--r--arch/mips/boot/dts/brcm/bcm7420.dtsi184
-rw-r--r--arch/mips/boot/dts/brcm/bcm7425.dtsi225
-rw-r--r--arch/mips/boot/dts/brcm/bcm93384wvg.dts (renamed from arch/mips/boot/dts/bcm93384wvg.dts)9
-rw-r--r--arch/mips/boot/dts/brcm/bcm93384wvg_viper.dts25
-rw-r--r--arch/mips/boot/dts/brcm/bcm96368mvwg.dts31
-rw-r--r--arch/mips/boot/dts/brcm/bcm97125cbmb.dts31
-rw-r--r--arch/mips/boot/dts/brcm/bcm97346dbsmb.dts58
-rw-r--r--arch/mips/boot/dts/brcm/bcm97358svmb.dts34
-rw-r--r--arch/mips/boot/dts/brcm/bcm97360svmb.dts34
-rw-r--r--arch/mips/boot/dts/brcm/bcm97362svmb.dts34
-rw-r--r--arch/mips/boot/dts/brcm/bcm97420c.dts45
-rw-r--r--arch/mips/boot/dts/brcm/bcm97425svmb.dts60
-rw-r--r--arch/mips/boot/dts/brcm/bcm9ejtagprb.dts22
-rw-r--r--arch/mips/boot/dts/cavium-octeon/Makefile9
-rw-r--r--arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts (renamed from arch/mips/boot/dts/octeon_3xxx.dts)12
-rw-r--r--arch/mips/boot/dts/cavium-octeon/octeon_68xx.dts (renamed from arch/mips/boot/dts/octeon_68xx.dts)0
-rw-r--r--arch/mips/boot/dts/lantiq/Makefile9
-rw-r--r--arch/mips/boot/dts/lantiq/danube.dtsi (renamed from arch/mips/boot/dts/danube.dtsi)0
-rw-r--r--arch/mips/boot/dts/lantiq/easy50712.dts (renamed from arch/mips/boot/dts/easy50712.dts)0
-rw-r--r--arch/mips/boot/dts/mti/Makefile9
-rw-r--r--arch/mips/boot/dts/mti/sead3.dts (renamed from arch/mips/boot/dts/sead3.dts)0
-rw-r--r--arch/mips/boot/dts/netlogic/Makefile13
-rw-r--r--arch/mips/boot/dts/netlogic/xlp_evp.dts (renamed from arch/mips/boot/dts/xlp_evp.dts)0
-rw-r--r--arch/mips/boot/dts/netlogic/xlp_fvp.dts (renamed from arch/mips/boot/dts/xlp_fvp.dts)0
-rw-r--r--arch/mips/boot/dts/netlogic/xlp_gvp.dts (renamed from arch/mips/boot/dts/xlp_gvp.dts)0
-rw-r--r--arch/mips/boot/dts/netlogic/xlp_rvp.dts77
-rw-r--r--arch/mips/boot/dts/netlogic/xlp_svp.dts (renamed from arch/mips/boot/dts/xlp_svp.dts)0
-rw-r--r--arch/mips/boot/dts/ralink/Makefile12
-rw-r--r--arch/mips/boot/dts/ralink/mt7620a.dtsi (renamed from arch/mips/boot/dts/mt7620a.dtsi)0
-rw-r--r--arch/mips/boot/dts/ralink/mt7620a_eval.dts (renamed from arch/mips/boot/dts/mt7620a_eval.dts)0
-rw-r--r--arch/mips/boot/dts/ralink/rt2880.dtsi (renamed from arch/mips/boot/dts/rt2880.dtsi)0
-rw-r--r--arch/mips/boot/dts/ralink/rt2880_eval.dts (renamed from arch/mips/boot/dts/rt2880_eval.dts)0
-rw-r--r--arch/mips/boot/dts/ralink/rt3050.dtsi (renamed from arch/mips/boot/dts/rt3050.dtsi)0
-rw-r--r--arch/mips/boot/dts/ralink/rt3052_eval.dts (renamed from arch/mips/boot/dts/rt3052_eval.dts)0
-rw-r--r--arch/mips/boot/dts/ralink/rt3883.dtsi (renamed from arch/mips/boot/dts/rt3883.dtsi)0
-rw-r--r--arch/mips/boot/dts/ralink/rt3883_eval.dts (renamed from arch/mips/boot/dts/rt3883_eval.dts)0
-rw-r--r--arch/mips/cavium-octeon/crypto/Makefile5
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.c4
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.h163
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-md5.c8
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha1.c241
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha256.c280
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha512.c277
-rw-r--r--arch/mips/cavium-octeon/dma-octeon.c2
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-l2c.c45
-rw-r--r--arch/mips/cavium-octeon/flash_setup.c83
-rw-r--r--arch/mips/cavium-octeon/octeon-platform.c19
-rw-r--r--arch/mips/cavium-octeon/octeon_boot.h23
-rw-r--r--arch/mips/cavium-octeon/setup.c10
-rw-r--r--arch/mips/cavium-octeon/smp.c4
-rw-r--r--arch/mips/configs/bmips_be_defconfig (renamed from arch/mips/configs/bcm3384_defconfig)11
-rw-r--r--arch/mips/configs/bmips_stb_defconfig88
-rw-r--r--arch/mips/configs/ip32_defconfig3
-rw-r--r--arch/mips/configs/lemote2f_defconfig1
-rw-r--r--arch/mips/configs/loongson3_defconfig1
-rw-r--r--arch/mips/configs/maltaup_xpa_defconfig439
-rw-r--r--arch/mips/configs/pistachio_defconfig336
-rw-r--r--arch/mips/dec/int-handler.S7
-rw-r--r--arch/mips/dec/setup.c16
-rw-r--r--arch/mips/include/asm/asm-eva.h137
-rw-r--r--arch/mips/include/asm/asmmacro-32.h96
-rw-r--r--arch/mips/include/asm/asmmacro.h218
-rw-r--r--arch/mips/include/asm/bitops.h7
-rw-r--r--arch/mips/include/asm/bmips.h16
-rw-r--r--arch/mips/include/asm/cacheflush.h38
-rw-r--r--arch/mips/include/asm/cdmm.h98
-rw-r--r--arch/mips/include/asm/cevt-r4k.h19
-rw-r--r--arch/mips/include/asm/checksum.h6
-rw-r--r--arch/mips/include/asm/cmpxchg.h11
-rw-r--r--arch/mips/include/asm/cpu-features.h48
-rw-r--r--arch/mips/include/asm/cpu-info.h2
-rw-r--r--arch/mips/include/asm/cpu-type.h1
-rw-r--r--arch/mips/include/asm/cpu.h8
-rw-r--r--arch/mips/include/asm/dma-mapping.h2
-rw-r--r--arch/mips/include/asm/elf.h16
-rw-r--r--arch/mips/include/asm/fpu.h27
-rw-r--r--arch/mips/include/asm/fpu_emulator.h6
-rw-r--r--arch/mips/include/asm/irq.h3
-rw-r--r--arch/mips/include/asm/jump_label.h7
-rw-r--r--arch/mips/include/asm/kdebug.h3
-rw-r--r--arch/mips/include/asm/kvm_host.h125
-rw-r--r--arch/mips/include/asm/mach-ar7/war.h24
-rw-r--r--arch/mips/include/asm/mach-ath25/dma-coherence.h14
-rw-r--r--arch/mips/include/asm/mach-ath25/war.h25
-rw-r--r--arch/mips/include/asm/mach-au1x00/war.h24
-rw-r--r--arch/mips/include/asm/mach-bcm3384/war.h24
-rw-r--r--arch/mips/include/asm/mach-bcm47xx/bcm47xx.h1
-rw-r--r--arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h4
-rw-r--r--arch/mips/include/asm/mach-bcm47xx/bcm47xx_nvram.h21
-rw-r--r--arch/mips/include/asm/mach-bcm47xx/war.h24
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h14
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/dma-coherence.h10
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/war.h24
-rw-r--r--arch/mips/include/asm/mach-bmips/dma-coherence.h (renamed from arch/mips/include/asm/mach-bcm3384/dma-coherence.h)12
-rw-r--r--arch/mips/include/asm/mach-bmips/spaces.h18
-rw-r--r--arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h4
-rw-r--r--arch/mips/include/asm/mach-cavium-octeon/mangle-port.h74
-rw-r--r--arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-cobalt/war.h24
-rw-r--r--arch/mips/include/asm/mach-dec/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-dec/war.h24
-rw-r--r--arch/mips/include/asm/mach-emma2rh/war.h24
-rw-r--r--arch/mips/include/asm/mach-generic/dma-coherence.h6
-rw-r--r--arch/mips/include/asm/mach-generic/war.h (renamed from arch/mips/include/asm/mach-ath79/war.h)6
-rw-r--r--arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ip27/dma-coherence.h4
-rw-r--r--arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ip32/dma-coherence.h4
-rw-r--r--arch/mips/include/asm/mach-ip32/mc146818rtc.h36
-rw-r--r--arch/mips/include/asm/mach-jazz/dma-coherence.h4
-rw-r--r--arch/mips/include/asm/mach-jazz/war.h24
-rw-r--r--arch/mips/include/asm/mach-jz4740/war.h24
-rw-r--r--arch/mips/include/asm/mach-lantiq/war.h23
-rw-r--r--arch/mips/include/asm/mach-lasat/war.h24
-rw-r--r--arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-loongson/dma-coherence.h4
-rw-r--r--arch/mips/include/asm/mach-loongson/gpio.h15
-rw-r--r--arch/mips/include/asm/mach-loongson/loongson.h4
-rw-r--r--arch/mips/include/asm/mach-loongson/war.h24
-rw-r--r--arch/mips/include/asm/mach-loongson1/war.h24
-rw-r--r--arch/mips/include/asm/mach-netlogic/multi-node.h9
-rw-r--r--arch/mips/include/asm/mach-netlogic/topology.h15
-rw-r--r--arch/mips/include/asm/mach-netlogic/war.h25
-rw-r--r--arch/mips/include/asm/mach-paravirt/war.h25
-rw-r--r--arch/mips/include/asm/mach-pistachio/gpio.h21
-rw-r--r--arch/mips/include/asm/mach-pistachio/irq.h18
-rw-r--r--arch/mips/include/asm/mach-pnx833x/war.h24
-rw-r--r--arch/mips/include/asm/mach-ralink/war.h24
-rw-r--r--arch/mips/include/asm/mach-rm/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-tx39xx/war.h24
-rw-r--r--arch/mips/include/asm/mach-vr41xx/war.h24
-rw-r--r--arch/mips/include/asm/mips-boards/sead3-addr.h83
-rw-r--r--arch/mips/include/asm/mips-r2-to-r6-emul.h9
-rw-r--r--arch/mips/include/asm/mipsregs.h312
-rw-r--r--arch/mips/include/asm/netlogic/common.h21
-rw-r--r--arch/mips/include/asm/netlogic/mips-extns.h8
-rw-r--r--arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h2
-rw-r--r--arch/mips/include/asm/netlogic/xlp-hal/sys.h3
-rw-r--r--arch/mips/include/asm/netlogic/xlp-hal/xlp.h3
-rw-r--r--arch/mips/include/asm/octeon/cvmx-address.h67
-rw-r--r--arch/mips/include/asm/octeon/cvmx-bootinfo.h55
-rw-r--r--arch/mips/include/asm/octeon/cvmx-bootmem.h14
-rw-r--r--arch/mips/include/asm/octeon/cvmx-fau.h22
-rw-r--r--arch/mips/include/asm/octeon/cvmx-fpa.h7
-rw-r--r--arch/mips/include/asm/octeon/cvmx-l2c.h9
-rw-r--r--arch/mips/include/asm/octeon/cvmx-packet.h8
-rw-r--r--arch/mips/include/asm/octeon/cvmx-pko.h31
-rw-r--r--arch/mips/include/asm/octeon/cvmx-pow.h247
-rw-r--r--arch/mips/include/asm/octeon/cvmx-wqe.h71
-rw-r--r--arch/mips/include/asm/octeon/cvmx.h8
-rw-r--r--arch/mips/include/asm/octeon/octeon.h2
-rw-r--r--arch/mips/include/asm/octeon/pci-octeon.h3
-rw-r--r--arch/mips/include/asm/page.h2
-rw-r--r--arch/mips/include/asm/pci.h2
-rw-r--r--arch/mips/include/asm/pci/bridge.h1
-rw-r--r--arch/mips/include/asm/pgtable-32.h15
-rw-r--r--arch/mips/include/asm/pgtable-64.h10
-rw-r--r--arch/mips/include/asm/pgtable-bits.h96
-rw-r--r--arch/mips/include/asm/pgtable.h83
-rw-r--r--arch/mips/include/asm/processor.h2
-rw-r--r--arch/mips/include/asm/r4kcache.h89
-rw-r--r--arch/mips/include/asm/seccomp.h7
-rw-r--r--arch/mips/include/asm/sgi/sgi.h15
-rw-r--r--arch/mips/include/asm/smp.h2
-rw-r--r--arch/mips/include/asm/spinlock.h2
-rw-r--r--arch/mips/include/asm/thread_info.h6
-rw-r--r--arch/mips/include/uapi/asm/kvm.h164
-rw-r--r--arch/mips/jz4740/Platform1
-rw-r--r--arch/mips/jz4740/time.c8
-rw-r--r--arch/mips/kernel/asm-offsets.c106
-rw-r--r--arch/mips/kernel/branch.c6
-rw-r--r--arch/mips/kernel/cevt-r4k.c33
-rw-r--r--arch/mips/kernel/cevt-txx9.c9
-rw-r--r--arch/mips/kernel/cpu-probe.c200
-rw-r--r--arch/mips/kernel/crash.c8
-rw-r--r--arch/mips/kernel/csrc-bcm1480.c12
-rw-r--r--arch/mips/kernel/csrc-ioasic.c13
-rw-r--r--arch/mips/kernel/csrc-r4k.c8
-rw-r--r--arch/mips/kernel/csrc-sb1250.c23
-rw-r--r--arch/mips/kernel/elf.c14
-rw-r--r--arch/mips/kernel/entry.S3
-rw-r--r--arch/mips/kernel/genex.S15
-rw-r--r--arch/mips/kernel/idle.c13
-rw-r--r--arch/mips/kernel/mips-mt-fpaff.c4
-rw-r--r--arch/mips/kernel/mips-r2-to-r6-emul.c15
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c83
-rw-r--r--arch/mips/kernel/proc.c1
-rw-r--r--arch/mips/kernel/process.c12
-rw-r--r--arch/mips/kernel/prom.c5
-rw-r--r--arch/mips/kernel/ptrace.c40
-rw-r--r--arch/mips/kernel/r2300_switch.S7
-rw-r--r--arch/mips/kernel/r4k_fpu.S2
-rw-r--r--arch/mips/kernel/r4k_switch.S7
-rw-r--r--arch/mips/kernel/reset.c25
-rw-r--r--arch/mips/kernel/setup.c2
-rw-r--r--arch/mips/kernel/smp-bmips.c2
-rw-r--r--arch/mips/kernel/smp-cmp.c4
-rw-r--r--arch/mips/kernel/smp-cps.c10
-rw-r--r--arch/mips/kernel/smp-mt.c4
-rw-r--r--arch/mips/kernel/smp.c32
-rw-r--r--arch/mips/kernel/traps.c291
-rw-r--r--arch/mips/kernel/unaligned.c346
-rw-r--r--arch/mips/kvm/Makefile8
-rw-r--r--arch/mips/kvm/emulate.c332
-rw-r--r--arch/mips/kvm/fpu.S122
-rw-r--r--arch/mips/kvm/locore.S38
-rw-r--r--arch/mips/kvm/mips.c472
-rw-r--r--arch/mips/kvm/msa.S161
-rw-r--r--arch/mips/kvm/stats.c4
-rw-r--r--arch/mips/kvm/tlb.c6
-rw-r--r--arch/mips/kvm/trap_emul.c199
-rw-r--r--arch/mips/lantiq/prom.c2
-rw-r--r--arch/mips/lantiq/xway/vmmc.c1
-rw-r--r--arch/mips/lasat/sysctl.c19
-rw-r--r--arch/mips/lib/csum_partial.S38
-rw-r--r--arch/mips/loongson/common/Makefile1
-rw-r--r--arch/mips/loongson/common/env.c9
-rw-r--r--arch/mips/loongson/common/gpio.c139
-rw-r--r--arch/mips/loongson/common/pci.c6
-rw-r--r--arch/mips/loongson/loongson-3/cop2-ex.c2
-rw-r--r--arch/mips/loongson/loongson-3/hpet.c2
-rw-r--r--arch/mips/loongson/loongson-3/irq.c1
-rw-r--r--arch/mips/loongson/loongson-3/numa.c4
-rw-r--r--arch/mips/loongson/loongson-3/smp.c2
-rw-r--r--arch/mips/math-emu/Makefile15
-rw-r--r--arch/mips/math-emu/cp1emu.c288
-rw-r--r--arch/mips/math-emu/dp_add.c14
-rw-r--r--arch/mips/math-emu/dp_cmp.c13
-rw-r--r--arch/mips/math-emu/dp_div.c9
-rw-r--r--arch/mips/math-emu/dp_fsp.c16
-rw-r--r--arch/mips/math-emu/dp_mul.c9
-rw-r--r--arch/mips/math-emu/dp_simple.c55
-rw-r--r--arch/mips/math-emu/dp_sqrt.c13
-rw-r--r--arch/mips/math-emu/dp_sub.c14
-rw-r--r--arch/mips/math-emu/dsemul.c6
-rw-r--r--arch/mips/math-emu/ieee754.h111
-rw-r--r--arch/mips/math-emu/ieee754dp.c25
-rw-r--r--arch/mips/math-emu/ieee754dp.h1
-rw-r--r--arch/mips/math-emu/ieee754int.h5
-rw-r--r--arch/mips/math-emu/ieee754sp.c25
-rw-r--r--arch/mips/math-emu/ieee754sp.h1
-rw-r--r--arch/mips/math-emu/me-debugfs.c1
-rw-r--r--arch/mips/math-emu/sp_add.c14
-rw-r--r--arch/mips/math-emu/sp_cmp.c13
-rw-r--r--arch/mips/math-emu/sp_div.c9
-rw-r--r--arch/mips/math-emu/sp_fdp.c22
-rw-r--r--arch/mips/math-emu/sp_mul.c9
-rw-r--r--arch/mips/math-emu/sp_simple.c55
-rw-r--r--arch/mips/math-emu/sp_sqrt.c13
-rw-r--r--arch/mips/math-emu/sp_sub.c14
-rw-r--r--arch/mips/mm/c-r4k.c33
-rw-r--r--arch/mips/mm/cache.c39
-rw-r--r--arch/mips/mm/dma-default.c4
-rw-r--r--arch/mips/mm/init.c7
-rw-r--r--arch/mips/mm/mmap.c24
-rw-r--r--arch/mips/mm/page.c1
-rw-r--r--arch/mips/mm/tlb-r4k.c15
-rw-r--r--arch/mips/mm/tlbex.c116
-rw-r--r--arch/mips/mti-malta/malta-init.c4
-rw-r--r--arch/mips/mti-malta/malta-memory.c13
-rw-r--r--arch/mips/mti-malta/malta-time.c20
-rw-r--r--arch/mips/mti-sead3/Makefile7
-rw-r--r--arch/mips/mti-sead3/leds-sead3.c41
-rw-r--r--arch/mips/mti-sead3/sead3-ehci.c53
-rw-r--r--arch/mips/mti-sead3/sead3-i2c-dev.c33
-rw-r--r--arch/mips/mti-sead3/sead3-i2c-drv.c404
-rw-r--r--arch/mips/mti-sead3/sead3-i2c.c31
-rw-r--r--arch/mips/mti-sead3/sead3-init.c2
-rw-r--r--arch/mips/mti-sead3/sead3-leds.c79
-rw-r--r--arch/mips/mti-sead3/sead3-mtd.c53
-rw-r--r--arch/mips/mti-sead3/sead3-net.c57
-rw-r--r--arch/mips/mti-sead3/sead3-platform.c184
-rw-r--r--arch/mips/netlogic/Kconfig9
-rw-r--r--arch/mips/netlogic/common/irq.c10
-rw-r--r--arch/mips/netlogic/common/reset.S22
-rw-r--r--arch/mips/netlogic/common/smp.c25
-rw-r--r--arch/mips/netlogic/common/time.c1
-rw-r--r--arch/mips/netlogic/xlp/ahci-init-xlp2.c13
-rw-r--r--arch/mips/netlogic/xlp/ahci-init.c2
-rw-r--r--arch/mips/netlogic/xlp/dt.c10
-rw-r--r--arch/mips/netlogic/xlp/nlm_hal.c57
-rw-r--r--arch/mips/netlogic/xlp/setup.c7
-rw-r--r--arch/mips/netlogic/xlp/usb-init-xlp2.c10
-rw-r--r--arch/mips/netlogic/xlp/wakeup.c10
-rw-r--r--arch/mips/oprofile/common.c1
-rw-r--r--arch/mips/oprofile/op_model_mipsxx.c15
-rw-r--r--arch/mips/paravirt/paravirt-smp.c2
-rw-r--r--arch/mips/pci/Makefile2
-rw-r--r--arch/mips/pci/msi-xlp.c19
-rw-r--r--arch/mips/pci/pci-ar2315.c1
-rw-r--r--arch/mips/pci/pci-octeon.c8
-rw-r--r--arch/mips/pci/pci-rt2880.c1
-rw-r--r--arch/mips/pci/pci.c37
-rw-r--r--arch/mips/pci/pcie-octeon.c8
-rw-r--r--arch/mips/pistachio/Makefile1
-rw-r--r--arch/mips/pistachio/Platform8
-rw-r--r--arch/mips/pistachio/init.c131
-rw-r--r--arch/mips/pistachio/irq.c28
-rw-r--r--arch/mips/pistachio/time.c52
-rw-r--r--arch/mips/power/Makefile2
-rw-r--r--arch/mips/power/hibernate.c10
-rw-r--r--arch/mips/power/hibernate_asm.S (renamed from arch/mips/power/hibernate.S)5
-rw-r--r--arch/mips/ralink/Kconfig5
-rw-r--r--arch/mips/sgi-ip27/ip27-init.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-klnuma.c10
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-timer.c8
-rw-r--r--arch/mips/sgi-ip32/ip32-platform.c51
-rw-r--r--arch/mips/sgi-ip32/ip32-reset.c131
-rw-r--r--arch/mn10300/include/asm/thread_info.h2
-rw-r--r--arch/mn10300/kernel/asm-offsets.c2
-rw-r--r--arch/mn10300/kernel/signal.c20
-rw-r--r--arch/mn10300/unit-asb2305/pci.c6
-rw-r--r--arch/nios2/include/asm/ptrace.h47
-rw-r--r--arch/nios2/include/asm/thread_info.h6
-rw-r--r--arch/nios2/include/asm/ucontext.h32
-rw-r--r--arch/nios2/include/uapi/asm/Kbuild3
-rw-r--r--arch/nios2/include/uapi/asm/elf.h4
-rw-r--r--arch/nios2/include/uapi/asm/ptrace.h59
-rw-r--r--arch/nios2/include/uapi/asm/sigcontext.h12
-rw-r--r--arch/nios2/kernel/cpuinfo.c77
-rw-r--r--arch/nios2/kernel/entry.S2
-rw-r--r--arch/nios2/kernel/process.c1
-rw-r--r--arch/nios2/kernel/signal.c6
-rw-r--r--arch/nios2/mm/cacheflush.c3
-rw-r--r--arch/nios2/mm/fault.c6
-rw-r--r--arch/openrisc/include/asm/thread_info.h2
-rw-r--r--arch/openrisc/kernel/process.c1
-rw-r--r--arch/openrisc/kernel/setup.c50
-rw-r--r--arch/openrisc/kernel/signal.c2
-rw-r--r--arch/parisc/Kconfig5
-rw-r--r--arch/parisc/include/asm/Kbuild2
-rw-r--r--arch/parisc/include/asm/pgalloc.h19
-rw-r--r--arch/parisc/include/asm/pgtable.h16
-rw-r--r--arch/parisc/include/asm/scatterlist.h10
-rw-r--r--arch/parisc/include/asm/seccomp.h16
-rw-r--r--arch/parisc/include/asm/thread_info.h2
-rw-r--r--arch/parisc/kernel/asm-offsets.c1
-rw-r--r--arch/parisc/kernel/entry.S4
-rw-r--r--arch/parisc/kernel/head.S4
-rw-r--r--arch/parisc/kernel/irq.c4
-rw-r--r--arch/parisc/kernel/pci-dma.c8
-rw-r--r--arch/parisc/kernel/syscall_table.S9
-rw-r--r--arch/parisc/mm/init.c2
-rw-r--r--arch/powerpc/Kconfig14
-rw-r--r--arch/powerpc/Kconfig.debug9
-rw-r--r--arch/powerpc/Makefile4
-rw-r--r--arch/powerpc/boot/Makefile4
-rw-r--r--arch/powerpc/boot/crt0.S26
-rw-r--r--arch/powerpc/boot/dts/b4860emu.dts223
-rw-r--r--arch/powerpc/boot/dts/b4qds.dtsi17
-rw-r--r--arch/powerpc/boot/dts/fsl/b4860si-post.dtsi60
-rw-r--r--arch/powerpc/boot/dts/fsl/b4si-post.dtsi89
-rw-r--r--arch/powerpc/boot/dts/fsl/p1023si-post.dtsi37
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041si-post.dtsi11
-rw-r--r--arch/powerpc/boot/dts/fsl/p3041si-post.dtsi11
-rw-r--r--arch/powerpc/boot/dts/fsl/p4080si-post.dtsi11
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020si-post.dtsi11
-rw-r--r--arch/powerpc/boot/dts/fsl/p5040si-post.dtsi11
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040si-post.dtsi65
-rw-r--r--arch/powerpc/boot/dts/fsl/t2081si-post.dtsi105
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240si-post.dtsi265
-rw-r--r--arch/powerpc/boot/dts/kmcoge4.dts15
-rw-r--r--arch/powerpc/boot/dts/oca4080.dts15
-rw-r--r--arch/powerpc/boot/dts/p1023rdb.dts18
-rw-r--r--arch/powerpc/boot/dts/p2041rdb.dts17
-rw-r--r--arch/powerpc/boot/dts/p3041ds.dts17
-rw-r--r--arch/powerpc/boot/dts/p4080ds.dts17
-rw-r--r--arch/powerpc/boot/dts/p5020ds.dts17
-rw-r--r--arch/powerpc/boot/dts/p5040ds.dts17
-rw-r--r--arch/powerpc/boot/dts/t104xqds.dtsi17
-rw-r--r--arch/powerpc/boot/dts/t104xrdb.dtsi14
-rw-r--r--arch/powerpc/boot/dts/t208xqds.dtsi19
-rw-r--r--arch/powerpc/boot/dts/t208xrdb.dtsi15
-rw-r--r--arch/powerpc/boot/dts/t4240qds.dts17
-rw-r--r--arch/powerpc/boot/dts/t4240rdb.dts15
-rw-r--r--arch/powerpc/boot/libfdt-wrapper.c6
-rw-r--r--arch/powerpc/boot/libfdt_env.h14
-rw-r--r--arch/powerpc/boot/of.h8
-rw-r--r--arch/powerpc/boot/planetcore.c33
-rw-r--r--arch/powerpc/boot/planetcore.h3
-rwxr-xr-xarch/powerpc/boot/wrapper2
-rw-r--r--arch/powerpc/configs/cell_defconfig3
-rw-r--r--arch/powerpc/configs/celleb_defconfig152
-rw-r--r--arch/powerpc/configs/corenet32_smp_defconfig7
-rw-r--r--arch/powerpc/configs/corenet64_smp_defconfig15
-rw-r--r--arch/powerpc/configs/mpc85xx_defconfig3
-rw-r--r--arch/powerpc/configs/mpc85xx_smp_defconfig2
-rw-r--r--arch/powerpc/configs/ppc64_defconfig3
-rw-r--r--arch/powerpc/crypto/Makefile8
-rw-r--r--arch/powerpc/crypto/aes-spe-core.S351
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c512
-rw-r--r--arch/powerpc/crypto/aes-spe-keys.S283
-rw-r--r--arch/powerpc/crypto/aes-spe-modes.S630
-rw-r--r--arch/powerpc/crypto/aes-spe-regs.h42
-rw-r--r--arch/powerpc/crypto/aes-tab-4k.S331
-rw-r--r--arch/powerpc/crypto/md5-asm.S243
-rw-r--r--arch/powerpc/crypto/md5-glue.c165
-rw-r--r--arch/powerpc/crypto/sha1-spe-asm.S299
-rw-r--r--arch/powerpc/crypto/sha1-spe-glue.c210
-rw-r--r--arch/powerpc/crypto/sha256-spe-asm.S323
-rw-r--r--arch/powerpc/crypto/sha256-spe-glue.c275
-rw-r--r--arch/powerpc/include/asm/Kbuild4
-rw-r--r--arch/powerpc/include/asm/asm-compat.h4
-rw-r--r--arch/powerpc/include/asm/cache.h3
-rw-r--r--arch/powerpc/include/asm/cputable.h8
-rw-r--r--arch/powerpc/include/asm/cputhreads.h4
-rw-r--r--arch/powerpc/include/asm/dbdma.h12
-rw-r--r--arch/powerpc/include/asm/dcr-native.h2
-rw-r--r--arch/powerpc/include/asm/device.h6
-rw-r--r--arch/powerpc/include/asm/div64.h1
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h4
-rw-r--r--arch/powerpc/include/asm/eeh.h29
-rw-r--r--arch/powerpc/include/asm/elf.h4
-rw-r--r--arch/powerpc/include/asm/firmware.h10
-rw-r--r--arch/powerpc/include/asm/iommu.h6
-rw-r--r--arch/powerpc/include/asm/irq_regs.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h16
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/include/asm/local64.h1
-rw-r--r--arch/powerpc/include/asm/machdep.h19
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h1
-rw-r--r--arch/powerpc/include/asm/mpc85xx.h1
-rw-r--r--arch/powerpc/include/asm/mpic.h17
-rw-r--r--arch/powerpc/include/asm/nmi.h4
-rw-r--r--arch/powerpc/include/asm/nvram.h50
-rw-r--r--arch/powerpc/include/asm/opal-api.h735
-rw-r--r--arch/powerpc/include/asm/opal.h770
-rw-r--r--arch/powerpc/include/asm/paca.h4
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h60
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h5
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h8
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h192
-rw-r--r--arch/powerpc/include/asm/reg.h3
-rw-r--r--arch/powerpc/include/asm/rtas.h33
-rw-r--r--arch/powerpc/include/asm/seccomp.h10
-rw-r--r--arch/powerpc/include/asm/setup.h1
-rw-r--r--arch/powerpc/include/asm/smp.h5
-rw-r--r--arch/powerpc/include/asm/smu.h2
-rw-r--r--arch/powerpc/include/asm/swab.h26
-rw-r--r--arch/powerpc/include/asm/systbl.h1
-rw-r--r--arch/powerpc/include/asm/thread_info.h2
-rw-r--r--arch/powerpc/include/asm/ucc_slow.h13
-rw-r--r--arch/powerpc/include/asm/unistd.h2
-rw-r--r--arch/powerpc/include/asm/vga.h4
-rw-r--r--arch/powerpc/include/asm/xics.h2
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/include/uapi/asm/ptrace.h2
-rw-r--r--arch/powerpc/include/uapi/asm/seccomp.h16
-rw-r--r--arch/powerpc/include/uapi/asm/tm.h2
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/cacheinfo.c44
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S10
-rw-r--r--arch/powerpc/kernel/cputable.c24
-rw-r--r--arch/powerpc/kernel/dbell.c2
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c11
-rw-r--r--arch/powerpc/kernel/eeh.c176
-rw-r--r--arch/powerpc/kernel/eeh_cache.c25
-rw-r--r--arch/powerpc/kernel/eeh_dev.c14
-rw-r--r--arch/powerpc/kernel/eeh_driver.c22
-rw-r--r--arch/powerpc/kernel/eeh_pe.c129
-rw-r--r--arch/powerpc/kernel/entry_64.S24
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/kernel/idle_power7.S1
-rw-r--r--arch/powerpc/kernel/mce_power.c53
-rw-r--r--arch/powerpc/kernel/nvram_64.c677
-rw-r--r--arch/powerpc/kernel/of_platform.c2
-rw-r--r--arch/powerpc/kernel/pci-common.c57
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c9
-rw-r--r--arch/powerpc/kernel/pci_dn.c309
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c9
-rw-r--r--arch/powerpc/kernel/process.c9
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c4
-rw-r--r--arch/powerpc/kernel/rtas.c30
-rw-r--r--arch/powerpc/kernel/rtas_pci.c49
-rw-r--r--arch/powerpc/kernel/setup_64.c20
-rw-r--r--arch/powerpc/kernel/syscalls.c17
-rw-r--r--arch/powerpc/kernel/systbl.S2
-rw-r--r--arch/powerpc/kernel/systbl_chk.c2
-rw-r--r--arch/powerpc/kernel/tm.S8
-rw-r--r--arch/powerpc/kernel/udbg.c2
-rw-r--r--arch/powerpc/kernel/vector.S24
-rw-r--r--arch/powerpc/kvm/book3s_hv.c8
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S1
-rw-r--r--arch/powerpc/kvm/mpic.c17
-rw-r--r--arch/powerpc/kvm/powerpc.c42
-rw-r--r--arch/powerpc/lib/alloc.c2
-rw-r--r--arch/powerpc/lib/copy_32.S127
-rw-r--r--arch/powerpc/lib/copypage_power7.S32
-rw-r--r--arch/powerpc/lib/copyuser_power7.S226
-rw-r--r--arch/powerpc/lib/crtsavres.S96
-rw-r--r--arch/powerpc/lib/ldstfp.S32
-rw-r--r--arch/powerpc/lib/locks.c1
-rw-r--r--arch/powerpc/lib/memcpy_power7.S226
-rw-r--r--arch/powerpc/lib/ppc_ksyms.c4
-rw-r--r--arch/powerpc/lib/rheap.c2
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c2
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c2
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c2
-rw-r--r--arch/powerpc/mm/hugetlbpage.c4
-rw-r--r--arch/powerpc/mm/init_64.c1
-rw-r--r--arch/powerpc/mm/mem.c3
-rw-r--r--arch/powerpc/mm/mmap.c28
-rw-r--r--arch/powerpc/mm/mmu_decl.h2
-rw-r--r--arch/powerpc/mm/numa.c62
-rw-r--r--arch/powerpc/mm/pgtable_32.c18
-rw-r--r--arch/powerpc/mm/pgtable_64.c6
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c5
-rw-r--r--arch/powerpc/mm/tlb_hash64.c2
-rw-r--r--arch/powerpc/mm/vphn.c70
-rw-r--r--arch/powerpc/mm/vphn.h16
-rw-r--r--arch/powerpc/net/Makefile2
-rw-r--r--arch/powerpc/net/bpf_jit.h64
-rw-r--r--arch/powerpc/net/bpf_jit_asm.S (renamed from arch/powerpc/net/bpf_jit_64.S)70
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c46
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c13
-rw-r--r--arch/powerpc/perf/callchain.c2
-rw-r--r--arch/powerpc/perf/core-book3s.c17
-rw-r--r--arch/powerpc/perf/hv-24x7.c253
-rw-r--r--arch/powerpc/perf/hv-24x7.h8
-rw-r--r--arch/powerpc/platforms/85xx/common.c1
-rw-r--r--arch/powerpc/platforms/85xx/corenet_generic.c12
-rw-r--r--arch/powerpc/platforms/85xx/p1022_rdk.c4
-rw-r--r--arch/powerpc/platforms/85xx/smp.c4
-rw-r--r--arch/powerpc/platforms/Kconfig5
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype3
-rw-r--r--arch/powerpc/platforms/cell/Kconfig11
-rw-r--r--arch/powerpc/platforms/cell/Makefile15
-rw-r--r--arch/powerpc/platforms/cell/beat.c264
-rw-r--r--arch/powerpc/platforms/cell/beat.h39
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c445
-rw-r--r--arch/powerpc/platforms/cell/beat_hvCall.S285
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.c253
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.h30
-rw-r--r--arch/powerpc/platforms/cell/beat_iommu.c115
-rw-r--r--arch/powerpc/platforms/cell/beat_spu_priv1.c205
-rw-r--r--arch/powerpc/platforms/cell/beat_syscall.h164
-rw-r--r--arch/powerpc/platforms/cell/beat_udbg.c98
-rw-r--r--arch/powerpc/platforms/cell/beat_wrapper.h290
-rw-r--r--arch/powerpc/platforms/cell/cell.h24
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.c500
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.h46
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc.h232
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_epci.c428
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_pciex.c538
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_sio.c99
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_uhc.c95
-rw-r--r--arch/powerpc/platforms/cell/celleb_setup.c243
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c2
-rw-r--r--arch/powerpc/platforms/cell/iommu.c11
-rw-r--r--arch/powerpc/platforms/cell/setup.c5
-rw-r--r--arch/powerpc/platforms/cell/smp.c9
-rw-r--r--arch/powerpc/platforms/cell/spu_callbacks.c1
-rw-r--r--arch/powerpc/platforms/chrp/setup.c2
-rw-r--r--arch/powerpc/platforms/maple/maple.h2
-rw-r--r--arch/powerpc/platforms/maple/pci.c4
-rw-r--r--arch/powerpc/platforms/maple/setup.c2
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c6
-rw-r--r--arch/powerpc/platforms/pasemi/pasemi.h1
-rw-r--r--arch/powerpc/platforms/pasemi/pci.c5
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c2
-rw-r--r--arch/powerpc/platforms/powermac/pci.c38
-rw-r--r--arch/powerpc/platforms/powermac/pic.c3
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h3
-rw-r--r--arch/powerpc/platforms/powermac/setup.c22
-rw-r--r--arch/powerpc/platforms/powermac/smp.c18
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig7
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c1149
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c1300
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c10
-rw-r--r--arch/powerpc/platforms/powernv/opal-power.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c30
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S7
-rw-r--r--arch/powerpc/platforms/powernv/opal.c92
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c797
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c1
-rw-r--r--arch/powerpc/platforms/powernv/pci.c190
-rw-r--r--arch/powerpc/platforms/powernv/pci.h38
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c54
-rw-r--r--arch/powerpc/platforms/powernv/smp.c27
-rw-r--r--arch/powerpc/platforms/ps3/smp.c4
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig1
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c118
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c98
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c489
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c9
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c70
-rw-r--r--arch/powerpc/platforms/pseries/msi.c6
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c674
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c5
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h14
-rw-r--r--arch/powerpc/platforms/pseries/setup.c48
-rw-r--r--arch/powerpc/platforms/pseries/smp.c6
-rwxr-xr-xarch/powerpc/relocs_check.pl66
-rwxr-xr-xarch/powerpc/relocs_check.sh59
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c10
-rw-r--r--arch/powerpc/sysdev/dcr.c2
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c29
-rw-r--r--arch/powerpc/sysdev/fsl_msi.h2
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c15
-rw-r--r--arch/powerpc/sysdev/mpic.c30
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_io.c25
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc_slow.c5
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c4
-rw-r--r--arch/s390/Kbuild1
-rw-r--r--arch/s390/Kconfig89
-rw-r--r--arch/s390/Makefile16
-rw-r--r--arch/s390/boot/compressed/Makefile12
-rw-r--r--arch/s390/boot/compressed/head.S (renamed from arch/s390/boot/compressed/head64.S)0
-rw-r--r--arch/s390/boot/compressed/head31.S51
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S5
-rw-r--r--arch/s390/crypto/crypt_s390.h8
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c4
-rw-r--r--arch/s390/hypfs/inode.c4
-rw-r--r--arch/s390/include/asm/appldata.h24
-rw-r--r--arch/s390/include/asm/atomic.h95
-rw-r--r--arch/s390/include/asm/bitops.h28
-rw-r--r--arch/s390/include/asm/cmpxchg.h7
-rw-r--r--arch/s390/include/asm/cputime.h26
-rw-r--r--arch/s390/include/asm/ctl_reg.h14
-rw-r--r--arch/s390/include/asm/dma-mapping.h2
-rw-r--r--arch/s390/include/asm/elf.h18
-rw-r--r--arch/s390/include/asm/idals.h16
-rw-r--r--arch/s390/include/asm/irq.h1
-rw-r--r--arch/s390/include/asm/jump_label.h15
-rw-r--r--arch/s390/include/asm/kvm_host.h58
-rw-r--r--arch/s390/include/asm/livepatch.h43
-rw-r--r--arch/s390/include/asm/lowcore.h159
-rw-r--r--arch/s390/include/asm/mman.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h6
-rw-r--r--arch/s390/include/asm/page.h11
-rw-r--r--arch/s390/include/asm/pci.h10
-rw-r--r--arch/s390/include/asm/percpu.h4
-rw-r--r--arch/s390/include/asm/perf_event.h3
-rw-r--r--arch/s390/include/asm/pgalloc.h24
-rw-r--r--arch/s390/include/asm/pgtable.h125
-rw-r--r--arch/s390/include/asm/processor.h66
-rw-r--r--arch/s390/include/asm/ptrace.h4
-rw-r--r--arch/s390/include/asm/qdio.h10
-rw-r--r--arch/s390/include/asm/runtime_instr.h10
-rw-r--r--arch/s390/include/asm/rwsem.h81
-rw-r--r--arch/s390/include/asm/setup.h35
-rw-r--r--arch/s390/include/asm/sfp-util.h10
-rw-r--r--arch/s390/include/asm/sparsemem.h9
-rw-r--r--arch/s390/include/asm/switch_to.h21
-rw-r--r--arch/s390/include/asm/syscall.h2
-rw-r--r--arch/s390/include/asm/thread_info.h13
-rw-r--r--arch/s390/include/asm/tlb.h4
-rw-r--r--arch/s390/include/asm/tlbflush.h7
-rw-r--r--arch/s390/include/asm/types.h17
-rw-r--r--arch/s390/include/asm/uaccess.h1
-rw-r--r--arch/s390/include/asm/unistd.h8
-rw-r--r--arch/s390/include/asm/vdso.h2
-rw-r--r--arch/s390/include/uapi/asm/kvm.h4
-rw-r--r--arch/s390/include/uapi/asm/sie.h4
-rw-r--r--arch/s390/kernel/Makefile24
-rw-r--r--arch/s390/kernel/asm-offsets.c6
-rw-r--r--arch/s390/kernel/base.S76
-rw-r--r--arch/s390/kernel/cache.c4
-rw-r--r--arch/s390/kernel/compat_signal.c14
-rw-r--r--arch/s390/kernel/cpcmd.c10
-rw-r--r--arch/s390/kernel/diag.c15
-rw-r--r--arch/s390/kernel/dis.c48
-rw-r--r--arch/s390/kernel/dumpstack.c26
-rw-r--r--arch/s390/kernel/early.c69
-rw-r--r--arch/s390/kernel/entry.S1005
-rw-r--r--arch/s390/kernel/entry64.S1059
-rw-r--r--arch/s390/kernel/ftrace.c73
-rw-r--r--arch/s390/kernel/head.S49
-rw-r--r--arch/s390/kernel/head31.S106
-rw-r--r--arch/s390/kernel/head_kdump.S8
-rw-r--r--arch/s390/kernel/ipl.c157
-rw-r--r--arch/s390/kernel/irq.c4
-rw-r--r--arch/s390/kernel/jump_label.c14
-rw-r--r--arch/s390/kernel/kprobes.c2
-rw-r--r--arch/s390/kernel/module.c13
-rw-r--r--arch/s390/kernel/nmi.c92
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c7
-rw-r--r--arch/s390/kernel/pgm_check.S22
-rw-r--r--arch/s390/kernel/process.c29
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/ptrace.c46
-rw-r--r--arch/s390/kernel/reipl.S133
-rw-r--r--arch/s390/kernel/reipl64.S155
-rw-r--r--arch/s390/kernel/relocate_kernel.S63
-rw-r--r--arch/s390/kernel/relocate_kernel64.S121
-rw-r--r--arch/s390/kernel/sclp.S10
-rw-r--r--arch/s390/kernel/setup.c72
-rw-r--r--arch/s390/kernel/signal.c24
-rw-r--r--arch/s390/kernel/smp.c36
-rw-r--r--arch/s390/kernel/suspend.c4
-rw-r--r--arch/s390/kernel/swsusp.S (renamed from arch/s390/kernel/swsusp_asm64.S)11
-rw-r--r--arch/s390/kernel/sys_s390.c49
-rw-r--r--arch/s390/kernel/syscalls.S716
-rw-r--r--arch/s390/kernel/time.c20
-rw-r--r--arch/s390/kernel/topology.c2
-rw-r--r--arch/s390/kernel/traps.c155
-rw-r--r--arch/s390/kernel/uprobes.c4
-rw-r--r--arch/s390/kernel/vdso.c16
-rw-r--r--arch/s390/kernel/vmlinux.lds.S7
-rw-r--r--arch/s390/kvm/diag.c6
-rw-r--r--arch/s390/kvm/gaccess.c296
-rw-r--r--arch/s390/kvm/gaccess.h21
-rw-r--r--arch/s390/kvm/guestdbg.c8
-rw-r--r--arch/s390/kvm/intercept.c5
-rw-r--r--arch/s390/kvm/interrupt.c1101
-rw-r--r--arch/s390/kvm/kvm-s390.c465
-rw-r--r--arch/s390/kvm/kvm-s390.h54
-rw-r--r--arch/s390/kvm/priv.c146
-rw-r--r--arch/s390/kvm/sigp.c7
-rw-r--r--arch/s390/kvm/trace-s390.h7
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/div64.c147
-rw-r--r--arch/s390/lib/mem.S (renamed from arch/s390/lib/mem64.S)0
-rw-r--r--arch/s390/lib/mem32.S92
-rw-r--r--arch/s390/lib/qrnnd.S78
-rw-r--r--arch/s390/lib/uaccess.c136
-rw-r--r--arch/s390/lib/ucmpdi2.c26
-rw-r--r--arch/s390/math-emu/Makefile7
-rw-r--r--arch/s390/math-emu/math.c2255
-rw-r--r--arch/s390/mm/dump_pagetables.c24
-rw-r--r--arch/s390/mm/extmem.c14
-rw-r--r--arch/s390/mm/fault.c36
-rw-r--r--arch/s390/mm/gup.c4
-rw-r--r--arch/s390/mm/init.c5
-rw-r--r--arch/s390/mm/maccess.c70
-rw-r--r--arch/s390/mm/mem_detect.c4
-rw-r--r--arch/s390/mm/mmap.c59
-rw-r--r--arch/s390/mm/pageattr.c2
-rw-r--r--arch/s390/mm/pgtable.c8
-rw-r--r--arch/s390/mm/vmem.c10
-rw-r--r--arch/s390/net/bpf_jit.S197
-rw-r--r--arch/s390/net/bpf_jit.h58
-rw-r--r--arch/s390/net/bpf_jit_comp.c1780
-rw-r--r--arch/s390/oprofile/Makefile2
-rw-r--r--arch/s390/oprofile/init.c11
-rw-r--r--arch/s390/pci/pci.c39
-rw-r--r--arch/s390/pci/pci_debug.c39
-rw-r--r--arch/s390/pci/pci_dma.c8
-rw-r--r--arch/s390/pci/pci_mmio.c17
-rw-r--r--arch/score/include/asm/thread_info.h2
-rw-r--r--arch/score/kernel/asm-offsets.c1
-rw-r--r--arch/sh/Kconfig4
-rw-r--r--arch/sh/drivers/pci/pci.c25
-rw-r--r--arch/sh/include/asm/mmu_context.h2
-rw-r--r--arch/sh/include/asm/thread_info.h2
-rw-r--r--arch/sh/kernel/asm-offsets.c1
-rw-r--r--arch/sh/kernel/dwarf.c18
-rw-r--r--arch/sh/kernel/irq.c2
-rw-r--r--arch/sh/kernel/signal_32.c22
-rw-r--r--arch/sh/kernel/signal_64.c25
-rw-r--r--arch/sh/kernel/smp.c6
-rw-r--r--arch/sparc/Kconfig7
-rw-r--r--arch/sparc/include/asm/hypervisor.h12
-rw-r--r--arch/sparc/include/asm/io_64.h20
-rw-r--r--arch/sparc/include/asm/iommu_64.h7
-rw-r--r--arch/sparc/include/asm/jump_label.h5
-rw-r--r--arch/sparc/include/asm/seccomp.h11
-rw-r--r--arch/sparc/include/asm/starfire.h1
-rw-r--r--arch/sparc/include/asm/thread_info_32.h15
-rw-r--r--arch/sparc/include/asm/thread_info_64.h26
-rw-r--r--arch/sparc/kernel/entry.h4
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/hvcalls.S16
-rw-r--r--arch/sparc/kernel/iommu.c172
-rw-r--r--arch/sparc/kernel/iommu_common.h8
-rw-r--r--arch/sparc/kernel/ldc.c155
-rw-r--r--arch/sparc/kernel/leon_pci.c16
-rw-r--r--arch/sparc/kernel/mdesc.c22
-rw-r--r--arch/sparc/kernel/pci.c8
-rw-r--r--arch/sparc/kernel/pci_sun4v.c183
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/pcr.c33
-rw-r--r--arch/sparc/kernel/perf_event.c62
-rw-r--r--arch/sparc/kernel/process_64.c4
-rw-r--r--arch/sparc/kernel/smp_64.c27
-rw-r--r--arch/sparc/kernel/starfire.c5
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c2
-rw-r--r--arch/sparc/kernel/time_32.c10
-rw-r--r--arch/sparc/kernel/traps_32.c1
-rw-r--r--arch/sparc/kernel/traps_64.c32
-rw-r--r--arch/sparc/lib/memmove.S35
-rw-r--r--arch/sparc/mm/init_64.c2
-rw-r--r--arch/tile/Kconfig6
-rw-r--r--arch/tile/gxio/mpipe.c4
-rw-r--r--arch/tile/include/asm/Kbuild1
-rw-r--r--arch/tile/include/asm/ftrace.h2
-rw-r--r--arch/tile/include/asm/irq_work.h14
-rw-r--r--arch/tile/include/asm/smp.h1
-rw-r--r--arch/tile/include/asm/thread_info.h11
-rw-r--r--arch/tile/include/gxio/mpipe.h4
-rw-r--r--arch/tile/include/hv/hypervisor.h6
-rw-r--r--arch/tile/kernel/compat_signal.c20
-rw-r--r--arch/tile/kernel/ftrace.c6
-rw-r--r--arch/tile/kernel/mcount_64.S7
-rw-r--r--arch/tile/kernel/pci.c2
-rw-r--r--arch/tile/kernel/pci_gx.c2
-rw-r--r--arch/tile/kernel/process.c12
-rw-r--r--arch/tile/kernel/ptrace.c22
-rw-r--r--arch/tile/kernel/setup.c25
-rw-r--r--arch/tile/kernel/signal.c9
-rw-r--r--arch/tile/kernel/single_step.c3
-rw-r--r--arch/tile/kernel/smp.c32
-rw-r--r--arch/tile/kernel/stack.c15
-rw-r--r--arch/tile/kernel/time.c24
-rw-r--r--arch/tile/kernel/traps.c16
-rw-r--r--arch/tile/kernel/unaligned.c22
-rw-r--r--arch/tile/mm/elf.c47
-rw-r--r--arch/tile/mm/fault.c10
-rw-r--r--arch/tile/mm/init.c7
-rw-r--r--arch/um/Kconfig.um47
-rw-r--r--arch/um/Makefile6
-rw-r--r--arch/um/Makefile-ia641
-rw-r--r--arch/um/Makefile-ppc9
-rw-r--r--arch/um/include/asm/fixmap.h4
-rw-r--r--arch/um/include/asm/pgtable.h6
-rw-r--r--arch/um/include/asm/processor-generic.h8
-rw-r--r--arch/um/include/asm/smp.h26
-rw-r--r--arch/um/include/asm/thread_info.h2
-rw-r--r--arch/um/include/shared/as-layout.h1
-rw-r--r--arch/um/include/shared/os.h2
-rw-r--r--arch/um/include/shared/skas/proc_mm.h44
-rw-r--r--arch/um/include/shared/skas/skas.h3
-rw-r--r--arch/um/include/shared/skas_ptrace.h14
-rw-r--r--arch/um/kernel/Makefile4
-rw-r--r--arch/um/kernel/irq.c3
-rw-r--r--arch/um/kernel/kmsg_dump.c43
-rw-r--r--arch/um/kernel/mem.c66
-rw-r--r--arch/um/kernel/physmem.c41
-rw-r--r--arch/um/kernel/process.c11
-rw-r--r--arch/um/kernel/ptrace.c32
-rw-r--r--arch/um/kernel/reboot.c35
-rw-r--r--arch/um/kernel/skas/mmu.c68
-rw-r--r--arch/um/kernel/skas/process.c31
-rw-r--r--arch/um/kernel/smp.c238
-rw-r--r--arch/um/kernel/sysrq.c6
-rw-r--r--arch/um/kernel/trap.c2
-rw-r--r--arch/um/kernel/um_arch.c67
-rw-r--r--arch/um/os-Linux/process.c16
-rw-r--r--arch/um/os-Linux/skas/mem.c100
-rw-r--r--arch/um/os-Linux/skas/process.c202
-rw-r--r--arch/um/os-Linux/start_up.c154
-rw-r--r--arch/um/sys-ia64/Makefile11
-rw-r--r--arch/um/sys-ia64/sysdep/ptrace.h16
-rw-r--r--arch/um/sys-ia64/sysdep/sigcontext.h10
-rw-r--r--arch/um/sys-ia64/sysdep/skas_ptrace.h22
-rw-r--r--arch/um/sys-ia64/sysdep/syscalls.h10
-rw-r--r--arch/um/sys-ppc/Makefile65
-rw-r--r--arch/um/sys-ppc/asm/archparam.h8
-rw-r--r--arch/um/sys-ppc/asm/elf.h51
-rw-r--r--arch/um/sys-ppc/asm/processor.h15
-rw-r--r--arch/um/sys-ppc/misc.S111
-rw-r--r--arch/um/sys-ppc/miscthings.c42
-rw-r--r--arch/um/sys-ppc/ptrace.c58
-rw-r--r--arch/um/sys-ppc/ptrace_user.c29
-rw-r--r--arch/um/sys-ppc/shared/sysdep/ptrace.h93
-rw-r--r--arch/um/sys-ppc/shared/sysdep/sigcontext.h52
-rw-r--r--arch/um/sys-ppc/shared/sysdep/skas_ptrace.h22
-rw-r--r--arch/um/sys-ppc/shared/sysdep/syscalls.h43
-rw-r--r--arch/um/sys-ppc/sigcontext.c4
-rw-r--r--arch/um/sys-ppc/sysrq.c33
-rw-r--r--arch/unicore32/include/asm/thread_info.h3
-rw-r--r--arch/unicore32/kernel/asm-offsets.c1
-rw-r--r--arch/unicore32/kernel/pci.c9
-rw-r--r--arch/unicore32/kernel/signal.c7
-rw-r--r--arch/x86/Kconfig80
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/Makefile.um2
-rw-r--r--arch/x86/boot/compressed/aslr.c35
-rw-r--r--arch/x86/boot/compressed/head_32.S3
-rw-r--r--arch/x86/boot/compressed/head_64.S5
-rw-r--r--arch/x86/boot/compressed/misc.c6
-rw-r--r--arch/x86/boot/compressed/misc.h4
-rw-r--r--arch/x86/boot/string.c2
-rw-r--r--arch/x86/boot/video-mode.c4
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/boot/video.h1
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c191
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c15
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c15
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c9
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c15
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S2
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c7
-rw-r--r--arch/x86/crypto/glue_helper.c1
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c15
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c15
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c15
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb.c9
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c2
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c139
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S10
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S10
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S10
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c193
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S6
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S6
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S6
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c202
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64.S4
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c15
-rw-r--r--arch/x86/ia32/Makefile1
-rw-r--r--arch/x86/ia32/ia32_signal.c19
-rw-r--r--arch/x86/ia32/ia32entry.S485
-rw-r--r--arch/x86/ia32/nosyscall.c7
-rw-r--r--arch/x86/ia32/sys_ia32.c14
-rw-r--r--arch/x86/ia32/syscall_ia32.c25
-rw-r--r--arch/x86/include/asm/alternative-asm.h53
-rw-r--r--arch/x86/include/asm/alternative.h73
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/barrier.h6
-rw-r--r--arch/x86/include/asm/calling.h284
-rw-r--r--arch/x86/include/asm/compat.h2
-rw-r--r--arch/x86/include/asm/cpu.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h42
-rw-r--r--arch/x86/include/asm/desc.h7
-rw-r--r--arch/x86/include/asm/dwarf2.h24
-rw-r--r--arch/x86/include/asm/e820.h8
-rw-r--r--arch/x86/include/asm/efi.h6
-rw-r--r--arch/x86/include/asm/elf.h11
-rw-r--r--arch/x86/include/asm/fpu-internal.h130
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/insn.h2
-rw-r--r--arch/x86/include/asm/intel-mid.h3
-rw-r--r--arch/x86/include/asm/iommu_table.h11
-rw-r--r--arch/x86/include/asm/irqflags.h49
-rw-r--r--arch/x86/include/asm/jump_label.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h28
-rw-r--r--arch/x86/include/asm/kvm_para.h2
-rw-r--r--arch/x86/include/asm/livepatch.h4
-rw-r--r--arch/x86/include/asm/mce.h16
-rw-r--r--arch/x86/include/asm/microcode.h73
-rw-r--r--arch/x86/include/asm/microcode_intel.h13
-rw-r--r--arch/x86/include/asm/mwait.h8
-rw-r--r--arch/x86/include/asm/page_types.h4
-rw-r--r--arch/x86/include/asm/paravirt.h13
-rw-r--r--arch/x86/include/asm/paravirt_types.h8
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/pgalloc.h8
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable.h8
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h4
-rw-r--r--arch/x86/include/asm/pm-trace.h (renamed from arch/x86/include/asm/resume-trace.h)10
-rw-r--r--arch/x86/include/asm/processor.h110
-rw-r--r--arch/x86/include/asm/ptrace.h45
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/seccomp.h21
-rw-r--r--arch/x86/include/asm/seccomp_32.h11
-rw-r--r--arch/x86/include/asm/seccomp_64.h17
-rw-r--r--arch/x86/include/asm/segment.h289
-rw-r--r--arch/x86/include/asm/serial.h8
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sigcontext.h6
-rw-r--r--arch/x86/include/asm/sighandling.h4
-rw-r--r--arch/x86/include/asm/smap.h30
-rw-r--r--arch/x86/include/asm/smp.h3
-rw-r--r--arch/x86/include/asm/special_insns.h24
-rw-r--r--arch/x86/include/asm/thread_info.h77
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/e820.h10
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h2
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h26
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h16
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h13
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h21
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c25
-rw-r--r--arch/x86/kernel/alternative.c163
-rw-r--r--arch/x86/kernel/apic/apic.c62
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c22
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c12
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c89
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile3
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/common.c126
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c715
-rw-r--r--arch/x86/kernel/cpu/intel_pt.h131
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h11
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c66
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c154
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c11
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c63
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c1
-rw-r--r--arch/x86/kernel/cpu/microcode/core_early.c75
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_early.c345
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_lib.c22
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh2
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event.c223
-rw-r--r--arch/x86/kernel/cpu/perf_event.h181
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c918
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_bts.c525
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_cqm.c1379
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c39
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c321
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_pt.c1100
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c94
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c3
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/devicetree.c4
-rw-r--r--arch/x86/kernel/dumpstack.c15
-rw-r--r--arch/x86/kernel/dumpstack_32.c13
-rw-r--r--arch/x86/kernel/dumpstack_64.c11
-rw-r--r--arch/x86/kernel/e820.c28
-rw-r--r--arch/x86/kernel/early_printk.c38
-rw-r--r--arch/x86/kernel/entry_32.S93
-rw-r--r--arch/x86/kernel/entry_64.S972
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/head_32.S3
-rw-r--r--arch/x86/kernel/head_64.S6
-rw-r--r--arch/x86/kernel/i387.c58
-rw-r--r--arch/x86/kernel/ioport.c2
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/irqinit.c3
-rw-r--r--arch/x86/kernel/kgdb.c6
-rw-r--r--arch/x86/kernel/kprobes/core.c13
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/module.c3
-rw-r--r--arch/x86/kernel/paravirt.c6
-rw-r--r--arch/x86/kernel/perf_regs.c40
-rw-r--r--arch/x86/kernel/pmem.c53
-rw-r--r--arch/x86/kernel/process.c106
-rw-r--r--arch/x86/kernel/process_32.c27
-rw-r--r--arch/x86/kernel/process_64.c24
-rw-r--r--arch/x86/kernel/ptrace.c12
-rw-r--r--arch/x86/kernel/pvclock.c44
-rw-r--r--arch/x86/kernel/reboot.c10
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S8
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S16
-rw-r--r--arch/x86/kernel/setup.c23
-rw-r--r--arch/x86/kernel/signal.c90
-rw-r--r--arch/x86/kernel/smpboot.c77
-rw-r--r--arch/x86/kernel/sys_x86_64.c30
-rw-r--r--arch/x86/kernel/syscall_32.c16
-rw-r--r--arch/x86/kernel/test_rodata.c2
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/traps.c58
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kernel/vm86_32.c4
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c24
-rw-r--r--arch/x86/kernel/xsave.c46
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/cpuid.c33
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c193
-rw-r--r--arch/x86/kvm/i8254.c14
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/i8259.c13
-rw-r--r--arch/x86/kvm/ioapic.c26
-rw-r--r--arch/x86/kvm/ioapic.h11
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/lapic.c150
-rw-r--r--arch/x86/kvm/lapic.h17
-rw-r--r--arch/x86/kvm/mmu.c73
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm.c43
-rw-r--r--arch/x86/kvm/vmx.c164
-rw-r--r--arch/x86/kvm/x86.c172
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S50
-rw-r--r--arch/x86/lib/checksum_32.S64
-rw-r--r--arch/x86/lib/clear_page_64.S66
-rw-r--r--arch/x86/lib/copy_page_64.S37
-rw-r--r--arch/x86/lib/copy_user_64.S46
-rw-r--r--arch/x86/lib/csum-copy_64.S2
-rw-r--r--arch/x86/lib/insn.c13
-rw-r--r--arch/x86/lib/memcpy_64.S68
-rw-r--r--arch/x86/lib/memmove_64.S19
-rw-r--r--arch/x86/lib/memset_64.S61
-rw-r--r--arch/x86/lib/msr-reg.S24
-rw-r--r--arch/x86/lib/rwsem.S44
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S28
-rw-r--r--arch/x86/lib/usercopy_64.c15
-rw-r--r--arch/x86/lib/x86-opcode-map.txt9
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init.c69
-rw-r--r--arch/x86/mm/init_64.c14
-rw-r--r--arch/x86/mm/ioremap.c23
-rw-r--r--arch/x86/mm/memtest.c118
-rw-r--r--arch/x86/mm/mmap.c38
-rw-r--r--arch/x86/mm/numa.c11
-rw-r--r--arch/x86/mm/pageattr.c4
-rw-r--r--arch/x86/mm/pat.c6
-rw-r--r--arch/x86/mm/pgtable.c160
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--arch/x86/pci/common.c36
-rw-r--r--arch/x86/pci/intel_mid_pci.c4
-rw-r--r--arch/x86/pci/irq.c15
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c4
-rw-r--r--arch/x86/platform/efi/efi.c17
-rw-r--r--arch/x86/platform/efi/efi_32.c22
-rw-r--r--arch/x86/platform/efi/efi_64.c29
-rw-r--r--arch/x86/platform/intel-mid/Makefile1
-rw-r--r--arch/x86/platform/intel-mid/early_printk_intel_mid.c112
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c10
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c4
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c4
-rw-r--r--arch/x86/platform/uv/tlb_uv.c6
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/syscalls/Makefile9
-rw-r--r--arch/x86/syscalls/syscall_32.tbl4
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/um/Makefile1
-rw-r--r--arch/x86/um/asm/barrier.h15
-rw-r--r--arch/x86/um/asm/elf.h2
-rw-r--r--arch/x86/um/ldt.c227
-rw-r--r--arch/x86/um/shared/sysdep/faultinfo_32.h3
-rw-r--r--arch/x86/um/shared/sysdep/faultinfo_64.h3
-rw-r--r--arch/x86/um/shared/sysdep/skas_ptrace.h22
-rw-r--r--arch/x86/um/signal.c7
-rw-r--r--arch/x86/um/sys_call_table_64.c2
-rw-r--r--arch/x86/vdso/Makefile4
-rw-r--r--arch/x86/vdso/vclock_gettime.c34
-rw-r--r--arch/x86/vdso/vdso32/sigreturn.S1
-rw-r--r--arch/x86/vdso/vdso32/syscall.S2
-rw-r--r--arch/x86/xen/apic.c180
-rw-r--r--arch/x86/xen/enlighten.c91
-rw-r--r--arch/x86/xen/mmu.c221
-rw-r--r--arch/x86/xen/p2m.c12
-rw-r--r--arch/x86/xen/smp.c60
-rw-r--r--arch/x86/xen/suspend.c11
-rw-r--r--arch/x86/xen/trace.c50
-rw-r--r--arch/x86/xen/xen-asm_64.S8
-rw-r--r--arch/x86/xen/xen-head.S63
-rw-r--r--arch/xtensa/Kconfig30
-rw-r--r--arch/xtensa/boot/dts/xtfpga.dtsi64
-rw-r--r--arch/xtensa/configs/audio_kc705_defconfig142
-rw-r--r--arch/xtensa/include/asm/thread_info.h13
-rw-r--r--arch/xtensa/include/uapi/asm/unistd.h8
-rw-r--r--arch/xtensa/kernel/Makefile1
-rw-r--r--arch/xtensa/kernel/asm-offsets.c8
-rw-r--r--arch/xtensa/kernel/pci.c15
-rw-r--r--arch/xtensa/kernel/signal.c16
-rw-r--r--arch/xtensa/platforms/iss/network.c29
-rw-r--r--arch/xtensa/platforms/xtfpga/Makefile3
-rw-r--r--arch/xtensa/platforms/xtfpga/include/platform/hardware.h6
-rw-r--r--arch/xtensa/platforms/xtfpga/include/platform/lcd.h15
-rw-r--r--arch/xtensa/platforms/xtfpga/lcd.c55
-rw-r--r--arch/xtensa/platforms/xtfpga/setup.c34
1710 files changed, 49772 insertions, 35033 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 05d7a8a458d5..a65eafb24997 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -32,7 +32,7 @@ config HAVE_OPROFILE
 
 config OPROFILE_NMI_TIMER
 	def_bool y
-	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !PPC64
 
 config KPROBES
 	bool "Kprobes"
@@ -446,6 +446,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	bool
 
+config HAVE_ARCH_HUGE_VMAP
+	bool
+
 config HAVE_ARCH_SOFT_DIRTY
 	bool
 
@@ -484,6 +487,18 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK
 	  This spares a stack switch and improves cache usage on softirq
 	  processing.
 
+config PGTABLE_LEVELS
+	int
+	default 2
+
+config ARCH_HAS_ELF_RANDOMIZE
+	bool
+	help
+	  An architecture supports choosing randomized locations for
+	  stack, mmap, brk, and ET_DYN. Defined functions:
+	  - arch_mmap_rnd()
+	  - arch_randomize_brk()
+
 #
 # ABI hall of shame
 #
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b7ff9a318c31..bf9e9d3b3792 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -76,6 +76,10 @@ config GENERIC_ISA_DMA
 	bool
 	default y
 
+config PGTABLE_LEVELS
+	int
+	default 3
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index b4cf03690394..43a7559c448b 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -44,6 +44,7 @@ struct task_struct;
 extern unsigned long thread_saved_pc(struct task_struct *);
 
 /* Do necessary setup to start up a newly executed thread.  */
+struct pt_regs;
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /* Free all resources held by a thread. */
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index d5b98ab514bb..32e920a83ae5 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -18,7 +18,6 @@ struct thread_info {
 	unsigned int		flags;		/* low level flags */
 	unsigned int		ieee_state;	/* see fpu.h */
 
-	struct exec_domain	*exec_domain;	/* execution domain */
 	mm_segment_t		addr_limit;	/* thread address space */
 	unsigned		cpu;		/* current CPU */
 	int			preempt_count; /* 0 => preemptable, <0 => BUG */
@@ -35,7 +34,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.addr_limit	= KERNEL_DS,		\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 }
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 98a1525fa164..82f738e5d54c 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -338,6 +338,8 @@ common_init_pci(void)
 
 		bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops,
 					hose, &resources);
+		if (!bus)
+			continue;
 		hose->bus = bus;
 		hose->need_domain_info = need_domain_info;
 		next_busno = bus->busn_res.end + 1;
@@ -353,6 +355,11 @@ common_init_pci(void)
 
 	pci_assign_unassigned_resources();
 	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
+	for (hose = hose_head; hose; hose = hose->next) {
+		bus = hose->bus;
+		if (bus)
+			pci_bus_add_devices(bus);
+	}
 }
 
 
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index c8d284d8521f..f535a3fd0f60 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -116,7 +116,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
 }
 
 static int
-alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
 {
 	int retval = 0;
 	int real_seconds, real_minutes, cmos_minutes;
@@ -211,7 +211,7 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 static const struct rtc_class_ops alpha_rtc_ops = {
 	.read_time = alpha_rtc_read_time,
 	.set_time = alpha_rtc_set_time,
-	.set_mmss = alpha_rtc_set_mmss,
+	.set_mmss64 = alpha_rtc_set_mmss,
 	.ioctl = alpha_rtc_ioctl,
 };
 
@@ -276,7 +276,7 @@ do_remote_mmss(void *data)
 }
 
 static int
-remote_set_mmss(struct device *dev, unsigned long now)
+remote_set_mmss(struct device *dev, time64_t now)
 {
 	union remote_data x;
 	if (smp_processor_id() != boot_cpuid) {
@@ -290,7 +290,7 @@ remote_set_mmss(struct device *dev, unsigned long now)
 static const struct rtc_class_ops remote_rtc_ops = {
 	.read_time = remote_read_time,
 	.set_time = remote_set_time,
-	.set_mmss = remote_set_mmss,
+	.set_mmss64 = remote_set_mmss,
 	.ioctl = alpha_rtc_ioctl,
 };
 #endif
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 837c0fa58317..700686d04869 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -207,6 +207,9 @@ nautilus_init_pci(void)
 
 	/* Scan our single hose.  */
 	bus = pci_scan_bus(0, alpha_mv.pci_ops, hose);
+	if (!bus)
+		return;
+
 	hose->bus = bus;
 	pcibios_claim_one_bus(bus);
 
@@ -253,6 +256,7 @@ nautilus_init_pci(void)
 	   for the root bus, so just clear it. */
 	bus->self = NULL;
 	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
+	pci_bus_add_devices(bus);
 }
 
 /*
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 1163a1838ac1..aca0d5a45c7b 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -43,7 +43,6 @@ struct thread_info {
 	int preempt_count;		/* 0 => preemptable, <0 => BUG */
 	struct task_struct *task;	/* main task structure */
 	mm_segment_t addr_limit;	/* thread address space */
-	struct exec_domain *exec_domain;/* execution domain */
 	__u32 cpu;			/* current CPU */
 	unsigned long thr_ptr;		/* TLS ptr */
 };
@@ -56,7 +55,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task       = &tsk,			\
-	.exec_domain    = &default_exec_domain,	\
 	.flags      = 0,			\
 	.cpu        = 0,			\
 	.preempt_count  = INIT_PREEMPT_COUNT,	\
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 98c00a2d4dd9..f46efd14059d 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -155,8 +155,6 @@ int copy_thread(unsigned long clone_flags,
  */
 void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
 {
-	set_fs(USER_DS); /* user space */
-
 	regs->sp = usp;
 	regs->ret = pc;
 
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 114234e83caa..2251fb4bbfd7 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 	       sigset_t *set)
 {
 	int err;
-	err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs,
+	err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs,
 			     sizeof(sf->uc.uc_mcontext.regs.scratch));
 	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
@@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
 	if (!err)
 		set_current_blocked(&set);
 
-	err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs),
+	err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch),
 				sizeof(sf->uc.uc_mcontext.regs.scratch));
 
 	return err;
@@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	/* Don't restart from sigreturn */
 	syscall_wont_restart(regs);
 
+	/*
+	 * Ensure that sigreturn always returns to user mode (in case the
+	 * regs saved on user stack got fudged between save and sigreturn)
+	 * Otherwise it is easy to panic the kernel with a custom
+	 * signal handler and/or restorer which clobberes the status32/ret
+	 * to return to a bogus location in kernel mode.
+	 */
+	regs->status32 |= STATUS_U_MASK;
+
 	return regs->r0;
 
 badframe:
@@ -162,18 +171,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
 	return frame;
 }
 
-/*
- * translate the signal
- */
-static inline int map_sig(int sig)
-{
-	struct thread_info *thread = current_thread_info();
-	if (thread->exec_domain && thread->exec_domain->signal_invmap
-	    && sig < 32)
-		sig = thread->exec_domain->signal_invmap[sig];
-	return sig;
-}
-
 static int
 setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 {
@@ -222,15 +219,18 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 		return err;
 
 	/* #1 arg to the user Signal handler */
-	regs->r0 = map_sig(ksig->sig);
+	regs->r0 = ksig->sig;
 
 	/* setup PC of user space signal handler */
 	regs->ret = (unsigned long)ksig->ka.sa.sa_handler;
 
 	/*
 	 * handler returns using sigreturn stub provided already by userpsace
+	 * If not, nuke the process right away
 	 */
-	BUG_ON(!(ksig->ka.sa.sa_flags & SA_RESTORER));
+	if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
+		return 1;
+
 	regs->blink = (unsigned long)ksig->ka.sa.sa_restorer;
 
 	/* User Stack for signal handler will be above the frame just carved */
@@ -296,12 +296,12 @@ static void
 handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
 	sigset_t *oldset = sigmask_to_save();
-	int ret;
+	int failed;
 
 	/* Set up the stack frame */
-	ret = setup_rt_frame(ksig, oldset, regs);
+	failed = setup_rt_frame(ksig, oldset, regs);
 
-	signal_setup_done(ret, ksig, 0);
+	signal_setup_done(failed, ksig, 0);
 }
 
 void do_signal(struct pt_regs *regs)
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 1badf9b84b51..e00a01879025 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -52,7 +52,7 @@ static void show_callee_regs(struct callee_regs *cregs)
 	print_reg_file(&(cregs->r13), 13);
 }
 
-void print_task_path_n_nm(struct task_struct *tsk, char *buf)
+static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
 {
 	struct path path;
 	char *path_nm = NULL;
@@ -77,7 +77,6 @@ void print_task_path_n_nm(struct task_struct *tsk, char *buf)
 done:
 	pr_info("Path: %s\n", path_nm);
 }
-EXPORT_SYMBOL(print_task_path_n_nm);
 
 static void show_faulting_vma(unsigned long address, char *buf)
 {
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index e550b117ec4f..93c6ea52b671 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -841,7 +841,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc,
 				break;
 			case DW_CFA_GNU_window_save:
 			default:
-				unw_debug("UNKNOW OPCODE 0x%x\n", opcode);
+				unw_debug("UNKNOWN OPCODE 0x%x\n", opcode);
 				result = 0;
 				break;
 			}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3b11aacef2c3..c8baa96ed748 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1,8 +1,8 @@
 config ARM
 	bool
 	default y
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
@@ -21,6 +21,7 @@ config ARM
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
@@ -286,6 +287,11 @@ config GENERIC_BUG
 	def_bool y
 	depends on BUG
 
+config PGTABLE_LEVELS
+	int
+	default 3 if ARM_LPAE
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -619,6 +625,7 @@ config ARCH_PXA
 	select GENERIC_CLOCKEVENTS
 	select GPIO_PXA
 	select HAVE_IDE
+	select IRQ_DOMAIN
 	select MULTI_IRQ_HANDLER
 	select PLAT_PXA
 	select SPARSE_IRQ
@@ -1043,7 +1050,7 @@ config ARM_ERRATA_430973
 	depends on CPU_V7
 	help
 	  This option enables the workaround for the 430973 Cortex-A8
-	  (r1p0..r1p2) erratum. If a code sequence containing an ARM/Thumb
+	  r1p* erratum. If a code sequence containing an ARM/Thumb
 	  interworking branch is replaced with another code sequence at the
 	  same virtual address, whether due to self-modifying code or virtual
 	  to physical address re-mapping, Cortex-A8 does not recover from the
@@ -1112,6 +1119,7 @@ config ARM_ERRATA_742231
 config ARM_ERRATA_643719
 	bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
 	depends on CPU_V7 && SMP
+	default y
 	help
 	  This option enables the workaround for the 643719 Cortex-A9 (prior to
 	  r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR
@@ -1329,7 +1337,7 @@ config SMP
 	  If you don't know what to do here, say N.
 
 config SMP_ON_UP
-	bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)"
+	bool "Allow booting SMP kernel on uniprocessor systems"
 	depends on SMP && !XIP_KERNEL && MMU
 	default y
 	help
@@ -2111,16 +2119,6 @@ menu "Userspace binary formats"
 
 source "fs/Kconfig.binfmt"
 
-config ARTHUR
-	tristate "RISC OS personality"
-	depends on !AEABI
-	help
-	  Say Y here to include the kernel code necessary if you want to run
-	  Acorn RISC OS/Arthur binaries under Linux. This code is still very
-	  experimental; if this sounds frightening, say N and sleep in peace.
-	  You can also say M here to compile this support as a module (which
-	  will be called arthur).
-
 endmenu
 
 menu "Power management options"
@@ -2153,6 +2151,9 @@ source "arch/arm/Kconfig.debug"
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm/crypto/Kconfig"
+endif
 
 source "lib/Kconfig"
 
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 750ccde6a8ed..4cfb4b8fc2dd 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1589,59 +1589,6 @@ config DEBUG_SET_MODULE_RONX
 	  against certain classes of kernel exploits.
 	  If in doubt, say "N".
 
-menuconfig CORESIGHT
-	bool "CoreSight Tracing Support"
-	select ARM_AMBA
-	help
-	  This framework provides a kernel interface for the CoreSight debug
-	  and trace drivers to register themselves with. It's intended to build
-	  a topological view of the CoreSight components based on a DT
-	  specification and configure the right serie of components when a
-	  trace source gets enabled.
-
-if CORESIGHT
-config CORESIGHT_LINKS_AND_SINKS
-	bool "CoreSight Link and Sink drivers"
-	help
-	  This enables support for CoreSight link and sink drivers that are
-	  responsible for transporting and collecting the trace data
-	  respectively.  Link and sinks are dynamically aggregated with a trace
-	  entity at run time to form a complete trace path.
-
-config CORESIGHT_LINK_AND_SINK_TMC
-	bool "Coresight generic TMC driver"
-	depends on CORESIGHT_LINKS_AND_SINKS
-	help
-	  This enables support for the Trace Memory Controller driver.  Depending
-	  on its configuration the device can act as a link (embedded trace router
-	  - ETR) or sink (embedded trace FIFO).  The driver complies with the
-	  generic implementation of the component without special enhancement or
-	  added features.
-
-config CORESIGHT_SINK_TPIU
-	bool "Coresight generic TPIU driver"
-	depends on CORESIGHT_LINKS_AND_SINKS
-	help
-	  This enables support for the Trace Port Interface Unit driver, responsible
-	  for bridging the gap between the on-chip coresight components and a trace
-	  port collection engine, typically connected to an external host for use
-	  case capturing more traces than the on-board coresight memory can handle.
-
-config CORESIGHT_SINK_ETBV10
-	bool "Coresight ETBv1.0 driver"
-	depends on CORESIGHT_LINKS_AND_SINKS
-	help
-	  This enables support for the Embedded Trace Buffer version 1.0 driver
-	  that complies with the generic implementation of the component without
-	  special enhancement or added features.
+source "drivers/hwtracing/coresight/Kconfig"
 
-config CORESIGHT_SOURCE_ETM3X
-	bool "CoreSight Embedded Trace Macrocell 3.x driver"
-	select CORESIGHT_LINKS_AND_SINKS
-	help
-	  This driver provides support for processor ETM3.x and PTM1.x modules,
-	  which allows tracing the instructions that a processor is executing
-	  This is primarily useful for instruction level tracing.  Depending
-	  the ETM version data tracing may also be available.
-endif
 endmenu
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index fa03c13ddfec..cced41d40ce2 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -13,7 +13,7 @@
 # Ensure linker flags are correct
 LDFLAGS		:=
 
-LDFLAGS_vmlinux	:=-p --no-undefined -X
+LDFLAGS_vmlinux	:=-p --no-undefined -X --pic-veneer
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
 LDFLAGS_MODULE	+= --be8
@@ -149,6 +149,7 @@ machine-$(CONFIG_ARCH_BERLIN)		+= berlin
 machine-$(CONFIG_ARCH_CLPS711X)		+= clps711x
 machine-$(CONFIG_ARCH_CNS3XXX)		+= cns3xxx
 machine-$(CONFIG_ARCH_DAVINCI)		+= davinci
+machine-$(CONFIG_ARCH_DIGICOLOR)	+= digicolor
 machine-$(CONFIG_ARCH_DOVE)		+= dove
 machine-$(CONFIG_ARCH_EBSA110)		+= ebsa110
 machine-$(CONFIG_ARCH_EFM32)		+= efm32
@@ -261,6 +262,7 @@ core-$(CONFIG_FPE_FASTFPE)	+= $(FASTFPE_OBJ)
 core-$(CONFIG_VFP)		+= arch/arm/vfp/
 core-$(CONFIG_XEN)		+= arch/arm/xen/
 core-$(CONFIG_KVM_ARM_HOST) 	+= arch/arm/kvm/
+core-$(CONFIG_VDSO)		+= arch/arm/vdso/
 
 # If we have a machine-specific directory, then include it in the build.
 core-y				+= arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
@@ -318,6 +320,12 @@ dtbs: prepare scripts
 dtbs_install:
 	$(Q)$(MAKE) $(dtbinst)=$(boot)/dts
 
+PHONY += vdso_install
+vdso_install:
+ifeq ($(CONFIG_VDSO),y)
+	$(Q)$(MAKE) $(build)=arch/arm/vdso $@
+endif
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
@@ -342,4 +350,5 @@ define archhelp
   echo  '                  Install using (your) ~/bin/$(INSTALLKERNEL) or'
   echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
+  echo  '  vdso_install  - Install unstripped vdso.so to $$(INSTALL_MOD_PATH)/vdso'
 endef
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index ec2f8065f955..9eca7aee927f 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -12,7 +12,7 @@
 #
 
 ifneq ($(MACHINE),)
-include $(srctree)/$(MACHINE)/Makefile.boot
+include $(MACHINE)/Makefile.boot
 endif
 
 # Note: the following conditions must always be true:
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index c41a793b519c..2c45b5709fa4 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -10,8 +10,11 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/v7m.h>
+
+ AR_CLASS(	.arch	armv7-a	)
+ M_CLASS(	.arch	armv7-m	)
 
-	.arch	armv7-a
 /*
  * Debugging stuff
  *
@@ -114,7 +117,12 @@
  * sort out different calling conventions
  */
 		.align
-		.arm				@ Always enter in ARM state
+		/*
+		 * Always enter in ARM state for CPUs that support the ARM ISA.
+		 * As of today (2014) that's exactly the members of the A and R
+		 * classes.
+		 */
+ AR_CLASS(	.arm	)
 start:
 		.type	start,#function
 		.rept	7
@@ -132,14 +140,15 @@ start:
 
  THUMB(		.thumb			)
 1:
- ARM_BE8(	setend	be )			@ go BE8 if compiled for BE8
-		mrs	r9, cpsr
+ ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
+ AR_CLASS(	mrs	r9, cpsr	)
 #ifdef CONFIG_ARM_VIRT_EXT
 		bl	__hyp_stub_install	@ get into SVC mode, reversibly
 #endif
 		mov	r7, r1			@ save architecture ID
 		mov	r8, r2			@ save atags pointer
 
+#ifndef CONFIG_CPU_V7M
 		/*
 		 * Booting from Angel - need to enter SVC mode and disable
 		 * FIQs/IRQs (numeric definitions from angel arm.h source).
@@ -155,6 +164,7 @@ not_angel:
 		safe_svcmode_maskall r0
 		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
 						@ SPSR
+#endif
 		/*
 		 * Note that some cache flushing and other stuff may
 		 * be needed here - is there an Angel SWI call for this?
@@ -168,9 +178,26 @@ not_angel:
 		.text
 
 #ifdef CONFIG_AUTO_ZRELADDR
-		@ determine final kernel image address
+		/*
+		 * Find the start of physical memory.  As we are executing
+		 * without the MMU on, we are in the physical address space.
+		 * We just need to get rid of any offset by aligning the
+		 * address.
+		 *
+		 * This alignment is a balance between the requirements of
+		 * different platforms - we have chosen 128MB to allow
+		 * platforms which align the start of their physical memory
+		 * to 128MB to use this feature, while allowing the zImage
+		 * to be placed within the first 128MB of memory on other
+		 * platforms.  Increasing the alignment means we place
+		 * stricter alignment requirements on the start of physical
+		 * memory, but relaxing it means that we break people who
+		 * are already placing their zImage in (eg) the top 64MB
+		 * of this range.
+		 */
 		mov	r4, pc
 		and	r4, r4, #0xf8000000
+		/* Determine final kernel image address. */
 		add	r4, r4, #TEXT_OFFSET
 #else
 		ldr	r4, =zreladdr
@@ -810,6 +837,16 @@ __common_mmu_cache_on:
 call_cache_fn:	adr	r12, proc_types
 #ifdef CONFIG_CPU_CP15
 		mrc	p15, 0, r9, c0, c0	@ get processor ID
+#elif defined(CONFIG_CPU_V7M)
+		/*
+		 * On v7-M the processor id is located in the V7M_SCB_CPUID
+		 * register, but as cache handling is IMPLEMENTATION DEFINED on
+		 * v7-M (if existant at all) we just return early here.
+		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
+		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
+		 * use cp15 registers that are not implemented on v7-M.
+		 */
+		bx	lr
 #else
 		ldr	r9, =CONFIG_PROCESSOR_ID
 #endif
@@ -1310,8 +1347,9 @@ __hyp_reentry_vectors:
 
 __enter_kernel:
 		mov	r0, #0			@ must be 0
- ARM(		mov	pc, r4	)		@ call kernel
- THUMB(		bx	r4	)		@ entry point is always ARM
+ ARM(		mov	pc, r4		)	@ call kernel
+ M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
+ THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
 
 reloc_code_end:
 
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index 2c6248d9a9ef..c3255e0c90aa 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -301,3 +301,11 @@
 	cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
 	cd-inverted;
 };
+
+&aes {
+	status = "okay";
+};
+
+&sham {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts
index 83d40f7655e5..6b8493720424 100644
--- a/arch/arm/boot/dts/am335x-bone.dts
+++ b/arch/arm/boot/dts/am335x-bone.dts
@@ -24,11 +24,3 @@
 &mmc1 {
 	vmmc-supply = <&ldo3_reg>;
 };
-
-&sham {
-	status = "okay";
-};
-
-&aes {
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/am335x-lxm.dts b/arch/arm/boot/dts/am335x-lxm.dts
index 7266a00aab2e..5c5667a3624d 100644
--- a/arch/arm/boot/dts/am335x-lxm.dts
+++ b/arch/arm/boot/dts/am335x-lxm.dts
@@ -328,6 +328,10 @@
 	dual_emac_res_vlan = <3>;
 };
 
+&phy_sel {
+	rmii-clock-ext;
+};
+
 &mac {
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&cpsw_default>;
diff --git a/arch/arm/boot/dts/am33xx-clocks.dtsi b/arch/arm/boot/dts/am33xx-clocks.dtsi
index 712edce7d6fb..071b56aa0c7e 100644
--- a/arch/arm/boot/dts/am33xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am33xx-clocks.dtsi
@@ -99,7 +99,7 @@
 	ehrpwm0_tbclk: ehrpwm0_tbclk@44e10664 {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <0>;
 		reg = <0x0664>;
 	};
@@ -107,7 +107,7 @@
 	ehrpwm1_tbclk: ehrpwm1_tbclk@44e10664 {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <1>;
 		reg = <0x0664>;
 	};
@@ -115,7 +115,7 @@
 	ehrpwm2_tbclk: ehrpwm2_tbclk@44e10664 {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <2>;
 		reg = <0x0664>;
 	};
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 1943fc333e7c..ebe4fa691860 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -15,7 +15,7 @@
 
 / {
 	compatible = "ti,am4372", "ti,am43";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&wakeupgen>;
 
 
 	aliases {
@@ -48,6 +48,15 @@
 		#interrupt-cells = <3>;
 		reg = <0x48241000 0x1000>,
 		      <0x48240100 0x0100>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	l2-cache-controller@48242000 {
@@ -787,7 +796,7 @@
 		};
 
 		ocp2scp0: ocp2scp@483a8000 {
-			compatible = "ti,omap-ocp2scp";
+			compatible = "ti,am437x-ocp2scp", "ti,omap-ocp2scp";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			ranges;
@@ -806,7 +815,7 @@
 		};
 
 		ocp2scp1: ocp2scp@483e8000 {
-			compatible = "ti,omap-ocp2scp";
+			compatible = "ti,am437x-ocp2scp", "ti,omap-ocp2scp";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			ranges;
diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index f84d9715a4a9..26956cb50835 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts
@@ -352,7 +352,6 @@
 		reg = <0x24>;
 		compatible = "ti,tps65218";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* NMIn */
-		interrupt-parent = <&gic>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 832d24318f62..8ae29c955c11 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -392,7 +392,6 @@
 	tps@24 {
 		compatible = "ti,tps65218";
 		reg = <0x24>;
-		interrupt-parent = <&gic>;
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 257c099c347e..1d7109196872 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -369,7 +369,6 @@
 		reg = <0x24>;
 		compatible = "ti,tps65218";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* NMIn */
-		interrupt-parent = <&gic>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi
index c7dc9dab93a4..cfb49686ab6a 100644
--- a/arch/arm/boot/dts/am43xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am43xx-clocks.dtsi
@@ -107,7 +107,7 @@
 	ehrpwm0_tbclk: ehrpwm0_tbclk {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <0>;
 		reg = <0x0664>;
 	};
@@ -115,7 +115,7 @@
 	ehrpwm1_tbclk: ehrpwm1_tbclk {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <1>;
 		reg = <0x0664>;
 	};
@@ -123,7 +123,7 @@
 	ehrpwm2_tbclk: ehrpwm2_tbclk {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <2>;
 		reg = <0x0664>;
 	};
@@ -131,7 +131,7 @@
 	ehrpwm3_tbclk: ehrpwm3_tbclk {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <4>;
 		reg = <0x0664>;
 	};
@@ -139,7 +139,7 @@
 	ehrpwm4_tbclk: ehrpwm4_tbclk {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <5>;
 		reg = <0x0664>;
 	};
@@ -147,7 +147,7 @@
 	ehrpwm5_tbclk: ehrpwm5_tbclk {
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
-		clocks = <&dpll_per_m2_ck>;
+		clocks = <&l4ls_gclk>;
 		ti,bit-shift = <6>;
 		reg = <0x0664>;
 	};
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 6463f9ef2b54..e580f4ffbde0 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -87,6 +87,7 @@
 		gpios =  <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>;
 		gpio-fan,speed-map = <0     0>,
 				     <13000 1>;
+		#cooling-cells = <2>;
 	};
 
 	extcon_usb1: extcon_usb1 {
@@ -442,6 +443,7 @@
 		pinctrl-0 = <&tmp102_pins_default>;
 		interrupt-parent = <&gpio7>;
 		interrupts = <16 IRQ_TYPE_LEVEL_LOW>;
+		#thermal-sensor-cells = <1>;
 	};
 };
 
@@ -454,7 +456,6 @@
 	mcp_rtc: rtc@6f {
 		compatible = "microchip,mcp7941x";
 		reg = <0x6f>;
-		interrupt-parent = <&gic>;
 		interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>;  /* IRQ_SYS_1N */
 
 		pinctrl-names = "default";
@@ -477,7 +478,7 @@
 
 &uart3 {
 	status = "okay";
-	interrupts-extended = <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+	interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
 			      <&dra7_pmx_core 0x248>;
 
 	pinctrl-names = "default";
@@ -552,3 +553,50 @@
 &usb2 {
 	dr_mode = "peripheral";
 };
+
+&cpu_trips {
+	cpu_alert1: cpu_alert1 {
+		temperature = <50000>; /* millicelsius */
+		hysteresis = <2000>; /* millicelsius */
+		type = "active";
+	};
+};
+
+&cpu_cooling_maps {
+	map1 {
+		trip = <&cpu_alert1>;
+		cooling-device = <&gpio_fan THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+	};
+};
+
+&thermal_zones {
+	board_thermal: board_thermal {
+		polling-delay-passive = <1250>; /* milliseconds */
+		polling-delay = <1500>; /* milliseconds */
+
+				/* sensor       ID */
+		thermal-sensors = <&tmp102     0>;
+
+		board_trips: trips {
+			board_alert0: board_alert {
+				temperature = <40000>; /* millicelsius */
+				hysteresis = <2000>; /* millicelsius */
+				type = "active";
+			};
+
+			board_crit: board_crit {
+				temperature = <105000>; /* millicelsius */
+				hysteresis = <0>; /* millicelsius */
+				type = "critical";
+			};
+		};
+
+		board_cooling_maps: cooling-maps {
+			map0 {
+				trip = <&board_alert0>;
+				cooling-device =
+				  <&gpio_fan THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+			};
+		};
+       };
+};
diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
index fec1fca2ad66..6c4bc53cbf4e 100644
--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
@@ -167,7 +167,13 @@
 
 			macb1: ethernet@f802c000 {
 				phy-mode = "rmii";
+				#address-cells = <1>;
+				#size-cells = <0>;
 				status = "okay";
+
+				ethernet-phy@1 {
+					reg = <0x1>;
+				};
 			};
 
 			dbgu: serial@ffffee00 {
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index 7d989a8a9cf7..d88fe62a2b2e 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -842,7 +842,7 @@
 			};
 
 			macb0: ethernet@fffc4000 {
-				compatible = "cdns,at32ap7000-macb", "cdns,macb";
+				compatible = "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xfffc4000 0x100>;
 				interrupts = <21 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index e07dae75be77..111889b556cf 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -845,7 +845,7 @@
 			};
 
 			macb0: ethernet@fffbc000 {
-				compatible = "cdns,at32ap7000-macb", "cdns,macb";
+				compatible = "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xfffbc000 0x100>;
 				interrupts = <21 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index 56b66a795df7..70e59c5ceb2f 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -956,7 +956,7 @@
 			};
 
 			macb0: ethernet@fffbc000 {
-				compatible = "cdns,at32ap7000-macb", "cdns,macb";
+				compatible = "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xfffbc000 0x100>;
 				interrupts = <25 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/at91sam9x5_macb0.dtsi b/arch/arm/boot/dts/at91sam9x5_macb0.dtsi
index 57e89d1d0325..73d7e30965ba 100644
--- a/arch/arm/boot/dts/at91sam9x5_macb0.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5_macb0.dtsi
@@ -53,7 +53,7 @@
 			};
 
 			macb0: ethernet@f802c000 {
-				compatible = "cdns,at32ap7000-macb", "cdns,macb";
+				compatible = "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xf802c000 0x100>;
 				interrupts = <24 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/at91sam9x5_macb1.dtsi b/arch/arm/boot/dts/at91sam9x5_macb1.dtsi
index 663676c02861..d81980c40c7d 100644
--- a/arch/arm/boot/dts/at91sam9x5_macb1.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5_macb1.dtsi
@@ -41,7 +41,7 @@
 			};
 
 			macb1: ethernet@f8030000 {
-				compatible = "cdns,at32ap7000-macb", "cdns,macb";
+				compatible = "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xf8030000 0x100>;
 				interrupts = <27 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts
index d3a29c1b8417..afe678f6d2e9 100644
--- a/arch/arm/boot/dts/dm8168-evm.dts
+++ b/arch/arm/boot/dts/dm8168-evm.dts
@@ -36,6 +36,20 @@
 		>;
 	};
 
+	mmc_pins: pinmux_mmc_pins {
+		pinctrl-single,pins = <
+			DM816X_IOPAD(0x0a70, MUX_MODE0)			/* SD_POW */
+			DM816X_IOPAD(0x0a74, MUX_MODE0)			/* SD_CLK */
+			DM816X_IOPAD(0x0a78, MUX_MODE0)			/* SD_CMD */
+			DM816X_IOPAD(0x0a7C, MUX_MODE0)			/* SD_DAT0 */
+			DM816X_IOPAD(0x0a80, MUX_MODE0)			/* SD_DAT1 */
+			DM816X_IOPAD(0x0a84, MUX_MODE0)			/* SD_DAT2 */
+			DM816X_IOPAD(0x0a88, MUX_MODE0)			/* SD_DAT2 */
+			DM816X_IOPAD(0x0a8c, MUX_MODE2)			/* GP1[7] */
+			DM816X_IOPAD(0x0a90, MUX_MODE2)			/* GP1[8] */
+		>;
+	};
+
 	usb0_pins: pinmux_usb0_pins {
 		pinctrl-single,pins = <
 			DM816X_IOPAD(0x0d00, MUX_MODE0)			/* USB0_DRVVBUS */
@@ -137,7 +151,12 @@
 };
 
 &mmc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc_pins>;
 	vmmc-supply = <&vmmcsd_fixed>;
+	bus-width = <4>;
+	cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>;
+	wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
 };
 
 /* At least dm8168-evm rev c won't support multipoint, later may */
diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index 3c97b5f2addc..f35715bc6992 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -150,17 +150,27 @@
 		};
 
 		gpio1: gpio@48032000 {
-			compatible = "ti,omap3-gpio";
+			compatible = "ti,omap4-gpio";
 			ti,hwmods = "gpio1";
+			ti,gpio-always-on;
 			reg = <0x48032000 0x1000>;
-			interrupts = <97>;
+			interrupts = <96>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
 		};
 
 		gpio2: gpio@4804c000 {
-			compatible = "ti,omap3-gpio";
+			compatible = "ti,omap4-gpio";
 			ti,hwmods = "gpio2";
+			ti,gpio-always-on;
 			reg = <0x4804c000 0x1000>;
-			interrupts = <99>;
+			interrupts = <98>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
 		};
 
 		gpmc: gpmc@50000000 {
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 3290a96ba586..b1bd06c6c2a8 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -263,17 +263,15 @@
 
 	dcan1_pins_default: dcan1_pins_default {
 		pinctrl-single,pins = <
-			0x3d0   (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */
-			0x3d4   (MUX_MODE15)		/* dcan1_rx.off */
-			0x418   (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */
+			0x3d0   (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */
+			0x418   (PULL_UP | MUX_MODE1) /* wakeup0.dcan1_rx */
 		>;
 	};
 
 	dcan1_pins_sleep: dcan1_pins_sleep {
 		pinctrl-single,pins = <
-			0x3d0   (MUX_MODE15)	/* dcan1_tx.off */
-			0x3d4   (MUX_MODE15)	/* dcan1_rx.off */
-			0x418   (MUX_MODE15)	/* wakeup0.off */
+			0x3d0   (MUX_MODE15 | PULL_UP)	/* dcan1_tx.off */
+			0x418   (MUX_MODE15 | PULL_UP)	/* wakeup0.off */
 		>;
 	};
 };
@@ -446,7 +444,7 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart1_pins>;
-	interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
+	interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
 			      <&dra7_pmx_core 0x3e0>;
 };
 
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 127608d79033..fe55938bc978 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -13,14 +13,13 @@
 #include "skeleton.dtsi"
 
 #define MAX_SOURCES 400
-#define DIRECT_IRQ(irq) (MAX_SOURCES + irq)
 
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
 
 	compatible = "ti,dra7xx";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&crossbar_mpu>;
 
 	aliases {
 		i2c0 = &i2c1;
@@ -50,18 +49,27 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 
 	gic: interrupt-controller@48211000 {
 		compatible = "arm,cortex-a15-gic";
 		interrupt-controller;
 		#interrupt-cells = <3>;
-		arm,routable-irqs = <192>;
 		reg = <0x48211000 0x1000>,
 		      <0x48212000 0x1000>,
 		      <0x48214000 0x2000>,
 		      <0x48216000 0x2000>;
 		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	/*
@@ -91,8 +99,8 @@
 		ti,hwmods = "l3_main_1", "l3_main_2";
 		reg = <0x44000000 0x1000000>,
 		      <0x45000000 0x1000>;
-		interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI DIRECT_IRQ(10) IRQ_TYPE_LEVEL_HIGH>;
+		interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+				      <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
 
 		prm: prm@4ae06000 {
 			compatible = "ti,dra7-prm";
@@ -177,6 +185,18 @@
 			};
 		};
 
+		bandgap: bandgap@4a0021e0 {
+			reg = <0x4a0021e0 0xc
+				0x4a00232c 0xc
+				0x4a002380 0x2c
+				0x4a0023C0 0x3c
+				0x4a002564 0x8
+				0x4a002574 0x50>;
+				compatible = "ti,dra752-bandgap";
+				interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+				#thermal-sensor-cells = <1>;
+		};
+
 		cm_core_aon: cm_core_aon@4a005000 {
 			compatible = "ti,dra7-cm-core-aon";
 			reg = <0x4a005000 0x2000>;
@@ -344,7 +364,7 @@
 		uart1: serial@4806a000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806a000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart1";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -355,7 +375,7 @@
 		uart2: serial@4806c000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -366,7 +386,7 @@
 		uart3: serial@48020000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -377,7 +397,7 @@
 		uart4: serial@4806e000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
                         status = "disabled";
@@ -388,7 +408,7 @@
 		uart5: serial@48066000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48066000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart5";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -399,7 +419,7 @@
 		uart6: serial@48068000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48068000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart6";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -410,7 +430,7 @@
 		uart7: serial@48420000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48420000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart7";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -419,7 +439,7 @@
 		uart8: serial@48422000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48422000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart8";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -428,7 +448,7 @@
 		uart9: serial@48424000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48424000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart9";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -437,7 +457,7 @@
 		uart10: serial@4ae2b000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4ae2b000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart10";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -1111,7 +1131,6 @@
 					      "wkupclk", "refclk",
 					      "div-clk", "phy-div";
 				#phy-cells = <0>;
-				ti,hwmods = "pcie1-phy";
 			};
 
 			pcie2_phy: pciephy@4a095000 {
@@ -1130,7 +1149,6 @@
 					      "wkupclk", "refclk",
 					      "div-clk", "phy-div";
 				#phy-cells = <0>;
-				ti,hwmods = "pcie2-phy";
 				status = "disabled";
 			};
 		};
@@ -1337,9 +1355,12 @@
 			status = "disabled";
 		};
 
-		crossbar_mpu: crossbar@4a020000 {
+		crossbar_mpu: crossbar@4a002a48 {
 			compatible = "ti,irq-crossbar";
 			reg = <0x4a002a48 0x130>;
+			interrupt-controller;
+			interrupt-parent = <&wakeupgen>;
+			#interrupt-cells = <3>;
 			ti,max-irqs = <160>;
 			ti,max-crossbar-sources = <MAX_SOURCES>;
 			ti,reg-size = <2>;
@@ -1426,6 +1447,17 @@
 			status = "disabled";
 		};
 	};
+
+	thermal_zones: thermal-zones {
+		#include "omap4-cpu-thermal.dtsi"
+		#include "omap5-gpu-thermal.dtsi"
+		#include "omap5-core-thermal.dtsi"
+	};
+
+};
+
+&cpu_thermal {
+	polling-delay = <500>; /* milliseconds */
 };
 
 /include/ "dra7xx-clocks.dtsi"
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index e0264d0bf7b9..daf28110d487 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -119,17 +119,15 @@
 
 	dcan1_pins_default: dcan1_pins_default {
 		pinctrl-single,pins = <
-			0x3d0   (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */
-			0x3d4   (MUX_MODE15)		/* dcan1_rx.off */
-			0x418   (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */
+			0x3d0   (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */
+			0x418   (PULL_UP | MUX_MODE1)	/* wakeup0.dcan1_rx */
 		>;
 	};
 
 	dcan1_pins_sleep: dcan1_pins_sleep {
 		pinctrl-single,pins = <
-			0x3d0   (MUX_MODE15)	/* dcan1_tx.off */
-			0x3d4   (MUX_MODE15)	/* dcan1_rx.off */
-			0x418   (MUX_MODE15)	/* wakeup0.off */
+			0x3d0   (MUX_MODE15 | PULL_UP)	/* dcan1_tx.off */
+			0x418   (MUX_MODE15 | PULL_UP)	/* wakeup0.off */
 		>;
 	};
 
@@ -160,7 +158,6 @@
 		pinctrl-0 = <&tps65917_pins_default>;
 
 		interrupts = <GIC_SPI 2 IRQ_TYPE_NONE>;  /* IRQ_SYS_1N */
-		interrupt-parent = <&gic>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
diff --git a/arch/arm/boot/dts/dra72x.dtsi b/arch/arm/boot/dts/dra72x.dtsi
index e5a3d23a3df1..03d742f8d572 100644
--- a/arch/arm/boot/dts/dra72x.dtsi
+++ b/arch/arm/boot/dts/dra72x.dtsi
@@ -20,11 +20,17 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <0>;
+
+			/* cooling options */
+			cooling-min-level = <0>;
+			cooling-max-level = <2>;
+			#cooling-cells = <2>; /* min followed by max */
 		};
 	};
 
 	pmu {
 		compatible = "arm,cortex-a15-pmu";
-		interrupts = <GIC_SPI DIRECT_IRQ(131) IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-parent = <&wakeupgen>;
+		interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
 	};
 };
diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi
index 10173fab1a15..cc560a70926f 100644
--- a/arch/arm/boot/dts/dra74x.dtsi
+++ b/arch/arm/boot/dts/dra74x.dtsi
@@ -31,6 +31,11 @@
 			clock-names = "cpu";
 
 			clock-latency = <300000>; /* From omap-cpufreq driver */
+
+			/* cooling options */
+			cooling-min-level = <0>;
+			cooling-max-level = <2>;
+			#cooling-cells = <2>; /* min followed by max */
 		};
 		cpu@1 {
 			device_type = "cpu";
@@ -41,8 +46,9 @@
 
 	pmu {
 		compatible = "arm,cortex-a15-pmu";
-		interrupts = <GIC_SPI DIRECT_IRQ(131) IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI DIRECT_IRQ(132) IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-parent = <&wakeupgen>;
+		interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>;
 	};
 
 	ocp {
diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi
index 4bdcbd61ce47..99b09a44e269 100644
--- a/arch/arm/boot/dts/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi
@@ -243,10 +243,18 @@
 		ti,invert-autoidle-bit;
 	};
 
+	dpll_core_byp_mux: dpll_core_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		ti,bit-shift = <23>;
+		reg = <0x012c>;
+	};
+
 	dpll_core_ck: dpll_core_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-core-clock";
-		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		clocks = <&sys_clkin1>, <&dpll_core_byp_mux>;
 		reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>;
 	};
 
@@ -309,10 +317,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_dsp_byp_mux: dpll_dsp_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x0240>;
+	};
+
 	dpll_dsp_ck: dpll_dsp_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>;
+		clocks = <&sys_clkin1>, <&dpll_dsp_byp_mux>;
 		reg = <0x0234>, <0x0238>, <0x0240>, <0x023c>;
 	};
 
@@ -335,10 +351,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_iva_byp_mux: dpll_iva_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x01ac>;
+	};
+
 	dpll_iva_ck: dpll_iva_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>;
+		clocks = <&sys_clkin1>, <&dpll_iva_byp_mux>;
 		reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>;
 	};
 
@@ -361,10 +385,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_gpu_byp_mux: dpll_gpu_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		ti,bit-shift = <23>;
+		reg = <0x02e4>;
+	};
+
 	dpll_gpu_ck: dpll_gpu_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		clocks = <&sys_clkin1>, <&dpll_gpu_byp_mux>;
 		reg = <0x02d8>, <0x02dc>, <0x02e4>, <0x02e0>;
 	};
 
@@ -398,10 +430,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_ddr_byp_mux: dpll_ddr_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		ti,bit-shift = <23>;
+		reg = <0x021c>;
+	};
+
 	dpll_ddr_ck: dpll_ddr_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		clocks = <&sys_clkin1>, <&dpll_ddr_byp_mux>;
 		reg = <0x0210>, <0x0214>, <0x021c>, <0x0218>;
 	};
 
@@ -416,10 +456,18 @@
 		ti,invert-autoidle-bit;
 	};
 
+	dpll_gmac_byp_mux: dpll_gmac_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		ti,bit-shift = <23>;
+		reg = <0x02b4>;
+	};
+
 	dpll_gmac_ck: dpll_gmac_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+		clocks = <&sys_clkin1>, <&dpll_gmac_byp_mux>;
 		reg = <0x02a8>, <0x02ac>, <0x02b4>, <0x02b0>;
 	};
 
@@ -482,10 +530,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_eve_byp_mux: dpll_eve_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x0290>;
+	};
+
 	dpll_eve_ck: dpll_eve_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>;
+		clocks = <&sys_clkin1>, <&dpll_eve_byp_mux>;
 		reg = <0x0284>, <0x0288>, <0x0290>, <0x028c>;
 	};
 
@@ -1249,10 +1305,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_per_byp_mux: dpll_per_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x014c>;
+	};
+
 	dpll_per_ck: dpll_per_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>;
+		clocks = <&sys_clkin1>, <&dpll_per_byp_mux>;
 		reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>;
 	};
 
@@ -1275,10 +1339,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_usb_byp_mux: dpll_usb_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x018c>;
+	};
+
 	dpll_usb_ck: dpll_usb_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-j-type-clock";
-		clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>;
+		clocks = <&sys_clkin1>, <&dpll_usb_byp_mux>;
 		reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>;
 	};
 
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index 277b48b0b6f9..14ab515aa83c 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -18,6 +18,7 @@
  */
 
 #include "skeleton.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
 #include <dt-bindings/clock/exynos3250.h>
 
 / {
@@ -130,6 +131,9 @@
 		pmu_system_controller: system-controller@10020000 {
 			compatible = "samsung,exynos3250-pmu", "syscon";
 			reg = <0x10020000 0x4000>;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			interrupt-parent = <&gic>;
 		};
 
 		mipi_phy: video-phy@10020710 {
@@ -184,6 +188,7 @@
 			compatible = "samsung,exynos3250-rtc";
 			reg = <0x10070000 0x100>;
 			interrupts = <0 73 0>, <0 74 0>;
+			interrupt-parent = <&pmu_system_controller>;
 			status = "disabled";
 		};
 
@@ -193,6 +198,7 @@
 			interrupts = <0 216 0>;
 			clocks = <&cmu CLK_TMU_APBIF>;
 			clock-names = "tmu_apbif";
+			#include "exynos4412-tmu-sensor-conf.dtsi"
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
new file mode 100644
index 000000000000..735cb2f10817
--- /dev/null
+++ b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
@@ -0,0 +1,52 @@
+/*
+ * Device tree sources for Exynos4 thermal zone
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+/ {
+thermal-zones {
+	cpu_thermal: cpu-thermal {
+		thermal-sensors = <&tmu 0>;
+		polling-delay-passive = <0>;
+		polling-delay = <0>;
+		trips {
+			cpu_alert0: cpu-alert-0 {
+				temperature = <70000>; /* millicelsius */
+				hysteresis = <10000>; /* millicelsius */
+				type = "active";
+			};
+			cpu_alert1: cpu-alert-1 {
+				temperature = <95000>; /* millicelsius */
+				hysteresis = <10000>; /* millicelsius */
+				type = "active";
+			};
+			cpu_alert2: cpu-alert-2 {
+				temperature = <110000>; /* millicelsius */
+				hysteresis = <10000>; /* millicelsius */
+				type = "active";
+			};
+			cpu_crit0: cpu-crit-0 {
+				temperature = <120000>; /* millicelsius */
+				hysteresis = <0>; /* millicelsius */
+				type = "critical";
+			};
+		};
+		cooling-maps {
+			map0 {
+				trip = <&cpu_alert0>;
+			};
+			map1 {
+				trip = <&cpu_alert1>;
+			};
+		};
+	};
+};
+};
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index 76173cacd450..e20cdc24c3bb 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -38,6 +38,7 @@
 		i2c5 = &i2c_5;
 		i2c6 = &i2c_6;
 		i2c7 = &i2c_7;
+		i2c8 = &i2c_8;
 		csis0 = &csis_0;
 		csis1 = &csis_1;
 		fimc0 = &fimc_0;
@@ -104,6 +105,7 @@
 		compatible = "samsung,exynos4210-pd";
 		reg = <0x10023C20 0x20>;
 		#power-domain-cells = <0>;
+		power-domains = <&pd_lcd0>;
 	};
 
 	pd_cam: cam-power-domain@10023C00 {
@@ -152,6 +154,9 @@
 	pmu_system_controller: system-controller@10020000 {
 		compatible = "samsung,exynos4210-pmu", "syscon";
 		reg = <0x10020000 0x4000>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	dsi_0: dsi@11C80000 {
@@ -264,6 +269,7 @@
 	rtc@10070000 {
 		compatible = "samsung,s3c6410-rtc";
 		reg = <0x10070000 0x100>;
+		interrupt-parent = <&pmu_system_controller>;
 		interrupts = <0 44 0>, <0 45 0>;
 		clocks = <&clock CLK_RTC>;
 		clock-names = "rtc";
@@ -554,6 +560,22 @@
 		status = "disabled";
 	};
 
+	i2c_8: i2c@138E0000 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "samsung,s3c2440-hdmiphy-i2c";
+		reg = <0x138E0000 0x100>;
+		interrupts = <0 93 0>;
+		clocks = <&clock CLK_I2C_HDMI>;
+		clock-names = "i2c";
+		status = "disabled";
+
+		hdmi_i2c_phy: hdmiphy@38 {
+			compatible = "exynos4210-hdmiphy";
+			reg = <0x38>;
+		};
+	};
+
 	spi_0: spi@13920000 {
 		compatible = "samsung,exynos4210-spi";
 		reg = <0x13920000 0x100>;
@@ -663,6 +685,33 @@
 		status = "disabled";
 	};
 
+	tmu: tmu@100C0000 {
+		#include "exynos4412-tmu-sensor-conf.dtsi"
+	};
+
+	hdmi: hdmi@12D00000 {
+		compatible = "samsung,exynos4210-hdmi";
+		reg = <0x12D00000 0x70000>;
+		interrupts = <0 92 0>;
+		clock-names = "hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy",
+			"mout_hdmi";
+		clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>,
+			<&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>,
+			<&clock CLK_MOUT_HDMI>;
+		phy = <&hdmi_i2c_phy>;
+		power-domains = <&pd_tv>;
+		samsung,syscon-phandle = <&pmu_system_controller>;
+		status = "disabled";
+	};
+
+	mixer: mixer@12C10000 {
+		compatible = "samsung,exynos4210-mixer";
+		interrupts = <0 91 0>;
+		reg = <0x12C10000 0x2100>, <0x12c00000 0x300>;
+		power-domains = <&pd_tv>;
+		status = "disabled";
+	};
+
 	ppmu_dmc0: ppmu_dmc0@106a0000 {
 		compatible = "samsung,exynos-ppmu";
 		reg = <0x106a0000 0x2000>;
diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts
index 3d6652a4b6cb..32c5fd8f6269 100644
--- a/arch/arm/boot/dts/exynos4210-trats.dts
+++ b/arch/arm/boot/dts/exynos4210-trats.dts
@@ -426,6 +426,25 @@
 		status = "okay";
 	};
 
+	tmu@100C0000 {
+		status = "okay";
+	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			cooling-maps {
+				map0 {
+				     /* Corresponds to 800MHz at freq_table */
+				     cooling-device = <&cpu0 2 2>;
+				};
+				map1 {
+				     /* Corresponds to 200MHz at freq_table */
+				     cooling-device = <&cpu0 4 4>;
+			       };
+		       };
+		};
+	};
+
 	camera {
 		pinctrl-names = "default";
 		pinctrl-0 = <>;
diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts
index b57e6b82ea20..d4f2b11319dd 100644
--- a/arch/arm/boot/dts/exynos4210-universal_c210.dts
+++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts
@@ -505,6 +505,63 @@
 			assigned-clock-rates = <0>, <160000000>;
 		};
 	};
+
+	hdmi_en: voltage-regulator-hdmi-5v {
+		compatible = "regulator-fixed";
+		regulator-name = "HDMI_5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpe0 1 0>;
+		enable-active-high;
+	};
+
+	hdmi_ddc: i2c-ddc {
+		compatible = "i2c-gpio";
+		gpios = <&gpe4 2 0 &gpe4 3 0>;
+		i2c-gpio,delay-us = <100>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		pinctrl-0 = <&i2c_ddc_bus>;
+		pinctrl-names = "default";
+		status = "okay";
+	};
+
+	mixer@12C10000 {
+		status = "okay";
+	};
+
+	hdmi@12D00000 {
+		hpd-gpio = <&gpx3 7 0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&hdmi_hpd>;
+		hdmi-en-supply = <&hdmi_en>;
+		vdd-supply = <&ldo3_reg>;
+		vdd_osc-supply = <&ldo4_reg>;
+		vdd_pll-supply = <&ldo3_reg>;
+		ddc = <&hdmi_ddc>;
+		status = "okay";
+	};
+
+	i2c@138E0000 {
+		status = "okay";
+	};
+};
+
+&pinctrl_1 {
+	hdmi_hpd: hdmi-hpd {
+		samsung,pins = "gpx3-7";
+		samsung,pin-pud = <0>;
+	};
+};
+
+&pinctrl_0 {
+	i2c_ddc_bus: i2c-ddc-bus {
+		samsung,pins = "gpe4-2", "gpe4-3";
+		samsung,pin-function = <2>;
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <0>;
+	};
 };
 
 &mdma1 {
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 67c832c9dcf1..be89f83f70e7 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -21,6 +21,7 @@
 
 #include "exynos4.dtsi"
 #include "exynos4210-pinctrl.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
 
 / {
 	compatible = "samsung,exynos4210", "samsung,exynos4";
@@ -35,10 +36,13 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@900 {
+		cpu0: cpu@900 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0x900>;
+			cooling-min-level = <4>;
+			cooling-max-level = <2>;
+			#cooling-cells = <2>; /* min followed by max */
 		};
 
 		cpu@901 {
@@ -153,16 +157,38 @@
 		reg = <0x03860000 0x1000>;
 	};
 
-	tmu@100C0000 {
+	tmu: tmu@100C0000 {
 		compatible = "samsung,exynos4210-tmu";
 		interrupt-parent = <&combiner>;
 		reg = <0x100C0000 0x100>;
 		interrupts = <2 4>;
 		clocks = <&clock CLK_TMU_APBIF>;
 		clock-names = "tmu_apbif";
+		samsung,tmu_gain = <15>;
+		samsung,tmu_reference_voltage = <7>;
 		status = "disabled";
 	};
 
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tmu 0>;
+
+			trips {
+			      cpu_alert0: cpu-alert-0 {
+				      temperature = <85000>; /* millicelsius */
+			      };
+			      cpu_alert1: cpu-alert-1 {
+				      temperature = <100000>; /* millicelsius */
+			      };
+			      cpu_alert2: cpu-alert-2 {
+				      temperature = <110000>; /* millicelsius */
+			      };
+			};
+		};
+	};
+
 	g2d@12800000 {
 		compatible = "samsung,s5pv210-g2d";
 		reg = <0x12800000 0x1000>;
@@ -203,6 +229,14 @@
 		};
 	};
 
+	mixer: mixer@12C10000 {
+		clock-names = "mixer", "hdmi", "sclk_hdmi", "vp", "mout_mixer",
+			"sclk_mixer";
+		clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+			<&clock CLK_SCLK_HDMI>, <&clock CLK_VP>,
+			<&clock CLK_MOUT_MIXER>, <&clock CLK_SCLK_MIXER>;
+	};
+
 	ppmu_lcd1: ppmu_lcd1@12240000 {
 		compatible = "samsung,exynos-ppmu";
 		reg = <0x12240000 0x2000>;
diff --git a/arch/arm/boot/dts/exynos4212.dtsi b/arch/arm/boot/dts/exynos4212.dtsi
index dd0a43ec56da..5be03288f1ee 100644
--- a/arch/arm/boot/dts/exynos4212.dtsi
+++ b/arch/arm/boot/dts/exynos4212.dtsi
@@ -26,10 +26,13 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@A00 {
+		cpu0: cpu@A00 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0xA00>;
+			cooling-min-level = <13>;
+			cooling-max-level = <7>;
+			#cooling-cells = <2>; /* min followed by max */
 		};
 
 		cpu@A01 {
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index de80b5bba204..adb4f6a97a1d 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -249,6 +249,20 @@
 					regulator-always-on;
 				};
 
+				ldo8_reg: ldo@8 {
+					regulator-compatible = "LDO8";
+					regulator-name = "VDD10_HDMI_1.0V";
+					regulator-min-microvolt = <1000000>;
+					regulator-max-microvolt = <1000000>;
+				};
+
+				ldo10_reg: ldo@10 {
+					regulator-compatible = "LDO10";
+					regulator-name = "VDDQ_MIPIHSI_1.8V";
+					regulator-min-microvolt = <1800000>;
+					regulator-max-microvolt = <1800000>;
+				};
+
 				ldo11_reg: LDO11 {
 					regulator-name = "VDD18_ABB1_1.8V";
 					regulator-min-microvolt = <1800000>;
@@ -411,6 +425,51 @@
 	ehci: ehci@12580000 {
 		status = "okay";
 	};
+
+	tmu@100C0000 {
+		vtmu-supply = <&ldo10_reg>;
+		status = "okay";
+	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			cooling-maps {
+				map0 {
+				     /* Corresponds to 800MHz at freq_table */
+				     cooling-device = <&cpu0 7 7>;
+				};
+				map1 {
+				     /* Corresponds to 200MHz at freq_table */
+				     cooling-device = <&cpu0 13 13>;
+			       };
+		       };
+		};
+	};
+
+	mixer: mixer@12C10000 {
+		status = "okay";
+	};
+
+	hdmi@12D00000 {
+		hpd-gpio = <&gpx3 7 0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&hdmi_hpd>;
+		vdd-supply = <&ldo8_reg>;
+		vdd_osc-supply = <&ldo10_reg>;
+		vdd_pll-supply = <&ldo8_reg>;
+		ddc = <&hdmi_ddc>;
+		status = "okay";
+	};
+
+	hdmi_ddc: i2c@13880000 {
+		status = "okay";
+		pinctrl-names = "default";
+		pinctrl-0 = <&i2c2_bus>;
+	};
+
+	i2c@138E0000 {
+		status = "okay";
+	};
 };
 
 &pinctrl_1 {
@@ -425,4 +484,9 @@
 		samsung,pin-pud = <0>;
 		samsung,pin-drv = <0>;
 	};
+
+	hdmi_hpd: hdmi-hpd {
+		samsung,pins = "gpx3-7";
+		samsung,pin-pud = <1>;
+	};
 };
diff --git a/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi
new file mode 100644
index 000000000000..e3f7934d19d0
--- /dev/null
+++ b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi
@@ -0,0 +1,24 @@
+/*
+ * Device tree sources for Exynos4412 TMU sensor configuration
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal_exynos.h>
+
+#thermal-sensor-cells = <0>;
+samsung,tmu_gain = <8>;
+samsung,tmu_reference_voltage = <16>;
+samsung,tmu_noise_cancel_mode = <4>;
+samsung,tmu_efuse_value = <55>;
+samsung,tmu_min_efuse_value = <40>;
+samsung,tmu_max_efuse_value = <100>;
+samsung,tmu_first_point_trim = <25>;
+samsung,tmu_second_point_trim = <85>;
+samsung,tmu_default_temp_offset = <50>;
+samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>;
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 21f748083586..173ffa479ad3 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -927,6 +927,21 @@
 		pulldown-ohm = <100000>; /* 100K */
 		io-channels = <&adc 2>;  /* Battery temperature */
 	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			cooling-maps {
+				map0 {
+				     /* Corresponds to 800MHz at freq_table */
+				     cooling-device = <&cpu0 7 7>;
+				};
+				map1 {
+				     /* Corresponds to 200MHz at freq_table */
+				     cooling-device = <&cpu0 13 13>;
+			       };
+		       };
+		};
+	};
 };
 
 &pmu_system_controller {
diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi
index 0f6ec93bb1d8..68ad43b391ae 100644
--- a/arch/arm/boot/dts/exynos4412.dtsi
+++ b/arch/arm/boot/dts/exynos4412.dtsi
@@ -26,10 +26,13 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@A00 {
+		cpu0: cpu@A00 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0xA00>;
+			cooling-min-level = <13>;
+			cooling-max-level = <7>;
+			#cooling-cells = <2>; /* min followed by max */
 		};
 
 		cpu@A01 {
diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi
index f5e0ae780d6c..6a6abe14fd9b 100644
--- a/arch/arm/boot/dts/exynos4x12.dtsi
+++ b/arch/arm/boot/dts/exynos4x12.dtsi
@@ -19,6 +19,7 @@
 
 #include "exynos4.dtsi"
 #include "exynos4x12-pinctrl.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
 
 / {
 	aliases {
@@ -297,4 +298,15 @@
 		clock-names = "tmu_apbif";
 		status = "disabled";
 	};
+
+	hdmi: hdmi@12D00000 {
+		compatible = "samsung,exynos4212-hdmi";
+	};
+
+	mixer: mixer@12C10000 {
+		compatible = "samsung,exynos4212-mixer";
+		clock-names = "mixer", "hdmi", "sclk_hdmi", "vp";
+		clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+			 <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>;
+	};
 };
diff --git a/arch/arm/boot/dts/exynos5250-spring.dts b/arch/arm/boot/dts/exynos5250-spring.dts
index f02775487cd4..d075a68ac078 100644
--- a/arch/arm/boot/dts/exynos5250-spring.dts
+++ b/arch/arm/boot/dts/exynos5250-spring.dts
@@ -429,7 +429,6 @@
 &mmc_0 {
 	status = "okay";
 	num-slots = <1>;
-	supports-highspeed;
 	broken-cd;
 	card-detect-delay = <200>;
 	samsung,dw-mshc-ciu-div = <3>;
@@ -437,11 +436,8 @@
 	samsung,dw-mshc-ddr-timing = <1 2>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_cd &sd0_bus4 &sd0_bus8>;
-
-	slot@0 {
-		reg = <0>;
-		bus-width = <8>;
-	};
+	bus-width = <8>;
+	cap-mmc-highspeed;
 };
 
 /*
@@ -451,7 +447,6 @@
 &mmc_1 {
 	status = "okay";
 	num-slots = <1>;
-	supports-highspeed;
 	broken-cd;
 	card-detect-delay = <200>;
 	samsung,dw-mshc-ciu-div = <3>;
@@ -459,11 +454,8 @@
 	samsung,dw-mshc-ddr-timing = <1 2>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd1_clk &sd1_cmd &sd1_cd &sd1_bus4>;
-
-	slot@0 {
-		reg = <0>;
-		bus-width = <4>;
-	};
+	bus-width = <4>;
+	cap-sd-highspeed;
 };
 
 &pinctrl_0 {
@@ -490,7 +482,7 @@
 
 	power_key_irq: power-key-irq {
 		samsung,pins = "gpx1-3";
-		samsung,pin-function = <0>;
+		samsung,pin-function = <0xf>;
 		samsung,pin-pud = <0>;
 		samsung,pin-drv = <0>;
 	};
@@ -518,7 +510,7 @@
 
 	lid_irq: lid-irq {
 		samsung,pins = "gpx3-5";
-		samsung,pin-function = <0>;
+		samsung,pin-function = <0xf>;
 		samsung,pin-pud = <0>;
 		samsung,pin-drv = <0>;
 	};
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 9bb1b0b738f5..77f656eb8e6b 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -20,7 +20,7 @@
 #include <dt-bindings/clock/exynos5250.h>
 #include "exynos5.dtsi"
 #include "exynos5250-pinctrl.dtsi"
-
+#include "exynos4-cpu-thermal.dtsi"
 #include <dt-bindings/clock/exynos-audss-clk.h>
 
 / {
@@ -58,11 +58,14 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@0 {
+		cpu0: cpu@0 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <0>;
 			clock-frequency = <1700000000>;
+			cooling-min-level = <15>;
+			cooling-max-level = <9>;
+			#cooling-cells = <2>; /* min followed by max */
 		};
 		cpu@1 {
 			device_type = "cpu";
@@ -102,6 +105,12 @@
 		#power-domain-cells = <0>;
 	};
 
+	pd_disp1: disp1-power-domain@100440A0 {
+		compatible = "samsung,exynos4210-pd";
+		reg = <0x100440A0 0x20>;
+		#power-domain-cells = <0>;
+	};
+
 	clock: clock-controller@10010000 {
 		compatible = "samsung,exynos5250-clock";
 		reg = <0x10010000 0x30000>;
@@ -196,6 +205,9 @@
 		clock-names = "clkout16";
 		clocks = <&clock CLK_FIN_PLL>;
 		#clock-cells = <1>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	sysreg_system_controller: syscon@10050000 {
@@ -232,15 +244,36 @@
 	rtc: rtc@101E0000 {
 		clocks = <&clock CLK_RTC>;
 		clock-names = "rtc";
+		interrupt-parent = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
-	tmu@10060000 {
+	tmu: tmu@10060000 {
 		compatible = "samsung,exynos5250-tmu";
 		reg = <0x10060000 0x100>;
 		interrupts = <0 65 0>;
 		clocks = <&clock CLK_TMU>;
 		clock-names = "tmu_apbif";
+		#include "exynos4412-tmu-sensor-conf.dtsi"
+	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tmu 0>;
+
+			cooling-maps {
+				map0 {
+				     /* Corresponds to 800MHz at freq_table */
+				     cooling-device = <&cpu0 9 9>;
+				};
+				map1 {
+				     /* Corresponds to 200MHz at freq_table */
+				     cooling-device = <&cpu0 15 15>;
+			       };
+		       };
+		};
 	};
 
 	serial@12C00000 {
@@ -719,6 +752,7 @@
 	hdmi: hdmi {
 		compatible = "samsung,exynos4212-hdmi";
 		reg = <0x14530000 0x70000>;
+		power-domains = <&pd_disp1>;
 		interrupts = <0 95 0>;
 		clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>,
 			 <&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>,
@@ -731,9 +765,11 @@
 	mixer {
 		compatible = "samsung,exynos5250-mixer";
 		reg = <0x14450000 0x10000>;
+		power-domains = <&pd_disp1>;
 		interrupts = <0 94 0>;
-		clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>;
-		clock-names = "mixer", "sclk_hdmi";
+		clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+			 <&clock CLK_SCLK_HDMI>;
+		clock-names = "mixer", "hdmi", "sclk_hdmi";
 	};
 
 	dp_phy: video-phy@10040720 {
@@ -743,6 +779,7 @@
 	};
 
 	dp: dp-controller@145B0000 {
+		power-domains = <&pd_disp1>;
 		clocks = <&clock CLK_DP>;
 		clock-names = "dp";
 		phys = <&dp_phy>;
@@ -750,6 +787,7 @@
 	};
 
 	fimd: fimd@14400000 {
+		power-domains = <&pd_disp1>;
 		clocks = <&clock CLK_SCLK_FIMD1>, <&clock CLK_FIMD1>;
 		clock-names = "sclk_fimd", "fimd";
 	};
diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
new file mode 100644
index 000000000000..5d31fc140823
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
@@ -0,0 +1,35 @@
+/*
+ * Device tree sources for default Exynos5420 thermal zone definition
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+polling-delay-passive = <0>;
+polling-delay = <0>;
+trips {
+	cpu-alert-0 {
+		temperature = <85000>; /* millicelsius */
+		hysteresis = <10000>; /* millicelsius */
+		type = "active";
+	};
+	cpu-alert-1 {
+		temperature = <103000>; /* millicelsius */
+		hysteresis = <10000>; /* millicelsius */
+		type = "active";
+	};
+	cpu-alert-2 {
+		temperature = <110000>; /* millicelsius */
+		hysteresis = <10000>; /* millicelsius */
+		type = "active";
+	};
+	cpu-crit-0 {
+		temperature = <1200000>; /* millicelsius */
+		hysteresis = <0>; /* millicelsius */
+		type = "critical";
+	};
+};
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index 9dc2e9773b30..b3d2d53820e3 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -327,6 +327,7 @@
 	rtc: rtc@101E0000 {
 		clocks = <&clock CLK_RTC>;
 		clock-names = "rtc";
+		interrupt-parent = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
@@ -740,8 +741,9 @@
 		compatible = "samsung,exynos5420-mixer";
 		reg = <0x14450000 0x10000>;
 		interrupts = <0 94 0>;
-		clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>;
-		clock-names = "mixer", "sclk_hdmi";
+		clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+			 <&clock CLK_SCLK_HDMI>;
+		clock-names = "mixer", "hdmi", "sclk_hdmi";
 		power-domains = <&disp_pd>;
 	};
 
@@ -769,6 +771,9 @@
 		clock-names = "clkout16";
 		clocks = <&clock CLK_FIN_PLL>;
 		#clock-cells = <1>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	sysreg_system_controller: syscon@10050000 {
@@ -782,6 +787,7 @@
 		interrupts = <0 65 0>;
 		clocks = <&clock CLK_TMU>;
 		clock-names = "tmu_apbif";
+		#include "exynos4412-tmu-sensor-conf.dtsi"
 	};
 
 	tmu_cpu1: tmu@10064000 {
@@ -790,6 +796,7 @@
 		interrupts = <0 183 0>;
 		clocks = <&clock CLK_TMU>;
 		clock-names = "tmu_apbif";
+		#include "exynos4412-tmu-sensor-conf.dtsi"
 	};
 
 	tmu_cpu2: tmu@10068000 {
@@ -798,6 +805,7 @@
 		interrupts = <0 184 0>;
 		clocks = <&clock CLK_TMU>, <&clock CLK_TMU>;
 		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+		#include "exynos4412-tmu-sensor-conf.dtsi"
 	};
 
 	tmu_cpu3: tmu@1006c000 {
@@ -806,6 +814,7 @@
 		interrupts = <0 185 0>;
 		clocks = <&clock CLK_TMU>, <&clock CLK_TMU_GPU>;
 		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+		#include "exynos4412-tmu-sensor-conf.dtsi"
 	};
 
 	tmu_gpu: tmu@100a0000 {
@@ -814,6 +823,30 @@
 		interrupts = <0 215 0>;
 		clocks = <&clock CLK_TMU_GPU>, <&clock CLK_TMU>;
 		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+		#include "exynos4412-tmu-sensor-conf.dtsi"
+	};
+
+	thermal-zones {
+		cpu0_thermal: cpu0-thermal {
+			thermal-sensors = <&tmu_cpu0>;
+			#include "exynos5420-trip-points.dtsi"
+		};
+		cpu1_thermal: cpu1-thermal {
+		       thermal-sensors = <&tmu_cpu1>;
+		       #include "exynos5420-trip-points.dtsi"
+		};
+		cpu2_thermal: cpu2-thermal {
+		       thermal-sensors = <&tmu_cpu2>;
+		       #include "exynos5420-trip-points.dtsi"
+		};
+		cpu3_thermal: cpu3-thermal {
+		       thermal-sensors = <&tmu_cpu3>;
+		       #include "exynos5420-trip-points.dtsi"
+		};
+		gpu_thermal: gpu-thermal {
+		       thermal-sensors = <&tmu_gpu>;
+		       #include "exynos5420-trip-points.dtsi"
+		};
 	};
 
         watchdog: watchdog@101D0000 {
diff --git a/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi
new file mode 100644
index 000000000000..7b2fba0ae92b
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi
@@ -0,0 +1,24 @@
+/*
+ * Device tree sources for Exynos5440 TMU sensor configuration
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal_exynos.h>
+
+#thermal-sensor-cells = <0>;
+samsung,tmu_gain = <5>;
+samsung,tmu_reference_voltage = <16>;
+samsung,tmu_noise_cancel_mode = <4>;
+samsung,tmu_efuse_value = <0x5d2d>;
+samsung,tmu_min_efuse_value = <16>;
+samsung,tmu_max_efuse_value = <76>;
+samsung,tmu_first_point_trim = <25>;
+samsung,tmu_second_point_trim = <70>;
+samsung,tmu_default_temp_offset = <25>;
+samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>;
diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
new file mode 100644
index 000000000000..48adfa8f4300
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
@@ -0,0 +1,25 @@
+/*
+ * Device tree sources for default Exynos5440 thermal zone definition
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+polling-delay-passive = <0>;
+polling-delay = <0>;
+trips {
+	cpu-alert-0 {
+		temperature = <100000>; /* millicelsius */
+		hysteresis = <0>; /* millicelsius */
+		type = "active";
+	};
+	cpu-crit-0 {
+		temperature = <1050000>; /* millicelsius */
+		hysteresis = <0>; /* millicelsius */
+		type = "critical";
+	};
+};
diff --git a/arch/arm/boot/dts/exynos5440.dtsi b/arch/arm/boot/dts/exynos5440.dtsi
index 8f3373cd7b87..59d9416b3b03 100644
--- a/arch/arm/boot/dts/exynos5440.dtsi
+++ b/arch/arm/boot/dts/exynos5440.dtsi
@@ -219,6 +219,7 @@
 		interrupts = <0 58 0>;
 		clocks = <&clock CLK_B_125>;
 		clock-names = "tmu_apbif";
+		#include "exynos5440-tmu-sensor-conf.dtsi"
 	};
 
 	tmuctrl_1: tmuctrl@16011C {
@@ -227,6 +228,7 @@
 		interrupts = <0 58 0>;
 		clocks = <&clock CLK_B_125>;
 		clock-names = "tmu_apbif";
+		#include "exynos5440-tmu-sensor-conf.dtsi"
 	};
 
 	tmuctrl_2: tmuctrl@160120 {
@@ -235,6 +237,22 @@
 		interrupts = <0 58 0>;
 		clocks = <&clock CLK_B_125>;
 		clock-names = "tmu_apbif";
+		#include "exynos5440-tmu-sensor-conf.dtsi"
+	};
+
+	thermal-zones {
+		cpu0_thermal: cpu0-thermal {
+			thermal-sensors = <&tmuctrl_0>;
+			#include "exynos5440-trip-points.dtsi"
+		};
+		cpu1_thermal: cpu1-thermal {
+		       thermal-sensors = <&tmuctrl_1>;
+		       #include "exynos5440-trip-points.dtsi"
+		};
+		cpu2_thermal: cpu2-thermal {
+		       thermal-sensors = <&tmuctrl_2>;
+		       #include "exynos5440-trip-points.dtsi"
+		};
 	};
 
 	sata@210000 {
diff --git a/arch/arm/boot/dts/hip04.dtsi b/arch/arm/boot/dts/hip04.dtsi
index 238814596a87..44044f275115 100644
--- a/arch/arm/boot/dts/hip04.dtsi
+++ b/arch/arm/boot/dts/hip04.dtsi
@@ -275,7 +275,6 @@
 		compatible = "arm,coresight-etb10", "arm,primecell";
 		reg = <0 0xe3c42000 0 0x1000>;
 
-		coresight-default-sink;
 		clocks = <&clk_375m>;
 		clock-names = "apb_pclk";
 		port {
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index f1cd2147421d..a626e6dd8022 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -35,6 +35,7 @@
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio3 22 0>;
 			enable-active-high;
+			vin-supply = <&swbst_reg>;
 		};
 
 		reg_usb_h1_vbus: regulator@1 {
@@ -45,6 +46,7 @@
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio1 29 0>;
 			enable-active-high;
+			vin-supply = <&swbst_reg>;
 		};
 
 		reg_audio: regulator@2 {
diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts
index fda4932faefd..945887d3fdb3 100644
--- a/arch/arm/boot/dts/imx6sl-evk.dts
+++ b/arch/arm/boot/dts/imx6sl-evk.dts
@@ -52,6 +52,7 @@
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio4 0 0>;
 			enable-active-high;
+			vin-supply = <&swbst_reg>;
 		};
 
 		reg_usb_otg2_vbus: regulator@1 {
@@ -62,6 +63,7 @@
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio4 2 0>;
 			enable-active-high;
+			vin-supply = <&swbst_reg>;
 		};
 
 		reg_aud3v: regulator@2 {
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 25f7b0a22114..8cdca51b6984 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -150,7 +150,6 @@
 		compatible = "arm,coresight-etb10", "arm,primecell";
 		reg = <0x5401b000 0x1000>;
 
-		coresight-default-sink;
 		clocks = <&emu_src_ck>;
 		clock-names = "apb_pclk";
 		port {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index c792391ef090..6d4c46be8c39 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -145,7 +145,6 @@
 		compatible = "arm,coresight-etb10", "arm,primecell";
 		reg = <0x5401b000 0x1000>;
 
-		coresight-default-sink;
 		clocks = <&emu_src_ck>;
 		clock-names = "apb_pclk";
 		port {
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index db80f9d376fa..2cab149b191c 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -609,6 +609,58 @@
 	pinctrl-0 = <&i2c3_pins>;
 
 	clock-frequency = <400000>;
+
+	lis302dl: lis3lv02d@1d {
+		compatible = "st,lis3lv02d";
+		reg = <0x1d>;
+
+		Vdd-supply = <&vaux1>;
+		Vdd_IO-supply = <&vio>;
+
+		interrupt-parent = <&gpio6>;
+		interrupts = <21 20>; /* 181 and 180 */
+
+		/* click flags */
+		st,click-single-x;
+		st,click-single-y;
+		st,click-single-z;
+
+		/* Limits are 0.5g * value */
+		st,click-threshold-x = <8>;
+		st,click-threshold-y = <8>;
+		st,click-threshold-z = <10>;
+
+		/* Click must be longer than time limit */
+		st,click-time-limit = <9>;
+
+		/* Kind of debounce filter */
+		st,click-latency = <50>;
+
+		/* Interrupt line 2 for click detection */
+		st,irq2-click;
+
+		st,wakeup-x-hi;
+		st,wakeup-y-hi;
+		st,wakeup-threshold = <(800/18)>; /* millig-value / 18 to get HW values */
+
+		st,wakeup2-z-hi;
+		st,wakeup2-threshold = <(900/18)>; /* millig-value / 18 to get HW values */
+
+		st,hipass1-disable;
+		st,hipass2-disable;
+
+		st,axis-x = <1>;    /* LIS3_DEV_X */
+		st,axis-y = <(-2)>; /* LIS3_INV_DEV_Y */
+		st,axis-z = <(-3)>; /* LIS3_INV_DEV_Z */
+
+		st,min-limit-x = <(-32)>;
+		st,min-limit-y = <3>;
+		st,min-limit-z = <3>;
+
+		st,max-limit-x = <(-3)>;
+		st,max-limit-y = <32>;
+		st,max-limit-z = <32>;
+	};
 };
 
 &mmc1 {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index f4f78c40b564..3fdc84fddb70 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -92,6 +92,8 @@
 			ti,hwmods = "aes";
 			reg = <0x480c5000 0x50>;
 			interrupts = <0>;
+			dmas = <&sdma 65 &sdma 66>;
+			dma-names = "tx", "rx";
 		};
 
 		prm: prm@48306000 {
@@ -550,6 +552,8 @@
 			ti,hwmods = "sham";
 			reg = <0x480c3000 0x64>;
 			interrupts = <49>;
+			dmas = <&sdma 69>;
+			dma-names = "rx";
 		};
 
 		smartreflex_core: smartreflex@480cb000 {
diff --git a/arch/arm/boot/dts/omap4-cpu-thermal.dtsi b/arch/arm/boot/dts/omap4-cpu-thermal.dtsi
index cb9458feb2e3..ab7f87ae96f0 100644
--- a/arch/arm/boot/dts/omap4-cpu-thermal.dtsi
+++ b/arch/arm/boot/dts/omap4-cpu-thermal.dtsi
@@ -18,7 +18,7 @@ cpu_thermal: cpu_thermal {
 			/* sensor       ID */
         thermal-sensors = <&bandgap     0>;
 
-        trips {
+	cpu_trips: trips {
                 cpu_alert0: cpu_alert {
                         temperature = <100000>; /* millicelsius */
                         hysteresis = <2000>; /* millicelsius */
@@ -31,7 +31,7 @@ cpu_thermal: cpu_thermal {
                 };
         };
 
-	cooling-maps {
+	cpu_cooling_maps: cooling-maps {
 		map0 {
 			trip = <&cpu_alert0>;
 			cooling-device =
diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi
index e860ccd9d09c..f2a94fa62552 100644
--- a/arch/arm/boot/dts/omap4-duovero.dtsi
+++ b/arch/arm/boot/dts/omap4-duovero.dtsi
@@ -173,14 +173,12 @@
 	twl: twl@48 {
 		reg = <0x48>;
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;		/* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
 		compatible = "ti,twl6040";
 		reg = <0x4b>;
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;		/* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio6 0 GPIO_ACTIVE_HIGH>;		/* gpio_160 */
 
 		vio-supply = <&v1v8>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 150513506c19..7c15fb2e2fe4 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -372,7 +372,6 @@
 		reg = <0x48>;
 		/* IRQ# = 7 */
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
@@ -384,7 +383,6 @@
 
 		/* IRQ# = 119 */
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio4 31 GPIO_ACTIVE_HIGH>;  /* gpio line 127 */
 
 		vio-supply = <&v1v8>;
@@ -479,17 +477,17 @@
 };
 
 &uart2 {
-	interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART2_RX>;
 };
 
 &uart3 {
-	interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART3_RX>;
 };
 
 &uart4 {
-	interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART4_RX>;
 };
 
diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts
index 3e1da43068f6..8aca8dae968a 100644
--- a/arch/arm/boot/dts/omap4-sdp.dts
+++ b/arch/arm/boot/dts/omap4-sdp.dts
@@ -363,7 +363,6 @@
 		reg = <0x48>;
 		/* SPI = 0, IRQ# = 7, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
@@ -375,7 +374,6 @@
 
 		/* SPI = 0, IRQ# = 119, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio4 31 0>;  /* gpio line 127 */
 
 		vio-supply = <&v1v8>;
@@ -570,21 +568,21 @@
 };
 
 &uart2 {
-	interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART2_RX>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart2_pins>;
 };
 
 &uart3 {
-	interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART3_RX>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart3_pins>;
 };
 
 &uart4 {
-	interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART4_RX>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart4_pins>;
diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
index 062701e1a898..a4f1ba2e1903 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
@@ -185,7 +185,6 @@
 		reg = <0x48>;
 		/* SPI = 0, IRQ# = 7, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
@@ -197,7 +196,6 @@
 
 		/* SPI = 0, IRQ# = 119, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio6 22 0>; /* gpio 182 */
 
 		vio-supply = <&v1v8>;
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 87401d9f4d8b..f2091d1c9c36 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -14,7 +14,7 @@
 
 / {
 	compatible = "ti,omap4430", "ti,omap4";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&wakeupgen>;
 
 	aliases {
 		i2c0 = &i2c1;
@@ -56,6 +56,7 @@
 		#interrupt-cells = <3>;
 		reg = <0x48241000 0x1000>,
 		      <0x48240100 0x0100>;
+		interrupt-parent = <&gic>;
 	};
 
 	L2: l2-cache-controller@48242000 {
@@ -70,6 +71,15 @@
 		clocks = <&mpu_periphclk>;
 		reg = <0x48240600 0x20>;
 		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	/*
@@ -319,7 +329,7 @@
 		uart2: serial@4806c000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 		};
@@ -327,7 +337,7 @@
 		uart3: serial@48020000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 		};
@@ -335,7 +345,7 @@
 		uart4: serial@4806e000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
 		};
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index b54b271e153b..61ad2ea34720 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -412,7 +412,6 @@
 	palmas: palmas@48 {
 		compatible = "ti,palmas";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */
-		interrupt-parent = <&gic>;
 		reg = <0x48>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
diff --git a/arch/arm/boot/dts/omap5-core-thermal.dtsi b/arch/arm/boot/dts/omap5-core-thermal.dtsi
index 19212ac6eef0..de8a3d456cf7 100644
--- a/arch/arm/boot/dts/omap5-core-thermal.dtsi
+++ b/arch/arm/boot/dts/omap5-core-thermal.dtsi
@@ -13,7 +13,7 @@
 
 core_thermal: core_thermal {
 	polling-delay-passive = <250>; /* milliseconds */
-	polling-delay = <1000>; /* milliseconds */
+	polling-delay = <500>; /* milliseconds */
 
 			/* sensor       ID */
 	thermal-sensors = <&bandgap     2>;
diff --git a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
index 1b87aca88b77..bc3090f2e84b 100644
--- a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
+++ b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
@@ -13,7 +13,7 @@
 
 gpu_thermal: gpu_thermal {
 	polling-delay-passive = <250>; /* milliseconds */
-	polling-delay = <1000>; /* milliseconds */
+	polling-delay = <500>; /* milliseconds */
 
 			/* sensor       ID */
 	thermal-sensors = <&bandgap     1>;
diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index 159720d6c956..74777a6e200a 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts
@@ -311,7 +311,6 @@
 	palmas: palmas@48 {
 		compatible = "ti,palmas";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */
-		interrupt-parent = <&gic>;
 		reg = <0x48>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
@@ -521,7 +520,6 @@
 		pinctrl-0 = <&twl6040_pins>;
 
 		interrupts = <GIC_SPI 119 IRQ_TYPE_NONE>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio5 13 0>;  /* gpio line 141 */
 
 		vio-supply = <&smps7_reg>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index ddff674bd05e..77b5f70d0ebc 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -18,7 +18,7 @@
 	#size-cells = <1>;
 
 	compatible = "ti,omap5";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&wakeupgen>;
 
 	aliases {
 		i2c0 = &i2c1;
@@ -79,6 +79,7 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 
 	pmu {
@@ -95,6 +96,15 @@
 		      <0x48212000 0x1000>,
 		      <0x48214000 0x2000>,
 		      <0x48216000 0x2000>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	/*
@@ -458,7 +468,7 @@
 		uart1: serial@4806a000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806a000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart1";
 			clock-frequency = <48000000>;
 		};
@@ -466,7 +476,7 @@
 		uart2: serial@4806c000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 		};
@@ -474,7 +484,7 @@
 		uart3: serial@48020000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 		};
@@ -482,7 +492,7 @@
 		uart4: serial@4806e000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
 		};
@@ -490,7 +500,7 @@
 		uart5: serial@48066000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48066000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart5";
 			clock-frequency = <48000000>;
 		};
@@ -498,7 +508,7 @@
 		uart6: serial@48068000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48068000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart6";
 			clock-frequency = <48000000>;
 		};
@@ -883,14 +893,12 @@
 			usbhsohci: ohci@4a064800 {
 				compatible = "ti,ohci-omap3";
 				reg = <0x4a064800 0x400>;
-				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
 			};
 
 			usbhsehci: ehci@4a064c00 {
 				compatible = "ti,ehci-omap";
 				reg = <0x4a064c00 0x400>;
-				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
 			};
 		};
@@ -1079,4 +1087,8 @@
 	};
 };
 
+&cpu_thermal {
+	polling-delay = <500>; /* milliseconds */
+};
+
 /include/ "omap54xx-clocks.dtsi"
diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi
index 58c27466f012..83b425fb3ac2 100644
--- a/arch/arm/boot/dts/omap54xx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi
@@ -167,10 +167,18 @@
 		ti,index-starts-at-one;
 	};
 
+	dpll_core_byp_mux: dpll_core_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>;
+		ti,bit-shift = <23>;
+		reg = <0x012c>;
+	};
+
 	dpll_core_ck: dpll_core_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-core-clock";
-		clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>;
+		clocks = <&sys_clkin>, <&dpll_core_byp_mux>;
 		reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>;
 	};
 
@@ -294,10 +302,18 @@
 		clock-div = <1>;
 	};
 
+	dpll_iva_byp_mux: dpll_iva_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x01ac>;
+	};
+
 	dpll_iva_ck: dpll_iva_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>;
+		clocks = <&sys_clkin>, <&dpll_iva_byp_mux>;
 		reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>;
 	};
 
@@ -599,10 +615,19 @@
 	};
 };
 &cm_core_clocks {
+
+	dpll_per_byp_mux: dpll_per_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x014c>;
+	};
+
 	dpll_per_ck: dpll_per_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-clock";
-		clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>;
+		clocks = <&sys_clkin>, <&dpll_per_byp_mux>;
 		reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>;
 	};
 
@@ -714,10 +739,18 @@
 		ti,index-starts-at-one;
 	};
 
+	dpll_usb_byp_mux: dpll_usb_byp_mux {
+		#clock-cells = <0>;
+		compatible = "ti,mux-clock";
+		clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>;
+		ti,bit-shift = <23>;
+		reg = <0x018c>;
+	};
+
 	dpll_usb_ck: dpll_usb_ck {
 		#clock-cells = <0>;
 		compatible = "ti,omap4-dpll-j-type-clock";
-		clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>;
+		clocks = <&sys_clkin>, <&dpll_usb_byp_mux>;
 		reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>;
 	};
 
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index d771f687a13b..eccc78d3220b 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -411,6 +411,7 @@
 			"mac_clk_rx", "mac_clk_tx",
 			"clk_mac_ref", "clk_mac_refout",
 			"aclk_mac", "pclk_mac";
+		status = "disabled";
 	};
 
 	usb_host0_ehci: usb@ff500000 {
diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi
index fe2af9276312..b4544cf11bad 100644
--- a/arch/arm/boot/dts/sama5d3_emac.dtsi
+++ b/arch/arm/boot/dts/sama5d3_emac.dtsi
@@ -41,7 +41,7 @@
 			};
 
 			macb1: ethernet@f802c000 {
-				compatible = "cdns,at32ap7000-macb", "cdns,macb";
+				compatible = "cdns,at91sam9260-macb", "cdns,macb";
 				reg = <0xf802c000 0x100>;
 				interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>;
 				pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
index 252c3d1bda50..d9176e606173 100644
--- a/arch/arm/boot/dts/socfpga.dtsi
+++ b/arch/arm/boot/dts/socfpga.dtsi
@@ -660,7 +660,7 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 			reg = <0xfff01000 0x1000>;
-			interrupts = <0 156 4>;
+			interrupts = <0 155 4>;
 			num-cs = <4>;
 			clocks = <&spi_m_clk>;
 			status = "disabled";
@@ -713,6 +713,9 @@
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			clocks = <&l4_sp_clk>;
+			dmas = <&pdma 28>,
+			       <&pdma 29>;
+			dma-names = "tx", "rx";
 		};
 
 		uart1: serial1@ffc03000 {
@@ -722,6 +725,9 @@
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			clocks = <&l4_sp_clk>;
+			dmas = <&pdma 30>,
+			       <&pdma 31>;
+			dma-names = "tx", "rx";
 		};
 
 		rst: rstmgr@ffd05000 {
diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi
index ea28ebadab1a..eeb7afecbbe6 100644
--- a/arch/arm/boot/dts/stih416.dtsi
+++ b/arch/arm/boot/dts/stih416.dtsi
@@ -10,7 +10,7 @@
 #include "stih416-clock.dtsi"
 #include "stih416-pinctrl.dtsi"
 
-#include <dt-bindings/phy/phy-miphy365x.h>
+#include <dt-bindings/phy/phy.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/reset-controller/stih416-resets.h>
 / {
@@ -306,7 +306,7 @@
 			reg             = <0xfe380000 0x1000>;
 			interrupts      = <GIC_SPI 157 IRQ_TYPE_NONE>;
 			interrupt-names = "hostc";
-			phys	        = <&phy_port0 MIPHY_TYPE_SATA>;
+			phys	        = <&phy_port0 PHY_TYPE_SATA>;
 			phy-names       = "sata-phy";
 			resets	        = <&powerdown STIH416_SATA0_POWERDOWN>,
 					  <&softreset STIH416_SATA0_SOFTRESET>;
diff --git a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
index ab7891c43231..75742f8f96f3 100644
--- a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
+++ b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
@@ -56,6 +56,22 @@
 	model = "Olimex A10-OLinuXino-LIME";
 	compatible = "olimex,a10-olinuxino-lime", "allwinner,sun4i-a10";
 
+	cpus {
+		cpu0: cpu@0 {
+			/*
+			 * The A10-Lime is known to be unstable
+			 * when running at 1008 MHz
+			 */
+			operating-points = <
+				/* kHz    uV */
+				912000  1350000
+				864000  1300000
+				624000  1250000
+				>;
+			cooling-max-level = <2>;
+		};
+	};
+
 	soc@01c00000 {
 		emac: ethernet@01c0b000 {
 			pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 5c2925831f20..eebb7853e00b 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -75,7 +75,6 @@
 			clock-latency = <244144>; /* 8 32k periods */
 			operating-points = <
 				/* kHz    uV */
-				1056000 1500000
 				1008000 1400000
 				912000  1350000
 				864000  1300000
@@ -83,7 +82,7 @@
 				>;
 			#cooling-cells = <2>;
 			cooling-min-level = <0>;
-			cooling-max-level = <4>;
+			cooling-max-level = <3>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index f8818f1edbbe..883cb4873688 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -47,7 +47,6 @@
 			clock-latency = <244144>; /* 8 32k periods */
 			operating-points = <
 				/* kHz    uV */
-				1104000	1500000
 				1008000 1400000
 				912000  1350000
 				864000  1300000
@@ -57,7 +56,7 @@
 				>;
 			#cooling-cells = <2>;
 			cooling-min-level = <0>;
-			cooling-max-level = <6>;
+			cooling-max-level = <5>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 3a8530b79f1c..fdd181792b4b 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -105,7 +105,6 @@
 			clock-latency = <244144>; /* 8 32k periods */
 			operating-points = <
 				/* kHz    uV */
-				1008000 1450000
 				960000  1400000
 				912000  1400000
 				864000  1300000
@@ -116,7 +115,7 @@
 				>;
 			#cooling-cells = <2>;
 			cooling-min-level = <0>;
-			cooling-max-level = <7>;
+			cooling-max-level = <6>;
 		};
 
 		cpu@1 {
diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 4296b5398bf5..f58a3d9d5f13 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -8,7 +8,7 @@
 
 / {
 	compatible = "nvidia,tegra114";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&lic>;
 
 	host1x@50000000 {
 		compatible = "nvidia,tegra114-host1x", "simple-bus";
@@ -134,6 +134,19 @@
 		      <0x50046000 0x2000>;
 		interrupts = <GIC_PPI 9
 			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
+	};
+
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra114-ictlr", "nvidia,tegra30-ictlr";
+		reg = <0x60004000 0x100>,
+		      <0x60004100 0x50>,
+		      <0x60004200 0x50>,
+		      <0x60004300 0x50>,
+		      <0x60004400 0x50>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	timer@60005000 {
@@ -766,5 +779,6 @@
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
 			<GIC_PPI 10
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 };
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 4be06c6ea0c8..db85695aa7aa 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -10,7 +10,7 @@
 
 / {
 	compatible = "nvidia,tegra124";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&lic>;
 	#address-cells = <2>;
 	#size-cells = <2>;
 
@@ -173,6 +173,7 @@
 		      <0x0 0x50046000 0x0 0x2000>;
 		interrupts = <GIC_PPI 9
 			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
 	};
 
 	gpu@0,57000000 {
@@ -190,6 +191,18 @@
 		status = "disabled";
 	};
 
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr";
+		reg = <0x0 0x60004000 0x0 0x100>,
+		      <0x0 0x60004100 0x0 0x100>,
+		      <0x0 0x60004200 0x0 0x100>,
+		      <0x0 0x60004300 0x0 0x100>,
+		      <0x0 0x60004400 0x0 0x100>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
+	};
+
 	timer@0,60005000 {
 		compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer";
 		reg = <0x0 0x60005000 0x0 0x400>;
@@ -955,5 +968,6 @@
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 };
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index e5527f742696..adf6b048d0bb 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -7,7 +7,7 @@
 
 / {
 	compatible = "nvidia,tegra20";
-	interrupt-parent = <&intc>;
+	interrupt-parent = <&lic>;
 
 	host1x@50000000 {
 		compatible = "nvidia,tegra20-host1x", "simple-bus";
@@ -142,6 +142,7 @@
 
 	timer@50040600 {
 		compatible = "arm,cortex-a9-twd-timer";
+		interrupt-parent = <&intc>;
 		reg = <0x50040600 0x20>;
 		interrupts = <GIC_PPI 13
 			(GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
@@ -154,6 +155,7 @@
 		       0x50040100 0x0100>;
 		interrupt-controller;
 		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
 	};
 
 	cache-controller@50043000 {
@@ -165,6 +167,17 @@
 		cache-level = <2>;
 	};
 
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra20-ictlr";
+		reg = <0x60004000 0x100>,
+		      <0x60004100 0x50>,
+		      <0x60004200 0x50>,
+		      <0x60004300 0x50>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
+	};
+
 	timer@60005000 {
 		compatible = "nvidia,tegra20-timer";
 		reg = <0x60005000 0x60>;
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index db4810df142c..60e205a0f63d 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -8,7 +8,7 @@
 
 / {
 	compatible = "nvidia,tegra30";
-	interrupt-parent = <&intc>;
+	interrupt-parent = <&lic>;
 
 	pcie-controller@00003000 {
 		compatible = "nvidia,tegra30-pcie";
@@ -228,6 +228,7 @@
 	timer@50040600 {
 		compatible = "arm,cortex-a9-twd-timer";
 		reg = <0x50040600 0x20>;
+		interrupt-parent = <&intc>;
 		interrupts = <GIC_PPI 13
 			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
 		clocks = <&tegra_car TEGRA30_CLK_TWD>;
@@ -239,6 +240,7 @@
 		       0x50040100 0x0100>;
 		interrupt-controller;
 		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
 	};
 
 	cache-controller@50043000 {
@@ -250,6 +252,18 @@
 		cache-level = <2>;
 	};
 
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra30-ictlr";
+		reg = <0x60004000 0x100>,
+		      <0x60004100 0x50>,
+		      <0x60004200 0x50>,
+		      <0x60004300 0x50>,
+		      <0x60004400 0x50>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
+	};
+
 	timer@60005000 {
 		compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
 		reg = <0x60005000 0x400>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 33920df03640..7a2aeacd62c0 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -362,7 +362,6 @@
 		compatible = "arm,coresight-etb10", "arm,primecell";
 		reg = <0 0x20010000 0 0x1000>;
 
-		coresight-default-sink;
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
 		port {
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 6eaddc47c43d..37dc0fe1093f 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	unsigned int mpidr, this_cpu, that_cpu;
 	unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
 	struct completion inbound_alive;
-	struct tick_device *tdev;
-	enum clock_event_mode tdev_mode;
 	long volatile *handshake_ptr;
 	int ipi_nr, ret;
 
@@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
 	/* redirect GIC's SGIs to our counterpart */
 	gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
 
-	tdev = tick_get_device(this_cpu);
-	if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
-		tdev = NULL;
-	if (tdev) {
-		tdev_mode = tdev->evtdev->mode;
-		clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
-	}
+	tick_suspend_local();
 
 	ret = cpu_pm_enter();
 
@@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
 
 	ret = cpu_pm_exit();
 
-	if (tdev) {
-		clockevents_set_mode(tdev->evtdev, tdev_mode);
-		clockevents_program_event(tdev->evtdev,
-					  tdev->evtdev->next_event, 1);
-	}
+	tick_resume_local();
 
 	trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr);
 	local_fiq_enable();
diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig
index 0494c8f229a2..d59009878312 100644
--- a/arch/arm/configs/badge4_defconfig
+++ b/arch/arm/configs/badge4_defconfig
@@ -12,7 +12,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_FPE_NWFPE=y
 CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
-CONFIG_ARTHUR=m
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index b7e6b6fba5e0..06075b6d2463 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -99,7 +99,7 @@ CONFIG_PCI_RCAR_GEN2=y
 CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_SMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=16
 CONFIG_HIGHPTE=y
 CONFIG_CMA=y
 CONFIG_ARM_APPENDED_DTB=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index a097cffa1231..8e108599e1af 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -377,6 +377,7 @@ CONFIG_PWM_TWL=m
 CONFIG_PWM_TWL_LED=m
 CONFIG_OMAP_USB2=m
 CONFIG_TI_PIPE3=y
+CONFIG_TWL4030_USB=m
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_FS_XATTR is not set
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 38840a812924..8f6a5702b696 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -4,6 +4,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_PERF_EVENTS=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_SMP=y
+CONFIG_NR_CPUS=8
 CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
 CONFIG_HIGHPTE=y
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index f489fdaa19b8..37fe607a4ede 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -118,8 +118,8 @@ CONFIG_HID_ZEROPLUS=y
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
-CONFIG_USB_ISP1760_HCD=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
 CONFIG_MMC=y
 CONFIG_MMC_ARMMMCI=y
 CONFIG_NEW_LEDS=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
new file mode 100644
index 000000000000..8da2207b0072
--- /dev/null
+++ b/arch/arm/crypto/Kconfig
@@ -0,0 +1,130 @@
+
+menuconfig ARM_CRYPTO
+	bool "ARM Accelerated Cryptographic Algorithms"
+	depends on ARM
+	help
+	  Say Y here to choose from a selection of cryptographic algorithms
+	  implemented using ARM specific CPU features or instructions.
+
+if ARM_CRYPTO
+
+config CRYPTO_SHA1_ARM
+	tristate "SHA1 digest algorithm (ARM-asm)"
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using optimized ARM assembler.
+
+config CRYPTO_SHA1_ARM_NEON
+	tristate "SHA1 digest algorithm (ARM NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA1_ARM
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using optimized ARM NEON assembly, when NEON instructions are
+	  available.
+
+config CRYPTO_SHA1_ARM_CE
+	tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA1_ARM
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using special ARMv8 Crypto Extensions.
+
+config CRYPTO_SHA2_ARM_CE
+	tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA256_ARM
+	select CRYPTO_HASH
+	help
+	  SHA-256 secure hash standard (DFIPS 180-2) implemented
+	  using special ARMv8 Crypto Extensions.
+
+config CRYPTO_SHA256_ARM
+	tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+	select CRYPTO_HASH
+	depends on !CPU_V7M
+	help
+	  SHA-256 secure hash standard (DFIPS 180-2) implemented
+	  using optimized ARM assembler and NEON, when available.
+
+config CRYPTO_SHA512_ARM_NEON
+	tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-512 secure hash standard (DFIPS 180-2) implemented
+	  using ARM NEON instructions, when available.
+
+	  This version of SHA implements a 512 bit hash with 256 bits of
+	  security against collision attacks.
+
+	  This code also includes SHA-384, a 384 bit hash with 192 bits
+	  of security against collision attacks.
+
+config CRYPTO_AES_ARM
+	tristate "AES cipher algorithms (ARM-asm)"
+	depends on ARM
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES
+	help
+	  Use optimized AES assembler routines for ARM platforms.
+
+	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
+	  algorithm.
+
+	  Rijndael appears to be consistently a very good performer in
+	  both hardware and software across a wide range of computing
+	  environments regardless of its use in feedback or non-feedback
+	  modes. Its key setup time is excellent, and its key agility is
+	  good. Rijndael's very low memory requirements make it very well
+	  suited for restricted-space environments, in which it also
+	  demonstrates excellent performance. Rijndael's operations are
+	  among the easiest to defend against power and timing attacks.
+
+	  The AES specifies three key sizes: 128, 192 and 256 bits
+
+	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+
+config CRYPTO_AES_ARM_BS
+	tristate "Bit sliced AES using NEON instructions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM
+	select CRYPTO_ABLK_HELPER
+	help
+	  Use a faster and more secure NEON based implementation of AES in CBC,
+	  CTR and XTS modes
+
+	  Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
+	  and for XTS mode encryption, CBC and XTS mode decryption speedup is
+	  around 25%. (CBC encryption speed is not affected by this driver.)
+	  This implementation does not rely on any lookup tables so it is
+	  believed to be invulnerable to cache timing attacks.
+
+config CRYPTO_AES_ARM_CE
+	tristate "Accelerated AES using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_ABLK_HELPER
+	help
+	  Use an implementation of AES in CBC, CTR and XTS modes that uses
+	  ARMv8 Crypto Extensions
+
+config CRYPTO_GHASH_ARM_CE
+	tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_CRYPTD
+	help
+	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
+	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
+	  that is part of the ARMv8 Crypto Extensions
+
+endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..6ea828241fcb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
 
+ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+
+ifneq ($(ce-obj-y)$(ce-obj-m),)
+ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
+obj-y += $(ce-obj-y)
+obj-m += $(ce-obj-m)
+else
+$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
+$(warning $(ce-obj-y) $(ce-obj-m))
+endif
+endif
+
 aes-arm-y	:= aes-armv4.o aes_glue.o
 aes-arm-bs-y	:= aesbs-core.o aesbs-glue.o
 sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y	:= sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
 sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
+sha2-arm-ce-y	:= sha2-ce-core.o sha2-ce-glue.o
+aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
+ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL    $@
 $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
 	$(call cmd,perl)
 
-.PRECIOUS: $(obj)/aesbs-core.S
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+	$(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8cfa468ee570
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -0,0 +1,518 @@
+/*
+ * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+	.align		3
+
+	.macro		enc_round, state, key
+	aese.8		\state, \key
+	aesmc.8		\state, \state
+	.endm
+
+	.macro		dec_round, state, key
+	aesd.8		\state, \key
+	aesimc.8	\state, \state
+	.endm
+
+	.macro		enc_dround, key1, key2
+	enc_round	q0, \key1
+	enc_round	q0, \key2
+	.endm
+
+	.macro		dec_dround, key1, key2
+	dec_round	q0, \key1
+	dec_round	q0, \key2
+	.endm
+
+	.macro		enc_fround, key1, key2, key3
+	enc_round	q0, \key1
+	aese.8		q0, \key2
+	veor		q0, q0, \key3
+	.endm
+
+	.macro		dec_fround, key1, key2, key3
+	dec_round	q0, \key1
+	aesd.8		q0, \key2
+	veor		q0, q0, \key3
+	.endm
+
+	.macro		enc_dround_3x, key1, key2
+	enc_round	q0, \key1
+	enc_round	q1, \key1
+	enc_round	q2, \key1
+	enc_round	q0, \key2
+	enc_round	q1, \key2
+	enc_round	q2, \key2
+	.endm
+
+	.macro		dec_dround_3x, key1, key2
+	dec_round	q0, \key1
+	dec_round	q1, \key1
+	dec_round	q2, \key1
+	dec_round	q0, \key2
+	dec_round	q1, \key2
+	dec_round	q2, \key2
+	.endm
+
+	.macro		enc_fround_3x, key1, key2, key3
+	enc_round	q0, \key1
+	enc_round	q1, \key1
+	enc_round	q2, \key1
+	aese.8		q0, \key2
+	aese.8		q1, \key2
+	aese.8		q2, \key2
+	veor		q0, q0, \key3
+	veor		q1, q1, \key3
+	veor		q2, q2, \key3
+	.endm
+
+	.macro		dec_fround_3x, key1, key2, key3
+	dec_round	q0, \key1
+	dec_round	q1, \key1
+	dec_round	q2, \key1
+	aesd.8		q0, \key2
+	aesd.8		q1, \key2
+	aesd.8		q2, \key2
+	veor		q0, q0, \key3
+	veor		q1, q1, \key3
+	veor		q2, q2, \key3
+	.endm
+
+	.macro		do_block, dround, fround
+	cmp		r3, #12			@ which key size?
+	vld1.8		{q10-q11}, [ip]!
+	\dround		q8, q9
+	vld1.8		{q12-q13}, [ip]!
+	\dround		q10, q11
+	vld1.8		{q10-q11}, [ip]!
+	\dround		q12, q13
+	vld1.8		{q12-q13}, [ip]!
+	\dround		q10, q11
+	blo		0f			@ AES-128: 10 rounds
+	vld1.8		{q10-q11}, [ip]!
+	beq		1f			@ AES-192: 12 rounds
+	\dround		q12, q13
+	vld1.8		{q12-q13}, [ip]
+	\dround		q10, q11
+0:	\fround		q12, q13, q14
+	bx		lr
+
+1:	\dround		q12, q13
+	\fround		q10, q11, q14
+	bx		lr
+	.endm
+
+	/*
+	 * Internal, non-AAPCS compliant functions that implement the core AES
+	 * transforms. These should preserve all registers except q0 - q2 and ip
+	 * Arguments:
+	 *   q0        : first in/output block
+	 *   q1        : second in/output block (_3x version only)
+	 *   q2        : third in/output block (_3x version only)
+	 *   q8        : first round key
+	 *   q9        : secound round key
+	 *   ip        : address of 3rd round key
+	 *   q14       : final round key
+	 *   r3        : number of rounds
+	 */
+	.align		6
+aes_encrypt:
+	add		ip, r2, #32		@ 3rd round key
+.Laes_encrypt_tweak:
+	do_block	enc_dround, enc_fround
+ENDPROC(aes_encrypt)
+
+	.align		6
+aes_decrypt:
+	add		ip, r2, #32		@ 3rd round key
+	do_block	dec_dround, dec_fround
+ENDPROC(aes_decrypt)
+
+	.align		6
+aes_encrypt_3x:
+	add		ip, r2, #32		@ 3rd round key
+	do_block	enc_dround_3x, enc_fround_3x
+ENDPROC(aes_encrypt_3x)
+
+	.align		6
+aes_decrypt_3x:
+	add		ip, r2, #32		@ 3rd round key
+	do_block	dec_dround_3x, dec_fround_3x
+ENDPROC(aes_decrypt_3x)
+
+	.macro		prepare_key, rk, rounds
+	add		ip, \rk, \rounds, lsl #4
+	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
+	vld1.8		{q14}, [ip]		@ load last round key
+	.endm
+
+	/*
+	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks)
+	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks)
+	 */
+ENTRY(ce_aes_ecb_encrypt)
+	push		{r4, lr}
+	ldr		r4, [sp, #8]
+	prepare_key	r2, r3
+.Lecbencloop3x:
+	subs		r4, r4, #3
+	bmi		.Lecbenc1x
+	vld1.8		{q0-q1}, [r1, :64]!
+	vld1.8		{q2}, [r1, :64]!
+	bl		aes_encrypt_3x
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	b		.Lecbencloop3x
+.Lecbenc1x:
+	adds		r4, r4, #3
+	beq		.Lecbencout
+.Lecbencloop:
+	vld1.8		{q0}, [r1, :64]!
+	bl		aes_encrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lecbencloop
+.Lecbencout:
+	pop		{r4, pc}
+ENDPROC(ce_aes_ecb_encrypt)
+
+ENTRY(ce_aes_ecb_decrypt)
+	push		{r4, lr}
+	ldr		r4, [sp, #8]
+	prepare_key	r2, r3
+.Lecbdecloop3x:
+	subs		r4, r4, #3
+	bmi		.Lecbdec1x
+	vld1.8		{q0-q1}, [r1, :64]!
+	vld1.8		{q2}, [r1, :64]!
+	bl		aes_decrypt_3x
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	b		.Lecbdecloop3x
+.Lecbdec1x:
+	adds		r4, r4, #3
+	beq		.Lecbdecout
+.Lecbdecloop:
+	vld1.8		{q0}, [r1, :64]!
+	bl		aes_decrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lecbdecloop
+.Lecbdecout:
+	pop		{r4, pc}
+ENDPROC(ce_aes_ecb_decrypt)
+
+	/*
+	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[])
+	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[])
+	 */
+ENTRY(ce_aes_cbc_encrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+	vld1.8		{q0}, [r5]
+	prepare_key	r2, r3
+.Lcbcencloop:
+	vld1.8		{q1}, [r1, :64]!	@ get next pt block
+	veor		q0, q0, q1		@ ..and xor with iv
+	bl		aes_encrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lcbcencloop
+	vst1.8		{q0}, [r5]
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_cbc_encrypt)
+
+ENTRY(ce_aes_cbc_decrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+	vld1.8		{q6}, [r5]		@ keep iv in q6
+	prepare_key	r2, r3
+.Lcbcdecloop3x:
+	subs		r4, r4, #3
+	bmi		.Lcbcdec1x
+	vld1.8		{q0-q1}, [r1, :64]!
+	vld1.8		{q2}, [r1, :64]!
+	vmov		q3, q0
+	vmov		q4, q1
+	vmov		q5, q2
+	bl		aes_decrypt_3x
+	veor		q0, q0, q6
+	veor		q1, q1, q3
+	veor		q2, q2, q4
+	vmov		q6, q5
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	b		.Lcbcdecloop3x
+.Lcbcdec1x:
+	adds		r4, r4, #3
+	beq		.Lcbcdecout
+	vmov		q15, q14		@ preserve last round key
+.Lcbcdecloop:
+	vld1.8		{q0}, [r1, :64]!	@ get next ct block
+	veor		q14, q15, q6		@ combine prev ct with last key
+	vmov		q6, q0
+	bl		aes_decrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lcbcdecloop
+.Lcbcdecout:
+	vst1.8		{q6}, [r5]		@ keep iv in q6
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_cbc_decrypt)
+
+	/*
+	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 ctr[])
+	 */
+ENTRY(ce_aes_ctr_encrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+	vld1.8		{q6}, [r5]		@ load ctr
+	prepare_key	r2, r3
+	vmov		r6, s27			@ keep swabbed ctr in r6
+	rev		r6, r6
+	cmn		r6, r4			@ 32 bit overflow?
+	bcs		.Lctrloop
+.Lctrloop3x:
+	subs		r4, r4, #3
+	bmi		.Lctr1x
+	add		r6, r6, #1
+	vmov		q0, q6
+	vmov		q1, q6
+	rev		ip, r6
+	add		r6, r6, #1
+	vmov		q2, q6
+	vmov		s7, ip
+	rev		ip, r6
+	add		r6, r6, #1
+	vmov		s11, ip
+	vld1.8		{q3-q4}, [r1, :64]!
+	vld1.8		{q5}, [r1, :64]!
+	bl		aes_encrypt_3x
+	veor		q0, q0, q3
+	veor		q1, q1, q4
+	veor		q2, q2, q5
+	rev		ip, r6
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	vmov		s27, ip
+	b		.Lctrloop3x
+.Lctr1x:
+	adds		r4, r4, #3
+	beq		.Lctrout
+.Lctrloop:
+	vmov		q0, q6
+	bl		aes_encrypt
+	subs		r4, r4, #1
+	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
+	vld1.8		{q3}, [r1, :64]!
+	veor		q3, q0, q3
+	vst1.8		{q3}, [r0, :64]!
+
+	adds		r6, r6, #1		@ increment BE ctr
+	rev		ip, r6
+	vmov		s27, ip
+	bcs		.Lctrcarry
+	teq		r4, #0
+	bne		.Lctrloop
+.Lctrout:
+	vst1.8		{q6}, [r5]
+	pop		{r4-r6, pc}
+
+.Lctrhalfblock:
+	vld1.8		{d1}, [r1, :64]
+	veor		d0, d0, d1
+	vst1.8		{d0}, [r0, :64]
+	pop		{r4-r6, pc}
+
+.Lctrcarry:
+	.irp		sreg, s26, s25, s24
+	vmov		ip, \sreg		@ load next word of ctr
+	rev		ip, ip			@ ... to handle the carry
+	adds		ip, ip, #1
+	rev		ip, ip
+	vmov		\sreg, ip
+	bcc		0f
+	.endr
+0:	teq		r4, #0
+	beq		.Lctrout
+	b		.Lctrloop
+ENDPROC(ce_aes_ctr_encrypt)
+
+	/*
+	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
+	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
+	 */
+
+	.macro		next_tweak, out, in, const, tmp
+	vshr.s64	\tmp, \in, #63
+	vand		\tmp, \tmp, \const
+	vadd.u64	\out, \in, \in
+	vext.8		\tmp, \tmp, \tmp, #8
+	veor		\out, \out, \tmp
+	.endm
+
+	.align		3
+.Lxts_mul_x:
+	.quad		1, 0x87
+
+ce_aes_xts_init:
+	vldr		d14, .Lxts_mul_x
+	vldr		d15, .Lxts_mul_x + 8
+
+	ldrd		r4, r5, [sp, #16]	@ load args
+	ldr		r6, [sp, #28]
+	vld1.8		{q0}, [r5]		@ load iv
+	teq		r6, #1			@ start of a block?
+	bxne		lr
+
+	@ Encrypt the IV in q0 with the second AES key. This should only
+	@ be done at the start of a block.
+	ldr		r6, [sp, #24]		@ load AES key 2
+	prepare_key	r6, r3
+	add		ip, r6, #32		@ 3rd round key of key 2
+	b		.Laes_encrypt_tweak	@ tail call
+ENDPROC(ce_aes_xts_init)
+
+ENTRY(ce_aes_xts_encrypt)
+	push		{r4-r6, lr}
+
+	bl		ce_aes_xts_init		@ run shared prologue
+	prepare_key	r2, r3
+	vmov		q3, q0
+
+	teq		r6, #0			@ start of a block?
+	bne		.Lxtsenc3x
+
+.Lxtsencloop3x:
+	next_tweak	q3, q3, q7, q6
+.Lxtsenc3x:
+	subs		r4, r4, #3
+	bmi		.Lxtsenc1x
+	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
+	vld1.8		{q2}, [r1, :64]!
+	next_tweak	q4, q3, q7, q6
+	veor		q0, q0, q3
+	next_tweak	q5, q4, q7, q6
+	veor		q1, q1, q4
+	veor		q2, q2, q5
+	bl		aes_encrypt_3x
+	veor		q0, q0, q3
+	veor		q1, q1, q4
+	veor		q2, q2, q5
+	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
+	vst1.8		{q2}, [r0, :64]!
+	vmov		q3, q5
+	teq		r4, #0
+	beq		.Lxtsencout
+	b		.Lxtsencloop3x
+.Lxtsenc1x:
+	adds		r4, r4, #3
+	beq		.Lxtsencout
+.Lxtsencloop:
+	vld1.8		{q0}, [r1, :64]!
+	veor		q0, q0, q3
+	bl		aes_encrypt
+	veor		q0, q0, q3
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	beq		.Lxtsencout
+	next_tweak	q3, q3, q7, q6
+	b		.Lxtsencloop
+.Lxtsencout:
+	vst1.8		{q3}, [r5]
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_xts_encrypt)
+
+
+ENTRY(ce_aes_xts_decrypt)
+	push		{r4-r6, lr}
+
+	bl		ce_aes_xts_init		@ run shared prologue
+	prepare_key	r2, r3
+	vmov		q3, q0
+
+	teq		r6, #0			@ start of a block?
+	bne		.Lxtsdec3x
+
+.Lxtsdecloop3x:
+	next_tweak	q3, q3, q7, q6
+.Lxtsdec3x:
+	subs		r4, r4, #3
+	bmi		.Lxtsdec1x
+	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
+	vld1.8		{q2}, [r1, :64]!
+	next_tweak	q4, q3, q7, q6
+	veor		q0, q0, q3
+	next_tweak	q5, q4, q7, q6
+	veor		q1, q1, q4
+	veor		q2, q2, q5
+	bl		aes_decrypt_3x
+	veor		q0, q0, q3
+	veor		q1, q1, q4
+	veor		q2, q2, q5
+	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
+	vst1.8		{q2}, [r0, :64]!
+	vmov		q3, q5
+	teq		r4, #0
+	beq		.Lxtsdecout
+	b		.Lxtsdecloop3x
+.Lxtsdec1x:
+	adds		r4, r4, #3
+	beq		.Lxtsdecout
+.Lxtsdecloop:
+	vld1.8		{q0}, [r1, :64]!
+	veor		q0, q0, q3
+	add		ip, r2, #32		@ 3rd round key
+	bl		aes_decrypt
+	veor		q0, q0, q3
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	beq		.Lxtsdecout
+	next_tweak	q3, q3, q7, q6
+	b		.Lxtsdecloop
+.Lxtsdecout:
+	vst1.8		{q3}, [r5]
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_xts_decrypt)
+
+	/*
+	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
+	 *                             AES sbox substitution on each byte in
+	 *                             'input'
+	 */
+ENTRY(ce_aes_sub)
+	vdup.32		q1, r0
+	veor		q0, q0, q0
+	aese.8		q0, q1
+	vmov		r0, s0
+	bx		lr
+ENDPROC(ce_aes_sub)
+
+	/*
+	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
+	 *                                        operation on round key *src
+	 */
+ENTRY(ce_aes_invert)
+	vld1.8		{q0}, [r1]
+	aesimc.8	q0, q0
+	vst1.8		{q0}, [r0]
+	bx		lr
+ENDPROC(ce_aes_invert)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
new file mode 100644
index 000000000000..b445a5d56f43
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -0,0 +1,524 @@
+/*
+ * aes-ce-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-ce-core.S */
+asmlinkage u32 ce_aes_sub(u32 input);
+asmlinkage void ce_aes_invert(void *dst, void *src);
+
+asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks);
+asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks);
+
+asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks, u8 iv[]);
+asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks, u8 iv[]);
+
+asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks, u8 ctr[]);
+
+asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+				   int rounds, int blocks, u8 iv[],
+				   u8 const rk2[], int first);
+asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+				   int rounds, int blocks, u8 iv[],
+				   u8 const rk2[], int first);
+
+struct aes_block {
+	u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+			    unsigned int key_len)
+{
+	/*
+	 * The AES key schedule round constants
+	 */
+	static u8 const rcon[] = {
+		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+	};
+
+	u32 kwords = key_len / sizeof(u32);
+	struct aes_block *key_enc, *key_dec;
+	int i, j;
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key_enc, in_key, key_len);
+	ctx->key_length = key_len;
+
+	kernel_neon_begin();
+	for (i = 0; i < sizeof(rcon); i++) {
+		u32 *rki = ctx->key_enc + (i * kwords);
+		u32 *rko = rki + kwords;
+
+		rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
+		rko[0] = rko[0] ^ rki[0] ^ rcon[i];
+		rko[1] = rko[0] ^ rki[1];
+		rko[2] = rko[1] ^ rki[2];
+		rko[3] = rko[2] ^ rki[3];
+
+		if (key_len == AES_KEYSIZE_192) {
+			if (i >= 7)
+				break;
+			rko[4] = rko[3] ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+		} else if (key_len == AES_KEYSIZE_256) {
+			if (i >= 6)
+				break;
+			rko[4] = ce_aes_sub(rko[3]) ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+			rko[6] = rko[5] ^ rki[6];
+			rko[7] = rko[6] ^ rki[7];
+		}
+	}
+
+	/*
+	 * Generate the decryption keys for the Equivalent Inverse Cipher.
+	 * This involves reversing the order of the round keys, and applying
+	 * the Inverse Mix Columns transformation on all but the first and
+	 * the last one.
+	 */
+	key_enc = (struct aes_block *)ctx->key_enc;
+	key_dec = (struct aes_block *)ctx->key_dec;
+	j = num_rounds(ctx);
+
+	key_dec[0] = key_enc[j];
+	for (i = 1, j--; j > 0; i++, j--)
+		ce_aes_invert(key_dec + i, key_enc + j);
+	key_dec[i] = key_enc[0];
+
+	kernel_neon_end();
+	return 0;
+}
+
+static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+			 unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = ce_aes_expandkey(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+
+struct crypto_aes_xts_ctx {
+	struct crypto_aes_ctx key1;
+	struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2);
+	if (!ret)
+		ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
+				       key_len / 2);
+	if (!ret)
+		return 0;
+
+	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+				   walk.iv);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
+				   walk.iv);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err, blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+				   walk.iv);
+		nbytes -= blocks * AES_BLOCK_SIZE;
+		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+			break;
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	if (nbytes) {
+		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+		/*
+		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+		 * to tell aes_ctr_encrypt() to only read half a block.
+		 */
+		blocks = (nbytes <= 8) ? -1 : 1;
+
+		ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
+				   num_rounds(ctx), blocks, walk.iv);
+		memcpy(tdst, tail, nbytes);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = num_rounds(&ctx->key1);
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key1.key_enc, rounds, blocks,
+				   walk.iv, (u8 *)ctx->key2.key_enc, first);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int err, first, rounds = num_rounds(&ctx->key1);
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   (u8 *)ctx->key1.key_dec, rounds, blocks,
+				   walk.iv, (u8 *)ctx->key2.key_enc, first);
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+	.cra_name		= "__ecb-aes-ce",
+	.cra_driver_name	= "__driver-ecb-aes-ce",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ce_aes_setkey,
+		.encrypt	= ecb_encrypt,
+		.decrypt	= ecb_decrypt,
+	},
+}, {
+	.cra_name		= "__cbc-aes-ce",
+	.cra_driver_name	= "__driver-cbc-aes-ce",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ce_aes_setkey,
+		.encrypt	= cbc_encrypt,
+		.decrypt	= cbc_decrypt,
+	},
+}, {
+	.cra_name		= "__ctr-aes-ce",
+	.cra_driver_name	= "__driver-ctr-aes-ce",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ce_aes_setkey,
+		.encrypt	= ctr_encrypt,
+		.decrypt	= ctr_encrypt,
+	},
+}, {
+	.cra_name		= "__xts-aes-ce",
+	.cra_driver_name	= "__driver-xts-aes-ce",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= xts_set_key,
+		.encrypt	= xts_encrypt,
+		.decrypt	= xts_decrypt,
+	},
+}, {
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "ecb-aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "cbc-aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "xts(aes)",
+	.cra_driver_name	= "xts-aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+} };
+
+static int __init aes_init(void)
+{
+	if (!(elf_hwcap2 & HWCAP2_AES))
+		return -ENODEV;
+	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped
index 71e5fc7cfb18..1d1800f71c5b 100644
--- a/arch/arm/crypto/aesbs-core.S_shipped
+++ b/arch/arm/crypto/aesbs-core.S_shipped
@@ -58,14 +58,18 @@
 # define VFP_ABI_FRAME	0
 # define BSAES_ASM_EXTENDED_KEY
 # define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__	7
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
 #endif
 
 #ifdef __thumb__
 # define adrl adr
 #endif
 
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
 .text
 .syntax	unified 	@ ARMv7-capable assembler is expected to handle this
 #ifdef __thumb2__
@@ -74,8 +78,6 @@
 .code   32
 #endif
 
-.fpu	neon
-
 .type	_bsaes_decrypt8,%function
 .align	4
 _bsaes_decrypt8:
@@ -2095,9 +2097,11 @@ bsaes_xts_decrypt:
 	vld1.8	{q8}, [r0]			@ initial tweak
 	adr	r2, .Lxts_magic
 
+#ifndef	XTS_CHAIN_TWEAK
 	tst	r9, #0xf			@ if not multiple of 16
 	it	ne				@ Thumb2 thing, sanity check in ARM
 	subne	r9, #0x10			@ subtract another 16 bytes
+#endif
 	subs	r9, #0x80
 
 	blo	.Lxts_dec_short
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 15468fbbdea3..6d685298690e 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { {
 	.cra_name		= "__cbc-aes-neonbs",
 	.cra_driver_name	= "__driver-cbc-aes-neonbs",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
 	.cra_alignmask		= 7,
@@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { {
 	.cra_name		= "__ctr-aes-neonbs",
 	.cra_driver_name	= "__driver-ctr-aes-neonbs",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct aesbs_ctr_ctx),
 	.cra_alignmask		= 7,
@@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { {
 	.cra_name		= "__xts-aes-neonbs",
 	.cra_driver_name	= "__driver-xts-aes-neonbs",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
 	.cra_alignmask		= 7,
diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl
index be068db960ee..a4d3856e7d24 100644
--- a/arch/arm/crypto/bsaes-armv7.pl
+++ b/arch/arm/crypto/bsaes-armv7.pl
@@ -701,14 +701,18 @@ $code.=<<___;
 # define VFP_ABI_FRAME	0
 # define BSAES_ASM_EXTENDED_KEY
 # define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__	7
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
 #endif
 
 #ifdef __thumb__
 # define adrl adr
 #endif
 
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
 .text
 .syntax	unified 	@ ARMv7-capable assembler is expected to handle this
 #ifdef __thumb2__
@@ -717,8 +721,6 @@ $code.=<<___;
 .code   32
 #endif
 
-.fpu	neon
-
 .type	_bsaes_decrypt8,%function
 .align	4
 _bsaes_decrypt8:
@@ -2076,9 +2078,11 @@ bsaes_xts_decrypt:
 	vld1.8	{@XMM[8]}, [r0]			@ initial tweak
 	adr	$magic, .Lxts_magic
 
+#ifndef	XTS_CHAIN_TWEAK
 	tst	$len, #0xf			@ if not multiple of 16
 	it	ne				@ Thumb2 thing, sanity check in ARM
 	subne	$len, #0x10			@ subtract another 16 bytes
+#endif
 	subs	$len, #0x80
 
 	blo	.Lxts_dec_short
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..f6ab8bcc9efe
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -0,0 +1,94 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	SHASH		.req	q0
+	SHASH2		.req	q1
+	T1		.req	q2
+	T2		.req	q3
+	MASK		.req	q4
+	XL		.req	q5
+	XM		.req	q6
+	XH		.req	q7
+	IN1		.req	q7
+
+	SHASH_L		.req	d0
+	SHASH_H		.req	d1
+	SHASH2_L	.req	d2
+	T1_L		.req	d4
+	MASK_L		.req	d8
+	XL_L		.req	d10
+	XL_H		.req	d11
+	XM_L		.req	d12
+	XM_H		.req	d13
+	XH_L		.req	d14
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update)
+	vld1.64		{SHASH}, [r3]
+	vld1.64		{XL}, [r1]
+	vmov.i8		MASK, #0xe1
+	vext.8		SHASH2, SHASH, SHASH, #8
+	vshl.u64	MASK, MASK, #57
+	veor		SHASH2, SHASH2, SHASH
+
+	/* do the head block first, if supplied */
+	ldr		ip, [sp]
+	teq		ip, #0
+	beq		0f
+	vld1.64		{T1}, [ip]
+	teq		r0, #0
+	b		1f
+
+0:	vld1.64		{T1}, [r2]!
+	subs		r0, r0, #1
+
+1:	/* multiply XL by SHASH in GF(2^128) */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	vrev64.8	T1, T1
+#endif
+	vext.8		T2, XL, XL, #8
+	vext.8		IN1, T1, T1, #8
+	veor		T1, T1, T2
+	veor		XL, XL, IN1
+
+	vmull.p64	XH, SHASH_H, XL_H		@ a1 * b1
+	veor		T1, T1, XL
+	vmull.p64	XL, SHASH_L, XL_L		@ a0 * b0
+	vmull.p64	XM, SHASH2_L, T1_L		@ (a1 + a0)(b1 + b0)
+
+	vext.8		T1, XL, XH, #8
+	veor		T2, XL, XH
+	veor		XM, XM, T1
+	veor		XM, XM, T2
+	vmull.p64	T2, XL_L, MASK_L
+
+	vmov		XH_L, XM_H
+	vmov		XM_H, XL_L
+
+	veor		XL, XM, T2
+	vext.8		T2, XL, XL, #8
+	vmull.p64	XL, XL_L, MASK_L
+	veor		T2, T2, XH
+	veor		XL, XL, T2
+
+	bne		0b
+
+	vst1.64		{XL}, [r1]
+	bx		lr
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..03a39fe29246
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -0,0 +1,320 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/cryptd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/gf128mul.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_key {
+	u64	a;
+	u64	b;
+};
+
+struct ghash_desc_ctx {
+	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u32 count;
+};
+
+struct ghash_async_ctx {
+	struct cryptd_ahash *cryptd_tfm;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+				   struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+			unsigned int len)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	ctx->count += len;
+
+	if ((partial + len) >= GHASH_BLOCK_SIZE) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+		int blocks;
+
+		if (partial) {
+			int p = GHASH_BLOCK_SIZE - partial;
+
+			memcpy(ctx->buf + partial, src, p);
+			src += p;
+			len -= p;
+		}
+
+		blocks = len / GHASH_BLOCK_SIZE;
+		len %= GHASH_BLOCK_SIZE;
+
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, ctx->digest, src, key,
+				   partial ? ctx->buf : NULL);
+		kernel_neon_end();
+		src += blocks * GHASH_BLOCK_SIZE;
+		partial = 0;
+	}
+	if (len)
+		memcpy(ctx->buf + partial, src, len);
+	return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	if (partial) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+		kernel_neon_begin();
+		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+		kernel_neon_end();
+	}
+	put_unaligned_be64(ctx->digest[1], dst);
+	put_unaligned_be64(ctx->digest[0], dst + 8);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+	u64 a, b;
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	/* perform multiplication by 'x' in GF(2^128) */
+	b = get_unaligned_be64(inkey);
+	a = get_unaligned_be64(inkey + 8);
+
+	key->a = (a << 1) | (b >> 63);
+	key->b = (b << 1) | (a >> 63);
+
+	if (b >> 63)
+		key->b ^= 0xc200000000000000UL;
+
+	return 0;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize		= GHASH_DIGEST_SIZE,
+	.init			= ghash_init,
+	.update			= ghash_update,
+	.final			= ghash_final,
+	.setkey			= ghash_setkey,
+	.descsize		= sizeof(struct ghash_desc_ctx),
+	.base			= {
+		.cra_name	= "ghash",
+		.cra_driver_name = "__driver-ghash-ce",
+		.cra_priority	= 0,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
+		.cra_blocksize	= GHASH_BLOCK_SIZE,
+		.cra_ctxsize	= sizeof(struct ghash_key),
+		.cra_module	= THIS_MODULE,
+	},
+};
+
+static int ghash_async_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+	if (!may_use_simd()) {
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_init(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+		desc->tfm = child;
+		desc->flags = req->base.flags;
+		return crypto_shash_init(desc);
+	}
+}
+
+static int ghash_async_update(struct ahash_request *req)
+{
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+	if (!may_use_simd()) {
+		struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+		struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+		struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_update(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		return shash_ahash_update(req, desc);
+	}
+}
+
+static int ghash_async_final(struct ahash_request *req)
+{
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+	if (!may_use_simd()) {
+		struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+		struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+		struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_final(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		return crypto_shash_final(desc, req->result);
+	}
+}
+
+static int ghash_async_digest(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+	if (!may_use_simd()) {
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_digest(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+		desc->tfm = child;
+		desc->flags = req->base.flags;
+		return shash_ahash_digest(req, desc);
+	}
+}
+
+static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct crypto_ahash *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
+			       & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ahash_setkey(child, key, keylen);
+	crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
+			       & CRYPTO_TFM_RES_MASK);
+
+	return err;
+}
+
+static int ghash_async_init_tfm(struct crypto_tfm *tfm)
+{
+	struct cryptd_ahash *cryptd_tfm;
+	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
+					CRYPTO_ALG_INTERNAL,
+					CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ctx->cryptd_tfm = cryptd_tfm;
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct ahash_request) +
+				 crypto_ahash_reqsize(&cryptd_tfm->base));
+
+	return 0;
+}
+
+static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ahash(ctx->cryptd_tfm);
+}
+
+static struct ahash_alg ghash_async_alg = {
+	.init			= ghash_async_init,
+	.update			= ghash_async_update,
+	.final			= ghash_async_final,
+	.setkey			= ghash_async_setkey,
+	.digest			= ghash_async_digest,
+	.halg.digestsize	= GHASH_DIGEST_SIZE,
+	.halg.base		= {
+		.cra_name	= "ghash",
+		.cra_driver_name = "ghash-ce",
+		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+		.cra_blocksize	= GHASH_BLOCK_SIZE,
+		.cra_type	= &crypto_ahash_type,
+		.cra_ctxsize	= sizeof(struct ghash_async_ctx),
+		.cra_module	= THIS_MODULE,
+		.cra_init	= ghash_async_init_tfm,
+		.cra_exit	= ghash_async_exit_tfm,
+	},
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+	int err;
+
+	if (!(elf_hwcap2 & HWCAP2_PMULL))
+		return -ENODEV;
+
+	err = crypto_register_shash(&ghash_alg);
+	if (err)
+		return err;
+	err = crypto_register_ahash(&ghash_async_alg);
+	if (err)
+		goto err_shash;
+
+	return 0;
+
+err_shash:
+	crypto_unregister_shash(&ghash_alg);
+	return err;
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+	crypto_unregister_ahash(&ghash_async_alg);
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..b623f51ccbcf
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -0,0 +1,125 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+
+	k0		.req	q0
+	k1		.req	q1
+	k2		.req	q2
+	k3		.req	q3
+
+	ta0		.req	q4
+	ta1		.req	q5
+	tb0		.req	q5
+	tb1		.req	q4
+
+	dga		.req	q6
+	dgb		.req	q7
+	dgbs		.req	s28
+
+	dg0		.req	q12
+	dg1a0		.req	q13
+	dg1a1		.req	q14
+	dg1b0		.req	q14
+	dg1b1		.req	q13
+
+	.macro		add_only, op, ev, rc, s0, dg1
+	.ifnb		\s0
+	vadd.u32	tb\ev, q\s0, \rc
+	.endif
+	sha1h.32	dg1b\ev, dg0
+	.ifb		\dg1
+	sha1\op\().32	dg0, dg1a\ev, ta\ev
+	.else
+	sha1\op\().32	dg0, \dg1, ta\ev
+	.endif
+	.endm
+
+	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
+	sha1su0.32	q\s0, q\s1, q\s2
+	add_only	\op, \ev, \rc, \s1, \dg1
+	sha1su1.32	q\s0, q\s3
+	.endm
+
+	.align		6
+.Lsha1_rcon:
+	.word		0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999
+	.word		0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1
+	.word		0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc
+	.word		0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6
+
+	/*
+	 * void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
+	 *			  int blocks);
+	 */
+ENTRY(sha1_ce_transform)
+	/* load round constants */
+	adr		ip, .Lsha1_rcon
+	vld1.32		{k0-k1}, [ip, :128]!
+	vld1.32		{k2-k3}, [ip, :128]
+
+	/* load state */
+	vld1.32		{dga}, [r0]
+	vldr		dgbs, [r0, #16]
+
+	/* load input */
+0:	vld1.32		{q8-q9}, [r1]!
+	vld1.32		{q10-q11}, [r1]!
+	subs		r2, r2, #1
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	vrev32.8	q8, q8
+	vrev32.8	q9, q9
+	vrev32.8	q10, q10
+	vrev32.8	q11, q11
+#endif
+
+	vadd.u32	ta0, q8, k0
+	vmov		dg0, dga
+
+	add_update	c, 0, k0,  8,  9, 10, 11, dgb
+	add_update	c, 1, k0,  9, 10, 11,  8
+	add_update	c, 0, k0, 10, 11,  8,  9
+	add_update	c, 1, k0, 11,  8,  9, 10
+	add_update	c, 0, k1,  8,  9, 10, 11
+
+	add_update	p, 1, k1,  9, 10, 11,  8
+	add_update	p, 0, k1, 10, 11,  8,  9
+	add_update	p, 1, k1, 11,  8,  9, 10
+	add_update	p, 0, k1,  8,  9, 10, 11
+	add_update	p, 1, k2,  9, 10, 11,  8
+
+	add_update	m, 0, k2, 10, 11,  8,  9
+	add_update	m, 1, k2, 11,  8,  9, 10
+	add_update	m, 0, k2,  8,  9, 10, 11
+	add_update	m, 1, k2,  9, 10, 11,  8
+	add_update	m, 0, k3, 10, 11,  8,  9
+
+	add_update	p, 1, k3, 11,  8,  9, 10
+	add_only	p, 0, k3,  9
+	add_only	p, 1, k3, 10
+	add_only	p, 0, k3, 11
+	add_only	p, 1
+
+	/* update state */
+	vadd.u32	dga, dga, dg0
+	vadd.u32	dgb, dgb, dg1a0
+	bne		0b
+
+	/* store new state */
+	vst1.32		{dga}, [r0]
+	vstr		dgbs, [r0, #16]
+	bx		lr
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..80bc2fcd241a
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -0,0 +1,96 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+#include "sha1.h"
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
+				  int blocks);
+
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	if (!may_use_simd() ||
+	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+		return sha1_update_arm(desc, data, len);
+
+	kernel_neon_begin();
+	sha1_base_do_update(desc, data, len, sha1_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return sha1_finup_arm(desc, data, len, out);
+
+	kernel_neon_begin();
+	if (len)
+		sha1_base_do_update(desc, data, len, sha1_ce_transform);
+	sha1_base_do_finalize(desc, sha1_ce_transform);
+	kernel_neon_end();
+
+	return sha1_base_finish(desc, out);
+}
+
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ce_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg alg = {
+	.init			= sha1_base_init,
+	.update			= sha1_ce_update,
+	.final			= sha1_ce_final,
+	.finup			= sha1_ce_finup,
+	.descsize		= sizeof(struct sha1_state),
+	.digestsize		= SHA1_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha1",
+		.cra_driver_name	= "sha1-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+	if (!(elf_hwcap2 & HWCAP2_SHA1))
+		return -ENODEV;
+	return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h
index 75e6a417416b..ffd8bd08b1a7 100644
--- a/arch/arm/include/asm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -7,4 +7,7 @@
 extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
 			   unsigned int len);
 
+extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out);
+
 #endif
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index e31b0440c613..6fc73bf8766d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -22,127 +22,47 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <asm/byteorder.h>
-#include <asm/crypto/sha1.h>
 
+#include "sha1.h"
 
 asmlinkage void sha1_block_data_order(u32 *digest,
 		const unsigned char *data, unsigned int rounds);
 
-
-static int sha1_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-
-	return 0;
-}
-
-
-static int __sha1_update(struct sha1_state *sctx, const u8 *data,
-			 unsigned int len, unsigned int partial)
-{
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->buffer + partial, data, done);
-		sha1_block_data_order(sctx->state, sctx->buffer, 1);
-	}
-
-	if (len - done >= SHA1_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-		sha1_block_data_order(sctx->state, data + done, rounds);
-		done += rounds * SHA1_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buffer, data + done, len - done);
-	return 0;
-}
-
-
 int sha1_update_arm(struct shash_desc *desc, const u8 *data,
 		    unsigned int len)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-	int res;
+	/* make sure casting to sha1_block_fn() is safe */
+	BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA1_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buffer + partial, data, len);
-		return 0;
-	}
-	res = __sha1_update(sctx, data, len, partial);
-	return res;
+	return sha1_base_do_update(desc, data, len,
+				   (sha1_block_fn *)sha1_block_data_order);
 }
 EXPORT_SYMBOL_GPL(sha1_update_arm);
 
-
-/* Add padding and return the message digest. */
 static int sha1_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA1_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-	/* We need to fill a whole block for __sha1_update() */
-	if (padlen <= 56) {
-		sctx->count += padlen;
-		memcpy(sctx->buffer + index, padding, padlen);
-	} else {
-		__sha1_update(sctx, padding, padlen, index);
-	}
-	__sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
-
-	/* Store state in digest */
-	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
-	return 0;
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
+	return sha1_base_finish(desc, out);
 }
 
-
-static int sha1_export(struct shash_desc *desc, void *out)
+int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+		   unsigned int len, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	memcpy(out, sctx, sizeof(*sctx));
-	return 0;
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_block_data_order);
+	return sha1_final(desc, out);
 }
-
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	memcpy(sctx, in, sizeof(*sctx));
-	return 0;
-}
-
+EXPORT_SYMBOL_GPL(sha1_finup_arm);
 
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	sha1_init,
+	.init		=	sha1_base_init,
 	.update		=	sha1_update_arm,
 	.final		=	sha1_final,
-	.export		=	sha1_export,
-	.import		=	sha1_import,
+	.finup		=	sha1_finup_arm,
 	.descsize	=	sizeof(struct sha1_state),
-	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
 		.cra_driver_name=	"sha1-asm",
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 0b0083757d47..4e22f122f966 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -25,147 +25,60 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha1_base.h>
 #include <asm/neon.h>
 #include <asm/simd.h>
-#include <asm/crypto/sha1.h>
 
+#include "sha1.h"
 
 asmlinkage void sha1_transform_neon(void *state_h, const char *data,
 				    unsigned int rounds);
 
-
-static int sha1_neon_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-
-	return 0;
-}
-
-static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len, unsigned int partial)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->buffer + partial, data, done);
-		sha1_transform_neon(sctx->state, sctx->buffer, 1);
-	}
-
-	if (len - done >= SHA1_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
-		sha1_transform_neon(sctx->state, data + done, rounds);
-		done += rounds * SHA1_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buffer, data + done, len - done);
-
-	return 0;
-}
-
 static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+			  unsigned int len)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-	int res;
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA1_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buffer + partial, data, len);
+	if (!may_use_simd() ||
+	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+		return sha1_update_arm(desc, data, len);
 
-		return 0;
-	}
-
-	if (!may_use_simd()) {
-		res = sha1_update_arm(desc, data, len);
-	} else {
-		kernel_neon_begin();
-		res = __sha1_neon_update(desc, data, len, partial);
-		kernel_neon_end();
-	}
-
-	return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_neon_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA1_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-	if (!may_use_simd()) {
-		sha1_update_arm(desc, padding, padlen);
-		sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_neon_begin();
-		/* We need to fill a whole block for __sha1_neon_update() */
-		if (padlen <= 56) {
-			sctx->count += padlen;
-			memcpy(sctx->buffer + index, padding, padlen);
-		} else {
-			__sha1_neon_update(desc, padding, padlen, index);
-		}
-		__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-		kernel_neon_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
+	kernel_neon_begin();
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_transform_neon);
+	kernel_neon_end();
 
 	return 0;
 }
 
-static int sha1_neon_export(struct shash_desc *desc, void *out)
+static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+	if (!may_use_simd())
+		return sha1_finup_arm(desc, data, len, out);
 
-	memcpy(out, sctx, sizeof(*sctx));
+	kernel_neon_begin();
+	if (len)
+		sha1_base_do_update(desc, data, len,
+				    (sha1_block_fn *)sha1_transform_neon);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
+	kernel_neon_end();
 
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_neon_import(struct shash_desc *desc, const void *in)
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
+	return sha1_neon_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	sha1_neon_init,
+	.init		=	sha1_base_init,
 	.update		=	sha1_neon_update,
 	.final		=	sha1_neon_final,
-	.export		=	sha1_neon_export,
-	.import		=	sha1_neon_import,
+	.finup		=	sha1_neon_finup,
 	.descsize	=	sizeof(struct sha1_state),
-	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-neon",
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..87ec11a5f405
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -0,0 +1,125 @@
+/*
+ * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+
+	k0		.req	q7
+	k1		.req	q8
+	rk		.req	r3
+
+	ta0		.req	q9
+	ta1		.req	q10
+	tb0		.req	q10
+	tb1		.req	q9
+
+	dga		.req	q11
+	dgb		.req	q12
+
+	dg0		.req	q13
+	dg1		.req	q14
+	dg2		.req	q15
+
+	.macro		add_only, ev, s0
+	vmov		dg2, dg0
+	.ifnb		\s0
+	vld1.32		{k\ev}, [rk, :128]!
+	.endif
+	sha256h.32	dg0, dg1, tb\ev
+	sha256h2.32	dg1, dg2, tb\ev
+	.ifnb		\s0
+	vadd.u32	ta\ev, q\s0, k\ev
+	.endif
+	.endm
+
+	.macro		add_update, ev, s0, s1, s2, s3
+	sha256su0.32	q\s0, q\s1
+	add_only	\ev, \s1
+	sha256su1.32	q\s0, q\s2, q\s3
+	.endm
+
+	.align		6
+.Lsha256_rcon:
+	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+	/*
+	 * void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
+				  int blocks);
+	 */
+ENTRY(sha2_ce_transform)
+	/* load state */
+	vld1.32		{dga-dgb}, [r0]
+
+	/* load input */
+0:	vld1.32		{q0-q1}, [r1]!
+	vld1.32		{q2-q3}, [r1]!
+	subs		r2, r2, #1
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	vrev32.8	q0, q0
+	vrev32.8	q1, q1
+	vrev32.8	q2, q2
+	vrev32.8	q3, q3
+#endif
+
+	/* load first round constant */
+	adr		rk, .Lsha256_rcon
+	vld1.32		{k0}, [rk, :128]!
+
+	vadd.u32	ta0, q0, k0
+	vmov		dg0, dga
+	vmov		dg1, dgb
+
+	add_update	1, 0, 1, 2, 3
+	add_update	0, 1, 2, 3, 0
+	add_update	1, 2, 3, 0, 1
+	add_update	0, 3, 0, 1, 2
+	add_update	1, 0, 1, 2, 3
+	add_update	0, 1, 2, 3, 0
+	add_update	1, 2, 3, 0, 1
+	add_update	0, 3, 0, 1, 2
+	add_update	1, 0, 1, 2, 3
+	add_update	0, 1, 2, 3, 0
+	add_update	1, 2, 3, 0, 1
+	add_update	0, 3, 0, 1, 2
+
+	add_only	1, 1
+	add_only	0, 2
+	add_only	1, 3
+	add_only	0
+
+	/* update state */
+	vadd.u32	dga, dga, dg0
+	vadd.u32	dgb, dgb, dg1
+	bne		0b
+
+	/* store new state */
+	vst1.32		{dga-dgb}, [r0]
+	bx		lr
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..0755b2d657f3
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -0,0 +1,114 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+
+#include "sha256_glue.h"
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
+				  int blocks);
+
+static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	if (!may_use_simd() ||
+	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+		return crypto_sha256_arm_update(desc, data, len);
+
+	kernel_neon_begin();
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return crypto_sha256_arm_finup(desc, data, len, out);
+
+	kernel_neon_begin();
+	if (len)
+		sha256_base_do_update(desc, data, len,
+				      (sha256_block_fn *)sha2_ce_transform);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+
+	return sha256_base_finish(desc, out);
+}
+
+static int sha2_ce_final(struct shash_desc *desc, u8 *out)
+{
+	return sha2_ce_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+	.init			= sha224_base_init,
+	.update			= sha2_ce_update,
+	.final			= sha2_ce_final,
+	.finup			= sha2_ce_finup,
+	.descsize		= sizeof(struct sha256_state),
+	.digestsize		= SHA224_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha224",
+		.cra_driver_name	= "sha224-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+}, {
+	.init			= sha256_base_init,
+	.update			= sha2_ce_update,
+	.final			= sha2_ce_final,
+	.finup			= sha2_ce_finup,
+	.descsize		= sizeof(struct sha256_state),
+	.digestsize		= SHA256_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha256",
+		.cra_driver_name	= "sha256-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+	if (!(elf_hwcap2 & HWCAP2_SHA2))
+		return -ENODEV;
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..fac0533ea633
--- /dev/null
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -0,0 +1,716 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA256 block procedure for ARMv4. May 2007.
+
+# Performance is ~2x better than gcc 3.4 generated code and in "abso-
+# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+# byte [on single-issue Xscale PXA250 core].
+
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 22% improvement on
+# Cortex A8 core and ~20 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 16%
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that latter performs sub-optimally, nothing was done
+# about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0";	$t0="r0";
+$inp="r1";	$t4="r1";
+$len="r2";	$t1="r2";
+$T1="r3";	$t3="r3";
+$A="r4";
+$B="r5";
+$C="r6";
+$D="r7";
+$E="r8";
+$F="r9";
+$G="r10";
+$H="r11";
+@V=($A,$B,$C,$D,$E,$F,$G,$H);
+$t2="r12";
+$Ktbl="r14";
+
+@Sigma0=( 2,13,22);
+@Sigma1=( 6,11,25);
+@sigma0=( 7,18, 3);
+@sigma1=(17,19,10);
+
+sub BODY_00_15 {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___ if ($i<16);
+#if __ARM_ARCH__>=7
+	@ ldr	$t1,[$inp],#4			@ $i
+# if $i==15
+	str	$inp,[sp,#17*4]			@ make room for $t4
+# endif
+	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
+	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	$t1,$t1
+# endif
+#else
+	@ ldrb	$t1,[$inp,#3]			@ $i
+	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
+	ldrb	$t2,[$inp,#2]
+	ldrb	$t0,[$inp,#1]
+	orr	$t1,$t1,$t2,lsl#8
+	ldrb	$t2,[$inp],#4
+	orr	$t1,$t1,$t0,lsl#16
+# if $i==15
+	str	$inp,[sp,#17*4]			@ make room for $t4
+# endif
+	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+	orr	$t1,$t1,$t2,lsl#24
+	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
+#endif
+___
+$code.=<<___;
+	ldr	$t2,[$Ktbl],#4			@ *K256++
+	add	$h,$h,$t1			@ h+=X[i]
+	str	$t1,[sp,#`$i%16`*4]
+	eor	$t1,$f,$g
+	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
+	and	$t1,$t1,$e
+	add	$h,$h,$t2			@ h+=K256[i]
+	eor	$t1,$t1,$g			@ Ch(e,f,g)
+	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
+	add	$h,$h,$t1			@ h+=Ch(e,f,g)
+#if $i==31
+	and	$t2,$t2,#0xff
+	cmp	$t2,#0xf2			@ done?
+#endif
+#if $i<15
+# if __ARM_ARCH__>=7
+	ldr	$t1,[$inp],#4			@ prefetch
+# else
+	ldrb	$t1,[$inp,#3]
+# endif
+	eor	$t2,$a,$b			@ a^b, b^c in next round
+#else
+	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
+	eor	$t2,$a,$b			@ a^b, b^c in next round
+	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
+#endif
+	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
+	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
+	add	$d,$d,$h			@ d+=h
+	eor	$t3,$t3,$b			@ Maj(a,b,c)
+	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
+	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
+___
+	($t2,$t3)=($t3,$t2);
+}
+
+sub BODY_16_XX {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___;
+	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
+	@ ldr	$t4,[sp,#`($i+14)%16`*4]
+	mov	$t0,$t1,ror#$sigma0[0]
+	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
+	mov	$t2,$t4,ror#$sigma1[0]
+	eor	$t0,$t0,$t1,ror#$sigma0[1]
+	eor	$t2,$t2,$t4,ror#$sigma1[1]
+	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
+	ldr	$t1,[sp,#`($i+0)%16`*4]
+	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
+	ldr	$t4,[sp,#`($i+9)%16`*4]
+
+	add	$t2,$t2,$t0
+	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
+	add	$t1,$t1,$t2
+	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
+	add	$t1,$t1,$t4			@ X[i]
+___
+	&BODY_00_15(@_);
+}
+
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code	32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
+.type	K256,%object
+.align	5
+K256:
+.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size	K256,.-K256
+.word	0				@ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align	5
+
+.global	sha256_block_data_order
+.type	sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+	sub	r3,pc,#8		@ sha256_block_data_order
+#else
+	adr	r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#ARMV8_SHA256
+	bne	.LARMv8
+	tst	r12,#ARMV7_NEON
+	bne	.LNEON
+#endif
+	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
+	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
+	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+	sub	$Ktbl,r3,#256+32	@ K256
+	sub	sp,sp,#16*4		@ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+	ldr	$t1,[$inp],#4
+# else
+	ldrb	$t1,[$inp,#3]
+# endif
+	eor	$t3,$B,$C		@ magic
+	eor	$t2,$t2,$t2
+___
+for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=".Lrounds_16_xx:\n";
+for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+#if __ARM_ARCH__>=7
+	ite	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	ldreq	$t3,[sp,#16*4]		@ pull ctx
+	bne	.Lrounds_16_xx
+
+	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
+	ldr	$t0,[$t3,#0]
+	ldr	$t1,[$t3,#4]
+	ldr	$t2,[$t3,#8]
+	add	$A,$A,$t0
+	ldr	$t0,[$t3,#12]
+	add	$B,$B,$t1
+	ldr	$t1,[$t3,#16]
+	add	$C,$C,$t2
+	ldr	$t2,[$t3,#20]
+	add	$D,$D,$t0
+	ldr	$t0,[$t3,#24]
+	add	$E,$E,$t1
+	ldr	$t1,[$t3,#28]
+	add	$F,$F,$t2
+	ldr	$inp,[sp,#17*4]		@ pull inp
+	ldr	$t2,[sp,#18*4]		@ pull inp+len
+	add	$G,$G,$t0
+	add	$H,$H,$t1
+	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
+	cmp	$inp,$t2
+	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
+	bne	.Loop
+
+	add	sp,sp,#`16+3`*4	@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r11,pc}
+#else
+	ldmia	sp!,{r4-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	sha256_block_data_order,.-sha256_block_data_order
+___
+######################################################################
+# NEON stuff
+#
+{{{
+my @X=map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
+my $Xfer=$t4;
+my $j=0;
+
+sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
+sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
+
+sub AUTOLOAD()          # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+  my $arg = pop;
+    $arg = "#$arg" if ($arg*1 eq $arg);
+    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Xupdate()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vshr_u32	($T2,$T0,$sigma0[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vshr_u32	($T1,$T0,$sigma0[2]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vsli_32	($T2,$T0,32-$sigma0[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vshr_u32	($T3,$T0,$sigma0[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&veor		($T1,$T1,$T2);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vsli_32	($T3,$T0,32-$sigma0[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &veor		($T5,$T5,$T4);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &veor		($T5,$T5,$T4);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vld1_32	("{$T0}","[$Ktbl,:128]!");
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vadd_i32	($T0,$T0,@X[0]);
+	 while($#insns>=2) { eval(shift(@insns)); }
+	&vst1_32	("{$T0}","[$Xfer,:128]!");
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+
+	push(@X,shift(@X));		# "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vld1_32	("{$T0}","[$Ktbl,:128]!");
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vrev32_8	(@X[0],@X[0]);
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	 eval(shift(@insns));
+	&vadd_i32	($T0,$T0,@X[0]);
+	 foreach (@insns) { eval; }	# remaining instructions
+	&vst1_32	("{$T0}","[$Xfer,:128]!");
+
+	push(@X,shift(@X));		# "rotate" X[]
+}
+
+sub body_00_15 () {
+	(
+	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
+	'&eor	($t1,$f,$g)',
+	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
+	'&and	($t1,$t1,$e)',
+	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
+	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+	'&eor	($t1,$t1,$g)',			# Ch(e,f,g)
+	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
+	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
+	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
+	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
+	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
+	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
+	'&ldr	($t1,"[sp,#64]")			if ($j==31)',
+	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
+	'&add	($d,$d,$h)',			# d+=h
+	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
+	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
+	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+	)
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.global	sha256_block_data_order_neon
+.type	sha256_block_data_order_neon,%function
+.align	4
+sha256_block_data_order_neon:
+.LNEON:
+	stmdb	sp!,{r4-r12,lr}
+
+	sub	$H,sp,#16*4+16
+	adrl	$Ktbl,K256
+	bic	$H,$H,#15		@ align for 128-bit stores
+	mov	$t2,sp
+	mov	sp,$H			@ alloca
+	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
+
+	vld1.8		{@X[0]},[$inp]!
+	vld1.8		{@X[1]},[$inp]!
+	vld1.8		{@X[2]},[$inp]!
+	vld1.8		{@X[3]},[$inp]!
+	vld1.32		{$T0},[$Ktbl,:128]!
+	vld1.32		{$T1},[$Ktbl,:128]!
+	vld1.32		{$T2},[$Ktbl,:128]!
+	vld1.32		{$T3},[$Ktbl,:128]!
+	vrev32.8	@X[0],@X[0]		@ yes, even on
+	str		$ctx,[sp,#64]
+	vrev32.8	@X[1],@X[1]		@ big-endian
+	str		$inp,[sp,#68]
+	mov		$Xfer,sp
+	vrev32.8	@X[2],@X[2]
+	str		$len,[sp,#72]
+	vrev32.8	@X[3],@X[3]
+	str		$t2,[sp,#76]		@ save original sp
+	vadd.i32	$T0,$T0,@X[0]
+	vadd.i32	$T1,$T1,@X[1]
+	vst1.32		{$T0},[$Xfer,:128]!
+	vadd.i32	$T2,$T2,@X[2]
+	vst1.32		{$T1},[$Xfer,:128]!
+	vadd.i32	$T3,$T3,@X[3]
+	vst1.32		{$T2},[$Xfer,:128]!
+	vst1.32		{$T3},[$Xfer,:128]!
+
+	ldmia		$ctx,{$A-$H}
+	sub		$Xfer,$Xfer,#64
+	ldr		$t1,[sp,#0]
+	eor		$t2,$t2,$t2
+	eor		$t3,$B,$C
+	b		.L_00_48
+
+.align	4
+.L_00_48:
+___
+	&Xupdate(\&body_00_15);
+	&Xupdate(\&body_00_15);
+	&Xupdate(\&body_00_15);
+	&Xupdate(\&body_00_15);
+$code.=<<___;
+	teq	$t1,#0				@ check for K256 terminator
+	ldr	$t1,[sp,#0]
+	sub	$Xfer,$Xfer,#64
+	bne	.L_00_48
+
+	ldr		$inp,[sp,#68]
+	ldr		$t0,[sp,#72]
+	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
+	teq		$inp,$t0
+	it		eq
+	subeq		$inp,$inp,#64		@ avoid SEGV
+	vld1.8		{@X[0]},[$inp]!		@ load next input block
+	vld1.8		{@X[1]},[$inp]!
+	vld1.8		{@X[2]},[$inp]!
+	vld1.8		{@X[3]},[$inp]!
+	it		ne
+	strne		$inp,[sp,#68]
+	mov		$Xfer,sp
+___
+	&Xpreload(\&body_00_15);
+	&Xpreload(\&body_00_15);
+	&Xpreload(\&body_00_15);
+	&Xpreload(\&body_00_15);
+$code.=<<___;
+	ldr	$t0,[$t1,#0]
+	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
+	ldr	$t2,[$t1,#4]
+	ldr	$t3,[$t1,#8]
+	ldr	$t4,[$t1,#12]
+	add	$A,$A,$t0			@ accumulate
+	ldr	$t0,[$t1,#16]
+	add	$B,$B,$t2
+	ldr	$t2,[$t1,#20]
+	add	$C,$C,$t3
+	ldr	$t3,[$t1,#24]
+	add	$D,$D,$t4
+	ldr	$t4,[$t1,#28]
+	add	$E,$E,$t0
+	str	$A,[$t1],#4
+	add	$F,$F,$t2
+	str	$B,[$t1],#4
+	add	$G,$G,$t3
+	str	$C,[$t1],#4
+	add	$H,$H,$t4
+	str	$D,[$t1],#4
+	stmia	$t1,{$E-$H}
+
+	ittte	ne
+	movne	$Xfer,sp
+	ldrne	$t1,[sp,#0]
+	eorne	$t2,$t2,$t2
+	ldreq	sp,[sp,#76]			@ restore original sp
+	itt	ne
+	eorne	$t3,$B,$C
+	bne	.L_00_48
+
+	ldmia	sp!,{r4-r12,pc}
+.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+___
+}}}
+######################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
+my @MSG=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
+my $Ktbl="r3";
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
+# else
+#  define INST(a,b,c,d)	.byte	a,b,c,d
+# endif
+
+.type	sha256_block_data_order_armv8,%function
+.align	5
+sha256_block_data_order_armv8:
+.LARMv8:
+	vld1.32	{$ABCD,$EFGH},[$ctx]
+# ifdef __thumb2__
+	adr	$Ktbl,.LARMv8
+	sub	$Ktbl,$Ktbl,#.LARMv8-K256
+# else
+	adrl	$Ktbl,K256
+# endif
+	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
+
+.Loop_v8:
+	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
+	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
+	vld1.32		{$W0},[$Ktbl]!
+	vrev32.8	@MSG[0],@MSG[0]
+	vrev32.8	@MSG[1],@MSG[1]
+	vrev32.8	@MSG[2],@MSG[2]
+	vrev32.8	@MSG[3],@MSG[3]
+	vmov		$ABCD_SAVE,$ABCD	@ offload
+	vmov		$EFGH_SAVE,$EFGH
+	teq		$inp,$len
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+	vld1.32		{$W1},[$Ktbl]!
+	vadd.i32	$W0,$W0,@MSG[0]
+	sha256su0	@MSG[0],@MSG[1]
+	vmov		$abcd,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+	sha256su1	@MSG[0],@MSG[2],@MSG[3]
+___
+	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+	vld1.32		{$W1},[$Ktbl]!
+	vadd.i32	$W0,$W0,@MSG[0]
+	vmov		$abcd,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+
+	vld1.32		{$W0},[$Ktbl]!
+	vadd.i32	$W1,$W1,@MSG[1]
+	vmov		$abcd,$ABCD
+	sha256h		$ABCD,$EFGH,$W1
+	sha256h2	$EFGH,$abcd,$W1
+
+	vld1.32		{$W1},[$Ktbl]
+	vadd.i32	$W0,$W0,@MSG[2]
+	sub		$Ktbl,$Ktbl,#256-16	@ rewind
+	vmov		$abcd,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+
+	vadd.i32	$W1,$W1,@MSG[3]
+	vmov		$abcd,$ABCD
+	sha256h		$ABCD,$EFGH,$W1
+	sha256h2	$EFGH,$abcd,$W1
+
+	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
+	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
+	it		ne
+	bne		.Loop_v8
+
+	vst1.32		{$ABCD,$EFGH},[$ctx]
+
+	ret		@ bx lr
+.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm   OPENSSL_armcap_P,4,4
+#endif
+___
+
+open SELF,$0;
+while(<SELF>) {
+	next if (/^#!/);
+	last if (!s/^#/@/ and !/^$/);
+	print;
+}
+close SELF;
+
+{   my  %opcode = (
+	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
+	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);
+
+    sub unsha256 {
+	my ($mnemonic,$arg)=@_;
+
+	if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+					 |(($2&7)<<17)|(($2&8)<<4)
+					 |(($3&7)<<1) |(($3&8)<<2);
+	    # since ARMv7 instructions are always encoded little-endian.
+	    # correct solution is to use .inst directive, but older
+	    # assemblers don't implement it:-(
+	    sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
+			$word&0xff,($word>>8)&0xff,
+			($word>>16)&0xff,($word>>24)&0xff,
+			$mnemonic,$arg;
+	}
+    }
+}
+
+foreach (split($/,$code)) {
+
+	s/\`([^\`]*)\`/eval $1/geo;
+
+	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+
+	s/\bret\b/bx	lr/go		or
+	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4
+
+	print $_,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..555a1a8eec90
--- /dev/null
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -0,0 +1,2808 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA256 block procedure for ARMv4. May 2007.
+
+@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
+@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+@ byte [on single-issue Xscale PXA250 core].
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
+@ Cortex A8 core and ~20 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+@ September 2013.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process one
+@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+@ code (meaning that latter performs sub-optimally, nothing was done
+@ about it).
+
+@ May 2014.
+@
+@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code	32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
+.type	K256,%object
+.align	5
+K256:
+.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size	K256,.-K256
+.word	0				@ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align	5
+
+.global	sha256_block_data_order
+.type	sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+	sub	r3,pc,#8		@ sha256_block_data_order
+#else
+	adr	r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#ARMV8_SHA256
+	bne	.LARMv8
+	tst	r12,#ARMV7_NEON
+	bne	.LNEON
+#endif
+	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
+	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
+	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+	sub	r14,r3,#256+32	@ K256
+	sub	sp,sp,#16*4		@ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r5,r6		@ magic
+	eor	r12,r12,r12
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 0
+# if 0==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r8,r8,ror#5
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r8,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 0
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 0==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r8,r8,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r8,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r11,r11,r2			@ h+=X[i]
+	str	r2,[sp,#0*4]
+	eor	r2,r9,r10
+	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r8
+	add	r11,r11,r12			@ h+=K256[i]
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	eor	r0,r4,r4,ror#11
+	add	r11,r11,r2			@ h+=Ch(e,f,g)
+#if 0==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 0<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r4,r5			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
+	eor	r12,r4,r5			@ a^b, b^c in next round
+	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r4,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r7,r7,r11			@ d+=h
+	eor	r3,r3,r5			@ Maj(a,b,c)
+	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 1
+# if 1==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r7,r7,ror#5
+	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r7,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 1
+	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 1==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r7,r7,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r7,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r10,r10,r2			@ h+=X[i]
+	str	r2,[sp,#1*4]
+	eor	r2,r8,r9
+	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r7
+	add	r10,r10,r3			@ h+=K256[i]
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	eor	r0,r11,r11,ror#11
+	add	r10,r10,r2			@ h+=Ch(e,f,g)
+#if 1==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 1<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r11,r4			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
+	eor	r3,r11,r4			@ a^b, b^c in next round
+	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r11,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r6,r6,r10			@ d+=h
+	eor	r12,r12,r4			@ Maj(a,b,c)
+	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 2
+# if 2==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r6,r6,ror#5
+	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r6,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 2
+	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 2==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r6,r6,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r6,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r9,r9,r2			@ h+=X[i]
+	str	r2,[sp,#2*4]
+	eor	r2,r7,r8
+	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r6
+	add	r9,r9,r12			@ h+=K256[i]
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	eor	r0,r10,r10,ror#11
+	add	r9,r9,r2			@ h+=Ch(e,f,g)
+#if 2==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 2<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r10,r11			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
+	eor	r12,r10,r11			@ a^b, b^c in next round
+	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r10,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r5,r5,r9			@ d+=h
+	eor	r3,r3,r11			@ Maj(a,b,c)
+	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 3
+# if 3==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r5,r5,ror#5
+	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r5,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 3
+	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 3==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r5,r5,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r5,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r8,r8,r2			@ h+=X[i]
+	str	r2,[sp,#3*4]
+	eor	r2,r6,r7
+	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r5
+	add	r8,r8,r3			@ h+=K256[i]
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	eor	r0,r9,r9,ror#11
+	add	r8,r8,r2			@ h+=Ch(e,f,g)
+#if 3==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 3<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r9,r10			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
+	eor	r3,r9,r10			@ a^b, b^c in next round
+	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r9,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r4,r4,r8			@ d+=h
+	eor	r12,r12,r10			@ Maj(a,b,c)
+	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 4
+# if 4==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r4,r4,ror#5
+	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r4,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 4
+	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 4==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r4,r4,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r4,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r7,r7,r2			@ h+=X[i]
+	str	r2,[sp,#4*4]
+	eor	r2,r5,r6
+	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r4
+	add	r7,r7,r12			@ h+=K256[i]
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	eor	r0,r8,r8,ror#11
+	add	r7,r7,r2			@ h+=Ch(e,f,g)
+#if 4==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 4<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r8,r9			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
+	eor	r12,r8,r9			@ a^b, b^c in next round
+	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r8,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r11,r11,r7			@ d+=h
+	eor	r3,r3,r9			@ Maj(a,b,c)
+	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 5
+# if 5==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r11,r11,ror#5
+	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r11,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 5
+	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 5==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r11,r11,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r11,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r6,r6,r2			@ h+=X[i]
+	str	r2,[sp,#5*4]
+	eor	r2,r4,r5
+	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r11
+	add	r6,r6,r3			@ h+=K256[i]
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	eor	r0,r7,r7,ror#11
+	add	r6,r6,r2			@ h+=Ch(e,f,g)
+#if 5==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 5<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r7,r8			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
+	eor	r3,r7,r8			@ a^b, b^c in next round
+	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r7,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r10,r10,r6			@ d+=h
+	eor	r12,r12,r8			@ Maj(a,b,c)
+	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 6
+# if 6==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r10,r10,ror#5
+	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r10,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 6
+	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 6==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r10,r10,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r10,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r5,r5,r2			@ h+=X[i]
+	str	r2,[sp,#6*4]
+	eor	r2,r11,r4
+	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r10
+	add	r5,r5,r12			@ h+=K256[i]
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	eor	r0,r6,r6,ror#11
+	add	r5,r5,r2			@ h+=Ch(e,f,g)
+#if 6==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 6<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r6,r7			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
+	eor	r12,r6,r7			@ a^b, b^c in next round
+	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r6,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r9,r9,r5			@ d+=h
+	eor	r3,r3,r7			@ Maj(a,b,c)
+	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 7
+# if 7==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r9,r9,ror#5
+	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r9,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 7
+	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 7==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r9,r9,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r9,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r4,r4,r2			@ h+=X[i]
+	str	r2,[sp,#7*4]
+	eor	r2,r10,r11
+	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r9
+	add	r4,r4,r3			@ h+=K256[i]
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	eor	r0,r5,r5,ror#11
+	add	r4,r4,r2			@ h+=Ch(e,f,g)
+#if 7==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 7<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r5,r6			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
+	eor	r3,r5,r6			@ a^b, b^c in next round
+	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r5,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r8,r8,r4			@ d+=h
+	eor	r12,r12,r6			@ Maj(a,b,c)
+	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 8
+# if 8==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r8,r8,ror#5
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r8,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 8
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 8==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r8,r8,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r8,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r11,r11,r2			@ h+=X[i]
+	str	r2,[sp,#8*4]
+	eor	r2,r9,r10
+	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r8
+	add	r11,r11,r12			@ h+=K256[i]
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	eor	r0,r4,r4,ror#11
+	add	r11,r11,r2			@ h+=Ch(e,f,g)
+#if 8==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 8<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r4,r5			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
+	eor	r12,r4,r5			@ a^b, b^c in next round
+	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r4,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r7,r7,r11			@ d+=h
+	eor	r3,r3,r5			@ Maj(a,b,c)
+	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 9
+# if 9==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r7,r7,ror#5
+	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r7,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 9
+	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 9==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r7,r7,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r7,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r10,r10,r2			@ h+=X[i]
+	str	r2,[sp,#9*4]
+	eor	r2,r8,r9
+	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r7
+	add	r10,r10,r3			@ h+=K256[i]
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	eor	r0,r11,r11,ror#11
+	add	r10,r10,r2			@ h+=Ch(e,f,g)
+#if 9==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 9<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r11,r4			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
+	eor	r3,r11,r4			@ a^b, b^c in next round
+	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r11,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r6,r6,r10			@ d+=h
+	eor	r12,r12,r4			@ Maj(a,b,c)
+	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 10
+# if 10==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r6,r6,ror#5
+	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r6,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 10
+	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 10==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r6,r6,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r6,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r9,r9,r2			@ h+=X[i]
+	str	r2,[sp,#10*4]
+	eor	r2,r7,r8
+	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r6
+	add	r9,r9,r12			@ h+=K256[i]
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	eor	r0,r10,r10,ror#11
+	add	r9,r9,r2			@ h+=Ch(e,f,g)
+#if 10==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 10<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r10,r11			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
+	eor	r12,r10,r11			@ a^b, b^c in next round
+	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r10,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r5,r5,r9			@ d+=h
+	eor	r3,r3,r11			@ Maj(a,b,c)
+	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 11
+# if 11==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r5,r5,ror#5
+	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r5,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 11
+	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 11==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r5,r5,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r5,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r8,r8,r2			@ h+=X[i]
+	str	r2,[sp,#11*4]
+	eor	r2,r6,r7
+	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r5
+	add	r8,r8,r3			@ h+=K256[i]
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	eor	r0,r9,r9,ror#11
+	add	r8,r8,r2			@ h+=Ch(e,f,g)
+#if 11==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 11<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r9,r10			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
+	eor	r3,r9,r10			@ a^b, b^c in next round
+	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r9,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r4,r4,r8			@ d+=h
+	eor	r12,r12,r10			@ Maj(a,b,c)
+	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 12
+# if 12==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r4,r4,ror#5
+	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r4,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 12
+	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 12==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r4,r4,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r4,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r7,r7,r2			@ h+=X[i]
+	str	r2,[sp,#12*4]
+	eor	r2,r5,r6
+	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r4
+	add	r7,r7,r12			@ h+=K256[i]
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	eor	r0,r8,r8,ror#11
+	add	r7,r7,r2			@ h+=Ch(e,f,g)
+#if 12==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 12<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r8,r9			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
+	eor	r12,r8,r9			@ a^b, b^c in next round
+	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r8,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r11,r11,r7			@ d+=h
+	eor	r3,r3,r9			@ Maj(a,b,c)
+	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 13
+# if 13==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r11,r11,ror#5
+	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r11,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 13
+	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 13==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r11,r11,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r11,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r6,r6,r2			@ h+=X[i]
+	str	r2,[sp,#13*4]
+	eor	r2,r4,r5
+	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r11
+	add	r6,r6,r3			@ h+=K256[i]
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	eor	r0,r7,r7,ror#11
+	add	r6,r6,r2			@ h+=Ch(e,f,g)
+#if 13==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 13<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r7,r8			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
+	eor	r3,r7,r8			@ a^b, b^c in next round
+	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r7,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r10,r10,r6			@ d+=h
+	eor	r12,r12,r8			@ Maj(a,b,c)
+	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 14
+# if 14==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r10,r10,ror#5
+	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r10,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 14
+	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
+	ldrb	r12,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r12,lsl#8
+	ldrb	r12,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 14==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r10,r10,ror#5
+	orr	r2,r2,r12,lsl#24
+	eor	r0,r0,r10,ror#19	@ Sigma1(e)
+#endif
+	ldr	r12,[r14],#4			@ *K256++
+	add	r5,r5,r2			@ h+=X[i]
+	str	r2,[sp,#14*4]
+	eor	r2,r11,r4
+	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r10
+	add	r5,r5,r12			@ h+=K256[i]
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	eor	r0,r6,r6,ror#11
+	add	r5,r5,r2			@ h+=Ch(e,f,g)
+#if 14==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 14<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r6,r7			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
+	eor	r12,r6,r7			@ a^b, b^c in next round
+	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r6,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r9,r9,r5			@ d+=h
+	eor	r3,r3,r7			@ Maj(a,b,c)
+	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	@ ldr	r2,[r1],#4			@ 15
+# if 15==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r9,r9,ror#5
+	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
+	eor	r0,r0,r9,ror#19	@ Sigma1(e)
+# ifndef __ARMEB__
+	rev	r2,r2
+# endif
+#else
+	@ ldrb	r2,[r1,#3]			@ 15
+	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
+	ldrb	r3,[r1,#2]
+	ldrb	r0,[r1,#1]
+	orr	r2,r2,r3,lsl#8
+	ldrb	r3,[r1],#4
+	orr	r2,r2,r0,lsl#16
+# if 15==15
+	str	r1,[sp,#17*4]			@ make room for r1
+# endif
+	eor	r0,r9,r9,ror#5
+	orr	r2,r2,r3,lsl#24
+	eor	r0,r0,r9,ror#19	@ Sigma1(e)
+#endif
+	ldr	r3,[r14],#4			@ *K256++
+	add	r4,r4,r2			@ h+=X[i]
+	str	r2,[sp,#15*4]
+	eor	r2,r10,r11
+	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r9
+	add	r4,r4,r3			@ h+=K256[i]
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	eor	r0,r5,r5,ror#11
+	add	r4,r4,r2			@ h+=Ch(e,f,g)
+#if 15==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 15<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r5,r6			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
+	eor	r3,r5,r6			@ a^b, b^c in next round
+	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r5,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r8,r8,r4			@ d+=h
+	eor	r12,r12,r6			@ Maj(a,b,c)
+	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
+.Lrounds_16_xx:
+	@ ldr	r2,[sp,#1*4]		@ 16
+	@ ldr	r1,[sp,#14*4]
+	mov	r0,r2,ror#7
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#0*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#9*4]
+
+	add	r12,r12,r0
+	eor	r0,r8,r8,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r8,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r11,r11,r2			@ h+=X[i]
+	str	r2,[sp,#0*4]
+	eor	r2,r9,r10
+	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r8
+	add	r11,r11,r12			@ h+=K256[i]
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	eor	r0,r4,r4,ror#11
+	add	r11,r11,r2			@ h+=Ch(e,f,g)
+#if 16==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 16<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r4,r5			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
+	eor	r12,r4,r5			@ a^b, b^c in next round
+	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r4,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r7,r7,r11			@ d+=h
+	eor	r3,r3,r5			@ Maj(a,b,c)
+	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#2*4]		@ 17
+	@ ldr	r1,[sp,#15*4]
+	mov	r0,r2,ror#7
+	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#1*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#10*4]
+
+	add	r3,r3,r0
+	eor	r0,r7,r7,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r7,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r10,r10,r2			@ h+=X[i]
+	str	r2,[sp,#1*4]
+	eor	r2,r8,r9
+	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r7
+	add	r10,r10,r3			@ h+=K256[i]
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	eor	r0,r11,r11,ror#11
+	add	r10,r10,r2			@ h+=Ch(e,f,g)
+#if 17==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 17<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r11,r4			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
+	eor	r3,r11,r4			@ a^b, b^c in next round
+	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r11,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r6,r6,r10			@ d+=h
+	eor	r12,r12,r4			@ Maj(a,b,c)
+	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#3*4]		@ 18
+	@ ldr	r1,[sp,#0*4]
+	mov	r0,r2,ror#7
+	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#2*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#11*4]
+
+	add	r12,r12,r0
+	eor	r0,r6,r6,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r6,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r9,r9,r2			@ h+=X[i]
+	str	r2,[sp,#2*4]
+	eor	r2,r7,r8
+	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r6
+	add	r9,r9,r12			@ h+=K256[i]
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	eor	r0,r10,r10,ror#11
+	add	r9,r9,r2			@ h+=Ch(e,f,g)
+#if 18==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 18<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r10,r11			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
+	eor	r12,r10,r11			@ a^b, b^c in next round
+	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r10,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r5,r5,r9			@ d+=h
+	eor	r3,r3,r11			@ Maj(a,b,c)
+	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#4*4]		@ 19
+	@ ldr	r1,[sp,#1*4]
+	mov	r0,r2,ror#7
+	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#3*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#12*4]
+
+	add	r3,r3,r0
+	eor	r0,r5,r5,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r5,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r8,r8,r2			@ h+=X[i]
+	str	r2,[sp,#3*4]
+	eor	r2,r6,r7
+	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r5
+	add	r8,r8,r3			@ h+=K256[i]
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	eor	r0,r9,r9,ror#11
+	add	r8,r8,r2			@ h+=Ch(e,f,g)
+#if 19==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 19<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r9,r10			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
+	eor	r3,r9,r10			@ a^b, b^c in next round
+	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r9,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r4,r4,r8			@ d+=h
+	eor	r12,r12,r10			@ Maj(a,b,c)
+	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#5*4]		@ 20
+	@ ldr	r1,[sp,#2*4]
+	mov	r0,r2,ror#7
+	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#4*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#13*4]
+
+	add	r12,r12,r0
+	eor	r0,r4,r4,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r4,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r7,r7,r2			@ h+=X[i]
+	str	r2,[sp,#4*4]
+	eor	r2,r5,r6
+	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r4
+	add	r7,r7,r12			@ h+=K256[i]
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	eor	r0,r8,r8,ror#11
+	add	r7,r7,r2			@ h+=Ch(e,f,g)
+#if 20==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 20<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r8,r9			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
+	eor	r12,r8,r9			@ a^b, b^c in next round
+	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r8,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r11,r11,r7			@ d+=h
+	eor	r3,r3,r9			@ Maj(a,b,c)
+	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#6*4]		@ 21
+	@ ldr	r1,[sp,#3*4]
+	mov	r0,r2,ror#7
+	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#5*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#14*4]
+
+	add	r3,r3,r0
+	eor	r0,r11,r11,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r11,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r6,r6,r2			@ h+=X[i]
+	str	r2,[sp,#5*4]
+	eor	r2,r4,r5
+	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r11
+	add	r6,r6,r3			@ h+=K256[i]
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	eor	r0,r7,r7,ror#11
+	add	r6,r6,r2			@ h+=Ch(e,f,g)
+#if 21==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 21<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r7,r8			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
+	eor	r3,r7,r8			@ a^b, b^c in next round
+	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r7,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r10,r10,r6			@ d+=h
+	eor	r12,r12,r8			@ Maj(a,b,c)
+	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#7*4]		@ 22
+	@ ldr	r1,[sp,#4*4]
+	mov	r0,r2,ror#7
+	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#6*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#15*4]
+
+	add	r12,r12,r0
+	eor	r0,r10,r10,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r10,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r5,r5,r2			@ h+=X[i]
+	str	r2,[sp,#6*4]
+	eor	r2,r11,r4
+	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r10
+	add	r5,r5,r12			@ h+=K256[i]
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	eor	r0,r6,r6,ror#11
+	add	r5,r5,r2			@ h+=Ch(e,f,g)
+#if 22==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 22<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r6,r7			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
+	eor	r12,r6,r7			@ a^b, b^c in next round
+	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r6,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r9,r9,r5			@ d+=h
+	eor	r3,r3,r7			@ Maj(a,b,c)
+	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#8*4]		@ 23
+	@ ldr	r1,[sp,#5*4]
+	mov	r0,r2,ror#7
+	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#7*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#0*4]
+
+	add	r3,r3,r0
+	eor	r0,r9,r9,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r9,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r4,r4,r2			@ h+=X[i]
+	str	r2,[sp,#7*4]
+	eor	r2,r10,r11
+	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r9
+	add	r4,r4,r3			@ h+=K256[i]
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	eor	r0,r5,r5,ror#11
+	add	r4,r4,r2			@ h+=Ch(e,f,g)
+#if 23==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 23<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r5,r6			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
+	eor	r3,r5,r6			@ a^b, b^c in next round
+	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r5,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r8,r8,r4			@ d+=h
+	eor	r12,r12,r6			@ Maj(a,b,c)
+	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#9*4]		@ 24
+	@ ldr	r1,[sp,#6*4]
+	mov	r0,r2,ror#7
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#8*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#1*4]
+
+	add	r12,r12,r0
+	eor	r0,r8,r8,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r8,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r11,r11,r2			@ h+=X[i]
+	str	r2,[sp,#8*4]
+	eor	r2,r9,r10
+	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r8
+	add	r11,r11,r12			@ h+=K256[i]
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	eor	r0,r4,r4,ror#11
+	add	r11,r11,r2			@ h+=Ch(e,f,g)
+#if 24==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 24<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r4,r5			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
+	eor	r12,r4,r5			@ a^b, b^c in next round
+	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r4,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r7,r7,r11			@ d+=h
+	eor	r3,r3,r5			@ Maj(a,b,c)
+	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#10*4]		@ 25
+	@ ldr	r1,[sp,#7*4]
+	mov	r0,r2,ror#7
+	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#9*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#2*4]
+
+	add	r3,r3,r0
+	eor	r0,r7,r7,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r7,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r10,r10,r2			@ h+=X[i]
+	str	r2,[sp,#9*4]
+	eor	r2,r8,r9
+	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r7
+	add	r10,r10,r3			@ h+=K256[i]
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	eor	r0,r11,r11,ror#11
+	add	r10,r10,r2			@ h+=Ch(e,f,g)
+#if 25==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 25<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r11,r4			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
+	eor	r3,r11,r4			@ a^b, b^c in next round
+	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r11,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r6,r6,r10			@ d+=h
+	eor	r12,r12,r4			@ Maj(a,b,c)
+	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#11*4]		@ 26
+	@ ldr	r1,[sp,#8*4]
+	mov	r0,r2,ror#7
+	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#10*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#3*4]
+
+	add	r12,r12,r0
+	eor	r0,r6,r6,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r6,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r9,r9,r2			@ h+=X[i]
+	str	r2,[sp,#10*4]
+	eor	r2,r7,r8
+	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r6
+	add	r9,r9,r12			@ h+=K256[i]
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	eor	r0,r10,r10,ror#11
+	add	r9,r9,r2			@ h+=Ch(e,f,g)
+#if 26==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 26<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r10,r11			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
+	eor	r12,r10,r11			@ a^b, b^c in next round
+	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r10,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r5,r5,r9			@ d+=h
+	eor	r3,r3,r11			@ Maj(a,b,c)
+	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#12*4]		@ 27
+	@ ldr	r1,[sp,#9*4]
+	mov	r0,r2,ror#7
+	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#11*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#4*4]
+
+	add	r3,r3,r0
+	eor	r0,r5,r5,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r5,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r8,r8,r2			@ h+=X[i]
+	str	r2,[sp,#11*4]
+	eor	r2,r6,r7
+	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r5
+	add	r8,r8,r3			@ h+=K256[i]
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	eor	r0,r9,r9,ror#11
+	add	r8,r8,r2			@ h+=Ch(e,f,g)
+#if 27==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 27<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r9,r10			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
+	eor	r3,r9,r10			@ a^b, b^c in next round
+	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r9,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r4,r4,r8			@ d+=h
+	eor	r12,r12,r10			@ Maj(a,b,c)
+	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#13*4]		@ 28
+	@ ldr	r1,[sp,#10*4]
+	mov	r0,r2,ror#7
+	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#12*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#5*4]
+
+	add	r12,r12,r0
+	eor	r0,r4,r4,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r4,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r7,r7,r2			@ h+=X[i]
+	str	r2,[sp,#12*4]
+	eor	r2,r5,r6
+	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r4
+	add	r7,r7,r12			@ h+=K256[i]
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	eor	r0,r8,r8,ror#11
+	add	r7,r7,r2			@ h+=Ch(e,f,g)
+#if 28==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 28<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r8,r9			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
+	eor	r12,r8,r9			@ a^b, b^c in next round
+	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r8,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r11,r11,r7			@ d+=h
+	eor	r3,r3,r9			@ Maj(a,b,c)
+	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#14*4]		@ 29
+	@ ldr	r1,[sp,#11*4]
+	mov	r0,r2,ror#7
+	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#13*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#6*4]
+
+	add	r3,r3,r0
+	eor	r0,r11,r11,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r11,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r6,r6,r2			@ h+=X[i]
+	str	r2,[sp,#13*4]
+	eor	r2,r4,r5
+	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r11
+	add	r6,r6,r3			@ h+=K256[i]
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	eor	r0,r7,r7,ror#11
+	add	r6,r6,r2			@ h+=Ch(e,f,g)
+#if 29==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 29<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r7,r8			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
+	eor	r3,r7,r8			@ a^b, b^c in next round
+	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r7,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r10,r10,r6			@ d+=h
+	eor	r12,r12,r8			@ Maj(a,b,c)
+	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#15*4]		@ 30
+	@ ldr	r1,[sp,#12*4]
+	mov	r0,r2,ror#7
+	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
+	mov	r12,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r12,r12,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#14*4]
+	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#7*4]
+
+	add	r12,r12,r0
+	eor	r0,r10,r10,ror#5	@ from BODY_00_15
+	add	r2,r2,r12
+	eor	r0,r0,r10,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r12,[r14],#4			@ *K256++
+	add	r5,r5,r2			@ h+=X[i]
+	str	r2,[sp,#14*4]
+	eor	r2,r11,r4
+	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r10
+	add	r5,r5,r12			@ h+=K256[i]
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	eor	r0,r6,r6,ror#11
+	add	r5,r5,r2			@ h+=Ch(e,f,g)
+#if 30==31
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2			@ done?
+#endif
+#if 30<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r12,r6,r7			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
+	eor	r12,r6,r7			@ a^b, b^c in next round
+	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r6,ror#20	@ Sigma0(a)
+	and	r3,r3,r12			@ (b^c)&=(a^b)
+	add	r9,r9,r5			@ d+=h
+	eor	r3,r3,r7			@ Maj(a,b,c)
+	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
+	@ ldr	r2,[sp,#0*4]		@ 31
+	@ ldr	r1,[sp,#13*4]
+	mov	r0,r2,ror#7
+	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
+	mov	r3,r1,ror#17
+	eor	r0,r0,r2,ror#18
+	eor	r3,r3,r1,ror#19
+	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
+	ldr	r2,[sp,#15*4]
+	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
+	ldr	r1,[sp,#8*4]
+
+	add	r3,r3,r0
+	eor	r0,r9,r9,ror#5	@ from BODY_00_15
+	add	r2,r2,r3
+	eor	r0,r0,r9,ror#19	@ Sigma1(e)
+	add	r2,r2,r1			@ X[i]
+	ldr	r3,[r14],#4			@ *K256++
+	add	r4,r4,r2			@ h+=X[i]
+	str	r2,[sp,#15*4]
+	eor	r2,r10,r11
+	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
+	and	r2,r2,r9
+	add	r4,r4,r3			@ h+=K256[i]
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	eor	r0,r5,r5,ror#11
+	add	r4,r4,r2			@ h+=Ch(e,f,g)
+#if 31==31
+	and	r3,r3,#0xff
+	cmp	r3,#0xf2			@ done?
+#endif
+#if 31<15
+# if __ARM_ARCH__>=7
+	ldr	r2,[r1],#4			@ prefetch
+# else
+	ldrb	r2,[r1,#3]
+# endif
+	eor	r3,r5,r6			@ a^b, b^c in next round
+#else
+	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
+	eor	r3,r5,r6			@ a^b, b^c in next round
+	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
+#endif
+	eor	r0,r0,r5,ror#20	@ Sigma0(a)
+	and	r12,r12,r3			@ (b^c)&=(a^b)
+	add	r8,r8,r4			@ d+=h
+	eor	r12,r12,r6			@ Maj(a,b,c)
+	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
+	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+	ite	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	ldreq	r3,[sp,#16*4]		@ pull ctx
+	bne	.Lrounds_16_xx
+
+	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
+	ldr	r0,[r3,#0]
+	ldr	r2,[r3,#4]
+	ldr	r12,[r3,#8]
+	add	r4,r4,r0
+	ldr	r0,[r3,#12]
+	add	r5,r5,r2
+	ldr	r2,[r3,#16]
+	add	r6,r6,r12
+	ldr	r12,[r3,#20]
+	add	r7,r7,r0
+	ldr	r0,[r3,#24]
+	add	r8,r8,r2
+	ldr	r2,[r3,#28]
+	add	r9,r9,r12
+	ldr	r1,[sp,#17*4]		@ pull inp
+	ldr	r12,[sp,#18*4]		@ pull inp+len
+	add	r10,r10,r0
+	add	r11,r11,r2
+	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+	cmp	r1,r12
+	sub	r14,r14,#256	@ rewind Ktbl
+	bne	.Loop
+
+	add	sp,sp,#19*4	@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r11,pc}
+#else
+	ldmia	sp!,{r4-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	sha256_block_data_order,.-sha256_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.global	sha256_block_data_order_neon
+.type	sha256_block_data_order_neon,%function
+.align	4
+sha256_block_data_order_neon:
+.LNEON:
+	stmdb	sp!,{r4-r12,lr}
+
+	sub	r11,sp,#16*4+16
+	adrl	r14,K256
+	bic	r11,r11,#15		@ align for 128-bit stores
+	mov	r12,sp
+	mov	sp,r11			@ alloca
+	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
+
+	vld1.8		{q0},[r1]!
+	vld1.8		{q1},[r1]!
+	vld1.8		{q2},[r1]!
+	vld1.8		{q3},[r1]!
+	vld1.32		{q8},[r14,:128]!
+	vld1.32		{q9},[r14,:128]!
+	vld1.32		{q10},[r14,:128]!
+	vld1.32		{q11},[r14,:128]!
+	vrev32.8	q0,q0		@ yes, even on
+	str		r0,[sp,#64]
+	vrev32.8	q1,q1		@ big-endian
+	str		r1,[sp,#68]
+	mov		r1,sp
+	vrev32.8	q2,q2
+	str		r2,[sp,#72]
+	vrev32.8	q3,q3
+	str		r12,[sp,#76]		@ save original sp
+	vadd.i32	q8,q8,q0
+	vadd.i32	q9,q9,q1
+	vst1.32		{q8},[r1,:128]!
+	vadd.i32	q10,q10,q2
+	vst1.32		{q9},[r1,:128]!
+	vadd.i32	q11,q11,q3
+	vst1.32		{q10},[r1,:128]!
+	vst1.32		{q11},[r1,:128]!
+
+	ldmia		r0,{r4-r11}
+	sub		r1,r1,#64
+	ldr		r2,[sp,#0]
+	eor		r12,r12,r12
+	eor		r3,r5,r6
+	b		.L_00_48
+
+.align	4
+.L_00_48:
+	vext.8	q8,q0,q1,#4
+	add	r11,r11,r2
+	eor	r2,r9,r10
+	eor	r0,r8,r8,ror#5
+	vext.8	q9,q2,q3,#4
+	add	r4,r4,r12
+	and	r2,r2,r8
+	eor	r12,r0,r8,ror#19
+	vshr.u32	q10,q8,#7
+	eor	r0,r4,r4,ror#11
+	eor	r2,r2,r10
+	vadd.i32	q0,q0,q9
+	add	r11,r11,r12,ror#6
+	eor	r12,r4,r5
+	vshr.u32	q9,q8,#3
+	eor	r0,r0,r4,ror#20
+	add	r11,r11,r2
+	vsli.32	q10,q8,#25
+	ldr	r2,[sp,#4]
+	and	r3,r3,r12
+	vshr.u32	q11,q8,#18
+	add	r7,r7,r11
+	add	r11,r11,r0,ror#2
+	eor	r3,r3,r5
+	veor	q9,q9,q10
+	add	r10,r10,r2
+	vsli.32	q11,q8,#14
+	eor	r2,r8,r9
+	eor	r0,r7,r7,ror#5
+	vshr.u32	d24,d7,#17
+	add	r11,r11,r3
+	and	r2,r2,r7
+	veor	q9,q9,q11
+	eor	r3,r0,r7,ror#19
+	eor	r0,r11,r11,ror#11
+	vsli.32	d24,d7,#15
+	eor	r2,r2,r9
+	add	r10,r10,r3,ror#6
+	vshr.u32	d25,d7,#10
+	eor	r3,r11,r4
+	eor	r0,r0,r11,ror#20
+	vadd.i32	q0,q0,q9
+	add	r10,r10,r2
+	ldr	r2,[sp,#8]
+	veor	d25,d25,d24
+	and	r12,r12,r3
+	add	r6,r6,r10
+	vshr.u32	d24,d7,#19
+	add	r10,r10,r0,ror#2
+	eor	r12,r12,r4
+	vsli.32	d24,d7,#13
+	add	r9,r9,r2
+	eor	r2,r7,r8
+	veor	d25,d25,d24
+	eor	r0,r6,r6,ror#5
+	add	r10,r10,r12
+	vadd.i32	d0,d0,d25
+	and	r2,r2,r6
+	eor	r12,r0,r6,ror#19
+	vshr.u32	d24,d0,#17
+	eor	r0,r10,r10,ror#11
+	eor	r2,r2,r8
+	vsli.32	d24,d0,#15
+	add	r9,r9,r12,ror#6
+	eor	r12,r10,r11
+	vshr.u32	d25,d0,#10
+	eor	r0,r0,r10,ror#20
+	add	r9,r9,r2
+	veor	d25,d25,d24
+	ldr	r2,[sp,#12]
+	and	r3,r3,r12
+	vshr.u32	d24,d0,#19
+	add	r5,r5,r9
+	add	r9,r9,r0,ror#2
+	eor	r3,r3,r11
+	vld1.32	{q8},[r14,:128]!
+	add	r8,r8,r2
+	vsli.32	d24,d0,#13
+	eor	r2,r6,r7
+	eor	r0,r5,r5,ror#5
+	veor	d25,d25,d24
+	add	r9,r9,r3
+	and	r2,r2,r5
+	vadd.i32	d1,d1,d25
+	eor	r3,r0,r5,ror#19
+	eor	r0,r9,r9,ror#11
+	vadd.i32	q8,q8,q0
+	eor	r2,r2,r7
+	add	r8,r8,r3,ror#6
+	eor	r3,r9,r10
+	eor	r0,r0,r9,ror#20
+	add	r8,r8,r2
+	ldr	r2,[sp,#16]
+	and	r12,r12,r3
+	add	r4,r4,r8
+	vst1.32	{q8},[r1,:128]!
+	add	r8,r8,r0,ror#2
+	eor	r12,r12,r10
+	vext.8	q8,q1,q2,#4
+	add	r7,r7,r2
+	eor	r2,r5,r6
+	eor	r0,r4,r4,ror#5
+	vext.8	q9,q3,q0,#4
+	add	r8,r8,r12
+	and	r2,r2,r4
+	eor	r12,r0,r4,ror#19
+	vshr.u32	q10,q8,#7
+	eor	r0,r8,r8,ror#11
+	eor	r2,r2,r6
+	vadd.i32	q1,q1,q9
+	add	r7,r7,r12,ror#6
+	eor	r12,r8,r9
+	vshr.u32	q9,q8,#3
+	eor	r0,r0,r8,ror#20
+	add	r7,r7,r2
+	vsli.32	q10,q8,#25
+	ldr	r2,[sp,#20]
+	and	r3,r3,r12
+	vshr.u32	q11,q8,#18
+	add	r11,r11,r7
+	add	r7,r7,r0,ror#2
+	eor	r3,r3,r9
+	veor	q9,q9,q10
+	add	r6,r6,r2
+	vsli.32	q11,q8,#14
+	eor	r2,r4,r5
+	eor	r0,r11,r11,ror#5
+	vshr.u32	d24,d1,#17
+	add	r7,r7,r3
+	and	r2,r2,r11
+	veor	q9,q9,q11
+	eor	r3,r0,r11,ror#19
+	eor	r0,r7,r7,ror#11
+	vsli.32	d24,d1,#15
+	eor	r2,r2,r5
+	add	r6,r6,r3,ror#6
+	vshr.u32	d25,d1,#10
+	eor	r3,r7,r8
+	eor	r0,r0,r7,ror#20
+	vadd.i32	q1,q1,q9
+	add	r6,r6,r2
+	ldr	r2,[sp,#24]
+	veor	d25,d25,d24
+	and	r12,r12,r3
+	add	r10,r10,r6
+	vshr.u32	d24,d1,#19
+	add	r6,r6,r0,ror#2
+	eor	r12,r12,r8
+	vsli.32	d24,d1,#13
+	add	r5,r5,r2
+	eor	r2,r11,r4
+	veor	d25,d25,d24
+	eor	r0,r10,r10,ror#5
+	add	r6,r6,r12
+	vadd.i32	d2,d2,d25
+	and	r2,r2,r10
+	eor	r12,r0,r10,ror#19
+	vshr.u32	d24,d2,#17
+	eor	r0,r6,r6,ror#11
+	eor	r2,r2,r4
+	vsli.32	d24,d2,#15
+	add	r5,r5,r12,ror#6
+	eor	r12,r6,r7
+	vshr.u32	d25,d2,#10
+	eor	r0,r0,r6,ror#20
+	add	r5,r5,r2
+	veor	d25,d25,d24
+	ldr	r2,[sp,#28]
+	and	r3,r3,r12
+	vshr.u32	d24,d2,#19
+	add	r9,r9,r5
+	add	r5,r5,r0,ror#2
+	eor	r3,r3,r7
+	vld1.32	{q8},[r14,:128]!
+	add	r4,r4,r2
+	vsli.32	d24,d2,#13
+	eor	r2,r10,r11
+	eor	r0,r9,r9,ror#5
+	veor	d25,d25,d24
+	add	r5,r5,r3
+	and	r2,r2,r9
+	vadd.i32	d3,d3,d25
+	eor	r3,r0,r9,ror#19
+	eor	r0,r5,r5,ror#11
+	vadd.i32	q8,q8,q1
+	eor	r2,r2,r11
+	add	r4,r4,r3,ror#6
+	eor	r3,r5,r6
+	eor	r0,r0,r5,ror#20
+	add	r4,r4,r2
+	ldr	r2,[sp,#32]
+	and	r12,r12,r3
+	add	r8,r8,r4
+	vst1.32	{q8},[r1,:128]!
+	add	r4,r4,r0,ror#2
+	eor	r12,r12,r6
+	vext.8	q8,q2,q3,#4
+	add	r11,r11,r2
+	eor	r2,r9,r10
+	eor	r0,r8,r8,ror#5
+	vext.8	q9,q0,q1,#4
+	add	r4,r4,r12
+	and	r2,r2,r8
+	eor	r12,r0,r8,ror#19
+	vshr.u32	q10,q8,#7
+	eor	r0,r4,r4,ror#11
+	eor	r2,r2,r10
+	vadd.i32	q2,q2,q9
+	add	r11,r11,r12,ror#6
+	eor	r12,r4,r5
+	vshr.u32	q9,q8,#3
+	eor	r0,r0,r4,ror#20
+	add	r11,r11,r2
+	vsli.32	q10,q8,#25
+	ldr	r2,[sp,#36]
+	and	r3,r3,r12
+	vshr.u32	q11,q8,#18
+	add	r7,r7,r11
+	add	r11,r11,r0,ror#2
+	eor	r3,r3,r5
+	veor	q9,q9,q10
+	add	r10,r10,r2
+	vsli.32	q11,q8,#14
+	eor	r2,r8,r9
+	eor	r0,r7,r7,ror#5
+	vshr.u32	d24,d3,#17
+	add	r11,r11,r3
+	and	r2,r2,r7
+	veor	q9,q9,q11
+	eor	r3,r0,r7,ror#19
+	eor	r0,r11,r11,ror#11
+	vsli.32	d24,d3,#15
+	eor	r2,r2,r9
+	add	r10,r10,r3,ror#6
+	vshr.u32	d25,d3,#10
+	eor	r3,r11,r4
+	eor	r0,r0,r11,ror#20
+	vadd.i32	q2,q2,q9
+	add	r10,r10,r2
+	ldr	r2,[sp,#40]
+	veor	d25,d25,d24
+	and	r12,r12,r3
+	add	r6,r6,r10
+	vshr.u32	d24,d3,#19
+	add	r10,r10,r0,ror#2
+	eor	r12,r12,r4
+	vsli.32	d24,d3,#13
+	add	r9,r9,r2
+	eor	r2,r7,r8
+	veor	d25,d25,d24
+	eor	r0,r6,r6,ror#5
+	add	r10,r10,r12
+	vadd.i32	d4,d4,d25
+	and	r2,r2,r6
+	eor	r12,r0,r6,ror#19
+	vshr.u32	d24,d4,#17
+	eor	r0,r10,r10,ror#11
+	eor	r2,r2,r8
+	vsli.32	d24,d4,#15
+	add	r9,r9,r12,ror#6
+	eor	r12,r10,r11
+	vshr.u32	d25,d4,#10
+	eor	r0,r0,r10,ror#20
+	add	r9,r9,r2
+	veor	d25,d25,d24
+	ldr	r2,[sp,#44]
+	and	r3,r3,r12
+	vshr.u32	d24,d4,#19
+	add	r5,r5,r9
+	add	r9,r9,r0,ror#2
+	eor	r3,r3,r11
+	vld1.32	{q8},[r14,:128]!
+	add	r8,r8,r2
+	vsli.32	d24,d4,#13
+	eor	r2,r6,r7
+	eor	r0,r5,r5,ror#5
+	veor	d25,d25,d24
+	add	r9,r9,r3
+	and	r2,r2,r5
+	vadd.i32	d5,d5,d25
+	eor	r3,r0,r5,ror#19
+	eor	r0,r9,r9,ror#11
+	vadd.i32	q8,q8,q2
+	eor	r2,r2,r7
+	add	r8,r8,r3,ror#6
+	eor	r3,r9,r10
+	eor	r0,r0,r9,ror#20
+	add	r8,r8,r2
+	ldr	r2,[sp,#48]
+	and	r12,r12,r3
+	add	r4,r4,r8
+	vst1.32	{q8},[r1,:128]!
+	add	r8,r8,r0,ror#2
+	eor	r12,r12,r10
+	vext.8	q8,q3,q0,#4
+	add	r7,r7,r2
+	eor	r2,r5,r6
+	eor	r0,r4,r4,ror#5
+	vext.8	q9,q1,q2,#4
+	add	r8,r8,r12
+	and	r2,r2,r4
+	eor	r12,r0,r4,ror#19
+	vshr.u32	q10,q8,#7
+	eor	r0,r8,r8,ror#11
+	eor	r2,r2,r6
+	vadd.i32	q3,q3,q9
+	add	r7,r7,r12,ror#6
+	eor	r12,r8,r9
+	vshr.u32	q9,q8,#3
+	eor	r0,r0,r8,ror#20
+	add	r7,r7,r2
+	vsli.32	q10,q8,#25
+	ldr	r2,[sp,#52]
+	and	r3,r3,r12
+	vshr.u32	q11,q8,#18
+	add	r11,r11,r7
+	add	r7,r7,r0,ror#2
+	eor	r3,r3,r9
+	veor	q9,q9,q10
+	add	r6,r6,r2
+	vsli.32	q11,q8,#14
+	eor	r2,r4,r5
+	eor	r0,r11,r11,ror#5
+	vshr.u32	d24,d5,#17
+	add	r7,r7,r3
+	and	r2,r2,r11
+	veor	q9,q9,q11
+	eor	r3,r0,r11,ror#19
+	eor	r0,r7,r7,ror#11
+	vsli.32	d24,d5,#15
+	eor	r2,r2,r5
+	add	r6,r6,r3,ror#6
+	vshr.u32	d25,d5,#10
+	eor	r3,r7,r8
+	eor	r0,r0,r7,ror#20
+	vadd.i32	q3,q3,q9
+	add	r6,r6,r2
+	ldr	r2,[sp,#56]
+	veor	d25,d25,d24
+	and	r12,r12,r3
+	add	r10,r10,r6
+	vshr.u32	d24,d5,#19
+	add	r6,r6,r0,ror#2
+	eor	r12,r12,r8
+	vsli.32	d24,d5,#13
+	add	r5,r5,r2
+	eor	r2,r11,r4
+	veor	d25,d25,d24
+	eor	r0,r10,r10,ror#5
+	add	r6,r6,r12
+	vadd.i32	d6,d6,d25
+	and	r2,r2,r10
+	eor	r12,r0,r10,ror#19
+	vshr.u32	d24,d6,#17
+	eor	r0,r6,r6,ror#11
+	eor	r2,r2,r4
+	vsli.32	d24,d6,#15
+	add	r5,r5,r12,ror#6
+	eor	r12,r6,r7
+	vshr.u32	d25,d6,#10
+	eor	r0,r0,r6,ror#20
+	add	r5,r5,r2
+	veor	d25,d25,d24
+	ldr	r2,[sp,#60]
+	and	r3,r3,r12
+	vshr.u32	d24,d6,#19
+	add	r9,r9,r5
+	add	r5,r5,r0,ror#2
+	eor	r3,r3,r7
+	vld1.32	{q8},[r14,:128]!
+	add	r4,r4,r2
+	vsli.32	d24,d6,#13
+	eor	r2,r10,r11
+	eor	r0,r9,r9,ror#5
+	veor	d25,d25,d24
+	add	r5,r5,r3
+	and	r2,r2,r9
+	vadd.i32	d7,d7,d25
+	eor	r3,r0,r9,ror#19
+	eor	r0,r5,r5,ror#11
+	vadd.i32	q8,q8,q3
+	eor	r2,r2,r11
+	add	r4,r4,r3,ror#6
+	eor	r3,r5,r6
+	eor	r0,r0,r5,ror#20
+	add	r4,r4,r2
+	ldr	r2,[r14]
+	and	r12,r12,r3
+	add	r8,r8,r4
+	vst1.32	{q8},[r1,:128]!
+	add	r4,r4,r0,ror#2
+	eor	r12,r12,r6
+	teq	r2,#0				@ check for K256 terminator
+	ldr	r2,[sp,#0]
+	sub	r1,r1,#64
+	bne	.L_00_48
+
+	ldr		r1,[sp,#68]
+	ldr		r0,[sp,#72]
+	sub		r14,r14,#256	@ rewind r14
+	teq		r1,r0
+	it		eq
+	subeq		r1,r1,#64		@ avoid SEGV
+	vld1.8		{q0},[r1]!		@ load next input block
+	vld1.8		{q1},[r1]!
+	vld1.8		{q2},[r1]!
+	vld1.8		{q3},[r1]!
+	it		ne
+	strne		r1,[sp,#68]
+	mov		r1,sp
+	add	r11,r11,r2
+	eor	r2,r9,r10
+	eor	r0,r8,r8,ror#5
+	add	r4,r4,r12
+	vld1.32	{q8},[r14,:128]!
+	and	r2,r2,r8
+	eor	r12,r0,r8,ror#19
+	eor	r0,r4,r4,ror#11
+	eor	r2,r2,r10
+	vrev32.8	q0,q0
+	add	r11,r11,r12,ror#6
+	eor	r12,r4,r5
+	eor	r0,r0,r4,ror#20
+	add	r11,r11,r2
+	vadd.i32	q8,q8,q0
+	ldr	r2,[sp,#4]
+	and	r3,r3,r12
+	add	r7,r7,r11
+	add	r11,r11,r0,ror#2
+	eor	r3,r3,r5
+	add	r10,r10,r2
+	eor	r2,r8,r9
+	eor	r0,r7,r7,ror#5
+	add	r11,r11,r3
+	and	r2,r2,r7
+	eor	r3,r0,r7,ror#19
+	eor	r0,r11,r11,ror#11
+	eor	r2,r2,r9
+	add	r10,r10,r3,ror#6
+	eor	r3,r11,r4
+	eor	r0,r0,r11,ror#20
+	add	r10,r10,r2
+	ldr	r2,[sp,#8]
+	and	r12,r12,r3
+	add	r6,r6,r10
+	add	r10,r10,r0,ror#2
+	eor	r12,r12,r4
+	add	r9,r9,r2
+	eor	r2,r7,r8
+	eor	r0,r6,r6,ror#5
+	add	r10,r10,r12
+	and	r2,r2,r6
+	eor	r12,r0,r6,ror#19
+	eor	r0,r10,r10,ror#11
+	eor	r2,r2,r8
+	add	r9,r9,r12,ror#6
+	eor	r12,r10,r11
+	eor	r0,r0,r10,ror#20
+	add	r9,r9,r2
+	ldr	r2,[sp,#12]
+	and	r3,r3,r12
+	add	r5,r5,r9
+	add	r9,r9,r0,ror#2
+	eor	r3,r3,r11
+	add	r8,r8,r2
+	eor	r2,r6,r7
+	eor	r0,r5,r5,ror#5
+	add	r9,r9,r3
+	and	r2,r2,r5
+	eor	r3,r0,r5,ror#19
+	eor	r0,r9,r9,ror#11
+	eor	r2,r2,r7
+	add	r8,r8,r3,ror#6
+	eor	r3,r9,r10
+	eor	r0,r0,r9,ror#20
+	add	r8,r8,r2
+	ldr	r2,[sp,#16]
+	and	r12,r12,r3
+	add	r4,r4,r8
+	add	r8,r8,r0,ror#2
+	eor	r12,r12,r10
+	vst1.32	{q8},[r1,:128]!
+	add	r7,r7,r2
+	eor	r2,r5,r6
+	eor	r0,r4,r4,ror#5
+	add	r8,r8,r12
+	vld1.32	{q8},[r14,:128]!
+	and	r2,r2,r4
+	eor	r12,r0,r4,ror#19
+	eor	r0,r8,r8,ror#11
+	eor	r2,r2,r6
+	vrev32.8	q1,q1
+	add	r7,r7,r12,ror#6
+	eor	r12,r8,r9
+	eor	r0,r0,r8,ror#20
+	add	r7,r7,r2
+	vadd.i32	q8,q8,q1
+	ldr	r2,[sp,#20]
+	and	r3,r3,r12
+	add	r11,r11,r7
+	add	r7,r7,r0,ror#2
+	eor	r3,r3,r9
+	add	r6,r6,r2
+	eor	r2,r4,r5
+	eor	r0,r11,r11,ror#5
+	add	r7,r7,r3
+	and	r2,r2,r11
+	eor	r3,r0,r11,ror#19
+	eor	r0,r7,r7,ror#11
+	eor	r2,r2,r5
+	add	r6,r6,r3,ror#6
+	eor	r3,r7,r8
+	eor	r0,r0,r7,ror#20
+	add	r6,r6,r2
+	ldr	r2,[sp,#24]
+	and	r12,r12,r3
+	add	r10,r10,r6
+	add	r6,r6,r0,ror#2
+	eor	r12,r12,r8
+	add	r5,r5,r2
+	eor	r2,r11,r4
+	eor	r0,r10,r10,ror#5
+	add	r6,r6,r12
+	and	r2,r2,r10
+	eor	r12,r0,r10,ror#19
+	eor	r0,r6,r6,ror#11
+	eor	r2,r2,r4
+	add	r5,r5,r12,ror#6
+	eor	r12,r6,r7
+	eor	r0,r0,r6,ror#20
+	add	r5,r5,r2
+	ldr	r2,[sp,#28]
+	and	r3,r3,r12
+	add	r9,r9,r5
+	add	r5,r5,r0,ror#2
+	eor	r3,r3,r7
+	add	r4,r4,r2
+	eor	r2,r10,r11
+	eor	r0,r9,r9,ror#5
+	add	r5,r5,r3
+	and	r2,r2,r9
+	eor	r3,r0,r9,ror#19
+	eor	r0,r5,r5,ror#11
+	eor	r2,r2,r11
+	add	r4,r4,r3,ror#6
+	eor	r3,r5,r6
+	eor	r0,r0,r5,ror#20
+	add	r4,r4,r2
+	ldr	r2,[sp,#32]
+	and	r12,r12,r3
+	add	r8,r8,r4
+	add	r4,r4,r0,ror#2
+	eor	r12,r12,r6
+	vst1.32	{q8},[r1,:128]!
+	add	r11,r11,r2
+	eor	r2,r9,r10
+	eor	r0,r8,r8,ror#5
+	add	r4,r4,r12
+	vld1.32	{q8},[r14,:128]!
+	and	r2,r2,r8
+	eor	r12,r0,r8,ror#19
+	eor	r0,r4,r4,ror#11
+	eor	r2,r2,r10
+	vrev32.8	q2,q2
+	add	r11,r11,r12,ror#6
+	eor	r12,r4,r5
+	eor	r0,r0,r4,ror#20
+	add	r11,r11,r2
+	vadd.i32	q8,q8,q2
+	ldr	r2,[sp,#36]
+	and	r3,r3,r12
+	add	r7,r7,r11
+	add	r11,r11,r0,ror#2
+	eor	r3,r3,r5
+	add	r10,r10,r2
+	eor	r2,r8,r9
+	eor	r0,r7,r7,ror#5
+	add	r11,r11,r3
+	and	r2,r2,r7
+	eor	r3,r0,r7,ror#19
+	eor	r0,r11,r11,ror#11
+	eor	r2,r2,r9
+	add	r10,r10,r3,ror#6
+	eor	r3,r11,r4
+	eor	r0,r0,r11,ror#20
+	add	r10,r10,r2
+	ldr	r2,[sp,#40]
+	and	r12,r12,r3
+	add	r6,r6,r10
+	add	r10,r10,r0,ror#2
+	eor	r12,r12,r4
+	add	r9,r9,r2
+	eor	r2,r7,r8
+	eor	r0,r6,r6,ror#5
+	add	r10,r10,r12
+	and	r2,r2,r6
+	eor	r12,r0,r6,ror#19
+	eor	r0,r10,r10,ror#11
+	eor	r2,r2,r8
+	add	r9,r9,r12,ror#6
+	eor	r12,r10,r11
+	eor	r0,r0,r10,ror#20
+	add	r9,r9,r2
+	ldr	r2,[sp,#44]
+	and	r3,r3,r12
+	add	r5,r5,r9
+	add	r9,r9,r0,ror#2
+	eor	r3,r3,r11
+	add	r8,r8,r2
+	eor	r2,r6,r7
+	eor	r0,r5,r5,ror#5
+	add	r9,r9,r3
+	and	r2,r2,r5
+	eor	r3,r0,r5,ror#19
+	eor	r0,r9,r9,ror#11
+	eor	r2,r2,r7
+	add	r8,r8,r3,ror#6
+	eor	r3,r9,r10
+	eor	r0,r0,r9,ror#20
+	add	r8,r8,r2
+	ldr	r2,[sp,#48]
+	and	r12,r12,r3
+	add	r4,r4,r8
+	add	r8,r8,r0,ror#2
+	eor	r12,r12,r10
+	vst1.32	{q8},[r1,:128]!
+	add	r7,r7,r2
+	eor	r2,r5,r6
+	eor	r0,r4,r4,ror#5
+	add	r8,r8,r12
+	vld1.32	{q8},[r14,:128]!
+	and	r2,r2,r4
+	eor	r12,r0,r4,ror#19
+	eor	r0,r8,r8,ror#11
+	eor	r2,r2,r6
+	vrev32.8	q3,q3
+	add	r7,r7,r12,ror#6
+	eor	r12,r8,r9
+	eor	r0,r0,r8,ror#20
+	add	r7,r7,r2
+	vadd.i32	q8,q8,q3
+	ldr	r2,[sp,#52]
+	and	r3,r3,r12
+	add	r11,r11,r7
+	add	r7,r7,r0,ror#2
+	eor	r3,r3,r9
+	add	r6,r6,r2
+	eor	r2,r4,r5
+	eor	r0,r11,r11,ror#5
+	add	r7,r7,r3
+	and	r2,r2,r11
+	eor	r3,r0,r11,ror#19
+	eor	r0,r7,r7,ror#11
+	eor	r2,r2,r5
+	add	r6,r6,r3,ror#6
+	eor	r3,r7,r8
+	eor	r0,r0,r7,ror#20
+	add	r6,r6,r2
+	ldr	r2,[sp,#56]
+	and	r12,r12,r3
+	add	r10,r10,r6
+	add	r6,r6,r0,ror#2
+	eor	r12,r12,r8
+	add	r5,r5,r2
+	eor	r2,r11,r4
+	eor	r0,r10,r10,ror#5
+	add	r6,r6,r12
+	and	r2,r2,r10
+	eor	r12,r0,r10,ror#19
+	eor	r0,r6,r6,ror#11
+	eor	r2,r2,r4
+	add	r5,r5,r12,ror#6
+	eor	r12,r6,r7
+	eor	r0,r0,r6,ror#20
+	add	r5,r5,r2
+	ldr	r2,[sp,#60]
+	and	r3,r3,r12
+	add	r9,r9,r5
+	add	r5,r5,r0,ror#2
+	eor	r3,r3,r7
+	add	r4,r4,r2
+	eor	r2,r10,r11
+	eor	r0,r9,r9,ror#5
+	add	r5,r5,r3
+	and	r2,r2,r9
+	eor	r3,r0,r9,ror#19
+	eor	r0,r5,r5,ror#11
+	eor	r2,r2,r11
+	add	r4,r4,r3,ror#6
+	eor	r3,r5,r6
+	eor	r0,r0,r5,ror#20
+	add	r4,r4,r2
+	ldr	r2,[sp,#64]
+	and	r12,r12,r3
+	add	r8,r8,r4
+	add	r4,r4,r0,ror#2
+	eor	r12,r12,r6
+	vst1.32	{q8},[r1,:128]!
+	ldr	r0,[r2,#0]
+	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
+	ldr	r12,[r2,#4]
+	ldr	r3,[r2,#8]
+	ldr	r1,[r2,#12]
+	add	r4,r4,r0			@ accumulate
+	ldr	r0,[r2,#16]
+	add	r5,r5,r12
+	ldr	r12,[r2,#20]
+	add	r6,r6,r3
+	ldr	r3,[r2,#24]
+	add	r7,r7,r1
+	ldr	r1,[r2,#28]
+	add	r8,r8,r0
+	str	r4,[r2],#4
+	add	r9,r9,r12
+	str	r5,[r2],#4
+	add	r10,r10,r3
+	str	r6,[r2],#4
+	add	r11,r11,r1
+	str	r7,[r2],#4
+	stmia	r2,{r8-r11}
+
+	ittte	ne
+	movne	r1,sp
+	ldrne	r2,[sp,#0]
+	eorne	r12,r12,r12
+	ldreq	sp,[sp,#76]			@ restore original sp
+	itt	ne
+	eorne	r3,r5,r6
+	bne	.L_00_48
+
+	ldmia	sp!,{r4-r12,pc}
+.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
+# else
+#  define INST(a,b,c,d)	.byte	a,b,c,d
+# endif
+
+.type	sha256_block_data_order_armv8,%function
+.align	5
+sha256_block_data_order_armv8:
+.LARMv8:
+	vld1.32	{q0,q1},[r0]
+# ifdef __thumb2__
+	adr	r3,.LARMv8
+	sub	r3,r3,#.LARMv8-K256
+# else
+	adrl	r3,K256
+# endif
+	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
+
+.Loop_v8:
+	vld1.8		{q8-q9},[r1]!
+	vld1.8		{q10-q11},[r1]!
+	vld1.32		{q12},[r3]!
+	vrev32.8	q8,q8
+	vrev32.8	q9,q9
+	vrev32.8	q10,q10
+	vrev32.8	q11,q11
+	vmov		q14,q0	@ offload
+	vmov		q15,q1
+	teq		r1,r2
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q8
+	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q9
+	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q10
+	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q11
+	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q8
+	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q9
+	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q10
+	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q11
+	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q8
+	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q9
+	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q10
+	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q11
+	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
+	vld1.32		{q13},[r3]!
+	vadd.i32	q12,q12,q8
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+
+	vld1.32		{q12},[r3]!
+	vadd.i32	q13,q13,q9
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+
+	vld1.32		{q13},[r3]
+	vadd.i32	q12,q12,q10
+	sub		r3,r3,#256-16	@ rewind
+	vmov		q2,q0
+	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
+	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
+
+	vadd.i32	q13,q13,q11
+	vmov		q2,q0
+	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
+	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
+
+	vadd.i32	q0,q0,q14
+	vadd.i32	q1,q1,q15
+	it		ne
+	bne		.Loop_v8
+
+	vst1.32		{q0,q1},[r0]
+
+	bx	lr		@ bx lr
+.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm   OPENSSL_armcap_P,4,4
+#endif
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
new file mode 100644
index 000000000000..a84e869ef900
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.c
@@ -0,0 +1,128 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using optimized ARM assembler and NEON instructions.
+ *
+ * Copyright © 2015 Google Inc.
+ *
+ * This file is based on sha256_ssse3_glue.c:
+ *   Copyright (C) 2013 Intel Corporation
+ *   Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+					unsigned int num_blks);
+
+int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	/* make sure casting to sha256_block_fn() is safe */
+	BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
+
+	return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+}
+EXPORT_SYMBOL(crypto_sha256_arm_update);
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+	sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+	return sha256_base_finish(desc, out);
+}
+
+int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
+			    unsigned int len, u8 *out)
+{
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha256_block_data_order);
+	return sha256_final(desc, out);
+}
+EXPORT_SYMBOL(crypto_sha256_arm_finup);
+
+static struct shash_alg algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	crypto_sha256_arm_update,
+	.final		=	sha256_final,
+	.finup		=	crypto_sha256_arm_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-asm",
+		.cra_priority	=	150,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	crypto_sha256_arm_update,
+	.final		=	sha256_final,
+	.finup		=	crypto_sha256_arm_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-asm",
+		.cra_priority	=	150,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int __init sha256_mod_init(void)
+{
+	int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+	if (res < 0)
+		return res;
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
+		res = crypto_register_shashes(sha256_neon_algs,
+					      ARRAY_SIZE(sha256_neon_algs));
+
+		if (res < 0)
+			crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	}
+
+	return res;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
+		crypto_unregister_shashes(sha256_neon_algs,
+					  ARRAY_SIZE(sha256_neon_algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
+
+MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
new file mode 100644
index 000000000000..7cf0bf786ada
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.h
@@ -0,0 +1,14 @@
+#ifndef _CRYPTO_SHA256_GLUE_H
+#define _CRYPTO_SHA256_GLUE_H
+
+#include <linux/crypto.h>
+
+extern struct shash_alg sha256_neon_algs[2];
+
+int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len);
+
+int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
+			    unsigned int len, u8 *hash);
+
+#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..39ccd658817e
--- /dev/null
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -0,0 +1,101 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2015 Google Inc.
+ *
+ * This file is based on sha512_neon_glue.c:
+ *   Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
+					     unsigned int num_blks);
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	if (!may_use_simd() ||
+	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+		return crypto_sha256_arm_update(desc, data, len);
+
+	kernel_neon_begin();
+	sha256_base_do_update(desc, data, len,
+			(sha256_block_fn *)sha256_block_data_order_neon);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return crypto_sha256_arm_finup(desc, data, len, out);
+
+	kernel_neon_begin();
+	if (len)
+		sha256_base_do_update(desc, data, len,
+			(sha256_block_fn *)sha256_block_data_order_neon);
+	sha256_base_do_finalize(desc,
+			(sha256_block_fn *)sha256_block_data_order_neon);
+	kernel_neon_end();
+
+	return sha256_base_finish(desc, out);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_finup(desc, NULL, 0, out);
+}
+
+struct shash_alg sha256_neon_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_update,
+	.final		=	sha256_final,
+	.finup		=	sha256_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-neon",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_update,
+	.final		=	sha256_final,
+	.finup		=	sha256_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-neon",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index fe74c0d1e485..3c4596d0ce6c 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -1,6 +1,5 @@
 
 
-generic-y += auxvec.h
 generic-y += bitsperlong.h
 generic-y += cputime.h
 generic-y += current.h
@@ -22,6 +21,7 @@ generic-y += preempt.h
 generic-y += resource.h
 generic-y += rwsem.h
 generic-y += scatterlist.h
+generic-y += seccomp.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index f67fd3afebdf..186270b3e194 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -237,6 +237,9 @@
 	.pushsection ".alt.smp.init", "a"			;\
 	.long	9998b						;\
 9997:	instr							;\
+	.if . - 9997b == 2					;\
+		nop						;\
+	.endif							;\
 	.if . - 9997b != 4					;\
 		.error "ALT_UP() content must assemble to exactly 4 bytes";\
 	.endif							;\
diff --git a/arch/arm/include/asm/auxvec.h b/arch/arm/include/asm/auxvec.h
new file mode 100644
index 000000000000..fbd388c46299
--- /dev/null
+++ b/arch/arm/include/asm/auxvec.h
@@ -0,0 +1 @@
+#include <uapi/asm/auxvec.h>
diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index af319ac4960c..0f8424924902 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ARM_CPUIDLE_H
 #define __ASM_ARM_CPUIDLE_H
 
+#include <asm/proc-fns.h>
+
 #ifdef CONFIG_CPU_IDLE
 extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index);
@@ -25,4 +27,25 @@ static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
  */
 #define ARM_CPUIDLE_WFI_STATE ARM_CPUIDLE_WFI_STATE_PWR(UINT_MAX)
 
+struct device_node;
+
+struct cpuidle_ops {
+	int (*suspend)(int cpu, unsigned long arg);
+	int (*init)(struct device_node *, int cpu);
+};
+
+struct of_cpuidle_method {
+	const char *method;
+	struct cpuidle_ops *ops;
+};
+
+#define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops)			\
+	static const struct of_cpuidle_method __cpuidle_method_of_table_##name \
+	__used __section(__cpuidle_method_of_table)			\
+	= { .method = _method, .ops = _ops }
+
+extern int arm_cpuidle_suspend(int index);
+
+extern int arm_cpuidle_init(int cpu);
+
 #endif
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 819777d0e91f..85e374f873ac 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -253,4 +253,20 @@ static inline int cpu_is_pj4(void)
 #else
 #define cpu_is_pj4()	0
 #endif
+
+static inline int __attribute_const__ cpuid_feature_extract_field(u32 features,
+								  int field)
+{
+	int feature = (features >> field) & 15;
+
+	/* feature registers are signed values */
+	if (feature > 8)
+		feature -= 16;
+
+	return feature;
+}
+
+#define cpuid_feature_extract(reg, field) \
+	cpuid_feature_extract_field(read_cpuid_ext(reg), field)
+
 #endif
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index afb9cafd3786..d2315ffd8f12 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -1,7 +1,9 @@
 #ifndef __ASMARM_ELF_H
 #define __ASMARM_ELF_H
 
+#include <asm/auxvec.h>
 #include <asm/hwcap.h>
+#include <asm/vdso_datapage.h>
 
 /*
  * ELF register definitions..
@@ -115,7 +117,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE	(2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 
@@ -125,11 +127,14 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
 extern void elf_set_personality(const struct elf32_hdr *);
 #define SET_PERSONALITY(ex)	elf_set_personality(&(ex))
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_MMU
+#ifdef CONFIG_VDSO
+#define ARCH_DLINFO						\
+do {								\
+	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
+		    (elf_addr_t)current->mm->context.vdso);	\
+} while (0)
+#endif
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
 int arch_setup_additional_pages(struct linux_binprm *, int);
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 53e69dae796f..4e78065a16aa 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -13,7 +13,7 @@
 	"	.align	3\n"					\
 	"	.long	1b, 4f, 2b, 4f\n"			\
 	"	.popsection\n"					\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"4:	mov	%0, " err_reg "\n"			\
 	"	b	3b\n"					\
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 70f9b9bfb1f9..5f337dc5c108 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_ARM_JUMP_LABEL_H
 #define _ASM_ARM_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -27,8 +27,6 @@ l_yes:
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -37,4 +35,5 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 816db0bf2dd8..d995821f1698 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -185,6 +185,7 @@
 #define HSR_COND	(0xfU << HSR_COND_SHIFT)
 
 #define FSC_FAULT	(0x04)
+#define FSC_ACCESS	(0x08)
 #define FSC_PERM	(0x0c)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 41008cd7c53f..d71607c16601 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -27,6 +27,8 @@
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-			      unsigned long end)
-{
-	return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-	return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 							 unsigned long address)
 {
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index 3f83db2f6cf0..d8e90c8cb5fa 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,28 +28,6 @@ struct kvm_decode {
 	bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-	phys_addr_t	phys_addr;
-	u8		data[8];
-	u32		len;
-	bool		is_write;
-	void		*private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	run->mmio.phys_addr	= mmio->phys_addr;
-	run->mmio.len		= mmio->len;
-	run->mmio.is_write	= mmio->is_write;
-	memcpy(run->mmio.data, mmio->data, mmio->len);
-	run->exit_reason	= KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index bf0fe99e8ca9..405aa1883307 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+#define kvm_pgd_index(addr)			pgd_index(addr)
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
 #define kvm_pud_table_empty(kvm, pudp) (0)
 
 #define KVM_PREALLOC_LEVEL	0
 
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
-	return 0;
+	return kvm->arch.pgd;
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	return kvm->arch.pgd;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 struct kvm;
@@ -270,6 +269,16 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
+static inline bool __kvm_cpu_uses_extended_idmap(void)
+{
+	return false;
+}
+
+static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
+				       pgd_t *hyp_pgd,
+				       pgd_t *merged_hyp_pgd,
+				       unsigned long hyp_idmap_start) { }
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 90c12e1e695c..0f79e4dec7f9 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -12,8 +12,7 @@
 
 extern void timer_tick(void);
 
-struct timespec;
-typedef void (*clock_access_fn)(struct timespec *);
+typedef void (*clock_access_fn)(struct timespec64 *);
 extern int register_persistent_clock(clock_access_fn read_boot,
 				     clock_access_fn read_persistent);
 
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 64fd15159b7d..a5b47421059d 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -11,6 +11,9 @@ typedef struct {
 #endif
 	unsigned int	vmalloc_seq;
 	unsigned long	sigpage;
+#ifdef CONFIG_VDSO
+	unsigned long	vdso;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index b1596bd59129..675e4ab79f68 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -92,6 +92,7 @@ struct pmu_hw_events {
 struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
+	int		*irq_affinity;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct perf_event *event);
diff --git a/arch/arm/include/asm/seccomp.h b/arch/arm/include/asm/seccomp.h
deleted file mode 100644
index 52b156b341f5..000000000000
--- a/arch/arm/include/asm/seccomp.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_ARM_SECCOMP_H
-#define _ASM_ARM_SECCOMP_H
-
-#include <linux/unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#endif /* _ASM_ARM_SECCOMP_H */
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index 0ad7d490ee6f..993e5224d8f7 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -104,6 +104,7 @@ static inline u32 mpidr_hash_size(void)
 	return 1 << mpidr_hash.bits;
 }
 
+extern int platform_can_secondary_boot(void);
 extern int platform_can_cpu_hotplug(void);
 
 #endif
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 72812a1f3d1c..bd32eded3e50 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -23,7 +23,6 @@
 #ifndef __ASSEMBLY__
 
 struct task_struct;
-struct exec_domain;
 
 #include <asm/types.h>
 #include <asm/domain.h>
@@ -53,7 +52,6 @@ struct thread_info {
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 	mm_segment_t		addr_limit;	/* address limit */
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	__u32			cpu;		/* cpu */
 	__u32			cpu_domain;	/* cpu domain */
 	struct cpu_context_save	cpu_context;	/* cpu context */
@@ -73,7 +71,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)						\
 {									\
 	.task		= &tsk,						\
-	.exec_domain	= &default_exec_domain,				\
 	.flags		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
 	.addr_limit	= KERNEL_DS,					\
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index ce0786efd26c..74b17d09ef7a 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -315,7 +315,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(ldrb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	mov	%1, #0\n"				\
@@ -351,7 +351,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(ldr) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	mov	%1, #0\n"				\
@@ -397,7 +397,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(strb) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	b	2b\n"					\
@@ -430,7 +430,7 @@ do {									\
 	__asm__ __volatile__(					\
 	"1:	" TUSER(str) "	%1,[%2],#0\n"			\
 	"2:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"3:	mov	%0, %3\n"				\
 	"	b	2b\n"					\
@@ -458,7 +458,7 @@ do {									\
  THUMB(	"1:	" TUSER(str) "	" __reg_oper1 ", [%1]\n"	) \
  THUMB(	"2:	" TUSER(str) "	" __reg_oper0 ", [%1, #4]\n"	) \
 	"3:\n"							\
-	"	.pushsection .fixup,\"ax\"\n"			\
+	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
 	"4:	mov	%0, %3\n"				\
 	"	b	3b\n"					\
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index b88beaba6b4a..200f9a7cd623 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -24,6 +24,14 @@
 	.syntax unified
 #endif
 
+#ifdef CONFIG_CPU_V7M
+#define AR_CLASS(x...)
+#define M_CLASS(x...)	x
+#else
+#define AR_CLASS(x...)	x
+#define M_CLASS(x...)
+#endif
+
 #ifdef CONFIG_THUMB2_KERNEL
 
 #if __GNUC__ < 4
diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
new file mode 100644
index 000000000000..d0295f1dd1a3
--- /dev/null
+++ b/arch/arm/include/asm/vdso.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_VDSO_H
+#define __ASM_VDSO_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+#ifdef CONFIG_VDSO
+
+void arm_install_vdso(struct mm_struct *mm, unsigned long addr);
+
+extern char vdso_start, vdso_end;
+
+extern unsigned int vdso_total_pages;
+
+#else /* CONFIG_VDSO */
+
+static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
+{
+}
+
+#define vdso_total_pages 0
+
+#endif /* CONFIG_VDSO */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_VDSO_H */
diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h
new file mode 100644
index 000000000000..9be259442fca
--- /dev/null
+++ b/arch/arm/include/asm/vdso_datapage.h
@@ -0,0 +1,60 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_VDSO_DATAPAGE_H
+#define __ASM_VDSO_DATAPAGE_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+/* Try to be cache-friendly on systems that don't implement the
+ * generic timer: fit the unconditionally updated fields in the first
+ * 32 bytes.
+ */
+struct vdso_data {
+	u32 seq_count;		/* sequence count - odd during updates */
+	u16 tk_is_cntvct;	/* fall back to syscall if false */
+	u16 cs_shift;		/* clocksource shift */
+	u32 xtime_coarse_sec;	/* coarse time */
+	u32 xtime_coarse_nsec;
+
+	u32 wtm_clock_sec;	/* wall to monotonic offset */
+	u32 wtm_clock_nsec;
+	u32 xtime_clock_sec;	/* CLOCK_REALTIME - seconds */
+	u32 cs_mult;		/* clocksource multiplier */
+
+	u64 cs_cycle_last;	/* last cycle value */
+	u64 cs_mask;		/* clocksource mask */
+
+	u64 xtime_clock_snsec;	/* CLOCK_REALTIME sub-ns base */
+	u32 tz_minuteswest;	/* timezone info for gettimeofday(2) */
+	u32 tz_dsttime;
+};
+
+union vdso_data_store {
+	struct vdso_data data;
+	u8 page[PAGE_SIZE];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
index a6d0a29861e7..5831dce4b51c 100644
--- a/arch/arm/include/asm/word-at-a-time.h
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -71,7 +71,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 	asm(
 	"1:	ldr	%0, [%2]\n"
 	"2:\n"
-	"	.pushsection .fixup,\"ax\"\n"
+	"	.pushsection .text.fixup,\"ax\"\n"
 	"	.align 2\n"
 	"3:	and	%1, %2, #0x3\n"
 	"	bic	%2, %2, #0x3\n"
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 70a1c9da30ca..a1c05f93d920 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += auxvec.h
 header-y += byteorder.h
 header-y += fcntl.h
 header-y += hwcap.h
diff --git a/arch/arm/include/uapi/asm/auxvec.h b/arch/arm/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000000..cb02a767a500
--- /dev/null
+++ b/arch/arm/include/uapi/asm/auxvec.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_AUXVEC_H
+#define __ASM_AUXVEC_H
+
+/* VDSO location */
+#define AT_SYSINFO_EHDR	33
+
+#endif
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 0db25bc32864..2499867dd0d8 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -198,6 +198,9 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE		0x95c1ba5e
 #define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 902397dd1000..752725dcbf42 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 # Object file lists.
 
 obj-y		:= elf.o entry-common.o irq.o opcodes.o \
-		   process.o ptrace.o return_address.o \
+		   process.o ptrace.o reboot.o return_address.o \
 		   setup.o signal.o sigreturn_codes.o \
 		   stacktrace.o sys_arm.o time.o traps.o
 
@@ -34,7 +34,6 @@ obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o
 obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
 obj-$(CONFIG_MODULES)		+= armksyms.o module.o
-obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
@@ -75,6 +74,7 @@ obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
+obj-$(CONFIG_VDSO)		+= vdso.o
 
 ifneq ($(CONFIG_ARCH_EBSA110),y)
   obj-y		+= io.o
@@ -86,7 +86,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 ifeq ($(CONFIG_ARM_PSCI),y)
-obj-y				+= psci.o
+obj-y				+= psci.o psci-call.o
 obj-$(CONFIG_SMP)		+= psci_smp.o
 endif
 
diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c
deleted file mode 100644
index 321c5291d05f..000000000000
--- a/arch/arm/kernel/arthur.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  linux/arch/arm/kernel/arthur.c
- *
- *  Copyright (C) 1998, 1999, 2000, 2001 Philip Blundell
- *
- * Arthur personality
- */
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/personality.h>
-#include <linux/stddef.h>
-#include <linux/signal.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-
-#include <asm/ptrace.h>
-
-/* Arthur doesn't have many signals, and a lot of those that it does
-   have don't map easily to any Linux equivalent.  Never mind.  */
-
-#define ARTHUR_SIGABRT		1
-#define ARTHUR_SIGFPE		2
-#define ARTHUR_SIGILL		3
-#define ARTHUR_SIGINT		4
-#define ARTHUR_SIGSEGV		5
-#define ARTHUR_SIGTERM		6
-#define ARTHUR_SIGSTAK		7
-#define ARTHUR_SIGUSR1		8
-#define ARTHUR_SIGUSR2		9
-#define ARTHUR_SIGOSERROR	10
-
-static unsigned long arthur_to_linux_signals[32] = {
-	0,	1,	2,	3,	4,	5,	6,	7,
-	8,	9,	10,	11,	12,	13,	14,	15,
-	16,	17,	18,	19,	20,	21,	22,	23,
-	24,	25,	26,	27,	28,	29,	30,	31
-};
-
-static unsigned long linux_to_arthur_signals[32] = {
-	0,		-1,		ARTHUR_SIGINT,	-1,
-       	ARTHUR_SIGILL,	5,		ARTHUR_SIGABRT,	7,
-	ARTHUR_SIGFPE,	9,		ARTHUR_SIGUSR1,	ARTHUR_SIGSEGV,	
-	ARTHUR_SIGUSR2,	13,		14,		ARTHUR_SIGTERM,
-	16,		17,		18,		19,
-	20,		21,		22,		23,
-	24,		25,		26,		27,
-	28,		29,		30,		31
-};
-
-static void arthur_lcall7(int nr, struct pt_regs *regs)
-{
-	struct siginfo info;
-	info.si_signo = SIGSWI;
-	info.si_errno = nr;
-	/* Bounce it to the emulator */
-	send_sig_info(SIGSWI, &info, current);
-}
-
-static struct exec_domain arthur_exec_domain = {
-	.name		= "Arthur",
-	.handler	= arthur_lcall7,
-	.pers_low	= PER_RISCOS,
-	.pers_high	= PER_RISCOS,
-	.signal_map	= arthur_to_linux_signals,
-	.signal_invmap	= linux_to_arthur_signals,
-	.module		= THIS_MODULE,
-};
-
-/*
- * We could do with some locking to stop Arthur being removed while
- * processes are using it.
- */
-
-static int __init arthur_init(void)
-{
-	return register_exec_domain(&arthur_exec_domain);
-}
-
-static void __exit arthur_exit(void)
-{
-	unregister_exec_domain(&arthur_exec_domain);
-}
-
-module_init(arthur_init);
-module_exit(arthur_exit);
-
-MODULE_LICENSE("GPL");
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 2d2d6087b9b1..871b8267d211 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -25,6 +25,7 @@
 #include <asm/memory.h>
 #include <asm/procinfo.h>
 #include <asm/suspend.h>
+#include <asm/vdso_datapage.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
@@ -66,7 +67,6 @@ int main(void)
   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
   DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
-  DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain));
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
   DEFINE(TI_CPU_SAVE,		offsetof(struct thread_info, cpu_context));
@@ -190,7 +190,6 @@ int main(void)
   DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
   DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.fault.hpfar));
   DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
-#ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
   DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
@@ -200,15 +199,16 @@ int main(void)
   DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
-#ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
   DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
   DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
-#endif
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
-#endif
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
 #endif
+  BLANK();
+#ifdef CONFIG_VDSO
+  DEFINE(VDSO_DATA_SIZE,	sizeof(union vdso_data_store));
+#endif
   return 0; 
 }
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index ab19b7c03423..fcbbbb1b9e95 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -618,21 +618,15 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
-	struct pci_sys_data *root = dev->sysdata;
-	unsigned long phys;
-
-	if (mmap_state == pci_mmap_io) {
+	if (mmap_state == pci_mmap_io)
 		return -EINVAL;
-	} else {
-		phys = vma->vm_pgoff + (root->mem_offset >> PAGE_SHIFT);
-	}
 
 	/*
 	 * Mark this as IO
 	 */
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	if (remap_pfn_range(vma, vma->vm_start, phys,
+	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 			     vma->vm_end - vma->vm_start,
 			     vma->vm_page_prot))
 		return -EAGAIN;
diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
index 89545f6c8403..318da33465f4 100644
--- a/arch/arm/kernel/cpuidle.c
+++ b/arch/arm/kernel/cpuidle.c
@@ -10,8 +10,28 @@
  */
 
 #include <linux/cpuidle.h>
-#include <asm/proc-fns.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <asm/cpuidle.h>
 
+extern struct of_cpuidle_method __cpuidle_method_of_table[];
+
+static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel
+	__used __section(__cpuidle_method_of_table_end);
+
+static struct cpuidle_ops cpuidle_ops[NR_CPUS];
+
+/**
+ * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle()
+ * @dev: not used
+ * @drv: not used
+ * @index: not used
+ *
+ * A trivial wrapper to allow the cpu_do_idle function to be assigned as a
+ * cpuidle callback by matching the function signature.
+ *
+ * Returns the index passed as parameter
+ */
 int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index)
 {
@@ -19,3 +39,114 @@ int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 
 	return index;
 }
+
+/**
+ * arm_cpuidle_suspend() - function to enter low power idle states
+ * @index: an integer used as an identifier for the low level PM callbacks
+ *
+ * This function calls the underlying arch specific low level PM code as
+ * registered at the init time.
+ *
+ * Returns -EOPNOTSUPP if no suspend callback is defined, the result of the
+ * callback otherwise.
+ */
+int arm_cpuidle_suspend(int index)
+{
+	int ret = -EOPNOTSUPP;
+	int cpu = smp_processor_id();
+
+	if (cpuidle_ops[cpu].suspend)
+		ret = cpuidle_ops[cpu].suspend(cpu, index);
+
+	return ret;
+}
+
+/**
+ * arm_cpuidle_get_ops() - find a registered cpuidle_ops by name
+ * @method: the method name
+ *
+ * Search in the __cpuidle_method_of_table array the cpuidle ops matching the
+ * method name.
+ *
+ * Returns a struct cpuidle_ops pointer, NULL if not found.
+ */
+static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
+{
+	struct of_cpuidle_method *m = __cpuidle_method_of_table;
+
+	for (; m->method; m++)
+		if (!strcmp(m->method, method))
+			return m->ops;
+
+	return NULL;
+}
+
+/**
+ * arm_cpuidle_read_ops() - Initialize the cpuidle ops with the device tree
+ * @dn: a pointer to a struct device node corresponding to a cpu node
+ * @cpu: the cpu identifier
+ *
+ * Get the method name defined in the 'enable-method' property, retrieve the
+ * associated cpuidle_ops and do a struct copy. This copy is needed because all
+ * cpuidle_ops are tagged __initdata and will be unloaded after the init
+ * process.
+ *
+ * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if
+ * no cpuidle_ops is registered for the 'enable-method'.
+ */
+static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu)
+{
+	const char *enable_method;
+	struct cpuidle_ops *ops;
+
+	enable_method = of_get_property(dn, "enable-method", NULL);
+	if (!enable_method)
+		return -ENOENT;
+
+	ops = arm_cpuidle_get_ops(enable_method);
+	if (!ops) {
+		pr_warn("%s: unsupported enable-method property: %s\n",
+			dn->full_name, enable_method);
+		return -EOPNOTSUPP;
+	}
+
+	cpuidle_ops[cpu] = *ops; /* structure copy */
+
+	pr_notice("cpuidle: enable-method property '%s'"
+		  " found operations\n", enable_method);
+
+	return 0;
+}
+
+/**
+ * arm_cpuidle_init() - Initialize cpuidle_ops for a specific cpu
+ * @cpu: the cpu to be initialized
+ *
+ * Initialize the cpuidle ops with the device for the cpu and then call
+ * the cpu's idle initialization callback. This may fail if the underlying HW
+ * is not operational.
+ *
+ * Returns:
+ *  0 on success,
+ *  -ENODEV if it fails to find the cpu node in the device tree,
+ *  -EOPNOTSUPP if it does not find a registered cpuidle_ops for this cpu,
+ *  -ENOENT if it fails to find an 'enable-method' property,
+ *  -ENXIO if the HW reports a failure or a misconfiguration,
+ *  -ENOMEM if the HW report an memory allocation failure 
+ */
+int __init arm_cpuidle_init(int cpu)
+{
+	struct device_node *cpu_node = of_cpu_device_node_get(cpu);
+	int ret;
+
+	if (!cpu_node)
+		return -ENODEV;
+
+	ret = arm_cpuidle_read_ops(cpu_node, cpu);
+	if (!ret && cpuidle_ops[cpu].init)
+		ret = cpuidle_ops[cpu].init(cpu_node, cpu);
+
+	of_node_put(cpu_node);
+
+	return ret;
+}
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 672b21942fff..570306c49406 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -545,7 +545,7 @@ ENDPROC(__und_usr)
 /*
  * The out of line fixup for the ldrt instructions above.
  */
-	.pushsection .fixup, "ax"
+	.pushsection .text.fixup, "ax"
 	.align	2
 4:	str     r4, [sp, #S_PC]			@ retry current instruction
 	ret	r9
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 01963273c07a..3637973a9708 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -138,9 +138,9 @@ ENTRY(stext)
 						@ mmu has been enabled
 	adr	lr, BSYM(1f)			@ return (PIC) address
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
 1:	b	__enable_mmu
 ENDPROC(stext)
 	.ltorg
@@ -386,10 +386,10 @@ ENTRY(secondary_startup)
 	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
 	adr	lr, BSYM(__enable_mmu)		@ return address
 	mov	r13, r12			@ __secondary_switched address
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
-						  @ (return control reg)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10			@ initialise processor
+						@ (return control reg)
+	ret	r12
 ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
 
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
index c4cc50e58c13..a71501ff6f18 100644
--- a/arch/arm/kernel/hibernate.c
+++ b/arch/arm/kernel/hibernate.c
@@ -22,6 +22,7 @@
 #include <asm/suspend.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
+#include "reboot.h"
 
 int pfn_is_nosave(unsigned long pfn)
 {
@@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused)
 
 	ret = swsusp_save();
 	if (ret == 0)
-		soft_restart(virt_to_phys(cpu_resume));
+		_soft_restart(virt_to_phys(cpu_resume), false);
 	return ret;
 }
 
@@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused)
 	for (pbe = restore_pblist; pbe; pbe = pbe->next)
 		copy_page(pbe->orig_address, pbe->address);
 
-	soft_restart(virt_to_phys(cpu_resume));
+	_soft_restart(virt_to_phys(cpu_resume), false);
 }
 
 static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
@@ -99,7 +100,6 @@ static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
  */
 int swsusp_arch_resume(void)
 {
-	extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
 	call_with_stack(arch_restore_image, 0,
 		resume_stack + ARRAY_SIZE(resume_stack));
 	return 0;
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7fc70ae21185..dc7d0a95bd36 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 		 * Per-cpu breakpoints are not supported by our stepping
 		 * mechanism.
 		 */
-		if (!bp->hw.bp_target)
+		if (!bp->hw.target)
 			return -EINVAL;
 
 		/*
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index de2b085ad753..8bf3b7c09888 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -46,7 +46,8 @@ int machine_kexec_prepare(struct kimage *image)
 	 * and implements CPU hotplug for the current HW. If not, we won't be
 	 * able to kexec reliably, so fail the prepare operation.
 	 */
-	if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug())
+	if (num_possible_cpus() > 1 && platform_can_secondary_boot() &&
+	    !platform_can_cpu_hotplug())
 		return -EINVAL;
 
 	/*
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 2e11961f65ae..af791f4a6205 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -98,14 +98,19 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 		case R_ARM_PC24:
 		case R_ARM_CALL:
 		case R_ARM_JUMP24:
+			if (sym->st_value & 3) {
+				pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n",
+				       module->name, relindex, i, symname);
+				return -ENOEXEC;
+			}
+
 			offset = __mem_to_opcode_arm(*(u32 *)loc);
 			offset = (offset & 0x00ffffff) << 2;
 			if (offset & 0x02000000)
 				offset -= 0x04000000;
 
 			offset += sym->st_value - loc;
-			if (offset & 3 ||
-			    offset <= (s32)0xfe000000 ||
+			if (offset <= (s32)0xfe000000 ||
 			    offset >= (s32)0x02000000) {
 				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
 				       module->name, relindex, i, symname,
@@ -155,6 +160,22 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 #ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
+			/*
+			 * For function symbols, only Thumb addresses are
+			 * allowed (no interworking).
+			 *
+			 * For non-function symbols, the destination
+			 * has no specific ARM/Thumb disposition, so
+			 * the branch is resolved under the assumption
+			 * that interworking is not required.
+			 */
+			if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
+			    !(sym->st_value & 1)) {
+				pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n",
+				       module->name, relindex, i, symname);
+				return -ENOEXEC;
+			}
+
 			upper = __mem_to_opcode_thumb16(*(u16 *)loc);
 			lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
 
@@ -182,18 +203,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 				offset -= 0x02000000;
 			offset += sym->st_value - loc;
 
-			/*
-			 * For function symbols, only Thumb addresses are
-			 * allowed (no interworking).
-			 *
-			 * For non-function symbols, the destination
-			 * has no specific ARM/Thumb disposition, so
-			 * the branch is resolved under the assumption
-			 * that interworking is not required.
-			 */
-			if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
-				!(offset & 1)) ||
-			    offset <= (s32)0xff000000 ||
+			if (offset <= (s32)0xff000000 ||
 			    offset >= (s32)0x01000000) {
 				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
 				       module->name, relindex, i, symname,
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 557e128e4df0..4a86a0133ac3 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -259,20 +259,29 @@ out:
 }
 
 static int
-validate_event(struct pmu_hw_events *hw_events,
-	       struct perf_event *event)
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
 {
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct arm_pmu *armpmu;
 
 	if (is_software_event(event))
 		return 1;
 
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
 	if (event->state < PERF_EVENT_STATE_OFF)
 		return 1;
 
 	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
 		return 1;
 
+	armpmu = to_arm_pmu(event->pmu);
 	return armpmu->get_event_idx(hw_events, event) >= 0;
 }
 
@@ -288,15 +297,15 @@ validate_group(struct perf_event *event)
 	 */
 	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 
-	if (!validate_event(&fake_pmu, leader))
+	if (!validate_event(event->pmu, &fake_pmu, leader))
 		return -EINVAL;
 
 	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(&fake_pmu, sibling))
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
 			return -EINVAL;
 	}
 
-	if (!validate_event(&fake_pmu, event))
+	if (!validate_event(event->pmu, &fake_pmu, event))
 		return -EINVAL;
 
 	return 0;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 61b53c46edfa..91c7ba182dcd 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -92,11 +92,16 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 		free_percpu_irq(irq, &hw_events->percpu_pmu);
 	} else {
 		for (i = 0; i < irqs; ++i) {
-			if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+			int cpu = i;
+
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq >= 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i));
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 		}
 	}
 }
@@ -128,32 +133,37 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
 	} else {
 		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
 			err = 0;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq < 0)
 				continue;
 
+			if (cpu_pmu->irq_affinity)
+				cpu = cpu_pmu->irq_affinity[i];
+
 			/*
 			 * If we have a single PMU interrupt that we can't shift,
 			 * assume that we're running on a uniprocessor machine and
 			 * continue. Otherwise, continue without this interrupt.
 			 */
-			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
 				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, i);
+					irq, cpu);
 				continue;
 			}
 
 			err = request_irq(irq, handler,
 					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, i));
+					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 			if (err) {
 				pr_err("unable to request IRQ%d for ARM PMU counters\n",
 					irq);
 				return err;
 			}
 
-			cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
 		}
 	}
 
@@ -243,6 +253,8 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = {
 	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
 	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
 	{.compatible = "qcom,krait-pmu",	.data = krait_pmu_init},
+	{.compatible = "qcom,scorpion-pmu",	.data = scorpion_pmu_init},
+	{.compatible = "qcom,scorpion-mp-pmu",	.data = scorpion_mp_pmu_init},
 	{},
 };
 
@@ -289,6 +301,48 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 	return ret;
 }
 
+static int of_pmu_irq_cfg(struct platform_device *pdev)
+{
+	int i;
+	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(dn), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+	}
+
+	if (i == pdev->num_resources)
+		cpu_pmu->irq_affinity = irqs;
+	else
+		kfree(irqs);
+
+	return 0;
+}
+
 static int cpu_pmu_device_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
@@ -313,7 +367,10 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
 		init_fn = of_id->data;
-		ret = init_fn(pmu);
+
+		ret = of_pmu_irq_cfg(pdev);
+		if (!ret)
+			ret = init_fn(pmu);
 	} else {
 		ret = probe_current_pmu(pmu);
 	}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 8993770c47de..f4207a4dcb01 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -140,6 +140,23 @@ enum krait_perf_types {
 	KRAIT_PERFCTR_L1_DTLB_ACCESS			= 0x12210,
 };
 
+/* ARMv7 Scorpion specific event types */
+enum scorpion_perf_types {
+	SCORPION_LPM0_GROUP0				= 0x4c,
+	SCORPION_LPM1_GROUP0				= 0x50,
+	SCORPION_LPM2_GROUP0				= 0x54,
+	SCORPION_L2LPM_GROUP0				= 0x58,
+	SCORPION_VLPM_GROUP0				= 0x5c,
+
+	SCORPION_ICACHE_ACCESS				= 0x10053,
+	SCORPION_ICACHE_MISS				= 0x10052,
+
+	SCORPION_DTLB_ACCESS				= 0x12013,
+	SCORPION_DTLB_MISS				= 0x12012,
+
+	SCORPION_ITLB_MISS				= 0x12021,
+};
+
 /*
  * Cortex-A8 HW events mapping
  *
@@ -482,6 +499,49 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 };
 
 /*
+ * Scorpion HW events mapping
+ */
+static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					    [PERF_COUNT_HW_CACHE_OP_MAX]
+					    [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+	/*
+	 * The performance counters don't differentiate between read and write
+	 * accesses/misses so this isn't strictly correct, but it's the best we
+	 * can do. Writes and reads get combined.
+	 */
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
+	/*
+	 * Only ITLB misses and DTLB refills are supported.  If users want the
+	 * DTLB refills misses a raw counter must be used.
+	 */
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+/*
  * Perf Events' indices
  */
 #define	ARMV7_IDX_CYCLE_COUNTER	0
@@ -976,6 +1036,12 @@ static int krait_map_event_no_branch(struct perf_event *event)
 				&krait_perf_cache_map, 0xFFFFF);
 }
 
+static int scorpion_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &scorpion_perf_map,
+				&scorpion_perf_cache_map, 0xFFFFF);
+}
+
 static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->handle_irq	= armv7pmu_handle_irq;
@@ -1103,6 +1169,12 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
 #define KRAIT_EVENT_MASK	(KRAIT_EVENT | VENUM_EVENT)
 #define PMRESRn_EN		BIT(31)
 
+#define EVENT_REGION(event)	(((event) >> 12) & 0xf)		/* R */
+#define EVENT_GROUP(event)	((event) & 0xf)			/* G */
+#define EVENT_CODE(event)	(((event) >> 4) & 0xff)		/* CC */
+#define EVENT_VENUM(event)	(!!(event & VENUM_EVENT))	/* N=2 */
+#define EVENT_CPU(event)	(!!(event & KRAIT_EVENT))	/* N=1 */
+
 static u32 krait_read_pmresrn(int n)
 {
 	u32 val;
@@ -1141,19 +1213,19 @@ static void krait_write_pmresrn(int n, u32 val)
 	}
 }
 
-static u32 krait_read_vpmresr0(void)
+static u32 venum_read_pmresr(void)
 {
 	u32 val;
 	asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
 	return val;
 }
 
-static void krait_write_vpmresr0(u32 val)
+static void venum_write_pmresr(u32 val)
 {
 	asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
 }
 
-static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val)
+static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
 {
 	u32 venum_new_val;
 	u32 fp_new_val;
@@ -1170,7 +1242,7 @@ static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val)
 	fmxr(FPEXC, fp_new_val);
 }
 
-static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val)
+static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
 {
 	BUG_ON(preemptible());
 	/* Restore FPEXC */
@@ -1193,16 +1265,11 @@ static void krait_evt_setup(int idx, u32 config_base)
 	u32 val;
 	u32 mask;
 	u32 vval, fval;
-	unsigned int region;
-	unsigned int group;
-	unsigned int code;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
 	unsigned int group_shift;
-	bool venum_event;
-
-	venum_event = !!(config_base & VENUM_EVENT);
-	region = (config_base >> 12) & 0xf;
-	code   = (config_base >> 4) & 0xff;
-	group  = (config_base >> 0)  & 0xf;
+	bool venum_event = EVENT_VENUM(config_base);
 
 	group_shift = group * 8;
 	mask = 0xff << group_shift;
@@ -1217,16 +1284,14 @@ static void krait_evt_setup(int idx, u32 config_base)
 	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
 	armv7_pmnc_write_evtsel(idx, val);
 
-	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
-
 	if (venum_event) {
-		krait_pre_vpmresr0(&vval, &fval);
-		val = krait_read_vpmresr0();
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
 		val &= ~mask;
 		val |= code << group_shift;
 		val |= PMRESRn_EN;
-		krait_write_vpmresr0(val);
-		krait_post_vpmresr0(vval, fval);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
 	} else {
 		val = krait_read_pmresrn(region);
 		val &= ~mask;
@@ -1236,7 +1301,7 @@ static void krait_evt_setup(int idx, u32 config_base)
 	}
 }
 
-static u32 krait_clear_pmresrn_group(u32 val, int group)
+static u32 clear_pmresrn_group(u32 val, int group)
 {
 	u32 mask;
 	int group_shift;
@@ -1256,23 +1321,19 @@ static void krait_clearpmu(u32 config_base)
 {
 	u32 val;
 	u32 vval, fval;
-	unsigned int region;
-	unsigned int group;
-	bool venum_event;
-
-	venum_event = !!(config_base & VENUM_EVENT);
-	region = (config_base >> 12) & 0xf;
-	group  = (config_base >> 0)  & 0xf;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
 
 	if (venum_event) {
-		krait_pre_vpmresr0(&vval, &fval);
-		val = krait_read_vpmresr0();
-		val = krait_clear_pmresrn_group(val, group);
-		krait_write_vpmresr0(val);
-		krait_post_vpmresr0(vval, fval);
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
 	} else {
 		val = krait_read_pmresrn(region);
-		val = krait_clear_pmresrn_group(val, group);
+		val = clear_pmresrn_group(val, group);
 		krait_write_pmresrn(region, val);
 	}
 }
@@ -1342,6 +1403,8 @@ static void krait_pmu_enable_event(struct perf_event *event)
 static void krait_pmu_reset(void *info)
 {
 	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	armv7pmu_reset(info);
 
@@ -1350,9 +1413,16 @@ static void krait_pmu_reset(void *info)
 	krait_write_pmresrn(1, 0);
 	krait_write_pmresrn(2, 0);
 
-	krait_pre_vpmresr0(&vval, &fval);
-	krait_write_vpmresr0(0);
-	krait_post_vpmresr0(vval, fval);
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+
 }
 
 static int krait_event_to_bit(struct perf_event *event, unsigned int region,
@@ -1386,26 +1456,18 @@ static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
 {
 	int idx;
 	int bit = -1;
-	unsigned int prefix;
-	unsigned int region;
-	unsigned int code;
-	unsigned int group;
-	bool krait_event;
 	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int code = EVENT_CODE(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool krait_event = EVENT_CPU(hwc->config_base);
 
-	region = (hwc->config_base >> 12) & 0xf;
-	code   = (hwc->config_base >> 4) & 0xff;
-	group  = (hwc->config_base >> 0) & 0xf;
-	krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK);
-
-	if (krait_event) {
+	if (venum_event || krait_event) {
 		/* Ignore invalid events */
 		if (group > 3 || region > 2)
 			return -EINVAL;
-		prefix = hwc->config_base & KRAIT_EVENT_MASK;
-		if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT)
-			return -EINVAL;
-		if (prefix == VENUM_EVENT && (code & 0xe0))
+		if (venum_event && (code & 0xe0))
 			return -EINVAL;
 
 		bit = krait_event_to_bit(event, region, group);
@@ -1425,15 +1487,12 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
 {
 	int bit;
 	struct hw_perf_event *hwc = &event->hw;
-	unsigned int region;
-	unsigned int group;
-	bool krait_event;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool krait_event = EVENT_CPU(hwc->config_base);
 
-	region = (hwc->config_base >> 12) & 0xf;
-	group  = (hwc->config_base >> 0) & 0xf;
-	krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK);
-
-	if (krait_event) {
+	if (venum_event || krait_event) {
 		bit = krait_event_to_bit(event, region, group);
 		clear_bit(bit, cpuc->used_mask);
 	}
@@ -1458,6 +1517,344 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
 	return 0;
 }
+
+/*
+ * Scorpion Local Performance Monitor Register (LPMn)
+ *
+ *            31   30     24     16     8      0
+ *            +--------------------------------+
+ *  LPM0      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 0
+ *            +--------------------------------+
+ *  LPM1      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 1
+ *            +--------------------------------+
+ *  LPM2      | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 2
+ *            +--------------------------------+
+ *  L2LPM     | EN |  CC  |  CC  |  CC  |  CC  |   N = 1, R = 3
+ *            +--------------------------------+
+ *  VLPM      | EN |  CC  |  CC  |  CC  |  CC  |   N = 2, R = ?
+ *            +--------------------------------+
+ *              EN | G=3  | G=2  | G=1  | G=0
+ *
+ *
+ *  Event Encoding:
+ *
+ *      hwc->config_base = 0xNRCCG
+ *
+ *      N  = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
+ *      R  = region register
+ *      CC = class of events the group G is choosing from
+ *      G  = group or particular event
+ *
+ *  Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
+ *
+ *  A region (R) corresponds to a piece of the CPU (execution unit, instruction
+ *  unit, etc.) while the event code (CC) corresponds to a particular class of
+ *  events (interrupts for example). An event code is broken down into
+ *  groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
+ *  example).
+ */
+
+static u32 scorpion_read_pmresrn(int n)
+{
+	u32 val;
+
+	switch (n) {
+	case 0:
+		asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 1:
+		asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 2:
+		asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+		break;
+	case 3:
+		asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+
+	return val;
+}
+
+static void scorpion_write_pmresrn(int n, u32 val)
+{
+	switch (n) {
+	case 0:
+		asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 1:
+		asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 2:
+		asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+		break;
+	case 3:
+		asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+		break;
+	default:
+		BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
+	}
+}
+
+static u32 scorpion_get_pmresrn_event(unsigned int region)
+{
+	static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
+					     SCORPION_LPM1_GROUP0,
+					     SCORPION_LPM2_GROUP0,
+					     SCORPION_L2LPM_GROUP0 };
+	return pmresrn_table[region];
+}
+
+static void scorpion_evt_setup(int idx, u32 config_base)
+{
+	u32 val;
+	u32 mask;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	unsigned int code = EVENT_CODE(config_base);
+	unsigned int group_shift;
+	bool venum_event = EVENT_VENUM(config_base);
+
+	group_shift = group * 8;
+	mask = 0xff << group_shift;
+
+	/* Configure evtsel for the region and group */
+	if (venum_event)
+		val = SCORPION_VLPM_GROUP0;
+	else
+		val = scorpion_get_pmresrn_event(region);
+	val += group;
+	/* Mix in mode-exclusion bits */
+	val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
+	armv7_pmnc_write_evtsel(idx, val);
+
+	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val &= ~mask;
+		val |= code << group_shift;
+		val |= PMRESRn_EN;
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
+static void scorpion_clearpmu(u32 config_base)
+{
+	u32 val;
+	u32 vval, fval;
+	unsigned int region = EVENT_REGION(config_base);
+	unsigned int group = EVENT_GROUP(config_base);
+	bool venum_event = EVENT_VENUM(config_base);
+
+	if (venum_event) {
+		venum_pre_pmresr(&vval, &fval);
+		val = venum_read_pmresr();
+		val = clear_pmresrn_group(val, group);
+		venum_write_pmresr(val);
+		venum_post_pmresr(vval, fval);
+	} else {
+		val = scorpion_read_pmresrn(region);
+		val = clear_pmresrn_group(val, group);
+		scorpion_write_pmresrn(region, val);
+	}
+}
+
+static void scorpion_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/* Disable counter and interrupt */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Clear pmresr code (if destined for PMNx counters)
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_clearpmu(hwc->config_base);
+
+	/* Disable interrupt for this counter */
+	armv7_pmnc_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void scorpion_pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable counter */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We don't set the event for the cycle counter because we
+	 * don't have the ability to perform event filtering.
+	 */
+	if (hwc->config_base & KRAIT_EVENT_MASK)
+		scorpion_evt_setup(idx, hwc->config_base);
+	else if (idx != ARMV7_IDX_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/* Enable interrupt for this counter */
+	armv7_pmnc_enable_intens(idx);
+
+	/* Enable counter */
+	armv7_pmnc_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void scorpion_pmu_reset(void *info)
+{
+	u32 vval, fval;
+	struct arm_pmu *cpu_pmu = info;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
+
+	armv7pmu_reset(info);
+
+	/* Clear all pmresrs */
+	scorpion_write_pmresrn(0, 0);
+	scorpion_write_pmresrn(1, 0);
+	scorpion_write_pmresrn(2, 0);
+	scorpion_write_pmresrn(3, 0);
+
+	venum_pre_pmresr(&vval, &fval);
+	venum_write_pmresr(0);
+	venum_post_pmresr(vval, fval);
+
+	/* Reset PMxEVNCTCR to sane default */
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv7_pmnc_select_counter(idx);
+		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
+	}
+}
+
+static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
+			      unsigned int group)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	if (hwc->config_base & VENUM_EVENT)
+		bit = SCORPION_VLPM_GROUP0;
+	else
+		bit = scorpion_get_pmresrn_event(region);
+	bit -= scorpion_get_pmresrn_event(0);
+	bit += group;
+	/*
+	 * Lower bits are reserved for use by the counters (see
+	 * armv7pmu_get_event_idx() for more info)
+	 */
+	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
+
+	return bit;
+}
+
+/*
+ * We check for column exclusion constraints here.
+ * Two events cant use the same group within a pmresr register.
+ */
+static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	int idx;
+	int bit = -1;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		/* Ignore invalid events */
+		if (group > 3 || region > 3)
+			return -EINVAL;
+
+		bit = scorpion_event_to_bit(event, region, group);
+		if (test_and_set_bit(bit, cpuc->used_mask))
+			return -EAGAIN;
+	}
+
+	idx = armv7pmu_get_event_idx(cpuc, event);
+	if (idx < 0 && bit >= 0)
+		clear_bit(bit, cpuc->used_mask);
+
+	return idx;
+}
+
+static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	int bit;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int region = EVENT_REGION(hwc->config_base);
+	unsigned int group = EVENT_GROUP(hwc->config_base);
+	bool venum_event = EVENT_VENUM(hwc->config_base);
+	bool scorpion_event = EVENT_CPU(hwc->config_base);
+
+	if (venum_event || scorpion_event) {
+		bit = scorpion_event_to_bit(event, region, group);
+		clear_bit(bit, cpuc->used_mask);
+	}
+}
+
+static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return 0;
+}
+
+static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv7pmu_init(cpu_pmu);
+	cpu_pmu->name		= "armv7_scorpion_mp";
+	cpu_pmu->map_event	= scorpion_map_event;
+	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
+	cpu_pmu->reset		= scorpion_pmu_reset;
+	cpu_pmu->enable		= scorpion_pmu_enable_event;
+	cpu_pmu->disable	= scorpion_pmu_disable_event;
+	cpu_pmu->get_event_idx	= scorpion_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
+	return 0;
+}
 #else
 static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
@@ -1498,4 +1895,14 @@ static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	return -ENODEV;
 }
+
+static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
+
+static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return -ENODEV;
+}
 #endif	/* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fdfa3a78ec8c..f192a2a41719 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -17,12 +17,9 @@
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/user.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
 #include <linux/interrupt.h>
 #include <linux/kallsyms.h>
 #include <linux/init.h>
-#include <linux/cpu.h>
 #include <linux/elfcore.h>
 #include <linux/pm.h>
 #include <linux/tick.h>
@@ -31,16 +28,14 @@
 #include <linux/random.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/leds.h>
-#include <linux/reboot.h>
 
-#include <asm/cacheflush.h>
-#include <asm/idmap.h>
 #include <asm/processor.h>
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/system_misc.h>
 #include <asm/mach/time.h>
 #include <asm/tls.h>
+#include <asm/vdso.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -59,69 +54,6 @@ static const char *isa_modes[] __maybe_unused = {
   "ARM" , "Thumb" , "Jazelle", "ThumbEE"
 };
 
-extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
-typedef void (*phys_reset_t)(unsigned long);
-
-/*
- * A temporary stack to use for CPU reset. This is static so that we
- * don't clobber it with the identity mapping. When running with this
- * stack, any references to the current task *will not work* so you
- * should really do as little as possible before jumping to your reset
- * code.
- */
-static u64 soft_restart_stack[16];
-
-static void __soft_restart(void *addr)
-{
-	phys_reset_t phys_reset;
-
-	/* Take out a flat memory mapping. */
-	setup_mm_for_reboot();
-
-	/* Clean and invalidate caches */
-	flush_cache_all();
-
-	/* Turn off caching */
-	cpu_proc_fin();
-
-	/* Push out any further dirty data, and ensure cache is empty */
-	flush_cache_all();
-
-	/* Switch to the identity mapping. */
-	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-	phys_reset((unsigned long)addr);
-
-	/* Should never get here. */
-	BUG();
-}
-
-void soft_restart(unsigned long addr)
-{
-	u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
-
-	/* Disable interrupts first */
-	raw_local_irq_disable();
-	local_fiq_disable();
-
-	/* Disable the L2 if we're the last man standing. */
-	if (num_online_cpus() == 1)
-		outer_disable();
-
-	/* Change to the new stack and continue with the reset. */
-	call_with_stack(__soft_restart, (void *)addr, (void *)stack);
-
-	/* Should never get here. */
-	BUG();
-}
-
-/*
- * Function pointers to optional machine specific functions
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-
 /*
  * This is our default idle handler.
  */
@@ -166,79 +98,6 @@ void arch_cpu_idle_dead(void)
 }
 #endif
 
-/*
- * Called by kexec, immediately prior to machine_kexec().
- *
- * This must completely disable all secondary CPUs; simply causing those CPUs
- * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
- * kexec'd kernel to use any and all RAM as it sees fit, without having to
- * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
- */
-void machine_shutdown(void)
-{
-	disable_nonboot_cpus();
-}
-
-/*
- * Halting simply requires that the secondary CPUs stop performing any
- * activity (executing tasks, handling interrupts). smp_send_stop()
- * achieves this.
- */
-void machine_halt(void)
-{
-	local_irq_disable();
-	smp_send_stop();
-
-	local_irq_disable();
-	while (1);
-}
-
-/*
- * Power-off simply requires that the secondary CPUs stop performing any
- * activity (executing tasks, handling interrupts). smp_send_stop()
- * achieves this. When the system power is turned off, it will take all CPUs
- * with it.
- */
-void machine_power_off(void)
-{
-	local_irq_disable();
-	smp_send_stop();
-
-	if (pm_power_off)
-		pm_power_off();
-}
-
-/*
- * Restart requires that the secondary CPUs stop performing any activity
- * while the primary CPU resets the system. Systems with a single CPU can
- * use soft_restart() as their machine descriptor's .restart hook, since that
- * will cause the only available CPU to reset. Systems with multiple CPUs must
- * provide a HW restart implementation, to ensure that all CPUs reset at once.
- * This is required so that any code running after reset on the primary CPU
- * doesn't have to co-ordinate with other CPUs to ensure they aren't still
- * executing pre-reset code, and using RAM that the primary CPU's code wishes
- * to use. Implementing such co-ordination would be essentially impossible.
- */
-void machine_restart(char *cmd)
-{
-	local_irq_disable();
-	smp_send_stop();
-
-	if (arm_pm_restart)
-		arm_pm_restart(reboot_mode, cmd);
-	else
-		do_kernel_restart(cmd);
-
-	/* Give a grace period for failure to restart of 1s */
-	mdelay(1000);
-
-	/* Whoops - the platform was unable to reboot. Tell the user! */
-	printk("Reboot failed -- System halted\n");
-	local_irq_disable();
-	while (1);
-}
-
 void __show_regs(struct pt_regs *regs)
 {
 	unsigned long flags;
@@ -475,7 +334,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 }
 
 /* If possible, provide a placement hint at a random offset from the
- * stack for the signal page.
+ * stack for the sigpage and vdso pages.
  */
 static unsigned long sigpage_addr(const struct mm_struct *mm,
 				  unsigned int npages)
@@ -519,6 +378,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	unsigned long npages;
 	unsigned long addr;
 	unsigned long hint;
 	int ret = 0;
@@ -528,9 +388,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!signal_page)
 		return -ENOMEM;
 
+	npages = 1; /* for sigpage */
+	npages += vdso_total_pages;
+
 	down_write(&mm->mmap_sem);
-	hint = sigpage_addr(mm, 1);
-	addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0);
+	hint = sigpage_addr(mm, npages);
+	addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
@@ -547,6 +410,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	mm->context.sigpage = addr;
 
+	/* Unlike the sigpage, failure to install the vdso is unlikely
+	 * to be fatal to the process, so no error check needed
+	 * here.
+	 */
+	arm_install_vdso(mm, addr + PAGE_SIZE);
+
  up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;
diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S
new file mode 100644
index 000000000000..a78e9e1e206d
--- /dev/null
+++ b/arch/arm/kernel/psci-call.S
@@ -0,0 +1,31 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2015 ARM Limited
+ *
+ * Author: Mark Rutland <mark.rutland@arm.com>
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/opcodes-sec.h>
+#include <asm/opcodes-virt.h>
+
+/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
+ENTRY(__invoke_psci_fn_hvc)
+	__HVC(0)
+	bx	lr
+ENDPROC(__invoke_psci_fn_hvc)
+
+/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
+ENTRY(__invoke_psci_fn_smc)
+	__SMC(0)
+	bx	lr
+ENDPROC(__invoke_psci_fn_smc)
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index f73891b6b730..f90fdf4ce7c7 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -23,8 +23,6 @@
 
 #include <asm/compiler.h>
 #include <asm/errno.h>
-#include <asm/opcodes-sec.h>
-#include <asm/opcodes-virt.h>
 #include <asm/psci.h>
 #include <asm/system_misc.h>
 
@@ -33,6 +31,9 @@ struct psci_operations psci_ops;
 static int (*invoke_psci_fn)(u32, u32, u32, u32);
 typedef int (*psci_initcall_t)(const struct device_node *);
 
+asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32);
+asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32);
+
 enum psci_function {
 	PSCI_FN_CPU_SUSPEND,
 	PSCI_FN_CPU_ON,
@@ -71,40 +72,6 @@ static u32 psci_power_state_pack(struct psci_power_state state)
 		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
 }
 
-/*
- * The following two functions are invoked via the invoke_psci_fn pointer
- * and will not be inlined, allowing us to piggyback on the AAPCS.
- */
-static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1,
-					 u32 arg2)
-{
-	asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r1")
-			__asmeq("%2", "r2")
-			__asmeq("%3", "r3")
-			__HVC(0)
-		: "+r" (function_id)
-		: "r" (arg0), "r" (arg1), "r" (arg2));
-
-	return function_id;
-}
-
-static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1,
-					 u32 arg2)
-{
-	asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r1")
-			__asmeq("%2", "r2")
-			__asmeq("%3", "r3")
-			__SMC(0)
-		: "+r" (function_id)
-		: "r" (arg0), "r" (arg1), "r" (arg2));
-
-	return function_id;
-}
-
 static int psci_get_version(void)
 {
 	int err;
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
new file mode 100644
index 000000000000..1a4d232796be
--- /dev/null
+++ b/arch/arm/kernel/reboot.c
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ *  Original Copyright (C) 1995  Linus Torvalds
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+#include <asm/idmap.h>
+
+#include "reboot.h"
+
+typedef void (*phys_reset_t)(unsigned long);
+
+/*
+ * Function pointers to optional machine specific functions
+ */
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * A temporary stack to use for CPU reset. This is static so that we
+ * don't clobber it with the identity mapping. When running with this
+ * stack, any references to the current task *will not work* so you
+ * should really do as little as possible before jumping to your reset
+ * code.
+ */
+static u64 soft_restart_stack[16];
+
+static void __soft_restart(void *addr)
+{
+	phys_reset_t phys_reset;
+
+	/* Take out a flat memory mapping. */
+	setup_mm_for_reboot();
+
+	/* Clean and invalidate caches */
+	flush_cache_all();
+
+	/* Turn off caching */
+	cpu_proc_fin();
+
+	/* Push out any further dirty data, and ensure cache is empty */
+	flush_cache_all();
+
+	/* Switch to the identity mapping. */
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset((unsigned long)addr);
+
+	/* Should never get here. */
+	BUG();
+}
+
+void _soft_restart(unsigned long addr, bool disable_l2)
+{
+	u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
+
+	/* Disable interrupts first */
+	raw_local_irq_disable();
+	local_fiq_disable();
+
+	/* Disable the L2 if we're the last man standing. */
+	if (disable_l2)
+		outer_disable();
+
+	/* Change to the new stack and continue with the reset. */
+	call_with_stack(__soft_restart, (void *)addr, (void *)stack);
+
+	/* Should never get here. */
+	BUG();
+}
+
+void soft_restart(unsigned long addr)
+{
+	_soft_restart(addr, num_online_cpus() == 1);
+}
+
+/*
+ * Called by kexec, immediately prior to machine_kexec().
+ *
+ * This must completely disable all secondary CPUs; simply causing those CPUs
+ * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
+ * kexec'd kernel to use any and all RAM as it sees fit, without having to
+ * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
+ * functionality embodied in disable_nonboot_cpus() to achieve this.
+ */
+void machine_shutdown(void)
+{
+	disable_nonboot_cpus();
+}
+
+/*
+ * Halting simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this.
+ */
+void machine_halt(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	local_irq_disable();
+	while (1);
+}
+
+/*
+ * Power-off simply requires that the secondary CPUs stop performing any
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this. When the system power is turned off, it will take all CPUs
+ * with it.
+ */
+void machine_power_off(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	if (pm_power_off)
+		pm_power_off();
+}
+
+/*
+ * Restart requires that the secondary CPUs stop performing any activity
+ * while the primary CPU resets the system. Systems with a single CPU can
+ * use soft_restart() as their machine descriptor's .restart hook, since that
+ * will cause the only available CPU to reset. Systems with multiple CPUs must
+ * provide a HW restart implementation, to ensure that all CPUs reset at once.
+ * This is required so that any code running after reset on the primary CPU
+ * doesn't have to co-ordinate with other CPUs to ensure they aren't still
+ * executing pre-reset code, and using RAM that the primary CPU's code wishes
+ * to use. Implementing such co-ordination would be essentially impossible.
+ */
+void machine_restart(char *cmd)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	if (arm_pm_restart)
+		arm_pm_restart(reboot_mode, cmd);
+	else
+		do_kernel_restart(cmd);
+
+	/* Give a grace period for failure to restart of 1s */
+	mdelay(1000);
+
+	/* Whoops - the platform was unable to reboot. Tell the user! */
+	printk("Reboot failed -- System halted\n");
+	local_irq_disable();
+	while (1);
+}
diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h
new file mode 100644
index 000000000000..bf7a0b1f076e
--- /dev/null
+++ b/arch/arm/kernel/reboot.h
@@ -0,0 +1,7 @@
+#ifndef REBOOT_H
+#define REBOOT_H
+
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+extern void _soft_restart(unsigned long addr, bool disable_l2);
+
+#endif
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index 24b4a04846eb..36ed35073289 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -56,8 +56,6 @@ void *return_address(unsigned int level)
 		return NULL;
 }
 
-#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
-
-#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */
+#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
 
 EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index e55408e96559..6c777e908a24 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -246,12 +246,9 @@ static int __get_cpu_architecture(void)
 		if (cpu_arch)
 			cpu_arch += CPU_ARCH_ARMv3;
 	} else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
-		unsigned int mmfr0;
-
 		/* Revised CPUID format. Read the Memory Model Feature
 		 * Register 0 and check for VMSAv7 or PMSAv7 */
-		asm("mrc	p15, 0, %0, c0, c1, 4"
-		    : "=r" (mmfr0));
+		unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);
 		if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
 		    (mmfr0 & 0x000000f0) >= 0x00000030)
 			cpu_arch = CPU_ARCH_ARMv7;
@@ -375,30 +372,48 @@ void __init early_print(const char *str, ...)
 
 static void __init cpuid_init_hwcaps(void)
 {
-	unsigned int divide_instrs, vmsa;
+	int block;
+	u32 isar5;
 
 	if (cpu_architecture() < CPU_ARCH_ARMv7)
 		return;
 
-	divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24;
-
-	switch (divide_instrs) {
-	case 2:
+	block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24);
+	if (block >= 2)
 		elf_hwcap |= HWCAP_IDIVA;
-	case 1:
+	if (block >= 1)
 		elf_hwcap |= HWCAP_IDIVT;
-	}
 
 	/* LPAE implies atomic ldrd/strd instructions */
-	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
-	if (vmsa >= 5)
+	block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
+	if (block >= 5)
 		elf_hwcap |= HWCAP_LPAE;
+
+	/* check for supported v8 Crypto instructions */
+	isar5 = read_cpuid_ext(CPUID_EXT_ISAR5);
+
+	block = cpuid_feature_extract_field(isar5, 4);
+	if (block >= 2)
+		elf_hwcap2 |= HWCAP2_PMULL;
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_AES;
+
+	block = cpuid_feature_extract_field(isar5, 8);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_SHA1;
+
+	block = cpuid_feature_extract_field(isar5, 12);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_SHA2;
+
+	block = cpuid_feature_extract_field(isar5, 16);
+	if (block >= 1)
+		elf_hwcap2 |= HWCAP2_CRC32;
 }
 
 static void __init elf_hwcap_fixup(void)
 {
 	unsigned id = read_cpuid_id();
-	unsigned sync_prim;
 
 	/*
 	 * HWCAP_TLS is available only on 1136 r1p0 and later,
@@ -419,9 +434,9 @@ static void __init elf_hwcap_fixup(void)
 	 * avoid advertising SWP; it may not be atomic with
 	 * multiprocessing cores.
 	 */
-	sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) |
-		    ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f);
-	if (sync_prim >= 0x13)
+	if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 ||
+	    (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 &&
+	     cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3))
 		elf_hwcap &= ~HWCAP_SWP;
 }
 
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 023ac905e4c3..423663e23791 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -318,17 +318,6 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize)
 	return frame;
 }
 
-/*
- * translate the signal
- */
-static inline int map_sig(int sig)
-{
-	struct thread_info *thread = current_thread_info();
-	if (sig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
-		sig = thread->exec_domain->signal_invmap[sig];
-	return sig;
-}
-
 static int
 setup_return(struct pt_regs *regs, struct ksignal *ksig,
 	     unsigned long __user *rc, void __user *frame)
@@ -412,7 +401,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
 		}
 	}
 
-	regs->ARM_r0 = map_sig(ksig->sig);
+	regs->ARM_r0 = ksig->sig;
 	regs->ARM_sp = (unsigned long)frame;
 	regs->ARM_lr = retcode;
 	regs->ARM_pc = handler;
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index e1e60e5a7a27..7d37bfc50830 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -116,14 +116,7 @@ cpu_resume_after_mmu:
 	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_resume_after_mmu)
 
-/*
- * Note: Yes, part of the following code is located into the .data section.
- *       This is to allow sleep_save_sp to be accessed with a relative load
- *       while we can't rely on any MMU translation.  We could have put
- *       sleep_save_sp in the .text section as well, but some setups might
- *       insist on it to be truly read-only.
- */
-	.data
+	.text
 	.align
 ENTRY(cpu_resume)
 ARM_BE8(setend be)			@ ensure we are in BE mode
@@ -145,6 +138,8 @@ ARM_BE8(setend be)			@ ensure we are in BE mode
 	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
 1:
 	adr	r0, _sleep_save_sp
+	ldr	r2, [r0]
+	add	r0, r0, r2
 	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
 	ldr	r0, [r0, r1, lsl #2]
 
@@ -156,10 +151,12 @@ THUMB(	bx	r3			)
 ENDPROC(cpu_resume)
 
 	.align 2
+_sleep_save_sp:
+	.long	sleep_save_sp - .
 mpidr_hash_ptr:
 	.long	mpidr_hash - .			@ mpidr_hash struct offset
 
+	.data
 	.type	sleep_save_sp, #object
 ENTRY(sleep_save_sp)
-_sleep_save_sp:
 	.space	SLEEP_SAVE_SP_SZ		@ struct sleep_save_sp
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 86ef244c5a24..cca5b8758185 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -145,6 +145,11 @@ void __init smp_init_cpus(void)
 		smp_ops.smp_init_cpus();
 }
 
+int platform_can_secondary_boot(void)
+{
+	return !!smp_ops.smp_boot_secondary;
+}
+
 int platform_can_cpu_hotplug(void)
 {
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index afdd51e30bec..1361756782c7 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -42,7 +42,7 @@
 	"	cmp		%0, #0\n"			\
 	"	movne		%0, %4\n"			\
 	"2:\n"							\
-	"	.section	 .fixup,\"ax\"\n"		\
+	"	.section	 .text.fixup,\"ax\"\n"		\
 	"	.align		2\n"				\
 	"3:	mov		%0, %5\n"			\
 	"	b		2b\n"				\
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 0cc7e58c47cc..a66e37e211a9 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -76,7 +76,7 @@ void timer_tick(void)
 }
 #endif
 
-static void dummy_clock_access(struct timespec *ts)
+static void dummy_clock_access(struct timespec64 *ts)
 {
 	ts->tv_sec = 0;
 	ts->tv_nsec = 0;
@@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts)
 static clock_access_fn __read_persistent_clock = dummy_clock_access;
 static clock_access_fn __read_boot_clock = dummy_clock_access;;
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	__read_persistent_clock(ts);
 }
 
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
 {
 	__read_boot_clock(ts);
 }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 788e23fe64d8..3dce1a342030 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -505,12 +505,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason)
 
 static int bad_syscall(int n, struct pt_regs *regs)
 {
-	struct thread_info *thread = current_thread_info();
 	siginfo_t info;
 
-	if ((current->personality & PER_MASK) != PER_LINUX &&
-	    thread->exec_domain->handler) {
-		thread->exec_domain->handler(n, regs);
+	if ((current->personality & PER_MASK) != PER_LINUX) {
+		send_sig(SIGSEGV, current, 1);
 		return regs->ARM_r0;
 	}
 
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
new file mode 100644
index 000000000000..efe17dd9b921
--- /dev/null
+++ b/arch/arm/kernel/vdso.c
@@ -0,0 +1,337 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Copyright (C) 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/elf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/vmalloc.h>
+#include <asm/arch_timer.h>
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <clocksource/arm_arch_timer.h>
+
+#define MAX_SYMNAME	64
+
+static struct page **vdso_text_pagelist;
+
+/* Total number of pages needed for the data and text portions of the VDSO. */
+unsigned int vdso_total_pages __read_mostly;
+
+/*
+ * The VDSO data page.
+ */
+static union vdso_data_store vdso_data_store __page_aligned_data;
+static struct vdso_data *vdso_data = &vdso_data_store.data;
+
+static struct page *vdso_data_page;
+static struct vm_special_mapping vdso_data_mapping = {
+	.name = "[vvar]",
+	.pages = &vdso_data_page,
+};
+
+static struct vm_special_mapping vdso_text_mapping = {
+	.name = "[vdso]",
+};
+
+struct elfinfo {
+	Elf32_Ehdr	*hdr;		/* ptr to ELF */
+	Elf32_Sym	*dynsym;	/* ptr to .dynsym section */
+	unsigned long	dynsymsize;	/* size of .dynsym section */
+	char		*dynstr;	/* ptr to .dynstr section */
+};
+
+/* Cached result of boot-time check for whether the arch timer exists,
+ * and if so, whether the virtual counter is useable.
+ */
+static bool cntvct_ok __read_mostly;
+
+static bool __init cntvct_functional(void)
+{
+	struct device_node *np;
+	bool ret = false;
+
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		goto out;
+
+	/* The arm_arch_timer core should export
+	 * arch_timer_use_virtual or similar so we don't have to do
+	 * this.
+	 */
+	np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
+	if (!np)
+		goto out_put;
+
+	if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
+		goto out_put;
+
+	ret = true;
+
+out_put:
+	of_node_put(np);
+out:
+	return ret;
+}
+
+static void * __init find_section(Elf32_Ehdr *ehdr, const char *name,
+				  unsigned long *size)
+{
+	Elf32_Shdr *sechdrs;
+	unsigned int i;
+	char *secnames;
+
+	/* Grab section headers and strings so we can tell who is who */
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
+	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	/* Find the section they want */
+	for (i = 1; i < ehdr->e_shnum; i++) {
+		if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) {
+			if (size)
+				*size = sechdrs[i].sh_size;
+			return (void *)ehdr + sechdrs[i].sh_offset;
+		}
+	}
+
+	if (size)
+		*size = 0;
+	return NULL;
+}
+
+static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
+{
+	unsigned int i;
+
+	for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+		char name[MAX_SYMNAME], *c;
+
+		if (lib->dynsym[i].st_name == 0)
+			continue;
+		strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+			MAX_SYMNAME);
+		c = strchr(name, '@');
+		if (c)
+			*c = 0;
+		if (strcmp(symname, name) == 0)
+			return &lib->dynsym[i];
+	}
+	return NULL;
+}
+
+static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname)
+{
+	Elf32_Sym *sym;
+
+	sym = find_symbol(lib, symname);
+	if (!sym)
+		return;
+
+	sym->st_name = 0;
+}
+
+static void __init patch_vdso(void *ehdr)
+{
+	struct elfinfo einfo;
+
+	einfo = (struct elfinfo) {
+		.hdr = ehdr,
+	};
+
+	einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize);
+	einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL);
+
+	/* If the virtual counter is absent or non-functional we don't
+	 * want programs to incur the slight additional overhead of
+	 * dispatching through the VDSO only to fall back to syscalls.
+	 */
+	if (!cntvct_ok) {
+		vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
+		vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
+	}
+}
+
+static int __init vdso_init(void)
+{
+	unsigned int text_pages;
+	int i;
+
+	if (memcmp(&vdso_start, "\177ELF", 4)) {
+		pr_err("VDSO is not a valid ELF object!\n");
+		return -ENOEXEC;
+	}
+
+	text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+	pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start);
+
+	/* Allocate the VDSO text pagelist */
+	vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
+				     GFP_KERNEL);
+	if (vdso_text_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the VDSO data page. */
+	vdso_data_page = virt_to_page(vdso_data);
+
+	/* Grab the VDSO text pages. */
+	for (i = 0; i < text_pages; i++) {
+		struct page *page;
+
+		page = virt_to_page(&vdso_start + i * PAGE_SIZE);
+		vdso_text_pagelist[i] = page;
+	}
+
+	vdso_text_mapping.pages = vdso_text_pagelist;
+
+	vdso_total_pages = 1; /* for the data/vvar page */
+	vdso_total_pages += text_pages;
+
+	cntvct_ok = cntvct_functional();
+
+	patch_vdso(&vdso_start);
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+static int install_vvar(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
+				       VM_READ | VM_MAYREAD,
+				       &vdso_data_mapping);
+
+	return IS_ERR(vma) ? PTR_ERR(vma) : 0;
+}
+
+/* assumes mmap_sem is write-locked */
+void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+	unsigned long len;
+
+	mm->context.vdso = 0;
+
+	if (vdso_text_pagelist == NULL)
+		return;
+
+	if (install_vvar(mm, addr))
+		return;
+
+	/* Account for vvar page. */
+	addr += PAGE_SIZE;
+	len = (vdso_total_pages - 1) << PAGE_SHIFT;
+
+	vma = _install_special_mapping(mm, addr, len,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&vdso_text_mapping);
+
+	if (!IS_ERR(vma))
+		mm->context.vdso = addr;
+}
+
+static void vdso_write_begin(struct vdso_data *vdata)
+{
+	++vdso_data->seq_count;
+	smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */
+}
+
+static void vdso_write_end(struct vdso_data *vdata)
+{
+	smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */
+	++vdso_data->seq_count;
+}
+
+static bool tk_is_cntvct(const struct timekeeper *tk)
+{
+	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
+		return false;
+
+	if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0)
+		return false;
+
+	return true;
+}
+
+/**
+ * update_vsyscall - update the vdso data page
+ *
+ * Increment the sequence counter, making it odd, indicating to
+ * userspace that an update is in progress.  Update the fields used
+ * for coarse clocks and, if the architected system timer is in use,
+ * the fields used for high precision clocks.  Increment the sequence
+ * counter again, making it even, indicating to userspace that the
+ * update is finished.
+ *
+ * Userspace is expected to sample seq_count before reading any other
+ * fields from the data page.  If seq_count is odd, userspace is
+ * expected to wait until it becomes even.  After copying data from
+ * the page, userspace must sample seq_count again; if it has changed
+ * from its previous value, userspace must retry the whole sequence.
+ *
+ * Calls to update_vsyscall are serialized by the timekeeping core.
+ */
+void update_vsyscall(struct timekeeper *tk)
+{
+	struct timespec xtime_coarse;
+	struct timespec64 *wtm = &tk->wall_to_monotonic;
+
+	if (!cntvct_ok) {
+		/* The entry points have been zeroed, so there is no
+		 * point in updating the data page.
+		 */
+		return;
+	}
+
+	vdso_write_begin(vdso_data);
+
+	xtime_coarse = __current_kernel_time();
+	vdso_data->tk_is_cntvct			= tk_is_cntvct(tk);
+	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;
+	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec;
+	vdso_data->wtm_clock_sec		= wtm->tv_sec;
+	vdso_data->wtm_clock_nsec		= wtm->tv_nsec;
+
+	if (vdso_data->tk_is_cntvct) {
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
+		vdso_data->xtime_clock_sec	= tk->xtime_sec;
+		vdso_data->xtime_clock_snsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
+		vdso_data->cs_mask		= tk->tkr_mono.mask;
+	}
+
+	vdso_write_end(vdso_data);
+
+	flush_dcache_page(virt_to_page(vdso_data));
+}
+
+void update_vsyscall_tz(void)
+{
+	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest;
+	vdso_data->tz_dsttime		= sys_tz.tz_dsttime;
+	flush_dcache_page(virt_to_page(vdso_data));
+}
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b31aa73e8076..8b60fde5ce48 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -11,7 +11,7 @@
 #ifdef CONFIG_ARM_KERNMEM_PERMS
 #include <asm/pgtable.h>
 #endif
-	
+
 #define PROC_INFO							\
 	. = ALIGN(4);							\
 	VMLINUX_SYMBOL(__proc_info_begin) = .;				\
@@ -23,7 +23,7 @@
 	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
 	*(.idmap.text)							\
 	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
-	. = ALIGN(32);							\
+	. = ALIGN(PAGE_SIZE);						\
 	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
 	*(.hyp.idmap.text)						\
 	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
@@ -74,7 +74,7 @@ SECTIONS
 		ARM_EXIT_DISCARD(EXIT_DATA)
 		EXIT_CALL
 #ifndef CONFIG_MMU
-		*(.fixup)
+		*(.text.fixup)
 		*(__ex_table)
 #endif
 #ifndef CONFIG_SMP_ON_UP
@@ -100,6 +100,7 @@ SECTIONS
 
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
+			IDMAP_TEXT
 			__exception_text_start = .;
 			*(.exception.text)
 			__exception_text_end = .;
@@ -108,10 +109,6 @@ SECTIONS
 			SCHED_TEXT
 			LOCK_TEXT
 			KPROBES_TEXT
-			IDMAP_TEXT
-#ifdef CONFIG_MMU
-			*(.fixup)
-#endif
 			*(.gnu.warning)
 			*(.glue_7)
 			*(.glue_7t)
@@ -346,8 +343,11 @@ SECTIONS
  */
 ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
 ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
+
 /*
- * The HYP init code can't be more than a page long.
+ * The HYP init code can't be more than a page long,
+ * and should not cross a page boundary.
  * The above comment applies as well.
  */
-ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big")
+ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
+	"HYP init code too big or misaligned")
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 338ace78ed18..f1f79d104309 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -18,6 +18,7 @@ if VIRTUALIZATION
 
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
+	depends on MMU && OF
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -26,10 +27,12 @@ config KVM
 	select KVM_ARM_HOST
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
-	depends on ARM_VIRT_EXT && ARM_LPAE
+	select MMU_NOTIFIER
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_IRQFD
+	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
-	  Support hosting virtualized guest machines. You will also
-	  need to select one or more of the processor modules below.
+	  Support hosting virtualized guest machines.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
@@ -37,10 +40,7 @@ config KVM
 	  If unsure, say N.
 
 config KVM_ARM_HOST
-	bool "KVM host support for ARM cpus."
-	depends on KVM
-	depends on MMU
-	select	MMU_NOTIFIER
+	bool
 	---help---
 	  Provides host support for ARM processors.
 
@@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS
 	  large, so only choose a reasonable number that you expect to
 	  actually use.
 
-config KVM_ARM_VGIC
-	bool "KVM support for Virtual GIC"
-	depends on KVM_ARM_HOST && OF
-	select HAVE_KVM_IRQCHIP
-	default y
-	---help---
-	  Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-	bool "KVM support for Architected Timers"
-	depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
-	select HAVE_KVM_IRQCHIP
-	default y
-	---help---
-	  Adds support for the Architected Timers in virtual machines
-
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 443b8bea43e9..139e46c08b6e 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
 	plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
 CFLAGS_arm.o := -I. $(plus_virt_def)
 CFLAGS_mmu.o := -I.
 
@@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-y += $(KVM)/arm/vgic.o
+obj-y += $(KVM)/arm/vgic-v2.o
+obj-y += $(KVM)/arm/vgic-v2-emul.o
+obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 5560f74f9eee..6f536451ab78 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
-static bool vgic_present;
-
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	int r;
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
-		r = vgic_present;
-		break;
+	case KVM_CAP_IRQFD:
+	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
@@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PSCI:
 	case KVM_CAP_ARM_PSCI_0_2:
 	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_MP_STATE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	return 0;
+	return kvm_timer_should_fire(vcpu);
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	return -EINVAL;
+	if (vcpu->arch.pause)
+		mp_state->mp_state = KVM_MP_STATE_STOPPED;
+	else
+		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	return -EINVAL;
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_RUNNABLE:
+		vcpu->arch.pause = false;
+		break;
+	case KVM_MP_STATE_STOPPED:
+		vcpu->arch.pause = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 /**
@@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+	return vgic_initialized(kvm);
+}
+
 static void vcpu_pause(struct kvm_vcpu *vcpu)
 {
 	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 
 	switch (dev_id) {
 	case KVM_ARM_DEVICE_VGIC_V2:
-		if (!vgic_present)
-			return -ENXIO;
 		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
@@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
-		if (vgic_present)
-			return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
-		else
-			return -ENXIO;
+		return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 	}
 	case KVM_ARM_SET_DEVICE_ADDR: {
 		struct kvm_arm_device_addr dev_addr;
@@ -1035,10 +1050,6 @@ static int init_hyp_mode(void)
 	if (err)
 		goto out_free_context;
 
-#ifdef CONFIG_KVM_ARM_VGIC
-		vgic_present = true;
-#endif
-
 	/*
 	 * Init HYP architected timer support
 	 */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 384bab67c462..d503fbb787d3 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return -EINVAL;
 }
 
-#ifndef CONFIG_KVM_ARM_TIMER
-
-#define NUM_TIMER_REGS 0
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
-	return 0;
-}
-
-static bool is_timer_reg(u64 index)
-{
-	return false;
-}
-
-#else
-
 #define NUM_TIMER_REGS 3
 
 static bool is_timer_reg(u64 index)
@@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 	return 0;
 }
 
-#endif
-
 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	void __user *uaddr = (void __user *)(long)reg->addr;
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 14d488388480..35e4a3a0c476 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -402,7 +402,6 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  * Assumes vcpu pointer in vcpu reg
  */
 .macro save_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
 	/* Get VGIC VCTRL base into r2 */
 	ldr	r2, [vcpu, #VCPU_KVM]
 	ldr	r2, [r2, #KVM_VGIC_VCTRL]
@@ -460,7 +459,6 @@ ARM_BE8(rev	r6, r6	)
 	subs	r4, r4, #1
 	bne	1b
 2:
-#endif
 .endm
 
 /*
@@ -469,7 +467,6 @@ ARM_BE8(rev	r6, r6	)
  * Assumes vcpu pointer in vcpu reg
  */
 .macro restore_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
 	/* Get VGIC VCTRL base into r2 */
 	ldr	r2, [vcpu, #VCPU_KVM]
 	ldr	r2, [r2, #KVM_VGIC_VCTRL]
@@ -501,7 +498,6 @@ ARM_BE8(rev	r6, r6  )
 	subs	r4, r4, #1
 	bne	1b
 2:
-#endif
 .endm
 
 #define CNTHCTL_PL1PCTEN	(1 << 0)
@@ -515,7 +511,6 @@ ARM_BE8(rev	r6, r6  )
  * Clobbers r2-r5
  */
 .macro save_timer_state
-#ifdef CONFIG_KVM_ARM_TIMER
 	ldr	r4, [vcpu, #VCPU_KVM]
 	ldr	r2, [r4, #KVM_TIMER_ENABLED]
 	cmp	r2, #0
@@ -537,7 +532,6 @@ ARM_BE8(rev	r6, r6  )
 	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
 
 1:
-#endif
 	@ Allow physical timer/counter access for the host
 	mrc	p15, 4, r2, c14, c1, 0	@ CNTHCTL
 	orr	r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -559,7 +553,6 @@ ARM_BE8(rev	r6, r6  )
 	bic	r2, r2, #CNTHCTL_PL1PCEN
 	mcr	p15, 4, r2, c14, c1, 0	@ CNTHCTL
 
-#ifdef CONFIG_KVM_ARM_TIMER
 	ldr	r4, [vcpu, #VCPU_KVM]
 	ldr	r2, [r4, #KVM_TIMER_ENABLED]
 	cmp	r2, #0
@@ -579,7 +572,6 @@ ARM_BE8(rev	r6, r6  )
 	and	r2, r2, #3
 	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 1:
-#endif
 .endm
 
 .equ vmentry,	0
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 5d3bfc0eb3f0..974b1c606d04 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 0;
 }
 
-static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-		      struct kvm_exit_mmio *mmio)
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
 {
 	unsigned long rt;
-	int len;
-	bool is_write, sign_extend;
+	int access_size;
+	bool sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {
 		/* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return 1;
 	}
 
-	len = kvm_vcpu_dabt_get_as(vcpu);
-	if (unlikely(len < 0))
-		return len;
+	access_size = kvm_vcpu_dabt_get_as(vcpu);
+	if (unlikely(access_size < 0))
+		return access_size;
 
-	is_write = kvm_vcpu_dabt_iswrite(vcpu);
+	*is_write = kvm_vcpu_dabt_iswrite(vcpu);
 	sign_extend = kvm_vcpu_dabt_issext(vcpu);
 	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-	mmio->is_write = is_write;
-	mmio->phys_addr = fault_ipa;
-	mmio->len = len;
+	*len = access_size;
 	vcpu->arch.mmio_decode.sign_extend = sign_extend;
 	vcpu->arch.mmio_decode.rt = rt;
 
@@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa)
 {
-	struct kvm_exit_mmio mmio;
 	unsigned long data;
 	unsigned long rt;
 	int ret;
+	bool is_write;
+	int len;
+	u8 data_buf[8];
 
 	/*
-	 * Prepare MMIO operation. First stash it in a private
-	 * structure that we can use for in-kernel emulation. If the
-	 * kernel can't handle it, copy it into run->mmio and let user
-	 * space do its magic.
+	 * Prepare MMIO operation. First decode the syndrome data we get
+	 * from the CPU. Then try if some in-kernel emulation feels
+	 * responsible, otherwise let user space do its magic.
 	 */
-
 	if (kvm_vcpu_dabt_isvalid(vcpu)) {
-		ret = decode_hsr(vcpu, fault_ipa, &mmio);
+		ret = decode_hsr(vcpu, &is_write, &len);
 		if (ret)
 			return ret;
 	} else {
@@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 	rt = vcpu->arch.mmio_decode.rt;
 
-	if (mmio.is_write) {
-		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
-					       mmio.len);
+	if (is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		mmio_write_buf(data_buf, len, data);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
-			       fault_ipa, data);
-		mmio_write_buf(mmio.data, mmio.len, data);
+		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+				       data_buf);
 	} else {
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
 			       fault_ipa, 0);
+
+		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+				      data_buf);
 	}
 
-	if (vgic_handle_mmio(vcpu, run, &mmio))
+	/* Now prepare kvm_run for the potential return to userland. */
+	run->mmio.is_write	= is_write;
+	run->mmio.phys_addr	= fault_ipa;
+	run->mmio.len		= len;
+	memcpy(run->mmio.data, data_buf, len);
+
+	if (!ret) {
+		/* We handled the access successfully in the kernel. */
+		kvm_handle_mmio_return(vcpu, run);
 		return 1;
+	}
 
-	kvm_prepare_mmio(run, &mmio);
+	run->exit_reason	= KVM_EXIT_MMIO;
 	return 0;
 }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e11..1d5accbd3dcf 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -35,9 +35,9 @@ extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
 static pgd_t *boot_hyp_pgd;
 static pgd_t *hyp_pgd;
+static pgd_t *merged_hyp_pgd;
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
-static void *init_bounce_page;
 static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + pgd_index(addr);
+	pgd = pgdp + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
@@ -405,9 +405,6 @@ void free_boot_hyp_pgd(void)
 	if (hyp_pgd)
 		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
-	free_page((unsigned long)init_bounce_page);
-	init_bounce_page = NULL;
-
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
 
@@ -438,6 +435,11 @@ void free_hyp_pgds(void)
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
 	}
+	if (merged_hyp_pgd) {
+		clear_page(merged_hyp_pgd);
+		free_page((unsigned long)merged_hyp_pgd);
+		merged_hyp_pgd = NULL;
+	}
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
@@ -632,6 +634,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+	unsigned int size = kvm_get_hwpgd_size();
+
+	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -645,15 +661,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-	int ret;
 	pgd_t *pgd;
+	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
+	hwpgd = kvm_alloc_hwpgd();
+	if (!hwpgd)
+		return -ENOMEM;
+
+	/* When the kernel uses more levels of page tables than the
+	 * guest, we allocate a fake PGD and pre-populate it to point
+	 * to the next-level page table, which will be the real
+	 * initial page table pointed to by the VTTBR.
+	 *
+	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+	 * the PMD and the kernel will use folded pud.
+	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+	 * pages.
+	 */
 	if (KVM_PREALLOC_LEVEL > 0) {
+		int i;
+
 		/*
 		 * Allocate fake pgd for the page table manipulation macros to
 		 * work.  This is not used by the hardware and we have no
@@ -661,30 +693,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		 */
 		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
 				       GFP_KERNEL | __GFP_ZERO);
+
+		if (!pgd) {
+			kvm_free_hwpgd(hwpgd);
+			return -ENOMEM;
+		}
+
+		/* Plug the HW PGD into the fake one. */
+		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+			if (KVM_PREALLOC_LEVEL == 1)
+				pgd_populate(NULL, pgd + i,
+					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+			else if (KVM_PREALLOC_LEVEL == 2)
+				pud_populate(NULL, pud_offset(pgd, 0) + i,
+					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+		}
 	} else {
 		/*
 		 * Allocate actual first-level Stage-2 page table used by the
 		 * hardware for Stage-2 page table walks.
 		 */
-		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+		pgd = (pgd_t *)hwpgd;
 	}
 
-	if (!pgd)
-		return -ENOMEM;
-
-	ret = kvm_prealloc_hwpgd(kvm, pgd);
-	if (ret)
-		goto out_err;
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
-out_err:
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(pgd);
-	else
-		free_pages((unsigned long)pgd, S2_PGD_ORDER);
-	return ret;
 }
 
 /**
@@ -785,11 +819,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm);
+	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
 	if (KVM_PREALLOC_LEVEL > 0)
 		kfree(kvm->arch.pgd);
-	else
-		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
 	kvm->arch.pgd = NULL;
 }
 
@@ -799,7 +832,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	if (WARN_ON(pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -1089,7 +1122,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1299,10 +1332,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
 	spin_unlock(&kvm->mmu_lock);
+	kvm_set_pfn_accessed(pfn);
 	kvm_release_pfn_clean(pfn);
 	return ret;
 }
 
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+	pfn_t pfn;
+	bool pfn_valid = false;
+
+	trace_kvm_access_fault(fault_ipa);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+
+	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		goto out;
+
+	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+		*pmd = pmd_mkyoung(*pmd);
+		pfn = pmd_pfn(*pmd);
+		pfn_valid = true;
+		goto out;
+	}
+
+	pte = pte_offset_kernel(pmd, fault_ipa);
+	if (pte_none(*pte))		/* Nothing there either */
+		goto out;
+
+	*pte = pte_mkyoung(*pte);	/* Just a page... */
+	pfn = pte_pfn(*pte);
+	pfn_valid = true;
+out:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (pfn_valid)
+		kvm_set_pfn_accessed(pfn);
+}
+
 /**
  * kvm_handle_guest_abort - handles all 2nd stage aborts
  * @vcpu:	the VCPU pointer
@@ -1333,7 +1407,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	/* Check the stage-2 fault is trans. fault or write fault */
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
-	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+	    fault_status != FSC_ACCESS) {
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1369,6 +1444,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	/* Userspace should not be able to register out-of-bounds IPAs */
 	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
 
+	if (fault_status == FSC_ACCESS) {
+		handle_access_fault(vcpu, fault_ipa);
+		ret = 1;
+		goto out_unlock;
+	}
+
 	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
@@ -1377,15 +1458,16 @@ out_unlock:
 	return ret;
 }
 
-static void handle_hva_to_gpa(struct kvm *kvm,
-			      unsigned long start,
-			      unsigned long end,
-			      void (*handler)(struct kvm *kvm,
-					      gpa_t gpa, void *data),
-			      void *data)
+static int handle_hva_to_gpa(struct kvm *kvm,
+			     unsigned long start,
+			     unsigned long end,
+			     int (*handler)(struct kvm *kvm,
+					    gpa_t gpa, void *data),
+			     void *data)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
+	int ret = 0;
 
 	slots = kvm_memslots(kvm);
 
@@ -1409,14 +1491,17 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 
 		for (; gfn < gfn_end; ++gfn) {
 			gpa_t gpa = gfn << PAGE_SHIFT;
-			handler(kvm, gpa, data);
+			ret |= handler(kvm, gpa, data);
 		}
 	}
+
+	return ret;
 }
 
-static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+	return 0;
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1442,7 +1527,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
 	return 0;
 }
 
-static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	pte_t *pte = (pte_t *)data;
 
@@ -1454,6 +1539,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 	 * through this calling path.
 	 */
 	stage2_set_pte(kvm, NULL, gpa, pte, 0);
+	return 0;
 }
 
 
@@ -1470,6 +1556,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		return 0;
+
+	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+		if (pmd_young(*pmd)) {
+			*pmd = pmd_mkold(*pmd);
+			return 1;
+		}
+
+		return 0;
+	}
+
+	pte = pte_offset_kernel(pmd, gpa);
+	if (pte_none(*pte))
+		return 0;
+
+	if (pte_young(*pte)) {
+		*pte = pte_mkold(*pte);	/* Just a page... */
+		return 1;
+	}
+
+	return 0;
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		return 0;
+
+	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
+		return pmd_young(*pmd);
+
+	pte = pte_offset_kernel(pmd, gpa);
+	if (!pte_none(*pte))		/* Just a page... */
+		return pte_young(*pte);
+
+	return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	trace_kvm_age_hva(start, end);
+	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	trace_kvm_test_age_hva(hva);
+	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
 	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
@@ -1477,12 +1624,18 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 
 phys_addr_t kvm_mmu_get_httbr(void)
 {
-	return virt_to_phys(hyp_pgd);
+	if (__kvm_cpu_uses_extended_idmap())
+		return virt_to_phys(merged_hyp_pgd);
+	else
+		return virt_to_phys(hyp_pgd);
 }
 
 phys_addr_t kvm_mmu_get_boot_httbr(void)
 {
-	return virt_to_phys(boot_hyp_pgd);
+	if (__kvm_cpu_uses_extended_idmap())
+		return virt_to_phys(merged_hyp_pgd);
+	else
+		return virt_to_phys(boot_hyp_pgd);
 }
 
 phys_addr_t kvm_get_idmap_vector(void)
@@ -1498,39 +1651,11 @@ int kvm_mmu_init(void)
 	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
 	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
-	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
-		/*
-		 * Our init code is crossing a page boundary. Allocate
-		 * a bounce page, copy the code over and use that.
-		 */
-		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
-		phys_addr_t phys_base;
-
-		init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
-		if (!init_bounce_page) {
-			kvm_err("Couldn't allocate HYP init bounce page\n");
-			err = -ENOMEM;
-			goto out;
-		}
-
-		memcpy(init_bounce_page, __hyp_idmap_text_start, len);
-		/*
-		 * Warning: the code we just copied to the bounce page
-		 * must be flushed to the point of coherency.
-		 * Otherwise, the data may be sitting in L2, and HYP
-		 * mode won't be able to observe it as it runs with
-		 * caches off at that point.
-		 */
-		kvm_flush_dcache_to_poc(init_bounce_page, len);
-
-		phys_base = kvm_virt_to_phys(init_bounce_page);
-		hyp_idmap_vector += phys_base - hyp_idmap_start;
-		hyp_idmap_start = phys_base;
-		hyp_idmap_end = phys_base + len;
-
-		kvm_info("Using HYP init bounce page @%lx\n",
-			 (unsigned long)phys_base);
-	}
+	/*
+	 * We rely on the linker script to ensure at build time that the HYP
+	 * init code does not cross a page boundary.
+	 */
+	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
 
 	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
@@ -1553,6 +1678,17 @@ int kvm_mmu_init(void)
 		goto out;
 	}
 
+	if (__kvm_cpu_uses_extended_idmap()) {
+		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (!merged_hyp_pgd) {
+			kvm_err("Failed to allocate extra HYP pgd\n");
+			goto out;
+		}
+		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
+				    hyp_idmap_start);
+		return 0;
+	}
+
 	/* Map the very same page at the trampoline VA */
 	err = 	__create_hyp_mappings(boot_hyp_pgd,
 				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 6817664b46b8..0ec35392d208 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault,
 		  __entry->hxfar, __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_access_fault,
+	TP_PROTO(unsigned long ipa),
+	TP_ARGS(ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	ipa		)
+	),
+
+	TP_fast_assign(
+		__entry->ipa		= ipa;
+	),
+
+	TP_printk("IPA: %lx", __entry->ipa)
+);
+
 TRACE_EVENT(kvm_irq_line,
 	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
 	TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -210,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva,
 	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
 );
 
+TRACE_EVENT(kvm_age_hva,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
 TRACE_EVENT(kvm_hvc,
 	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
 	TP_ARGS(vcpu_pc, r0, imm),
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 14a0d988c82c..1710fd7db2d5 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -47,7 +47,7 @@ USER(		strnebt	r2, [r0])
 ENDPROC(__clear_user)
 ENDPROC(__clear_user_std)
 
-		.pushsection .fixup,"ax"
+		.pushsection .text.fixup,"ax"
 		.align	0
 9001:		ldmfd	sp!, {r0, pc}
 		.popsection
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index a9d3db16ecb5..9648b0675a3e 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -100,7 +100,7 @@ WEAK(__copy_to_user)
 ENDPROC(__copy_to_user)
 ENDPROC(__copy_to_user_std)
 
-	.pushsection .fixup,"ax"
+	.pushsection .text.fixup,"ax"
 	.align 0
 	copy_abort_preamble
 	ldmfd	sp!, {r1, r2, r3}
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 7d08b43d2c0e..1d0957e61f89 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -68,7 +68,7 @@
  * so properly, we would have to add in whatever registers were loaded before
  * the fault, which, with the current asm above is not predictable.
  */
-		.pushsection .fixup,"ax"
+		.pushsection .text.fixup,"ax"
 		.align	4
 9001:		mov	r4, #-EFAULT
 		ldr	r5, [sp, #8*4]		@ *err_ptr
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 312d43eb686a..8044591dca72 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -83,6 +83,12 @@ void __init register_current_timer_delay(const struct delay_timer *timer)
 			       NSEC_PER_SEC, 3600);
 	res = cyc_to_ns(1ULL, new_mult, new_shift);
 
+	if (res > 1000) {
+		pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n",
+			timer, res);
+		return;
+	}
+
 	if (!delay_calibrated && (!delay_res || (res < delay_res))) {
 		pr_info("Switching to timer-based delay loop, resolution %lluns\n", res);
 		delay_timer			= timer;
diff --git a/arch/arm/mach-cns3xxx/pm.c b/arch/arm/mach-cns3xxx/pm.c
index fb38c726e987..f46b78dd6136 100644
--- a/arch/arm/mach-cns3xxx/pm.c
+++ b/arch/arm/mach-cns3xxx/pm.c
@@ -73,7 +73,6 @@ static void cns3xxx_pwr_soft_rst_force(unsigned int block)
 
 	__raw_writel(reg, PM_SOFT_RST_REG);
 }
-EXPORT_SYMBOL(cns3xxx_pwr_soft_rst_force);
 
 void cns3xxx_pwr_soft_rst(unsigned int block)
 {
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c
index e365c1bb1265..306ebc51599a 100644
--- a/arch/arm/mach-davinci/cpuidle.c
+++ b/arch/arm/mach-davinci/cpuidle.c
@@ -17,7 +17,6 @@
 #include <linux/cpuidle.h>
 #include <linux/io.h>
 #include <linux/export.h>
-#include <asm/proc-fns.h>
 #include <asm/cpuidle.h>
 
 #include <mach/cpuidle.h>
diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
index 8a275f297522..91fe97144570 100644
--- a/arch/arm/mach-dove/pcie.c
+++ b/arch/arm/mach-dove/pcie.c
@@ -155,17 +155,13 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL, PCI_ANY_ID, rc_pci_fixup);
 static struct pci_bus __init *
 dove_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 {
-	struct pci_bus *bus;
-
-	if (nr < num_pcie_ports) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
-					&sys->resources);
-	} else {
-		bus = NULL;
+	if (nr >= num_pcie_ports) {
 		BUG();
+		return NULL;
 	}
 
-	return bus;
+	return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
+				 &sys->resources);
 }
 
 static int __init dove_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index 9e9dfdfad9d7..8576a9f734bd 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -166,16 +166,14 @@ static void __init exynos_init_io(void)
 	exynos_map_io();
 }
 
+/*
+ * Apparently, these SoCs are not able to wake-up from suspend using
+ * the PMU. Too bad. Should they suddenly become capable of such a
+ * feat, the matches below should be moved to suspend.c.
+ */
 static const struct of_device_id exynos_dt_pmu_match[] = {
-	{ .compatible = "samsung,exynos3250-pmu" },
-	{ .compatible = "samsung,exynos4210-pmu" },
-	{ .compatible = "samsung,exynos4212-pmu" },
-	{ .compatible = "samsung,exynos4412-pmu" },
-	{ .compatible = "samsung,exynos4415-pmu" },
-	{ .compatible = "samsung,exynos5250-pmu" },
 	{ .compatible = "samsung,exynos5260-pmu" },
 	{ .compatible = "samsung,exynos5410-pmu" },
-	{ .compatible = "samsung,exynos5420-pmu" },
 	{ /*sentinel*/ },
 };
 
@@ -186,9 +184,6 @@ static void exynos_map_pmu(void)
 	np = of_find_matching_node(NULL, exynos_dt_pmu_match);
 	if (np)
 		pmu_base_addr = of_iomap(np, 0);
-
-	if (!pmu_base_addr)
-		panic("failed to find exynos pmu register\n");
 }
 
 static void __init exynos_init_irq(void)
@@ -211,7 +206,7 @@ static void __init exynos_dt_machine_init(void)
 	if (!IS_ENABLED(CONFIG_SMP))
 		exynos_sysram_init();
 
-#ifdef CONFIG_ARM_EXYNOS_CPUIDLE
+#if defined(CONFIG_SMP) && defined(CONFIG_ARM_EXYNOS_CPUIDLE)
 	if (of_machine_is_compatible("samsung,exynos4210"))
 		exynos_cpuidle.dev.platform_data = &cpuidle_coupled_exynos_data;
 #endif
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index 3f32c47a6d74..d2e9f12d12f1 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -126,8 +126,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
  */
 void exynos_cpu_power_down(int cpu)
 {
-	if (cpu == 0 && (of_machine_is_compatible("samsung,exynos5420") ||
-		of_machine_is_compatible("samsung,exynos5800"))) {
+	if (cpu == 0 && (soc_is_exynos5420() || soc_is_exynos5800())) {
 		/*
 		 * Bypass power down for CPU0 during suspend. Check for
 		 * the SYS_PWR_REG value to decide if we are suspending
diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c
index e6209dadc00d..5685250693fd 100644
--- a/arch/arm/mach-exynos/pm.c
+++ b/arch/arm/mach-exynos/pm.c
@@ -181,6 +181,7 @@ void exynos_enter_aftr(void)
 	cpu_pm_exit();
 }
 
+#if defined(CONFIG_SMP) && defined(CONFIG_ARM_EXYNOS_CPUIDLE)
 static atomic_t cpu1_wakeup = ATOMIC_INIT(0);
 
 static int exynos_cpu0_enter_aftr(void)
@@ -302,3 +303,4 @@ struct cpuidle_exynos_data cpuidle_coupled_exynos_data = {
 	.pre_enter_aftr		= exynos_pre_enter_aftr,
 	.post_enter_aftr		= exynos_post_enter_aftr,
 };
+#endif /* CONFIG_SMP && CONFIG_ARM_EXYNOS_CPUIDLE */
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 20f267121b3e..37266a826437 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -161,6 +161,34 @@ no_clk:
 		of_genpd_add_provider_simple(np, &pd->pd);
 	}
 
+	/* Assign the child power domains to their parents */
+	for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") {
+		struct generic_pm_domain *child_domain, *parent_domain;
+		struct of_phandle_args args;
+
+		args.np = np;
+		args.args_count = 0;
+		child_domain = of_genpd_get_from_provider(&args);
+		if (!child_domain)
+			continue;
+
+		if (of_parse_phandle_with_args(np, "power-domains",
+					 "#power-domain-cells", 0, &args) != 0)
+			continue;
+
+		parent_domain = of_genpd_get_from_provider(&args);
+		if (!parent_domain)
+			continue;
+
+		if (pm_genpd_add_subdomain(parent_domain, child_domain))
+			pr_warn("%s failed to add subdomain: %s\n",
+				parent_domain->name, child_domain->name);
+		else
+			pr_info("%s has as child subdomain: %s.\n",
+				parent_domain->name, child_domain->name);
+		of_node_put(np);
+	}
+
 	return 0;
 }
 arch_initcall(exynos4_pm_init_power_domain);
diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S
index 31d25834b9c4..cf950790fbdc 100644
--- a/arch/arm/mach-exynos/sleep.S
+++ b/arch/arm/mach-exynos/sleep.S
@@ -23,14 +23,7 @@
 #define CPU_MASK	0xff0ffff0
 #define CPU_CORTEX_A9	0x410fc090
 
-	/*
-	 * The following code is located into the .data section. This is to
-	 * allow l2x0_regs_phys to be accessed with a relative load while we
-	 * can't rely on any MMU translation. We could have put l2x0_regs_phys
-	 * in the .text section as well, but some setups might insist on it to
-	 * be truly read-only. (Reference from: arch/arm/kernel/sleep.S)
-	 */
-	.data
+	.text
 	.align
 
 	/*
@@ -69,10 +62,12 @@ ENTRY(exynos_cpu_resume_ns)
 	cmp	r0, r1
 	bne	skip_cp15
 
-	adr	r0, cp15_save_power
+	adr	r0, _cp15_save_power
 	ldr	r1, [r0]
-	adr	r0, cp15_save_diag
+	ldr	r1, [r0, r1]
+	adr	r0, _cp15_save_diag
 	ldr	r2, [r0]
+	ldr	r2, [r0, r2]
 	mov	r0, #SMC_CMD_C15RESUME
 	dsb
 	smc	#0
@@ -118,14 +113,20 @@ skip_l2x0:
 skip_cp15:
 	b	cpu_resume
 ENDPROC(exynos_cpu_resume_ns)
+
+	.align
+_cp15_save_power:
+	.long	cp15_save_power - .
+_cp15_save_diag:
+	.long	cp15_save_diag - .
+#ifdef CONFIG_CACHE_L2X0
+1:	.long	l2x0_saved_regs - .
+#endif /* CONFIG_CACHE_L2X0 */
+
+	.data
 	.globl cp15_save_diag
 cp15_save_diag:
 	.long	0	@ cp15 diagnostic
 	.globl cp15_save_power
 cp15_save_power:
 	.long	0	@ cp15 power control
-
-#ifdef CONFIG_CACHE_L2X0
-	.align
-1:	.long	l2x0_saved_regs - .
-#endif /* CONFIG_CACHE_L2X0 */
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 52e2b1a2fddb..2146d918aedd 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -18,7 +18,9 @@
 #include <linux/syscore_ops.h>
 #include <linux/cpu_pm.h>
 #include <linux/io.h>
-#include <linux/irqchip/arm-gic.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
 #include <linux/err.h>
 #include <linux/regulator/machine.h>
 
@@ -43,8 +45,8 @@
 #define EXYNOS5420_CPU_STATE	0x28
 
 /**
- * struct exynos_wkup_irq - Exynos GIC to PMU IRQ mapping
- * @hwirq: Hardware IRQ signal of the GIC
+ * struct exynos_wkup_irq - PMU IRQ to mask mapping
+ * @hwirq: Hardware IRQ signal of the PMU
  * @mask: Mask in PMU wake-up mask register
  */
 struct exynos_wkup_irq {
@@ -87,20 +89,20 @@ static unsigned int exynos_pmu_spare3;
 static u32 exynos_irqwake_intmask = 0xffffffff;
 
 static const struct exynos_wkup_irq exynos3250_wkup_irq[] = {
-	{ 73, BIT(1) }, /* RTC alarm */
-	{ 74, BIT(2) }, /* RTC tick */
+	{ 105, BIT(1) }, /* RTC alarm */
+	{ 106, BIT(2) }, /* RTC tick */
 	{ /* sentinel */ },
 };
 
 static const struct exynos_wkup_irq exynos4_wkup_irq[] = {
-	{ 76, BIT(1) }, /* RTC alarm */
-	{ 77, BIT(2) }, /* RTC tick */
+	{ 44, BIT(1) }, /* RTC alarm */
+	{ 45, BIT(2) }, /* RTC tick */
 	{ /* sentinel */ },
 };
 
 static const struct exynos_wkup_irq exynos5250_wkup_irq[] = {
-	{ 75, BIT(1) }, /* RTC alarm */
-	{ 76, BIT(2) }, /* RTC tick */
+	{ 43, BIT(1) }, /* RTC alarm */
+	{ 44, BIT(2) }, /* RTC tick */
 	{ /* sentinel */ },
 };
 
@@ -167,6 +169,113 @@ static int exynos_irq_set_wake(struct irq_data *data, unsigned int state)
 	return -ENOENT;
 }
 
+static struct irq_chip exynos_pmu_chip = {
+	.name			= "PMU",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_wake		= exynos_irq_set_wake,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#endif
+};
+
+static int exynos_pmu_domain_xlate(struct irq_domain *domain,
+				   struct device_node *controller,
+				   const u32 *intspec,
+				   unsigned int intsize,
+				   unsigned long *out_hwirq,
+				   unsigned int *out_type)
+{
+	if (domain->of_node != controller)
+		return -EINVAL;	/* Shouldn't happen, really... */
+	if (intsize != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (intspec[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2];
+	return 0;
+}
+
+static int exynos_pmu_domain_alloc(struct irq_domain *domain,
+				   unsigned int virq,
+				   unsigned int nr_irqs, void *data)
+{
+	struct of_phandle_args *args = data;
+	struct of_phandle_args parent_args;
+	irq_hw_number_t hwirq;
+	int i;
+
+	if (args->args_count != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (args->args[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	hwirq = args->args[1];
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &exynos_pmu_chip, NULL);
+
+	parent_args = *args;
+	parent_args.np = domain->parent->of_node;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+}
+
+static struct irq_domain_ops exynos_pmu_domain_ops = {
+	.xlate	= exynos_pmu_domain_xlate,
+	.alloc	= exynos_pmu_domain_alloc,
+	.free	= irq_domain_free_irqs_common,
+};
+
+static int __init exynos_pmu_irq_init(struct device_node *node,
+				      struct device_node *parent)
+{
+	struct irq_domain *parent_domain, *domain;
+
+	if (!parent) {
+		pr_err("%s: no parent, giving up\n", node->full_name);
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%s: unable to obtain parent domain\n", node->full_name);
+		return -ENXIO;
+	}
+
+	pmu_base_addr = of_iomap(node, 0);
+
+	if (!pmu_base_addr) {
+		pr_err("%s: failed to find exynos pmu register\n",
+		       node->full_name);
+		return -ENOMEM;
+	}
+
+	domain = irq_domain_add_hierarchy(parent_domain, 0, 0,
+					  node, &exynos_pmu_domain_ops,
+					  NULL);
+	if (!domain) {
+		iounmap(pmu_base_addr);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+#define EXYNOS_PMU_IRQ(symbol, name)	OF_DECLARE_2(irqchip, symbol, name, exynos_pmu_irq_init)
+
+EXYNOS_PMU_IRQ(exynos3250_pmu_irq, "samsung,exynos3250-pmu");
+EXYNOS_PMU_IRQ(exynos4210_pmu_irq, "samsung,exynos4210-pmu");
+EXYNOS_PMU_IRQ(exynos4212_pmu_irq, "samsung,exynos4212-pmu");
+EXYNOS_PMU_IRQ(exynos4412_pmu_irq, "samsung,exynos4412-pmu");
+EXYNOS_PMU_IRQ(exynos4415_pmu_irq, "samsung,exynos4415-pmu");
+EXYNOS_PMU_IRQ(exynos5250_pmu_irq, "samsung,exynos5250-pmu");
+EXYNOS_PMU_IRQ(exynos5420_pmu_irq, "samsung,exynos5420-pmu");
+
 static int exynos_cpu_do_idle(void)
 {
 	/* issue the standby signal into the pm unit. */
@@ -615,17 +724,19 @@ static struct syscore_ops exynos_pm_syscore_ops;
 void __init exynos_pm_init(void)
 {
 	const struct of_device_id *match;
+	struct device_node *np;
 	u32 tmp;
 
-	of_find_matching_node_and_match(NULL, exynos_pmu_of_device_ids, &match);
-	if (!match) {
+	np = of_find_matching_node_and_match(NULL, exynos_pmu_of_device_ids, &match);
+	if (!np) {
 		pr_err("Failed to find PMU node\n");
 		return;
 	}
-	pm_data = (struct exynos_pm_data *) match->data;
 
-	/* Platform-specific GIC callback */
-	gic_arch_extn.irq_set_wake = exynos_irq_set_wake;
+	if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL)))
+		pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+	pm_data = (struct exynos_pm_data *) match->data;
 
 	/* All wakeup disable */
 	tmp = pmu_raw_readl(S5P_WAKEUP_MASK);
diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
index ddbb4b837fea..0748747b2bc6 100644
--- a/arch/arm/mach-imx/Kconfig
+++ b/arch/arm/mach-imx/Kconfig
@@ -582,6 +582,7 @@ config SOC_IMX6SX
 
 config SOC_VF610
 	bool "Vybrid Family VF610 support"
+	select IRQ_DOMAIN_HIERARCHY
 	select ARM_GIC
 	select PINCTRL_VF610
 	select PL310_ERRATA_769419 if CACHE_L2X0
diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index d76d08623f9f..8e21ccc1eda2 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -9,7 +9,6 @@
 #include <linux/cpuidle.h>
 #include <linux/module.h>
 #include <asm/cpuidle.h>
-#include <asm/proc-fns.h>
 
 #include "common.h"
 #include "cpuidle.h"
diff --git a/arch/arm/mach-imx/cpuidle-imx6sl.c b/arch/arm/mach-imx/cpuidle-imx6sl.c
index 7d92e6584551..5742a9fd1ef2 100644
--- a/arch/arm/mach-imx/cpuidle-imx6sl.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sl.c
@@ -9,7 +9,6 @@
 #include <linux/cpuidle.h>
 #include <linux/module.h>
 #include <asm/cpuidle.h>
-#include <asm/proc-fns.h>
 
 #include "common.h"
 #include "cpuidle.h"
diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c
index 5a36722b089d..2c9f1a8bf245 100644
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -10,7 +10,6 @@
 #include <linux/cpu_pm.h>
 #include <linux/module.h>
 #include <asm/cpuidle.h>
-#include <asm/proc-fns.h>
 #include <asm/suspend.h>
 
 #include "common.h"
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 4ad6e473cf83..9de3412af406 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -211,8 +211,9 @@ static void __init imx6q_1588_init(void)
 	 * set bit IOMUXC_GPR1[21].  Or the PTP clock must be from pad
 	 * (external OSC), and we need to clear the bit.
 	 */
-	clksel = ptp_clk == enet_ref ? IMX6Q_GPR1_ENET_CLK_SEL_ANATOP :
-				       IMX6Q_GPR1_ENET_CLK_SEL_PAD;
+	clksel = clk_is_match(ptp_clk, enet_ref) ?
+				IMX6Q_GPR1_ENET_CLK_SEL_ANATOP :
+				IMX6Q_GPR1_ENET_CLK_SEL_PAD;
 	gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
 	if (!IS_ERR(gpr))
 		regmap_update_bits(gpr, IOMUXC_GPR1,
diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c
index 445e553f4a28..097ea4cb1136 100644
--- a/arch/arm/mach-mv78xx0/pcie.c
+++ b/arch/arm/mach-mv78xx0/pcie.c
@@ -197,17 +197,13 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL, PCI_ANY_ID, rc_pci_fixup);
 static struct pci_bus __init *
 mv78xx0_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 {
-	struct pci_bus *bus;
-
-	if (nr < num_pcie_ports) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
-					&sys->resources);
-	} else {
-		bus = NULL;
+	if (nr >= num_pcie_ports) {
 		BUG();
+		return NULL;
 	}
 
-	return bus;
+	return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
+				 &sys->resources);
 }
 
 static int __init mv78xx0_pcie_map_irq(const struct pci_dev *dev, u8 slot,
diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index 8b9f5e202ccf..4f4e22206ae5 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -415,6 +415,9 @@ static __init int armada_38x_cpuidle_init(void)
 	void __iomem *mpsoc_base;
 	u32 reg;
 
+	pr_warn("CPU idle is currently broken on Armada 38x: disabling");
+	return 0;
+
 	np = of_find_compatible_node(NULL, NULL,
 				     "marvell,armada-380-coherency-fabric");
 	if (!np)
@@ -476,6 +479,16 @@ static int __init mvebu_v7_cpu_pm_init(void)
 		return 0;
 	of_node_put(np);
 
+	/*
+	 * Currently the CPU idle support for Armada 38x is broken, as
+	 * the CPU hotplug uses some of the CPU idle functions it is
+	 * broken too, so let's disable it
+	 */
+	if (of_machine_is_compatible("marvell,armada380")) {
+		cpu_hotplug_disable();
+		pr_warn("CPU hotplug support is currently broken on Armada 38x: disabling");
+	}
+
 	if (of_machine_is_compatible("marvell,armadaxp"))
 		ret = armada_xp_cpuidle_init();
 	else if (of_machine_is_compatible("marvell,armada370"))
@@ -489,7 +502,8 @@ static int __init mvebu_v7_cpu_pm_init(void)
 		return ret;
 
 	mvebu_v7_pmsu_enable_l2_powerdown_onidle();
-	platform_device_register(&mvebu_v7_cpuidle_device);
+	if (mvebu_v7_cpuidle_device.name)
+		platform_device_register(&mvebu_v7_cpuidle_device);
 	cpu_pm_register_notifier(&mvebu_v7_cpu_pm_notifier);
 
 	return 0;
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 34b4c0044961..dd94567c3628 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -71,13 +71,7 @@ static unsigned int mpui7xx_sleep_save[MPUI7XX_SLEEP_SAVE_SIZE];
 static unsigned int mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_SIZE];
 static unsigned int mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_SIZE];
 
-#ifndef CONFIG_OMAP_32K_TIMER
-
-static unsigned short enable_dyn_sleep = 0;
-
-#else
-
-static unsigned short enable_dyn_sleep = 1;
+static unsigned short enable_dyn_sleep;
 
 static ssize_t idle_show(struct kobject *kobj, struct kobj_attribute *attr,
 			 char *buf)
@@ -90,8 +84,9 @@ static ssize_t idle_store(struct kobject *kobj, struct kobj_attribute *attr,
 {
 	unsigned short value;
 	if (sscanf(buf, "%hu", &value) != 1 ||
-	    (value != 0 && value != 1)) {
-		printk(KERN_ERR "idle_sleep_store: Invalid value\n");
+	    (value != 0 && value != 1) ||
+	    (value != 0 && !IS_ENABLED(CONFIG_OMAP_32K_TIMER))) {
+		pr_err("idle_sleep_store: Invalid value\n");
 		return -EINVAL;
 	}
 	enable_dyn_sleep = value;
@@ -101,7 +96,6 @@ static ssize_t idle_store(struct kobject *kobj, struct kobj_attribute *attr,
 static struct kobj_attribute sleep_while_idle_attr =
 	__ATTR(sleep_while_idle, 0644, idle_show, idle_store);
 
-#endif
 
 static void (*omap_sram_suspend)(unsigned long r0, unsigned long r1) = NULL;
 
@@ -115,16 +109,11 @@ void omap1_pm_idle(void)
 {
 	extern __u32 arm_idlect1_mask;
 	__u32 use_idlect1 = arm_idlect1_mask;
-	int do_sleep = 0;
 
 	local_fiq_disable();
 
 #if defined(CONFIG_OMAP_MPU_TIMER) && !defined(CONFIG_OMAP_DM_TIMER)
-#warning Enable 32kHz OS timer in order to allow sleep states in idle
 	use_idlect1 = use_idlect1 & ~(1 << 9);
-#else
-	if (enable_dyn_sleep)
-		do_sleep = 1;
 #endif
 
 #ifdef CONFIG_OMAP_DM_TIMER
@@ -134,10 +123,12 @@ void omap1_pm_idle(void)
 	if (omap_dma_running())
 		use_idlect1 &= ~(1 << 6);
 
-	/* We should be able to remove the do_sleep variable and multiple
+	/*
+	 * We should be able to remove the do_sleep variable and multiple
 	 * tests above as soon as drivers, timer and DMA code have been fixed.
-	 * Even the sleep block count should become obsolete. */
-	if ((use_idlect1 != ~0) || !do_sleep) {
+	 * Even the sleep block count should become obsolete.
+	 */
+	if ((use_idlect1 != ~0) || !enable_dyn_sleep) {
 
 		__u32 saved_idlect1 = omap_readl(ARM_IDLECT1);
 		if (cpu_is_omap15xx())
@@ -635,15 +626,25 @@ static const struct platform_suspend_ops omap_pm_ops = {
 
 static int __init omap_pm_init(void)
 {
-
-#ifdef CONFIG_OMAP_32K_TIMER
-	int error;
-#endif
+	int error = 0;
 
 	if (!cpu_class_is_omap1())
 		return -ENODEV;
 
-	printk("Power Management for TI OMAP.\n");
+	pr_info("Power Management for TI OMAP.\n");
+
+	if (!IS_ENABLED(CONFIG_OMAP_32K_TIMER))
+		pr_info("OMAP1 PM: sleep states in idle disabled due to no 32KiHz timer\n");
+
+	if (!IS_ENABLED(CONFIG_OMAP_DM_TIMER))
+		pr_info("OMAP1 PM: sleep states in idle disabled due to no DMTIMER support\n");
+
+	if (IS_ENABLED(CONFIG_OMAP_32K_TIMER) &&
+	    IS_ENABLED(CONFIG_OMAP_DM_TIMER)) {
+		/* OMAP16xx only */
+		pr_info("OMAP1 PM: sleep states in idle enabled\n");
+		enable_dyn_sleep = 1;
+	}
 
 	/*
 	 * We copy the assembler sleep/wakeup routines to SRAM.
@@ -693,17 +694,15 @@ static int __init omap_pm_init(void)
 	omap_pm_init_debugfs();
 #endif
 
-#ifdef CONFIG_OMAP_32K_TIMER
 	error = sysfs_create_file(power_kobj, &sleep_while_idle_attr.attr);
 	if (error)
 		printk(KERN_ERR "sysfs_create_file failed: %d\n", error);
-#endif
 
 	if (cpu_is_omap16xx()) {
 		/* configure LOW_PWR pin */
 		omap_cfg_reg(T20_1610_LOW_PWR);
 	}
 
-	return 0;
+	return error;
 }
 __initcall(omap_pm_init);
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 276a755d2b0b..3e4b4ae8c75b 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -69,6 +69,7 @@ config SOC_DRA7XX
 	select ARM_GIC
 	select HAVE_ARM_ARCH_TIMER
 	select IRQ_CROSSBAR
+	select ARM_ERRATA_798181 if SMP
 
 config ARCH_OMAP2PLUS
 	bool
@@ -260,27 +261,6 @@ config OMAP3_SDRC_AC_TIMING
 	  wish to say no.  Selecting yes without understanding what is
 	  going on could result in system crashes;
 
-config OMAP4_ERRATA_I688
-	bool "OMAP4 errata: Async Bridge Corruption"
-	depends on (ARCH_OMAP4 || SOC_OMAP5) && !ARCH_MULTIPLATFORM
-	select ARCH_HAS_BARRIERS
-	help
-	  If a data is stalled inside asynchronous bridge because of back
-	  pressure, it may be accepted multiple times, creating pointer
-	  misalignment that will corrupt next transfers on that data path
-	  until next reset of the system (No recovery procedure once the
-	  issue is hit, the path remains consistently broken). Async bridge
-	  can be found on path between MPU to EMIF and MPU to L3 interconnect.
-	  This situation can happen only when the idle is initiated by a
-	  Master Request Disconnection (which is trigged by software when
-	  executing WFI on CPU).
-	  The work-around for this errata needs all the initiators connected
-	  through async bridge must ensure that data path is properly drained
-	  before issuing WFI. This condition will be met if one Strongly ordered
-	  access is performed to the target right before executing the WFI.
-	  In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained.
-	  IO barrier ensure that there is no synchronisation loss on initiators
-	  operating on both interconnect port simultaneously.
 endmenu
 
 endif
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index 91738a14ecbe..b5dfbc1b1fc6 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -492,51 +492,36 @@ static struct twl4030_platform_data cm_t35_twldata = {
 #include <media/omap3isp.h>
 #include "devices.h"
 
-static struct i2c_board_info cm_t35_isp_i2c_boardinfo[] = {
+static struct isp_platform_subdev cm_t35_isp_subdevs[] = {
 	{
-		I2C_BOARD_INFO("mt9t001", 0x5d),
-	},
-	{
-		I2C_BOARD_INFO("tvp5150", 0x5c),
-	},
-};
-
-static struct isp_subdev_i2c_board_info cm_t35_isp_primary_subdevs[] = {
-	{
-		.board_info = &cm_t35_isp_i2c_boardinfo[0],
-		.i2c_adapter_id = 3,
-	},
-	{ NULL, 0, },
-};
-
-static struct isp_subdev_i2c_board_info cm_t35_isp_secondary_subdevs[] = {
-	{
-		.board_info = &cm_t35_isp_i2c_boardinfo[1],
+		.board_info = &(struct i2c_board_info){
+			I2C_BOARD_INFO("mt9t001", 0x5d)
+		},
 		.i2c_adapter_id = 3,
-	},
-	{ NULL, 0, },
-};
-
-static struct isp_v4l2_subdevs_group cm_t35_isp_subdevs[] = {
-	{
-		.subdevs = cm_t35_isp_primary_subdevs,
-		.interface = ISP_INTERFACE_PARALLEL,
-		.bus = {
-			.parallel = {
-				.clk_pol = 1,
+		.bus = &(struct isp_bus_cfg){
+			.interface = ISP_INTERFACE_PARALLEL,
+			.bus = {
+				.parallel = {
+					.clk_pol = 1,
+				},
 			},
 		},
 	},
 	{
-		.subdevs = cm_t35_isp_secondary_subdevs,
-		.interface = ISP_INTERFACE_PARALLEL,
-		.bus = {
-			.parallel = {
-				.clk_pol = 0,
+		.board_info = &(struct i2c_board_info){
+			I2C_BOARD_INFO("tvp5150", 0x5c),
+		},
+		.i2c_adapter_id = 3,
+		.bus = &(struct isp_bus_cfg){
+			.interface = ISP_INTERFACE_PARALLEL,
+			.bus = {
+				.parallel = {
+					.clk_pol = 0,
+				},
 			},
 		},
 	},
-	{ NULL, 0, },
+	{ 0 },
 };
 
 static struct isp_platform_data cm_t35_isp_pdata = {
diff --git a/arch/arm/mach-omap2/common.c b/arch/arm/mach-omap2/common.c
index 484cdadfb187..eae6a0e87c90 100644
--- a/arch/arm/mach-omap2/common.c
+++ b/arch/arm/mach-omap2/common.c
@@ -30,5 +30,4 @@ int __weak omap_secure_ram_reserve_memblock(void)
 void __init omap_reserve(void)
 {
 	omap_secure_ram_reserve_memblock();
-	omap_barrier_reserve_memblock();
 }
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 46e24581d624..cf3cf22ecd42 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -200,9 +200,6 @@ void __init omap4_map_io(void);
 void __init omap5_map_io(void);
 void __init ti81xx_map_io(void);
 
-/* omap_barriers_init() is OMAP4 only */
-void omap_barriers_init(void);
-
 /**
  * omap_test_timeout - busy-loop, testing a condition
  * @cond: condition to test until it evaluates to true
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index 01e398a868bc..4b8e9f4d59ea 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -14,10 +14,9 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/export.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <asm/cpuidle.h>
-#include <asm/proc-fns.h>
 
 #include "common.h"
 #include "pm.h"
@@ -84,7 +83,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 {
 	struct idle_statedata *cx = state_ptr + index;
 	u32 mpuss_can_lose_context = 0;
-	int cpu_id = smp_processor_id();
 
 	/*
 	 * CPU0 has to wait and stay ON until CPU1 is OFF state.
@@ -112,7 +110,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 	mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) &&
 				 (cx->mpu_logic_state == PWRDM_POWER_OFF);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
+	tick_broadcast_enter();
 
 	/*
 	 * Call idle CPU PM enter notifier chain so that
@@ -169,7 +167,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 	if (dev->cpu == 0 && mpuss_can_lose_context)
 		cpu_cluster_pm_exit();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+	tick_broadcast_exit();
 
 fail:
 	cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
@@ -184,8 +182,7 @@ fail:
  */
 static void omap_setup_broadcast_timer(void *arg)
 {
-	int cpu = smp_processor_id();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+	tick_broadcast_enable();
 }
 
 static struct cpuidle_driver omap4_idle_driver = {
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 1afb50d6d636..990338fbaa59 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -74,82 +74,12 @@ omap_postcore_initcall(omap3_l3_init);
 static struct resource omap3isp_resources[] = {
 	{
 		.start		= OMAP3430_ISP_BASE,
-		.end		= OMAP3430_ISP_END,
+		.end		= OMAP3430_ISP_BASE + 0x12fc,
 		.flags		= IORESOURCE_MEM,
 	},
 	{
-		.start		= OMAP3430_ISP_CCP2_BASE,
-		.end		= OMAP3430_ISP_CCP2_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_CCDC_BASE,
-		.end		= OMAP3430_ISP_CCDC_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_HIST_BASE,
-		.end		= OMAP3430_ISP_HIST_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_H3A_BASE,
-		.end		= OMAP3430_ISP_H3A_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_PREV_BASE,
-		.end		= OMAP3430_ISP_PREV_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_RESZ_BASE,
-		.end		= OMAP3430_ISP_RESZ_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_SBL_BASE,
-		.end		= OMAP3430_ISP_SBL_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_CSI2A_REGS1_BASE,
-		.end		= OMAP3430_ISP_CSI2A_REGS1_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3430_ISP_CSIPHY2_BASE,
-		.end		= OMAP3430_ISP_CSIPHY2_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3630_ISP_CSI2A_REGS2_BASE,
-		.end		= OMAP3630_ISP_CSI2A_REGS2_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3630_ISP_CSI2C_REGS1_BASE,
-		.end		= OMAP3630_ISP_CSI2C_REGS1_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3630_ISP_CSIPHY1_BASE,
-		.end		= OMAP3630_ISP_CSIPHY1_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP3630_ISP_CSI2C_REGS2_BASE,
-		.end		= OMAP3630_ISP_CSI2C_REGS2_END,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP343X_CTRL_BASE + OMAP343X_CONTROL_CSIRXFE,
-		.end		= OMAP343X_CTRL_BASE + OMAP343X_CONTROL_CSIRXFE + 3,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.start		= OMAP343X_CTRL_BASE + OMAP3630_CONTROL_CAMERA_PHY_CTRL,
-		.end		= OMAP343X_CTRL_BASE + OMAP3630_CONTROL_CAMERA_PHY_CTRL + 3,
+		.start		= OMAP3430_ISP_BASE2,
+		.end		= OMAP3430_ISP_BASE2 + 0x0600,
 		.flags		= IORESOURCE_MEM,
 	},
 	{
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
index dc6e79c4484a..9a8611ab5dfa 100644
--- a/arch/arm/mach-omap2/hsmmc.c
+++ b/arch/arm/mach-omap2/hsmmc.c
@@ -150,9 +150,13 @@ static int nop_mmc_set_power(struct device *dev, int power_on, int vdd)
 static inline void omap_hsmmc_mux(struct omap_hsmmc_platform_data
 				  *mmc_controller, int controller_nr)
 {
-	if (gpio_is_valid(mmc_controller->switch_pin) &&
-	    (mmc_controller->switch_pin < OMAP_MAX_GPIO_LINES))
-		omap_mux_init_gpio(mmc_controller->switch_pin,
+	if (gpio_is_valid(mmc_controller->gpio_cd) &&
+	    (mmc_controller->gpio_cd < OMAP_MAX_GPIO_LINES))
+		omap_mux_init_gpio(mmc_controller->gpio_cd,
+				   OMAP_PIN_INPUT_PULLUP);
+	if (gpio_is_valid(mmc_controller->gpio_cod) &&
+	    (mmc_controller->gpio_cod < OMAP_MAX_GPIO_LINES))
+		omap_mux_init_gpio(mmc_controller->gpio_cod,
 				   OMAP_PIN_INPUT_PULLUP);
 	if (gpio_is_valid(mmc_controller->gpio_wp) &&
 	    (mmc_controller->gpio_wp < OMAP_MAX_GPIO_LINES))
@@ -250,15 +254,20 @@ static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c,
 	mmc->internal_clock = !c->ext_clock;
 	mmc->reg_offset = 0;
 
-	mmc->switch_pin = c->gpio_cd;
+	if (c->cover_only) {
+		/* detect if mobile phone cover removed */
+		mmc->gpio_cd = -EINVAL;
+		mmc->gpio_cod = c->gpio_cd;
+	} else {
+		/* card detect pin on the mmc socket itself */
+		mmc->gpio_cd = c->gpio_cd;
+		mmc->gpio_cod = -EINVAL;
+	}
 	mmc->gpio_wp = c->gpio_wp;
 
 	mmc->remux = c->remux;
 	mmc->init_card = c->init_card;
 
-	if (c->cover_only)
-		mmc->cover = 1;
-
 	if (c->nonremovable)
 		mmc->nonremovable = 1;
 
@@ -358,7 +367,15 @@ void omap_hsmmc_late_init(struct omap2_hsmmc_info *c)
 		if (!mmc_pdata)
 			continue;
 
-		mmc_pdata->switch_pin = c->gpio_cd;
+		if (c->cover_only) {
+			/* detect if mobile phone cover removed */
+			mmc_pdata->gpio_cd = -EINVAL;
+			mmc_pdata->gpio_cod = c->gpio_cd;
+		} else {
+			/* card detect pin on the mmc socket itself */
+			mmc_pdata->gpio_cd = c->gpio_cd;
+			mmc_pdata->gpio_cod = -EINVAL;
+		}
 		mmc_pdata->gpio_wp = c->gpio_wp;
 
 		res = omap_device_register(pdev);
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 2a2f4d56e4c8..25f1beea453e 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -720,6 +720,8 @@ static const char * __init omap_get_family(void)
 		return kasprintf(GFP_KERNEL, "OMAP4");
 	else if (soc_is_omap54xx())
 		return kasprintf(GFP_KERNEL, "OMAP5");
+	else if (soc_is_am33xx() || soc_is_am335x())
+		return kasprintf(GFP_KERNEL, "AM33xx");
 	else if (soc_is_am43xx())
 		return kasprintf(GFP_KERNEL, "AM43xx");
 	else if (soc_is_dra7xx())
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index c4871c55bd8b..1eeff6be260d 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -306,7 +306,6 @@ void __init am33xx_map_io(void)
 void __init omap4_map_io(void)
 {
 	iotable_init(omap44xx_io_desc, ARRAY_SIZE(omap44xx_io_desc));
-	omap_barriers_init();
 }
 #endif
 
@@ -314,7 +313,6 @@ void __init omap4_map_io(void)
 void __init omap5_map_io(void)
 {
 	iotable_init(omap54xx_io_desc, ARRAY_SIZE(omap54xx_io_desc));
-	omap_barriers_init();
 }
 #endif
 /*
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 78064b0d4db5..176eef6ef338 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -1053,7 +1053,7 @@ static void __init omap_mux_init_list(struct omap_mux_partition *partition,
 		struct omap_mux *entry;
 
 #ifdef CONFIG_OMAP_MUX
-		if (!superset->muxnames || !superset->muxnames[0]) {
+		if (!superset->muxnames[0]) {
 			superset++;
 			continue;
 		}
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index dec2b05d184b..af2851fbcdf0 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -70,13 +70,6 @@ extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
 extern u32 rx51_secure_update_aux_cr(u32 set_bits, u32 clear_bits);
 extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 flag);
 
-#ifdef CONFIG_OMAP4_ERRATA_I688
-extern int omap_barrier_reserve_memblock(void);
-#else
-static inline void omap_barrier_reserve_memblock(void)
-{ }
-#endif
-
 #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
 void set_cntfreq(void);
 #else
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index f961c46453b9..3b56722dfd8a 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -20,11 +20,12 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/cpu_pm.h>
-#include <linux/irqchip/arm-gic.h>
 
 #include "omap-wakeupgen.h"
 #include "omap-secure.h"
@@ -78,29 +79,12 @@ static inline void sar_writel(u32 val, u32 offset, u8 idx)
 
 static inline int _wakeupgen_get_irq_info(u32 irq, u32 *bit_posn, u8 *reg_index)
 {
-	unsigned int spi_irq;
-
-	/*
-	 * PPIs and SGIs are not supported.
-	 */
-	if (irq < OMAP44XX_IRQ_GIC_START)
-		return -EINVAL;
-
-	/*
-	 * Subtract the GIC offset.
-	 */
-	spi_irq = irq - OMAP44XX_IRQ_GIC_START;
-	if (spi_irq > MAX_IRQS) {
-		pr_err("omap wakeupGen: Invalid IRQ%d\n", irq);
-		return -EINVAL;
-	}
-
 	/*
 	 * Each WakeupGen register controls 32 interrupt.
 	 * i.e. 1 bit per SPI IRQ
 	 */
-	*reg_index = spi_irq >> 5;
-	*bit_posn = spi_irq %= 32;
+	*reg_index = irq >> 5;
+	*bit_posn = irq %= 32;
 
 	return 0;
 }
@@ -141,6 +125,7 @@ static void wakeupgen_mask(struct irq_data *d)
 	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
 	_wakeupgen_clear(d->hwirq, irq_target_cpu[d->hwirq]);
 	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+	irq_chip_mask_parent(d);
 }
 
 /*
@@ -153,6 +138,7 @@ static void wakeupgen_unmask(struct irq_data *d)
 	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
 	_wakeupgen_set(d->hwirq, irq_target_cpu[d->hwirq]);
 	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+	irq_chip_unmask_parent(d);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -400,15 +386,91 @@ int omap_secure_apis_support(void)
 	return omap_secure_apis;
 }
 
+static struct irq_chip wakeupgen_chip = {
+	.name			= "WUGEN",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= wakeupgen_mask,
+	.irq_unmask		= wakeupgen_unmask,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#endif
+};
+
+static int wakeupgen_domain_xlate(struct irq_domain *domain,
+				  struct device_node *controller,
+				  const u32 *intspec,
+				  unsigned int intsize,
+				  unsigned long *out_hwirq,
+				  unsigned int *out_type)
+{
+	if (domain->of_node != controller)
+		return -EINVAL;	/* Shouldn't happen, really... */
+	if (intsize != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (intspec[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2];
+	return 0;
+}
+
+static int wakeupgen_domain_alloc(struct irq_domain *domain,
+				  unsigned int virq,
+				  unsigned int nr_irqs, void *data)
+{
+	struct of_phandle_args *args = data;
+	struct of_phandle_args parent_args;
+	irq_hw_number_t hwirq;
+	int i;
+
+	if (args->args_count != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (args->args[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	hwirq = args->args[1];
+	if (hwirq >= MAX_IRQS)
+		return -EINVAL;	/* Can't deal with this */
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &wakeupgen_chip, NULL);
+
+	parent_args = *args;
+	parent_args.np = domain->parent->of_node;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+}
+
+static struct irq_domain_ops wakeupgen_domain_ops = {
+	.xlate	= wakeupgen_domain_xlate,
+	.alloc	= wakeupgen_domain_alloc,
+	.free	= irq_domain_free_irqs_common,
+};
+
 /*
  * Initialise the wakeupgen module.
  */
-int __init omap_wakeupgen_init(void)
+static int __init wakeupgen_init(struct device_node *node,
+				 struct device_node *parent)
 {
+	struct irq_domain *parent_domain, *domain;
 	int i;
 	unsigned int boot_cpu = smp_processor_id();
 	u32 val;
 
+	if (!parent) {
+		pr_err("%s: no parent, giving up\n", node->full_name);
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%s: unable to obtain parent domain\n", node->full_name);
+		return -ENXIO;
+	}
 	/* Not supported on OMAP4 ES1.0 silicon */
 	if (omap_rev() == OMAP4430_REV_ES1_0) {
 		WARN(1, "WakeupGen: Not supported on OMAP4430 ES1.0\n");
@@ -416,7 +478,7 @@ int __init omap_wakeupgen_init(void)
 	}
 
 	/* Static mapping, never released */
-	wakeupgen_base = ioremap(OMAP_WKUPGEN_BASE, SZ_4K);
+	wakeupgen_base = of_iomap(node, 0);
 	if (WARN_ON(!wakeupgen_base))
 		return -ENOMEM;
 
@@ -429,6 +491,14 @@ int __init omap_wakeupgen_init(void)
 		max_irqs = AM43XX_IRQS;
 	}
 
+	domain = irq_domain_add_hierarchy(parent_domain, 0, max_irqs,
+					  node, &wakeupgen_domain_ops,
+					  NULL);
+	if (!domain) {
+		iounmap(wakeupgen_base);
+		return -ENOMEM;
+	}
+
 	/* Clear all IRQ bitmasks at wakeupGen level */
 	for (i = 0; i < irq_banks; i++) {
 		wakeupgen_writel(0, i, CPU0_ID);
@@ -437,14 +507,6 @@ int __init omap_wakeupgen_init(void)
 	}
 
 	/*
-	 * Override GIC architecture specific functions to add
-	 * OMAP WakeupGen interrupt controller along with GIC
-	 */
-	gic_arch_extn.irq_mask = wakeupgen_mask;
-	gic_arch_extn.irq_unmask = wakeupgen_unmask;
-	gic_arch_extn.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
-
-	/*
 	 * FIXME: Add support to set_smp_affinity() once the core
 	 * GIC code has necessary hooks in place.
 	 */
@@ -474,3 +536,9 @@ int __init omap_wakeupgen_init(void)
 
 	return 0;
 }
+
+/*
+ * We cannot use the IRQCHIP_DECLARE macro that lives in
+ * drivers/irqchip, so we're forced to roll our own. Not very nice.
+ */
+OF_DECLARE_2(irqchip, ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init);
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.h b/arch/arm/mach-omap2/omap-wakeupgen.h
index b3c8eccfae79..a3491ad12368 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.h
+++ b/arch/arm/mach-omap2/omap-wakeupgen.h
@@ -33,7 +33,6 @@
 #define OMAP_TIMESTAMPCYCLELO			0xc08
 #define OMAP_TIMESTAMPCYCLEHI			0xc0c
 
-extern int __init omap_wakeupgen_init(void);
 extern void __iomem *omap_get_wakeupgen_base(void);
 extern int omap_secure_apis_support(void);
 #endif
diff --git a/arch/arm/mach-omap2/omap34xx.h b/arch/arm/mach-omap2/omap34xx.h
index c0d1b4b1653f..ed0024dda133 100644
--- a/arch/arm/mach-omap2/omap34xx.h
+++ b/arch/arm/mach-omap2/omap34xx.h
@@ -46,39 +46,9 @@
 
 #define OMAP34XX_IC_BASE	0x48200000
 
-#define OMAP3430_ISP_BASE		(L4_34XX_BASE + 0xBC000)
-#define OMAP3430_ISP_CBUFF_BASE		(OMAP3430_ISP_BASE + 0x0100)
-#define OMAP3430_ISP_CCP2_BASE		(OMAP3430_ISP_BASE + 0x0400)
-#define OMAP3430_ISP_CCDC_BASE		(OMAP3430_ISP_BASE + 0x0600)
-#define OMAP3430_ISP_HIST_BASE		(OMAP3430_ISP_BASE + 0x0A00)
-#define OMAP3430_ISP_H3A_BASE		(OMAP3430_ISP_BASE + 0x0C00)
-#define OMAP3430_ISP_PREV_BASE		(OMAP3430_ISP_BASE + 0x0E00)
-#define OMAP3430_ISP_RESZ_BASE		(OMAP3430_ISP_BASE + 0x1000)
-#define OMAP3430_ISP_SBL_BASE		(OMAP3430_ISP_BASE + 0x1200)
-#define OMAP3430_ISP_MMU_BASE		(OMAP3430_ISP_BASE + 0x1400)
-#define OMAP3430_ISP_CSI2A_REGS1_BASE	(OMAP3430_ISP_BASE + 0x1800)
-#define OMAP3430_ISP_CSIPHY2_BASE	(OMAP3430_ISP_BASE + 0x1970)
-#define OMAP3630_ISP_CSI2A_REGS2_BASE	(OMAP3430_ISP_BASE + 0x19C0)
-#define OMAP3630_ISP_CSI2C_REGS1_BASE	(OMAP3430_ISP_BASE + 0x1C00)
-#define OMAP3630_ISP_CSIPHY1_BASE	(OMAP3430_ISP_BASE + 0x1D70)
-#define OMAP3630_ISP_CSI2C_REGS2_BASE	(OMAP3430_ISP_BASE + 0x1DC0)
-
-#define OMAP3430_ISP_END		(OMAP3430_ISP_BASE         + 0x06F)
-#define OMAP3430_ISP_CBUFF_END		(OMAP3430_ISP_CBUFF_BASE   + 0x077)
-#define OMAP3430_ISP_CCP2_END		(OMAP3430_ISP_CCP2_BASE    + 0x1EF)
-#define OMAP3430_ISP_CCDC_END		(OMAP3430_ISP_CCDC_BASE    + 0x0A7)
-#define OMAP3430_ISP_HIST_END		(OMAP3430_ISP_HIST_BASE    + 0x047)
-#define OMAP3430_ISP_H3A_END		(OMAP3430_ISP_H3A_BASE     + 0x05F)
-#define OMAP3430_ISP_PREV_END		(OMAP3430_ISP_PREV_BASE    + 0x09F)
-#define OMAP3430_ISP_RESZ_END		(OMAP3430_ISP_RESZ_BASE    + 0x0AB)
-#define OMAP3430_ISP_SBL_END		(OMAP3430_ISP_SBL_BASE     + 0x0FB)
-#define OMAP3430_ISP_MMU_END		(OMAP3430_ISP_MMU_BASE     + 0x06F)
-#define OMAP3430_ISP_CSI2A_REGS1_END	(OMAP3430_ISP_CSI2A_REGS1_BASE + 0x16F)
-#define OMAP3430_ISP_CSIPHY2_END	(OMAP3430_ISP_CSIPHY2_BASE + 0x00B)
-#define OMAP3630_ISP_CSI2A_REGS2_END	(OMAP3630_ISP_CSI2A_REGS2_BASE + 0x3F)
-#define OMAP3630_ISP_CSI2C_REGS1_END	(OMAP3630_ISP_CSI2C_REGS1_BASE + 0x16F)
-#define OMAP3630_ISP_CSIPHY1_END	(OMAP3630_ISP_CSIPHY1_BASE + 0x00B)
-#define OMAP3630_ISP_CSI2C_REGS2_END	(OMAP3630_ISP_CSI2C_REGS2_BASE + 0x3F)
+#define OMAP3430_ISP_BASE	(L4_34XX_BASE + 0xBC000)
+#define OMAP3430_ISP_MMU_BASE	(OMAP3430_ISP_BASE + 0x1400)
+#define OMAP3430_ISP_BASE2	(OMAP3430_ISP_BASE + 0x1800)
 
 #define OMAP34XX_HSUSB_OTG_BASE	(L4_34XX_BASE + 0xAB000)
 #define OMAP34XX_USBTLL_BASE	(L4_34XX_BASE + 0x62000)
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index cee0fe1ee6ff..16350eefa66c 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -22,7 +22,6 @@
 #include <linux/of_platform.h>
 #include <linux/export.h>
 #include <linux/irqchip/arm-gic.h>
-#include <linux/irqchip/irq-crossbar.h>
 #include <linux/of_address.h>
 #include <linux/reboot.h>
 #include <linux/genalloc.h>
@@ -52,75 +51,6 @@ static void __iomem *twd_base;
 
 #define IRQ_LOCALTIMER		29
 
-#ifdef CONFIG_OMAP4_ERRATA_I688
-/* Used to implement memory barrier on DRAM path */
-#define OMAP4_DRAM_BARRIER_VA			0xfe600000
-
-void __iomem *dram_sync, *sram_sync;
-
-static phys_addr_t paddr;
-static u32 size;
-
-void omap_bus_sync(void)
-{
-	if (dram_sync && sram_sync) {
-		writel_relaxed(readl_relaxed(dram_sync), dram_sync);
-		writel_relaxed(readl_relaxed(sram_sync), sram_sync);
-		isb();
-	}
-}
-EXPORT_SYMBOL(omap_bus_sync);
-
-static int __init omap4_sram_init(void)
-{
-	struct device_node *np;
-	struct gen_pool *sram_pool;
-
-	np = of_find_compatible_node(NULL, NULL, "ti,omap4-mpu");
-	if (!np)
-		pr_warn("%s:Unable to allocate sram needed to handle errata I688\n",
-			__func__);
-	sram_pool = of_get_named_gen_pool(np, "sram", 0);
-	if (!sram_pool)
-		pr_warn("%s:Unable to get sram pool needed to handle errata I688\n",
-			__func__);
-	else
-		sram_sync = (void *)gen_pool_alloc(sram_pool, PAGE_SIZE);
-
-	return 0;
-}
-omap_arch_initcall(omap4_sram_init);
-
-/* Steal one page physical memory for barrier implementation */
-int __init omap_barrier_reserve_memblock(void)
-{
-
-	size = ALIGN(PAGE_SIZE, SZ_1M);
-	paddr = arm_memblock_steal(size, SZ_1M);
-
-	return 0;
-}
-
-void __init omap_barriers_init(void)
-{
-	struct map_desc dram_io_desc[1];
-
-	dram_io_desc[0].virtual = OMAP4_DRAM_BARRIER_VA;
-	dram_io_desc[0].pfn = __phys_to_pfn(paddr);
-	dram_io_desc[0].length = size;
-	dram_io_desc[0].type = MT_MEMORY_RW_SO;
-	iotable_init(dram_io_desc, ARRAY_SIZE(dram_io_desc));
-	dram_sync = (void __iomem *) dram_io_desc[0].virtual;
-
-	pr_info("OMAP4: Map 0x%08llx to 0x%08lx for dram barrier\n",
-		(long long) paddr, dram_io_desc[0].virtual);
-
-}
-#else
-void __init omap_barriers_init(void)
-{}
-#endif
-
 void gic_dist_disable(void)
 {
 	if (gic_dist_base_addr)
@@ -242,26 +172,26 @@ static int __init omap4_sar_ram_init(void)
 }
 omap_early_initcall(omap4_sar_ram_init);
 
-static const struct of_device_id gic_match[] = {
-	{ .compatible = "arm,cortex-a9-gic", },
-	{ .compatible = "arm,cortex-a15-gic", },
+static const struct of_device_id intc_match[] = {
+	{ .compatible = "ti,omap4-wugen-mpu", },
+	{ .compatible = "ti,omap5-wugen-mpu", },
 	{ },
 };
 
-static struct device_node *gic_node;
+static struct device_node *intc_node;
 
 unsigned int omap4_xlate_irq(unsigned int hwirq)
 {
 	struct of_phandle_args irq_data;
 	unsigned int irq;
 
-	if (!gic_node)
-		gic_node = of_find_matching_node(NULL, gic_match);
+	if (!intc_node)
+		intc_node = of_find_matching_node(NULL, intc_match);
 
-	if (WARN_ON(!gic_node))
+	if (WARN_ON(!intc_node))
 		return hwirq;
 
-	irq_data.np = gic_node;
+	irq_data.np = intc_node;
 	irq_data.args_count = 3;
 	irq_data.args[0] = 0;
 	irq_data.args[1] = hwirq - OMAP44XX_IRQ_GIC_START;
@@ -278,6 +208,12 @@ void __init omap_gic_of_init(void)
 {
 	struct device_node *np;
 
+	intc_node = of_find_matching_node(NULL, intc_match);
+	if (WARN_ON(!intc_node)) {
+		pr_err("No WUGEN found in DT, system will misbehave.\n");
+		pr_err("UPDATE YOUR DEVICE TREE!\n");
+	}
+
 	/* Extract GIC distributor and TWD bases for OMAP4460 ROM Errata WA */
 	if (!cpu_is_omap446x())
 		goto skip_errata_init;
@@ -291,9 +227,5 @@ void __init omap_gic_of_init(void)
 	WARN_ON(!twd_base);
 
 skip_errata_init:
-	omap_wakeupgen_init();
-#ifdef CONFIG_IRQ_CROSSBAR
-	irqcrossbar_init();
-#endif
 	irqchip_init();
 }
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 92afb723dcfc..355b08936871 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1692,16 +1692,15 @@ static int _deassert_hardreset(struct omap_hwmod *oh, const char *name)
 	if (ret == -EBUSY)
 		pr_warn("omap_hwmod: %s: failed to hardreset\n", oh->name);
 
-	if (!ret) {
+	if (oh->clkdm) {
 		/*
 		 * Set the clockdomain to HW_AUTO, assuming that the
 		 * previous state was HW_AUTO.
 		 */
-		if (oh->clkdm && hwsup)
+		if (hwsup)
 			clkdm_allow_idle(oh->clkdm);
-	} else {
-		if (oh->clkdm)
-			clkdm_hwmod_disable(oh->clkdm, oh);
+
+		clkdm_hwmod_disable(oh->clkdm, oh);
 	}
 
 	return ret;
@@ -2698,6 +2697,7 @@ static int __init _register(struct omap_hwmod *oh)
 	INIT_LIST_HEAD(&oh->master_ports);
 	INIT_LIST_HEAD(&oh->slave_ports);
 	spin_lock_init(&oh->_lock);
+	lockdep_set_class(&oh->_lock, &oh->hwmod_key);
 
 	oh->_state = _HWMOD_STATE_REGISTERED;
 
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 9d4bec6ee742..9611c91d9b82 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -674,6 +674,7 @@ struct omap_hwmod {
 	u32				_sysc_cache;
 	void __iomem			*_mpu_rt_va;
 	spinlock_t			_lock;
+	struct lock_class_key		hwmod_key; /* unique lock class */
 	struct list_head		node;
 	struct omap_hwmod_ocp_if	*_mpu_port;
 	unsigned int			(*xlate_irq)(unsigned int);
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index e8692e7675b8..16fe7a1b7a35 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -1466,55 +1466,18 @@ static struct omap_hwmod dra7xx_ocp2scp3_hwmod = {
  *
  */
 
-static struct omap_hwmod_class dra7xx_pcie_hwmod_class = {
+static struct omap_hwmod_class dra7xx_pciess_hwmod_class = {
 	.name	= "pcie",
 };
 
 /* pcie1 */
-static struct omap_hwmod dra7xx_pcie1_hwmod = {
+static struct omap_hwmod dra7xx_pciess1_hwmod = {
 	.name		= "pcie1",
-	.class		= &dra7xx_pcie_hwmod_class,
+	.class		= &dra7xx_pciess_hwmod_class,
 	.clkdm_name	= "pcie_clkdm",
 	.main_clk	= "l4_root_clk_div",
 	.prcm = {
 		.omap4 = {
-			.clkctrl_offs	= DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET,
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/* pcie2 */
-static struct omap_hwmod dra7xx_pcie2_hwmod = {
-	.name		= "pcie2",
-	.class		= &dra7xx_pcie_hwmod_class,
-	.clkdm_name	= "pcie_clkdm",
-	.main_clk	= "l4_root_clk_div",
-	.prcm = {
-		.omap4 = {
-			.clkctrl_offs = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET,
-			.modulemode   = MODULEMODE_SWCTRL,
-		},
-	},
-};
-
-/*
- * 'PCIE PHY' class
- *
- */
-
-static struct omap_hwmod_class dra7xx_pcie_phy_hwmod_class = {
-	.name	= "pcie-phy",
-};
-
-/* pcie1 phy */
-static struct omap_hwmod dra7xx_pcie1_phy_hwmod = {
-	.name		= "pcie1-phy",
-	.class		= &dra7xx_pcie_phy_hwmod_class,
-	.clkdm_name	= "l3init_clkdm",
-	.main_clk	= "l4_root_clk_div",
-	.prcm = {
-		.omap4 = {
 			.clkctrl_offs = DRA7XX_CM_L3INIT_PCIESS1_CLKCTRL_OFFSET,
 			.context_offs = DRA7XX_RM_L3INIT_PCIESS1_CONTEXT_OFFSET,
 			.modulemode   = MODULEMODE_SWCTRL,
@@ -1522,11 +1485,11 @@ static struct omap_hwmod dra7xx_pcie1_phy_hwmod = {
 	},
 };
 
-/* pcie2 phy */
-static struct omap_hwmod dra7xx_pcie2_phy_hwmod = {
-	.name		= "pcie2-phy",
-	.class		= &dra7xx_pcie_phy_hwmod_class,
-	.clkdm_name	= "l3init_clkdm",
+/* pcie2 */
+static struct omap_hwmod dra7xx_pciess2_hwmod = {
+	.name		= "pcie2",
+	.class		= &dra7xx_pciess_hwmod_class,
+	.clkdm_name	= "pcie_clkdm",
 	.main_clk	= "l4_root_clk_div",
 	.prcm = {
 		.omap4 = {
@@ -2877,50 +2840,34 @@ static struct omap_hwmod_ocp_if dra7xx_l4_cfg__ocp2scp3 = {
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l3_main_1 -> pcie1 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie1 = {
+/* l3_main_1 -> pciess1 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess1 = {
 	.master		= &dra7xx_l3_main_1_hwmod,
-	.slave		= &dra7xx_pcie1_hwmod,
+	.slave		= &dra7xx_pciess1_hwmod,
 	.clk		= "l3_iclk_div",
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> pcie1 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1 = {
+/* l4_cfg -> pciess1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess1 = {
 	.master		= &dra7xx_l4_cfg_hwmod,
-	.slave		= &dra7xx_pcie1_hwmod,
+	.slave		= &dra7xx_pciess1_hwmod,
 	.clk		= "l4_root_clk_div",
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l3_main_1 -> pcie2 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie2 = {
+/* l3_main_1 -> pciess2 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess2 = {
 	.master		= &dra7xx_l3_main_1_hwmod,
-	.slave		= &dra7xx_pcie2_hwmod,
+	.slave		= &dra7xx_pciess2_hwmod,
 	.clk		= "l3_iclk_div",
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> pcie2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2 = {
-	.master		= &dra7xx_l4_cfg_hwmod,
-	.slave		= &dra7xx_pcie2_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> pcie1 phy */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1_phy = {
-	.master		= &dra7xx_l4_cfg_hwmod,
-	.slave		= &dra7xx_pcie1_phy_hwmod,
-	.clk		= "l4_root_clk_div",
-	.user		= OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> pcie2 phy */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2_phy = {
+/* l4_cfg -> pciess2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess2 = {
 	.master		= &dra7xx_l4_cfg_hwmod,
-	.slave		= &dra7xx_pcie2_phy_hwmod,
+	.slave		= &dra7xx_pciess2_hwmod,
 	.clk		= "l4_root_clk_div",
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
@@ -3327,12 +3274,10 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
 	&dra7xx_l4_cfg__mpu,
 	&dra7xx_l4_cfg__ocp2scp1,
 	&dra7xx_l4_cfg__ocp2scp3,
-	&dra7xx_l3_main_1__pcie1,
-	&dra7xx_l4_cfg__pcie1,
-	&dra7xx_l3_main_1__pcie2,
-	&dra7xx_l4_cfg__pcie2,
-	&dra7xx_l4_cfg__pcie1_phy,
-	&dra7xx_l4_cfg__pcie2_phy,
+	&dra7xx_l3_main_1__pciess1,
+	&dra7xx_l4_cfg__pciess1,
+	&dra7xx_l3_main_1__pciess2,
+	&dra7xx_l4_cfg__pciess2,
 	&dra7xx_l3_main_1__qspi,
 	&dra7xx_l4_per3__rtcss,
 	&dra7xx_l4_cfg__sata,
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 190fa43e7479..e642b079e9f3 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -173,6 +173,7 @@ static void __init omap3_igep0030_rev_g_legacy_init(void)
 
 static void __init omap3_evm_legacy_init(void)
 {
+	hsmmc2_internal_input_clk();
 	legacy_init_wl12xx(WL12XX_REFCLOCK_38, 0, 149);
 }
 
diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index a08a617a6c11..d6d6bc39e05c 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -252,10 +252,10 @@ static void omap44xx_prm_save_and_clear_irqen(u32 *saved_mask)
 {
 	saved_mask[0] =
 		omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST,
-					OMAP4_PRM_IRQSTATUS_MPU_OFFSET);
+					OMAP4_PRM_IRQENABLE_MPU_OFFSET);
 	saved_mask[1] =
 		omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST,
-					OMAP4_PRM_IRQSTATUS_MPU_2_OFFSET);
+					OMAP4_PRM_IRQENABLE_MPU_2_OFFSET);
 
 	omap4_prm_write_inst_reg(0, OMAP4430_PRM_OCP_SOCKET_INST,
 				 OMAP4_PRM_IRQENABLE_MPU_OFFSET);
diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S
index b84a0122d823..ad1bb9431e94 100644
--- a/arch/arm/mach-omap2/sleep44xx.S
+++ b/arch/arm/mach-omap2/sleep44xx.S
@@ -333,11 +333,9 @@ ENDPROC(omap4_cpu_resume)
 
 #endif	/* defined(CONFIG_SMP) && defined(CONFIG_PM) */
 
-#ifndef CONFIG_OMAP4_ERRATA_I688
 ENTRY(omap_bus_sync)
 	ret	lr
 ENDPROC(omap_bus_sync)
-#endif
 
 ENTRY(omap_do_wfi)
 	stmfd	sp!, {lr}
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index 87a12d6930ff..b02f3947be51 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -540,37 +540,33 @@ void __init orion5x_pci_set_cardbus_mode(void)
 
 int __init orion5x_pci_sys_setup(int nr, struct pci_sys_data *sys)
 {
-	int ret = 0;
-
 	vga_base = ORION5X_PCIE_MEM_PHYS_BASE;
 
 	if (nr == 0) {
 		orion_pcie_set_local_bus_nr(PCIE_BASE, sys->busnr);
-		ret = pcie_setup(sys);
-	} else if (nr == 1 && !orion5x_pci_disabled) {
+		return pcie_setup(sys);
+	}
+
+	if (nr == 1 && !orion5x_pci_disabled) {
 		orion5x_pci_set_bus_nr(sys->busnr);
-		ret = pci_setup(sys);
+		return pci_setup(sys);
 	}
 
-	return ret;
+	return 0;
 }
 
 struct pci_bus __init *orion5x_pci_sys_scan_bus(int nr, struct pci_sys_data *sys)
 {
-	struct pci_bus *bus;
+	if (nr == 0)
+		return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
+					 &sys->resources);
 
-	if (nr == 0) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
-					&sys->resources);
-	} else if (nr == 1 && !orion5x_pci_disabled) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pci_ops, sys,
-					&sys->resources);
-	} else {
-		bus = NULL;
-		BUG();
-	}
+	if (nr == 1 && !orion5x_pci_disabled)
+		return pci_scan_root_bus(NULL, sys->busnr, &pci_ops, sys,
+					 &sys->resources);
 
-	return bus;
+	BUG();
+	return NULL;
 }
 
 int __init orion5x_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index 7d8eab857a93..f6d02e4cbcda 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -36,6 +36,7 @@
 #include <linux/platform_data/video-pxafb.h>
 #include <mach/bitfield.h>
 #include <linux/platform_data/mmc-pxamci.h>
+#include <linux/smc91x.h>
 
 #include "generic.h"
 #include "devices.h"
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 0eecd83c624e..89a7c06570d3 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -11,6 +11,7 @@
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  */
+#include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -40,7 +41,6 @@
 #define ICHP_VAL_IRQ		(1 << 31)
 #define ICHP_IRQ(i)		(((i) >> 16) & 0x7fff)
 #define IPR_VALID		(1 << 31)
-#define IRQ_BIT(n)		(((n) - PXA_IRQ(0)) & 0x1f)
 
 #define MAX_INTERNAL_IRQS	128
 
@@ -51,6 +51,7 @@
 static void __iomem *pxa_irq_base;
 static int pxa_internal_irq_nr;
 static bool cpu_has_ipr;
+static struct irq_domain *pxa_irq_domain;
 
 static inline void __iomem *irq_base(int i)
 {
@@ -66,18 +67,20 @@ static inline void __iomem *irq_base(int i)
 void pxa_mask_irq(struct irq_data *d)
 {
 	void __iomem *base = irq_data_get_irq_chip_data(d);
+	irq_hw_number_t irq = irqd_to_hwirq(d);
 	uint32_t icmr = __raw_readl(base + ICMR);
 
-	icmr &= ~(1 << IRQ_BIT(d->irq));
+	icmr &= ~BIT(irq & 0x1f);
 	__raw_writel(icmr, base + ICMR);
 }
 
 void pxa_unmask_irq(struct irq_data *d)
 {
 	void __iomem *base = irq_data_get_irq_chip_data(d);
+	irq_hw_number_t irq = irqd_to_hwirq(d);
 	uint32_t icmr = __raw_readl(base + ICMR);
 
-	icmr |= 1 << IRQ_BIT(d->irq);
+	icmr |= BIT(irq & 0x1f);
 	__raw_writel(icmr, base + ICMR);
 }
 
@@ -118,40 +121,63 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs)
 	} while (1);
 }
 
-void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
+static int pxa_irq_map(struct irq_domain *h, unsigned int virq,
+		       irq_hw_number_t hw)
 {
-	int irq, i, n;
+	void __iomem *base = irq_base(hw / 32);
 
-	BUG_ON(irq_nr > MAX_INTERNAL_IRQS);
+	/* initialize interrupt priority */
+	if (cpu_has_ipr)
+		__raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw));
+
+	irq_set_chip_and_handler(virq, &pxa_internal_irq_chip,
+				 handle_level_irq);
+	irq_set_chip_data(virq, base);
+	set_irq_flags(virq, IRQF_VALID);
+
+	return 0;
+}
+
+static struct irq_domain_ops pxa_irq_ops = {
+	.map    = pxa_irq_map,
+	.xlate  = irq_domain_xlate_onecell,
+};
+
+static __init void
+pxa_init_irq_common(struct device_node *node, int irq_nr,
+		    int (*fn)(struct irq_data *, unsigned int))
+{
+	int n;
 
 	pxa_internal_irq_nr = irq_nr;
-	cpu_has_ipr = !cpu_is_pxa25x();
-	pxa_irq_base = io_p2v(0x40d00000);
+	pxa_irq_domain = irq_domain_add_legacy(node, irq_nr,
+					       PXA_IRQ(0), 0,
+					       &pxa_irq_ops, NULL);
+	if (!pxa_irq_domain)
+		panic("Unable to add PXA IRQ domain\n");
+	irq_set_default_host(pxa_irq_domain);
 
 	for (n = 0; n < irq_nr; n += 32) {
 		void __iomem *base = irq_base(n >> 5);
 
 		__raw_writel(0, base + ICMR);	/* disable all IRQs */
 		__raw_writel(0, base + ICLR);	/* all IRQs are IRQ, not FIQ */
-		for (i = n; (i < (n + 32)) && (i < irq_nr); i++) {
-			/* initialize interrupt priority */
-			if (cpu_has_ipr)
-				__raw_writel(i | IPR_VALID, pxa_irq_base + IPR(i));
-
-			irq = PXA_IRQ(i);
-			irq_set_chip_and_handler(irq, &pxa_internal_irq_chip,
-						 handle_level_irq);
-			irq_set_chip_data(irq, base);
-			set_irq_flags(irq, IRQF_VALID);
-		}
 	}
-
 	/* only unmasked interrupts kick us out of idle */
 	__raw_writel(1, irq_base(0) + ICCR);
 
 	pxa_internal_irq_chip.irq_set_wake = fn;
 }
 
+void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
+{
+	BUG_ON(irq_nr > MAX_INTERNAL_IRQS);
+
+	pxa_irq_base = io_p2v(0x40d00000);
+	cpu_has_ipr = !cpu_is_pxa25x();
+	pxa_init_irq_common(NULL, irq_nr, fn);
+}
+
 #ifdef CONFIG_PM
 static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32];
 static unsigned long saved_ipr[MAX_INTERNAL_IRQS];
@@ -203,30 +229,6 @@ struct syscore_ops pxa_irq_syscore_ops = {
 };
 
 #ifdef CONFIG_OF
-static struct irq_domain *pxa_irq_domain;
-
-static int pxa_irq_map(struct irq_domain *h, unsigned int virq,
-		       irq_hw_number_t hw)
-{
-	void __iomem *base = irq_base(hw / 32);
-
-	/* initialize interrupt priority */
-	if (cpu_has_ipr)
-		__raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw));
-
-	irq_set_chip_and_handler(hw, &pxa_internal_irq_chip,
-				 handle_level_irq);
-	irq_set_chip_data(hw, base);
-	set_irq_flags(hw, IRQF_VALID);
-
-	return 0;
-}
-
-static struct irq_domain_ops pxa_irq_ops = {
-	.map    = pxa_irq_map,
-	.xlate  = irq_domain_xlate_onecell,
-};
-
 static const struct of_device_id intc_ids[] __initconst = {
 	{ .compatible = "marvell,pxa-intc", },
 	{}
@@ -236,7 +238,7 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int))
 {
 	struct device_node *node;
 	struct resource res;
-	int n, ret;
+	int ret;
 
 	node = of_find_matching_node(NULL, intc_ids);
 	if (!node) {
@@ -267,23 +269,6 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int))
 		return;
 	}
 
-	pxa_irq_domain = irq_domain_add_legacy(node, pxa_internal_irq_nr, 0, 0,
-					       &pxa_irq_ops, NULL);
-	if (!pxa_irq_domain)
-		panic("Unable to add PXA IRQ domain\n");
-
-	irq_set_default_host(pxa_irq_domain);
-
-	for (n = 0; n < pxa_internal_irq_nr; n += 32) {
-		void __iomem *base = irq_base(n >> 5);
-
-		__raw_writel(0, base + ICMR);	/* disable all IRQs */
-		__raw_writel(0, base + ICLR);	/* all IRQs are IRQ, not FIQ */
-	}
-
-	/* only unmasked interrupts kick us out of idle */
-	__raw_writel(1, irq_base(0) + ICCR);
-
-	pxa_internal_irq_chip.irq_set_wake = fn;
+	pxa_init_irq_common(node, pxa_internal_irq_nr, fn);
 }
 #endif /* CONFIG_OF */
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index 28da319d389f..eaee2c20b189 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -195,7 +195,7 @@ static struct resource smc91x_resources[] = {
 };
 
 struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT;
+	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index a762b23ac830..6dc4f025e674 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -758,8 +758,10 @@ static void raumfeld_power_signal_charged(void)
 	struct power_supply *psy =
 		power_supply_get_by_name(raumfeld_power_supplicants[0]);
 
-	if (psy)
+	if (psy) {
 		power_supply_set_battery_charged(psy);
+		power_supply_put(psy);
+	}
 }
 
 static int raumfeld_power_resume(void)
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index 205f9bf3821e..ac2ae5c71ab4 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -412,7 +412,7 @@ static struct fixed_voltage_config can_regulator_pdata = {
 };
 
 static struct platform_device can_regulator_device = {
-	.name	= "reg-fixed-volage",
+	.name	= "reg-fixed-voltage",
 	.id	= 0,
 	.dev	= {
 		.platform_data	= &can_regulator_pdata,
diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig
index 79c49ff77f6e..23bec3a85b22 100644
--- a/arch/arm/mach-s3c24xx/Kconfig
+++ b/arch/arm/mach-s3c24xx/Kconfig
@@ -39,14 +39,14 @@ config CPU_S3C2412
 	bool "SAMSUNG S3C2412"
 	select CPU_ARM926T
 	select S3C2412_COMMON_CLK
-	select S3C2412_PM if PM
+	select S3C2412_PM if PM_SLEEP
 	help
 	  Support for the S3C2412 and S3C2413 SoCs from the S3C24XX line
 
 config CPU_S3C2416
 	bool "SAMSUNG S3C2416/S3C2450"
 	select CPU_ARM926T
-	select S3C2416_PM if PM
+	select S3C2416_PM if PM_SLEEP
 	select S3C2443_COMMON_CLK
 	help
 	  Support for the S3C2416 SoC from the S3C24XX line
@@ -55,7 +55,7 @@ config CPU_S3C2440
 	bool "SAMSUNG S3C2440"
 	select CPU_ARM920T
 	select S3C2410_COMMON_CLK
-	select S3C2410_PM if PM
+	select S3C2410_PM if PM_SLEEP
 	help
 	  Support for S3C2440 Samsung Mobile CPU based systems.
 
@@ -63,7 +63,7 @@ config CPU_S3C2442
 	bool "SAMSUNG S3C2442"
 	select CPU_ARM920T
 	select S3C2410_COMMON_CLK
-	select S3C2410_PM if PM
+	select S3C2410_PM if PM_SLEEP
 	help
 	  Support for S3C2442 Samsung Mobile CPU based systems.
 
@@ -228,11 +228,6 @@ config H1940BT
 	  This is a simple driver that is able to control
 	  the state of built in bluetooth chip on h1940.
 
-config PM_H1940
-	bool
-	help
-	  Internal node for H1940 and related PM
-
 config MACH_N30
 	bool "Acer N30 family"
 	select S3C_DEV_NAND
@@ -362,6 +357,7 @@ if CPU_S3C2416
 config S3C2416_PM
 	bool
 	select S3C2412_PM_SLEEP
+	select SAMSUNG_WAKEMASK
 	help
 	  Internal config node to apply S3C2416 power management
 
@@ -584,6 +580,11 @@ config MACH_SMDK2443
 
 endif	# CPU_S3C2443
 
+config PM_H1940
+	bool
+	help
+	  Internal node for H1940 and related PM
+
 endmenu	# SAMSUNG S3C24XX SoCs Support
 
 endif	# ARCH_S3C24XX
diff --git a/arch/arm/mach-s3c24xx/Makefile b/arch/arm/mach-s3c24xx/Makefile
index b40a22fe082a..05920c8a5764 100644
--- a/arch/arm/mach-s3c24xx/Makefile
+++ b/arch/arm/mach-s3c24xx/Makefile
@@ -32,7 +32,8 @@ obj-$(CONFIG_CPU_S3C2443)	+= s3c2443.o
 
 # PM
 
-obj-$(CONFIG_PM)		+= pm.o irq-pm.o sleep.o
+obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_PM_SLEEP)		+= irq-pm.o sleep.o
 
 # common code
 
diff --git a/arch/arm/mach-s3c24xx/include/mach/pm-core.h b/arch/arm/mach-s3c24xx/include/mach/pm-core.h
index 2eef7e6f7675..69459dbbdcad 100644
--- a/arch/arm/mach-s3c24xx/include/mach/pm-core.h
+++ b/arch/arm/mach-s3c24xx/include/mach/pm-core.h
@@ -10,6 +10,11 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include "regs-clock.h"
+#include "regs-irq.h"
 
 static inline void s3c_pm_debug_init_uart(void)
 {
@@ -42,8 +47,23 @@ static inline void s3c_pm_arch_stop_clocks(void)
 	__raw_writel(0x00, S3C2410_CLKCON);  /* turn off clocks over sleep */
 }
 
-static void s3c_pm_show_resume_irqs(int start, unsigned long which,
-				    unsigned long mask);
+/* s3c2410_pm_show_resume_irqs
+ *
+ * print any IRQs asserted at resume time (ie, we woke from)
+*/
+static inline void s3c_pm_show_resume_irqs(int start, unsigned long which,
+					   unsigned long mask)
+{
+	int i;
+
+	which &= ~mask;
+
+	for (i = 0; i <= 31; i++) {
+		if (which & (1L<<i)) {
+			S3C_PMDBG("IRQ %d asserted at resume\n", start+i);
+		}
+	}
+}
 
 static inline void s3c_pm_arch_show_resume_irqs(void)
 {
diff --git a/arch/arm/mach-s3c24xx/pm-s3c2416.c b/arch/arm/mach-s3c24xx/pm-s3c2416.c
index 44923895f558..c0e328e37bd6 100644
--- a/arch/arm/mach-s3c24xx/pm-s3c2416.c
+++ b/arch/arm/mach-s3c24xx/pm-s3c2416.c
@@ -23,6 +23,7 @@
 
 #include "s3c2412-power.h"
 
+#ifdef CONFIG_PM_SLEEP
 extern void s3c2412_sleep_enter(void);
 
 static int s3c2416_cpu_suspend(unsigned long arg)
@@ -70,7 +71,7 @@ static __init int s3c2416_pm_init(void)
 }
 
 arch_initcall(s3c2416_pm_init);
-
+#endif
 
 static void s3c2416_pm_resume(void)
 {
diff --git a/arch/arm/mach-s3c24xx/pm.c b/arch/arm/mach-s3c24xx/pm.c
index b19256ec8d40..5d510bca0844 100644
--- a/arch/arm/mach-s3c24xx/pm.c
+++ b/arch/arm/mach-s3c24xx/pm.c
@@ -50,6 +50,7 @@
 
 #define PFX "s3c24xx-pm: "
 
+#ifdef CONFIG_PM_SLEEP
 static struct sleep_save core_save[] = {
 	/* we restore the timings here, with the proviso that the board
 	 * brings the system up in an slower, or equal frequency setting
@@ -67,6 +68,7 @@ static struct sleep_save core_save[] = {
 	SAVE_ITEM(S3C2410_BANKCON4),
 	SAVE_ITEM(S3C2410_BANKCON5),
 };
+#endif
 
 /* s3c_pm_check_resume_pin
  *
@@ -121,7 +123,7 @@ void s3c_pm_configure_extint(void)
 	}
 }
 
-
+#ifdef CONFIG_PM_SLEEP
 void s3c_pm_restore_core(void)
 {
 	s3c_pm_do_restore_core(core_save, ARRAY_SIZE(core_save));
@@ -131,4 +133,4 @@ void s3c_pm_save_core(void)
 {
 	s3c_pm_do_save(core_save, ARRAY_SIZE(core_save));
 }
-
+#endif
diff --git a/arch/arm/mach-s3c24xx/s3c2410.c b/arch/arm/mach-s3c24xx/s3c2410.c
index 2a6985a4a0ff..5061d66ca10c 100644
--- a/arch/arm/mach-s3c24xx/s3c2410.c
+++ b/arch/arm/mach-s3c24xx/s3c2410.c
@@ -121,7 +121,7 @@ int __init s3c2410_init(void)
 {
 	printk("S3C2410: Initialising architecture\n");
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	register_syscore_ops(&s3c2410_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 #endif
diff --git a/arch/arm/mach-s3c24xx/s3c2412.c b/arch/arm/mach-s3c24xx/s3c2412.c
index ecf2c77ab88b..64a13605cfc3 100644
--- a/arch/arm/mach-s3c24xx/s3c2412.c
+++ b/arch/arm/mach-s3c24xx/s3c2412.c
@@ -172,7 +172,7 @@ int __init s3c2412_init(void)
 {
 	printk("S3C2412: Initialising architecture\n");
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	register_syscore_ops(&s3c2412_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 #endif
diff --git a/arch/arm/mach-s3c24xx/s3c2416.c b/arch/arm/mach-s3c24xx/s3c2416.c
index bfd4da86deb8..3f8ca2a3ef17 100644
--- a/arch/arm/mach-s3c24xx/s3c2416.c
+++ b/arch/arm/mach-s3c24xx/s3c2416.c
@@ -98,7 +98,7 @@ int __init s3c2416_init(void)
 	s3c_adc_setname("s3c2416-adc");
 	s3c_rtc_setname("s3c2416-rtc");
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	register_syscore_ops(&s3c2416_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 	register_syscore_ops(&s3c2416_irq_syscore_ops);
diff --git a/arch/arm/mach-s3c24xx/s3c2440.c b/arch/arm/mach-s3c24xx/s3c2440.c
index 03d379f1fc52..eb733555fab5 100644
--- a/arch/arm/mach-s3c24xx/s3c2440.c
+++ b/arch/arm/mach-s3c24xx/s3c2440.c
@@ -57,11 +57,11 @@ int __init s3c2440_init(void)
 
 	/* register suspend/resume handlers */
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	register_syscore_ops(&s3c2410_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
-#endif
 	register_syscore_ops(&s3c244x_pm_syscore_ops);
+#endif
 
 	/* register our system device for everything else */
 
diff --git a/arch/arm/mach-s3c24xx/s3c2442.c b/arch/arm/mach-s3c24xx/s3c2442.c
index 7b043349f1c8..893998ede022 100644
--- a/arch/arm/mach-s3c24xx/s3c2442.c
+++ b/arch/arm/mach-s3c24xx/s3c2442.c
@@ -60,11 +60,11 @@ int __init s3c2442_init(void)
 {
 	printk("S3C2442: Initialising architecture\n");
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	register_syscore_ops(&s3c2410_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
-#endif
 	register_syscore_ops(&s3c244x_pm_syscore_ops);
+#endif
 
 	return device_register(&s3c2442_dev);
 }
diff --git a/arch/arm/mach-s3c24xx/s3c244x.c b/arch/arm/mach-s3c24xx/s3c244x.c
index 177f97802745..b14119585dc7 100644
--- a/arch/arm/mach-s3c24xx/s3c244x.c
+++ b/arch/arm/mach-s3c24xx/s3c244x.c
@@ -108,7 +108,7 @@ static int __init s3c2442_core_init(void)
 core_initcall(s3c2442_core_init);
 
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static struct sleep_save s3c244x_sleep[] = {
 	SAVE_ITEM(S3C2440_DSC0),
 	SAVE_ITEM(S3C2440_DSC1),
@@ -127,12 +127,9 @@ static void s3c244x_resume(void)
 {
 	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
 }
-#else
-#define s3c244x_suspend NULL
-#define s3c244x_resume  NULL
-#endif
 
 struct syscore_ops s3c244x_pm_syscore_ops = {
 	.suspend	= s3c244x_suspend,
 	.resume		= s3c244x_resume,
 };
+#endif
diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig
index 26ca2427e53d..eff95e950d81 100644
--- a/arch/arm/mach-s3c64xx/Kconfig
+++ b/arch/arm/mach-s3c64xx/Kconfig
@@ -189,6 +189,7 @@ endchoice
 config SMDK6410_WM1190_EV1
 	bool "Support Wolfson Microelectronics 1190-EV1 PMIC card"
 	depends on MACH_SMDK6410
+	depends on I2C=y
 	select MFD_WM8350_I2C
 	select REGULATOR
 	select REGULATOR_WM8350
@@ -203,6 +204,7 @@ config SMDK6410_WM1190_EV1
 config SMDK6410_WM1192_EV1
 	bool "Support Wolfson Microelectronics 1192-EV1 PMIC card"
 	depends on MACH_SMDK6410
+	depends on I2C=y
 	select MFD_WM831X
 	select MFD_WM831X_I2C
 	select REGULATOR
@@ -269,8 +271,8 @@ config MACH_SMARTQ7
 
 config MACH_WLF_CRAGG_6410
 	bool "Wolfson Cragganmore 6410"
+	depends on I2C=y
 	select CPU_S3C6410
-	select I2C
 	select LEDS_GPIO_REGISTER
 	select S3C64XX_DEV_SPI0
 	select S3C64XX_SETUP_FB_24BPP
diff --git a/arch/arm/mach-s3c64xx/Makefile b/arch/arm/mach-s3c64xx/Makefile
index 12f67b61ca5f..17f4b07ec763 100644
--- a/arch/arm/mach-s3c64xx/Makefile
+++ b/arch/arm/mach-s3c64xx/Makefile
@@ -16,7 +16,8 @@ obj-$(CONFIG_CPU_S3C6410)	+= s3c6410.o
 
 # PM
 
-obj-$(CONFIG_PM)		+= pm.o irq-pm.o sleep.o
+obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_PM_SLEEP)		+= irq-pm.o sleep.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 
 # DMA support
diff --git a/arch/arm/mach-s3c64xx/cpuidle.c b/arch/arm/mach-s3c64xx/cpuidle.c
index 2eb072440dfa..93aa8cb70195 100644
--- a/arch/arm/mach-s3c64xx/cpuidle.c
+++ b/arch/arm/mach-s3c64xx/cpuidle.c
@@ -16,7 +16,7 @@
 #include <linux/export.h>
 #include <linux/time.h>
 
-#include <asm/proc-fns.h>
+#include <asm/cpuidle.h>
 
 #include <mach/map.h>
 
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index 661eb662d051..b7447a92276e 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -209,7 +209,7 @@ static struct platform_device smdk6410_smsc911x = {
 };
 
 #ifdef CONFIG_REGULATOR
-static struct regulator_consumer_supply smdk6410_b_pwr_5v_consumers[] __initdata = {
+static struct regulator_consumer_supply smdk6410_b_pwr_5v_consumers[] = {
 	REGULATOR_SUPPLY("PVDD", "0-001b"),
 	REGULATOR_SUPPLY("AVDD", "0-001b"),
 };
diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c
index aaf7bea4032f..75b14e756383 100644
--- a/arch/arm/mach-s3c64xx/pm.c
+++ b/arch/arm/mach-s3c64xx/pm.c
@@ -194,6 +194,7 @@ void s3c_pm_debug_smdkled(u32 set, u32 clear)
 }
 #endif
 
+#ifdef CONFIG_PM_SLEEP
 static struct sleep_save core_save[] = {
 	SAVE_ITEM(S3C64XX_MEM0DRVCON),
 	SAVE_ITEM(S3C64XX_MEM1DRVCON),
@@ -238,6 +239,7 @@ void s3c_pm_save_core(void)
 	s3c_pm_do_save(misc_save, ARRAY_SIZE(misc_save));
 	s3c_pm_do_save(core_save, ARRAY_SIZE(core_save));
 }
+#endif
 
 /* since both s3c6400 and s3c6410 share the same sleep pm calls, we
  * put the per-cpu code in here until any new cpu comes along and changes
diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S
index 7c43ddd33ba8..dfbfc0f7f8b8 100644
--- a/arch/arm/mach-s5pv210/sleep.S
+++ b/arch/arm/mach-s5pv210/sleep.S
@@ -14,7 +14,7 @@
 
 #include <linux/linkage.h>
 
-	.data
+	.text
 	.align
 
 	/*
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 7b0cd3172354..af868d258e66 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -268,8 +268,8 @@ static int neponset_probe(struct platform_device *dev)
 		.id = 0,
 		.res = smc91x_resources,
 		.num_res = ARRAY_SIZE(smc91x_resources),
-		.data = &smc91c_platdata,
-		.size_data = sizeof(smc91c_platdata),
+		.data = &smc91x_platdata,
+		.size_data = sizeof(smc91x_platdata),
 	};
 	int ret, irq;
 
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index 696fd0fe4806..1525d7b5f1b7 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -54,7 +54,7 @@ static struct platform_device smc91x_device = {
 	.num_resources	= ARRAY_SIZE(smc91x_resources),
 	.resource	= smc91x_resources,
 	.dev = {
-		.platform_data  = &smc91c_platdata,
+		.platform_data  = &smc91x_platdata,
 	},
 };
 
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 6d949f1c850b..36aaeb12e1a5 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -1015,7 +1015,6 @@ static struct asoc_simple_card_info fsi_wm8978_info = {
 	.platform	= "sh_fsi2",
 	.daifmt		= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBM_CFM,
 	.cpu_dai = {
-		.fmt	= SND_SOC_DAIFMT_IB_NF,
 		.name	= "fsia-dai",
 	},
 	.codec_dai = {
@@ -1040,9 +1039,9 @@ static struct asoc_simple_card_info fsi2_hdmi_info = {
 	.card		= "FSI2B-HDMI",
 	.codec		= "sh-mobile-hdmi",
 	.platform	= "sh_fsi2",
+	.daifmt		= SND_SOC_DAIFMT_CBS_CFS,
 	.cpu_dai = {
 		.name	= "fsib-dai",
-		.fmt	= SND_SOC_DAIFMT_CBS_CFS,
 	},
 	.codec_dai = {
 		.name = "sh_mobile_hdmi-hifi",
diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c
index 9e3618028acc..fd63ae6532fc 100644
--- a/arch/arm/mach-shmobile/intc-sh73a0.c
+++ b/arch/arm/mach-shmobile/intc-sh73a0.c
@@ -252,11 +252,6 @@ static irqreturn_t sh73a0_intcs_demux(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int sh73a0_set_wake(struct irq_data *data, unsigned int on)
-{
-	return 0; /* always allow wakeup */
-}
-
 #define PINTER0_PHYS 0xe69000a0
 #define PINTER1_PHYS 0xe69000a4
 #define PINTER0_VIRT IOMEM(0xe69000a0)
@@ -318,8 +313,8 @@ void __init sh73a0_init_irq(void)
 	void __iomem *gic_cpu_base = IOMEM(0xf0000100);
 	void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE);
 
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE);
 	gic_init(0, 29, gic_dist_base, gic_cpu_base);
-	gic_arch_extn.irq_set_wake = sh73a0_set_wake;
 
 	register_intc_controller(&intcs_desc);
 	register_intc_controller(&intc_pint0_desc);
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 27dceaf9e688..c03e562be12b 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -713,18 +713,13 @@ void __init r8a7779_init_late(void)
 }
 
 #ifdef CONFIG_USE_OF
-static int r8a7779_set_wake(struct irq_data *data, unsigned int on)
-{
-	return 0; /* always allow wakeup */
-}
-
 void __init r8a7779_init_irq_dt(void)
 {
 #ifdef CONFIG_ARCH_SHMOBILE_LEGACY
 	void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000);
 	void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000);
 #endif
-	gic_arch_extn.irq_set_wake = r8a7779_set_wake;
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE);
 
 #ifdef CONFIG_ARCH_SHMOBILE_LEGACY
 	gic_init(0, 29, gic_dist_base, gic_cpu_base);
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index 483cb467bf65..a0f3b1cd497c 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -45,6 +45,6 @@ extern char secondary_trampoline, secondary_trampoline_end;
 
 extern unsigned long socfpga_cpu1start_addr;
 
-#define SOCFPGA_SCU_VIRT_BASE   0xfffec000
+#define SOCFPGA_SCU_VIRT_BASE   0xfee00000
 
 #endif
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 383d61e138af..f5e597c207b9 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -23,6 +23,7 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
+#include <asm/cacheflush.h>
 
 #include "core.h"
 
@@ -73,6 +74,10 @@ void __init socfpga_sysmgr_init(void)
 			(u32 *) &socfpga_cpu1start_addr))
 		pr_err("SMP: Need cpu1-start-addr in device tree.\n");
 
+	/* Ensure that socfpga_cpu1start_addr is visible to other CPUs */
+	smp_wmb();
+	sync_cache_w(&socfpga_cpu1start_addr);
+
 	sys_manager_base_addr = of_iomap(np, 0);
 
 	np = of_find_compatible_node(NULL, NULL, "altr,rst-mgr");
diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c
index b067390cef4e..b373acade338 100644
--- a/arch/arm/mach-sti/board-dt.c
+++ b/arch/arm/mach-sti/board-dt.c
@@ -18,6 +18,7 @@ static const char *stih41x_dt_match[] __initdata = {
 	"st,stih415",
 	"st,stih416",
 	"st,stih407",
+	"st,stih410",
 	"st,stih418",
 	NULL
 };
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index a77604fbaf25..81502b90dd91 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -1,10 +1,12 @@
 menuconfig ARCH_SUNXI
 	bool "Allwinner SoCs" if ARCH_MULTI_V7
 	select ARCH_REQUIRE_GPIOLIB
+	select ARCH_HAS_RESET_CONTROLLER
 	select CLKSRC_MMIO
 	select GENERIC_IRQ_CHIP
 	select PINCTRL
 	select SUN4I_TIMER
+	select RESET_CONTROLLER
 
 if ARCH_SUNXI
 
@@ -20,10 +22,8 @@ config MACH_SUN5I
 config MACH_SUN6I
 	bool "Allwinner A31 (sun6i) SoCs support"
 	default ARCH_SUNXI
-	select ARCH_HAS_RESET_CONTROLLER
 	select ARM_GIC
 	select MFD_SUN6I_PRCM
-	select RESET_CONTROLLER
 	select SUN5I_HSTIMER
 
 config MACH_SUN7I
@@ -37,16 +37,12 @@ config MACH_SUN7I
 config MACH_SUN8I
 	bool "Allwinner A23 (sun8i) SoCs support"
 	default ARCH_SUNXI
-	select ARCH_HAS_RESET_CONTROLLER
 	select ARM_GIC
 	select MFD_SUN6I_PRCM
-	select RESET_CONTROLLER
 
 config MACH_SUN9I
 	bool "Allwinner (sun9i) SoCs support"
 	default ARCH_SUNXI
-	select ARCH_HAS_RESET_CONTROLLER
 	select ARM_GIC
-	select RESET_CONTROLLER
 
 endif
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index f2b586d7b15d..155807fa6fdd 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -15,7 +15,7 @@
  */
 
 #include <asm/firmware.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
 	tegra_set_cpu_in_lp2();
 	cpu_pm_enter();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	call_firmware_op(prepare_idle);
 
@@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
 	if (call_firmware_op(do_idle, 0) == -ENOSYS)
 		cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	cpu_pm_exit();
 	tegra_clear_cpu_in_lp2();
diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index 4f25a7c7ca0f..88de2dce2e87 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -20,14 +20,13 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
 #include <asm/cpuidle.h>
-#include <asm/proc-fns.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 
@@ -136,11 +135,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev,
 	if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready())
 		return false;
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	tegra_idle_lp2_last();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	if (cpu_online(1))
 		tegra20_wake_cpu1_from_reset();
@@ -153,13 +152,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
 					 struct cpuidle_driver *drv,
 					 int index)
 {
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
 
 	tegra20_cpu_clear_resettable();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	return true;
 }
diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
index f8815ed65d9d..4dbe1dae937c 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra30.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra30.c
@@ -20,14 +20,13 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
 #include <asm/cpuidle.h>
-#include <asm/proc-fns.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 
@@ -76,11 +75,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev,
 		return false;
 	}
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	tegra_idle_lp2_last();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	return true;
 }
@@ -90,13 +89,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev,
 					struct cpuidle_driver *drv,
 					int index)
 {
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	smp_wmb();
 
 	cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	return true;
 }
diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h
index ee79808e93a3..81dc950b4881 100644
--- a/arch/arm/mach-tegra/iomap.h
+++ b/arch/arm/mach-tegra/iomap.h
@@ -31,21 +31,6 @@
 #define TEGRA_ARM_INT_DIST_BASE		0x50041000
 #define TEGRA_ARM_INT_DIST_SIZE		SZ_4K
 
-#define TEGRA_PRIMARY_ICTLR_BASE	0x60004000
-#define TEGRA_PRIMARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_SECONDARY_ICTLR_BASE	0x60004100
-#define TEGRA_SECONDARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_TERTIARY_ICTLR_BASE	0x60004200
-#define TEGRA_TERTIARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_QUATERNARY_ICTLR_BASE	0x60004300
-#define TEGRA_QUATERNARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_QUINARY_ICTLR_BASE	0x60004400
-#define TEGRA_QUINARY_ICTLR_SIZE	SZ_64
-
 #define TEGRA_TMR1_BASE			0x60005000
 #define TEGRA_TMR1_SIZE			SZ_8
 
diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c
index ab95f5391a2b..3b9098d27ea5 100644
--- a/arch/arm/mach-tegra/irq.c
+++ b/arch/arm/mach-tegra/irq.c
@@ -30,43 +30,9 @@
 #include "board.h"
 #include "iomap.h"
 
-#define ICTLR_CPU_IEP_VFIQ	0x08
-#define ICTLR_CPU_IEP_FIR	0x14
-#define ICTLR_CPU_IEP_FIR_SET	0x18
-#define ICTLR_CPU_IEP_FIR_CLR	0x1c
-
-#define ICTLR_CPU_IER		0x20
-#define ICTLR_CPU_IER_SET	0x24
-#define ICTLR_CPU_IER_CLR	0x28
-#define ICTLR_CPU_IEP_CLASS	0x2C
-
-#define ICTLR_COP_IER		0x30
-#define ICTLR_COP_IER_SET	0x34
-#define ICTLR_COP_IER_CLR	0x38
-#define ICTLR_COP_IEP_CLASS	0x3c
-
-#define FIRST_LEGACY_IRQ 32
-#define TEGRA_MAX_NUM_ICTLRS	5
-
 #define SGI_MASK 0xFFFF
 
-static int num_ictlrs;
-
-static void __iomem *ictlr_reg_base[] = {
-	IO_ADDRESS(TEGRA_PRIMARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_SECONDARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_TERTIARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_QUATERNARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_QUINARY_ICTLR_BASE),
-};
-
 #ifdef CONFIG_PM_SLEEP
-static u32 cop_ier[TEGRA_MAX_NUM_ICTLRS];
-static u32 cop_iep[TEGRA_MAX_NUM_ICTLRS];
-static u32 cpu_ier[TEGRA_MAX_NUM_ICTLRS];
-static u32 cpu_iep[TEGRA_MAX_NUM_ICTLRS];
-
-static u32 ictlr_wake_mask[TEGRA_MAX_NUM_ICTLRS];
 static void __iomem *tegra_gic_cpu_base;
 #endif
 
@@ -83,140 +49,7 @@ bool tegra_pending_sgi(void)
 	return false;
 }
 
-static inline void tegra_irq_write_mask(unsigned int irq, unsigned long reg)
-{
-	void __iomem *base;
-	u32 mask;
-
-	BUG_ON(irq < FIRST_LEGACY_IRQ ||
-		irq >= FIRST_LEGACY_IRQ + num_ictlrs * 32);
-
-	base = ictlr_reg_base[(irq - FIRST_LEGACY_IRQ) / 32];
-	mask = BIT((irq - FIRST_LEGACY_IRQ) % 32);
-
-	__raw_writel(mask, base + reg);
-}
-
-static void tegra_mask(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IER_CLR);
-}
-
-static void tegra_unmask(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IER_SET);
-}
-
-static void tegra_ack(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_CLR);
-}
-
-static void tegra_eoi(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_CLR);
-}
-
-static int tegra_retrigger(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return 0;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_SET);
-
-	return 1;
-}
-
 #ifdef CONFIG_PM_SLEEP
-static int tegra_set_wake(struct irq_data *d, unsigned int enable)
-{
-	u32 irq = d->hwirq;
-	u32 index, mask;
-
-	if (irq < FIRST_LEGACY_IRQ ||
-		irq >= FIRST_LEGACY_IRQ + num_ictlrs * 32)
-		return -EINVAL;
-
-	index = ((irq - FIRST_LEGACY_IRQ) / 32);
-	mask = BIT((irq - FIRST_LEGACY_IRQ) % 32);
-	if (enable)
-		ictlr_wake_mask[index] |= mask;
-	else
-		ictlr_wake_mask[index] &= ~mask;
-
-	return 0;
-}
-
-static int tegra_legacy_irq_suspend(void)
-{
-	unsigned long flags;
-	int i;
-
-	local_irq_save(flags);
-	for (i = 0; i < num_ictlrs; i++) {
-		void __iomem *ictlr = ictlr_reg_base[i];
-		/* Save interrupt state */
-		cpu_ier[i] = readl_relaxed(ictlr + ICTLR_CPU_IER);
-		cpu_iep[i] = readl_relaxed(ictlr + ICTLR_CPU_IEP_CLASS);
-		cop_ier[i] = readl_relaxed(ictlr + ICTLR_COP_IER);
-		cop_iep[i] = readl_relaxed(ictlr + ICTLR_COP_IEP_CLASS);
-
-		/* Disable COP interrupts */
-		writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
-
-		/* Disable CPU interrupts */
-		writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
-
-		/* Enable the wakeup sources of ictlr */
-		writel_relaxed(ictlr_wake_mask[i], ictlr + ICTLR_CPU_IER_SET);
-	}
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-static void tegra_legacy_irq_resume(void)
-{
-	unsigned long flags;
-	int i;
-
-	local_irq_save(flags);
-	for (i = 0; i < num_ictlrs; i++) {
-		void __iomem *ictlr = ictlr_reg_base[i];
-		writel_relaxed(cpu_iep[i], ictlr + ICTLR_CPU_IEP_CLASS);
-		writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
-		writel_relaxed(cpu_ier[i], ictlr + ICTLR_CPU_IER_SET);
-		writel_relaxed(cop_iep[i], ictlr + ICTLR_COP_IEP_CLASS);
-		writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
-		writel_relaxed(cop_ier[i], ictlr + ICTLR_COP_IER_SET);
-	}
-	local_irq_restore(flags);
-}
-
-static struct syscore_ops tegra_legacy_irq_syscore_ops = {
-	.suspend = tegra_legacy_irq_suspend,
-	.resume = tegra_legacy_irq_resume,
-};
-
-int tegra_legacy_irq_syscore_init(void)
-{
-	register_syscore_ops(&tegra_legacy_irq_syscore_ops);
-
-	return 0;
-}
-
 static int tegra_gic_notifier(struct notifier_block *self,
 			      unsigned long cmd, void *v)
 {
@@ -251,45 +84,19 @@ static void tegra114_gic_cpu_pm_registration(void)
 	cpu_pm_register_notifier(&tegra_gic_notifier_block);
 }
 #else
-#define tegra_set_wake NULL
 static void tegra114_gic_cpu_pm_registration(void) { }
 #endif
 
+static const struct of_device_id tegra_ictlr_match[] __initconst = {
+	{ .compatible = "nvidia,tegra20-ictlr" },
+	{ .compatible = "nvidia,tegra30-ictlr" },
+	{ }
+};
+
 void __init tegra_init_irq(void)
 {
-	int i;
-	void __iomem *distbase;
-
-	distbase = IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE);
-	num_ictlrs = readl_relaxed(distbase + GIC_DIST_CTR) & 0x1f;
-
-	if (num_ictlrs > ARRAY_SIZE(ictlr_reg_base)) {
-		WARN(1, "Too many (%d) interrupt controllers found. Maximum is %d.",
-			num_ictlrs, ARRAY_SIZE(ictlr_reg_base));
-		num_ictlrs = ARRAY_SIZE(ictlr_reg_base);
-	}
-
-	for (i = 0; i < num_ictlrs; i++) {
-		void __iomem *ictlr = ictlr_reg_base[i];
-		writel(~0, ictlr + ICTLR_CPU_IER_CLR);
-		writel(0, ictlr + ICTLR_CPU_IEP_CLASS);
-	}
-
-	gic_arch_extn.irq_ack = tegra_ack;
-	gic_arch_extn.irq_eoi = tegra_eoi;
-	gic_arch_extn.irq_mask = tegra_mask;
-	gic_arch_extn.irq_unmask = tegra_unmask;
-	gic_arch_extn.irq_retrigger = tegra_retrigger;
-	gic_arch_extn.irq_set_wake = tegra_set_wake;
-	gic_arch_extn.flags = IRQCHIP_MASK_ON_SUSPEND;
-
-	/*
-	 * Check if there is a devicetree present, since the GIC will be
-	 * initialized elsewhere under DT.
-	 */
-	if (!of_have_populated_dt())
-		gic_init(0, 29, distbase,
-			IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100));
+	if (WARN_ON(!of_find_matching_node(NULL, tegra_ictlr_match)))
+		pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
 
 	tegra114_gic_cpu_pm_registration();
 }
diff --git a/arch/arm/mach-tegra/irq.h b/arch/arm/mach-tegra/irq.h
index bc05ce5613fb..5142649bba05 100644
--- a/arch/arm/mach-tegra/irq.h
+++ b/arch/arm/mach-tegra/irq.h
@@ -19,10 +19,4 @@
 
 bool tegra_pending_sgi(void);
 
-#ifdef CONFIG_PM_SLEEP
-int tegra_legacy_irq_syscore_init(void);
-#else
-static inline int tegra_legacy_irq_syscore_init(void) { return 0; }
-#endif
-
 #endif
diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c
index 914341bcef25..861d88486dbe 100644
--- a/arch/arm/mach-tegra/tegra.c
+++ b/arch/arm/mach-tegra/tegra.c
@@ -82,7 +82,6 @@ static void __init tegra_dt_init_irq(void)
 {
 	tegra_init_irq();
 	irqchip_init();
-	tegra_legacy_irq_syscore_init();
 }
 
 static void __init tegra_dt_init(void)
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index dbb2970ee7da..6ced0f680262 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -52,7 +52,7 @@ void ux500_restart(enum reboot_mode mode, const char *cmd)
 */
 void __init ux500_init_irq(void)
 {
-	gic_arch_extn.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND;
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND);
 	irqchip_init();
 
 	/*
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 3c2509b4b694..4be537977040 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -42,6 +42,7 @@ if ARCH_VEXPRESS
 config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
 	bool "Enable A5 and A9 only errata work-arounds"
 	default y
+	select ARM_ERRATA_643719 if SMP
 	select ARM_ERRATA_720789
 	select PL310_ERRATA_753970 if CACHE_L2X0
 	help
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index c887196cfdbe..58ef2a700414 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -186,7 +186,7 @@ static void __init zynq_map_io(void)
 
 static void __init zynq_irq_init(void)
 {
-	gic_arch_extn.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND;
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND);
 	irqchip_init();
 }
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 9b4f29e595a4..b7644310236b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -738,7 +738,7 @@ config CPU_ICACHE_DISABLE
 
 config CPU_DCACHE_DISABLE
 	bool "Disable D-Cache (C-bit)"
-	depends on CPU_CP15
+	depends on CPU_CP15 && !SMP
 	help
 	  Say Y here to disable the processor data cache. Unless
 	  you have a reason not to or are unsure, say N.
@@ -825,6 +825,20 @@ config KUSER_HELPERS
 	  Say N here only if you are absolutely certain that you do not
 	  need these helpers; otherwise, the safe option is to say Y.
 
+config VDSO
+	bool "Enable VDSO for acceleration of some system calls"
+	depends on AEABI && MMU
+	default y if ARM_ARCH_TIMER
+	select GENERIC_TIME_VSYSCALL
+	help
+	  Place in the process address space an ELF shared object
+	  providing fast implementations of gettimeofday and
+	  clock_gettime.  Systems that implement the ARM architected
+	  timer will receive maximum benefit.
+
+	  You must have glibc 2.22 or later for programs to seamlessly
+	  take advantage of this.
+
 config DMA_CACHE_RWFO
 	bool "Enable read/write for ownership DMA cache maintenance"
 	depends on CPU_V6K && SMP
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 2c0c541c60ca..9769f1eefe3b 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -201,7 +201,7 @@ union offset_union {
  THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
  THUMB(	"	add	%2, %2, #1\n"	)		\
 	"2:\n"						\
-	"	.pushsection .fixup,\"ax\"\n"		\
+	"	.pushsection .text.fixup,\"ax\"\n"	\
 	"	.align	2\n"				\
 	"3:	mov	%0, #1\n"			\
 	"	b	2b\n"				\
@@ -261,7 +261,7 @@ union offset_union {
 		"	mov	%1, %1, "NEXT_BYTE"\n"		\
 		"2:	"ins"	%1, [%2]\n"			\
 		"3:\n"						\
-		"	.pushsection .fixup,\"ax\"\n"		\
+		"	.pushsection .text.fixup,\"ax\"\n"	\
 		"	.align	2\n"				\
 		"4:	mov	%0, #1\n"			\
 		"	b	3b\n"				\
@@ -301,7 +301,7 @@ union offset_union {
 		"	mov	%1, %1, "NEXT_BYTE"\n"		\
 		"4:	"ins"	%1, [%2]\n"			\
 		"5:\n"						\
-		"	.pushsection .fixup,\"ax\"\n"		\
+		"	.pushsection .text.fixup,\"ax\"\n"	\
 		"	.align	2\n"				\
 		"6:	mov	%0, #1\n"			\
 		"	b	5b\n"				\
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c6c7696b8db9..e309c8f35af5 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1131,23 +1131,22 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
-	if (ret)
-		return;
-
-	switch (assoc) {
-	case 16:
-		*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-		*aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
-		*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-		break;
-	case 8:
-		*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-		*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-		break;
-	default:
-		pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
-		       assoc);
-		break;
+	if (!ret) {
+		switch (assoc) {
+		case 16:
+			*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			*aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
+			*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			break;
+		case 8:
+			*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			break;
+		default:
+			pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
+			       assoc);
+			break;
+		}
 	}
 
 	prefetch = l2x0_saved_regs.prefetch_ctrl;
@@ -1648,6 +1647,7 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	struct device_node *np;
 	struct resource res;
 	u32 cache_id, old_aux;
+	u32 cache_level = 2;
 
 	np = of_find_matching_node(NULL, l2x0_ids);
 	if (!np)
@@ -1680,6 +1680,12 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	if (!of_property_read_bool(np, "cache-unified"))
 		pr_err("L2C: device tree omits to specify unified cache\n");
 
+	if (of_property_read_u32(np, "cache-level", &cache_level))
+		pr_err("L2C: device tree omits to specify cache-level\n");
+
+	if (cache_level != 2)
+		pr_err("L2C: device tree specifies invalid cache level\n");
+
 	/* Read back current (default) hardware configuration */
 	if (data->save)
 		data->save(l2x0_base);
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index b966656d2c2d..a134d8a13d00 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -36,10 +36,10 @@ ENTRY(v7_invalidate_l1)
        mcr     p15, 2, r0, c0, c0, 0
        mrc     p15, 1, r0, c0, c0, 0
 
-       ldr     r1, =0x7fff
+       movw    r1, #0x7fff
        and     r2, r1, r0, lsr #13
 
-       ldr     r1, =0x3ff
+       movw    r1, #0x3ff
 
        and     r3, r1, r0, lsr #3      @ NumWays - 1
        add     r2, r2, #1              @ NumSets
@@ -90,21 +90,20 @@ ENDPROC(v7_flush_icache_all)
 ENTRY(v7_flush_dcache_louis)
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
-	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
-	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
+ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
+ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
+	ands	r3, r3, #7 << 1 		@ extract LoU*2 field from clidr
+	bne	start_flush_levels		@ LoU != 0, start flushing
 #ifdef CONFIG_ARM_ERRATA_643719
-	ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
-	ALT_UP(reteq	lr)			@ LoUU is zero, so nothing to do
-	ldreq	r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
-	biceq	r2, r2, #0x0000000f             @ clear minor revision number
-	teqeq	r2, r1                          @ test for errata affected core and if so...
-	orreqs	r3, #(1 << 21)			@   fix LoUIS value (and set flags state to 'ne')
+ALT_SMP(mrc	p15, 0, r2, c0, c0, 0)		@ read main ID register
+ALT_UP(	ret	lr)				@ LoUU is zero, so nothing to do
+	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
+	movt	r1, #:upper16:(0x410fc090 >> 4)
+	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
+	moveq	r3, #1 << 1			@   fix LoUIS value
+	beq	start_flush_levels		@   start flushing cache levels
 #endif
-	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
-	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
-	reteq	lr				@ return if level == 0
-	mov	r10, #0				@ r10 (starting level) = 0
-	b	flush_levels			@ start flushing cache levels
+	ret	lr
 ENDPROC(v7_flush_dcache_louis)
 
 /*
@@ -119,9 +118,10 @@ ENDPROC(v7_flush_dcache_louis)
 ENTRY(v7_flush_dcache_all)
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	ands	r3, r0, #0x7000000		@ extract loc from clidr
-	mov	r3, r3, lsr #23			@ left align loc bit field
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
 	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
 	mov	r10, #0				@ start clean at cache level 0
 flush_levels:
 	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
@@ -140,10 +140,10 @@ flush_levels:
 #endif
 	and	r2, r1, #7			@ extract the length of the cache lines
 	add	r2, r2, #4			@ add 4 (line length offset)
-	ldr	r4, =0x3ff
+	movw	r4, #0x3ff
 	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
 	clz	r5, r4				@ find bit position of way size increment
-	ldr	r7, =0x7fff
+	movw	r7, #0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
 loop1:
 	mov	r9, r7				@ create working copy of max index
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 170a116d1b29..09c5fe3d30c2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -171,7 +171,7 @@ static int __dma_supported(struct device *dev, u64 mask, bool warn)
 	 */
 	if (sizeof(mask) != sizeof(dma_addr_t) &&
 	    mask > (dma_addr_t)~0 &&
-	    dma_to_pfn(dev, ~0) < max_pfn) {
+	    dma_to_pfn(dev, ~0) < max_pfn - 1) {
 		if (warn) {
 			dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
 				 mask);
@@ -289,11 +289,11 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
-				     const void *caller);
+				     const void *caller, bool want_vaddr);
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 				 pgprot_t prot, struct page **ret_page,
-				 const void *caller);
+				 const void *caller, bool want_vaddr);
 
 static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
@@ -357,10 +357,10 @@ static int __init atomic_pool_init(void)
 
 	if (dev_get_cma_area(NULL))
 		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
-					      &page, atomic_pool_init);
+					      &page, atomic_pool_init, true);
 	else
 		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
-					   &page, atomic_pool_init);
+					   &page, atomic_pool_init, true);
 	if (ptr) {
 		int ret;
 
@@ -467,13 +467,15 @@ static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 				 pgprot_t prot, struct page **ret_page,
-				 const void *caller)
+				 const void *caller, bool want_vaddr)
 {
 	struct page *page;
-	void *ptr;
+	void *ptr = NULL;
 	page = __dma_alloc_buffer(dev, size, gfp);
 	if (!page)
 		return NULL;
+	if (!want_vaddr)
+		goto out;
 
 	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
 	if (!ptr) {
@@ -481,6 +483,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 		return NULL;
 	}
 
+ out:
 	*ret_page = page;
 	return ptr;
 }
@@ -523,12 +526,12 @@ static int __free_from_pool(void *start, size_t size)
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
-				     const void *caller)
+				     const void *caller, bool want_vaddr)
 {
 	unsigned long order = get_order(size);
 	size_t count = size >> PAGE_SHIFT;
 	struct page *page;
-	void *ptr;
+	void *ptr = NULL;
 
 	page = dma_alloc_from_contiguous(dev, count, order);
 	if (!page)
@@ -536,6 +539,9 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 
 	__dma_clear_buffer(page, size);
 
+	if (!want_vaddr)
+		goto out;
+
 	if (PageHighMem(page)) {
 		ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
 		if (!ptr) {
@@ -546,17 +552,21 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 		__dma_remap(page, size, prot);
 		ptr = page_address(page);
 	}
+
+ out:
 	*ret_page = page;
 	return ptr;
 }
 
 static void __free_from_contiguous(struct device *dev, struct page *page,
-				   void *cpu_addr, size_t size)
+				   void *cpu_addr, size_t size, bool want_vaddr)
 {
-	if (PageHighMem(page))
-		__dma_free_remap(cpu_addr, size);
-	else
-		__dma_remap(page, size, PAGE_KERNEL);
+	if (want_vaddr) {
+		if (PageHighMem(page))
+			__dma_free_remap(cpu_addr, size);
+		else
+			__dma_remap(page, size, PAGE_KERNEL);
+	}
 	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
 }
 
@@ -574,12 +584,12 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
 
 #define nommu() 1
 
-#define __get_dma_pgprot(attrs, prot)	__pgprot(0)
-#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c)	NULL
+#define __get_dma_pgprot(attrs, prot)				__pgprot(0)
+#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)	NULL
 #define __alloc_from_pool(size, ret_page)			NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c)	NULL
+#define __alloc_from_contiguous(dev, size, prot, ret, c, wv)	NULL
 #define __free_from_pool(cpu_addr, size)			0
-#define __free_from_contiguous(dev, page, cpu_addr, size)	do { } while (0)
+#define __free_from_contiguous(dev, page, cpu_addr, size, wv)	do { } while (0)
 #define __dma_free_remap(cpu_addr, size)			do { } while (0)
 
 #endif	/* CONFIG_MMU */
@@ -599,11 +609,13 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
 
 
 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-			 gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
+			 gfp_t gfp, pgprot_t prot, bool is_coherent,
+			 struct dma_attrs *attrs, const void *caller)
 {
 	u64 mask = get_coherent_dma_mask(dev);
 	struct page *page = NULL;
 	void *addr;
+	bool want_vaddr;
 
 #ifdef CONFIG_DMA_API_DEBUG
 	u64 limit = (mask + 1) & ~mask;
@@ -631,20 +643,21 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
 	*handle = DMA_ERROR_CODE;
 	size = PAGE_ALIGN(size);
+	want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
 
 	if (is_coherent || nommu())
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
 	else if (!(gfp & __GFP_WAIT))
 		addr = __alloc_from_pool(size, &page);
 	else if (!dev_get_cma_area(dev))
-		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
+		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr);
 	else
-		addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
+		addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr);
 
-	if (addr)
+	if (page)
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
 
-	return addr;
+	return want_vaddr ? addr : page;
 }
 
 /*
@@ -661,7 +674,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		return memory;
 
 	return __dma_alloc(dev, size, handle, gfp, prot, false,
-			   __builtin_return_address(0));
+			   attrs, __builtin_return_address(0));
 }
 
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
@@ -674,7 +687,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 		return memory;
 
 	return __dma_alloc(dev, size, handle, gfp, prot, true,
-			   __builtin_return_address(0));
+			   attrs, __builtin_return_address(0));
 }
 
 /*
@@ -715,6 +728,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 			   bool is_coherent)
 {
 	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
 
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
@@ -726,14 +740,15 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	} else if (__free_from_pool(cpu_addr, size)) {
 		return;
 	} else if (!dev_get_cma_area(dev)) {
-		__dma_free_remap(cpu_addr, size);
+		if (want_vaddr)
+			__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
 		/*
 		 * Non-atomic allocations cannot be freed with IRQs disabled
 		 */
 		WARN_ON(irqs_disabled());
-		__free_from_contiguous(dev, page, cpu_addr, size);
+		__free_from_contiguous(dev, page, cpu_addr, size, want_vaddr);
 	}
 }
 
@@ -1135,13 +1150,28 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 
 	while (count) {
-		int j, order = __fls(count);
+		int j, order;
+
+		for (order = __fls(count); order > 0; --order) {
+			/*
+			 * We do not want OOM killer to be invoked as long
+			 * as we can fall back to single pages, so we force
+			 * __GFP_NORETRY for orders higher than zero.
+			 */
+			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
+			if (pages[i])
+				break;
+		}
 
-		pages[i] = alloc_pages(gfp, order);
-		while (!pages[i] && order)
-			pages[i] = alloc_pages(gfp, --order);
-		if (!pages[i])
-			goto error;
+		if (!pages[i]) {
+			/*
+			 * Fall back to single page allocation.
+			 * Might invoke OOM killer as last resort.
+			 */
+			pages[i] = alloc_pages(gfp, 0);
+			if (!pages[i])
+				goto error;
+		}
 
 		if (order) {
 			split_page(pages[i], order);
@@ -1206,7 +1236,7 @@ __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
 static dma_addr_t
 __iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t dma_addr, iova;
 	int i, ret = DMA_ERROR_CODE;
@@ -1242,7 +1272,7 @@ fail:
 
 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 
 	/*
 	 * add optional in-page offset from iova to size and align
@@ -1457,7 +1487,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 			  enum dma_data_direction dir, struct dma_attrs *attrs,
 			  bool is_coherent)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova, iova_base;
 	int ret = 0;
 	unsigned int count;
@@ -1678,7 +1708,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
 	     struct dma_attrs *attrs)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t dma_addr;
 	int ret, prot, len = PAGE_ALIGN(size + offset);
 
@@ -1731,7 +1761,7 @@ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
 		struct dma_attrs *attrs)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	int offset = handle & ~PAGE_MASK;
 	int len = PAGE_ALIGN(size + offset);
@@ -1756,7 +1786,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
 		struct dma_attrs *attrs)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
 	int offset = handle & ~PAGE_MASK;
@@ -1775,7 +1805,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 static void arm_iommu_sync_single_for_cpu(struct device *dev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
 	unsigned int offset = handle & ~PAGE_MASK;
@@ -1789,7 +1819,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
 static void arm_iommu_sync_single_for_device(struct device *dev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
 	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
 	unsigned int offset = handle & ~PAGE_MASK;
@@ -1950,7 +1980,7 @@ static int __arm_iommu_attach_device(struct device *dev,
 		return err;
 
 	kref_get(&mapping->kref);
-	dev->archdata.mapping = mapping;
+	to_dma_iommu_mapping(dev) = mapping;
 
 	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
 	return 0;
@@ -1995,7 +2025,7 @@ static void __arm_iommu_detach_device(struct device *dev)
 
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
-	dev->archdata.mapping = NULL;
+	to_dma_iommu_mapping(dev) = NULL;
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
@@ -2027,6 +2057,13 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	if (!iommu)
 		return false;
 
+	/*
+	 * currently arm_iommu_create_mapping() takes a max of size_t
+	 * for size param. So check this limit for now.
+	 */
+	if (size > SIZE_MAX)
+		return false;
+
 	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
 	if (IS_ERR(mapping)) {
 		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
@@ -2046,7 +2083,7 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 
 	if (!mapping)
 		return;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index a982dc3190df..6333d9c17875 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -552,6 +552,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
 	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 		inf->name, fsr, addr);
+	show_pte(current->mm, addr);
 
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1609b022a72f..be92fa0f2f35 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -86,55 +86,6 @@ static int __init parse_tag_initrd2(const struct tag *tag)
 
 __tagtable(ATAG_INITRD2, parse_tag_initrd2);
 
-/*
- * This keeps memory configuration data used by a couple memory
- * initialization functions, as well as show_mem() for the skipping
- * of holes in the memory map.  It is populated by arm_add_memory().
- */
-void show_mem(unsigned int filter)
-{
-	int free = 0, total = 0, reserved = 0;
-	int shared = 0, cached = 0, slab = 0;
-	struct memblock_region *reg;
-
-	printk("Mem-info:\n");
-	show_free_areas(filter);
-
-	for_each_memblock (memory, reg) {
-		unsigned int pfn1, pfn2;
-		struct page *page, *end;
-
-		pfn1 = memblock_region_memory_base_pfn(reg);
-		pfn2 = memblock_region_memory_end_pfn(reg);
-
-		page = pfn_to_page(pfn1);
-		end  = pfn_to_page(pfn2 - 1) + 1;
-
-		do {
-			total++;
-			if (PageReserved(page))
-				reserved++;
-			else if (PageSwapCache(page))
-				cached++;
-			else if (PageSlab(page))
-				slab++;
-			else if (!page_count(page))
-				free++;
-			else
-				shared += page_count(page) - 1;
-			pfn1++;
-			page = pfn_to_page(pfn1);
-		} while (pfn1 < pfn2);
-	}
-
-	printk("%d pages of RAM\n", total);
-	printk("%d free pages\n", free);
-	printk("%d reserved pages\n", reserved);
-	printk("%d slab pages\n", slab);
-	printk("%d pages shared\n", shared);
-	printk("%d pages swap cached\n", cached);
-}
-
 static void __init find_limits(unsigned long *min, unsigned long *max_low,
 			       unsigned long *max_high)
 {
@@ -335,6 +286,9 @@ void __init bootmem_init(void)
 
 	find_limits(&min, &max_low, &max_high);
 
+	early_memtest((phys_addr_t)min << PAGE_SHIFT,
+		      (phys_addr_t)max_low << PAGE_SHIFT);
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(),
 	 * so must be done after the fixed reservations
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 5e85ed371364..407dc786583a 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -169,14 +169,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	return addr;
 }
 
+unsigned long arch_mmap_rnd(void)
+{
+	unsigned long rnd;
+
+	/* 8 bits of randomness in 20 address space bits */
+	rnd = (unsigned long)get_random_int() % (1 << 8);
+
+	return rnd << PAGE_SHIFT;
+}
+
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	unsigned long random_factor = 0UL;
 
-	/* 8 bits of randomness in 20 address space bits */
-	if ((current->flags & PF_RANDOMIZE) &&
-	    !(current->personality & ADDR_NO_RANDOMIZE))
-		random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT;
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index 004e35cdcfff..cf30daff8932 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -49,7 +49,10 @@ static int change_memory_common(unsigned long addr, int numpages,
 		WARN_ON_ONCE(1);
 	}
 
-	if (!is_module_address(start) || !is_module_address(end - 1))
+	if (start < MODULES_VADDR || start >= MODULES_END)
+		return -EINVAL;
+
+	if (end < MODULES_VADDR || start >= MODULES_END)
 		return -EINVAL;
 
 	data.set_mask = set_mask;
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 86ee5d47ce3c..aa0519eed698 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -507,7 +507,7 @@ cpu_arm1020_name:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1020_proc_info,#object
 __arm1020_proc_info:
@@ -519,7 +519,7 @@ __arm1020_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1020_setup
+	initfn	__arm1020_setup, __arm1020_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index a6331d78601f..bff4c7f70fd6 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -465,7 +465,7 @@ arm1020e_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1020e_proc_info,#object
 __arm1020e_proc_info:
@@ -479,7 +479,7 @@ __arm1020e_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1020e_setup
+	initfn	__arm1020e_setup, __arm1020e_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index a126b7a59928..dbb2413fe04d 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -448,7 +448,7 @@ arm1022_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1022_proc_info,#object
 __arm1022_proc_info:
@@ -462,7 +462,7 @@ __arm1022_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1022_setup
+	initfn	__arm1022_setup, __arm1022_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index fc294067e977..0b37b2cef9d3 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -442,7 +442,7 @@ arm1026_crval:
 	string	cpu_arm1026_name, "ARM1026EJ-S"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm1026_proc_info,#object
 __arm1026_proc_info:
@@ -456,7 +456,7 @@ __arm1026_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm1026_setup
+	initfn	__arm1026_setup, __arm1026_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 2baa66b3ac9b..3651cd70e418 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -186,7 +186,7 @@ arm720_crval:
  * See <asm/procinfo.h> for a definition of this structure.
  */
 	
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req
 		.type	__\name\()_proc_info,#object
@@ -203,7 +203,7 @@ __\name\()_proc_info:
 			PMD_BIT4 | \
 			PMD_SECT_AP_WRITE | \
 			PMD_SECT_AP_READ
-		b	\cpu_flush				@ cpu_flush
+		initfn	\cpu_flush, __\name\()_proc_info	@ cpu_flush
 		.long	cpu_arch_name				@ arch_name
 		.long	cpu_elf_name				@ elf_name
 		.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB	@ elf_hwcap
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index ac1ea6b3bce4..024fb7732407 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -132,14 +132,14 @@ __arm740_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 	.type	__arm740_proc_info,#object
 __arm740_proc_info:
 	.long	0x41807400
 	.long	0xfffffff0
 	.long	0
 	.long	0
-	b	__arm740_setup
+	initfn	__arm740_setup, __arm740_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index bf6ba4bc30ff..25472d94426d 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -76,7 +76,7 @@ __arm7tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \
 	extra_hwcaps=0
@@ -86,7 +86,7 @@ __\name\()_proc_info:
 		.long	\cpu_mask
 		.long	0
 		.long	0
-		b	__arm7tdmi_setup
+		initfn	__arm7tdmi_setup, __\name\()_proc_info
 		.long	cpu_arch_name
 		.long	cpu_elf_name
 		.long	HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps )
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 22bf8dde4f84..7a14bd4414c9 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -448,7 +448,7 @@ arm920_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm920_proc_info,#object
 __arm920_proc_info:
@@ -464,7 +464,7 @@ __arm920_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm920_setup
+	initfn	__arm920_setup, __arm920_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 0c6d5ac5a6d4..edccfcdcd551 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -426,7 +426,7 @@ arm922_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm922_proc_info,#object
 __arm922_proc_info:
@@ -442,7 +442,7 @@ __arm922_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm922_setup
+	initfn	__arm922_setup, __arm922_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index c32d073282ea..ede8c54ab4aa 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -494,7 +494,7 @@ arm925_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
@@ -510,7 +510,7 @@ __\name\()_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm925_setup
+	initfn	__arm925_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 252b2503038d..fb827c633693 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -474,7 +474,7 @@ arm926_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm926_proc_info,#object
 __arm926_proc_info:
@@ -490,7 +490,7 @@ __arm926_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__arm926_setup
+	initfn	__arm926_setup, __arm926_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index e5212d489377..ee5b66f847c4 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -297,26 +297,16 @@ __arm940_setup:
 	mcr	p15, 0, r0, c6,	c0, 1
 
 	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
-	ldr	r1, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c1, 0		@ set area 1, RAM
-	mcr	p15, 0, r0, c6,	c1, 1
+	ldr	r7, =CONFIG_DRAM_SIZE >> 12	@ size of RAM (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6,	c1, 0		@ set area 1, RAM
+	mcr	p15, 0, r3, c6,	c1, 1
 
 	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
-	ldr	r1, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c2, 0		@ set area 2, ROM/FLASH
-	mcr	p15, 0, r0, c6,	c2, 1
+	ldr	r7, =CONFIG_FLASH_SIZE		@ size of FLASH (must be >= 4KB)
+	pr_val	r3, r0, r6, #1
+	mcr	p15, 0, r3, c6,	c2, 0		@ set area 2, ROM/FLASH
+	mcr	p15, 0, r3, c6,	c2, 1
 
 	mov	r0, #0x06
 	mcr	p15, 0, r0, c2, c0, 0		@ Region 1&2 cacheable
@@ -354,14 +344,14 @@ __arm940_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__arm940_proc_info,#object
 __arm940_proc_info:
 	.long	0x41009400
 	.long	0xff00fff0
 	.long	0
-	b	__arm940_setup
+	initfn	__arm940_setup, __arm940_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index b3dd9b2d0b8e..7361837edc31 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -343,24 +343,14 @@ __arm946_setup:
 	mcr	p15, 0, r0, c6,	c0, 0		@ set region 0, default
 
 	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
-	ldr	r1, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the region register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c1, 0		@ set region 1, RAM
+	ldr	r7, =CONFIG_DRAM_SIZE		@ size of RAM (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6, c1, 0
 
 	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
-	ldr	r1, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
-	mov	r2, #10				@ 11 is the minimum (4KB)
-1:	add	r2, r2, #1			@ area size *= 2
-	mov	r1, r1, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r2, lsl #1		@ the region register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6,	c2, 0		@ set region 2, ROM/FLASH
+	ldr	r7, =CONFIG_FLASH_SIZE		@ size of FLASH (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6, c2, 0
 
 	mov	r0, #0x06
 	mcr	p15, 0, r0, c2, c0, 0		@ region 1,2 d-cacheable
@@ -409,14 +399,14 @@ __arm946_setup:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 	.type	__arm946_proc_info,#object
 __arm946_proc_info:
 	.long	0x41009460
 	.long	0xff00fff0
 	.long	0
 	.long	0
-	b	__arm946_setup
+	initfn	__arm946_setup, __arm946_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 8227322bbb8f..7fac8c612134 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -70,7 +70,7 @@ __arm9tdmi_setup:
 
 		.align
 
-		.section ".proc.info.init", #alloc, #execinstr
+		.section ".proc.info.init", #alloc
 
 .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 		.type	__\name\()_proc_info, #object
@@ -79,7 +79,7 @@ __\name\()_proc_info:
 		.long	\cpu_mask
 		.long	0
 		.long	0
-		b	__arm9tdmi_setup
+		initfn	__arm9tdmi_setup, __\name\()_proc_info
 		.long	cpu_arch_name
 		.long	cpu_elf_name
 		.long	HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index c494886892ba..4001b73af4ee 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -190,7 +190,7 @@ fa526_cr1_set:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__fa526_proc_info,#object
 __fa526_proc_info:
@@ -206,7 +206,7 @@ __fa526_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__fa526_setup
+	initfn	__fa526_setup, __fa526_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 03a1b75f2e16..e494d6d6acbe 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -584,7 +584,7 @@ feroceon_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req
 	.type	__\name\()_proc_info,#object
@@ -601,7 +601,8 @@ __\name\()_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__feroceon_setup
+	initfn	__feroceon_setup, __\name\()_proc_info
+	.long __feroceon_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 082b9f2f7e90..c671f345266a 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -331,3 +331,31 @@ ENTRY(\name\()_tlb_fns)
 	.globl	\x
 	.equ	\x, \y
 .endm
+
+.macro	initfn, func, base
+	.long	\func - \base
+.endm
+
+	/*
+	 * Macro to calculate the log2 size for the protection region
+	 * registers. This calculates rd = log2(size) - 1.  tmp must
+	 * not be the same register as rd.
+	 */
+.macro	pr_sz, rd, size, tmp
+	mov	\tmp, \size, lsr #12
+	mov	\rd, #11
+1:	movs	\tmp, \tmp, lsr #1
+	addne	\rd, \rd, #1
+	bne	1b
+.endm
+
+	/*
+	 * Macro to generate a protection region register value
+	 * given a pre-masked address, size, and enable bit.
+	 * Corrupts size.
+	 */
+.macro	pr_val, dest, addr, size, enable
+	pr_sz	\dest, \size, \size		@ calculate log2(size) - 1
+	orr	\dest, \addr, \dest, lsl #1	@ mask in the region size
+	orr	\dest, \dest, \enable
+.endm
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 53d393455f13..d65edf717bf7 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -427,7 +427,7 @@ mohawk_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__88sv331x_proc_info,#object
 __88sv331x_proc_info:
@@ -443,7 +443,7 @@ __88sv331x_proc_info:
 		PMD_BIT4 | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__mohawk_setup
+	initfn	__mohawk_setup, __88sv331x_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 8008a0461cf5..ee2ce496239f 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -199,7 +199,7 @@ sa110_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	.type	__sa110_proc_info,#object
 __sa110_proc_info:
@@ -213,7 +213,7 @@ __sa110_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__sa110_setup
+	initfn	__sa110_setup, __sa110_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 89f97ac648a9..222d5836f666 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -242,7 +242,7 @@ sa1100_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
 	.type	__\name\()_proc_info,#object
@@ -257,7 +257,7 @@ __\name\()_proc_info:
 	.long   PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__sa1100_setup
+	initfn	__sa1100_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index d0390f4b3f18..06d890a2342b 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -264,7 +264,7 @@ v6_crval:
 	string	cpu_elf_name, "v6"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Match any ARMv6 processor core.
@@ -287,7 +287,7 @@ __v6_proc_info:
 		PMD_SECT_XN | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__v6_setup
+	initfn	__v6_setup, __v6_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	/* See also feat_v6_fixup() for HWCAP_TLS */
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index ed448d8a596b..10405b8d31af 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -37,15 +37,18 @@
  *	It is assumed that:
  *	- we are not using split page tables
  */
-ENTRY(cpu_v7_switch_mm)
+ENTRY(cpu_ca8_switch_mm)
 #ifdef CONFIG_MMU
 	mov	r2, #0
-	mmid	r1, r1				@ get mm->context.id
-	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
-	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
 #ifdef CONFIG_ARM_ERRATA_430973
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 #endif
+#endif
+ENTRY(cpu_v7_switch_mm)
+#ifdef CONFIG_MMU
+	mmid	r1, r1				@ get mm->context.id
+	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
 	lsr	r2, r2, #8			@ extract the PID
@@ -61,6 +64,7 @@ ENTRY(cpu_v7_switch_mm)
 #endif
 	bx	lr
 ENDPROC(cpu_v7_switch_mm)
+ENDPROC(cpu_ca8_switch_mm)
 
 /*
  *	cpu_v7_set_pte_ext(ptep, pte)
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 8b4ee5e81c14..3d1054f11a8a 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -153,6 +153,21 @@ ENDPROC(cpu_v7_do_resume)
 #endif
 
 /*
+ * Cortex-A8
+ */
+	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca8_reset,		cpu_v7_reset
+	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
+#endif
+
+/*
  * Cortex-A9 processor functions
  */
 	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
@@ -451,7 +466,10 @@ __v7_setup_stack:
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+#ifndef CONFIG_ARM_LPAE
+	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+#endif
 #ifdef CONFIG_CPU_PJ4B
 	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #endif
@@ -462,19 +480,19 @@ __v7_setup_stack:
 	string	cpu_elf_name, "v7"
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Standard v7 proc info content
 	 */
-.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
+.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
 	ALT_SMP(.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
 			PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
 	ALT_UP(.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
 			PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
 	.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
-	W(b)	\initfunc
+	initfn	\initfunc, \name
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
@@ -494,7 +512,7 @@ __v7_setup_stack:
 __v7_ca5mp_proc_info:
 	.long	0x410fc050
 	.long	0xff0ffff0
-	__v7_proc __v7_ca5mp_setup
+	__v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup
 	.size	__v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info
 
 	/*
@@ -504,9 +522,19 @@ __v7_ca5mp_proc_info:
 __v7_ca9mp_proc_info:
 	.long	0x410fc090
 	.long	0xff0ffff0
-	__v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
+	__v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
 	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
 
+	/*
+	 * ARM Ltd. Cortex A8 processor.
+	 */
+	.type	__v7_ca8_proc_info, #object
+__v7_ca8_proc_info:
+	.long	0x410fc080
+	.long	0xff0ffff0
+	__v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions
+	.size	__v7_ca8_proc_info, . - __v7_ca8_proc_info
+
 #endif	/* CONFIG_ARM_LPAE */
 
 	/*
@@ -517,7 +545,7 @@ __v7_ca9mp_proc_info:
 __v7_pj4b_proc_info:
 	.long	0x560f5800
 	.long	0xff0fff00
-	__v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions
+	__v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions
 	.size	__v7_pj4b_proc_info, . - __v7_pj4b_proc_info
 #endif
 
@@ -528,7 +556,7 @@ __v7_pj4b_proc_info:
 __v7_cr7mp_proc_info:
 	.long	0x410fc170
 	.long	0xff0ffff0
-	__v7_proc __v7_cr7mp_setup
+	__v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup
 	.size	__v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info
 
 	/*
@@ -538,7 +566,7 @@ __v7_cr7mp_proc_info:
 __v7_ca7mp_proc_info:
 	.long	0x410fc070
 	.long	0xff0ffff0
-	__v7_proc __v7_ca7mp_setup
+	__v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup
 	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
 
 	/*
@@ -548,7 +576,7 @@ __v7_ca7mp_proc_info:
 __v7_ca12mp_proc_info:
 	.long	0x410fc0d0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca12mp_setup
+	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup
 	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
 
 	/*
@@ -558,7 +586,7 @@ __v7_ca12mp_proc_info:
 __v7_ca15mp_proc_info:
 	.long	0x410fc0f0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca15mp_setup
+	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
 	/*
@@ -568,7 +596,7 @@ __v7_ca15mp_proc_info:
 __v7_b15mp_proc_info:
 	.long	0x420f00f0
 	.long	0xff0ffff0
-	__v7_proc __v7_b15mp_setup
+	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup
 	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
 
 	/*
@@ -578,7 +606,7 @@ __v7_b15mp_proc_info:
 __v7_ca17mp_proc_info:
 	.long	0x410fc0e0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca17mp_setup
+	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup
 	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
 
 	/*
@@ -594,7 +622,7 @@ __krait_proc_info:
 	 * do support them. They also don't indicate support for fused multiply
 	 * instructions even though they actually do support them.
 	 */
-	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
+	__v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
 	.size	__krait_proc_info, . - __krait_proc_info
 
 	/*
@@ -604,5 +632,5 @@ __krait_proc_info:
 __v7_proc_info:
 	.long	0x000f0000		@ Required ID value
 	.long	0x000f0000		@ Mask for ID
-	__v7_proc __v7_setup
+	__v7_proc __v7_proc_info, __v7_setup
 	.size	__v7_proc_info, . - __v7_proc_info
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index d1e68b553d3b..e08e1f2bab76 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -135,7 +135,7 @@ __v7m_setup_stack_top:
 	string cpu_elf_name "v7m"
 	string cpu_v7m_name "ARMv7-M"
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 	/*
 	 * Match any ARMv7-M processor core.
@@ -146,7 +146,7 @@ __v7m_proc_info:
 	.long	0x000f0000		@ Mask for ID
 	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
 	.long   0			@ proc_info_list.__cpu_io_mmu_flags
-	b	__v7m_setup		@ proc_info_list.__cpu_flush
+	initfn	__v7m_setup, __v7m_proc_info	@ proc_info_list.__cpu_flush
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index f8acdfece036..293dcc2c441f 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -499,7 +499,7 @@ xsc3_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
 	.type	__\name\()_proc_info,#object
@@ -514,7 +514,7 @@ __\name\()_proc_info:
 	.long	PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__xsc3_setup
+	initfn	__xsc3_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index afa2b3c4df4a..b6bbfdb6dfdc 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -612,7 +612,7 @@ xscale_crval:
 
 	.align
 
-	.section ".proc.info.init", #alloc, #execinstr
+	.section ".proc.info.init", #alloc
 
 .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
 	.type	__\name\()_proc_info,#object
@@ -627,7 +627,7 @@ __\name\()_proc_info:
 	.long	PMD_TYPE_SECT | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__xscale_setup
+	initfn	__xscale_setup, __\name\()_proc_info
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index 5d65be1f1e8a..71df43547659 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -113,7 +113,7 @@ next:
 	@ to fault.  Emit the appropriate exception gunk to fix things up.
 	@ ??? For some reason, faults can happen at .Lx2 even with a
 	@ plain LDR instruction.  Weird, but it seems harmless.
-	.pushsection .fixup,"ax"
+	.pushsection .text.fixup,"ax"
 	.align	2
 .Lfix:	ret	r9			@ let the user eat segfaults
 	.popsection
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 61b4d705c267..2438b96004c1 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void)
 }
 
 /**
- * omap_read_persistent_clock -  Return time from a persistent clock.
+ * omap_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
  * 32k sync timer.  Convert the cycles elapsed since last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds to a monotonically increasing timespec64.
  */
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static cycles_t cycles;
 static unsigned int persistent_mult, persistent_shift;
-static DEFINE_SPINLOCK(read_persistent_clock_lock);
 
-static void omap_read_persistent_clock(struct timespec *ts)
+static void omap_read_persistent_clock64(struct timespec64 *ts)
 {
 	unsigned long long nsecs;
 	cycles_t last_cycles;
-	unsigned long flags;
-
-	spin_lock_irqsave(&read_persistent_clock_lock, flags);
 
 	last_cycles = cycles;
 	cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0;
@@ -69,11 +65,9 @@ static void omap_read_persistent_clock(struct timespec *ts)
 	nsecs = clocksource_cyc2ns(cycles - last_cycles,
 					persistent_mult, persistent_shift);
 
-	timespec_add_ns(&persistent_ts, nsecs);
+	timespec64_add_ns(&persistent_ts, nsecs);
 
 	*ts = persistent_ts;
-
-	spin_unlock_irqrestore(&read_persistent_clock_lock, flags);
 }
 
 /**
@@ -103,7 +97,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
 
 	/*
 	 * 120000 rough estimate from the calculations in
-	 * __clocksource_updatefreq_scale.
+	 * __clocksource_update_freq_scale.
 	 */
 	clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
 			32768, NSEC_PER_SEC, 120000);
@@ -116,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
 	}
 
 	sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
-	register_persistent_clock(NULL, omap_read_persistent_clock);
+	register_persistent_clock(NULL, omap_read_persistent_clock64);
 	pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
 
 	return 0;
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index db10169a08de..8ca94d379bc3 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -799,6 +799,7 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	const struct of_device_id *match;
 	const struct dmtimer_platform_data *pdata;
+	int ret;
 
 	match = of_match_device(of_match_ptr(omap_timer_match), dev);
 	pdata = match ? match->data : dev->platform_data;
@@ -860,7 +861,12 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 	}
 
 	if (!timer->reserved) {
-		pm_runtime_get_sync(dev);
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0) {
+			dev_err(dev, "%s: pm_runtime_get_sync failed!\n",
+				__func__);
+			goto err_get_sync;
+		}
 		__omap_dm_timer_init_regs(timer);
 		pm_runtime_put(dev);
 	}
@@ -873,6 +879,11 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 	dev_dbg(dev, "Device Probed.\n");
 
 	return 0;
+
+err_get_sync:
+	pm_runtime_put_noidle(dev);
+	pm_runtime_disable(dev);
+	return ret;
 }
 
 /**
@@ -899,6 +910,8 @@ static int omap_dm_timer_remove(struct platform_device *pdev)
 		}
 	spin_unlock_irqrestore(&dm_timer_lock, flags);
 
+	pm_runtime_disable(&pdev->dev);
+
 	return ret;
 }
 
diff --git a/arch/arm/plat-pxa/dma.c b/arch/arm/plat-pxa/dma.c
index 054fc5a1a11c..d92f07f6ecfb 100644
--- a/arch/arm/plat-pxa/dma.c
+++ b/arch/arm/plat-pxa/dma.c
@@ -51,19 +51,19 @@ static struct dentry *dbgfs_root, *dbgfs_state, **dbgfs_chan;
 
 static int dbg_show_requester_chan(struct seq_file *s, void *p)
 {
-	int pos = 0;
 	int chan = (int)s->private;
 	int i;
 	u32 drcmr;
 
-	pos += seq_printf(s, "DMA channel %d requesters list :\n", chan);
+	seq_printf(s, "DMA channel %d requesters list :\n", chan);
 	for (i = 0; i < DMA_MAX_REQUESTERS; i++) {
 		drcmr = DRCMR(i);
 		if ((drcmr & DRCMR_CHLNUM) == chan)
-			pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i,
-					  !!(drcmr & DRCMR_MAPVLD));
+			seq_printf(s, "\tRequester %d (MAPVLD=%d)\n",
+				   i, !!(drcmr & DRCMR_MAPVLD));
 	}
-	return pos;
+
+	return 0;
 }
 
 static inline int dbg_burst_from_dcmd(u32 dcmd)
@@ -83,7 +83,6 @@ static int is_phys_valid(unsigned long addr)
 
 static int dbg_show_descriptors(struct seq_file *s, void *p)
 {
-	int pos = 0;
 	int chan = (int)s->private;
 	int i, max_show = 20, burst, width;
 	u32 dcmd;
@@ -94,44 +93,43 @@ static int dbg_show_descriptors(struct seq_file *s, void *p)
 	spin_lock_irqsave(&dma_channels[chan].lock, flags);
 	phys_desc = DDADR(chan);
 
-	pos += seq_printf(s, "DMA channel %d descriptors :\n", chan);
-	pos += seq_printf(s, "[%03d] First descriptor unknown\n", 0);
+	seq_printf(s, "DMA channel %d descriptors :\n", chan);
+	seq_printf(s, "[%03d] First descriptor unknown\n", 0);
 	for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) {
 		desc = phys_to_virt(phys_desc);
 		dcmd = desc->dcmd;
 		burst = dbg_burst_from_dcmd(dcmd);
 		width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
 
-		pos += seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
-				  i, phys_desc, desc);
-		pos += seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
-		pos += seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
-		pos += seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
-		pos += seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d"
-				  " width=%d len=%d)\n",
-				  dcmd,
-				  DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
-				  DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
-				  DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
-				  DCMD_STR(ENDIAN), burst, width,
-				  dcmd & DCMD_LENGTH);
+		seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
+			   i, phys_desc, desc);
+		seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
+		seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
+		seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
+		seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+			   dcmd,
+			   DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
+			   DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
+			   DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
+			   DCMD_STR(ENDIAN), burst, width,
+			   dcmd & DCMD_LENGTH);
 		phys_desc = desc->ddadr;
 	}
 	if (i == max_show)
-		pos += seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
-				  i, phys_desc);
+		seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
+			   i, phys_desc);
 	else
-		pos += seq_printf(s, "[%03d] Desc at %08lx is %s\n",
-				  i, phys_desc, phys_desc == DDADR_STOP ?
-				  "DDADR_STOP" : "invalid");
+		seq_printf(s, "[%03d] Desc at %08lx is %s\n",
+			   i, phys_desc, phys_desc == DDADR_STOP ?
+			   "DDADR_STOP" : "invalid");
 
 	spin_unlock_irqrestore(&dma_channels[chan].lock, flags);
-	return pos;
+
+	return 0;
 }
 
 static int dbg_show_chan_state(struct seq_file *s, void *p)
 {
-	int pos = 0;
 	int chan = (int)s->private;
 	u32 dcsr, dcmd;
 	int burst, width;
@@ -142,42 +140,39 @@ static int dbg_show_chan_state(struct seq_file *s, void *p)
 	burst = dbg_burst_from_dcmd(dcmd);
 	width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
 
-	pos += seq_printf(s, "DMA channel %d\n", chan);
-	pos += seq_printf(s, "\tPriority : %s\n",
-			  str_prio[dma_channels[chan].prio]);
-	pos += seq_printf(s, "\tUnaligned transfer bit: %s\n",
-			  DALGN & (1 << chan) ? "yes" : "no");
-	pos += seq_printf(s, "\tDCSR  = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
-			  dcsr, DCSR_STR(RUN), DCSR_STR(NODESC),
-			  DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN),
-			  DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN),
-			  DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST),
-			  DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND),
-			  DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR),
-			  DCSR_STR(STARTINTR), DCSR_STR(BUSERR));
-
-	pos += seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d width=%d"
-			  " len=%d)\n",
-			  dcmd,
-			  DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
-			  DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
-			  DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
-			  DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH);
-	pos += seq_printf(s, "\tDSADR = %08x\n", DSADR(chan));
-	pos += seq_printf(s, "\tDTADR = %08x\n", DTADR(chan));
-	pos += seq_printf(s, "\tDDADR = %08x\n", DDADR(chan));
-	return pos;
+	seq_printf(s, "DMA channel %d\n", chan);
+	seq_printf(s, "\tPriority : %s\n", str_prio[dma_channels[chan].prio]);
+	seq_printf(s, "\tUnaligned transfer bit: %s\n",
+		   DALGN & (1 << chan) ? "yes" : "no");
+	seq_printf(s, "\tDCSR  = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+		   dcsr, DCSR_STR(RUN), DCSR_STR(NODESC),
+		   DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN),
+		   DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN),
+		   DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST),
+		   DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND),
+		   DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR),
+		   DCSR_STR(STARTINTR), DCSR_STR(BUSERR));
+
+	seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+		   dcmd,
+		   DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
+		   DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
+		   DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
+		   DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH);
+	seq_printf(s, "\tDSADR = %08x\n", DSADR(chan));
+	seq_printf(s, "\tDTADR = %08x\n", DTADR(chan));
+	seq_printf(s, "\tDDADR = %08x\n", DDADR(chan));
+
+	return 0;
 }
 
 static int dbg_show_state(struct seq_file *s, void *p)
 {
-	int pos = 0;
-
 	/* basic device status */
-	pos += seq_printf(s, "DMA engine status\n");
-	pos += seq_printf(s, "\tChannel number: %d\n", num_dma_channels);
+	seq_puts(s, "DMA engine status\n");
+	seq_printf(s, "\tChannel number: %d\n", num_dma_channels);
 
-	return pos;
+	return 0;
 }
 
 #define DBGFS_FUNC_DECL(name) \
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index e17d871b934c..7f415ce74591 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -43,7 +43,11 @@ extern unsigned long s3c_irqwake_eintmask;
 
 /* IRQ masks for IRQs allowed to go to sleep (see irq.c) */
 extern unsigned long s3c_irqwake_intallow;
+#ifdef CONFIG_PM_SLEEP
 extern unsigned long s3c_irqwake_eintallow;
+#else
+#define s3c_irqwake_eintallow 0
+#endif
 
 /* per-cpu sleep functions */
 
@@ -58,16 +62,20 @@ extern unsigned long s3c_pm_flags;
 
 extern int s3c2410_cpu_suspend(unsigned long);
 
-#ifdef CONFIG_SAMSUNG_PM
+#ifdef CONFIG_PM_SLEEP
 extern int s3c_irq_wake(struct irq_data *data, unsigned int state);
-extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
 extern void s3c_cpu_resume(void);
 #else
 #define s3c_irq_wake NULL
-#define s3c_irqext_wake NULL
 #define s3c_cpu_resume NULL
 #endif
 
+#ifdef CONFIG_SAMSUNG_PM
+extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
+#else
+#define s3c_irqext_wake NULL
+#endif
+
 #ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK
 /**
  * s3c_pm_debug_smdkled() - Debug PM suspend/resume via SMDK Board LEDs
diff --git a/arch/arm/plat-samsung/pm-debug.c b/arch/arm/plat-samsung/pm-debug.c
index 39609601f407..64e15da33b42 100644
--- a/arch/arm/plat-samsung/pm-debug.c
+++ b/arch/arm/plat-samsung/pm-debug.c
@@ -23,6 +23,7 @@
 #include <plat/pm-common.h>
 
 #ifdef CONFIG_SAMSUNG_ATAGS
+#include <plat/pm.h>
 #include <mach/pm-core.h>
 #else
 static inline void s3c_pm_debug_init_uart(void) {}
diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c
index f8c0f9797dcf..82777c649774 100644
--- a/arch/arm/plat-samsung/pm.c
+++ b/arch/arm/plat-samsung/pm.c
@@ -65,26 +65,6 @@ int s3c_irqext_wake(struct irq_data *data, unsigned int state)
 	return 0;
 }
 
-/* s3c2410_pm_show_resume_irqs
- *
- * print any IRQs asserted at resume time (ie, we woke from)
-*/
-static void __maybe_unused s3c_pm_show_resume_irqs(int start,
-						   unsigned long which,
-						   unsigned long mask)
-{
-	int i;
-
-	which &= ~mask;
-
-	for (i = 0; i <= 31; i++) {
-		if (which & (1L<<i)) {
-			S3C_PMDBG("IRQ %d asserted at resume\n", start+i);
-		}
-	}
-}
-
-
 void (*pm_cpu_prep)(void);
 int (*pm_cpu_sleep)(unsigned long);
 
diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore
new file mode 100644
index 000000000000..f8b69d84238e
--- /dev/null
+++ b/arch/arm/vdso/.gitignore
@@ -0,0 +1 @@
+vdso.lds
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
new file mode 100644
index 000000000000..bab0a8be7924
--- /dev/null
+++ b/arch/arm/vdso/Makefile
@@ -0,0 +1,74 @@
+hostprogs-y := vdsomunge
+
+obj-vdso := vgettimeofday.o datapage.o
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
+ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+obj-y += vdso.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+CFLAGS_REMOVE_vdso.o = -pg
+
+# Force -O2 to avoid libgcc dependencies
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+CFLAGS_vgettimeofday.o = -O2
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency
+$(obj)/vdso.o : $(obj)/vdso.so
+
+# Link rule for the .so file
+$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+	$(call if_changed,vdsold)
+
+$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/vdsomunge FORCE
+	$(call if_changed,vdsomunge)
+
+# Strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSO    $@
+      cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \
+                   $(call cc-ldoption, -Wl$(comma)--build-id) \
+                   -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \
+                   -Wl,-z,common-page-size=4096 -o $@
+
+quiet_cmd_vdsomunge = MUNGE   $@
+      cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@
+
+#
+# Install the unstripped copy of vdso.so.dbg.  If our toolchain
+# supports build-id, install .build-id links as well.
+#
+# Cribbed from arch/x86/vdso/Makefile.
+#
+quiet_cmd_vdso_install = INSTALL $<
+define cmd_vdso_install
+	cp $< "$(MODLIB)/vdso/vdso.so"; \
+	if readelf -n $< | grep -q 'Build ID'; then \
+	  buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+	  first=`echo $$buildid | cut -b-2`; \
+	  last=`echo $$buildid | cut -b3-`; \
+	  mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+	  ln -sf "../../vdso.so" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+	fi
+endef
+
+$(MODLIB)/vdso: FORCE
+	@mkdir -p $(MODLIB)/vdso
+
+PHONY += vdso_install
+vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso FORCE
+	$(call cmd,vdso_install)
diff --git a/arch/arm/vdso/datapage.S b/arch/arm/vdso/datapage.S
new file mode 100644
index 000000000000..a2e60367931b
--- /dev/null
+++ b/arch/arm/vdso/datapage.S
@@ -0,0 +1,15 @@
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+
+	.align 2
+.L_vdso_data_ptr:
+	.long	_start - . - VDSO_DATA_SIZE
+
+ENTRY(__get_datapage)
+	.fnstart
+	adr	r0, .L_vdso_data_ptr
+	ldr	r1, [r0]
+	add	r0, r0, r1
+	bx	lr
+	.fnend
+ENDPROC(__get_datapage)
diff --git a/arch/arm64/kernel/cputable.c b/arch/arm/vdso/vdso.S
index fd3993cb060f..b2b97e3e7bab 100644
--- a/arch/arm64/kernel/cputable.c
+++ b/arch/arm/vdso/vdso.S
@@ -1,9 +1,9 @@
 /*
- * arch/arm64/kernel/cputable.c
+ * Adapted from arm64 version.
  *
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2012 ARM Limited
  *
- * This program is free software: you can redistribute it and/or modify
+ * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
@@ -14,20 +14,22 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
  */
 
 #include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
 
-#include <asm/cputable.h>
+	__PAGE_ALIGNED_DATA
 
-extern unsigned long __cpu_setup(void);
+	.globl vdso_start, vdso_end
+	.balign PAGE_SIZE
+vdso_start:
+	.incbin "arch/arm/vdso/vdso.so"
+	.balign PAGE_SIZE
+vdso_end:
 
-struct cpu_info cpu_table[] = {
-	{
-		.cpu_id_val	= 0x000f0000,
-		.cpu_id_mask	= 0x000f0000,
-		.cpu_name	= "AArch64 Processor",
-		.cpu_setup	= __cpu_setup,
-	},
-	{ /* Empty */ },
-};
+	.previous
diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S
new file mode 100644
index 000000000000..89ca89f12d23
--- /dev/null
+++ b/arch/arm/vdso/vdso.lds.S
@@ -0,0 +1,87 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * GNU linker script for the VDSO library.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Heavily based on the vDSO linker scripts for other archs.
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+SECTIONS
+{
+	PROVIDE(_start = .);
+
+	. = SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.rodata		: { *(.rodata*) }		:text
+
+	.text		: { *(.text*) }			:text	=0xe7f001f2
+
+	.got		: { *(.got) }
+	.rel.plt	: { *(.rel.plt) }
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
+	note		PT_NOTE		FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+VERSION
+{
+	LINUX_2.6 {
+	global:
+		__vdso_clock_gettime;
+		__vdso_gettimeofday;
+	local: *;
+	};
+}
diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
new file mode 100644
index 000000000000..9005b07296c8
--- /dev/null
+++ b/arch/arm/vdso/vdsomunge.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * vdsomunge - Host program which produces a shared object
+ * architecturally specified to be usable by both soft- and hard-float
+ * programs.
+ *
+ * The Procedure Call Standard for the ARM Architecture (ARM IHI
+ * 0042E) says:
+ *
+ *	6.4.1 VFP and Base Standard Compatibility
+ *
+ *	Code compiled for the VFP calling standard is compatible with
+ *	the base standard (and vice-versa) if no floating-point or
+ *	containerized vector arguments or results are used.
+ *
+ * And ELF for the ARM Architecture (ARM IHI 0044E) (Table 4-2) says:
+ *
+ *	If both EF_ARM_ABI_FLOAT_XXXX bits are clear, conformance to the
+ *	base procedure-call standard is implied.
+ *
+ * The VDSO is built with -msoft-float, as with the rest of the ARM
+ * kernel, and uses no floating point arguments or results.  The build
+ * process will produce a shared object that may or may not have the
+ * EF_ARM_ABI_FLOAT_SOFT flag set (it seems to depend on the binutils
+ * version; binutils starting with 2.24 appears to set it).  The
+ * EF_ARM_ABI_FLOAT_HARD flag should definitely not be set, and this
+ * program will error out if it is.
+ *
+ * If the soft-float flag is set, this program clears it.  That's all
+ * it does.
+ */
+
+#define _GNU_SOURCE
+
+#include <byteswap.h>
+#include <elf.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HOST_ORDER ELFDATA2LSB
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define HOST_ORDER ELFDATA2MSB
+#endif
+
+/* Some of the ELF constants we'd like to use were added to <elf.h>
+ * relatively recently.
+ */
+#ifndef EF_ARM_EABI_VER5
+#define EF_ARM_EABI_VER5 0x05000000
+#endif
+
+#ifndef EF_ARM_ABI_FLOAT_SOFT
+#define EF_ARM_ABI_FLOAT_SOFT 0x200
+#endif
+
+#ifndef EF_ARM_ABI_FLOAT_HARD
+#define EF_ARM_ABI_FLOAT_HARD 0x400
+#endif
+
+static const char *outfile;
+
+static void cleanup(void)
+{
+	if (error_message_count > 0 && outfile != NULL)
+		unlink(outfile);
+}
+
+static Elf32_Word read_elf_word(Elf32_Word word, bool swap)
+{
+	return swap ? bswap_32(word) : word;
+}
+
+static Elf32_Half read_elf_half(Elf32_Half half, bool swap)
+{
+	return swap ? bswap_16(half) : half;
+}
+
+static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap)
+{
+	*dst = swap ? bswap_32(val) : val;
+}
+
+int main(int argc, char **argv)
+{
+	const Elf32_Ehdr *inhdr;
+	bool clear_soft_float;
+	const char *infile;
+	Elf32_Word e_flags;
+	const void *inbuf;
+	struct stat stat;
+	void *outbuf;
+	bool swap;
+	int outfd;
+	int infd;
+
+	atexit(cleanup);
+
+	if (argc != 3)
+		error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]);
+
+	infile = argv[1];
+	outfile = argv[2];
+
+	infd = open(infile, O_RDONLY);
+	if (infd < 0)
+		error(EXIT_FAILURE, errno, "Cannot open %s", infile);
+
+	if (fstat(infd, &stat) != 0)
+		error(EXIT_FAILURE, errno, "Failed stat for %s", infile);
+
+	inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0);
+	if (inbuf == MAP_FAILED)
+		error(EXIT_FAILURE, errno, "Failed to map %s", infile);
+
+	close(infd);
+
+	inhdr = inbuf;
+
+	if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0)
+		error(EXIT_FAILURE, 0, "Not an ELF file");
+
+	if (inhdr->e_ident[EI_CLASS] != ELFCLASS32)
+		error(EXIT_FAILURE, 0, "Unsupported ELF class");
+
+	swap = inhdr->e_ident[EI_DATA] != HOST_ORDER;
+
+	if (read_elf_half(inhdr->e_type, swap) != ET_DYN)
+		error(EXIT_FAILURE, 0, "Not a shared object");
+
+	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) {
+		error(EXIT_FAILURE, 0, "Unsupported architecture %#x",
+		      inhdr->e_machine);
+	}
+
+	e_flags = read_elf_word(inhdr->e_flags, swap);
+
+	if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) {
+		error(EXIT_FAILURE, 0, "Unsupported EABI version %#x",
+		      EF_ARM_EABI_VERSION(e_flags));
+	}
+
+	if (e_flags & EF_ARM_ABI_FLOAT_HARD)
+		error(EXIT_FAILURE, 0,
+		      "Unexpected hard-float flag set in e_flags");
+
+	clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT);
+
+	outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+	if (outfd < 0)
+		error(EXIT_FAILURE, errno, "Cannot open %s", outfile);
+
+	if (ftruncate(outfd, stat.st_size) != 0)
+		error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile);
+
+	outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		      outfd, 0);
+	if (outbuf == MAP_FAILED)
+		error(EXIT_FAILURE, errno, "Failed to map %s", outfile);
+
+	close(outfd);
+
+	memcpy(outbuf, inbuf, stat.st_size);
+
+	if (clear_soft_float) {
+		Elf32_Ehdr *outhdr;
+
+		outhdr = outbuf;
+		e_flags &= ~EF_ARM_ABI_FLOAT_SOFT;
+		write_elf_word(e_flags, &outhdr->e_flags, swap);
+	}
+
+	if (msync(outbuf, stat.st_size, MS_SYNC) != 0)
+		error(EXIT_FAILURE, errno, "Failed to sync %s", outfile);
+
+	return EXIT_SUCCESS;
+}
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..79214d5ff097
--- /dev/null
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2015 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/hrtimer.h>
+#include <linux/time.h>
+#include <asm/arch_timer.h>
+#include <asm/barrier.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+#include <asm/unistd.h>
+#include <asm/vdso_datapage.h>
+
+#ifndef CONFIG_AEABI
+#error This code depends on AEABI system call conventions
+#endif
+
+extern struct vdso_data *__get_datapage(void);
+
+static notrace u32 __vdso_read_begin(const struct vdso_data *vdata)
+{
+	u32 seq;
+repeat:
+	seq = ACCESS_ONCE(vdata->seq_count);
+	if (seq & 1) {
+		cpu_relax();
+		goto repeat;
+	}
+	return seq;
+}
+
+static notrace u32 vdso_read_begin(const struct vdso_data *vdata)
+{
+	u32 seq;
+
+	seq = __vdso_read_begin(vdata);
+
+	smp_rmb(); /* Pairs with smp_wmb in vdso_write_end */
+	return seq;
+}
+
+static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start)
+{
+	smp_rmb(); /* Pairs with smp_wmb in vdso_write_begin */
+	return vdata->seq_count != start;
+}
+
+static notrace long clock_gettime_fallback(clockid_t _clkid,
+					   struct timespec *_ts)
+{
+	register struct timespec *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_gettime;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+static notrace int do_realtime_coarse(struct timespec *ts,
+				      struct vdso_data *vdata)
+{
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		ts->tv_sec = vdata->xtime_coarse_sec;
+		ts->tv_nsec = vdata->xtime_coarse_nsec;
+
+	} while (vdso_read_retry(vdata, seq));
+
+	return 0;
+}
+
+static notrace int do_monotonic_coarse(struct timespec *ts,
+				       struct vdso_data *vdata)
+{
+	struct timespec tomono;
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		ts->tv_sec = vdata->xtime_coarse_sec;
+		ts->tv_nsec = vdata->xtime_coarse_nsec;
+
+		tomono.tv_sec = vdata->wtm_clock_sec;
+		tomono.tv_nsec = vdata->wtm_clock_nsec;
+
+	} while (vdso_read_retry(vdata, seq));
+
+	ts->tv_sec += tomono.tv_sec;
+	timespec_add_ns(ts, tomono.tv_nsec);
+
+	return 0;
+}
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+
+static notrace u64 get_ns(struct vdso_data *vdata)
+{
+	u64 cycle_delta;
+	u64 cycle_now;
+	u64 nsec;
+
+	cycle_now = arch_counter_get_cntvct();
+
+	cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask;
+
+	nsec = (cycle_delta * vdata->cs_mult) + vdata->xtime_clock_snsec;
+	nsec >>= vdata->cs_shift;
+
+	return nsec;
+}
+
+static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
+{
+	u64 nsecs;
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		if (!vdata->tk_is_cntvct)
+			return -1;
+
+		ts->tv_sec = vdata->xtime_clock_sec;
+		nsecs = get_ns(vdata);
+
+	} while (vdso_read_retry(vdata, seq));
+
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs);
+
+	return 0;
+}
+
+static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
+{
+	struct timespec tomono;
+	u64 nsecs;
+	u32 seq;
+
+	do {
+		seq = vdso_read_begin(vdata);
+
+		if (!vdata->tk_is_cntvct)
+			return -1;
+
+		ts->tv_sec = vdata->xtime_clock_sec;
+		nsecs = get_ns(vdata);
+
+		tomono.tv_sec = vdata->wtm_clock_sec;
+		tomono.tv_nsec = vdata->wtm_clock_nsec;
+
+	} while (vdso_read_retry(vdata, seq));
+
+	ts->tv_sec += tomono.tv_sec;
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs + tomono.tv_nsec);
+
+	return 0;
+}
+
+#else /* CONFIG_ARM_ARCH_TIMER */
+
+static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata)
+{
+	return -1;
+}
+
+static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata)
+{
+	return -1;
+}
+
+#endif /* CONFIG_ARM_ARCH_TIMER */
+
+notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
+{
+	struct vdso_data *vdata;
+	int ret = -1;
+
+	vdata = __get_datapage();
+
+	switch (clkid) {
+	case CLOCK_REALTIME_COARSE:
+		ret = do_realtime_coarse(ts, vdata);
+		break;
+	case CLOCK_MONOTONIC_COARSE:
+		ret = do_monotonic_coarse(ts, vdata);
+		break;
+	case CLOCK_REALTIME:
+		ret = do_realtime(ts, vdata);
+		break;
+	case CLOCK_MONOTONIC:
+		ret = do_monotonic(ts, vdata);
+		break;
+	default:
+		break;
+	}
+
+	if (ret)
+		ret = clock_gettime_fallback(clkid, ts);
+
+	return ret;
+}
+
+static notrace long gettimeofday_fallback(struct timeval *_tv,
+					  struct timezone *_tz)
+{
+	register struct timezone *tz asm("r1") = _tz;
+	register struct timeval *tv asm("r0") = _tv;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_gettimeofday;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (tv), "r" (tz), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	struct timespec ts;
+	struct vdso_data *vdata;
+	int ret;
+
+	vdata = __get_datapage();
+
+	ret = do_realtime(&ts, vdata);
+	if (ret)
+		return gettimeofday_fallback(tv, tz);
+
+	if (tv) {
+		tv->tv_sec = ts.tv_sec;
+		tv->tv_usec = ts.tv_nsec / 1000;
+	}
+	if (tz) {
+		tz->tz_minuteswest = vdata->tz_minuteswest;
+		tz->tz_dsttime = vdata->tz_dsttime;
+	}
+
+	return ret;
+}
+
+/* Avoid unresolved references emitted by GCC */
+
+void __aeabi_unwind_cpp_pr0(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+}
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 263a2044c65b..224081ccc92f 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -53,105 +53,33 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
-/* map fgmfn of domid to lpfn in the current domain */
-static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
-			    unsigned int domid)
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t *mfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid,
+			       struct page **pages)
 {
-	int rc;
-	struct xen_add_to_physmap_range xatp = {
-		.domid = DOMID_SELF,
-		.foreign_domid = domid,
-		.size = 1,
-		.space = XENMAPSPACE_gmfn_foreign,
-	};
-	xen_ulong_t idx = fgmfn;
-	xen_pfn_t gpfn = lpfn;
-	int err = 0;
-
-	set_xen_guest_handle(xatp.idxs, &idx);
-	set_xen_guest_handle(xatp.gpfns, &gpfn);
-	set_xen_guest_handle(xatp.errs, &err);
-
-	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
-	if (rc || err) {
-		pr_warn("Failed to map pfn to mfn rc:%d:%d pfn:%lx mfn:%lx\n",
-			rc, err, lpfn, fgmfn);
-		return 1;
-	}
-	return 0;
-}
-
-struct remap_data {
-	xen_pfn_t fgmfn; /* foreign domain's gmfn */
-	pgprot_t prot;
-	domid_t  domid;
-	struct vm_area_struct *vma;
-	int index;
-	struct page **pages;
-	struct xen_remap_mfn_info *info;
-};
-
-static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
-			void *data)
-{
-	struct remap_data *info = data;
-	struct page *page = info->pages[info->index++];
-	unsigned long pfn = page_to_pfn(page);
-	pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
-
-	if (map_foreign_page(pfn, info->fgmfn, info->domid))
-		return -EFAULT;
-	set_pte_at(info->vma->vm_mm, addr, ptep, pte);
-
-	return 0;
+	return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+					 prot, domid, pages);
 }
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
 
+/* Not used by XENFEAT_auto_translated guests. */
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
-			       pgprot_t prot, unsigned domid,
-			       struct page **pages)
+                              unsigned long addr,
+                              xen_pfn_t mfn, int nr,
+                              pgprot_t prot, unsigned domid,
+                              struct page **pages)
 {
-	int err;
-	struct remap_data data;
-
-	/* TBD: Batching, current sole caller only does page at a time */
-	if (nr > 1)
-		return -EINVAL;
-
-	data.fgmfn = mfn;
-	data.prot = prot;
-	data.domid = domid;
-	data.vma = vma;
-	data.index = 0;
-	data.pages = pages;
-	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
-				  remap_pte_fn, &data);
-	return err;
+	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 			       int nr, struct page **pages)
 {
-	int i;
-
-	for (i = 0; i < nr; i++) {
-		struct xen_remove_from_physmap xrp;
-		unsigned long rc, pfn;
-
-		pfn = page_to_pfn(pages[i]);
-
-		xrp.domid = DOMID_SELF;
-		xrp.gpfn = pfn;
-		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
-		if (rc) {
-			pr_warn("Failed to unmap pfn:%lx rc:%ld\n",
-				pfn, rc);
-			return rc;
-		}
-	}
-	return 0;
+	return xen_xlate_unmap_gfn_range(vma, nr, pages);
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b8e97331ffb..b8d96f1554af 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,7 +1,7 @@
 config ARM64
 	def_bool y
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -143,6 +143,13 @@ config KERNEL_MODE_NEON
 config FIX_EARLYCON_MEM
 	def_bool y
 
+config PGTABLE_LEVELS
+	int
+	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
+	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
+	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
+	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -361,6 +368,27 @@ config ARM64_ERRATUM_832075
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_845719
+	bool "Cortex-A53: 845719: a load might read incorrect data"
+	depends on COMPAT
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 845719 on Cortex-A53 parts up to r0p4.
+
+	  When running a compat (AArch32) userspace on an affected Cortex-A53
+	  part, a load at EL0 from a virtual address that matches the bottom 32
+	  bits of the virtual address used by a recent load at (AArch64) EL1
+	  might return incorrect data.
+
+	  The workaround is to write the contextidr_el1 register on exception
+	  return to a 32-bit task.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
 endmenu
 
 
@@ -413,13 +441,6 @@ config ARM64_VA_BITS
 	default 42 if ARM64_VA_BITS_42
 	default 48 if ARM64_VA_BITS_48
 
-config ARM64_PGTABLE_LEVELS
-	int
-	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
-	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
-	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
-	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
-
 config CPU_BIG_ENDIAN
        bool "Build big-endian kernel"
        help
@@ -455,8 +476,8 @@ config SCHED_SMT
 	  places. If unsure say N here.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-64)"
-	range 2 64
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
 	# These have to remain sorted largest to smallest
 	default "64"
@@ -470,6 +491,10 @@ config HOTPLUG_CPU
 
 source kernel/Kconfig.preempt
 
+config UP_LATE_INIT
+       def_bool y
+       depends on !SMP
+
 config HZ
 	int
 	default 100
@@ -670,7 +695,7 @@ source "fs/Kconfig.binfmt"
 
 config COMPAT
 	bool "Kernel support for 32-bit EL0"
-	depends on !ARM64_64K_PAGES
+	depends on !ARM64_64K_PAGES || EXPERT
 	select COMPAT_BINFMT_ELF
 	select HAVE_UID16
 	select OLD_SIGSUSPEND3
@@ -681,6 +706,10 @@ config COMPAT
 	  the user helper functions, VFP support and the ptrace interface are
 	  handled appropriately by the kernel.
 
+	  If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
+	  will only be able to execute AArch32 binaries that were compiled with
+	  64k aligned segments.
+
 	  If you want to execute 32-bit userspace applications, say Y.
 
 config SYSVIPC_COMPAT
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 4a8741073c90..d6285ef9b5f9 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -89,4 +89,6 @@ config DEBUG_ALIGN_RODATA
 
 	  If in doubt, say N
 
+source "drivers/hwtracing/coresight/Kconfig"
+
 endmenu
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 69ceedc982a5..4d2a925998f9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -48,7 +48,7 @@ core-$(CONFIG_KVM) += arch/arm64/kvm/
 core-$(CONFIG_XEN) += arch/arm64/xen/
 core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
 libs-y		:= arch/arm64/lib/ $(libs-y)
-libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/
+core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 # Default target when executing plain make
 KBUILD_IMAGE	:= Image.gz
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index 2e25de0800b9..83578e766b94 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -45,6 +45,10 @@
 	status = "ok";
 };
 
+&sgenet1 {
+	status = "ok";
+};
+
 &xgenet {
 	status = "ok";
 };
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index f1ad9c2ab2e9..e74f6e0a208c 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -186,6 +186,16 @@
 				clock-output-names = "sge0clk";
 			};
 
+			sge1clk: sge1clk@1f21c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f21c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				csr-mask = <0xc>;
+				clock-output-names = "sge1clk";
+			};
+
 			xge0clk: xge0clk@1f61c000 {
 				compatible = "apm,xgene-device-clock";
 				#clock-cells = <1>;
@@ -622,27 +632,45 @@
 		};
 
 		sgenet0: ethernet@1f210000 {
-			compatible = "apm,xgene-enet";
+			compatible = "apm,xgene1-sgenet";
 			status = "disabled";
 			reg = <0x0 0x1f210000 0x0 0xd100>,
 			      <0x0 0x1f200000 0x0 0Xc300>,
 			      <0x0 0x1B000000 0x0 0X200>;
 			reg-names = "enet_csr", "ring_csr", "ring_cmd";
-			interrupts = <0x0 0xA0 0x4>;
+			interrupts = <0x0 0xA0 0x4>,
+				     <0x0 0xA1 0x4>;
 			dma-coherent;
 			clocks = <&sge0clk 0>;
 			local-mac-address = [00 00 00 00 00 00];
 			phy-connection-type = "sgmii";
 		};
 
+		sgenet1: ethernet@1f210030 {
+			compatible = "apm,xgene1-sgenet";
+			status = "disabled";
+			reg = <0x0 0x1f210030 0x0 0xd100>,
+			      <0x0 0x1f200000 0x0 0Xc300>,
+			      <0x0 0x1B000000 0x0 0X8000>;
+			reg-names = "enet_csr", "ring_csr", "ring_cmd";
+			interrupts = <0x0 0xAC 0x4>,
+				     <0x0 0xAD 0x4>;
+			port-id = <1>;
+			dma-coherent;
+			clocks = <&sge1clk 0>;
+			local-mac-address = [00 00 00 00 00 00];
+			phy-connection-type = "sgmii";
+		};
+
 		xgenet: ethernet@1f610000 {
-			compatible = "apm,xgene-enet";
+			compatible = "apm,xgene1-xgenet";
 			status = "disabled";
 			reg = <0x0 0x1f610000 0x0 0xd100>,
 			      <0x0 0x1f600000 0x0 0Xc300>,
 			      <0x0 0x18000000 0x0 0X200>;
 			reg-names = "enet_csr", "ring_csr", "ring_cmd";
-			interrupts = <0x0 0x60 0x4>;
+			interrupts = <0x0 0x60 0x4>,
+				     <0x0 0x61 0x4>;
 			dma-coherent;
 			clocks = <&xge0clk 0>;
 			/* mac address will be overwritten by the bootloader */
diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
index ea2b5666a16f..c9b89efe0f56 100644
--- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
@@ -8,7 +8,7 @@
  */
 
 	/* SoC fixed clocks */
-	soc_uartclk: refclk72738khz {
+	soc_uartclk: refclk7273800hz {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
 		clock-frequency = <7273800>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index be1f12a5a5f0..4e03d8dd23f6 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -31,8 +31,12 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_ARCH_EXYNOS7=y
 CONFIG_ARCH_FSL_LS2085A=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_SEATTLE=y
+CONFIG_ARCH_TEGRA=y
+CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_VEXPRESS=y
 CONFIG_ARCH_XGENE=y
@@ -48,7 +52,7 @@ CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
 CONFIG_CPU_IDLE=y
-CONFIG_ARM64_CPUIDLE=y
+CONFIG_ARM_CPUIDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -62,6 +66,7 @@ CONFIG_BPF_JIT=y
 # CONFIG_WIRELESS is not set
 CONFIG_NET_9P=y
 CONFIG_NET_9P_VIRTIO=y
+# CONFIG_TEGRA_AHB is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
@@ -81,6 +86,7 @@ CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
 CONFIG_NET_XGENE=y
+CONFIG_SKY2=y
 CONFIG_SMC91X=y
 CONFIG_SMSC911X=y
 # CONFIG_WLAN is not set
@@ -100,6 +106,8 @@ CONFIG_SPI=y
 CONFIG_SPI_PL022=y
 CONFIG_GPIO_PL061=y
 CONFIG_GPIO_XGENE=y
+CONFIG_POWER_RESET_XGENE=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
@@ -112,10 +120,10 @@ CONFIG_LOGO=y
 CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
 CONFIG_USB_ULPI=y
 CONFIG_MMC=y
 CONFIG_MMC_ARMMMCI=y
@@ -125,6 +133,7 @@ CONFIG_MMC_SPI=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
 CONFIG_RTC_DRV_XGENE=y
+CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
 # CONFIG_IOMMU_SUPPORT is not set
@@ -143,8 +152,10 @@ CONFIG_CUSE=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
+CONFIG_EFIVAR_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_9P_FS=y
 CONFIG_NLS_CODEPAGE_437=y
@@ -159,7 +170,6 @@ CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_FTRACE is not set
-CONFIG_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 432e4841cd81..a2a7fbcacc14 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
 0:	mov	v4.16b, v3.16b
 1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
-	aese	v1.16b, v4.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
 2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
-	aese	v1.16b, v5.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
 3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
-	aese	v1.16b, v3.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
 	bpl	1b
 	aese	v0.16b, v4.16b
@@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
 	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
-	aese	v1.16b, v4.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
 3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
-	aese	v1.16b, v5.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
 4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
-	aese	v1.16b, v3.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
 	ld1	{v5.2d}, [x10], #16		/* load next round key */
 	bpl	2b
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 685a18f731eb..78f3cfe92c08 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -45,18 +45,14 @@
 
 	.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3
 	aes\de		\i0\().16b, \k\().16b
-	.ifnb		\i1
-	aes\de		\i1\().16b, \k\().16b
-	.ifnb		\i3
-	aes\de		\i2\().16b, \k\().16b
-	aes\de		\i3\().16b, \k\().16b
-	.endif
-	.endif
 	aes\mc		\i0\().16b, \i0\().16b
 	.ifnb		\i1
+	aes\de		\i1\().16b, \k\().16b
 	aes\mc		\i1\().16b, \i1\().16b
 	.ifnb		\i3
+	aes\de		\i2\().16b, \k\().16b
 	aes\mc		\i2\().16b, \i2\().16b
+	aes\de		\i3\().16b, \k\().16b
 	aes\mc		\i3\().16b, \i3\().16b
 	.endif
 	.endif
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__ecb-aes-" MODE,
 	.cra_driver_name	= "__driver-ecb-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		= 7,
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__cbc-aes-" MODE,
 	.cra_driver_name	= "__driver-cbc-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		= 7,
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__ctr-aes-" MODE,
 	.cra_driver_name	= "__driver-ctr-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		= 7,
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__xts-aes-" MODE,
 	.cra_driver_name	= "__driver-xts-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
 	.cra_alignmask		= 7,
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
 	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
 
 	/*
-	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-	 * 			  u8 *head, long bytes)
+	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]
 
 	/* load state */
-	ldr		dga, [x2]
-	ldr		dgb, [x2, #16]
+	ldr		dga, [x0]
+	ldr		dgb, [x0, #16]
 
-	/* load partial state (if supplied) */
-	cbz		x3, 0f
-	ld1		{v8.4s-v11.4s}, [x3]
-	b		1f
+	/* load sha1_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1
 
-1:
 CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v9.16b, v9.16b		)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-2:	add		t0.4s, v8.4s, k0.4s
+1:	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0,  8,  9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbnz		w0, 0b
+	cbnz		w2, 0b
 
 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	mov		x4, #0
 	mov		v11.d[0], xzr
 	mov		v11.d[1], x7
-	b		2b
+	b		1b
 
 	/* store new state */
-3:	str		dga, [x2]
-	str		dgb, [x2, #16]
+3:	str		dga, [x0]
+	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-				  u8 *head, long bytes);
+struct sha1_ce_state {
+	struct sha1_state	sst;
+	u32			finalize;
+};
 
-static int sha1_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+				  int blocks);
 
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-	return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if ((partial + len) >= SHA1_BLOCK_SIZE) {
-		int blocks;
-
-		if (partial) {
-			int p = SHA1_BLOCK_SIZE - partial;
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
-			memcpy(sctx->buffer + partial, data, p);
-			data += p;
-			len -= p;
-		}
-
-		blocks = len / SHA1_BLOCK_SIZE;
-		len %= SHA1_BLOCK_SIZE;
-
-		kernel_neon_begin_partial(16);
-		sha1_ce_transform(blocks, data, sctx->state,
-				  partial ? sctx->buffer : NULL, 0);
-		kernel_neon_end();
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
 
-		data += blocks * SHA1_BLOCK_SIZE;
-		partial = 0;
-	}
-	if (len)
-		memcpy(sctx->buffer + partial, data, len);
 	return 0;
 }
 
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
 {
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be64 bits = cpu_to_be64(sctx->count << 3);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	u32 padlen = SHA1_BLOCK_SIZE
-		     - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-	sha1_update(desc, padding, padlen);
-	sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int blocks;
-	int i;
-
-	if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
-		sha1_update(desc, data, len);
-		return sha1_final(desc, out);
-	}
+	ASM_EXPORT(sha1_ce_offsetof_count,
+		   offsetof(struct sha1_ce_state, sst.count));
+	ASM_EXPORT(sha1_ce_offsetof_finalize,
+		   offsetof(struct sha1_ce_state, finalize));
 
 	/*
-	 * Use a fast path if the input is a multiple of 64 bytes. In
-	 * this case, there is no need to copy data around, and we can
-	 * perform the entire digest calculation in a single invocation
-	 * of sha1_ce_transform()
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
 	 */
-	blocks = len / SHA1_BLOCK_SIZE;
+	sctx->finalize = finalize;
 
 	kernel_neon_begin_partial(16);
-	sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	if (!finalize)
+		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state *dst = out;
-
-	*dst = *sctx;
-	return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state const *src = in;
-
-	*sctx = *src;
-	return 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+	return sha1_base_finish(desc, out);
 }
 
 static struct shash_alg alg = {
-	.init			= sha1_init,
-	.update			= sha1_update,
-	.final			= sha1_final,
-	.finup			= sha1_finup,
-	.export			= sha1_export,
-	.import			= sha1_import,
-	.descsize		= sizeof(struct sha1_state),
+	.init			= sha1_base_init,
+	.update			= sha1_ce_update,
+	.final			= sha1_ce_final,
+	.finup			= sha1_ce_finup,
+	.descsize		= sizeof(struct sha1_ce_state),
 	.digestsize		= SHA1_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha1_state),
 	.base			= {
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-ce",
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@
 	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 
 	/*
-	 * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-	 *                        u8 *head, long bytes)
+	 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha2_ce_transform)
 	/* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
 	ld1		{v12.4s-v15.4s}, [x8]
 
 	/* load state */
-	ldp		dga, dgb, [x2]
+	ldp		dga, dgb, [x0]
 
-	/* load partial input (if supplied) */
-	cbz		x3, 0f
-	ld1		{v16.4s-v19.4s}, [x3]
-	b		1f
+	/* load sha256_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v16.4s-v19.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1
 
-1:
 CPU_LE(	rev32		v16.16b, v16.16b	)
 CPU_LE(	rev32		v17.16b, v17.16b	)
 CPU_LE(	rev32		v18.16b, v18.16b	)
 CPU_LE(	rev32		v19.16b, v19.16b	)
 
-2:	add		t0.4s, v16.4s, v0.4s
+1:	add		t0.4s, v16.4s, v0.4s
 	mov		dg0v.16b, dgav.16b
 	mov		dg1v.16b, dgbv.16b
 
@@ -131,15 +128,15 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	add		dgbv.4s, dgbv.4s, dg1v.4s
 
 	/* handled all input blocks? */
-	cbnz		w0, 0b
+	cbnz		w2, 0b
 
 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha256_ce_offsetof_count]
 	movi		v17.2d, #0
 	mov		x8, #0x80000000
 	movi		v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	mov		x4, #0
 	mov		v19.d[0], xzr
 	mov		v19.d[1], x7
-	b		2b
+	b		1b
 
 	/* store new state */
-3:	stp		dga, dgb, [x2]
+3:	stp		dga, dgb, [x0]
 	ret
 ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-				 u8 *head, long bytes);
-
-static int sha224_init(struct shash_desc *desc)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha256_state){
-		.state = {
-			SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
-			SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
-		}
-	};
-	return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha256_state){
-		.state = {
-			SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-			SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
-		}
-	};
-	return 0;
-}
-
-static int sha2_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if ((partial + len) >= SHA256_BLOCK_SIZE) {
-		int blocks;
-
-		if (partial) {
-			int p = SHA256_BLOCK_SIZE - partial;
-
-			memcpy(sctx->buf + partial, data, p);
-			data += p;
-			len -= p;
-		}
+struct sha256_ce_state {
+	struct sha256_state	sst;
+	u32			finalize;
+};
 
-		blocks = len / SHA256_BLOCK_SIZE;
-		len %= SHA256_BLOCK_SIZE;
+asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+				  int blocks);
 
-		kernel_neon_begin_partial(28);
-		sha2_ce_transform(blocks, data, sctx->state,
-				  partial ? sctx->buf : NULL, 0);
-		kernel_neon_end();
-
-		data += blocks * SHA256_BLOCK_SIZE;
-		partial = 0;
-	}
-	if (len)
-		memcpy(sctx->buf + partial, data, len);
-	return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int len)
 {
-	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be64 bits = cpu_to_be64(sctx->count << 3);
-	u32 padlen = SHA256_BLOCK_SIZE
-		     - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
-	sha2_update(desc, padding, padlen);
-	sha2_update(desc, (const u8 *)&bits, sizeof(bits));
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_final(desc);
-
-	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
-	return 0;
-}
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_final(desc);
-
-	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(28);
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
 
-	*sctx = (struct sha256_state){};
 	return 0;
 }
 
-static void sha2_finup(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	int blocks;
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
-	if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
-		sha2_update(desc, data, len);
-		sha2_final(desc);
-		return;
-	}
+	ASM_EXPORT(sha256_ce_offsetof_count,
+		   offsetof(struct sha256_ce_state, sst.count));
+	ASM_EXPORT(sha256_ce_offsetof_finalize,
+		   offsetof(struct sha256_ce_state, finalize));
 
 	/*
-	 * Use a fast path if the input is a multiple of 64 bytes. In
-	 * this case, there is no need to copy data around, and we can
-	 * perform the entire digest calculation in a single invocation
-	 * of sha2_ce_transform()
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
 	 */
-	blocks = len / SHA256_BLOCK_SIZE;
+	sctx->finalize = finalize;
 
 	kernel_neon_begin_partial(28);
-	sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	if (!finalize)
+		sha256_base_do_finalize(desc,
+					(sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
+	return sha256_base_finish(desc, out);
 }
 
-static int sha224_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
+static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_finup(desc, data, len);
-
-	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
-	return 0;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_finup(desc, data, len);
-
-	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
-	return 0;
-}
-
-static int sha2_export(struct shash_desc *desc, void *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	struct sha256_state *dst = out;
-
-	*dst = *sctx;
-	return 0;
-}
-
-static int sha2_import(struct shash_desc *desc, const void *in)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	struct sha256_state const *src = in;
-
-	*sctx = *src;
-	return 0;
+	kernel_neon_begin_partial(28);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+	return sha256_base_finish(desc, out);
 }
 
 static struct shash_alg algs[] = { {
-	.init			= sha224_init,
-	.update			= sha2_update,
-	.final			= sha224_final,
-	.finup			= sha224_finup,
-	.export			= sha2_export,
-	.import			= sha2_import,
-	.descsize		= sizeof(struct sha256_state),
+	.init			= sha224_base_init,
+	.update			= sha256_ce_update,
+	.final			= sha256_ce_final,
+	.finup			= sha256_ce_finup,
+	.descsize		= sizeof(struct sha256_ce_state),
 	.digestsize		= SHA224_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha256_state),
 	.base			= {
 		.cra_name		= "sha224",
 		.cra_driver_name	= "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
 		.cra_module		= THIS_MODULE,
 	}
 }, {
-	.init			= sha256_init,
-	.update			= sha2_update,
-	.final			= sha256_final,
-	.finup			= sha256_finup,
-	.export			= sha2_export,
-	.import			= sha2_import,
-	.descsize		= sizeof(struct sha256_state),
+	.init			= sha256_base_init,
+	.update			= sha256_ce_update,
+	.final			= sha256_ce_final,
+	.finup			= sha256_ce_finup,
+	.descsize		= sizeof(struct sha256_ce_state),
 	.digestsize		= SHA256_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha256_state),
 	.base			= {
 		.cra_name		= "sha256",
 		.cra_driver_name	= "sha256-ce",
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 750bac4e637e..144b64ad96c3 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -159,4 +159,52 @@ lr	.req	x30		// link register
 	orr	\rd, \lbits, \hbits, lsl #32
 	.endm
 
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
+ * <symbol> is within the range +/- 4 GB of the PC.
+ */
+	/*
+	 * @dst: destination register (64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional scratch register to be used if <dst> == sp, which
+	 *       is not allowed in an adrp instruction
+	 */
+	.macro	adr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	add	\dst, \dst, :lo12:\sym
+	.else
+	adrp	\tmp, \sym
+	add	\dst, \tmp, :lo12:\sym
+	.endif
+	.endm
+
+	/*
+	 * @dst: destination register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
+	 *       32-bit wide register, in which case it cannot be used to hold
+	 *       the address
+	 */
+	.macro	ldr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	ldr	\dst, [\dst, :lo12:\sym]
+	.else
+	adrp	\tmp, \sym
+	ldr	\dst, [\tmp, :lo12:\sym]
+	.endif
+	.endm
+
+	/*
+	 * @src: source register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: mandatory 64-bit scratch register to calculate the address
+	 *       while <src> needs to be preserved.
+	 */
+	.macro	str_l, src, sym, tmp
+	adrp	\tmp, \sym
+	str	\src, [\tmp, :lo12:\sym]
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index cb9593079f29..d8c25b7b18fb 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret; \
 })
 
-#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
-	cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \
-				o1, o2, n1, n2)
+#define _protect_cmpxchg_local(pcp, o, n)			\
+({								\
+	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
+	preempt_disable();					\
+	__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n);	\
+	preempt_enable();					\
+	__ret;							\
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)		\
+({									\
+	int __ret;							\
+	preempt_disable();						\
+	__ret = cmpxchg_double_local(	raw_cpu_ptr(&(ptr1)),		\
+					raw_cpu_ptr(&(ptr2)),		\
+					o1, o2, n1, n2);		\
+	preempt_enable();						\
+	__ret;								\
+})
 
 #define cmpxchg64(ptr,o,n)		cmpxchg((ptr),(o),(n))
 #define cmpxchg64_local(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b6c16d5f622f..82cb9f98ba1a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -23,11 +23,24 @@
 
 #define ARM64_WORKAROUND_CLEAN_CACHE		0
 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
+#define ARM64_WORKAROUND_845719			2
 
-#define ARM64_NCAPS				2
+#define ARM64_NCAPS				3
 
 #ifndef __ASSEMBLY__
 
+struct arm64_cpu_capabilities {
+	const char *desc;
+	u16 capability;
+	bool (*matches)(const struct arm64_cpu_capabilities *);
+	union {
+		struct {	/* To be used for erratum handling only */
+			u32 midr_model;
+			u32 midr_range_min, midr_range_max;
+		};
+	};
+};
+
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
 static inline bool cpu_have_feature(unsigned int num)
@@ -51,7 +64,10 @@ static inline void cpus_set_cap(unsigned int num)
 		__set_bit(num, cpu_hwcaps);
 }
 
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info);
 void check_local_cpu_errata(void);
+void check_local_cpu_features(void);
 bool cpu_supports_mixed_endian_el0(void);
 bool system_supports_mixed_endian_el0(void);
 
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
index c60643f14cda..141b2fcabaa6 100644
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -4,10 +4,10 @@
 #include <asm/proc-fns.h>
 
 #ifdef CONFIG_CPU_IDLE
-extern int cpu_init_idle(unsigned int cpu);
+extern int arm_cpuidle_init(unsigned int cpu);
 extern int cpu_suspend(unsigned long arg);
 #else
-static inline int cpu_init_idle(unsigned int cpu)
+static inline int arm_cpuidle_init(unsigned int cpu)
 {
 	return -EOPNOTSUPP;
 }
@@ -17,5 +17,8 @@ static inline int cpu_suspend(unsigned long arg)
 	return -EOPNOTSUPP;
 }
 #endif
-
+static inline int arm_cpuidle_suspend(int index)
+{
+	return cpu_suspend(index);
+}
 #endif
diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h
deleted file mode 100644
index e3bd983d3661..000000000000
--- a/arch/arm64/include/asm/cputable.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * arch/arm64/include/asm/cputable.h
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_CPUTABLE_H
-#define __ASM_CPUTABLE_H
-
-struct cpu_info {
-	unsigned int	cpu_id_val;
-	unsigned int	cpu_id_mask;
-	const char	*cpu_name;
-	unsigned long	(*cpu_setup)(void);
-};
-
-extern struct cpu_info *lookup_processor_type(unsigned int);
-
-#endif
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 6932bb57dba0..9437e3dc5833 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -97,7 +97,7 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	if (!dev->dma_mask)
-		return 0;
+		return false;
 
 	return addr + size - 1 <= *dev->dma_mask;
 }
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 1f65be393139..faad6df49e5b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -125,7 +125,6 @@ typedef struct user_fpsimd_state elf_fpregset_t;
  * the loader.  We need to make sure that it is out of the way of the program
  * that it will "exec", and that there is sufficient room for the brk.
  */
-extern unsigned long randomize_et_dyn(unsigned long base);
 #define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
 
 /*
@@ -157,10 +156,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 #define STACK_RND_MASK			(0x3ffff >> (PAGE_SHIFT - 12))
 #endif
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_COMPAT
 
 #ifdef __AARCH64EB__
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 92bbae381598..70522450ca23 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -90,6 +90,7 @@
 #define ESR_ELx_FSC		(0x3F)
 #define ESR_ELx_FSC_TYPE	(0x3C)
 #define ESR_ELx_FSC_EXTABT	(0x10)
+#define ESR_ELx_FSC_ACCESS	(0x08)
 #define ESR_ELx_FSC_FAULT	(0x04)
 #define ESR_ELx_FSC_PERM	(0x0C)
 #define ESR_ELx_CV		(UL(1) << 24)
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index defa0ff98250..926495686554 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -33,6 +33,7 @@
 enum fixed_addresses {
 	FIX_HOLE,
 	FIX_EARLYCON_MEM_BASE,
+	FIX_TEXT_POKE0,
 	__end_of_permanent_fixed_addresses,
 
 	/*
@@ -49,7 +50,6 @@ enum fixed_addresses {
 
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
-	FIX_TEXT_POKE0,
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index d2f49423c5dc..f81b328d9cf4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -285,6 +285,7 @@ bool aarch64_insn_is_nop(u32 insn);
 int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
 u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 				  u32 insn, u64 imm);
 u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 076a1c714049..c0e5165c2f76 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,11 +18,12 @@
  */
 #ifndef __ASM_JUMP_LABEL_H
 #define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <asm/insn.h>
 
-#ifdef __KERNEL__
-
 #define JUMP_LABEL_NOP_SIZE		AARCH64_INSN_SIZE
 
 static __always_inline bool arch_static_branch(struct static_key *key)
@@ -39,8 +40,6 @@ l_yes:
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u64 jump_label_t;
 
 struct jump_entry {
@@ -49,4 +48,5 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif	/* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 94674eb7e7bb..ac6fafb95fe7 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -129,6 +129,9 @@
  * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together.
  *
@@ -138,7 +141,6 @@
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
 #else
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
@@ -187,6 +188,7 @@
 
 /* For compatibility with fault code shared with 32-bit */
 #define FSC_FAULT	ESR_ELx_FSC_FAULT
+#define FSC_ACCESS	ESR_ELx_FSC_ACCESS
 #define FSC_PERM	ESR_ELx_FSC_PERM
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8ac3c70fe3c6..f0f58c9beec0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -28,6 +28,8 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -177,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-			      unsigned long end)
-{
-	return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-	return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 							 unsigned long address)
 {
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index 9f52beb7cb13..889c908ee631 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -31,28 +31,6 @@ struct kvm_decode {
 	bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-	phys_addr_t	phys_addr;
-	u8		data[8];
-	u32		len;
-	bool		is_write;
-	void		*private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	run->mmio.phys_addr	= mmio->phys_addr;
-	run->mmio.len		= mmio->len;
-	run->mmio.is_write	= mmio->is_write;
-	memcpy(run->mmio.data, mmio->data, mmio->len);
-	run->exit_reason	= KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6458b5373142..61505676d085 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -68,6 +68,8 @@
 #include <asm/pgalloc.h>
 #include <asm/cachetype.h>
 #include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
 
 #define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
 
@@ -158,56 +160,21 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
 #define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
+#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
 /*
  * If we are concatenating first level stage-2 page tables, we would have less
  * than or equal to 16 pointers in the fake PGD, because that's what the
- * architecture allows.  In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
+ * architecture allows.  In this case, (4 - CONFIG_PGTABLE_LEVELS)
  * represents the first level for the host, and we add 1 to go to the next
  * level (which uses contatenation) for the stage-2 tables.
  */
 #if PTRS_PER_S2_PGD <= 16
-#define KVM_PREALLOC_LEVEL	(4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
+#define KVM_PREALLOC_LEVEL	(4 - CONFIG_PGTABLE_LEVELS + 1)
 #else
 #define KVM_PREALLOC_LEVEL	(0)
 #endif
 
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm:	The KVM struct pointer for the VM.
- * @pgd:	The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
-	unsigned int i;
-	unsigned long hwpgd;
-
-	if (KVM_PREALLOC_LEVEL == 0)
-		return 0;
-
-	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
-	if (!hwpgd)
-		return -ENOMEM;
-
-	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-		if (KVM_PREALLOC_LEVEL == 1)
-			pgd_populate(NULL, pgd + i,
-				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-		else if (KVM_PREALLOC_LEVEL == 2)
-			pud_populate(NULL, pud_offset(pgd, 0) + i,
-				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-	}
-
-	return 0;
-}
-
 static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
 	pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +191,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
 	return pmd_offset(pud, 0);
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	if (KVM_PREALLOC_LEVEL > 0) {
-		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
-		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
-	}
+	if (KVM_PREALLOC_LEVEL > 0)
+		return PTRS_PER_S2_PGD * PAGE_SIZE;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 static inline bool kvm_page_empty(void *ptr)
@@ -305,5 +271,36 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
+static inline bool __kvm_cpu_uses_extended_idmap(void)
+{
+	return __cpu_uses_extended_idmap();
+}
+
+static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
+				       pgd_t *hyp_pgd,
+				       pgd_t *merged_hyp_pgd,
+				       unsigned long hyp_idmap_start)
+{
+	int idmap_idx;
+
+	/*
+	 * Use the first entry to access the HYP mappings. It is
+	 * guaranteed to be free, otherwise we wouldn't use an
+	 * extended idmap.
+	 */
+	VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
+	merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE);
+
+	/*
+	 * Create another extended level entry that points to the boot HYP map,
+	 * which contains an ID mapping of the HYP init code. We essentially
+	 * merge the boot and runtime HYP maps by doing so, but they don't
+	 * overlap anyway, so this is fine.
+	 */
+	idmap_idx = hyp_idmap_start >> VA_BITS;
+	VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
+	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..8ec41e5f56f0 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void)
 	: "r" (ttbr));
 }
 
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline bool __cpu_uses_extended_idmap(void)
+{
+	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
+		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
+}
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (__cpu_uses_extended_idmap())
+		asm volatile (
+		"	mrs	%0, tcr_el1	;"
+		"	bfi	%0, %1, %2, %3	;"
+		"	msr	tcr_el1, %0	;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
 static inline void switch_new_context(struct mm_struct *mm)
 {
 	unsigned long flags;
@@ -151,6 +194,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	unsigned int cpu = smp_processor_id();
 
+	/*
+	 * init_mm.pgd does not contain any user mappings and it is always
+	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+	 */
+	if (next == &init_mm) {
+		cpu_set_reserved_ttbr0();
+		return;
+	}
+
 	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
 		check_and_switch_context(next, tsk);
 }
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..7d9c7e4a424b 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,16 +33,18 @@
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
  * map the kernel. With the 64K page configuration, swapper and idmap need to
  * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information).
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
 #else
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS - 1)
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 09da25bc596f..4fde8c1df97f 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
 	return ret;
 }
 
+#define _percpu_read(pcp)						\
+({									\
+	typeof(pcp) __retval;						\
+	preempt_disable();						\
+	__retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), 	\
+					      sizeof(pcp));		\
+	preempt_enable();						\
+	__retval;							\
+})
+
+#define _percpu_write(pcp, val)						\
+do {									\
+	preempt_disable();						\
+	__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), 	\
+				sizeof(pcp));				\
+	preempt_enable();						\
+} while(0)								\
+
+#define _pcp_protect(operation, pcp, val)			\
+({								\
+	typeof(pcp) __retval;					\
+	preempt_disable();					\
+	__retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)),	\
+					  (val), sizeof(pcp));	\
+	preempt_enable();					\
+	__retval;						\
+})
+
 #define _percpu_add(pcp, val) \
-	__percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+	_pcp_protect(__percpu_add, pcp, val)
 
-#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val))
+#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
 
 #define _percpu_and(pcp, val) \
-	__percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+	_pcp_protect(__percpu_and, pcp, val)
 
 #define _percpu_or(pcp, val) \
-	__percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
-
-#define _percpu_read(pcp) (typeof(pcp))	\
-	(__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp)))
-
-#define _percpu_write(pcp, val) \
-	__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))
+	_pcp_protect(__percpu_or, pcp, val)
 
 #define _percpu_xchg(pcp, val) (typeof(pcp)) \
-	(__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)))
+	_pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
 
 #define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
 #define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e20df38a8ff3..76420568d66a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -28,7 +28,7 @@
 
 #define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -46,9 +46,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
 }
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -66,7 +66,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 	set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
 }
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..59bfae75dc98 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -21,7 +21,7 @@
 /*
  * PMD_SHIFT determines the size a level 2 page table entry can map.
  */
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 #define PMD_SHIFT		((PAGE_SHIFT - 3) * 2 + 3)
 #define PMD_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK		(~(PMD_SIZE-1))
@@ -31,7 +31,7 @@
 /*
  * PUD_SHIFT determines the size a level 1 page table entry can map.
  */
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 #define PUD_SHIFT		((PAGE_SHIFT - 3) * 3 + 3)
 #define PUD_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK		(~(PUD_SIZE-1))
@@ -42,7 +42,7 @@
  * PGDIR_SHIFT determines the size a top-level page table entry can map
  * (depending on the configuration, this level can be 0, 1 or 2).
  */
-#define PGDIR_SHIFT		((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT		((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
@@ -143,7 +143,12 @@
 /*
  * TCR flags.
  */
-#define TCR_TxSZ(x)		(((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
+#define TCR_T0SZ_OFFSET		0
+#define TCR_T1SZ_OFFSET		16
+#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH		6
 #define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
 #define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
 #define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index ca9df80af896..2b1bd7e52c3b 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -38,13 +38,13 @@ typedef struct { pteval_t pte; } pte_t;
 #define pte_val(x)	((x).pte)
 #define __pte(x)	((pte_t) { (x) } )
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef struct { pmdval_t pmd; } pmd_t;
 #define pmd_val(x)	((x).pmd)
 #define __pmd(x)	((pmd_t) { (x) } )
 #endif
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef struct { pudval_t pud; } pud_t;
 #define pud_val(x)	((x).pud)
 #define __pud(x)	((pud_t) { (x) } )
@@ -64,13 +64,13 @@ typedef pteval_t pte_t;
 #define pte_val(x)	(x)
 #define __pte(x)	(x)
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef pmdval_t pmd_t;
 #define pmd_val(x)	(x)
 #define __pmd(x)	(x)
 #endif
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef pudval_t pud_t;
 #define pud_val(x)	(x)
 #define __pud(x)	(x)
@@ -86,9 +86,9 @@ typedef pteval_t pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
-#if CONFIG_ARM64_PGTABLE_LEVELS == 2
+#if CONFIG_PGTABLE_LEVELS == 2
 #include <asm-generic/pgtable-nopmd.h>
-#elif CONFIG_ARM64_PGTABLE_LEVELS == 3
+#elif CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
 #endif
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 800ec0e87ed9..56283f8a675c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -374,7 +374,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
  */
 #define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 
 #define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
 
@@ -409,9 +409,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 
 #define pud_ERROR(pud)		__pud_error(__FILE__, __LINE__, pud_val(pud))
 
@@ -445,7 +445,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
-#endif  /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
+#endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
 
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index e6f087806aaf..b7710a59672c 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -44,6 +44,7 @@ struct pmu_hw_events {
 struct arm_pmu {
 	struct pmu		pmu;
 	cpumask_t		active_irqs;
+	int			*irq_affinity;
 	const char		*name;
 	irqreturn_t		(*handle_irq)(int irq_num, void *dev);
 	void			(*enable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 9a8fd84f8fb2..220633b791b8 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -39,16 +39,11 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
 #include <asm/memory.h>
 
-#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
-
-#define cpu_get_pgd()					\
-({							\
-	unsigned long pg;				\
-	asm("mrs	%0, ttbr0_el1\n"		\
-	    : "=r" (pg));				\
-	pg &= ~0xffff000000003ffful;			\
-	(pgd_t *)phys_to_virt(pg);			\
-})
+#define cpu_switch_mm(pgd,mm)				\
+do {							\
+	BUG_ON(pgd == swapper_pg_dir);			\
+	cpu_do_switch_mm(virt_to_phys(pgd),mm);		\
+} while (0)
 
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 20e9591a60cf..d2c37a1df0eb 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -127,7 +127,11 @@ extern void release_thread(struct task_struct *);
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define cpu_relax()			barrier()
+static inline void cpu_relax(void)
+{
+	asm volatile("yield" ::: "memory");
+}
+
 #define cpu_relax_lowlatency()                cpu_relax()
 
 /* Thread switching */
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
index 59e282311b58..8dcd61e32176 100644
--- a/arch/arm64/include/asm/smp_plat.h
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -40,4 +40,6 @@ static inline u32 mpidr_hash_size(void)
 extern u64 __cpu_logical_map[NR_CPUS];
 #define cpu_logical_map(cpu)    __cpu_logical_map[cpu]
 
+void __init do_post_cpus_up_work(void);
+
 #endif /* __ASM_SMP_PLAT_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 702e1e6a0d80..dcd06d18a42a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -33,7 +33,6 @@
 #ifndef __ASSEMBLY__
 
 struct task_struct;
-struct exec_domain;
 
 #include <asm/types.h>
 
@@ -47,7 +46,6 @@ struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	mm_segment_t		addr_limit;	/* address limit */
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 	int			cpu;		/* cpu */
 };
@@ -55,7 +53,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)						\
 {									\
 	.task		= &tsk,						\
-	.exec_domain	= &default_exec_domain,				\
 	.flags		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
 	.addr_limit	= KERNEL_DS,					\
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c028fe37456f..3a0242c7eb8d 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -48,22 +48,25 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				  unsigned long addr)
 {
+	__flush_tlb_pgtable(tlb->mm, addr);
 	pgtable_page_dtor(pte);
 	tlb_remove_entry(tlb, pte);
 }
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 				  unsigned long addr)
 {
+	__flush_tlb_pgtable(tlb->mm, addr);
 	tlb_remove_entry(tlb, virt_to_page(pmdp));
 }
 #endif
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
+	__flush_tlb_pgtable(tlb->mm, addr);
 	tlb_remove_entry(tlb, virt_to_page(pudp));
 }
 #endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 4abe9b945f77..c3bb05b98616 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -144,6 +144,19 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 }
 
 /*
+ * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+ * table levels (pgd/pud/pmd).
+ */
+static inline void __flush_tlb_pgtable(struct mm_struct *mm,
+				       unsigned long uaddr)
+{
+	unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+
+	dsb(ishst);
+	asm("tlbi	vae1is, %0" : : "r" (addr));
+	dsb(ish);
+}
+/*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
 static inline void update_mmu_cache(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 27224426e0bf..cef934a90f17 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -406,7 +406,7 @@ __SYSCALL(__NR_vfork, sys_vfork)
 #define __NR_ugetrlimit 191	/* SuS compliant getrlimit */
 __SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit)		/* SuS compliant getrlimit */
 #define __NR_mmap2 192
-__SYSCALL(__NR_mmap2, sys_mmap_pgoff)
+__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper)
 #define __NR_truncate64 193
 __SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
 #define __NR_ftruncate64 194
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 3ef77a466018..c154c0b7eb60 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -191,6 +191,9 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE		0x95c1ba5e
 #define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 5ee07eee80c2..b12e15b80516 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -12,12 +12,12 @@ CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_return_address.o = -pg
 
 # Object file lists.
-arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
+arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
 			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
 			   hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o	\
 			   return_address.o cpuinfo.o cpu_errata.o		\
-			   alternative.o cacheinfo.o
+			   cpufeature.o alternative.o cacheinfo.o
 
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index ad7821d64a1d..21033bba9390 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,6 +24,7 @@
 #include <asm/cacheflush.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
+#include <asm/insn.h>
 #include <linux/stop_machine.h>
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
@@ -33,6 +34,48 @@ struct alt_region {
 	struct alt_instr *end;
 };
 
+/*
+ * Decode the imm field of a b/bl instruction, and return the byte
+ * offset as a signed value (so it can be used when computing a new
+ * branch target).
+ */
+static s32 get_branch_offset(u32 insn)
+{
+	s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
+
+	/* sign-extend the immediate before turning it into a byte offset */
+	return (imm << 6) >> 4;
+}
+
+static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr)
+{
+	u32 insn;
+
+	aarch64_insn_read(altinsnptr, &insn);
+
+	/* Stop the world on instructions we don't support... */
+	BUG_ON(aarch64_insn_is_cbz(insn));
+	BUG_ON(aarch64_insn_is_cbnz(insn));
+	BUG_ON(aarch64_insn_is_bcond(insn));
+	/* ... and there is probably more. */
+
+	if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
+		enum aarch64_insn_branch_type type;
+		unsigned long target;
+
+		if (aarch64_insn_is_b(insn))
+			type = AARCH64_INSN_BRANCH_NOLINK;
+		else
+			type = AARCH64_INSN_BRANCH_LINK;
+
+		target = (unsigned long)altinsnptr + get_branch_offset(insn);
+		insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr,
+						   target, type);
+	}
+
+	return insn;
+}
+
 static int __apply_alternatives(void *alt_region)
 {
 	struct alt_instr *alt;
@@ -40,16 +83,24 @@ static int __apply_alternatives(void *alt_region)
 	u8 *origptr, *replptr;
 
 	for (alt = region->begin; alt < region->end; alt++) {
+		u32 insn;
+		int i;
+
 		if (!cpus_have_cap(alt->cpufeature))
 			continue;
 
-		BUG_ON(alt->alt_len > alt->orig_len);
+		BUG_ON(alt->alt_len != alt->orig_len);
 
 		pr_info_once("patching kernel code\n");
 
 		origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
 		replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
-		memcpy(origptr, replptr, alt->alt_len);
+
+		for (i = 0; i < alt->alt_len; i += sizeof(insn)) {
+			insn = get_alt_insn(origptr + i, replptr + i);
+			aarch64_insn_write(origptr + i, insn);
+		}
+
 		flush_icache_range((uintptr_t)origptr,
 				   (uintptr_t)(origptr + alt->alt_len));
 	}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f7fa65d4c352..da675cc5dfae 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -24,7 +24,6 @@
 #include <linux/kvm_host.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
-#include <asm/cputable.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/vdso_datapage.h>
@@ -38,7 +37,6 @@ int main(void)
   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
   DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
-  DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain));
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
@@ -71,9 +69,6 @@ int main(void)
   BLANK();
   DEFINE(PAGE_SZ,	       	PAGE_SIZE);
   BLANK();
-  DEFINE(CPU_INFO_SZ,		sizeof(struct cpu_info));
-  DEFINE(CPU_INFO_SETUP,	offsetof(struct cpu_info, cpu_setup));
-  BLANK();
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index fa62637e63a8..6ffd91438560 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -16,8 +16,6 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define pr_fmt(fmt) "alternatives: " fmt
-
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
@@ -26,27 +24,11 @@
 #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 
-/*
- * Add a struct or another datatype to the union below if you need
- * different means to detect an affected CPU.
- */
-struct arm64_cpu_capabilities {
-	const char *desc;
-	u16 capability;
-	bool (*is_affected)(struct arm64_cpu_capabilities *);
-	union {
-		struct {
-			u32 midr_model;
-			u32 midr_range_min, midr_range_max;
-		};
-	};
-};
-
 #define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
 			MIDR_ARCHITECTURE_MASK)
 
 static bool __maybe_unused
-is_affected_midr_range(struct arm64_cpu_capabilities *entry)
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
 {
 	u32 midr = read_cpuid_id();
 
@@ -59,12 +41,12 @@ is_affected_midr_range(struct arm64_cpu_capabilities *entry)
 }
 
 #define MIDR_RANGE(model, min, max) \
-	.is_affected = is_affected_midr_range, \
+	.matches = is_affected_midr_range, \
 	.midr_model = model, \
 	.midr_range_min = min, \
 	.midr_range_max = max
 
-struct arm64_cpu_capabilities arm64_errata[] = {
+const struct arm64_cpu_capabilities arm64_errata[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
 	defined(CONFIG_ARM64_ERRATUM_827319) || \
 	defined(CONFIG_ARM64_ERRATUM_824069)
@@ -88,7 +70,16 @@ struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A57 r0p0 - r1p2 */
 		.desc = "ARM erratum 832075",
 		.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
-		MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12),
+		MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 2),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_845719
+	{
+	/* Cortex-A53 r0p[01234] */
+		.desc = "ARM erratum 845719",
+		.capability = ARM64_WORKAROUND_845719,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
 	},
 #endif
 	{
@@ -97,15 +88,5 @@ struct arm64_cpu_capabilities arm64_errata[] = {
 
 void check_local_cpu_errata(void)
 {
-	struct arm64_cpu_capabilities *cpus = arm64_errata;
-	int i;
-
-	for (i = 0; cpus[i].desc; i++) {
-		if (!cpus[i].is_affected(&cpus[i]))
-			continue;
-
-		if (!cpus_have_cap(cpus[i].capability))
-			pr_info("enabling workaround for %s\n", cpus[i].desc);
-		cpus_set_cap(cpus[i].capability);
-	}
+	check_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
new file mode 100644
index 000000000000..3d9967e43d89
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature.c
@@ -0,0 +1,47 @@
+/*
+ * Contains CPU feature definitions
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cpufeature.h>
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
+	{},
+};
+
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info)
+{
+	int i;
+
+	for (i = 0; caps[i].desc; i++) {
+		if (!caps[i].matches(&caps[i]))
+			continue;
+
+		if (!cpus_have_cap(caps[i].capability))
+			pr_info("%s %s\n", info, caps[i].desc);
+		cpus_set_cap(caps[i].capability);
+	}
+}
+
+void check_local_cpu_features(void)
+{
+	check_cpu_capabilities(arm64_features, "detected feature");
+}
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 5c0896647fd1..a78143a5c99f 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -15,7 +15,7 @@
 #include <asm/cpuidle.h>
 #include <asm/cpu_ops.h>
 
-int cpu_init_idle(unsigned int cpu)
+int arm_cpuidle_init(unsigned int cpu)
 {
 	int ret = -EOPNOTSUPP;
 	struct device_node *cpu_node = of_cpu_device_node_get(cpu);
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 929855691dae..75d5a867e7fb 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -236,6 +236,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	cpuinfo_detect_icache_policy(info);
 
 	check_local_cpu_errata();
+	check_local_cpu_features();
 	update_cpu_features(info);
 }
 
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index b42c7b480e1e..ab21e0d58278 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -337,7 +337,11 @@ core_initcall(arm64_dmi_init);
 
 static void efi_set_pgd(struct mm_struct *mm)
 {
-	cpu_switch_mm(mm->pgd, mm);
+	if (mm == &init_mm)
+		cpu_set_reserved_ttbr0();
+	else
+		cpu_switch_mm(mm->pgd, mm);
+
 	flush_tlb_all();
 	if (icache_is_aivivt())
 		__flush_icache_all();
@@ -354,3 +358,12 @@ void efi_virtmap_unload(void)
 	efi_set_pgd(current->active_mm);
 	preempt_enable();
 }
+
+/*
+ * UpdateCapsule() depends on the system being shutdown via
+ * ResetSystem().
+ */
+bool efi_poweroff_required(void)
+{
+	return efi_enabled(EFI_RUNTIME_SERVICES);
+}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf21bb3bf752..959fe8733560 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -21,8 +21,10 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 
+#include <asm/alternative-asm.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
 #include <asm/thread_info.h>
@@ -120,6 +122,24 @@
 	ct_user_enter
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
 	msr	sp_el0, x23
+
+#ifdef CONFIG_ARM64_ERRATUM_845719
+	alternative_insn						\
+	"nop",								\
+	"tbz x22, #4, 1f",						\
+	ARM64_WORKAROUND_845719
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	alternative_insn						\
+	"nop; nop",							\
+	"mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:",		\
+	ARM64_WORKAROUND_845719
+#else
+	alternative_insn						\
+	"nop",								\
+	"msr contextidr_el1, xzr; 1:",					\
+	ARM64_WORKAROUND_845719
+#endif
+#endif
 	.endif
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
index 9a8f6ae2530e..bd9bfaa9269b 100644
--- a/arch/arm64/kernel/entry32.S
+++ b/arch/arm64/kernel/entry32.S
@@ -19,9 +19,12 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/const.h>
 
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/page.h>
 
 /*
  * System call wrappers for the AArch32 compatibility layer.
@@ -54,6 +57,21 @@ ENTRY(compat_sys_fstatfs64_wrapper)
 ENDPROC(compat_sys_fstatfs64_wrapper)
 
 /*
+ * Note: off_4k (w5) is always in units of 4K. If we can't do the
+ * requested offset because it is not page-aligned, we return -EINVAL.
+ */
+ENTRY(compat_sys_mmap2_wrapper)
+#if PAGE_SHIFT > 12
+	tst	w5, #~PAGE_MASK >> 12
+	b.ne	1f
+	lsr	w5, w5, #PAGE_SHIFT - 12
+#endif
+	b	sys_mmap_pgoff
+1:	mov	x0, #-EINVAL
+	ret
+ENDPROC(compat_sys_mmap2_wrapper)
+
+/*
  * Wrappers for AArch32 syscalls that either take 64-bit parameters
  * in registers or that take 32-bit parameters which require sign
  * extension.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8ce88e08c030..19f915e8f6e0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -36,7 +36,7 @@
 #include <asm/page.h>
 #include <asm/virt.h>
 
-#define KERNEL_RAM_VADDR	(PAGE_OFFSET + TEXT_OFFSET)
+#define __PHYS_OFFSET	(KERNEL_START - TEXT_OFFSET)
 
 #if (TEXT_OFFSET & 0xfff) != 0
 #error TEXT_OFFSET must be at least 4KB aligned
@@ -46,13 +46,6 @@
 #error TEXT_OFFSET must be less than 2MB
 #endif
 
-	.macro	pgtbl, ttb0, ttb1, virt_to_phys
-	ldr	\ttb1, =swapper_pg_dir
-	ldr	\ttb0, =idmap_pg_dir
-	add	\ttb1, \ttb1, \virt_to_phys
-	add	\ttb0, \ttb0, \virt_to_phys
-	.endm
-
 #ifdef CONFIG_ARM64_64K_PAGES
 #define BLOCK_SHIFT	PAGE_SHIFT
 #define BLOCK_SIZE	PAGE_SIZE
@@ -63,7 +56,7 @@
 #define TABLE_SHIFT	PUD_SHIFT
 #endif
 
-#define KERNEL_START	KERNEL_RAM_VADDR
+#define KERNEL_START	_text
 #define KERNEL_END	_end
 
 /*
@@ -240,40 +233,43 @@ section_table:
 #endif
 
 ENTRY(stext)
-	mov	x21, x0				// x21=FDT
+	bl	preserve_boot_args
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
-	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
+	adrp	x24, __PHYS_OFFSET
 	bl	set_cpu_boot_mode_flag
-	mrs	x22, midr_el1			// x22=cpuid
-	mov	x0, x22
-	bl	lookup_processor_type
-	mov	x23, x0				// x23=current cpu_table
-	/*
-	 * __error_p may end up out of range for cbz if text areas are
-	 * aligned up to section sizes.
-	 */
-	cbnz	x23, 1f				// invalid processor (x23=0)?
-	b	__error_p
-1:
+
 	bl	__vet_fdt
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
 	/*
-	 * The following calls CPU specific code in a position independent
-	 * manner. See arch/arm64/mm/proc.S for details. x23 = base of
-	 * cpu_info structure selected by lookup_processor_type above.
+	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
+	 * details.
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
-	ldr	x27, __switch_data		// address to jump to after
+	ldr	x27, =__mmap_switched		// address to jump to after
 						// MMU has been enabled
-	adrp	lr, __enable_mmu		// return (PIC) address
-	add	lr, lr, #:lo12:__enable_mmu
-	ldr	x12, [x23, #CPU_INFO_SETUP]
-	add	x12, x12, x28			// __virt_to_phys
-	br	x12				// initialise processor
+	adr_l	lr, __enable_mmu		// return (PIC) address
+	b	__cpu_setup			// initialise processor
 ENDPROC(stext)
 
 /*
+ * Preserve the arguments passed by the bootloader in x0 .. x3
+ */
+preserve_boot_args:
+	mov	x21, x0				// x21=FDT
+
+	adr_l	x0, boot_args			// record the contents of
+	stp	x21, x1, [x0]			// x0 .. x3 at kernel entry
+	stp	x2, x3, [x0, #16]
+
+	dmb	sy				// needed before dc ivac with
+						// MMU off
+
+	add	x1, x0, #0x20			// 4 x 8 bytes
+	b	__inval_cache_range		// tail call
+ENDPROC(preserve_boot_args)
+
+/*
  * Determine validity of the x21 FDT pointer.
  * The dtb must be 8-byte aligned and live in the first 512M of memory.
  */
@@ -356,7 +352,8 @@ ENDPROC(__vet_fdt)
  *   - pgd entry for fixed mappings (TTBR1)
  */
 __create_page_tables:
-	pgtbl	x25, x26, x28			// idmap_pg_dir and swapper_pg_dir addresses
+	adrp	x25, idmap_pg_dir
+	adrp	x26, swapper_pg_dir
 	mov	x27, lr
 
 	/*
@@ -385,12 +382,50 @@ __create_page_tables:
 	 * Create the identity mapping.
 	 */
 	mov	x0, x25				// idmap_pg_dir
-	ldr	x3, =KERNEL_START
-	add	x3, x3, x28			// __pa(KERNEL_START)
+	adrp	x3, KERNEL_START		// __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
+
+	/*
+	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+	 * created that covers system RAM if that is located sufficiently high
+	 * in the physical address space. So for the ID map, use an extended
+	 * virtual range in that case, by configuring an additional translation
+	 * level.
+	 * First, we have to verify our assumption that the current value of
+	 * VA_BITS was chosen such that all translation levels are fully
+	 * utilised, and that lowering T0SZ will always result in an additional
+	 * translation level to be configured.
+	 */
+#if VA_BITS != EXTRA_SHIFT
+#error "Mismatch between VA_BITS and page size/number of translation levels"
+#endif
+
+	/*
+	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+	 * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
+	 * this number conveniently equals the number of leading zeroes in
+	 * the physical address of KERNEL_END.
+	 */
+	adrp	x5, KERNEL_END
+	clz	x5, x5
+	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
+	b.ge	1f			// .. then skip additional level
+
+	adr_l	x6, idmap_t0sz
+	str	x5, [x6]
+	dmb	sy
+	dc	ivac, x6		// Invalidate potentially stale cache line
+
+	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
+1:
+#endif
+
 	create_pgd_entry x0, x3, x5, x6
-	ldr	x6, =KERNEL_END
 	mov	x5, x3				// __pa(KERNEL_START)
-	add	x6, x6, x28			// __pa(KERNEL_END)
+	adr_l	x6, KERNEL_END			// __pa(KERNEL_END)
 	create_block_map x0, x7, x3, x5, x6
 
 	/*
@@ -399,7 +434,7 @@ __create_page_tables:
 	mov	x0, x26				// swapper_pg_dir
 	mov	x5, #PAGE_OFFSET
 	create_pgd_entry x0, x5, x3, x6
-	ldr	x6, =KERNEL_END
+	ldr	x6, =KERNEL_END			// __va(KERNEL_END)
 	mov	x3, x24				// phys offset
 	create_block_map x0, x7, x3, x5, x6
 
@@ -426,6 +461,7 @@ __create_page_tables:
 	 */
 	mov	x0, x25
 	add	x1, x26, #SWAPPER_DIR_SIZE
+	dmb	sy
 	bl	__inval_cache_range
 
 	mov	lr, x27
@@ -433,37 +469,22 @@ __create_page_tables:
 ENDPROC(__create_page_tables)
 	.ltorg
 
-	.align	3
-	.type	__switch_data, %object
-__switch_data:
-	.quad	__mmap_switched
-	.quad	__bss_start			// x6
-	.quad	__bss_stop			// x7
-	.quad	processor_id			// x4
-	.quad	__fdt_pointer			// x5
-	.quad	memstart_addr			// x6
-	.quad	init_thread_union + THREAD_START_SP // sp
-
 /*
- * The following fragment of code is executed with the MMU on in MMU mode, and
- * uses absolute addresses; this is not position independent.
+ * The following fragment of code is executed with the MMU enabled.
  */
+	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
-	adr	x3, __switch_data + 8
+	adr_l	x6, __bss_start
+	adr_l	x7, __bss_stop
 
-	ldp	x6, x7, [x3], #16
 1:	cmp	x6, x7
 	b.hs	2f
 	str	xzr, [x6], #8			// Clear BSS
 	b	1b
 2:
-	ldp	x4, x5, [x3], #16
-	ldr	x6, [x3], #8
-	ldr	x16, [x3]
-	mov	sp, x16
-	str	x22, [x4]			// Save processor ID
-	str	x21, [x5]			// Save FDT pointer
-	str	x24, [x6]			// Save PHYS_OFFSET
+	adr_l	sp, initial_sp, x4
+	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
+	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
 	mov	x29, #0
 	b	start_kernel
 ENDPROC(__mmap_switched)
@@ -566,8 +587,7 @@ ENDPROC(el2_setup)
  * in x20. See arch/arm64/include/asm/virt.h for more info.
  */
 ENTRY(set_cpu_boot_mode_flag)
-	ldr	x1, =__boot_cpu_mode		// Compute __boot_cpu_mode
-	add	x1, x1, x28
+	adr_l	x1, __boot_cpu_mode
 	cmp	w20, #BOOT_CPU_MODE_EL2
 	b.ne	1f
 	add	x1, x1, #4
@@ -585,32 +605,24 @@ ENDPROC(set_cpu_boot_mode_flag)
  * zeroing of .bss would clobber it.
  */
 	.pushsection	.data..cacheline_aligned
-ENTRY(__boot_cpu_mode)
 	.align	L1_CACHE_SHIFT
+ENTRY(__boot_cpu_mode)
 	.long	BOOT_CPU_MODE_EL2
-	.long	0
+	.long	BOOT_CPU_MODE_EL1
 	.popsection
 
 #ifdef CONFIG_SMP
-	.align	3
-1:	.quad	.
-	.quad	secondary_holding_pen_release
-
 	/*
 	 * This provides a "holding pen" for platforms to hold all secondary
 	 * cores are held until we're ready for them to initialise.
 	 */
 ENTRY(secondary_holding_pen)
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
-	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
 	bl	set_cpu_boot_mode_flag
 	mrs	x0, mpidr_el1
 	ldr     x1, =MPIDR_HWID_BITMASK
 	and	x0, x0, x1
-	adr	x1, 1b
-	ldp	x2, x3, [x1]
-	sub	x1, x1, x2
-	add	x3, x3, x1
+	adr_l	x3, secondary_holding_pen_release
 pen:	ldr	x4, [x3]
 	cmp	x4, x0
 	b.eq	secondary_startup
@@ -624,7 +636,6 @@ ENDPROC(secondary_holding_pen)
 	 */
 ENTRY(secondary_entry)
 	bl	el2_setup			// Drop to EL1
-	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
 	bl	set_cpu_boot_mode_flag
 	b	secondary_startup
 ENDPROC(secondary_entry)
@@ -633,16 +644,9 @@ ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
 	 */
-	mrs	x22, midr_el1			// x22=cpuid
-	mov	x0, x22
-	bl	lookup_processor_type
-	mov	x23, x0				// x23=current cpu_table
-	cbz	x23, __error_p			// invalid processor (x23=0)?
-
-	pgtbl	x25, x26, x28			// x25=TTBR0, x26=TTBR1
-	ldr	x12, [x23, #CPU_INFO_SETUP]
-	add	x12, x12, x28			// __virt_to_phys
-	blr	x12				// initialise processor
+	adrp	x25, idmap_pg_dir
+	adrp	x26, swapper_pg_dir
+	bl	__cpu_setup			// initialise processor
 
 	ldr	x21, =secondary_data
 	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU
@@ -658,11 +662,12 @@ ENDPROC(__secondary_switched)
 #endif	/* CONFIG_SMP */
 
 /*
- * Setup common bits before finally enabling the MMU. Essentially this is just
- * loading the page table pointer and vector base registers.
+ * Enable the MMU.
+ *
+ *  x0  = SCTLR_EL1 value for turning on the MMU.
+ *  x27 = *virtual* address to jump to upon completion
  *
- * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
- * the MMU.
+ * other registers depend on the function called upon completion
  */
 __enable_mmu:
 	ldr	x5, =vectors
@@ -670,89 +675,7 @@ __enable_mmu:
 	msr	ttbr0_el1, x25			// load TTBR0
 	msr	ttbr1_el1, x26			// load TTBR1
 	isb
-	b	__turn_mmu_on
-ENDPROC(__enable_mmu)
-
-/*
- * Enable the MMU. This completely changes the structure of the visible memory
- * space. You will not be able to trace execution through this.
- *
- *  x0  = system control register
- *  x27 = *virtual* address to jump to upon completion
- *
- * other registers depend on the function called upon completion
- *
- * We align the entire function to the smallest power of two larger than it to
- * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET
- * close to the end of a 512MB or 1GB block we might require an additional
- * table to map the entire function.
- */
-	.align	4
-__turn_mmu_on:
 	msr	sctlr_el1, x0
 	isb
 	br	x27
-ENDPROC(__turn_mmu_on)
-
-/*
- * Calculate the start of physical memory.
- */
-__calc_phys_offset:
-	adr	x0, 1f
-	ldp	x1, x2, [x0]
-	sub	x28, x0, x1			// x28 = PHYS_OFFSET - PAGE_OFFSET
-	add	x24, x2, x28			// x24 = PHYS_OFFSET
-	ret
-ENDPROC(__calc_phys_offset)
-
-	.align 3
-1:	.quad	.
-	.quad	PAGE_OFFSET
-
-/*
- * Exception handling. Something went wrong and we can't proceed. We ought to
- * tell the user, but since we don't have any guarantee that we're even
- * running on the right architecture, we do virtually nothing.
- */
-__error_p:
-ENDPROC(__error_p)
-
-__error:
-1:	nop
-	b	1b
-ENDPROC(__error)
-
-/*
- * This function gets the processor ID in w0 and searches the cpu_table[] for
- * a match. It returns a pointer to the struct cpu_info it found. The
- * cpu_table[] must end with an empty (all zeros) structure.
- *
- * This routine can be called via C code and it needs to work with the MMU
- * both disabled and enabled (the offset is calculated automatically).
- */
-ENTRY(lookup_processor_type)
-	adr	x1, __lookup_processor_type_data
-	ldp	x2, x3, [x1]
-	sub	x1, x1, x2			// get offset between VA and PA
-	add	x3, x3, x1			// convert VA to PA
-1:
-	ldp	w5, w6, [x3]			// load cpu_id_val and cpu_id_mask
-	cbz	w5, 2f				// end of list?
-	and	w6, w6, w0
-	cmp	w5, w6
-	b.eq	3f
-	add	x3, x3, #CPU_INFO_SZ
-	b	1b
-2:
-	mov	x3, #0				// unknown processor
-3:
-	mov	x0, x3
-	ret
-ENDPROC(lookup_processor_type)
-
-	.align	3
-	.type	__lookup_processor_type_data, %object
-__lookup_processor_type_data:
-	.quad	.
-	.quad	cpu_table
-	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
+ENDPROC(__enable_mmu)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 98bbe06e469c..e7d934d3afe0 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * Disallow per-task kernel breakpoints since these would
 	 * complicate the stepping code.
 	 */
-	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
+	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
 		return -EINVAL;
 
 	return 0;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index c8eca88f12e6..924902083e47 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -265,23 +265,13 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
 	return aarch64_insn_patch_text_sync(addrs, insns, cnt);
 }
 
-u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
-				  u32 insn, u64 imm)
+static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
+						u32 *maskp, int *shiftp)
 {
-	u32 immlo, immhi, lomask, himask, mask;
+	u32 mask;
 	int shift;
 
 	switch (type) {
-	case AARCH64_INSN_IMM_ADR:
-		lomask = 0x3;
-		himask = 0x7ffff;
-		immlo = imm & lomask;
-		imm >>= 2;
-		immhi = imm & himask;
-		imm = (immlo << 24) | (immhi);
-		mask = (lomask << 24) | (himask);
-		shift = 5;
-		break;
 	case AARCH64_INSN_IMM_26:
 		mask = BIT(26) - 1;
 		shift = 0;
@@ -320,9 +310,68 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 		shift = 16;
 		break;
 	default:
-		pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
-			type);
-		return 0;
+		return -EINVAL;
+	}
+
+	*maskp = mask;
+	*shiftp = shift;
+
+	return 0;
+}
+
+#define ADR_IMM_HILOSPLIT	2
+#define ADR_IMM_SIZE		SZ_2M
+#define ADR_IMM_LOMASK		((1 << ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_HIMASK		((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_LOSHIFT		29
+#define ADR_IMM_HISHIFT		5
+
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
+{
+	u32 immlo, immhi, mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_ADR:
+		shift = 0;
+		immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
+		immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
+		insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
+		mask = ADR_IMM_SIZE - 1;
+		break;
+	default:
+		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+			pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
+			       type);
+			return 0;
+		}
+	}
+
+	return (insn >> shift) & mask;
+}
+
+u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+				  u32 insn, u64 imm)
+{
+	u32 immlo, immhi, mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_ADR:
+		shift = 0;
+		immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
+		imm >>= ADR_IMM_HILOSPLIT;
+		immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
+		imm = immlo | immhi;
+		mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
+			(ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
+		break;
+	default:
+		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+			pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+			       type);
+			return 0;
+		}
 	}
 
 	/* Update the immediate field. */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 25a5308744b1..195991dadc37 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -25,8 +25,10 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 
@@ -322,22 +324,31 @@ out:
 }
 
 static int
-validate_event(struct pmu_hw_events *hw_events,
-	       struct perf_event *event)
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+				struct perf_event *event)
 {
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct arm_pmu *armpmu;
 	struct hw_perf_event fake_event = event->hw;
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
 	if (is_software_event(event))
 		return 1;
 
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
 	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
 		return 1;
 
 	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
 		return 1;
 
+	armpmu = to_arm_pmu(event->pmu);
 	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
 }
 
@@ -355,15 +366,15 @@ validate_group(struct perf_event *event)
 	memset(fake_used_mask, 0, sizeof(fake_used_mask));
 	fake_pmu.used_mask = fake_used_mask;
 
-	if (!validate_event(&fake_pmu, leader))
+	if (!validate_event(event->pmu, &fake_pmu, leader))
 		return -EINVAL;
 
 	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(&fake_pmu, sibling))
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
 			return -EINVAL;
 	}
 
-	if (!validate_event(&fake_pmu, event))
+	if (!validate_event(event->pmu, &fake_pmu, event))
 		return -EINVAL;
 
 	return 0;
@@ -396,7 +407,12 @@ armpmu_release_hardware(struct arm_pmu *armpmu)
 		free_percpu_irq(irq, &cpu_hw_events);
 	} else {
 		for (i = 0; i < irqs; ++i) {
-			if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+			int cpu = i;
+
+			if (armpmu->irq_affinity)
+				cpu = armpmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq > 0)
@@ -450,19 +466,24 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
 		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
 	} else {
 		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
 			err = 0;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq <= 0)
 				continue;
 
+			if (armpmu->irq_affinity)
+				cpu = armpmu->irq_affinity[i];
+
 			/*
 			 * If we have a single PMU interrupt that we can't shift,
 			 * assume that we're running on a uniprocessor machine and
 			 * continue. Otherwise, continue without this interrupt.
 			 */
-			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
 				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-						irq, i);
+						irq, cpu);
 				continue;
 			}
 
@@ -476,7 +497,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
 				return err;
 			}
 
-			cpumask_set_cpu(i, &armpmu->active_irqs);
+			cpumask_set_cpu(cpu, &armpmu->active_irqs);
 		}
 	}
 
@@ -1289,9 +1310,46 @@ static const struct of_device_id armpmu_of_device_ids[] = {
 
 static int armpmu_device_probe(struct platform_device *pdev)
 {
+	int i, *irqs;
+
 	if (!cpu_pmu)
 		return -ENODEV;
 
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(dn), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+	}
+
+	if (i == pdev->num_resources)
+		cpu_pmu->irq_affinity = irqs;
+	else
+		kfree(irqs);
+
 	cpu_pmu->plat_device = pdev;
 	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fde9923af859..c6b1f3b96f45 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -21,6 +21,7 @@
 #include <stdarg.h>
 
 #include <linux/compat.h>
+#include <linux/efi.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -150,6 +151,13 @@ void machine_restart(char *cmd)
 	local_irq_disable();
 	smp_send_stop();
 
+	/*
+	 * UpdateCapsule() depends on the system being reset via
+	 * ResetSystem().
+	 */
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
+		efi_reboot(reboot_mode, NULL);
+
 	/* Now call the architecture specific reboot code. */
 	if (arm_pm_restart)
 		arm_pm_restart(reboot_mode, cmd);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index e8420f635bd4..51ef97274b52 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -50,7 +50,6 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
-#include <asm/cputable.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/sections.h>
@@ -62,9 +61,7 @@
 #include <asm/memblock.h>
 #include <asm/psci.h>
 #include <asm/efi.h>
-
-unsigned int processor_id;
-EXPORT_SYMBOL(processor_id);
+#include <asm/virt.h>
 
 unsigned long elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -83,7 +80,6 @@ unsigned int compat_elf_hwcap2 __read_mostly;
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
-static const char *cpu_name;
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -119,6 +115,11 @@ void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+/*
+ * The recorded values of x0 .. x3 upon kernel entry.
+ */
+u64 __cacheline_aligned boot_args[4];
+
 void __init smp_setup_processor_id(void)
 {
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
@@ -207,24 +208,38 @@ static void __init smp_build_mpidr_hash(void)
 }
 #endif
 
+static void __init hyp_mode_check(void)
+{
+	if (is_hyp_mode_available())
+		pr_info("CPU: All CPU(s) started at EL2\n");
+	else if (is_hyp_mode_mismatched())
+		WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC,
+			   "CPU: CPUs started in inconsistent modes");
+	else
+		pr_info("CPU: All CPU(s) started at EL1\n");
+}
+
+void __init do_post_cpus_up_work(void)
+{
+	hyp_mode_check();
+	apply_alternatives_all();
+}
+
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
+{
+	do_post_cpus_up_work();
+}
+#endif /* CONFIG_UP_LATE_INIT */
+
 static void __init setup_processor(void)
 {
-	struct cpu_info *cpu_info;
 	u64 features, block;
 	u32 cwg;
 	int cls;
 
-	cpu_info = lookup_processor_type(read_cpuid_id());
-	if (!cpu_info) {
-		printk("CPU configuration botched (ID %08x), unable to continue.\n",
-		       read_cpuid_id());
-		while (1);
-	}
-
-	cpu_name = cpu_info->cpu_name;
-
-	printk("CPU: %s [%08x] revision %d\n",
-	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
+	printk("CPU: AArch64 Processor [%08x] revision %d\n",
+	       read_cpuid_id(), read_cpuid_id() & 15);
 
 	sprintf(init_utsname()->machine, ELF_PLATFORM);
 	elf_hwcap = 0;
@@ -402,6 +417,12 @@ void __init setup_arch(char **cmdline_p)
 	conswitchp = &dummy_con;
 #endif
 #endif
+	if (boot_args[1] || boot_args[2] || boot_args[3]) {
+		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
+			"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
+			"This indicates a broken bootloader or old kernel\n",
+			boot_args[1], boot_args[2], boot_args[3]);
+	}
 }
 
 static int __init arm64_device_init(void)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 660ccf9f7524..e18c48cb6db1 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -287,19 +287,12 @@ static void setup_restart_syscall(struct pt_regs *regs)
  */
 static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
-	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
 	sigset_t *oldset = sigmask_to_save();
 	int usig = ksig->sig;
 	int ret;
 
 	/*
-	 * translate the signal
-	 */
-	if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
-		usig = thread->exec_domain->signal_invmap[usig];
-
-	/*
 	 * Set up the stack frame
 	 */
 	if (is_compat_task()) {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 328b8ce4b007..714411f62391 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 
 	preempt_disable();
 	trace_hardirqs_off();
@@ -309,7 +310,7 @@ void cpu_die(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
-	apply_alternatives_all();
+	do_post_cpus_up_work();
 }
 
 void __init smp_prepare_boot_cpu(void)
@@ -635,7 +636,7 @@ void smp_send_stop(void)
 		cpumask_t mask;
 
 		cpumask_copy(&mask, cpu_online_mask);
-		cpu_clear(smp_processor_id(), mask);
+		cpumask_clear_cpu(smp_processor_id(), &mask);
 
 		smp_cross_call(&mask, IPI_CPU_STOP);
 	}
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 2d5ab3c90b82..a40b1343b819 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -37,6 +37,7 @@ asmlinkage long compat_sys_readahead_wrapper(void);
 asmlinkage long compat_sys_fadvise64_64_wrapper(void);
 asmlinkage long compat_sys_sync_file_range2_wrapper(void);
 asmlinkage long compat_sys_fallocate_wrapper(void);
+asmlinkage long compat_sys_mmap2_wrapper(void);
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym)	[nr] = sym,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 32aeea083d93..ec37ab3f524f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@ up_fail:
 void update_vsyscall(struct timekeeper *tk)
 {
 	struct timespec xtime_coarse;
-	u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 
 	++vdso_data->tb_seq_count;
 	smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
 
 	if (!use_syscall) {
-		vdso_data->cs_cycle_last	= tk->tkr.cycle_last;
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_nsec	= tk->tkr.xtime_nsec;
-		vdso_data->cs_mult		= tk->tkr.mult;
-		vdso_data->cs_shift		= tk->tkr.shift;
+		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
 	}
 
 	smp_wmb();
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5d9d2dca530d..a2c29865c3fe 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -23,10 +23,14 @@ jiffies = jiffies_64;
 
 #define HYPERVISOR_TEXT					\
 	/*						\
-	 * Force the alignment to be compatible with	\
-	 * the vectors requirements			\
+	 * Align to 4 KB so that			\
+	 * a) the HYP vector table is at its minimum	\
+	 *    alignment of 2048 bytes			\
+	 * b) the HYP init code will not cross a page	\
+	 *    boundary if its size does not exceed	\
+	 *    4 KB (see related ASSERT() below)		\
 	 */						\
-	. = ALIGN(2048);				\
+	. = ALIGN(SZ_4K);				\
 	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
 	*(.hyp.idmap.text)				\
 	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\
@@ -163,10 +167,11 @@ SECTIONS
 }
 
 /*
- * The HYP init code can't be more than a page long.
+ * The HYP init code can't be more than a page long,
+ * and should not cross a page boundary.
  */
-ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
-       "HYP init code too big")
+ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
+	"HYP init code too big or misaligned")
 
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index f5590c81d95f..5105e297ed5f 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -18,6 +18,7 @@ if VIRTUALIZATION
 
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
+	depends on OF
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
@@ -25,10 +26,10 @@ config KVM
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	select KVM_ARM_VGIC
-	select KVM_ARM_TIMER
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_IRQFD
 	---help---
 	  Support hosting virtualized guest machines.
 
@@ -50,17 +51,4 @@ config KVM_ARM_MAX_VCPUS
 	  large, so only choose a reasonable number that you expect to
 	  actually use.
 
-config KVM_ARM_VGIC
-	bool
-	depends on KVM_ARM_HOST && OF
-	select HAVE_KVM_IRQCHIP
-	---help---
-	  Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-	bool
-	depends on KVM_ARM_VGIC
-	---help---
-	  Adds support for the Architected Timers in virtual machines.
-
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 4e6e09ee4033..d5904f876cdb 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm
 CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
@@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
-kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index c3191168a994..178ba2248a98 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,6 +20,7 @@
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -65,6 +66,25 @@ __do_hyp_init:
 	and	x4, x4, x5
 	ldr	x5, =TCR_EL2_FLAGS
 	orr	x4, x4, x5
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+	/*
+	 * If we are running with VA_BITS < 48, we may be running with an extra
+	 * level of translation in the ID map. This is only the case if system
+	 * RAM is out of range for the currently configured page size and number
+	 * of translation levels, in which case we will also need the extra
+	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+	 *
+	 * However, at EL2, there is only one TTBR register, and we can't switch
+	 * between translation tables *and* update TCR_EL2.T0SZ at the same
+	 * time. Bottom line: we need the extra level in *both* our translation
+	 * tables.
+	 *
+	 * So use the same T0SZ value we use for the ID map.
+	 */
+	ldr_l	x5, idmap_t0sz
+	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+#endif
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
@@ -91,6 +111,10 @@ __do_hyp_init:
 	msr	sctlr_el2, x4
 	isb
 
+	/* Skip the trampoline dance if we merged the boot and runtime PGDs */
+	cmp	x0, x1
+	b.eq	merged
+
 	/* MMU is now enabled. Get ready for the trampoline dance */
 	ldr	x4, =TRAMPOLINE_VA
 	adr	x5, target
@@ -105,6 +129,7 @@ target: /* We're now in the trampoline code, switch page tables */
 	tlbi	alle2
 	dsb	sy
 
+merged:
 	/* Set the stack and new vectors */
 	kern_hyp_va	x2
 	mov	sp, x2
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 58e0c2bdde04..ef7d112f5ce0 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -51,7 +51,7 @@ static int __init early_coherent_pool(char *p)
 }
 early_param("coherent_pool", early_coherent_pool);
 
-static void *__alloc_from_pool(size_t size, struct page **ret_page)
+static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
 {
 	unsigned long val;
 	void *ptr = NULL;
@@ -67,6 +67,8 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
 
 		*ret_page = phys_to_page(phys);
 		ptr = (void *)val;
+		if (flags & __GFP_ZERO)
+			memset(ptr, 0, size);
 	}
 
 	return ptr;
@@ -101,6 +103,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 		flags |= GFP_DMA;
 	if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
 		struct page *page;
+		void *addr;
 
 		size = PAGE_ALIGN(size);
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -109,7 +112,10 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 			return NULL;
 
 		*dma_handle = phys_to_dma(dev, page_to_phys(page));
-		return page_address(page);
+		addr = page_address(page);
+		if (flags & __GFP_ZERO)
+			memset(addr, 0, size);
+		return addr;
 	} else {
 		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
 	}
@@ -146,7 +152,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 
 	if (!coherent && !(flags & __GFP_WAIT)) {
 		struct page *page = NULL;
-		void *addr = __alloc_from_pool(size, &page);
+		void *addr = __alloc_from_pool(size, &page, flags);
 
 		if (addr)
 			*dma_handle = phys_to_dma(dev, page_to_phys(page));
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ae85da6307bb..597831bdddf3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -190,6 +190,8 @@ void __init bootmem_init(void)
 	min = PFN_UP(memblock_start_of_DRAM());
 	max = PFN_DOWN(memblock_end_of_DRAM());
 
+	early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 	 * done after the fixed reservations.
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 54922d1275b8..ed177475dd8c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -47,17 +47,16 @@ static int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_rnd(void)
+unsigned long arch_mmap_rnd(void)
 {
-	unsigned long rnd = 0;
+	unsigned long rnd;
 
-	if (current->flags & PF_RANDOMIZE)
-		rnd = (long)get_random_int() & STACK_RND_MASK;
+	rnd = (unsigned long)get_random_int() & STACK_RND_MASK;
 
 	return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(void)
+static unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
 
@@ -66,7 +65,7 @@ static unsigned long mmap_base(void)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(STACK_TOP - gap - mmap_rnd());
+	return PAGE_ALIGN(STACK_TOP - gap - rnd);
 }
 
 /*
@@ -75,15 +74,20 @@ static unsigned long mmap_base(void)
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
 	/*
 	 * Fall back to the standard layout if the personality bit is set, or
 	 * if the expected stack growth is unlimited:
 	 */
 	if (mmap_is_legacy()) {
-		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base();
+		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c6daaf6c6f97..5b8b664422d3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,6 +40,8 @@
 
 #include "mm.h"
 
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -454,6 +456,7 @@ void __init paging_init(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 }
 
 /*
@@ -461,8 +464,10 @@ void __init paging_init(void)
  */
 void setup_mm_for_reboot(void)
 {
-	cpu_switch_mm(idmap_pg_dir, &init_mm);
+	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
 }
 
 /*
@@ -550,10 +555,10 @@ void vmemmap_free(unsigned long start, unsigned long end)
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
 static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
 #endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
 #endif
 
@@ -627,10 +632,7 @@ void __set_fixmap(enum fixed_addresses idx,
 	unsigned long addr = __fix_to_virt(idx);
 	pte_t *pte;
 
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
-		return;
-	}
+	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
 	pte = fixmap_pte(addr);
 
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 1d3ec3ddd84b..e47ed1c5dce1 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -73,7 +73,6 @@ int set_memory_ro(unsigned long addr, int numpages)
 					__pgprot(PTE_RDONLY),
 					__pgprot(PTE_WRITE));
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
@@ -81,7 +80,6 @@ int set_memory_rw(unsigned long addr, int numpages)
 					__pgprot(PTE_WRITE),
 					__pgprot(PTE_RDONLY));
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 005d29e2977d..4c4d93c4bf65 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -52,3 +52,13 @@
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	ldr_l	\tmpreg, idmap_t0sz
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 28eebfb6af76..cdd754e19b9b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
 	msr	cpacr_el1, x6
 	msr	ttbr0_el1, x1
 	msr	ttbr1_el1, x7
+	tcr_set_idmap_t0sz x8, x7
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
@@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	tcr_set_idmap_t0sz	x10, x9
+
 	/*
 	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
 	 * TCR_EL1.
diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h
index d232888b99d5..0388ece75b02 100644
--- a/arch/avr32/include/asm/elf.h
+++ b/arch/avr32/include/asm/elf.h
@@ -84,7 +84,7 @@ typedef struct user_fpu_struct elf_fpregset_t;
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
 
 
 /* This yields a mask that user programs can use to figure out what
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index d56afa99a514..d4d3079541ea 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -17,11 +17,9 @@
 #include <asm/types.h>
 
 struct task_struct;
-struct exec_domain;
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;
 	__s32			preempt_count;	/* 0 => preemptable, <0 => BUG */
@@ -36,7 +34,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)						\
 {									\
 	.task		= &tsk,						\
-	.exec_domain	= &default_exec_domain,				\
 	.flags		= 0,						\
 	.cpu		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
index e41c84516e5d..2c9764fe3532 100644
--- a/arch/avr32/kernel/asm-offsets.c
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -12,7 +12,6 @@
 void foo(void)
 {
 	OFFSET(TI_task, thread_info, task);
-	OFFSET(TI_exec_domain, thread_info, exec_domain);
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_cpu, thread_info, cpu);
 	OFFSET(TI_preempt_count, thread_info, preempt_count);
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 57c3a8bd583d..2966b93850a1 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -37,7 +37,6 @@ typedef unsigned long mm_segment_t;
 
 struct thread_info {
 	struct task_struct *task;	/* main task structure */
-	struct exec_domain *exec_domain;	/* execution domain */
 	unsigned long flags;	/* low level flags */
 	int cpu;		/* cpu we're on */
 	int preempt_count;	/* 0 => preemptable, <0 => BUG */
@@ -53,7 +52,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
@@ -76,15 +74,6 @@ static inline struct thread_info *current_thread_info(void)
 #endif				/* __ASSEMBLY__ */
 
 /*
- * Offsets in thread_info structure, used in assembly code
- */
-#define TI_TASK		0
-#define TI_EXECDOMAIN	4
-#define TI_FLAGS	8
-#define TI_CPU		12
-#define TI_PREEMPT	16
-
-/*
  * thread information flag bit numbers
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
diff --git a/arch/blackfin/kernel/asm-offsets.c b/arch/blackfin/kernel/asm-offsets.c
index 37fcae950216..486560aea050 100644
--- a/arch/blackfin/kernel/asm-offsets.c
+++ b/arch/blackfin/kernel/asm-offsets.c
@@ -42,6 +42,12 @@ int main(void)
 	DEFINE(THREAD_PC, offsetof(struct thread_struct, pc));
 	DEFINE(KERNEL_STACK_SIZE, THREAD_SIZE);
 
+	/* offsets in thread_info struct */
+	OFFSET(TI_TASK, thread_info, task);
+	OFFSET(TI_FLAGS, thread_info, flags);
+	OFFSET(TI_CPU, thread_info, cpu);
+	OFFSET(TI_PREEMPT, thread_info, preempt_count);
+
 	/* offsets into the pt_regs */
 	DEFINE(PT_ORIG_R0, offsetof(struct pt_regs, orig_r0));
 	DEFINE(PT_ORIG_P0, offsetof(struct pt_regs, orig_p0));
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index f2a8b5493bd3..ea570db598e5 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -151,11 +151,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 
 	frame = get_sigframe(ksig, sizeof(*frame));
 
-	err |= __put_user((current_thread_info()->exec_domain
-			   && current_thread_info()->exec_domain->signal_invmap
-			   && ksig->sig < 32
-			   ? current_thread_info()->exec_domain->
-			   signal_invmap[ksig->sig] : ksig->sig), &frame->sig);
+	err |= __put_user(ksig->sig, &frame->sig);
 
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index de5c2c3ebd9b..1ed85ddadc0d 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -18,6 +18,7 @@
 #include <asm/fixed_code.h>
 #include <asm/pseudo_instructions.h>
 #include <asm/pda.h>
+#include <asm/asm-offsets.h>
 
 #ifdef CONFIG_KGDB
 # include <linux/kgdb.h>
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
index 11789beca75a..8c0c80fd1a45 100644
--- a/arch/blackfin/mach-bf561/smp.c
+++ b/arch/blackfin/mach-bf561/smp.c
@@ -124,7 +124,7 @@ void platform_send_ipi(cpumask_t callmap, int irq)
 	unsigned int cpu;
 	int offset = (irq == IRQ_SUPPLE_0) ? 6 : 8;
 
-	for_each_cpu_mask(cpu, callmap) {
+	for_each_cpu(cpu, &callmap) {
 		BUG_ON(cpu >= 2);
 		SSYNC();
 		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 8ad3e90cc8fc..1c7259597395 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -413,16 +413,14 @@ int __cpu_disable(void)
 	return 0;
 }
 
-static DECLARE_COMPLETION(cpu_killed);
-
 int __cpu_die(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return cpu_wait_death(cpu, 5);
 }
 
 void cpu_die(void)
 {
-	complete(&cpu_killed);
+	(void)cpu_report_death();
 
 	atomic_dec(&init_mm.mm_users);
 	atomic_dec(&init_mm.mm_count);
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile
index e72eb3417239..6b0be670ddfa 100644
--- a/arch/c6x/Makefile
+++ b/arch/c6x/Makefile
@@ -8,7 +8,7 @@
 
 KBUILD_DEFCONFIG := dsk6455_defconfig
 
-cflags-y += -mno-dsbt -msdata=none
+cflags-y += -mno-dsbt -msdata=none -D__linux__
 
 cflags-$(CONFIG_C6X_BIG_KERNEL) += -mlong-calls
 
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 2de73391b81e..ae0a51f5376c 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -41,6 +41,7 @@ generic-y += resource.h
 generic-y += scatterlist.h
 generic-y += segment.h
 generic-y += sembuf.h
+generic-y += serial.h
 generic-y += shmbuf.h
 generic-y += shmparam.h
 generic-y += siginfo.h
diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h
index 88bd0d899bdb..bbd7774e4d4e 100644
--- a/arch/c6x/include/asm/dma-mapping.h
+++ b/arch/c6x/include/asm/dma-mapping.h
@@ -17,6 +17,14 @@
 
 #define dma_supported(d, m)	1
 
+static inline void dma_sync_single_range_for_device(struct device *dev,
+						    dma_addr_t addr,
+						    unsigned long offset,
+						    size_t size,
+						    enum dma_data_direction dir)
+{
+}
+
 static inline int dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h
new file mode 100644
index 000000000000..a1858bd5f6c8
--- /dev/null
+++ b/arch/c6x/include/asm/flat.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_C6X_FLAT_H
+#define __ASM_C6X_FLAT_H
+
+#define flat_argvp_envp_on_stack()			0
+#define flat_old_ram_flag(flags)			(flags)
+#define flat_reloc_valid(reloc, size)			((reloc) <= (size))
+#define flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
+#define flat_put_addr_at_rp(rp, val, relval)		put_unaligned(val, rp)
+#define flat_get_relocate_addr(rel)			(rel)
+#define flat_set_persistent(relval, p)			0
+
+#endif /* __ASM_C6X_FLAT_H */
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 78d4483ba40c..ec4db6df5e0d 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -67,6 +67,11 @@ extern unsigned long empty_zero_page;
  */
 #define pgtable_cache_init()   do { } while (0)
 
+/*
+ * c6x is !MMU, so define the simpliest implementation
+ */
+#define pgprot_writecombine pgprot_noncached
+
 #include <asm-generic/pgtable.h>
 
 #endif /* _ASM_C6X_PGTABLE_H */
diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h
index 696804475f55..852afb209afb 100644
--- a/arch/c6x/include/asm/setup.h
+++ b/arch/c6x/include/asm/setup.h
@@ -12,6 +12,7 @@
 #define _ASM_C6X_SETUP_H
 
 #include <uapi/asm/setup.h>
+#include <linux/types.h>
 
 #ifndef __ASSEMBLY__
 extern int c6x_add_memory(phys_addr_t start, unsigned long size);
diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h
index 584e253f3217..acc70c135ab8 100644
--- a/arch/c6x/include/asm/thread_info.h
+++ b/arch/c6x/include/asm/thread_info.h
@@ -40,7 +40,6 @@ typedef struct {
  */
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	int			cpu;		/* cpu we're on */
 	int			preempt_count;	/* 0 = preemptable, <0 = BUG */
@@ -55,7 +54,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c
index 57d2ea8d1977..3ae9f5a166a0 100644
--- a/arch/c6x/kernel/process.c
+++ b/arch/c6x/kernel/process.c
@@ -101,7 +101,6 @@ void start_thread(struct pt_regs *regs, unsigned int pc, unsigned long usp)
 	 */
 	usp -= 8;
 
-	set_fs(USER_DS);
 	regs->pc  = pc;
 	regs->sp  = usp;
 	regs->tsr |= 0x40; /* set user mode */
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 757128868d43..72e17f7ebd6f 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -26,7 +26,8 @@
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/of.h>
-
+#include <linux/console.h>
+#include <linux/screen_info.h>
 
 #include <asm/sections.h>
 #include <asm/div64.h>
@@ -38,6 +39,8 @@
 
 static const char *c6x_soc_name;
 
+struct screen_info screen_info;
+
 int c6x_num_cores;
 EXPORT_SYMBOL_GPL(c6x_num_cores);
 
@@ -60,6 +63,7 @@ unsigned char c6x_fuse_mac[6];
 
 unsigned long memory_start;
 unsigned long memory_end;
+EXPORT_SYMBOL(memory_end);
 
 unsigned long ram_start;
 unsigned long ram_end;
@@ -265,8 +269,8 @@ int __init c6x_add_memory(phys_addr_t start, unsigned long size)
  */
 notrace void __init machine_init(unsigned long dt_ptr)
 {
-	const void *dtb = __va(dt_ptr);
-	const void *fdt = _fdt_start;
+	void *dtb = __va(dt_ptr);
+	void *fdt = _fdt_start;
 
 	/* interrupts must be masked */
 	set_creg(IER, 2);
diff --git a/arch/c6x/kernel/time.c b/arch/c6x/kernel/time.c
index 356ee84cad95..04845aaf5985 100644
--- a/arch/c6x/kernel/time.c
+++ b/arch/c6x/kernel/time.c
@@ -49,7 +49,7 @@ u64 sched_clock(void)
 	return (tsc * sched_clock_multiplier) >> SCHED_CLOCK_SHIFT;
 }
 
-void time_init(void)
+void __init time_init(void)
 {
 	u64 tmp = (u64)NSEC_PER_SEC << SCHED_CLOCK_SHIFT;
 
diff --git a/arch/c6x/platforms/cache.c b/arch/c6x/platforms/cache.c
index 86318a16a252..46fd2d530271 100644
--- a/arch/c6x/platforms/cache.c
+++ b/arch/c6x/platforms/cache.c
@@ -350,6 +350,7 @@ void L1P_cache_block_invalidate(unsigned int start, unsigned int end)
 			      (unsigned int *) end,
 			      IMCR_L1PIBAR, IMCR_L1PIWC);
 }
+EXPORT_SYMBOL(L1P_cache_block_invalidate);
 
 void L1D_cache_block_invalidate(unsigned int start, unsigned int end)
 {
@@ -371,6 +372,7 @@ void L1D_cache_block_writeback(unsigned int start, unsigned int end)
 			      (unsigned int *) end,
 			      IMCR_L1DWBAR, IMCR_L1DWWC);
 }
+EXPORT_SYMBOL(L1D_cache_block_writeback);
 
 /*
  *  L2 block operations
diff --git a/arch/cris/arch-v10/kernel/fasttimer.c b/arch/cris/arch-v10/kernel/fasttimer.c
index 48a59afbeeb1..e9298739d72e 100644
--- a/arch/cris/arch-v10/kernel/fasttimer.c
+++ b/arch/cris/arch-v10/kernel/fasttimer.c
@@ -527,7 +527,8 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 			i = debug_log_cnt;
 
 		while (i != end_i || debug_log_cnt_wrapped) {
-			if (seq_printf(m, debug_log_string[i], debug_log_value[i]) < 0)
+			seq_printf(m, debug_log_string[i], debug_log_value[i]);
+			if (seq_has_overflowed(m))
 				return 0;
 			i = (i+1) % DEBUG_LOG_MAX;
 		}
@@ -542,24 +543,22 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 		int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS;
 
 #if 1 //ndef FAST_TIMER_LOG
-		seq_printf(m, "div: %i freq: %i delay: %i"
-			   "\n",
+		seq_printf(m, "div: %i freq: %i delay: %i\n",
 			   timer_div_settings[cur],
 			   timer_freq_settings[cur],
 			   timer_delay_settings[cur]);
 #endif
 #ifdef FAST_TIMER_LOG
 		t = &timer_started_log[cur];
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 #endif
 	}
@@ -571,16 +570,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 	seq_printf(m, "Timers added: %i\n", fast_timers_added);
 	for (i = 0; i < num_to_show; i++) {
 		t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS];
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 	}
 	seq_putc(m, '\n');
@@ -590,16 +588,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 	seq_printf(m, "Timers expired: %i\n", fast_timers_expired);
 	for (i = 0; i < num_to_show; i++) {
 		t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS];
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 	}
 	seq_putc(m, '\n');
@@ -611,19 +608,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 	while (t) {
 		nextt = t->next;
 		local_irq_restore(flags);
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-/*                      " func: 0x%08lX" */
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data
-/*                      , t->function */
-			       ) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 		local_irq_save(flags);
 		if (t->next != nextt)
diff --git a/arch/cris/arch-v10/kernel/setup.c b/arch/cris/arch-v10/kernel/setup.c
index 4f96d71b5154..7ab31f1c7540 100644
--- a/arch/cris/arch-v10/kernel/setup.c
+++ b/arch/cris/arch-v10/kernel/setup.c
@@ -63,35 +63,37 @@ int show_cpuinfo(struct seq_file *m, void *v)
 	else
 		info = &cpu_info[revision];
 
-	return seq_printf(m,
-		       "processor\t: 0\n" 
-		       "cpu\t\t: CRIS\n"
-		       "cpu revision\t: %lu\n"
-		       "cpu model\t: %s\n"
-		       "cache size\t: %d kB\n"
-		       "fpu\t\t: %s\n"
-		       "mmu\t\t: %s\n"
-		       "mmu DMA bug\t: %s\n"
-		       "ethernet\t: %s Mbps\n"
-		       "token ring\t: %s\n"
-		       "scsi\t\t: %s\n"
-		       "ata\t\t: %s\n"
-		       "usb\t\t: %s\n"
-		       "bogomips\t: %lu.%02lu\n",
+	seq_printf(m,
+		   "processor\t: 0\n"
+		   "cpu\t\t: CRIS\n"
+		   "cpu revision\t: %lu\n"
+		   "cpu model\t: %s\n"
+		   "cache size\t: %d kB\n"
+		   "fpu\t\t: %s\n"
+		   "mmu\t\t: %s\n"
+		   "mmu DMA bug\t: %s\n"
+		   "ethernet\t: %s Mbps\n"
+		   "token ring\t: %s\n"
+		   "scsi\t\t: %s\n"
+		   "ata\t\t: %s\n"
+		   "usb\t\t: %s\n"
+		   "bogomips\t: %lu.%02lu\n",
 
-		       revision,
-		       info->model,
-		       info->cache,
-		       info->flags & HAS_FPU ? "yes" : "no",
-		       info->flags & HAS_MMU ? "yes" : "no",
-		       info->flags & HAS_MMU_BUG ? "yes" : "no",
-		       info->flags & HAS_ETHERNET100 ? "10/100" : "10",
-		       info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
-		       info->flags & HAS_SCSI ? "yes" : "no",
-		       info->flags & HAS_ATA ? "yes" : "no",
-		       info->flags & HAS_USB ? "yes" : "no",
-		       (loops_per_jiffy * HZ + 500) / 500000,
-		       ((loops_per_jiffy * HZ + 500) / 5000) % 100);
+		   revision,
+		   info->model,
+		   info->cache,
+		   info->flags & HAS_FPU ? "yes" : "no",
+		   info->flags & HAS_MMU ? "yes" : "no",
+		   info->flags & HAS_MMU_BUG ? "yes" : "no",
+		   info->flags & HAS_ETHERNET100 ? "10/100" : "10",
+		   info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
+		   info->flags & HAS_SCSI ? "yes" : "no",
+		   info->flags & HAS_ATA ? "yes" : "no",
+		   info->flags & HAS_USB ? "yes" : "no",
+		   (loops_per_jiffy * HZ + 500) / 500000,
+		   ((loops_per_jiffy * HZ + 500) / 5000) % 100);
+
+	return 0;
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index 74d7ba35120d..7122d9773b13 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -321,8 +321,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	if (err)
 		return -EFAULT;
 
-	/* TODO what is the current->exec_domain stuff and invmap ? */
-
 	/* Set up registers for signal handler */
 
 	/* What we enter NOW   */
diff --git a/arch/cris/arch-v32/kernel/fasttimer.c b/arch/cris/arch-v32/kernel/fasttimer.c
index b130c2c5fdd8..5c84dbb99f30 100644
--- a/arch/cris/arch-v32/kernel/fasttimer.c
+++ b/arch/cris/arch-v32/kernel/fasttimer.c
@@ -501,7 +501,8 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 			i = debug_log_cnt;
 
 		while ((i != end_i || debug_log_cnt_wrapped)) {
-			if (seq_printf(m, debug_log_string[i], debug_log_value[i]) < 0)
+			seq_printf(m, debug_log_string[i], debug_log_value[i]);
+			if (seq_has_overflowed(m))
 				return 0;
 			i = (i+1) % DEBUG_LOG_MAX;
 		}
@@ -516,23 +517,21 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 		int cur = (fast_timers_started - i - 1) % NUM_TIMER_STATS;
 
 #if 1 //ndef FAST_TIMER_LOG
-		seq_printf(m, "div: %i delay: %i"
-			   "\n",
+		seq_printf(m, "div: %i delay: %i\n",
 			   timer_div_settings[cur],
 			   timer_delay_settings[cur]);
 #endif
 #ifdef FAST_TIMER_LOG
 		t = &timer_started_log[cur];
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 #endif
 	}
@@ -544,16 +543,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 	seq_printf(m, "Timers added: %i\n", fast_timers_added);
 	for (i = 0; i < num_to_show; i++) {
 		t = &timer_added_log[(fast_timers_added - i - 1) % NUM_TIMER_STATS];
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 	}
 	seq_putc(m, '\n');
@@ -563,16 +561,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 	seq_printf(m, "Timers expired: %i\n", fast_timers_expired);
 	for (i = 0; i < num_to_show; i++){
 		t = &timer_expired_log[(fast_timers_expired - i - 1) % NUM_TIMER_STATS];
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 	}
 	seq_putc(m, '\n');
@@ -584,19 +581,15 @@ static int proc_fasttimer_show(struct seq_file *m, void *v)
 	while (t != NULL){
 		nextt = t->next;
 		local_irq_restore(flags);
-		if (seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu "
-			       "d: %6li us data: 0x%08lX"
-/*			" func: 0x%08lX" */
-			       "\n",
-			       t->name,
-			       (unsigned long)t->tv_set.tv_jiff,
-			       (unsigned long)t->tv_set.tv_usec,
-			       (unsigned long)t->tv_expires.tv_jiff,
-			       (unsigned long)t->tv_expires.tv_usec,
-			       t->delay_us,
-			       t->data
-/*                      , t->function */
-			       ) < 0)
+		seq_printf(m, "%-14s s: %6lu.%06lu e: %6lu.%06lu d: %6li us data: 0x%08lX\n",
+			   t->name,
+			   (unsigned long)t->tv_set.tv_jiff,
+			   (unsigned long)t->tv_set.tv_usec,
+			   (unsigned long)t->tv_expires.tv_jiff,
+			   (unsigned long)t->tv_expires.tv_usec,
+			   t->delay_us,
+			   t->data);
+		if (seq_has_overflowed(m))
 			return 0;
 		local_irq_save(flags);
 		if (t->next != nextt)
diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c
index 61e10ae65296..81715c683baf 100644
--- a/arch/cris/arch-v32/kernel/setup.c
+++ b/arch/cris/arch-v32/kernel/setup.c
@@ -77,36 +77,38 @@ int show_cpuinfo(struct seq_file *m, void *v)
 		}
 	}
 
-	return seq_printf(m,
-		"processor\t: %d\n"
-		"cpu\t\t: CRIS\n"
-		"cpu revision\t: %lu\n"
-		"cpu model\t: %s\n"
-		"cache size\t: %d KB\n"
-		"fpu\t\t: %s\n"
-		"mmu\t\t: %s\n"
-		"mmu DMA bug\t: %s\n"
-		"ethernet\t: %s Mbps\n"
-		"token ring\t: %s\n"
-		"scsi\t\t: %s\n"
-		"ata\t\t: %s\n"
-		"usb\t\t: %s\n"
-		"bogomips\t: %lu.%02lu\n\n",
-
-		cpu,
-		revision,
-		info->cpu_model,
-		info->cache_size,
-		info->flags & HAS_FPU ? "yes" : "no",
-		info->flags & HAS_MMU ? "yes" : "no",
-		info->flags & HAS_MMU_BUG ? "yes" : "no",
-		info->flags & HAS_ETHERNET100 ? "10/100" : "10",
-		info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
-		info->flags & HAS_SCSI ? "yes" : "no",
-		info->flags & HAS_ATA ? "yes" : "no",
-		info->flags & HAS_USB ? "yes" : "no",
-		(loops_per_jiffy * HZ + 500) / 500000,
-		((loops_per_jiffy * HZ + 500) / 5000) % 100);
+	seq_printf(m,
+		   "processor\t: %d\n"
+		   "cpu\t\t: CRIS\n"
+		   "cpu revision\t: %lu\n"
+		   "cpu model\t: %s\n"
+		   "cache size\t: %d KB\n"
+		   "fpu\t\t: %s\n"
+		   "mmu\t\t: %s\n"
+		   "mmu DMA bug\t: %s\n"
+		   "ethernet\t: %s Mbps\n"
+		   "token ring\t: %s\n"
+		   "scsi\t\t: %s\n"
+		   "ata\t\t: %s\n"
+		   "usb\t\t: %s\n"
+		   "bogomips\t: %lu.%02lu\n\n",
+
+		   cpu,
+		   revision,
+		   info->cpu_model,
+		   info->cache_size,
+		   info->flags & HAS_FPU ? "yes" : "no",
+		   info->flags & HAS_MMU ? "yes" : "no",
+		   info->flags & HAS_MMU_BUG ? "yes" : "no",
+		   info->flags & HAS_ETHERNET100 ? "10/100" : "10",
+		   info->flags & HAS_TOKENRING ? "4/16 Mbps" : "no",
+		   info->flags & HAS_SCSI ? "yes" : "no",
+		   info->flags & HAS_ATA ? "yes" : "no",
+		   info->flags & HAS_USB ? "yes" : "no",
+		   (loops_per_jiffy * HZ + 500) / 500000,
+		   ((loops_per_jiffy * HZ + 500) / 5000) % 100);
+
+	return 0;
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 870e3e069318..0c9ce9eac614 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -287,8 +287,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	/* TODO: what is the current->exec_domain stuff and invmap ? */
-
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 7286db5ed90e..4ead1b40d2d7 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -28,7 +28,6 @@
 #ifndef __ASSEMBLY__
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
@@ -50,7 +49,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)				\
 {							\
 	.task		= &tsk,				\
-	.exec_domain	= &default_exec_domain,		\
 	.flags		= 0,				\
 	.cpu		= 0,				\
 	.preempt_count	= INIT_PREEMPT_COUNT,		\
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index 6b917f1c2955..ccba3b6ce918 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -31,7 +31,6 @@
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		status;		/* thread-synchronous flags */
 	__u32			cpu;		/* current CPU */
@@ -59,7 +58,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c
index 446e89d500cc..8414293f213a 100644
--- a/arch/frv/kernel/asm-offsets.c
+++ b/arch/frv/kernel/asm-offsets.c
@@ -34,7 +34,6 @@ void foo(void)
 {
 	/* offsets into the thread_info structure */
 	OFFSET(TI_TASK,			thread_info, task);
-	OFFSET(TI_EXEC_DOMAIN,		thread_info, exec_domain);
 	OFFSET(TI_FLAGS,		thread_info, flags);
 	OFFSET(TI_STATUS,		thread_info, status);
 	OFFSET(TI_CPU,			thread_info, cpu);
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 336713ab4745..82d5e914dc15 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -174,22 +174,14 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
 static int setup_frame(struct ksignal *ksig, sigset_t *set)
 {
 	struct sigframe __user *frame;
-	int rsig, sig = ksig->sig;
-
-	set_fs(USER_DS);
+	int sig = ksig->sig;
 
 	frame = get_sigframe(ksig, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	rsig = sig;
-	if (sig < 32 &&
-	    __current_thread_info->exec_domain &&
-	    __current_thread_info->exec_domain->signal_invmap)
-		rsig = __current_thread_info->exec_domain->signal_invmap[sig];
-
-	if (__put_user(rsig, &frame->sig) < 0)
+	if (__put_user(sig, &frame->sig) < 0)
 		return -EFAULT;
 
 	if (setup_sigcontext(&frame->sc, set->sig[0]))
@@ -255,22 +247,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set)
 static int setup_rt_frame(struct ksignal *ksig, sigset_t *set)
 {
 	struct rt_sigframe __user *frame;
-	int rsig, sig = ksig->sig;
-
-	set_fs(USER_DS);
+	int sig = ksig->sig;
 
 	frame = get_sigframe(ksig, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	rsig = sig;
-	if (sig < 32 &&
-	    __current_thread_info->exec_domain &&
-	    __current_thread_info->exec_domain->signal_invmap)
-		rsig = __current_thread_info->exec_domain->signal_invmap[sig];
-
-	if (__put_user(rsig,		&frame->sig) ||
+	if (__put_user(sig,		&frame->sig) ||
 	    __put_user(&frame->info,	&frame->pinfo) ||
 	    __put_user(&frame->uc,	&frame->puc))
 		return -EFAULT;
diff --git a/arch/frv/mb93090-mb00/pci-vdk.c b/arch/frv/mb93090-mb00/pci-vdk.c
index b073f4d771a5..f211839e2cae 100644
--- a/arch/frv/mb93090-mb00/pci-vdk.c
+++ b/arch/frv/mb93090-mb00/pci-vdk.c
@@ -316,6 +316,7 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 
 int __init pcibios_init(void)
 {
+	struct pci_bus *bus;
 	struct pci_ops *dir = NULL;
 	LIST_HEAD(resources);
 
@@ -383,12 +384,15 @@ int __init pcibios_init(void)
 	printk("PCI: Probing PCI hardware\n");
 	pci_add_resource(&resources, &pci_ioport_resource);
 	pci_add_resource(&resources, &pci_iomem_resource);
-	pci_scan_root_bus(NULL, 0, pci_root_ops, NULL, &resources);
+	bus = pci_scan_root_bus(NULL, 0, pci_root_ops, NULL, &resources);
 
 	pcibios_irq_init();
 	pcibios_fixup_irqs();
 	pcibios_resource_survey();
+	if (!bus)
+		return 0;
 
+	pci_bus_add_devices(bus);
 	return 0;
 }
 
diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h
index bacd3d6030c5..b80fe1db7b64 100644
--- a/arch/hexagon/include/asm/thread_info.h
+++ b/arch/hexagon/include/asm/thread_info.h
@@ -47,7 +47,6 @@ typedef struct {
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain      *exec_domain;   /* execution domain */
 	unsigned long		flags;          /* low level flags */
 	__u32                   cpu;            /* current cpu */
 	int                     preempt_count;  /* 0=>preemptible,<0=>BUG */
@@ -77,7 +76,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)                   \
 {                                               \
 	.task           = &tsk,                 \
-	.exec_domain    = &default_exec_domain, \
 	.flags          = 0,                    \
 	.cpu            = 0,                    \
 	.preempt_count  = 1,                    \
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 0a0dd5c05b46..a9ebd471823a 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -37,8 +37,6 @@
  */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
-	/* Set to run with user-mode data segmentation */
-	set_fs(USER_DS);
 	/* We want to zero all data-containing registers. Is this overkill? */
 	memset(regs, 0, sizeof(*regs));
 	/* We might want to also zero all Processor registers here */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 074e52bf815c..4f9a6661491b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -1,3 +1,8 @@
+config PGTABLE_LEVELS
+	int "Page Table Levels" if !IA64_PAGE_SIZE_64KB
+	range 3 4 if !IA64_PAGE_SIZE_64KB
+	default 3
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -286,19 +291,6 @@ config IA64_PAGE_SIZE_64KB
 
 endchoice
 
-choice
-	prompt "Page Table Levels"
-	default PGTABLE_3
-
-config PGTABLE_3
-	bool "3 Levels"
-
-config PGTABLE_4
-	depends on !IA64_PAGE_SIZE_64KB
-	bool "4 Levels"
-
-endchoice
-
 if IA64_HP_SIM
 config HZ
 	default 32
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index a1d91ab4c5ef..aa0fdf125aba 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -117,7 +117,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
 #ifdef CONFIG_ACPI_NUMA
 extern cpumask_t early_cpu_possible_map;
 #define for_each_possible_early_cpu(cpu)  \
-	for_each_cpu_mask((cpu), early_cpu_possible_map)
+	for_each_cpu((cpu), &early_cpu_possible_map)
 
 static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
 {
@@ -125,13 +125,13 @@ static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
 	int cpu;
 	int next_nid = 0;
 
-	low_cpu = cpus_weight(early_cpu_possible_map);
+	low_cpu = cpumask_weight(&early_cpu_possible_map);
 
 	high_cpu = max(low_cpu, min_cpus);
 	high_cpu = min(high_cpu + reserve_cpus, NR_CPUS);
 
 	for (cpu = low_cpu; cpu < high_cpu; cpu++) {
-		cpu_set(cpu, early_cpu_possible_map);
+		cpumask_set_cpu(cpu, &early_cpu_possible_map);
 		if (node_cpuid[cpu].nid == NUMA_NO_NODE) {
 			node_cpuid[cpu].nid = next_nid;
 			next_nid++;
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index 1f1bf144fe62..ec48bb9f95e1 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -173,7 +173,7 @@ get_order (unsigned long size)
    */
   typedef struct { unsigned long pte; } pte_t;
   typedef struct { unsigned long pmd; } pmd_t;
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
   typedef struct { unsigned long pud; } pud_t;
 #endif
   typedef struct { unsigned long pgd; } pgd_t;
@@ -182,7 +182,7 @@ get_order (unsigned long size)
 
 # define pte_val(x)	((x).pte)
 # define pmd_val(x)	((x).pmd)
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 # define pud_val(x)	((x).pud)
 #endif
 # define pgd_val(x)	((x).pgd)
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 5767cdfc08db..f5e70e961948 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -32,7 +32,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	quicklist_free(0, NULL, pgd);
 }
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 static inline void
 pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
 {
@@ -49,7 +49,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 	quicklist_free(0, NULL, pud);
 }
 #define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
-#endif /* CONFIG_PGTABLE_4 */
+#endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
 static inline void
 pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7b6f8801df57..9f3ed9ee8f13 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -99,7 +99,7 @@
 #define PMD_MASK	(~(PMD_SIZE-1))
 #define PTRS_PER_PMD	(1UL << (PTRS_PER_PTD_SHIFT))
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 /*
  * Definitions for second level:
  *
@@ -117,7 +117,7 @@
  *
  * PGDIR_SHIFT determines what a first-level page table entry can map.
  */
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define PGDIR_SHIFT		(PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
 #else
 #define PGDIR_SHIFT		(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
@@ -180,7 +180,7 @@
 #define __S111	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
 
 #define pgd_ERROR(e)	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define pud_ERROR(e)	printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
 #endif
 #define pmd_ERROR(e)	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
@@ -281,7 +281,7 @@ extern unsigned long VMALLOC_END;
 #define pud_page_vaddr(pud)		((unsigned long) __va(pud_val(pud) & _PFN_MASK))
 #define pud_page(pud)			virt_to_page((pud_val(pud) + PAGE_OFFSET))
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define pgd_none(pgd)			(!pgd_val(pgd))
 #define pgd_bad(pgd)			(!ia64_phys_addr_valid(pgd_val(pgd)))
 #define pgd_present(pgd)		(pgd_val(pgd) != 0UL)
@@ -384,7 +384,7 @@ pgd_offset (const struct mm_struct *mm, unsigned long address)
    here.  */
 #define pgd_offset_gate(mm, addr)	pgd_offset_k(addr)
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 /* Find an entry in the second-level page table.. */
 #define pud_offset(dir,addr) \
 	((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
@@ -586,7 +586,7 @@ extern struct page *zero_page_memmap_ptr;
 #define __HAVE_ARCH_PGD_OFFSET_GATE
 
 
-#ifndef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
 #endif
 #include <asm-generic/pgtable.h>
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index c16f21a068ff..aa995b67c3f5 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -20,7 +20,6 @@
  */
 struct thread_info {
 	struct task_struct *task;	/* XXX not really needed, except for dup_task_struct() */
-	struct exec_domain *exec_domain;/* execution domain */
 	__u32 flags;			/* thread_info flags (see TIF_*) */
 	__u32 cpu;			/* current CPU */
 	__u32 last_cpu;			/* Last CPU thread ran on */
@@ -40,7 +39,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.addr_limit	= KERNEL_DS,		\
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 20678a9ed11a..d68b5cf81e31 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -51,7 +51,7 @@ obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
 CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 
 # The gate DSO image is built using a special linker script.
-include $(srctree)/arch/ia64/kernel/Makefile.gate
+include $(src)/Makefile.gate
 # tell compiled for native
 CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 2c4498919d3c..35bf22cc71b7 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -483,7 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	    (pa->apic_id << 8) | (pa->local_sapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pxm;
-	cpu_set(srat_num_cpus, early_cpu_possible_map);
+	cpumask_set_cpu(srat_num_cpus, &early_cpu_possible_map);
 	srat_num_cpus++;
 }
 
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index cd44a57c73be..bc9501e36e77 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -690,7 +690,7 @@ skip_numa_setup:
 	do {
 		if (++cpu >= nr_cpu_ids)
 			cpu = 0;
-	} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
+	} while (!cpu_online(cpu) || !cpumask_test_cpu(cpu, &domain));
 
 	return cpu_physical_id(cpu);
 #else  /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 698d8fefde6c..eaa3199f98c8 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -109,13 +109,13 @@ static inline int find_unassigned_vector(cpumask_t domain)
 	int pos, vector;
 
 	cpumask_and(&mask, &domain, cpu_online_mask);
-	if (cpus_empty(mask))
+	if (cpumask_empty(&mask))
 		return -EINVAL;
 
 	for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) {
 		vector = IA64_FIRST_DEVICE_VECTOR + pos;
-		cpus_and(mask, domain, vector_table[vector]);
-		if (!cpus_empty(mask))
+		cpumask_and(&mask, &domain, &vector_table[vector]);
+		if (!cpumask_empty(&mask))
 			continue;
 		return vector;
 	}
@@ -132,18 +132,18 @@ static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
 	BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);
 
 	cpumask_and(&mask, &domain, cpu_online_mask);
-	if (cpus_empty(mask))
+	if (cpumask_empty(&mask))
 		return -EINVAL;
-	if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
+	if ((cfg->vector == vector) && cpumask_equal(&cfg->domain, &domain))
 		return 0;
 	if (cfg->vector != IRQ_VECTOR_UNASSIGNED)
 		return -EBUSY;
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu(cpu, &mask)
 		per_cpu(vector_irq, cpu)[vector] = irq;
 	cfg->vector = vector;
 	cfg->domain = domain;
 	irq_status[irq] = IRQ_USED;
-	cpus_or(vector_table[vector], vector_table[vector], domain);
+	cpumask_or(&vector_table[vector], &vector_table[vector], &domain);
 	return 0;
 }
 
@@ -161,7 +161,6 @@ int bind_irq_vector(int irq, int vector, cpumask_t domain)
 static void __clear_irq_vector(int irq)
 {
 	int vector, cpu;
-	cpumask_t mask;
 	cpumask_t domain;
 	struct irq_cfg *cfg = &irq_cfg[irq];
 
@@ -169,13 +168,12 @@ static void __clear_irq_vector(int irq)
 	BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED);
 	vector = cfg->vector;
 	domain = cfg->domain;
-	cpumask_and(&mask, &cfg->domain, cpu_online_mask);
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu_and(cpu, &cfg->domain, cpu_online_mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 	cfg->vector = IRQ_VECTOR_UNASSIGNED;
 	cfg->domain = CPU_MASK_NONE;
 	irq_status[irq] = IRQ_UNUSED;
-	cpus_andnot(vector_table[vector], vector_table[vector], domain);
+	cpumask_andnot(&vector_table[vector], &vector_table[vector], &domain);
 }
 
 static void clear_irq_vector(int irq)
@@ -244,7 +242,7 @@ void __setup_vector_irq(int cpu)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 	/* Mark the inuse vectors */
 	for (irq = 0; irq < NR_IRQS; ++irq) {
-		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+		if (!cpumask_test_cpu(cpu, &irq_cfg[irq].domain))
 			continue;
 		vector = irq_to_vector(irq);
 		per_cpu(vector_irq, cpu)[vector] = irq;
@@ -261,7 +259,7 @@ static enum vector_domain_type {
 static cpumask_t vector_allocation_domain(int cpu)
 {
 	if (vector_domain_type == VECTOR_DOMAIN_PERCPU)
-		return cpumask_of_cpu(cpu);
+		return *cpumask_of(cpu);
 	return CPU_MASK_ALL;
 }
 
@@ -275,7 +273,7 @@ static int __irq_prepare_move(int irq, int cpu)
 		return -EBUSY;
 	if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
 		return -EINVAL;
-	if (cpu_isset(cpu, cfg->domain))
+	if (cpumask_test_cpu(cpu, &cfg->domain))
 		return 0;
 	domain = vector_allocation_domain(cpu);
 	vector = find_unassigned_vector(domain);
@@ -309,12 +307,12 @@ void irq_complete_move(unsigned irq)
 	if (likely(!cfg->move_in_progress))
 		return;
 
-	if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
+	if (unlikely(cpumask_test_cpu(smp_processor_id(), &cfg->old_domain)))
 		return;
 
 	cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask);
-	cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-	for_each_cpu_mask(i, cleanup_mask)
+	cfg->move_cleanup_count = cpumask_weight(&cleanup_mask);
+	for_each_cpu(i, &cleanup_mask)
 		platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
 	cfg->move_in_progress = 0;
 }
@@ -340,12 +338,12 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
 		if (!cfg->move_cleanup_count)
 			goto unlock;
 
-		if (!cpu_isset(me, cfg->old_domain))
+		if (!cpumask_test_cpu(me, &cfg->old_domain))
 			goto unlock;
 
 		spin_lock_irqsave(&vector_lock, flags);
 		__this_cpu_write(vector_irq[vector], -1);
-		cpu_clear(me, vector_table[vector]);
+		cpumask_clear_cpu(me, &vector_table[vector]);
 		spin_unlock_irqrestore(&vector_lock, flags);
 		cfg->move_cleanup_count--;
 	unlock:
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 18e794a57248..e42bf7a913f3 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -146,7 +146,7 @@ ENTRY(vhpt_miss)
 (p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
 (p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
 	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	shr.u r28=r22,PUD_SHIFT			// shift pud index into position
 #else
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
@@ -155,7 +155,7 @@ ENTRY(vhpt_miss)
 	ld8 r17=[r17]				// get *pgd (may be 0)
 	;;
 (p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	dep r28=r28,r17,3,(PAGE_SHIFT-3)	// r28=pud_offset(pgd,addr)
 	;;
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
@@ -222,13 +222,13 @@ ENTRY(vhpt_miss)
 	 */
 	ld8 r25=[r21]				// read *pte again
 	ld8 r26=[r17]				// read *pmd again
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	ld8 r19=[r28]				// read *pud again
 #endif
 	cmp.ne p6,p7=r0,r0
 	;;
 	cmp.ne.or.andcm p6,p7=r26,r20		// did *pmd change
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	cmp.ne.or.andcm p6,p7=r19,r29		// did *pud change
 #endif
 	mov r27=PAGE_SHIFT<<2
@@ -476,7 +476,7 @@ ENTRY(nested_dtlb_miss)
 (p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
 (p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
 	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	shr.u r18=r22,PUD_SHIFT			// shift pud index into position
 #else
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
@@ -487,7 +487,7 @@ ENTRY(nested_dtlb_miss)
 (p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
 	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=p[u|m]d_offset(pgd,addr)
 	;;
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 (p7)	ld8 r17=[r17]				// get *pud (may be 0)
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
 	;;
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 5151a649c96b..b72cd7a07222 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -156,9 +156,9 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
 	VMCOREINFO_OFFSET(node_memblk_s, size);
 #endif
-#ifdef CONFIG_PGTABLE_3
+#if CONFIG_PGTABLE_LEVELS == 3
 	VMCOREINFO_CONFIG(PGTABLE_3);
-#elif defined(CONFIG_PGTABLE_4)
+#elif CONFIG_PGTABLE_LEVELS == 4
 	VMCOREINFO_CONFIG(PGTABLE_4);
 #endif
 }
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 8bfd36af46f8..dd5801eb4c69 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1293,7 +1293,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		monarch_cpu = cpu;
 		sos->monarch = 1;
 	} else {
-		cpu_set(cpu, mca_cpu);
+		cpumask_set_cpu(cpu, &mca_cpu);
 		sos->monarch = 0;
 	}
 	mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
@@ -1316,7 +1316,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		 */
 		ia64_mca_wakeup_all();
 	} else {
-		while (cpu_isset(cpu, mca_cpu))
+		while (cpumask_test_cpu(cpu, &mca_cpu))
 			cpu_relax();	/* spin until monarch wakes us */
 	}
 
@@ -1355,9 +1355,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		 * and put this cpu in the rendez loop.
 		 */
 		for_each_online_cpu(i) {
-			if (cpu_isset(i, mca_cpu)) {
+			if (cpumask_test_cpu(i, &mca_cpu)) {
 				monarch_cpu = i;
-				cpu_clear(i, mca_cpu);	/* wake next cpu */
+				cpumask_clear_cpu(i, &mca_cpu);	/* wake next cpu */
 				while (monarch_cpu != -1)
 					cpu_relax();	/* spin until last cpu leaves */
 				set_curr_task(cpu, previous_current);
@@ -1822,7 +1822,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	ti->cpu = cpu;
 	p->stack = ti;
 	p->state = TASK_UNINTERRUPTIBLE;
-	cpu_set(cpu, p->cpus_allowed);
+	cpumask_set_cpu(cpu, &p->cpus_allowed);
 	INIT_LIST_HEAD(&p->tasks);
 	p->parent = p->real_parent = p->group_leader = p;
 	INIT_LIST_HEAD(&p->children);
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 8ae36ea177d3..9dd7464f8c17 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -47,15 +47,14 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 	struct msi_msg	msg;
 	unsigned long	dest_phys_id;
 	int	irq, vector;
-	cpumask_t mask;
 
 	irq = create_irq();
 	if (irq < 0)
 		return irq;
 
 	irq_set_msi_desc(irq, desc);
-	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
-	dest_phys_id = cpu_physical_id(first_cpu(mask));
+	dest_phys_id = cpu_physical_id(cpumask_any_and(&(irq_to_domain(irq)),
+						       cpu_online_mask));
 	vector = irq_to_vector(irq);
 
 	msg.address_hi = 0;
@@ -171,10 +170,9 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	unsigned dest;
-	cpumask_t mask;
 
-	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
-	dest = cpu_physical_id(first_cpu(mask));
+	dest = cpu_physical_id(cpumask_first_and(&(irq_to_domain(irq)),
+						 cpu_online_mask));
 
 	msg->address_hi = 0;
 	msg->address_lo =
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index d288cde93606..92c376279c6d 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -39,7 +39,7 @@ void map_cpu_to_node(int cpu, int nid)
 	}
 	/* sanity check first */
 	oldnid = cpu_to_node_map[cpu];
-	if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
+	if (cpumask_test_cpu(cpu, &node_to_cpu_mask[oldnid])) {
 		return; /* nothing to do */
 	}
 	/* we don't have cpu-driven node hot add yet...
@@ -47,16 +47,16 @@ void map_cpu_to_node(int cpu, int nid)
 	if (!node_online(nid))
 		nid = first_online_node;
 	cpu_to_node_map[cpu] = nid;
-	cpu_set(cpu, node_to_cpu_mask[nid]);
+	cpumask_set_cpu(cpu, &node_to_cpu_mask[nid]);
 	return;
 }
 
 void unmap_cpu_from_node(int cpu, int nid)
 {
-	WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
+	WARN_ON(!cpumask_test_cpu(cpu, &node_to_cpu_mask[nid]));
 	WARN_ON(cpu_to_node_map[cpu] != nid);
 	cpu_to_node_map[cpu] = 0;
-	cpu_clear(cpu, node_to_cpu_mask[nid]);
+	cpumask_clear_cpu(cpu, &node_to_cpu_mask[nid]);
 }
 
 
@@ -71,7 +71,7 @@ void __init build_cpu_to_node_map(void)
 	int cpu, i, node;
 
 	for(node=0; node < MAX_NUMNODES; node++)
-		cpus_clear(node_to_cpu_mask[node]);
+		cpumask_clear(&node_to_cpu_mask[node]);
 
 	for_each_possible_early_cpu(cpu) {
 		node = -1;
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index ee9719eebb1e..1eeffb7fbb16 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -256,7 +256,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 			data_saved->buffer = buffer;
 		}
 	}
-	cpu_set(smp_processor_id(), data->cpu_event);
+	cpumask_set_cpu(smp_processor_id(), &data->cpu_event);
 	if (irqsafe) {
 		salinfo_work_to_do(data);
 		spin_unlock_irqrestore(&data_saved_lock, flags);
@@ -274,7 +274,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 	unsigned long flags;
 	if (!data->open)
 		return;
-	if (!cpus_empty(data->cpu_event)) {
+	if (!cpumask_empty(&data->cpu_event)) {
 		spin_lock_irqsave(&data_saved_lock, flags);
 		salinfo_work_to_do(data);
 		spin_unlock_irqrestore(&data_saved_lock, flags);
@@ -308,7 +308,7 @@ salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t
 	int i, n, cpu = -1;
 
 retry:
-	if (cpus_empty(data->cpu_event) && down_trylock(&data->mutex)) {
+	if (cpumask_empty(&data->cpu_event) && down_trylock(&data->mutex)) {
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 		if (down_interruptible(&data->mutex))
@@ -317,9 +317,9 @@ retry:
 
 	n = data->cpu_check;
 	for (i = 0; i < nr_cpu_ids; i++) {
-		if (cpu_isset(n, data->cpu_event)) {
+		if (cpumask_test_cpu(n, &data->cpu_event)) {
 			if (!cpu_online(n)) {
-				cpu_clear(n, data->cpu_event);
+				cpumask_clear_cpu(n, &data->cpu_event);
 				continue;
 			}
 			cpu = n;
@@ -451,7 +451,7 @@ retry:
 		call_on_cpu(cpu, salinfo_log_read_cpu, data);
 	if (!data->log_size) {
 		data->state = STATE_NO_DATA;
-		cpu_clear(cpu, data->cpu_event);
+		cpumask_clear_cpu(cpu, &data->cpu_event);
 	} else {
 		data->state = STATE_LOG_RECORD;
 	}
@@ -491,11 +491,11 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
 	unsigned long flags;
 	spin_lock_irqsave(&data_saved_lock, flags);
 	data->state = STATE_NO_DATA;
-	if (!cpu_isset(cpu, data->cpu_event)) {
+	if (!cpumask_test_cpu(cpu, &data->cpu_event)) {
 		spin_unlock_irqrestore(&data_saved_lock, flags);
 		return 0;
 	}
-	cpu_clear(cpu, data->cpu_event);
+	cpumask_clear_cpu(cpu, &data->cpu_event);
 	if (data->saved_num) {
 		shift1_data_saved(data, data->saved_num - 1);
 		data->saved_num = 0;
@@ -509,7 +509,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
 	salinfo_log_new_read(cpu, data);
 	if (data->state == STATE_LOG_RECORD) {
 		spin_lock_irqsave(&data_saved_lock, flags);
-		cpu_set(cpu, data->cpu_event);
+		cpumask_set_cpu(cpu, &data->cpu_event);
 		salinfo_work_to_do(data);
 		spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
@@ -581,7 +581,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 		for (i = 0, data = salinfo_data;
 		     i < ARRAY_SIZE(salinfo_data);
 		     ++i, ++data) {
-			cpu_set(cpu, data->cpu_event);
+			cpumask_set_cpu(cpu, &data->cpu_event);
 			salinfo_work_to_do(data);
 		}
 		spin_unlock_irqrestore(&data_saved_lock, flags);
@@ -601,7 +601,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 					shift1_data_saved(data, j);
 				}
 			}
-			cpu_clear(cpu, data->cpu_event);
+			cpumask_clear_cpu(cpu, &data->cpu_event);
 		}
 		spin_unlock_irqrestore(&data_saved_lock, flags);
 		break;
@@ -659,7 +659,7 @@ salinfo_init(void)
 
 		/* we missed any events before now */
 		for_each_online_cpu(j)
-			cpu_set(j, data->cpu_event);
+			cpumask_set_cpu(j, &data->cpu_event);
 
 		*sdir++ = dir;
 	}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index d86669bcdfb2..b9761389cb8d 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -562,8 +562,8 @@ setup_arch (char **cmdline_p)
 #  ifdef CONFIG_ACPI_HOTPLUG_CPU
 	prefill_possible_map();
 #  endif
-	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
-		32 : cpus_weight(early_cpu_possible_map)),
+	per_cpu_scan_finalize((cpumask_weight(&early_cpu_possible_map) == 0 ?
+		32 : cpumask_weight(&early_cpu_possible_map)),
 		additional_cpus > 0 ? additional_cpus : 0);
 # endif
 #endif /* CONFIG_APCI_BOOT */
@@ -702,7 +702,8 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
 #ifdef CONFIG_SMP
-	seq_printf(m, "siblings   : %u\n", cpus_weight(cpu_core_map[cpunum]));
+	seq_printf(m, "siblings   : %u\n",
+		   cpumask_weight(&cpu_core_map[cpunum]));
 	if (c->socket_id != -1)
 		seq_printf(m, "physical id: %u\n", c->socket_id);
 	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
@@ -933,8 +934,8 @@ cpu_init (void)
 	 * (must be done after per_cpu area is setup)
 	 */
 	if (smp_processor_id() == 0) {
-		cpu_set(0, per_cpu(cpu_sibling_map, 0));
-		cpu_set(0, cpu_core_map[0]);
+		cpumask_set_cpu(0, &per_cpu(cpu_sibling_map, 0));
+		cpumask_set_cpu(0, &cpu_core_map[0]);
 	} else {
 		/*
 		 * Set ar.k3 so that assembly code in MCA handler can compute
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 9fcd4e63048f..7f706d4f84f7 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -262,11 +262,11 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask)
 	preempt_disable();
 	mycpu = smp_processor_id();
 
-	for_each_cpu_mask(cpu, cpumask)
+	for_each_cpu(cpu, &cpumask)
 		counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
 
 	mb();
-	for_each_cpu_mask(cpu, cpumask) {
+	for_each_cpu(cpu, &cpumask) {
 		if (cpu == mycpu)
 			flush_mycpu = 1;
 		else
@@ -276,7 +276,7 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask)
 	if (flush_mycpu)
 		smp_local_flush_tlb();
 
-	for_each_cpu_mask(cpu, cpumask)
+	for_each_cpu(cpu, &cpumask)
 		while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
 			udelay(FLUSH_DELAY);
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 547a48d78bd7..15051e9c2c6f 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -434,7 +434,7 @@ smp_callin (void)
 	/*
 	 * Allow the master to continue.
 	 */
-	cpu_set(cpuid, cpu_callin_map);
+	cpumask_set_cpu(cpuid, &cpu_callin_map);
 	Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
 }
 
@@ -475,13 +475,13 @@ do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
 	 */
 	Dprintk("Waiting on callin_map ...");
 	for (timeout = 0; timeout < 100000; timeout++) {
-		if (cpu_isset(cpu, cpu_callin_map))
+		if (cpumask_test_cpu(cpu, &cpu_callin_map))
 			break;  /* It has booted */
 		udelay(100);
 	}
 	Dprintk("\n");
 
-	if (!cpu_isset(cpu, cpu_callin_map)) {
+	if (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
 		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
 		ia64_cpu_to_sapicid[cpu] = -1;
 		set_cpu_online(cpu, false);  /* was set in smp_callin() */
@@ -541,7 +541,7 @@ smp_prepare_cpus (unsigned int max_cpus)
 
 	smp_setup_percpu_timer();
 
-	cpu_set(0, cpu_callin_map);
+	cpumask_set_cpu(0, &cpu_callin_map);
 
 	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
 	ia64_cpu_to_sapicid[0] = boot_cpu_id;
@@ -565,7 +565,7 @@ smp_prepare_cpus (unsigned int max_cpus)
 void smp_prepare_boot_cpu(void)
 {
 	set_cpu_online(smp_processor_id(), true);
-	cpu_set(smp_processor_id(), cpu_callin_map);
+	cpumask_set_cpu(smp_processor_id(), &cpu_callin_map);
 	set_numa_node(cpu_to_node_map[smp_processor_id()]);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 	paravirt_post_smp_prepare_boot_cpu();
@@ -577,10 +577,10 @@ clear_cpu_sibling_map(int cpu)
 {
 	int i;
 
-	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
-		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
-	for_each_cpu_mask(i, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[i]);
+	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
+		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
+	for_each_cpu(i, &cpu_core_map[cpu])
+		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
 
 	per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE;
 }
@@ -592,12 +592,12 @@ remove_siblinginfo(int cpu)
 
 	if (cpu_data(cpu)->threads_per_core == 1 &&
 	    cpu_data(cpu)->cores_per_socket == 1) {
-		cpu_clear(cpu, cpu_core_map[cpu]);
-		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
+		cpumask_clear_cpu(cpu, &cpu_core_map[cpu]);
+		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, cpu));
 		return;
 	}
 
-	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+	last = (cpumask_weight(&cpu_core_map[cpu]) == 1 ? 1 : 0);
 
 	/* remove it from all sibling map's */
 	clear_cpu_sibling_map(cpu);
@@ -673,7 +673,7 @@ int __cpu_disable(void)
 	remove_siblinginfo(cpu);
 	fixup_irqs();
 	local_flush_tlb_all();
-	cpu_clear(cpu, cpu_callin_map);
+	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	return 0;
 }
 
@@ -718,11 +718,13 @@ static inline void set_cpu_sibling_map(int cpu)
 
 	for_each_online_cpu(i) {
 		if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
-			cpu_set(i, cpu_core_map[cpu]);
-			cpu_set(cpu, cpu_core_map[i]);
+			cpumask_set_cpu(i, &cpu_core_map[cpu]);
+			cpumask_set_cpu(cpu, &cpu_core_map[i]);
 			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
-				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
-				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+				cpumask_set_cpu(i,
+						&per_cpu(cpu_sibling_map, cpu));
+				cpumask_set_cpu(cpu,
+						&per_cpu(cpu_sibling_map, i));
 			}
 		}
 	}
@@ -742,7 +744,7 @@ __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	 * Already booted cpu? not valid anymore since we dont
 	 * do idle loop tightspin anymore.
 	 */
-	if (cpu_isset(cpu, cpu_callin_map))
+	if (cpumask_test_cpu(cpu, &cpu_callin_map))
 		return -EINVAL;
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
@@ -753,8 +755,8 @@ __cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	if (cpu_data(cpu)->threads_per_core == 1 &&
 	    cpu_data(cpu)->cores_per_socket == 1) {
-		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
-		cpu_set(cpu, cpu_core_map[cpu]);
+		cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map, cpu));
+		cpumask_set_cpu(cpu, &cpu_core_map[cpu]);
 		return 0;
 	}
 
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 965ab42fabb0..c01fe8991244 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -148,7 +148,7 @@ static void cache_shared_cpu_map_setup(unsigned int cpu,
 
 	if (cpu_data(cpu)->threads_per_core <= 1 &&
 		cpu_data(cpu)->cores_per_socket <= 1) {
-		cpu_set(cpu, this_leaf->shared_cpu_map);
+		cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 		return;
 	}
 
@@ -164,7 +164,7 @@ static void cache_shared_cpu_map_setup(unsigned int cpu,
 			if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id
 				&& cpu_data(j)->core_id == csi.log1_cid
 				&& cpu_data(j)->thread_id == csi.log1_tid)
-				cpu_set(j, this_leaf->shared_cpu_map);
+				cpumask_set_cpu(j, &this_leaf->shared_cpu_map);
 
 		i++;
 	} while (i < num_shared &&
@@ -177,7 +177,7 @@ static void cache_shared_cpu_map_setup(unsigned int cpu,
 static void cache_shared_cpu_map_setup(unsigned int cpu,
 		struct cache_info * this_leaf)
 {
-	cpu_set(cpu, this_leaf->shared_cpu_map);
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 	return;
 }
 #endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 6b3345758d3e..a9b65cf7b34a 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -692,31 +692,6 @@ int arch_remove_memory(u64 start, u64 size)
 #endif
 #endif
 
-/*
- * Even when CONFIG_IA32_SUPPORT is not enabled it is
- * useful to have the Linux/x86 domain registered to
- * avoid an attempted module load when emulators call
- * personality(PER_LINUX32). This saves several milliseconds
- * on each such call.
- */
-static struct exec_domain ia32_exec_domain;
-
-static int __init
-per_linux32_init(void)
-{
-	ia32_exec_domain.name = "Linux/x86";
-	ia32_exec_domain.handler = NULL;
-	ia32_exec_domain.pers_low = PER_LINUX32;
-	ia32_exec_domain.pers_high = PER_LINUX32;
-	ia32_exec_domain.signal_map = default_exec_domain.signal_map;
-	ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
-	register_exec_domain(&ia32_exec_domain);
-
-	return 0;
-}
-
-__initcall(per_linux32_init);
-
 /**
  * show_mem - give short summary of memory stats
  *
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 0b5ce82d203d..1be65eb074ec 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -271,7 +271,9 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
  	if (bus == NULL) {
 		kfree(res);
 		kfree(controller);
+		return;
 	}
+	pci_bus_add_devices(bus);
 }
 
 /*
diff --git a/arch/m32r/include/asm/asm-offsets.h b/arch/m32r/include/asm/asm-offsets.h
new file mode 100644
index 000000000000..d370ee36a182
--- /dev/null
+++ b/arch/m32r/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h
index 6e7787f3dac7..9cc00dbd59ce 100644
--- a/arch/m32r/include/asm/io.h
+++ b/arch/m32r/include/asm/io.h
@@ -67,6 +67,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 
 extern void iounmap(volatile void __iomem *addr);
 #define ioremap_nocache(off,size) ioremap(off,size)
+#define ioremap_wc ioremap_nocache
 
 /*
  * IO bus memory addresses are also 1:1 with the physical address
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 32422d0211c3..f630d9c30b28 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -24,7 +24,6 @@
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		status;		/* thread-synchronous flags */
 	__u32			cpu;		/* current CPU */
@@ -38,18 +37,7 @@ struct thread_info {
 	__u8			supervisor_stack[0];
 };
 
-#else /* !__ASSEMBLY__ */
-
-/* offsets into the thread_info struct for assembly code access */
-#define TI_TASK		0x00000000
-#define TI_EXEC_DOMAIN	0x00000004
-#define TI_FLAGS	0x00000008
-#define TI_STATUS	0x0000000C
-#define TI_CPU		0x00000010
-#define TI_PRE_COUNT	0x00000014
-#define TI_ADDR_LIMIT	0x00000018
-
-#endif
+#endif /* !__ASSEMBLY__ */
 
 #define THREAD_SIZE		(PAGE_SIZE << 1)
 #define THREAD_SIZE_ORDER	1
@@ -61,7 +49,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/m32r/kernel/asm-offsets.c b/arch/m32r/kernel/asm-offsets.c
index 9e263112a6e2..cd3d2fc9c8df 100644
--- a/arch/m32r/kernel/asm-offsets.c
+++ b/arch/m32r/kernel/asm-offsets.c
@@ -1 +1,14 @@
-/* Dummy asm-offsets.c file. Required by kbuild and ready to be used - hint! */
+#include <linux/thread_info.h>
+#include <linux/kbuild.h>
+
+int foo(void)
+{
+	OFFSET(TI_TASK, thread_info, task);
+	OFFSET(TI_FLAGS, thread_info, flags);
+	OFFSET(TI_STATUS, thread_info, status);
+	OFFSET(TI_CPU, thread_info, cpu);
+	OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
+	OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
+
+	return 0;
+}
diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S
index 7c3db9940ce1..c639bfa32232 100644
--- a/arch/m32r/kernel/entry.S
+++ b/arch/m32r/kernel/entry.S
@@ -65,6 +65,7 @@
 #include <asm/page.h>
 #include <asm/m32r.h>
 #include <asm/mmu_context.h>
+#include <asm/asm-offsets.h>
 
 #if !defined(CONFIG_MMU)
 #define sys_madvise		sys_ni_syscall
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 7736c6660a15..1c81e24fd006 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -172,20 +172,14 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0;
-	int signal, sig = ksig->sig;
+	int sig = ksig->sig;
 
 	frame = get_sigframe(ksig, regs->spu, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= __put_user(signal, &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 	if (err)
 		return -EFAULT;
 
@@ -209,13 +203,11 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 
 	/* Set up registers for signal handler */
 	regs->spu = (unsigned long)frame;
-	regs->r0 = signal;	/* Arg for signal handler */
+	regs->r0 = sig;	/* Arg for signal handler */
 	regs->r1 = (unsigned long)&frame->info;
 	regs->r2 = (unsigned long)&frame->uc;
 	regs->bpc = (unsigned long)ksig->ka.sa.sa_handler;
 
-	set_fs(USER_DS);
-
 #if DEBUG_SIG
 	printk("SIG deliver (%s:%d): sp=%p pc=%p\n",
 		current->comm, current->pid, frame, regs->pc);
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index bb21f4f63170..a468467542f4 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -376,7 +376,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	if (!cpumask_equal(&cpu_callin_map, cpu_online_mask))
 		BUG();
 
-	for (cpu_id = 0 ; cpu_id < num_online_cpus() ; cpu_id++)
+	for_each_online_cpu(cpu_id)
 		show_cpu_info(cpu_id);
 
 	/*
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..2dd8f63bfbbb 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -67,6 +67,10 @@ config HZ
 	default 1000 if CLEOPATRA
 	default 100
 
+config PGTABLE_LEVELS
+	default 2 if SUN3 || COLDFIRE
+	default 3
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/m68k/coldfire/m527x.c b/arch/m68k/coldfire/m527x.c
index 2ba470735bed..c0b3e28f91df 100644
--- a/arch/m68k/coldfire/m527x.c
+++ b/arch/m68k/coldfire/m527x.c
@@ -92,7 +92,6 @@ static void __init m527x_uarts_init(void)
 
 static void __init m527x_fec_init(void)
 {
-	u16 par;
 	u8 v;
 
 	/* Set multi-function pins to ethernet mode for fec0 */
@@ -100,6 +99,8 @@ static void __init m527x_fec_init(void)
 	v = readb(MCFGPIO_PAR_FECI2C);
 	writeb(v | 0xf0, MCFGPIO_PAR_FECI2C);
 #else
+	u16 par;
+
 	par = readw(MCFGPIO_PAR_FECI2C);
 	writew(par | 0xf00, MCFGPIO_PAR_FECI2C);
 	v = readb(MCFGPIO_PAR_FEC0HL);
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index df9679238b6d..821de928dc3f 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -313,12 +313,16 @@ static int __init mcf_pci_init(void)
 	schedule_timeout(msecs_to_jiffies(200));
 
 	rootbus = pci_scan_bus(0, &mcf_pci_ops, NULL);
+	if (!rootbus)
+		return -ENODEV;
+
 	rootbus->resource[0] = &mcf_pci_io;
 	rootbus->resource[1] = &mcf_pci_mem;
 
 	pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq);
 	pci_bus_size_bridges(rootbus);
 	pci_bus_assign_resources(rootbus);
+	pci_bus_add_devices(rootbus);
 	return 0;
 }
 
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1a10a08ebec7..ed1643b4c678 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -521,8 +521,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -573,5 +575,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 7859a738c81e..d38822b1847e 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -479,8 +479,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -531,5 +533,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 372593a3b398..c429199cf4a9 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -501,8 +501,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -553,5 +555,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index f3bd35e76ea4..9b880371d642 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -472,8 +472,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 9f9793fb2b73..49ae3376e993 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -481,8 +481,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -533,5 +535,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 89f225c01a0b..ee143a57058c 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -503,8 +503,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -555,5 +557,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d3cdb5447a2c..c777aa05048f 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -583,8 +583,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -635,5 +637,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b4c76640973e..a7628a85e260 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -472,8 +472,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 0d4a26f9b58c..ebaa68268a4a 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -472,8 +472,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 5d581c503fa3..2c16853aedd3 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -340,7 +340,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
-CONFIG_NE2000=m
+CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
@@ -494,8 +494,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -546,5 +548,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index c6b49a4a887c..e3056bf0f65b 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -473,8 +473,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index b65785eaff8d..73c36b7a0009 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -473,8 +473,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -525,5 +527,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/include/asm/m527xsim.h b/arch/m68k/include/asm/m527xsim.h
index 1bebbe78055a..2c648a043f24 100644
--- a/arch/m68k/include/asm/m527xsim.h
+++ b/arch/m68k/include/asm/m527xsim.h
@@ -103,8 +103,10 @@
  */
 #define	MCFFEC_BASE0		(MCF_IPSBAR + 0x1000)
 #define	MCFFEC_SIZE0		0x800
+#ifdef CONFIG_M5275
 #define	MCFFEC_BASE1		(MCF_IPSBAR + 0x1800)
 #define	MCFFEC_SIZE1		0x800
+#endif
 
 /*
  *	QSPI module.
diff --git a/arch/m68k/include/asm/m68360_pram.h b/arch/m68k/include/asm/m68360_pram.h
index e6088bbce93d..c0cbd96f09bc 100644
--- a/arch/m68k/include/asm/m68360_pram.h
+++ b/arch/m68k/include/asm/m68360_pram.h
@@ -170,7 +170,7 @@ struct uart_pram {
     unsigned short  frmer;          /* Rx framing error counter */
     unsigned short  nosec;          /* Rx noise counter */
     unsigned short  brkec;          /* Rx break character counter */
-    unsigned short  brkln;          /* Reaceive break length */
+    unsigned short  brkln;          /* Receive break length */
                    
     unsigned short  uaddr1;         /* address character 1 */
     unsigned short  uaddr2;         /* address character 2 */
@@ -338,7 +338,7 @@ struct ethernet_pram {
     unsigned long   c_pres;         /* preset CRC */
     unsigned long   c_mask;         /* constant mask for CRC */
     unsigned long   crcec;          /* CRC error counter */
-    unsigned long   alec;           /* alighnment error counter */
+    unsigned long   alec;           /* alignment error counter */
     unsigned long   disfc;          /* discard frame counter */
     unsigned short  pads;           /* short frame PAD characters */
     unsigned short  ret_lim;        /* retry limit threshold */
diff --git a/arch/m68k/include/asm/mcfqspi.h b/arch/m68k/include/asm/mcfqspi.h
index 7b51416ccae2..256da0e4aeb4 100644
--- a/arch/m68k/include/asm/mcfqspi.h
+++ b/arch/m68k/include/asm/mcfqspi.h
@@ -11,11 +11,6 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
 */
 
 #ifndef mcfqspi_h
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index c54256e69e64..cee13c2e5161 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -26,7 +26,6 @@
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
 	unsigned long		flags;
-	struct exec_domain	*exec_domain;	/* execution domain */
 	mm_segment_t		addr_limit;	/* thread address space */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u32			cpu;		/* should always be 0 on m68k */
@@ -37,7 +36,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.addr_limit	= KERNEL_DS,		\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 }
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index 931a31ff59dd..8520250a1d93 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -62,7 +62,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 
 		r = dev->resource + idx;
 		if (!r->start && r->end) {
-			pr_err(KERN_ERR "PCI: Device %s not available because of resource collisions\n",
+			pr_err("PCI: Device %s not available because of resource collisions\n",
 				pci_name(dev));
 			return -EINVAL;
 		}
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index d7179281e74a..af1c4f330aef 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -863,12 +863,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 	if (fsize)
 		err |= copy_to_user (frame + 1, regs + 1, fsize);
 
-	err |= __put_user((current_thread_info()->exec_domain
-			   && current_thread_info()->exec_domain->signal_invmap
-			   && sig < 32
-			   ? current_thread_info()->exec_domain->signal_invmap[sig]
-			   : sig),
-			  &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 
 	err |= __put_user(regs->vector, &frame->code);
 	err |= __put_user(&frame->sc, &frame->psc);
@@ -948,12 +943,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	if (fsize)
 		err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize);
 
-	err |= __put_user((current_thread_info()->exec_domain
-			   && current_thread_info()->exec_domain->signal_invmap
-			   && sig < 32
-			   ? current_thread_info()->exec_domain->signal_invmap[sig]
-			   : sig),
-			  &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
diff --git a/arch/m68k/lib/ashldi3.c b/arch/m68k/lib/ashldi3.c
index 7729f33878d1..37234c2df47f 100644
--- a/arch/m68k/lib/ashldi3.c
+++ b/arch/m68k/lib/ashldi3.c
@@ -11,12 +11,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
diff --git a/arch/m68k/lib/ashrdi3.c b/arch/m68k/lib/ashrdi3.c
index 18ea5f7ed921..1d59345f36c6 100644
--- a/arch/m68k/lib/ashrdi3.c
+++ b/arch/m68k/lib/ashrdi3.c
@@ -11,12 +11,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
diff --git a/arch/m68k/lib/divsi3.S b/arch/m68k/lib/divsi3.S
index ec307b61991e..2c0ec85ac661 100644
--- a/arch/m68k/lib/divsi3.S
+++ b/arch/m68k/lib/divsi3.S
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/lshrdi3.c b/arch/m68k/lib/lshrdi3.c
index d06442d3a328..49e1ec8f2cc2 100644
--- a/arch/m68k/lib/lshrdi3.c
+++ b/arch/m68k/lib/lshrdi3.c
@@ -11,12 +11,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
diff --git a/arch/m68k/lib/modsi3.S b/arch/m68k/lib/modsi3.S
index ef3849435768..1d9e0efdf31d 100644
--- a/arch/m68k/lib/modsi3.S
+++ b/arch/m68k/lib/modsi3.S
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c
index ee5f0b1b5c5d..9006d15b8721 100644
--- a/arch/m68k/lib/muldi3.c
+++ b/arch/m68k/lib/muldi3.c
@@ -12,12 +12,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #ifdef CONFIG_CPU_HAS_NO_MULDIV64
 
diff --git a/arch/m68k/lib/mulsi3.S b/arch/m68k/lib/mulsi3.S
index ce29ea37b45f..c39ad4e738e9 100644
--- a/arch/m68k/lib/mulsi3.S
+++ b/arch/m68k/lib/mulsi3.S
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/udivsi3.S b/arch/m68k/lib/udivsi3.S
index c424c4a1f0a3..35a5446572a5 100644
--- a/arch/m68k/lib/udivsi3.S
+++ b/arch/m68k/lib/udivsi3.S
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/umodsi3.S b/arch/m68k/lib/umodsi3.S
index 5def5f626478..099da514a8fd 100644
--- a/arch/m68k/lib/umodsi3.S
+++ b/arch/m68k/lib/umodsi3.S
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 54037125ebf8..bb11dceed7ed 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -47,9 +47,8 @@ void __init oss_init(void)
 	/* Disable all interrupts. Unlike a VIA it looks like we    */
 	/* do this by setting the source's interrupt level to zero. */
 
-	for (i = 0; i <= OSS_NUM_SOURCES; i++) {
+	for (i = 0; i < OSS_NUM_SOURCES; i++)
 		oss->irq_level[i] = 0;
-	}
 }
 
 /*
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
index 9359e5048442..d5779b0ec573 100644
--- a/arch/metag/include/asm/io.h
+++ b/arch/metag/include/asm/io.h
@@ -2,6 +2,7 @@
 #define _ASM_METAG_IO_H
 
 #include <linux/types.h>
+#include <asm/pgtable-bits.h>
 
 #define IO_SPACE_LIMIT  0
 
diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h
new file mode 100644
index 000000000000..25ba6729f496
--- /dev/null
+++ b/arch/metag/include/asm/pgtable-bits.h
@@ -0,0 +1,104 @@
+/*
+ * Meta page table definitions.
+ */
+
+#ifndef _METAG_PGTABLE_BITS_H
+#define _METAG_PGTABLE_BITS_H
+
+#include <asm/metag_mem.h>
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT		MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE		MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV		MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE	MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT	MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1	0x020
+#define _PAGE_CACHE_CTRL0	0x040
+#define _PAGE_CACHE_CTRL1	0x080
+#define _PAGE_ALWAYS_ZERO_2	0x100
+#define _PAGE_ALWAYS_ZERO_3	0x200
+#define _PAGE_ALWAYS_ZERO_4	0x400
+#define _PAGE_ALWAYS_ZERO_5	0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used.  Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED		_PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY		_PAGE_ALWAYS_ZERO_2
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL		_PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0	(MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1	(MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2	(MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3	(MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE		(_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK	(_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+				 _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK		(PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT		1
+#define _PAGE_SZ_4K		(0x0)
+#define _PAGE_SZ_8K		(0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K		(0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K		(0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K		(0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K		(0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K		(0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K		(0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M		(0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M		(0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M		(0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK		(0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ		(_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ		(_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ		(_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE		(_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_4M)
+#endif
+
+#endif /* _METAG_PGTABLE_BITS_H */
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index d0604c0a8702..ffa3a3a2ecad 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -5,6 +5,7 @@
 #ifndef _METAG_PGTABLE_H
 #define _METAG_PGTABLE_H
 
+#include <asm/pgtable-bits.h>
 #include <asm-generic/pgtable-nopmd.h>
 
 /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
@@ -21,100 +22,6 @@
 #endif
 
 /*
- * Definitions for MMU descriptors
- *
- * These are the hardware bits in the MMCU pte entries.
- * Derived from the Meta toolkit headers.
- */
-#define _PAGE_PRESENT		MMCU_ENTRY_VAL_BIT
-#define _PAGE_WRITE		MMCU_ENTRY_WR_BIT
-#define _PAGE_PRIV		MMCU_ENTRY_PRIV_BIT
-/* Write combine bit - this can cause writes to occur out of order */
-#define _PAGE_WR_COMBINE	MMCU_ENTRY_WRC_BIT
-/* Sys coherent bit - this bit is never used by Linux */
-#define _PAGE_SYS_COHERENT	MMCU_ENTRY_SYS_BIT
-#define _PAGE_ALWAYS_ZERO_1	0x020
-#define _PAGE_CACHE_CTRL0	0x040
-#define _PAGE_CACHE_CTRL1	0x080
-#define _PAGE_ALWAYS_ZERO_2	0x100
-#define _PAGE_ALWAYS_ZERO_3	0x200
-#define _PAGE_ALWAYS_ZERO_4	0x400
-#define _PAGE_ALWAYS_ZERO_5	0x800
-
-/* These are software bits that we stuff into the gaps in the hardware
- * pte entries that are not used.  Note, these DO get stored in the actual
- * hardware, but the hardware just does not use them.
- */
-#define _PAGE_ACCESSED		_PAGE_ALWAYS_ZERO_1
-#define _PAGE_DIRTY		_PAGE_ALWAYS_ZERO_2
-
-/* Pages owned, and protected by, the kernel. */
-#define _PAGE_KERNEL		_PAGE_PRIV
-
-/* No cacheing of this page */
-#define _PAGE_CACHE_WIN0	(MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
-/* burst cacheing - good for data streaming */
-#define _PAGE_CACHE_WIN1	(MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
-/* One cache way per thread */
-#define _PAGE_CACHE_WIN2	(MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
-/* Full on cacheing */
-#define _PAGE_CACHE_WIN3	(MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
-
-#define _PAGE_CACHEABLE		(_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
-
-/* which bits are used for cache control ... */
-#define _PAGE_CACHE_MASK	(_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
-				 _PAGE_WR_COMBINE)
-
-/* This is a mask of the bits that pte_modify is allowed to change. */
-#define _PAGE_CHG_MASK		(PAGE_MASK)
-
-#define _PAGE_SZ_SHIFT		1
-#define _PAGE_SZ_4K		(0x0)
-#define _PAGE_SZ_8K		(0x1 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_16K		(0x2 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_32K		(0x3 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_64K		(0x4 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_128K		(0x5 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_256K		(0x6 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_512K		(0x7 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_1M		(0x8 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_2M		(0x9 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_4M		(0xa << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_MASK		(0xf << _PAGE_SZ_SHIFT)
-
-#if defined(CONFIG_PAGE_SIZE_4K)
-#define _PAGE_SZ		(_PAGE_SZ_4K)
-#elif defined(CONFIG_PAGE_SIZE_8K)
-#define _PAGE_SZ		(_PAGE_SZ_8K)
-#elif defined(CONFIG_PAGE_SIZE_16K)
-#define _PAGE_SZ		(_PAGE_SZ_16K)
-#endif
-#define _PAGE_TABLE		(_PAGE_SZ | _PAGE_PRESENT)
-
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_8K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_16K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_32K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_64K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_128K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_256K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-# define _PAGE_SZHUGE		(_PAGE_SZ_512K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
-# define _PAGE_SZHUGE		(_PAGE_SZ_1M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
-# define _PAGE_SZHUGE		(_PAGE_SZ_2M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
-# define _PAGE_SZHUGE		(_PAGE_SZ_4M)
-#endif
-
-/*
  * The Linux memory management assumes a three-level page table setup. On
  * Meta, we use that, but "fold" the mid level into the top-level page
  * table.
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
index 13272fd5a5ba..0838ca699764 100644
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@@ -111,7 +111,6 @@ struct thread_struct {
  */
 #define start_thread(regs, pc, usp) do {				   \
 	unsigned int *argc = (unsigned int *) bprm->exec;		   \
-	set_fs(USER_DS);						   \
 	current->thread.int_depth = 1;					   \
 	/* Force this process down to user land */			   \
 	regs->ctx.SaveMask = TBICTX_PRIV_BIT;				   \
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
index afb3ca4776d1..32677cc278aa 100644
--- a/arch/metag/include/asm/thread_info.h
+++ b/arch/metag/include/asm/thread_info.h
@@ -28,7 +28,6 @@
 /* This must be 8 byte aligned so we can ensure stack alignment. */
 struct thread_info {
 	struct task_struct *task;	/* main task structure */
-	struct exec_domain *exec_domain;	/* execution domain */
 	unsigned long flags;	/* low level flags */
 	unsigned long status;	/* thread-synchronous flags */
 	u32 cpu;		/* current CPU */
@@ -68,7 +67,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
index 5385dd1216b7..4f8f1f87ef11 100644
--- a/arch/metag/kernel/irq.c
+++ b/arch/metag/kernel/irq.c
@@ -132,7 +132,6 @@ void irq_ctx_init(int cpu)
 
 	irqctx = (union irq_ctx *) &hardirq_stack[cpu * THREAD_SIZE];
 	irqctx->tinfo.task              = NULL;
-	irqctx->tinfo.exec_domain       = NULL;
 	irqctx->tinfo.cpu               = cpu;
 	irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
 	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
@@ -141,7 +140,6 @@ void irq_ctx_init(int cpu)
 
 	irqctx = (union irq_ctx *) &softirq_stack[cpu * THREAD_SIZE];
 	irqctx->tinfo.task              = NULL;
-	irqctx->tinfo.exec_domain       = NULL;
 	irqctx->tinfo.cpu               = cpu;
 	irqctx->tinfo.preempt_count     = 0;
 	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index f006d2276f40..ac3a199e33e7 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -261,7 +261,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static DECLARE_COMPLETION(cpu_killed);
 
 /*
  * __cpu_disable runs on the processor to be shutdown.
@@ -299,7 +298,7 @@ int __cpu_disable(void)
  */
 void __cpu_die(unsigned int cpu)
 {
-	if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1)))
+	if (!cpu_wait_death(cpu, 1))
 		pr_err("CPU%u: unable to kill\n", cpu);
 }
 
@@ -314,7 +313,7 @@ void cpu_die(void)
 	local_irq_disable();
 	idle_task_exit();
 
-	complete(&cpu_killed);
+	(void)cpu_report_death();
 
 	asm ("XOR	TXENABLE, D0Re0,D0Re0\n");
 }
diff --git a/arch/microblaze/include/asm/seccomp.h b/arch/microblaze/include/asm/seccomp.h
index 0d912758a0d7..204618a2ce84 100644
--- a/arch/microblaze/include/asm/seccomp.h
+++ b/arch/microblaze/include/asm/seccomp.h
@@ -3,14 +3,8 @@
 
 #include <linux/unistd.h>
 
-#define __NR_seccomp_read		__NR_read
-#define __NR_seccomp_write		__NR_write
-#define __NR_seccomp_exit		__NR_exit
 #define __NR_seccomp_sigreturn		__NR_sigreturn
 
-#define __NR_seccomp_read_32		__NR_read
-#define __NR_seccomp_write_32		__NR_write
-#define __NR_seccomp_exit_32		__NR_exit
-#define __NR_seccomp_sigreturn_32	__NR_sigreturn
+#include <asm-generic/seccomp.h>
 
 #endif	/* _ASM_MICROBLAZE_SECCOMP_H */
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index b699fbd7de4a..383f387b4eee 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -65,7 +65,6 @@ typedef struct {
 
 struct thread_info {
 	struct task_struct	*task; /* main task structure */
-	struct exec_domain	*exec_domain; /* execution domain */
 	unsigned long		flags; /* low level flags */
 	unsigned long		status; /* thread-synchronous flags */
 	__u32			cpu; /* current CPU */
@@ -81,7 +80,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/microblaze/kernel/cpu/mb.c b/arch/microblaze/kernel/cpu/mb.c
index 7b5dca7ed39d..9581d194d9e4 100644
--- a/arch/microblaze/kernel/cpu/mb.c
+++ b/arch/microblaze/kernel/cpu/mb.c
@@ -27,7 +27,6 @@
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	int count = 0;
 	char *fpga_family = "Unknown";
 	char *cpu_ver = "Unknown";
 	int i;
@@ -48,91 +47,89 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		}
 	}
 
-	count = seq_printf(m,
-			"CPU-Family:	MicroBlaze\n"
-			"FPGA-Arch:	%s\n"
-			"CPU-Ver:	%s, %s endian\n"
-			"CPU-MHz:	%d.%02d\n"
-			"BogoMips:	%lu.%02lu\n",
-			fpga_family,
-			cpu_ver,
-			cpuinfo.endian ? "little" : "big",
-			cpuinfo.cpu_clock_freq /
-			1000000,
-			cpuinfo.cpu_clock_freq %
-			1000000,
-			loops_per_jiffy / (500000 / HZ),
-			(loops_per_jiffy / (5000 / HZ)) % 100);
-
-	count += seq_printf(m,
-		"HW:\n Shift:\t\t%s\n"
-		" MSR:\t\t%s\n"
-		" PCMP:\t\t%s\n"
-		" DIV:\t\t%s\n",
-		(cpuinfo.use_instr & PVR0_USE_BARREL_MASK) ? "yes" : "no",
-		(cpuinfo.use_instr & PVR2_USE_MSR_INSTR) ? "yes" : "no",
-		(cpuinfo.use_instr & PVR2_USE_PCMP_INSTR) ? "yes" : "no",
-		(cpuinfo.use_instr & PVR0_USE_DIV_MASK) ? "yes" : "no");
-
-	count += seq_printf(m,
-			" MMU:\t\t%x\n",
-			cpuinfo.mmu);
-
-	count += seq_printf(m,
-		" MUL:\t\t%s\n"
-		" FPU:\t\t%s\n",
-		(cpuinfo.use_mult & PVR2_USE_MUL64_MASK) ? "v2" :
-			(cpuinfo.use_mult & PVR0_USE_HW_MUL_MASK) ? "v1" : "no",
-		(cpuinfo.use_fpu & PVR2_USE_FPU2_MASK) ? "v2" :
-			(cpuinfo.use_fpu & PVR0_USE_FPU_MASK) ? "v1" : "no");
-
-	count += seq_printf(m,
-		" Exc:\t\t%s%s%s%s%s%s%s%s\n",
-		(cpuinfo.use_exc & PVR2_OPCODE_0x0_ILL_MASK) ? "op0x0 " : "",
-		(cpuinfo.use_exc & PVR2_UNALIGNED_EXC_MASK) ? "unal " : "",
-		(cpuinfo.use_exc & PVR2_ILL_OPCODE_EXC_MASK) ? "ill " : "",
-		(cpuinfo.use_exc & PVR2_IOPB_BUS_EXC_MASK) ? "iopb " : "",
-		(cpuinfo.use_exc & PVR2_DOPB_BUS_EXC_MASK) ? "dopb " : "",
-		(cpuinfo.use_exc & PVR2_DIV_ZERO_EXC_MASK) ? "zero " : "",
-		(cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
-		(cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
-
-	count += seq_printf(m,
-			"Stream-insns:\t%sprivileged\n",
-			cpuinfo.mmu_privins ? "un" : "");
+	seq_printf(m,
+		   "CPU-Family:	MicroBlaze\n"
+		   "FPGA-Arch:	%s\n"
+		   "CPU-Ver:	%s, %s endian\n"
+		   "CPU-MHz:	%d.%02d\n"
+		   "BogoMips:	%lu.%02lu\n",
+		   fpga_family,
+		   cpu_ver,
+		   cpuinfo.endian ? "little" : "big",
+		   cpuinfo.cpu_clock_freq / 1000000,
+		   cpuinfo.cpu_clock_freq % 1000000,
+		   loops_per_jiffy / (500000 / HZ),
+		   (loops_per_jiffy / (5000 / HZ)) % 100);
+
+	seq_printf(m,
+		   "HW:\n Shift:\t\t%s\n"
+		   " MSR:\t\t%s\n"
+		   " PCMP:\t\t%s\n"
+		   " DIV:\t\t%s\n",
+		   (cpuinfo.use_instr & PVR0_USE_BARREL_MASK) ? "yes" : "no",
+		   (cpuinfo.use_instr & PVR2_USE_MSR_INSTR) ? "yes" : "no",
+		   (cpuinfo.use_instr & PVR2_USE_PCMP_INSTR) ? "yes" : "no",
+		   (cpuinfo.use_instr & PVR0_USE_DIV_MASK) ? "yes" : "no");
+
+	seq_printf(m, " MMU:\t\t%x\n", cpuinfo.mmu);
+
+	seq_printf(m,
+		   " MUL:\t\t%s\n"
+		   " FPU:\t\t%s\n",
+		   (cpuinfo.use_mult & PVR2_USE_MUL64_MASK) ? "v2" :
+		   (cpuinfo.use_mult & PVR0_USE_HW_MUL_MASK) ? "v1" : "no",
+		   (cpuinfo.use_fpu & PVR2_USE_FPU2_MASK) ? "v2" :
+		   (cpuinfo.use_fpu & PVR0_USE_FPU_MASK) ? "v1" : "no");
+
+	seq_printf(m,
+		   " Exc:\t\t%s%s%s%s%s%s%s%s\n",
+		   (cpuinfo.use_exc & PVR2_OPCODE_0x0_ILL_MASK) ? "op0x0 " : "",
+		   (cpuinfo.use_exc & PVR2_UNALIGNED_EXC_MASK) ? "unal " : "",
+		   (cpuinfo.use_exc & PVR2_ILL_OPCODE_EXC_MASK) ? "ill " : "",
+		   (cpuinfo.use_exc & PVR2_IOPB_BUS_EXC_MASK) ? "iopb " : "",
+		   (cpuinfo.use_exc & PVR2_DOPB_BUS_EXC_MASK) ? "dopb " : "",
+		   (cpuinfo.use_exc & PVR2_DIV_ZERO_EXC_MASK) ? "zero " : "",
+		   (cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
+		   (cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
+
+	seq_printf(m,
+		   "Stream-insns:\t%sprivileged\n",
+		   cpuinfo.mmu_privins ? "un" : "");
 
 	if (cpuinfo.use_icache)
-		count += seq_printf(m,
-				"Icache:\t\t%ukB\tline length:\t%dB\n",
-				cpuinfo.icache_size >> 10,
-				cpuinfo.icache_line_length);
+		seq_printf(m,
+			   "Icache:\t\t%ukB\tline length:\t%dB\n",
+			   cpuinfo.icache_size >> 10,
+			   cpuinfo.icache_line_length);
 	else
-		count += seq_printf(m, "Icache:\t\tno\n");
+		seq_puts(m, "Icache:\t\tno\n");
 
 	if (cpuinfo.use_dcache) {
-		count += seq_printf(m,
-				"Dcache:\t\t%ukB\tline length:\t%dB\n",
-				cpuinfo.dcache_size >> 10,
-				cpuinfo.dcache_line_length);
-		seq_printf(m, "Dcache-Policy:\t");
+		seq_printf(m,
+			   "Dcache:\t\t%ukB\tline length:\t%dB\n",
+			   cpuinfo.dcache_size >> 10,
+			   cpuinfo.dcache_line_length);
+		seq_puts(m, "Dcache-Policy:\t");
 		if (cpuinfo.dcache_wb)
-			count += seq_printf(m, "write-back\n");
+			seq_puts(m, "write-back\n");
 		else
-			count += seq_printf(m, "write-through\n");
-	} else
-		count += seq_printf(m, "Dcache:\t\tno\n");
+			seq_puts(m, "write-through\n");
+	} else {
+		seq_puts(m, "Dcache:\t\tno\n");
+	}
+
+	seq_printf(m,
+		   "HW-Debug:\t%s\n",
+		   cpuinfo.hw_debug ? "yes" : "no");
 
-	count += seq_printf(m,
-			"HW-Debug:\t%s\n",
-			cpuinfo.hw_debug ? "yes" : "no");
+	seq_printf(m,
+		   "PVR-USR1:\t%02x\n"
+		   "PVR-USR2:\t%08x\n",
+		   cpuinfo.pvr_user1,
+		   cpuinfo.pvr_user2);
 
-	count += seq_printf(m,
-			"PVR-USR1:\t%02x\n"
-			"PVR-USR2:\t%08x\n",
-			cpuinfo.pvr_user1,
-			cpuinfo.pvr_user2);
+	seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
 
-	count += seq_printf(m, "Page size:\t%lu\n", PAGE_SIZE);
 	return 0;
 }
 
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 0536bc021cc6..ef548510b951 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -348,8 +348,9 @@ C_ENTRY(_user_exception):
  * The LP register should point to the location where the called function
  * should return.  [note that MAKE_SYS_CALL uses label 1] */
 	/* See if the system call number is valid */
+	blti	r12, 5f
 	addi	r11, r12, -__NR_syscalls;
-	bgei	r11,5f;
+	bgei	r11, 5f;
 	/* Figure out which function to use for this system call.  */
 	/* Note Microblaze barrel shift is optional, so don't rely on it */
 	add	r12, r12, r12;			/* convert num -> ptr */
@@ -375,7 +376,7 @@ C_ENTRY(_user_exception):
 
 	/* The syscall number is invalid, return an error.  */
 5:
-	rtsd	r15, 8;		/* looks like a normal subroutine return */
+	braid	ret_from_trap
 	addi	r3, r0, -ENOSYS;
 
 /* Entry point used to return from a syscall/trap */
@@ -411,7 +412,7 @@ C_ENTRY(ret_from_trap):
 	bri	1b
 
 	/* Maybe handle a signal */
-5:	
+5:
 	andi	r11, r19, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME;
 	beqi	r11, 4f;		/* Signals to handle, handle them */
 
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index a1cbaf90e2ea..97001524ca2d 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -158,7 +158,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
-	unsigned long signal;
 	unsigned long address = 0;
 #ifdef CONFIG_MMU
 	pmd_t *pmdp;
@@ -170,12 +169,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: (unsigned long)sig;
-
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
 		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 
@@ -230,14 +223,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	regs->r1 = (unsigned long) frame;
 
 	/* Signal handler args: */
-	regs->r5 = signal; /* arg 0: signum */
+	regs->r5 = sig; /* arg 0: signum */
 	regs->r6 = (unsigned long) &frame->info; /* arg 1: siginfo */
 	regs->r7 = (unsigned long) &frame->uc; /* arg2: ucontext */
 	/* Offset to handle microblaze rtid r14, 0 */
 	regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
 
-	set_fs(USER_DS);
-
 #ifdef DEBUG_SIG
 	pr_info("SIG deliver (%s:%d): sp=%p pc=%08lx\n",
 		current->comm, current->pid, frame, regs->pc);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 48528fb81eff..ae838ed5fcf2 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -1382,6 +1382,10 @@ static int __init pcibios_init(void)
 
 	/* Call common code to handle resource allocation */
 	pcibios_resource_survey();
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		if (hose->bus)
+			pci_bus_add_devices(hose->bus);
+	}
 
 	return 0;
 }
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index e5fc463b36d0..39cf40da5f14 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -4,9 +4,9 @@ platforms += alchemy
 platforms += ar7
 platforms += ath25
 platforms += ath79
-platforms += bcm3384
 platforms += bcm47xx
 platforms += bcm63xx
+platforms += bmips
 platforms += cavium-octeon
 platforms += cobalt
 platforms += dec
@@ -21,6 +21,7 @@ platforms += mti-malta
 platforms += mti-sead3
 platforms += netlogic
 platforms += paravirt
+platforms += pistachio
 platforms += pmcs-msp71xx
 platforms += pnx833x
 platforms += ralink
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c7a16904cd03..f5016656494f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -23,7 +23,7 @@ config MIPS
 	select HAVE_KRETPROBES
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_SYSCALL_TRACEPOINTS
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
 	select RTC_LIB if !MACH_LOONGSON
 	select GENERIC_ATOMIC64 if !64BIT
@@ -43,6 +43,7 @@ config MIPS
 	select GENERIC_SMP_IDLE_THREAD
 	select BUILDTIME_EXTABLE_SORT
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
 	select GENERIC_CMOS_UPDATE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select VIRT_TO_BUS
@@ -55,6 +56,8 @@ config MIPS
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_BINFMT_ELF_STATE
 	select SYSCTL_EXCEPTION_TRACE
+	select HAVE_VIRT_CPU_ACCOUNTING_GEN
+	select HAVE_IRQ_TIME_ACCOUNTING
 
 menu "Machine selection"
 
@@ -131,8 +134,8 @@ config ATH79
 	help
 	  Support for the Atheros AR71XX/AR724X/AR913X SoCs.
 
-config BCM3384
-	bool "Broadcom BCM3384 based boards"
+config BMIPS_GENERIC
+	bool "Broadcom Generic BMIPS kernel"
 	select BOOT_RAW
 	select NO_EXCEPT_FILL
 	select USE_OF
@@ -140,22 +143,30 @@ config BCM3384
 	select CSRC_R4K
 	select SYNC_R4K
 	select COMMON_CLK
-	select DMA_NONCOHERENT
+	select BCM7038_L1_IRQ
+	select BCM7120_L2_IRQ
+	select BRCMSTB_L2_IRQ
 	select IRQ_CPU
+	select RAW_IRQ_ACCESSORS
+	select DMA_NONCOHERENT
 	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_HIGHMEM
+	select SYS_HAS_CPU_BMIPS32_3300
+	select SYS_HAS_CPU_BMIPS4350
+	select SYS_HAS_CPU_BMIPS4380
 	select SYS_HAS_CPU_BMIPS5000
 	select SWAP_IO_SPACE
-	select USB_EHCI_BIG_ENDIAN_DESC
-	select USB_EHCI_BIG_ENDIAN_MMIO
-	select USB_OHCI_BIG_ENDIAN_DESC
-	select USB_OHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+	select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+	select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+	select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
 	help
-	  Support for BCM3384 based boards.  BCM3384/BCM33843 is a cable modem
-	  chipset with a Linux application processor that is often used to
-	  provide Samba services, a CUPS print server, and/or advanced routing
-	  features.
+	  Build a generic DT-based kernel image that boots on select
+	  BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
+	  box chips.  Note that CONFIG_CPU_BIG_ENDIAN/CONFIG_CPU_LITTLE_ENDIAN
+	  must be set appropriately for your board.
 
 config BCM47XX
 	bool "Broadcom BCM47XX based boards"
@@ -352,6 +363,33 @@ config MACH_LOONGSON1
 	  the ICT (Institute of Computing Technology) and the Chinese Academy
 	  of Sciences.
 
+config MACH_PISTACHIO
+	bool "IMG Pistachio SoC based boards"
+	select ARCH_REQUIRE_GPIOLIB
+	select BOOT_ELF32
+	select BOOT_RAW
+	select CEVT_R4K
+	select CLKSRC_MIPS_GIC
+	select COMMON_CLK
+	select CSRC_R4K
+	select DMA_MAYBE_COHERENT
+	select IRQ_CPU
+	select LIBFDT
+	select MFD_SYSCON
+	select MIPS_CPU_SCACHE
+	select MIPS_GIC
+	select PINCTRL
+	select REGULATOR
+	select SYS_HAS_CPU_MIPS32_R2
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_MIPS_CPS
+	select SYS_SUPPORTS_MULTITHREADING
+	select SYS_SUPPORTS_ZBOOT
+	select USE_OF
+	help
+	  This enables support for the IMG Pistachio SoC platform.
+
 config MIPS_MALTA
 	bool "MIPS Malta board"
 	select ARCH_MAY_HAVE_PC_FDC
@@ -377,6 +415,7 @@ config MIPS_MALTA
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_HAS_CPU_MIPS32_R3_5
+	select SYS_HAS_CPU_MIPS32_R5
 	select SYS_HAS_CPU_MIPS32_R6
 	select SYS_HAS_CPU_MIPS64_R1
 	select SYS_HAS_CPU_MIPS64_R2
@@ -386,6 +425,7 @@ config MIPS_MALTA
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_HIGHMEM
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_MICROMIPS
 	select SYS_SUPPORTS_MIPS_CMP
@@ -779,7 +819,8 @@ config CAVIUM_OCTEON_SOC
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select EDAC_SUPPORT
-	select SYS_SUPPORTS_HOTPLUG_CPU
+	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_HAS_CPU_CAVIUM_OCTEON
 	select SWAP_IO_SPACE
@@ -793,6 +834,7 @@ config CAVIUM_OCTEON_SOC
 	select SYS_SUPPORTS_SMP
 	select NR_CPUS_DEFAULT_16
 	select BUILTIN_DTB
+	select MTD_COMPLEX_MAPPINGS
 	help
 	  This option supports all of the Octeon reference boards from Cavium
 	  Networks. It builds a kernel that dynamically determines the Octeon
@@ -887,6 +929,7 @@ source "arch/mips/ath25/Kconfig"
 source "arch/mips/ath79/Kconfig"
 source "arch/mips/bcm47xx/Kconfig"
 source "arch/mips/bcm63xx/Kconfig"
+source "arch/mips/bmips/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
 source "arch/mips/lantiq/Kconfig"
@@ -1202,10 +1245,10 @@ config MIPS_L1_CACHE_SHIFT_7
 
 config MIPS_L1_CACHE_SHIFT
 	int
-	default "4" if MIPS_L1_CACHE_SHIFT_4
-	default "5" if MIPS_L1_CACHE_SHIFT_5
-	default "6" if MIPS_L1_CACHE_SHIFT_6
 	default "7" if MIPS_L1_CACHE_SHIFT_7
+	default "6" if MIPS_L1_CACHE_SHIFT_6
+	default "5" if MIPS_L1_CACHE_SHIFT_5
+	default "4" if MIPS_L1_CACHE_SHIFT_4
 	default "5"
 
 config HAVE_STD_PC_SERIAL_PORT
@@ -1245,6 +1288,7 @@ config CPU_LOONGSON3
 	select CPU_SUPPORTS_HUGEPAGES
 	select WEAK_ORDERING
 	select WEAK_REORDERING_BEYOND_LLSC
+	select ARCH_REQUIRE_GPIOLIB
 	help
 		The Loongson 3 processor implements the MIPS64R2 instruction
 		set with many extensions.
@@ -1572,6 +1616,7 @@ config CPU_XLP
 	select WEAK_REORDERING_BEYOND_LLSC
 	select CPU_HAS_PREFETCH
 	select CPU_MIPSR2
+	select CPU_SUPPORTS_HUGEPAGES
 	help
 	  Netlogic Microsystems XLP processors.
 endchoice
@@ -1596,6 +1641,33 @@ config CPU_MIPS32_3_5_EVA
 	  One of its primary benefits is an increase in the maximum size
 	  of lowmem (up to 3GB). If unsure, say 'N' here.
 
+config CPU_MIPS32_R5_FEATURES
+	bool "MIPS32 Release 5 Features"
+	depends on SYS_HAS_CPU_MIPS32_R5
+	depends on CPU_MIPS32_R2
+	help
+	  Choose this option to build a kernel for release 2 or later of the
+	  MIPS32 architecture including features from release 5 such as
+	  support for Extended Physical Addressing (XPA).
+
+config CPU_MIPS32_R5_XPA
+	bool "Extended Physical Addressing (XPA)"
+	depends on CPU_MIPS32_R5_FEATURES
+	depends on !EVA
+	depends on !PAGE_SIZE_4KB
+	depends on SYS_SUPPORTS_HIGHMEM
+	select XPA
+	select HIGHMEM
+	select ARCH_PHYS_ADDR_T_64BIT
+	default n
+	help
+	  Choose this option if you want to enable the Extended Physical
+	  Addressing (XPA) on your MIPS32 core (such as P5600 series). The
+	  benefit is to increase physical addressing equal to or greater
+	  than 40 bits. Note that this has the side effect of turning on
+	  64-bit addressing which in turn makes the PTEs 64-bit in size.
+	  If unsure, say 'N' here.
+
 if CPU_LOONGSON2F
 config CPU_NOP_WORKAROUNDS
 	bool
@@ -1699,6 +1771,9 @@ config SYS_HAS_CPU_MIPS32_R2
 config SYS_HAS_CPU_MIPS32_R3_5
 	bool
 
+config SYS_HAS_CPU_MIPS32_R5
+	bool
+
 config SYS_HAS_CPU_MIPS32_R6
 	bool
 
@@ -1836,6 +1911,9 @@ config CPU_MIPSR6
 config EVA
 	bool
 
+config XPA
+	bool
+
 config SYS_SUPPORTS_32BIT_KERNEL
 	bool
 config SYS_SUPPORTS_64BIT_KERNEL
@@ -2072,7 +2150,7 @@ config MIPSR2_TO_R6_EMULATOR
 	help
 	  Choose this option if you want to run non-R6 MIPS userland code.
 	  Even if you say 'Y' here, the emulator will still be disabled by
-	  default. You can enable it using the 'mipsr2emul' kernel option.
+	  default. You can enable it using the 'mipsr2emu' kernel option.
 	  The only reason this is a build-time option is to save ~14K from the
 	  final kernel image.
 comment "MIPS R2-to-R6 emulator is only available for UP kernels"
@@ -2142,7 +2220,7 @@ config MIPS_CMP
 
 config MIPS_CPS
 	bool "MIPS Coherent Processing System support"
-	depends on SYS_SUPPORTS_MIPS_CPS
+	depends on SYS_SUPPORTS_MIPS_CPS && !64BIT
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
@@ -2348,7 +2426,7 @@ config NODES_SHIFT
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS && OPROFILE=n && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP)
+	depends on PERF_EVENTS && OPROFILE=n && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3)
 	default y
 	help
 	  Enable hardware performance counter support for perf events. If
@@ -2500,6 +2578,9 @@ config HZ
 	default 1000 if HZ_1000
 	default 1024 if HZ_1024
 
+config SCHED_HRTICK
+	def_bool HIGH_RES_TIMERS
+
 source "kernel/Kconfig.preempt"
 
 config KEXEC
@@ -2600,6 +2681,11 @@ config STACKTRACE_SUPPORT
 	bool
 	default y
 
+config PGTABLE_LEVELS
+	int
+	default 3 if 64BIT && !PAGE_SIZE_64KB
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 8f57fc72d62c..5200f649dd4e 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -197,11 +197,17 @@ endif
 # Warning: the 64-bit MIPS architecture does not support the `smartmips' extension
 # Pass -Wa,--no-warn to disable all assembler warnings until the kernel code has
 # been fixed properly.
-mips-cflags				:= "$(cflags-y)"
-cflags-$(CONFIG_CPU_HAS_SMARTMIPS)	+= $(call cc-option,$(mips-cflags),-msmartmips) -Wa,--no-warn
-cflags-$(CONFIG_CPU_MICROMIPS)		+= $(call cc-option,$(mips-cflags),-mmicromips)
+mips-cflags				:= $(cflags-y)
+ifeq ($(CONFIG_CPU_HAS_SMARTMIPS),y)
+smartmips-ase				:= $(call cc-option-yn,$(mips-cflags) -msmartmips)
+cflags-$(smartmips-ase)			+= -msmartmips -Wa,--no-warn
+endif
+ifeq ($(CONFIG_CPU_MICROMIPS),y)
+micromips-ase				:= $(call cc-option-yn,$(mips-cflags) -mmicromips)
+cflags-$(micromips-ase)			+= -mmicromips
+endif
 ifeq ($(CONFIG_CPU_HAS_MSA),y)
-toolchain-msa				:= $(call cc-option-yn,-$(mips-cflags),mhard-float -mfp64 -Wa$(comma)-mmsa)
+toolchain-msa				:= $(call cc-option-yn,$(mips-cflags) -mhard-float -mfp64 -Wa$(comma)-mmsa)
 cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
 endif
 
@@ -225,7 +231,7 @@ endif
 #
 # Board-dependent options and extra files
 #
-include $(srctree)/arch/mips/Kbuild.platforms
+include arch/mips/Kbuild.platforms
 
 ifdef CONFIG_PHYSICAL_START
 load-y					= $(CONFIG_PHYSICAL_START)
@@ -365,7 +371,11 @@ core-$(CONFIG_BUILTIN_DTB) += arch/mips/boot/dts/
 
 PHONY += dtbs
 dtbs: scripts
-	$(Q)$(MAKE) $(build)=arch/mips/boot/dts dtbs
+	$(Q)$(MAKE) $(build)=arch/mips/boot/dts
+
+PHONY += dtbs_install
+dtbs_install:
+	$(Q)$(MAKE) $(dtbinst)=arch/mips/boot/dts
 
 archprepare:
 ifdef CONFIG_MIPS32_N32
@@ -407,6 +417,7 @@ define archhelp
 	echo '  uImage.lzma          - U-Boot image (lzma)'
 	echo '  uImage.lzo           - U-Boot image (lzo)'
 	echo '  dtbs                 - Device-tree blobs for enabled boards'
+	echo '  dtbs_install         - Install dtbs to $(INSTALL_DTBS_PATH)'
 	echo
 	echo '  These will be default as appropriate for a configured platform.'
 endef
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index af2441dbfc12..be9ff1673ded 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -307,10 +307,7 @@ static void __init cpmac_get_mac(int instance, unsigned char *dev_addr)
 	}
 
 	if (mac) {
-		if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
-					&dev_addr[0], &dev_addr[1],
-					&dev_addr[2], &dev_addr[3],
-					&dev_addr[4], &dev_addr[5]) != 6) {
+		if (!mac_pton(mac, dev_addr)) {
 			pr_warn("cannot parse mac address, using random address\n");
 			eth_random_addr(dev_addr);
 		}
diff --git a/arch/mips/ath79/common.h b/arch/mips/ath79/common.h
index a3120714f0b7..c39de61f9b36 100644
--- a/arch/mips/ath79/common.h
+++ b/arch/mips/ath79/common.h
@@ -17,7 +17,7 @@
 #include <linux/types.h>
 
 #define ATH79_MEM_SIZE_MIN	(2 * 1024 * 1024)
-#define ATH79_MEM_SIZE_MAX	(128 * 1024 * 1024)
+#define ATH79_MEM_SIZE_MAX	(256 * 1024 * 1024)
 
 void ath79_clocks_init(void);
 unsigned long ath79_get_sys_clk_rate(const char *id);
diff --git a/arch/mips/bcm3384/Platform b/arch/mips/bcm3384/Platform
deleted file mode 100644
index 8e1ca0819e1b..000000000000
--- a/arch/mips/bcm3384/Platform
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Broadcom BCM3384 boards
-#
-platform-$(CONFIG_BCM3384)	+= bcm3384/
-cflags-$(CONFIG_BCM3384)	+=					\
-		-I$(srctree)/arch/mips/include/asm/mach-bcm3384/
-load-$(CONFIG_BCM3384)		:= 0xffffffff80010000
diff --git a/arch/mips/bcm3384/dma.c b/arch/mips/bcm3384/dma.c
deleted file mode 100644
index ea42012fd4f5..000000000000
--- a/arch/mips/bcm3384/dma.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2014 Kevin Cernekee <cernekee@gmail.com>
- */
-
-#include <linux/device.h>
-#include <linux/dma-direction.h>
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/pci.h>
-#include <linux/types.h>
-#include <dma-coherence.h>
-
-/*
- * BCM3384 has configurable address translation windows which allow the
- * peripherals' DMA addresses to be different from the Zephyr-visible
- * physical addresses.  e.g. usb_dma_addr = zephyr_pa ^ 0x08000000
- *
- * If our DT "memory" node has a "dma-xor-mask" property we will enable this
- * translation using the provided offset.
- */
-static u32 bcm3384_dma_xor_mask;
-static u32 bcm3384_dma_xor_limit = 0xffffffff;
-
-/*
- * PCI collapses the memory hole at 0x10000000 - 0x1fffffff.
- * On systems with a dma-xor-mask, this range is guaranteed to live above
- * the dma-xor-limit.
- */
-#define BCM3384_MEM_HOLE_PA	0x10000000
-#define BCM3384_MEM_HOLE_SIZE	0x10000000
-
-static dma_addr_t bcm3384_phys_to_dma(struct device *dev, phys_addr_t pa)
-{
-	if (dev && dev_is_pci(dev) &&
-	    pa >= (BCM3384_MEM_HOLE_PA + BCM3384_MEM_HOLE_SIZE))
-		return pa - BCM3384_MEM_HOLE_SIZE;
-	if (pa <= bcm3384_dma_xor_limit)
-		return pa ^ bcm3384_dma_xor_mask;
-	return pa;
-}
-
-dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, size_t size)
-{
-	return bcm3384_phys_to_dma(dev, virt_to_phys(addr));
-}
-
-dma_addr_t plat_map_dma_mem_page(struct device *dev, struct page *page)
-{
-	return bcm3384_phys_to_dma(dev, page_to_phys(page));
-}
-
-unsigned long plat_dma_addr_to_phys(struct device *dev, dma_addr_t dma_addr)
-{
-	if (dev && dev_is_pci(dev) &&
-	    dma_addr >= BCM3384_MEM_HOLE_PA)
-		return dma_addr + BCM3384_MEM_HOLE_SIZE;
-	if ((dma_addr ^ bcm3384_dma_xor_mask) <= bcm3384_dma_xor_limit)
-		return dma_addr ^ bcm3384_dma_xor_mask;
-	return dma_addr;
-}
-
-static int __init bcm3384_init_dma_xor(void)
-{
-	struct device_node *np = of_find_node_by_type(NULL, "memory");
-
-	if (!np)
-		return 0;
-
-	of_property_read_u32(np, "dma-xor-mask", &bcm3384_dma_xor_mask);
-	of_property_read_u32(np, "dma-xor-limit", &bcm3384_dma_xor_limit);
-
-	of_node_put(np);
-	return 0;
-}
-arch_initcall(bcm3384_init_dma_xor);
diff --git a/arch/mips/bcm3384/irq.c b/arch/mips/bcm3384/irq.c
deleted file mode 100644
index fd94fe849af6..000000000000
--- a/arch/mips/bcm3384/irq.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Partially based on arch/mips/ralink/irq.c
- *
- * Copyright (C) 2009 Gabor Juhos <juhosg@openwrt.org>
- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2014 Kevin Cernekee <cernekee@gmail.com>
- */
-
-#include <linux/io.h>
-#include <linux/bitops.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/irqdomain.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include <asm/bmips.h>
-#include <asm/irq_cpu.h>
-#include <asm/mipsregs.h>
-
-/* INTC register offsets */
-#define INTC_REG_ENABLE		0x00
-#define INTC_REG_STATUS		0x04
-
-#define MAX_WORDS		2
-#define IRQS_PER_WORD		32
-
-struct bcm3384_intc {
-	int			n_words;
-	void __iomem		*reg[MAX_WORDS];
-	u32			enable[MAX_WORDS];
-	spinlock_t		lock;
-};
-
-static void bcm3384_intc_irq_unmask(struct irq_data *d)
-{
-	struct bcm3384_intc *priv = d->domain->host_data;
-	unsigned long flags;
-	int idx = d->hwirq / IRQS_PER_WORD;
-	int bit = d->hwirq % IRQS_PER_WORD;
-
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->enable[idx] |= BIT(bit);
-	__raw_writel(priv->enable[idx], priv->reg[idx] + INTC_REG_ENABLE);
-	spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-static void bcm3384_intc_irq_mask(struct irq_data *d)
-{
-	struct bcm3384_intc *priv = d->domain->host_data;
-	unsigned long flags;
-	int idx = d->hwirq / IRQS_PER_WORD;
-	int bit = d->hwirq % IRQS_PER_WORD;
-
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->enable[idx] &= ~BIT(bit);
-	__raw_writel(priv->enable[idx], priv->reg[idx] + INTC_REG_ENABLE);
-	spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-static struct irq_chip bcm3384_intc_irq_chip = {
-	.name		= "INTC",
-	.irq_unmask	= bcm3384_intc_irq_unmask,
-	.irq_mask	= bcm3384_intc_irq_mask,
-	.irq_mask_ack	= bcm3384_intc_irq_mask,
-};
-
-unsigned int get_c0_compare_int(void)
-{
-	return CP0_LEGACY_COMPARE_IRQ;
-}
-
-static void bcm3384_intc_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_domain *domain = irq_get_handler_data(irq);
-	struct bcm3384_intc *priv = domain->host_data;
-	unsigned long flags;
-	unsigned int idx;
-
-	for (idx = 0; idx < priv->n_words; idx++) {
-		unsigned long pending;
-		int hwirq;
-
-		spin_lock_irqsave(&priv->lock, flags);
-		pending = __raw_readl(priv->reg[idx] + INTC_REG_STATUS) &
-			  priv->enable[idx];
-		spin_unlock_irqrestore(&priv->lock, flags);
-
-		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
-			generic_handle_irq(irq_find_mapping(domain,
-					   hwirq + idx * IRQS_PER_WORD));
-		}
-	}
-}
-
-asmlinkage void plat_irq_dispatch(void)
-{
-	unsigned long pending =
-		(read_c0_status() & read_c0_cause() & ST0_IM) >> STATUSB_IP0;
-	int bit;
-
-	for_each_set_bit(bit, &pending, 8)
-		do_IRQ(MIPS_CPU_IRQ_BASE + bit);
-}
-
-static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
-{
-	irq_set_chip_and_handler(irq, &bcm3384_intc_irq_chip, handle_level_irq);
-	return 0;
-}
-
-static const struct irq_domain_ops irq_domain_ops = {
-	.xlate = irq_domain_xlate_onecell,
-	.map = intc_map,
-};
-
-static int __init ioremap_one_pair(struct bcm3384_intc *priv,
-				   struct device_node *node,
-				   int idx)
-{
-	struct resource res;
-
-	if (of_address_to_resource(node, idx, &res))
-		return 0;
-
-	if (request_mem_region(res.start, resource_size(&res),
-			       res.name) < 0)
-		pr_err("Failed to request INTC register region\n");
-
-	priv->reg[idx] = ioremap_nocache(res.start, resource_size(&res));
-	if (!priv->reg[idx])
-		panic("Failed to ioremap INTC register range");
-
-	/* start up with everything masked before we hook the parent IRQ */
-	__raw_writel(0, priv->reg[idx] + INTC_REG_ENABLE);
-	priv->enable[idx] = 0;
-
-	return IRQS_PER_WORD;
-}
-
-static int __init intc_of_init(struct device_node *node,
-			       struct device_node *parent)
-{
-	struct irq_domain *domain;
-	unsigned int parent_irq, n_irqs = 0;
-	struct bcm3384_intc *priv;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		panic("Failed to allocate bcm3384_intc struct");
-
-	spin_lock_init(&priv->lock);
-
-	parent_irq = irq_of_parse_and_map(node, 0);
-	if (!parent_irq)
-		panic("Failed to get INTC IRQ");
-
-	n_irqs += ioremap_one_pair(priv, node, 0);
-	n_irqs += ioremap_one_pair(priv, node, 1);
-
-	if (!n_irqs)
-		panic("Failed to map INTC registers");
-
-	priv->n_words = n_irqs / IRQS_PER_WORD;
-	domain = irq_domain_add_linear(node, n_irqs, &irq_domain_ops, priv);
-	if (!domain)
-		panic("Failed to add irqdomain");
-
-	irq_set_chained_handler(parent_irq, bcm3384_intc_irq_handler);
-	irq_set_handler_data(parent_irq, domain);
-
-	return 0;
-}
-
-static struct of_device_id of_irq_ids[] __initdata = {
-	{ .compatible = "mti,cpu-interrupt-controller",
-	  .data = mips_cpu_irq_of_init },
-	{ .compatible = "brcm,bcm3384-intc",
-	  .data = intc_of_init },
-	{},
-};
-
-void __init arch_init_irq(void)
-{
-	bmips_tp1_irqs = 0;
-	of_irq_init(of_irq_ids);
-}
diff --git a/arch/mips/bcm3384/setup.c b/arch/mips/bcm3384/setup.c
deleted file mode 100644
index d84b8400b874..000000000000
--- a/arch/mips/bcm3384/setup.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2008 Maxime Bizon <mbizon@freebox.fr>
- * Copyright (C) 2014 Kevin Cernekee <cernekee@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/clk-provider.h>
-#include <linux/ioport.h>
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-#include <linux/smp.h>
-#include <asm/addrspace.h>
-#include <asm/bmips.h>
-#include <asm/bootinfo.h>
-#include <asm/prom.h>
-#include <asm/smp-ops.h>
-#include <asm/time.h>
-
-void __init prom_init(void)
-{
-	register_bmips_smp_ops();
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
-
-const char *get_system_type(void)
-{
-	return "BCM3384";
-}
-
-void __init plat_time_init(void)
-{
-	struct device_node *np;
-	u32 freq;
-
-	np = of_find_node_by_name(NULL, "cpus");
-	if (!np)
-		panic("missing 'cpus' DT node");
-	if (of_property_read_u32(np, "mips-hpt-frequency", &freq) < 0)
-		panic("missing 'mips-hpt-frequency' property");
-	of_node_put(np);
-
-	mips_hpt_frequency = freq;
-}
-
-void __init plat_mem_setup(void)
-{
-	void *dtb = __dtb_start;
-
-	set_io_port_base(0);
-	ioport_resource.start = 0;
-	ioport_resource.end = ~0;
-
-	/* intended to somewhat resemble ARM; see Documentation/arm/Booting */
-	if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
-		dtb = phys_to_virt(fw_arg2);
-
-	__dt_setup_arch(dtb);
-
-	strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
-}
-
-void __init device_tree_init(void)
-{
-	struct device_node *np;
-
-	unflatten_and_copy_device_tree();
-
-	/* Disable SMP boot unless both CPUs are listed in DT and !disabled */
-	np = of_find_node_by_name(NULL, "cpus");
-	if (np && of_get_available_child_count(np) <= 1)
-		bmips_smp_enabled = 0;
-	of_node_put(np);
-}
-
-int __init plat_of_setup(void)
-{
-	return __dt_register_buses("brcm,bcm3384", "simple-bus");
-}
-
-arch_initcall(plat_of_setup);
-
-static int __init plat_dev_init(void)
-{
-	of_clk_init(NULL);
-	return 0;
-}
-
-device_initcall(plat_dev_init);
diff --git a/arch/mips/bcm47xx/bcm47xx_private.h b/arch/mips/bcm47xx/bcm47xx_private.h
index ea909a56a3ee..41796befa9df 100644
--- a/arch/mips/bcm47xx/bcm47xx_private.h
+++ b/arch/mips/bcm47xx/bcm47xx_private.h
@@ -1,6 +1,10 @@
 #ifndef LINUX_BCM47XX_PRIVATE_H_
 #define LINUX_BCM47XX_PRIVATE_H_
 
+#ifndef pr_fmt
+#define pr_fmt(fmt)		"bcm47xx: " fmt
+#endif
+
 #include <linux/kernel.h>
 
 /* prom.c */
diff --git a/arch/mips/bcm47xx/board.c b/arch/mips/bcm47xx/board.c
index b3ae068ca4fa..bd56415f2f3b 100644
--- a/arch/mips/bcm47xx/board.c
+++ b/arch/mips/bcm47xx/board.c
@@ -1,8 +1,8 @@
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/string.h>
+#include <bcm47xx.h>
 #include <bcm47xx_board.h>
-#include <bcm47xx_nvram.h>
 
 struct bcm47xx_board_type {
 	const enum bcm47xx_board board;
@@ -40,20 +40,6 @@ struct bcm47xx_board_type_list1 bcm47xx_board_list_model_name[] __initconst = {
 	{ {0}, NULL},
 };
 
-/* model_no */
-static const
-struct bcm47xx_board_type_list1 bcm47xx_board_list_model_no[] __initconst = {
-	{{BCM47XX_BOARD_ASUS_WL700GE, "Asus WL700"}, "WL700"},
-	{ {0}, NULL},
-};
-
-/* machine_name */
-static const
-struct bcm47xx_board_type_list1 bcm47xx_board_list_machine_name[] __initconst = {
-	{{BCM47XX_BOARD_LINKSYS_WRTSL54GS, "Linksys WRTSL54GS"}, "WRTSL54GS"},
-	{ {0}, NULL},
-};
-
 /* hardware_version */
 static const
 struct bcm47xx_board_type_list1 bcm47xx_board_list_hardware_version[] __initconst = {
@@ -165,9 +151,11 @@ static const
 struct bcm47xx_board_type_list1 bcm47xx_board_list_board_id[] __initconst = {
 	{{BCM47XX_BOARD_NETGEAR_WGR614V8, "Netgear WGR614 V8"}, "U12H072T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WGR614V9, "Netgear WGR614 V9"}, "U12H094T00_NETGEAR"},
+	{{BCM47XX_BOARD_NETGEAR_WGR614_V10, "Netgear WGR614 V10"}, "U12H139T01_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR3300, "Netgear WNDR3300"}, "U12H093T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR3400V1, "Netgear WNDR3400 V1"}, "U12H155T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR3400V2, "Netgear WNDR3400 V2"}, "U12H187T00_NETGEAR"},
+	{{BCM47XX_BOARD_NETGEAR_WNDR3400_V3, "Netgear WNDR3400 V3"}, "U12H208T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR3400VCNA, "Netgear WNDR3400 Vcna"}, "U12H155T01_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR3700V3, "Netgear WNDR3700 V3"}, "U12H194T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR4000, "Netgear WNDR4000"}, "U12H181T00_NETGEAR"},
@@ -202,6 +190,20 @@ struct bcm47xx_board_type_list2 bcm47xx_board_list_board_type_rev[] __initconst
 	{ {0}, NULL},
 };
 
+/*
+ * Some devices don't use any common NVRAM entry for identification and they
+ * have only one model specific variable.
+ * They don't deserve own arrays, let's group them there using key-value array.
+ */
+static const
+struct bcm47xx_board_type_list2 bcm47xx_board_list_key_value[] __initconst = {
+	{{BCM47XX_BOARD_ASUS_WL700GE, "Asus WL700"}, "model_no", "WL700"},
+	{{BCM47XX_BOARD_LINKSYS_WRT300N_V1, "Linksys WRT300N V1"}, "router_name", "WRT300N"},
+	{{BCM47XX_BOARD_LINKSYS_WRT600N_V11, "Linksys WRT600N V1.1"}, "Model_Name", "WRT600N"},
+	{{BCM47XX_BOARD_LINKSYS_WRTSL54GS, "Linksys WRTSL54GS"}, "machine_name", "WRTSL54GS"},
+	{ {0}, NULL},
+};
+
 static const
 struct bcm47xx_board_type bcm47xx_board_unknown[] __initconst = {
 	{BCM47XX_BOARD_UNKNOWN, "Unknown Board"},
@@ -225,20 +227,6 @@ static __init const struct bcm47xx_board_type *bcm47xx_board_get_nvram(void)
 		}
 	}
 
-	if (bcm47xx_nvram_getenv("model_no", buf1, sizeof(buf1)) >= 0) {
-		for (e1 = bcm47xx_board_list_model_no; e1->value1; e1++) {
-			if (strstarts(buf1, e1->value1))
-				return &e1->board;
-		}
-	}
-
-	if (bcm47xx_nvram_getenv("machine_name", buf1, sizeof(buf1)) >= 0) {
-		for (e1 = bcm47xx_board_list_machine_name; e1->value1; e1++) {
-			if (strstarts(buf1, e1->value1))
-				return &e1->board;
-		}
-	}
-
 	if (bcm47xx_nvram_getenv("hardware_version", buf1, sizeof(buf1)) >= 0) {
 		for (e1 = bcm47xx_board_list_hardware_version; e1->value1; e1++) {
 			if (strstarts(buf1, e1->value1))
@@ -247,8 +235,8 @@ static __init const struct bcm47xx_board_type *bcm47xx_board_get_nvram(void)
 	}
 
 	if (bcm47xx_nvram_getenv("hardware_version", buf1, sizeof(buf1)) >= 0 &&
-	    bcm47xx_nvram_getenv("boardtype", buf2, sizeof(buf2)) >= 0) {
-		for (e2 = bcm47xx_board_list_boot_hw; e2->value1; e2++) {
+	    bcm47xx_nvram_getenv("boardnum", buf2, sizeof(buf2)) >= 0) {
+		for (e2 = bcm47xx_board_list_hw_version_num; e2->value1; e2++) {
 			if (!strstarts(buf1, e2->value1) &&
 			    !strcmp(buf2, e2->value2))
 				return &e2->board;
@@ -314,6 +302,14 @@ static __init const struct bcm47xx_board_type *bcm47xx_board_get_nvram(void)
 				return &e2->board;
 		}
 	}
+
+	for (e2 = bcm47xx_board_list_key_value; e2->value1; e2++) {
+		if (bcm47xx_nvram_getenv(e2->value1, buf1, sizeof(buf1)) >= 0) {
+			if (!strcmp(buf1, e2->value2))
+				return &e2->board;
+		}
+	}
+
 	return bcm47xx_board_unknown;
 }
 
@@ -330,9 +326,8 @@ void __init bcm47xx_board_detect(void)
 	err = bcm47xx_nvram_getenv("boardtype", buf, sizeof(buf));
 
 	/* init of nvram failed, probably too early now */
-	if (err == -ENXIO) {
+	if (err == -ENXIO)
 		return;
-	}
 
 	board_detected = bcm47xx_board_get_nvram();
 	bcm47xx_board.board = board_detected->board;
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
index 913182bcafb8..276276a8c6d7 100644
--- a/arch/mips/bcm47xx/buttons.c
+++ b/arch/mips/bcm47xx/buttons.c
@@ -252,6 +252,12 @@ bcm47xx_buttons_linksys_wrt160nv3[] __initconst = {
 };
 
 static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt300n_v1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
 bcm47xx_buttons_linksys_wrt300nv11[] __initconst = {
 	BCM47XX_GPIO_KEY(4, KEY_UNKNOWN),
 	BCM47XX_GPIO_KEY(6, KEY_RESTART),
@@ -327,6 +333,12 @@ bcm47xx_buttons_netgear_wndr3400v1[] __initconst = {
 };
 
 static const struct gpio_keys_button
+bcm47xx_buttons_netgear_wndr3400_v3[] __initconst = {
+	BCM47XX_GPIO_KEY(12, KEY_RESTART),
+	BCM47XX_GPIO_KEY(23, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
 bcm47xx_buttons_netgear_wndr3700v3[] __initconst = {
 	BCM47XX_GPIO_KEY(2, KEY_RFKILL),
 	BCM47XX_GPIO_KEY(3, KEY_RESTART),
@@ -516,6 +528,9 @@ int __init bcm47xx_buttons_register(void)
 	case BCM47XX_BOARD_LINKSYS_WRT160NV3:
 		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt160nv3);
 		break;
+	case BCM47XX_BOARD_LINKSYS_WRT300N_V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt300n_v1);
+		break;
 	case BCM47XX_BOARD_LINKSYS_WRT300NV11:
 		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt300nv11);
 		break;
@@ -557,6 +572,9 @@ int __init bcm47xx_buttons_register(void)
 	case BCM47XX_BOARD_NETGEAR_WNDR3400V1:
 		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr3400v1);
 		break;
+	case BCM47XX_BOARD_NETGEAR_WNDR3400_V3:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr3400_v3);
+		break;
 	case BCM47XX_BOARD_NETGEAR_WNDR3700V3:
 		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr3700v3);
 		break;
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index 903a656d4119..0e4ade342333 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -292,6 +292,13 @@ bcm47xx_leds_linksys_wrt160nv3[] __initconst = {
 };
 
 static const struct gpio_led
+bcm47xx_leds_linksys_wrt300n_v1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "green", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "green", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
 bcm47xx_leds_linksys_wrt300nv11[] __initconst = {
 	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
 	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
@@ -585,6 +592,9 @@ void __init bcm47xx_leds_register(void)
 	case BCM47XX_BOARD_LINKSYS_WRT160NV3:
 		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt160nv3);
 		break;
+	case BCM47XX_BOARD_LINKSYS_WRT300N_V1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt300n_v1);
+		break;
 	case BCM47XX_BOARD_LINKSYS_WRT300NV11:
 		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt300nv11);
 		break;
diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index c5c381c43f17..ba632ff08a13 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -11,15 +11,18 @@
  * option) any later version.
  */
 
+#include <linux/io.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/mtd/mtd.h>
-#include <bcm47xx_nvram.h>
+#include <linux/bcm47xx_nvram.h>
 
-#define NVRAM_MAGIC		0x48534C46	/* 'FLSH' */
-#define NVRAM_SPACE		0x8000
+#define NVRAM_MAGIC			0x48534C46	/* 'FLSH' */
+#define NVRAM_SPACE			0x10000
+#define NVRAM_MAX_GPIO_ENTRIES		32
+#define NVRAM_MAX_GPIO_VALUE_LEN	30
 
 #define FLASH_MIN		0x00020000	/* Minimum flash size */
 
@@ -91,20 +94,18 @@ static int nvram_find_and_copy(void __iomem *iobase, u32 lim)
 	return -ENXIO;
 
 found:
-
 	if (header->len > size)
 		pr_err("The nvram size accoridng to the header seems to be bigger than the partition on flash\n");
 	if (header->len > NVRAM_SPACE)
 		pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
 		       header->len, NVRAM_SPACE);
 
-	src = (u32 *) header;
-	dst = (u32 *) nvram_buf;
+	src = (u32 *)header;
+	dst = (u32 *)nvram_buf;
 	for (i = 0; i < sizeof(struct nvram_header); i += 4)
-		*dst++ = *src++;
+		*dst++ = __raw_readl(src++);
 	for (; i < header->len && i < NVRAM_SPACE && i < size; i += 4)
-		*dst++ = le32_to_cpu(*src++);
-	memset(dst, 0x0, NVRAM_SPACE - i);
+		*dst++ = readl(src++);
 
 	return 0;
 }
@@ -138,37 +139,28 @@ static int nvram_init(void)
 	struct mtd_info *mtd;
 	struct nvram_header header;
 	size_t bytes_read;
-	int err, i;
+	int err;
 
 	mtd = get_mtd_device_nm("nvram");
 	if (IS_ERR(mtd))
 		return -ENODEV;
 
-	for (i = 0; i < ARRAY_SIZE(nvram_sizes); i++) {
-		loff_t from = mtd->size - nvram_sizes[i];
+	err = mtd_read(mtd, 0, sizeof(header), &bytes_read, (uint8_t *)&header);
+	if (!err && header.magic == NVRAM_MAGIC) {
+		u8 *dst = (uint8_t *)nvram_buf;
+		size_t len = header.len;
 
-		if (from < 0)
-			continue;
-
-		err = mtd_read(mtd, from, sizeof(header), &bytes_read,
-			       (uint8_t *)&header);
-		if (!err && header.magic == NVRAM_MAGIC) {
-			u8 *dst = (uint8_t *)nvram_buf;
-			size_t len = header.len;
-
-			if (header.len > NVRAM_SPACE) {
-				pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
-				       header.len, NVRAM_SPACE);
-				len = NVRAM_SPACE;
-			}
+		if (header.len > NVRAM_SPACE) {
+			pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n",
+				header.len, NVRAM_SPACE);
+			len = NVRAM_SPACE;
+		}
 
-			err = mtd_read(mtd, from, len, &bytes_read, dst);
-			if (err)
-				return err;
-			memset(dst + bytes_read, 0x0, NVRAM_SPACE - bytes_read);
+		err = mtd_read(mtd, 0, len, &bytes_read, dst);
+		if (err)
+			return err;
 
-			return 0;
-		}
+		return 0;
 	}
 #endif
 
@@ -178,7 +170,7 @@ static int nvram_init(void)
 int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len)
 {
 	char *var, *value, *end, *eq;
-	int err;
+	int data_left, err;
 
 	if (!name)
 		return -EINVAL;
@@ -192,16 +184,18 @@ int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len)
 	/* Look for name=value and return value */
 	var = &nvram_buf[sizeof(struct nvram_header)];
 	end = nvram_buf + sizeof(nvram_buf) - 2;
-	end[0] = end[1] = '\0';
+	end[0] = '\0';
+	end[1] = '\0';
 	for (; *var; var = value + strlen(value) + 1) {
-		eq = strchr(var, '=');
+		data_left = end - var;
+
+		eq = strnchr(var, data_left, '=');
 		if (!eq)
 			break;
 		value = eq + 1;
-		if ((eq - var) == strlen(name) &&
-			strncmp(var, name, (eq - var)) == 0) {
+		if (eq - var == strlen(name) &&
+		    strncmp(var, name, eq - var) == 0)
 			return snprintf(val, val_len, "%s", value);
-		}
 	}
 	return -ENOENT;
 }
@@ -210,10 +204,11 @@ EXPORT_SYMBOL(bcm47xx_nvram_getenv);
 int bcm47xx_nvram_gpio_pin(const char *name)
 {
 	int i, err;
-	char nvram_var[10];
-	char buf[30];
+	char nvram_var[] = "gpioXX";
+	char buf[NVRAM_MAX_GPIO_VALUE_LEN];
 
-	for (i = 0; i < 32; i++) {
+	/* TODO: Optimize it to don't call getenv so many times */
+	for (i = 0; i < NVRAM_MAX_GPIO_ENTRIES; i++) {
 		err = snprintf(nvram_var, sizeof(nvram_var), "gpio%i", i);
 		if (err <= 0)
 			continue;
diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
index 1b170bf5f7f0..ab698bad6d62 100644
--- a/arch/mips/bcm47xx/prom.c
+++ b/arch/mips/bcm47xx/prom.c
@@ -35,7 +35,6 @@
 #include <bcm47xx.h>
 #include <bcm47xx_board.h>
 
-
 static char bcm47xx_system_type[20] = "Broadcom BCM47XX";
 
 const char *get_system_type(void)
@@ -83,7 +82,7 @@ static __init void prom_init_mem(void)
 		/* Loop condition may be not enough, off may be over 1 MiB */
 		if (off + mem >= max) {
 			mem = max;
-			printk(KERN_DEBUG "assume 128MB RAM\n");
+			pr_debug("Assume 128MB RAM\n");
 			break;
 		}
 		if (!memcmp(prom_init, prom_init + mem, 32))
diff --git a/arch/mips/bcm47xx/serial.c b/arch/mips/bcm47xx/serial.c
index 2f5bbd68e9a0..df761d38f7fc 100644
--- a/arch/mips/bcm47xx/serial.c
+++ b/arch/mips/bcm47xx/serial.c
@@ -36,8 +36,8 @@ static int __init uart8250_init_ssb(void)
 		struct plat_serial8250_port *p = &(uart8250_data[i]);
 		struct ssb_serial_port *ssb_port = &(mcore->serial_ports[i]);
 
-		p->mapbase = (unsigned int) ssb_port->regs;
-		p->membase = (void *) ssb_port->regs;
+		p->mapbase = (unsigned int)ssb_port->regs;
+		p->membase = (void *)ssb_port->regs;
 		p->irq = ssb_port->irq + 2;
 		p->uartclk = ssb_port->baud_base;
 		p->regshift = ssb_port->reg_shift;
@@ -62,8 +62,8 @@ static int __init uart8250_init_bcma(void)
 		struct bcma_serial_port *bcma_port;
 		bcma_port = &(cc->serial_ports[i]);
 
-		p->mapbase = (unsigned int) bcma_port->regs;
-		p->membase = (void *) bcma_port->regs;
+		p->mapbase = (unsigned int)bcma_port->regs;
+		p->membase = (void *)bcma_port->regs;
 		p->irq = bcma_port->irq;
 		p->uartclk = bcma_port->baud_base;
 		p->regshift = bcma_port->reg_shift;
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index e43b5046cb30..82ff9fd2ab6e 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -42,7 +42,6 @@
 #include <asm/reboot.h>
 #include <asm/time.h>
 #include <bcm47xx.h>
-#include <bcm47xx_nvram.h>
 #include <bcm47xx_board.h>
 
 union bcm47xx_bus bcm47xx_bus;
@@ -53,7 +52,7 @@ EXPORT_SYMBOL(bcm47xx_bus_type);
 
 static void bcm47xx_machine_restart(char *command)
 {
-	printk(KERN_ALERT "Please stand by while rebooting the system...\n");
+	pr_alert("Please stand by while rebooting the system...\n");
 	local_irq_disable();
 	/* Set the watchdog timer to reset immediately */
 	switch (bcm47xx_bus_type) {
@@ -108,7 +107,7 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus,
 	char buf[20];
 
 	/* Fill boardinfo structure */
-	memset(&(iv->boardinfo), 0 , sizeof(struct ssb_boardinfo));
+	memset(&iv->boardinfo, 0 , sizeof(struct ssb_boardinfo));
 
 	bcm47xx_fill_ssb_boardinfo(&iv->boardinfo, NULL);
 
@@ -127,7 +126,7 @@ static void __init bcm47xx_register_ssb(void)
 	char buf[100];
 	struct ssb_mipscore *mcore;
 
-	err = ssb_bus_ssbbus_register(&(bcm47xx_bus.ssb), SSB_ENUM_BASE,
+	err = ssb_bus_ssbbus_register(&bcm47xx_bus.ssb, SSB_ENUM_BASE,
 				      bcm47xx_get_invariants);
 	if (err)
 		panic("Failed to initialize SSB bus (err %d)", err);
@@ -137,7 +136,7 @@ static void __init bcm47xx_register_ssb(void)
 		if (strstr(buf, "console=ttyS1")) {
 			struct ssb_serial_port port;
 
-			printk(KERN_DEBUG "Swapping serial ports!\n");
+			pr_debug("Swapping serial ports!\n");
 			/* swap serial ports */
 			memcpy(&port, &mcore->serial_ports[0], sizeof(port));
 			memcpy(&mcore->serial_ports[0], &mcore->serial_ports[1],
@@ -169,7 +168,7 @@ void __init plat_mem_setup(void)
 	struct cpuinfo_mips *c = &current_cpu_data;
 
 	if ((c->cputype == CPU_74K) || (c->cputype == CPU_1074K)) {
-		printk(KERN_INFO "bcm47xx: using bcma bus\n");
+		pr_info("Using bcma bus\n");
 #ifdef CONFIG_BCM47XX_BCMA
 		bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA;
 		bcm47xx_sprom_register_fallbacks();
@@ -180,7 +179,7 @@ void __init plat_mem_setup(void)
 #endif
 #endif
 	} else {
-		printk(KERN_INFO "bcm47xx: using ssb bus\n");
+		pr_info("Using ssb bus\n");
 #ifdef CONFIG_BCM47XX_SSB
 		bcm47xx_bus_type = BCM47XX_BUS_TYPE_SSB;
 		bcm47xx_sprom_register_fallbacks();
diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index 2eff7fe99c6b..68ebf2322f8b 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c
@@ -27,7 +27,6 @@
  */
 
 #include <bcm47xx.h>
-#include <bcm47xx_nvram.h>
 #include <linux/if_ether.h>
 #include <linux/etherdevice.h>
 
@@ -181,94 +180,245 @@ static void nvram_read_alpha2(const char *prefix, const char *name,
 	memcpy(val, buf, 2);
 }
 
+/* This is one-function-only macro, it uses local "sprom" variable! */
+#define ENTRY(_revmask, _type, _prefix, _name, _val, _allset, _fallback) \
+	if (_revmask & BIT(sprom->revision)) \
+		nvram_read_ ## _type(_prefix, NULL, _name, &sprom->_val, \
+				     _allset, _fallback)
+/*
+ * Special version of filling function that can be safely called for any SPROM
+ * revision. For every NVRAM to SPROM mapping it contains bitmask of revisions
+ * for which the mapping is valid.
+ * It obviously requires some hexadecimal/bitmasks knowledge, but allows
+ * writing cleaner code (easy revisions handling).
+ * Note that while SPROM revision 0 was never used, we still keep BIT(0)
+ * reserved for it, just to keep numbering sane.
+ */
+static void bcm47xx_sprom_fill_auto(struct ssb_sprom *sprom,
+				    const char *prefix, bool fallback)
+{
+	const char *pre = prefix;
+	bool fb = fallback;
+
+	ENTRY(0xfffffffe, u16, pre, "boardrev", board_rev, 0, true);
+	ENTRY(0x00000002, u16, pre, "boardflags", boardflags_lo, 0, fb);
+	ENTRY(0xfffffffc, u16, pre, "boardtype", board_type, 0, true);
+	ENTRY(0xfffffffe, u16, pre, "boardnum", board_num, 0, fb);
+	ENTRY(0x00000002, u8, pre, "cc", country_code, 0, fb);
+	ENTRY(0xfffffff8, u8, pre, "regrev", regrev, 0, fb);
+
+	ENTRY(0xfffffffe, u8, pre, "ledbh0", gpio0, 0xff, fb);
+	ENTRY(0xfffffffe, u8, pre, "ledbh1", gpio1, 0xff, fb);
+	ENTRY(0xfffffffe, u8, pre, "ledbh2", gpio2, 0xff, fb);
+	ENTRY(0xfffffffe, u8, pre, "ledbh3", gpio3, 0xff, fb);
+
+	ENTRY(0x0000070e, u16, pre, "pa0b0", pa0b0, 0, fb);
+	ENTRY(0x0000070e, u16, pre, "pa0b1", pa0b1, 0, fb);
+	ENTRY(0x0000070e, u16, pre, "pa0b2", pa0b2, 0, fb);
+	ENTRY(0x0000070e, u8, pre, "pa0itssit", itssi_bg, 0, fb);
+	ENTRY(0x0000070e, u8, pre, "pa0maxpwr", maxpwr_bg, 0, fb);
+
+	ENTRY(0x0000070c, u8, pre, "opo", opo, 0, fb);
+	ENTRY(0xfffffffe, u8, pre, "aa2g", ant_available_bg, 0, fb);
+	ENTRY(0xfffffffe, u8, pre, "aa5g", ant_available_a, 0, fb);
+	ENTRY(0x000007fe, s8, pre, "ag0", antenna_gain.a0, 0, fb);
+	ENTRY(0x000007fe, s8, pre, "ag1", antenna_gain.a1, 0, fb);
+	ENTRY(0x000007f0, s8, pre, "ag2", antenna_gain.a2, 0, fb);
+	ENTRY(0x000007f0, s8, pre, "ag3", antenna_gain.a3, 0, fb);
+
+	ENTRY(0x0000070e, u16, pre, "pa1b0", pa1b0, 0, fb);
+	ENTRY(0x0000070e, u16, pre, "pa1b1", pa1b1, 0, fb);
+	ENTRY(0x0000070e, u16, pre, "pa1b2", pa1b2, 0, fb);
+	ENTRY(0x0000070c, u16, pre, "pa1lob0", pa1lob0, 0, fb);
+	ENTRY(0x0000070c, u16, pre, "pa1lob1", pa1lob1, 0, fb);
+	ENTRY(0x0000070c, u16, pre, "pa1lob2", pa1lob2, 0, fb);
+	ENTRY(0x0000070c, u16, pre, "pa1hib0", pa1hib0, 0, fb);
+	ENTRY(0x0000070c, u16, pre, "pa1hib1", pa1hib1, 0, fb);
+	ENTRY(0x0000070c, u16, pre, "pa1hib2", pa1hib2, 0, fb);
+	ENTRY(0x0000070e, u8, pre, "pa1itssit", itssi_a, 0, fb);
+	ENTRY(0x0000070e, u8, pre, "pa1maxpwr", maxpwr_a, 0, fb);
+	ENTRY(0x0000070c, u8, pre, "pa1lomaxpwr", maxpwr_al, 0, fb);
+	ENTRY(0x0000070c, u8, pre, "pa1himaxpwr", maxpwr_ah, 0, fb);
+
+	ENTRY(0x00000708, u8, pre, "bxa2g", bxa2g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "rssisav2g", rssisav2g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "rssismc2g", rssismc2g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "rssismf2g", rssismf2g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "bxa5g", bxa5g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "rssisav5g", rssisav5g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "rssismc5g", rssismc5g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "rssismf5g", rssismf5g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "tri2g", tri2g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "tri5g", tri5g, 0, fb);
+	ENTRY(0x00000708, u8, pre, "tri5gl", tri5gl, 0, fb);
+	ENTRY(0x00000708, u8, pre, "tri5gh", tri5gh, 0, fb);
+	ENTRY(0x00000708, s8, pre, "rxpo2g", rxpo2g, 0, fb);
+	ENTRY(0x00000708, s8, pre, "rxpo5g", rxpo5g, 0, fb);
+	ENTRY(0xfffffff0, u8, pre, "txchain", txchain, 0xf, fb);
+	ENTRY(0xfffffff0, u8, pre, "rxchain", rxchain, 0xf, fb);
+	ENTRY(0xfffffff0, u8, pre, "antswitch", antswitch, 0xff, fb);
+	ENTRY(0x00000700, u8, pre, "tssipos2g", fem.ghz2.tssipos, 0, fb);
+	ENTRY(0x00000700, u8, pre, "extpagain2g", fem.ghz2.extpa_gain, 0, fb);
+	ENTRY(0x00000700, u8, pre, "pdetrange2g", fem.ghz2.pdet_range, 0, fb);
+	ENTRY(0x00000700, u8, pre, "triso2g", fem.ghz2.tr_iso, 0, fb);
+	ENTRY(0x00000700, u8, pre, "antswctl2g", fem.ghz2.antswlut, 0, fb);
+	ENTRY(0x00000700, u8, pre, "tssipos5g", fem.ghz5.tssipos, 0, fb);
+	ENTRY(0x00000700, u8, pre, "extpagain5g", fem.ghz5.extpa_gain, 0, fb);
+	ENTRY(0x00000700, u8, pre, "pdetrange5g", fem.ghz5.pdet_range, 0, fb);
+	ENTRY(0x00000700, u8, pre, "triso5g", fem.ghz5.tr_iso, 0, fb);
+	ENTRY(0x00000700, u8, pre, "antswctl5g", fem.ghz5.antswlut, 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid2ga0", txpid2g[0], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid2ga1", txpid2g[1], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid2ga2", txpid2g[2], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid2ga3", txpid2g[3], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5ga0", txpid5g[0], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5ga1", txpid5g[1], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5ga2", txpid5g[2], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5ga3", txpid5g[3], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gla0", txpid5gl[0], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gla1", txpid5gl[1], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gla2", txpid5gl[2], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gla3", txpid5gl[3], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gha0", txpid5gh[0], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gha1", txpid5gh[1], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gha2", txpid5gh[2], 0, fb);
+	ENTRY(0x000000f0, u8, pre, "txpid5gha3", txpid5gh[3], 0, fb);
+
+	ENTRY(0xffffff00, u8, pre, "tempthresh", tempthresh, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "tempoffset", tempoffset, 0, fb);
+	ENTRY(0xffffff00, u16, pre, "rawtempsense", rawtempsense, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "measpower", measpower, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "tempsense_slope", tempsense_slope, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "tempcorrx", tempcorrx, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "tempsense_option", tempsense_option, 0, fb);
+	ENTRY(0x00000700, u8, pre, "freqoffset_corr", freqoffset_corr, 0, fb);
+	ENTRY(0x00000700, u8, pre, "iqcal_swp_dis", iqcal_swp_dis, 0, fb);
+	ENTRY(0x00000700, u8, pre, "hw_iqcal_en", hw_iqcal_en, 0, fb);
+	ENTRY(0x00000700, u8, pre, "elna2g", elna2g, 0, fb);
+	ENTRY(0x00000700, u8, pre, "elna5g", elna5g, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "phycal_tempdelta", phycal_tempdelta, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "temps_period", temps_period, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "temps_hysteresis", temps_hysteresis, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "measpower1", measpower1, 0, fb);
+	ENTRY(0xffffff00, u8, pre, "measpower2", measpower2, 0, fb);
+
+	ENTRY(0x000001f0, u16, pre, "cck2gpo", cck2gpo, 0, fb);
+	ENTRY(0x000001f0, u32, pre, "ofdm2gpo", ofdm2gpo, 0, fb);
+	ENTRY(0x000001f0, u32, pre, "ofdm5gpo", ofdm5gpo, 0, fb);
+	ENTRY(0x000001f0, u32, pre, "ofdm5glpo", ofdm5glpo, 0, fb);
+	ENTRY(0x000001f0, u32, pre, "ofdm5ghpo", ofdm5ghpo, 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo0", mcs2gpo[0], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo1", mcs2gpo[1], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo2", mcs2gpo[2], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo3", mcs2gpo[3], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo4", mcs2gpo[4], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo5", mcs2gpo[5], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo6", mcs2gpo[6], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs2gpo7", mcs2gpo[7], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo0", mcs5gpo[0], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo1", mcs5gpo[1], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo2", mcs5gpo[2], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo3", mcs5gpo[3], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo4", mcs5gpo[4], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo5", mcs5gpo[5], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo6", mcs5gpo[6], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5gpo7", mcs5gpo[7], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo0", mcs5glpo[0], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo1", mcs5glpo[1], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo2", mcs5glpo[2], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo3", mcs5glpo[3], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo4", mcs5glpo[4], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo5", mcs5glpo[5], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo6", mcs5glpo[6], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5glpo7", mcs5glpo[7], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo0", mcs5ghpo[0], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo1", mcs5ghpo[1], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo2", mcs5ghpo[2], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo3", mcs5ghpo[3], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo4", mcs5ghpo[4], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo5", mcs5ghpo[5], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo6", mcs5ghpo[6], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "mcs5ghpo7", mcs5ghpo[7], 0, fb);
+	ENTRY(0x000001f0, u16, pre, "cddpo", cddpo, 0, fb);
+	ENTRY(0x000001f0, u16, pre, "stbcpo", stbcpo, 0, fb);
+	ENTRY(0x000001f0, u16, pre, "bw40po", bw40po, 0, fb);
+	ENTRY(0x000001f0, u16, pre, "bwduppo", bwduppo, 0, fb);
+
+	ENTRY(0xfffffe00, u16, pre, "cckbw202gpo", cckbw202gpo, 0, fb);
+	ENTRY(0xfffffe00, u16, pre, "cckbw20ul2gpo", cckbw20ul2gpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw202gpo", legofdmbw202gpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw20ul2gpo", legofdmbw20ul2gpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw205glpo", legofdmbw205glpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw20ul5glpo", legofdmbw20ul5glpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw205gmpo", legofdmbw205gmpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw20ul5gmpo", legofdmbw20ul5gmpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw205ghpo", legofdmbw205ghpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "legofdmbw20ul5ghpo", legofdmbw20ul5ghpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw202gpo", mcsbw202gpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "mcsbw20ul2gpo", mcsbw20ul2gpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw402gpo", mcsbw402gpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw205glpo", mcsbw205glpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "mcsbw20ul5glpo", mcsbw20ul5glpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw405glpo", mcsbw405glpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw205gmpo", mcsbw205gmpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "mcsbw20ul5gmpo", mcsbw20ul5gmpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw405gmpo", mcsbw405gmpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw205ghpo", mcsbw205ghpo, 0, fb);
+	ENTRY(0x00000600, u32, pre, "mcsbw20ul5ghpo", mcsbw20ul5ghpo, 0, fb);
+	ENTRY(0xfffffe00, u32, pre, "mcsbw405ghpo", mcsbw405ghpo, 0, fb);
+	ENTRY(0x00000600, u16, pre, "mcs32po", mcs32po, 0, fb);
+	ENTRY(0x00000600, u16, pre, "legofdm40duppo", legofdm40duppo, 0, fb);
+	ENTRY(0x00000700, u8, pre, "pcieingress_war", pcieingress_war, 0, fb);
+
+	/* TODO: rev 11 support */
+	ENTRY(0x00000700, u8, pre, "rxgainerr2ga0", rxgainerr2ga[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr2ga1", rxgainerr2ga[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr2ga2", rxgainerr2ga[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gla0", rxgainerr5gla[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gla1", rxgainerr5gla[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gla2", rxgainerr5gla[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gma0", rxgainerr5gma[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gma1", rxgainerr5gma[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gma2", rxgainerr5gma[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gha0", rxgainerr5gha[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gha1", rxgainerr5gha[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gha2", rxgainerr5gha[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gua0", rxgainerr5gua[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gua1", rxgainerr5gua[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "rxgainerr5gua2", rxgainerr5gua[2], 0, fb);
+
+	ENTRY(0xfffffe00, u8, pre, "sar2g", sar2g, 0, fb);
+	ENTRY(0xfffffe00, u8, pre, "sar5g", sar5g, 0, fb);
+
+	/* TODO: rev 11 support */
+	ENTRY(0x00000700, u8, pre, "noiselvl2ga0", noiselvl2ga[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl2ga1", noiselvl2ga[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl2ga2", noiselvl2ga[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gla0", noiselvl5gla[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gla1", noiselvl5gla[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gla2", noiselvl5gla[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gma0", noiselvl5gma[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gma1", noiselvl5gma[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gma2", noiselvl5gma[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gha0", noiselvl5gha[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gha1", noiselvl5gha[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gha2", noiselvl5gha[2], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gua0", noiselvl5gua[0], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gua1", noiselvl5gua[1], 0, fb);
+	ENTRY(0x00000700, u8, pre, "noiselvl5gua2", noiselvl5gua[2], 0, fb);
+}
+#undef ENTRY /* It's specififc, uses local variable, don't use it (again). */
+
 static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom,
 					const char *prefix, bool fallback)
 {
 	nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback);
-	nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback);
-	nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback);
-	nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback);
-	nvram_read_u8(prefix, NULL, "ledbh3", &sprom->gpio3, 0xff, fallback);
-	nvram_read_u8(prefix, NULL, "aa2g", &sprom->ant_available_bg, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "aa5g", &sprom->ant_available_a, 0,
-		      fallback);
-	nvram_read_s8(prefix, NULL, "ag0", &sprom->antenna_gain.a0, 0,
-		      fallback);
-	nvram_read_s8(prefix, NULL, "ag1", &sprom->antenna_gain.a1, 0,
-		      fallback);
 	nvram_read_alpha2(prefix, "ccode", sprom->alpha2, fallback);
 }
 
-static void bcm47xx_fill_sprom_r12389(struct ssb_sprom *sprom,
-				      const char *prefix, bool fallback)
-{
-	nvram_read_u16(prefix, NULL, "pa0b0", &sprom->pa0b0, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa0b1", &sprom->pa0b1, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa0b2", &sprom->pa0b2, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pa0itssit", &sprom->itssi_bg, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pa0maxpwr", &sprom->maxpwr_bg, 0,
-		      fallback);
-	nvram_read_u16(prefix, NULL, "pa1b0", &sprom->pa1b0, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1b1", &sprom->pa1b1, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1b2", &sprom->pa1b2, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pa1itssit", &sprom->itssi_a, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pa1maxpwr", &sprom->maxpwr_a, 0, fallback);
-}
-
-static void bcm47xx_fill_sprom_r1(struct ssb_sprom *sprom, const char *prefix,
-				  bool fallback)
-{
-	nvram_read_u16(prefix, NULL, "boardflags", &sprom->boardflags_lo, 0,
-		       fallback);
-	nvram_read_u8(prefix, NULL, "cc", &sprom->country_code, 0, fallback);
-}
-
-static void bcm47xx_fill_sprom_r2389(struct ssb_sprom *sprom,
-				     const char *prefix, bool fallback)
-{
-	nvram_read_u8(prefix, NULL, "opo", &sprom->opo, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1lob0", &sprom->pa1lob0, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1lob1", &sprom->pa1lob1, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1lob2", &sprom->pa1lob2, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1hib0", &sprom->pa1hib0, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1hib1", &sprom->pa1hib1, 0, fallback);
-	nvram_read_u16(prefix, NULL, "pa1hib2", &sprom->pa1hib2, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pa1lomaxpwr", &sprom->maxpwr_al, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "pa1himaxpwr", &sprom->maxpwr_ah, 0,
-		      fallback);
-}
-
-static void bcm47xx_fill_sprom_r389(struct ssb_sprom *sprom, const char *prefix,
-				    bool fallback)
-{
-	nvram_read_u8(prefix, NULL, "bxa2g", &sprom->bxa2g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "rssisav2g", &sprom->rssisav2g, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "rssismc2g", &sprom->rssismc2g, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "rssismf2g", &sprom->rssismf2g, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "bxa5g", &sprom->bxa5g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "rssisav5g", &sprom->rssisav5g, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "rssismc5g", &sprom->rssismc5g, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "rssismf5g", &sprom->rssismf5g, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "tri2g", &sprom->tri2g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "tri5g", &sprom->tri5g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "tri5gl", &sprom->tri5gl, 0, fallback);
-	nvram_read_u8(prefix, NULL, "tri5gh", &sprom->tri5gh, 0, fallback);
-	nvram_read_s8(prefix, NULL, "rxpo2g", &sprom->rxpo2g, 0, fallback);
-	nvram_read_s8(prefix, NULL, "rxpo5g", &sprom->rxpo5g, 0, fallback);
-}
-
 static void bcm47xx_fill_sprom_r3(struct ssb_sprom *sprom, const char *prefix,
 				  bool fallback)
 {
-	nvram_read_u8(prefix, NULL, "regrev", &sprom->regrev, 0, fallback);
 	nvram_read_leddc(prefix, "leddc", &sprom->leddc_on_time,
 			 &sprom->leddc_off_time, fallback);
 }
@@ -276,309 +426,10 @@ static void bcm47xx_fill_sprom_r3(struct ssb_sprom *sprom, const char *prefix,
 static void bcm47xx_fill_sprom_r4589(struct ssb_sprom *sprom,
 				     const char *prefix, bool fallback)
 {
-	nvram_read_u8(prefix, NULL, "regrev", &sprom->regrev, 0, fallback);
-	nvram_read_s8(prefix, NULL, "ag2", &sprom->antenna_gain.a2, 0,
-		      fallback);
-	nvram_read_s8(prefix, NULL, "ag3", &sprom->antenna_gain.a3, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txchain", &sprom->txchain, 0xf, fallback);
-	nvram_read_u8(prefix, NULL, "rxchain", &sprom->rxchain, 0xf, fallback);
-	nvram_read_u8(prefix, NULL, "antswitch", &sprom->antswitch, 0xff,
-		      fallback);
 	nvram_read_leddc(prefix, "leddc", &sprom->leddc_on_time,
 			 &sprom->leddc_off_time, fallback);
 }
 
-static void bcm47xx_fill_sprom_r458(struct ssb_sprom *sprom, const char *prefix,
-				    bool fallback)
-{
-	nvram_read_u16(prefix, NULL, "cck2gpo", &sprom->cck2gpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "ofdm2gpo", &sprom->ofdm2gpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "ofdm5gpo", &sprom->ofdm5gpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "ofdm5glpo", &sprom->ofdm5glpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "ofdm5ghpo", &sprom->ofdm5ghpo, 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "cddpo", &sprom->cddpo, 0, fallback);
-	nvram_read_u16(prefix, NULL, "stbcpo", &sprom->stbcpo, 0, fallback);
-	nvram_read_u16(prefix, NULL, "bw40po", &sprom->bw40po, 0, fallback);
-	nvram_read_u16(prefix, NULL, "bwduppo", &sprom->bwduppo, 0, fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo0", &sprom->mcs2gpo[0], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo1", &sprom->mcs2gpo[1], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo2", &sprom->mcs2gpo[2], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo3", &sprom->mcs2gpo[3], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo4", &sprom->mcs2gpo[4], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo5", &sprom->mcs2gpo[5], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo6", &sprom->mcs2gpo[6], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs2gpo7", &sprom->mcs2gpo[7], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo0", &sprom->mcs5gpo[0], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo1", &sprom->mcs5gpo[1], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo2", &sprom->mcs5gpo[2], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo3", &sprom->mcs5gpo[3], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo4", &sprom->mcs5gpo[4], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo5", &sprom->mcs5gpo[5], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo6", &sprom->mcs5gpo[6], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5gpo7", &sprom->mcs5gpo[7], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo0", &sprom->mcs5glpo[0], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo1", &sprom->mcs5glpo[1], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo2", &sprom->mcs5glpo[2], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo3", &sprom->mcs5glpo[3], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo4", &sprom->mcs5glpo[4], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo5", &sprom->mcs5glpo[5], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo6", &sprom->mcs5glpo[6], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5glpo7", &sprom->mcs5glpo[7], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo0", &sprom->mcs5ghpo[0], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo1", &sprom->mcs5ghpo[1], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo2", &sprom->mcs5ghpo[2], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo3", &sprom->mcs5ghpo[3], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo4", &sprom->mcs5ghpo[4], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo5", &sprom->mcs5ghpo[5], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo6", &sprom->mcs5ghpo[6], 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs5ghpo7", &sprom->mcs5ghpo[7], 0,
-		       fallback);
-}
-
-static void bcm47xx_fill_sprom_r45(struct ssb_sprom *sprom, const char *prefix,
-				   bool fallback)
-{
-	nvram_read_u8(prefix, NULL, "txpid2ga0", &sprom->txpid2g[0], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid2ga1", &sprom->txpid2g[1], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid2ga2", &sprom->txpid2g[2], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid2ga3", &sprom->txpid2g[3], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5ga0", &sprom->txpid5g[0], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5ga1", &sprom->txpid5g[1], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5ga2", &sprom->txpid5g[2], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5ga3", &sprom->txpid5g[3], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gla0", &sprom->txpid5gl[0], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gla1", &sprom->txpid5gl[1], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gla2", &sprom->txpid5gl[2], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gla3", &sprom->txpid5gl[3], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gha0", &sprom->txpid5gh[0], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gha1", &sprom->txpid5gh[1], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gha2", &sprom->txpid5gh[2], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "txpid5gha3", &sprom->txpid5gh[3], 0,
-		      fallback);
-}
-
-static void bcm47xx_fill_sprom_r89(struct ssb_sprom *sprom, const char *prefix,
-				   bool fallback)
-{
-	nvram_read_u8(prefix, NULL, "tssipos2g", &sprom->fem.ghz2.tssipos, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "extpagain2g",
-		      &sprom->fem.ghz2.extpa_gain, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pdetrange2g",
-		      &sprom->fem.ghz2.pdet_range, 0, fallback);
-	nvram_read_u8(prefix, NULL, "triso2g", &sprom->fem.ghz2.tr_iso, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "antswctl2g", &sprom->fem.ghz2.antswlut, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "tssipos5g", &sprom->fem.ghz5.tssipos, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "extpagain5g",
-		      &sprom->fem.ghz5.extpa_gain, 0, fallback);
-	nvram_read_u8(prefix, NULL, "pdetrange5g",
-		      &sprom->fem.ghz5.pdet_range, 0, fallback);
-	nvram_read_u8(prefix, NULL, "triso5g", &sprom->fem.ghz5.tr_iso, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "antswctl5g", &sprom->fem.ghz5.antswlut, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "tempthresh", &sprom->tempthresh, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "tempoffset", &sprom->tempoffset, 0,
-		      fallback);
-	nvram_read_u16(prefix, NULL, "rawtempsense", &sprom->rawtempsense, 0,
-		       fallback);
-	nvram_read_u8(prefix, NULL, "measpower", &sprom->measpower, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "tempsense_slope",
-		      &sprom->tempsense_slope, 0, fallback);
-	nvram_read_u8(prefix, NULL, "tempcorrx", &sprom->tempcorrx, 0,
-		       fallback);
-	nvram_read_u8(prefix, NULL, "tempsense_option",
-		      &sprom->tempsense_option, 0, fallback);
-	nvram_read_u8(prefix, NULL, "freqoffset_corr",
-		      &sprom->freqoffset_corr, 0, fallback);
-	nvram_read_u8(prefix, NULL, "iqcal_swp_dis", &sprom->iqcal_swp_dis, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "hw_iqcal_en", &sprom->hw_iqcal_en, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "elna2g", &sprom->elna2g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "elna5g", &sprom->elna5g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "phycal_tempdelta",
-		      &sprom->phycal_tempdelta, 0, fallback);
-	nvram_read_u8(prefix, NULL, "temps_period", &sprom->temps_period, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "temps_hysteresis",
-		      &sprom->temps_hysteresis, 0, fallback);
-	nvram_read_u8(prefix, NULL, "measpower1", &sprom->measpower1, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "measpower2", &sprom->measpower2, 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr2ga0",
-		      &sprom->rxgainerr2ga[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr2ga1",
-		      &sprom->rxgainerr2ga[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr2ga2",
-		      &sprom->rxgainerr2ga[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gla0",
-		      &sprom->rxgainerr5gla[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gla1",
-		      &sprom->rxgainerr5gla[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gla2",
-		      &sprom->rxgainerr5gla[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gma0",
-		      &sprom->rxgainerr5gma[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gma1",
-		      &sprom->rxgainerr5gma[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gma2",
-		      &sprom->rxgainerr5gma[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gha0",
-		      &sprom->rxgainerr5gha[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gha1",
-		      &sprom->rxgainerr5gha[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gha2",
-		      &sprom->rxgainerr5gha[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gua0",
-		      &sprom->rxgainerr5gua[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gua1",
-		      &sprom->rxgainerr5gua[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "rxgainerr5gua2",
-		      &sprom->rxgainerr5gua[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl2ga0", &sprom->noiselvl2ga[0], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl2ga1", &sprom->noiselvl2ga[1], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl2ga2", &sprom->noiselvl2ga[2], 0,
-		      fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gla0",
-		      &sprom->noiselvl5gla[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gla1",
-		      &sprom->noiselvl5gla[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gla2",
-		      &sprom->noiselvl5gla[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gma0",
-		      &sprom->noiselvl5gma[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gma1",
-		      &sprom->noiselvl5gma[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gma2",
-		      &sprom->noiselvl5gma[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gha0",
-		      &sprom->noiselvl5gha[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gha1",
-		      &sprom->noiselvl5gha[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gha2",
-		      &sprom->noiselvl5gha[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gua0",
-		      &sprom->noiselvl5gua[0], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gua1",
-		      &sprom->noiselvl5gua[1], 0, fallback);
-	nvram_read_u8(prefix, NULL, "noiselvl5gua2",
-		      &sprom->noiselvl5gua[2], 0, fallback);
-	nvram_read_u8(prefix, NULL, "pcieingress_war",
-		      &sprom->pcieingress_war, 0, fallback);
-}
-
-static void bcm47xx_fill_sprom_r9(struct ssb_sprom *sprom, const char *prefix,
-				  bool fallback)
-{
-	nvram_read_u16(prefix, NULL, "cckbw202gpo", &sprom->cckbw202gpo, 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "cckbw20ul2gpo", &sprom->cckbw20ul2gpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw202gpo",
-		       &sprom->legofdmbw202gpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw20ul2gpo",
-		       &sprom->legofdmbw20ul2gpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw205glpo",
-		       &sprom->legofdmbw205glpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw20ul5glpo",
-		       &sprom->legofdmbw20ul5glpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw205gmpo",
-		       &sprom->legofdmbw205gmpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw20ul5gmpo",
-		       &sprom->legofdmbw20ul5gmpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw205ghpo",
-		       &sprom->legofdmbw205ghpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "legofdmbw20ul5ghpo",
-		       &sprom->legofdmbw20ul5ghpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw202gpo", &sprom->mcsbw202gpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw20ul2gpo", &sprom->mcsbw20ul2gpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw402gpo", &sprom->mcsbw402gpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw205glpo", &sprom->mcsbw205glpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw20ul5glpo",
-		       &sprom->mcsbw20ul5glpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw405glpo", &sprom->mcsbw405glpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw205gmpo", &sprom->mcsbw205gmpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw20ul5gmpo",
-		       &sprom->mcsbw20ul5gmpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw405gmpo", &sprom->mcsbw405gmpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw205ghpo", &sprom->mcsbw205ghpo, 0,
-		       fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw20ul5ghpo",
-		       &sprom->mcsbw20ul5ghpo, 0, fallback);
-	nvram_read_u32(prefix, NULL, "mcsbw405ghpo", &sprom->mcsbw405ghpo, 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "mcs32po", &sprom->mcs32po, 0, fallback);
-	nvram_read_u16(prefix, NULL, "legofdm40duppo",
-		       &sprom->legofdm40duppo, 0, fallback);
-	nvram_read_u8(prefix, NULL, "sar2g", &sprom->sar2g, 0, fallback);
-	nvram_read_u8(prefix, NULL, "sar5g", &sprom->sar5g, 0, fallback);
-}
-
 static void bcm47xx_fill_sprom_path_r4589(struct ssb_sprom *sprom,
 					  const char *prefix, bool fallback)
 {
@@ -715,10 +566,6 @@ static void bcm47xx_fill_sprom_ethernet(struct ssb_sprom *sprom,
 static void bcm47xx_fill_board_data(struct ssb_sprom *sprom, const char *prefix,
 				    bool fallback)
 {
-	nvram_read_u16(prefix, NULL, "boardrev", &sprom->board_rev, 0, true);
-	nvram_read_u16(prefix, NULL, "boardnum", &sprom->board_num, 0,
-		       fallback);
-	nvram_read_u16(prefix, NULL, "boardtype", &sprom->board_type, 0, true);
 	nvram_read_u32_2(prefix, "boardflags", &sprom->boardflags_lo,
 			 &sprom->boardflags_hi, fallback);
 	nvram_read_u32_2(prefix, "boardflags2", &sprom->boardflags2_lo,
@@ -736,58 +583,39 @@ void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix,
 	switch (sprom->revision) {
 	case 1:
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r12389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r1(sprom, prefix, fallback);
 		break;
 	case 2:
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r12389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r2389(sprom, prefix, fallback);
 		break;
 	case 3:
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r12389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r2389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r389(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_r3(sprom, prefix, fallback);
 		break;
 	case 4:
 	case 5:
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_r4589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r458(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r45(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_path_r4589(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_path_r45(sprom, prefix, fallback);
 		break;
 	case 8:
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r12389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r2389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r389(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_r4589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r458(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r89(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_path_r4589(sprom, prefix, fallback);
 		break;
 	case 9:
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r12389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r2389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r389(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_r4589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r89(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r9(sprom, prefix, fallback);
 		bcm47xx_fill_sprom_path_r4589(sprom, prefix, fallback);
 		break;
 	default:
-		pr_warn("Unsupported SPROM revision %d detected. Will extract"
-			" v1\n", sprom->revision);
+		pr_warn("Unsupported SPROM revision %d detected. Will extract v1\n",
+			sprom->revision);
 		sprom->revision = 1;
 		bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r12389(sprom, prefix, fallback);
-		bcm47xx_fill_sprom_r1(sprom, prefix, fallback);
 	}
+
+	bcm47xx_sprom_fill_auto(sprom, prefix, fallback);
 }
 
 #ifdef CONFIG_BCM47XX_SSB
@@ -829,13 +657,45 @@ static int bcm47xx_get_sprom_ssb(struct ssb_bus *bus, struct ssb_sprom *out)
 		bcm47xx_fill_sprom(out, prefix, false);
 		return 0;
 	} else {
-		pr_warn("bcm47xx: unable to fill SPROM for given bustype.\n");
+		pr_warn("Unable to fill SPROM for given bustype.\n");
 		return -EINVAL;
 	}
 }
 #endif
 
 #if defined(CONFIG_BCM47XX_BCMA)
+/*
+ * Having many NVRAM entries for PCI devices led to repeating prefixes like
+ * pci/1/1/ all the time and wasting flash space. So at some point Broadcom
+ * decided to introduce prefixes like 0: 1: 2: etc.
+ * If we find e.g. devpath0=pci/2/1 or devpath0=pci/2/1/ we should use 0:
+ * instead of pci/2/1/.
+ */
+static void bcm47xx_sprom_apply_prefix_alias(char *prefix, size_t prefix_size)
+{
+	size_t prefix_len = strlen(prefix);
+	size_t short_len = prefix_len - 1;
+	char nvram_var[10];
+	char buf[20];
+	int i;
+
+	/* Passed prefix has to end with a slash */
+	if (prefix_len <= 0 || prefix[prefix_len - 1] != '/')
+		return;
+
+	for (i = 0; i < 3; i++) {
+		if (snprintf(nvram_var, sizeof(nvram_var), "devpath%d", i) <= 0)
+			continue;
+		if (bcm47xx_nvram_getenv(nvram_var, buf, sizeof(buf)) < 0)
+			continue;
+		if (!strcmp(buf, prefix) ||
+		    (short_len && strlen(buf) == short_len && !strncmp(buf, prefix, short_len))) {
+			snprintf(prefix, prefix_size, "%d:", i);
+			return;
+		}
+	}
+}
+
 static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
 {
 	char prefix[10];
@@ -847,6 +707,7 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
 		snprintf(prefix, sizeof(prefix), "pci/%u/%u/",
 			 bus->host_pci->bus->number + 1,
 			 PCI_SLOT(bus->host_pci->devfn));
+		bcm47xx_sprom_apply_prefix_alias(prefix, sizeof(prefix));
 		bcm47xx_fill_sprom(out, prefix, false);
 		return 0;
 	case BCMA_HOSTTYPE_SOC:
@@ -861,7 +722,7 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
 		}
 		return 0;
 	default:
-		pr_warn("bcm47xx: unable to fill SPROM for given bustype.\n");
+		pr_warn("Unable to fill SPROM for given bustype.\n");
 		return -EINVAL;
 	}
 }
diff --git a/arch/mips/bcm47xx/time.c b/arch/mips/bcm47xx/time.c
index 2c85d9254b5e..74224cf2e84d 100644
--- a/arch/mips/bcm47xx/time.c
+++ b/arch/mips/bcm47xx/time.c
@@ -22,12 +22,10 @@
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-
 #include <linux/init.h>
 #include <linux/ssb/ssb.h>
 #include <asm/time.h>
 #include <bcm47xx.h>
-#include <bcm47xx_nvram.h>
 #include <bcm47xx_board.h>
 
 void __init plat_time_init(void)
diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c
index b94bf44d8d8e..e3e808a6c542 100644
--- a/arch/mips/bcm63xx/irq.c
+++ b/arch/mips/bcm63xx/irq.c
@@ -58,9 +58,9 @@ static inline int enable_irq_for_cpu(int cpu, struct irq_data *d,
 
 #ifdef CONFIG_SMP
 	if (m)
-		enable &= cpu_isset(cpu, *m);
+		enable &= cpumask_test_cpu(cpu, m);
 	else if (irqd_affinity_was_set(d))
-		enable &= cpu_isset(cpu, *d->affinity);
+		enable &= cpumask_test_cpu(cpu, d->affinity);
 #endif
 	return enable;
 }
diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c
index e1f27d653f60..7019e2967009 100644
--- a/arch/mips/bcm63xx/prom.c
+++ b/arch/mips/bcm63xx/prom.c
@@ -17,7 +17,6 @@
 #include <bcm63xx_cpu.h>
 #include <bcm63xx_io.h>
 #include <bcm63xx_regs.h>
-#include <bcm63xx_gpio.h>
 
 void __init prom_init(void)
 {
@@ -53,9 +52,6 @@ void __init prom_init(void)
 	reg &= ~mask;
 	bcm_perf_writel(reg, PERF_CKCTL_REG);
 
-	/* register gpiochip */
-	bcm63xx_gpio_init();
-
 	/* do low level board init */
 	board_prom_init();
 
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
index 6660c7ddf87b..240fb4ffa55c 100644
--- a/arch/mips/bcm63xx/setup.c
+++ b/arch/mips/bcm63xx/setup.c
@@ -20,6 +20,7 @@
 #include <bcm63xx_cpu.h>
 #include <bcm63xx_regs.h>
 #include <bcm63xx_io.h>
+#include <bcm63xx_gpio.h>
 
 void bcm63xx_machine_halt(void)
 {
@@ -160,6 +161,9 @@ void __init plat_mem_setup(void)
 
 int __init bcm63xx_register_devices(void)
 {
+	/* register gpiochip */
+	bcm63xx_gpio_init();
+
 	return board_register_devices();
 }
 
diff --git a/arch/mips/bmips/Kconfig b/arch/mips/bmips/Kconfig
new file mode 100644
index 000000000000..f35c84c019df
--- /dev/null
+++ b/arch/mips/bmips/Kconfig
@@ -0,0 +1,62 @@
+if BMIPS_GENERIC
+
+choice
+	prompt "Built-in device tree"
+	help
+	  Legacy bootloaders do not pass a DTB pointer to the kernel, so
+	  if a "wrapper" is not being used, the kernel will need to include
+	  a device tree that matches the target board.
+
+	  The builtin DTB will only be used if the firmware does not supply
+	  a valid DTB.
+
+config DT_NONE
+	bool "None"
+
+config DT_BCM93384WVG
+	bool "BCM93384WVG Zephyr CPU"
+	select BUILTIN_DTB
+
+config DT_BCM93384WVG_VIPER
+	bool "BCM93384WVG Viper CPU (EXPERIMENTAL)"
+	select BUILTIN_DTB
+
+config DT_BCM96368MVWG
+	bool "BCM96368MVWG"
+	select BUILTIN_DTB
+
+config DT_BCM9EJTAGPRB
+	bool "BCM9EJTAGPRB"
+	select BUILTIN_DTB
+
+config DT_BCM97125CBMB
+	bool "BCM97125CBMB"
+	select BUILTIN_DTB
+
+config DT_BCM97346DBSMB
+	bool "BCM97346DBSMB"
+	select BUILTIN_DTB
+
+config DT_BCM97358SVMB
+	bool "BCM97358SVMB"
+	select BUILTIN_DTB
+
+config DT_BCM97360SVMB
+	bool "BCM97360SVMB"
+	select BUILTIN_DTB
+
+config DT_BCM97362SVMB
+	bool "BCM97362SVMB"
+	select BUILTIN_DTB
+
+config DT_BCM97420C
+	bool "BCM97420C"
+	select BUILTIN_DTB
+
+config DT_BCM97425SVMB
+	bool "BCM97425SVMB"
+	select BUILTIN_DTB
+
+endchoice
+
+endif
diff --git a/arch/mips/bcm3384/Makefile b/arch/mips/bmips/Makefile
index a393955cba08..a393955cba08 100644
--- a/arch/mips/bcm3384/Makefile
+++ b/arch/mips/bmips/Makefile
diff --git a/arch/mips/bmips/Platform b/arch/mips/bmips/Platform
new file mode 100644
index 000000000000..5f127fd7f4b5
--- /dev/null
+++ b/arch/mips/bmips/Platform
@@ -0,0 +1,7 @@
+#
+# Broadcom Generic BMIPS kernel
+#
+platform-$(CONFIG_BMIPS_GENERIC)	+= bmips/
+cflags-$(CONFIG_BMIPS_GENERIC)		+=				\
+		-I$(srctree)/arch/mips/include/asm/mach-bmips/
+load-$(CONFIG_BMIPS_GENERIC)		:= 0xffffffff80010000
diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
new file mode 100644
index 000000000000..04790f4e1805
--- /dev/null
+++ b/arch/mips/bmips/dma.c
@@ -0,0 +1,117 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2014 Kevin Cernekee <cernekee@gmail.com>
+ */
+
+#define pr_fmt(fmt)		"bmips-dma: " fmt
+
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <dma-coherence.h>
+
+/*
+ * BCM338x has configurable address translation windows which allow the
+ * peripherals' DMA addresses to be different from the Zephyr-visible
+ * physical addresses.  e.g. usb_dma_addr = zephyr_pa ^ 0x08000000
+ *
+ * If the "brcm,ubus" node has a "dma-ranges" property we will enable this
+ * translation globally using the provided information.  This implements a
+ * very limited subset of "dma-ranges" support and it will probably be
+ * replaced by a more generic version later.
+ */
+
+struct bmips_dma_range {
+	u32			child_addr;
+	u32			parent_addr;
+	u32			size;
+};
+
+static struct bmips_dma_range *bmips_dma_ranges;
+
+#define FLUSH_RAC		0x100
+
+static dma_addr_t bmips_phys_to_dma(struct device *dev, phys_addr_t pa)
+{
+	struct bmips_dma_range *r;
+
+	for (r = bmips_dma_ranges; r && r->size; r++) {
+		if (pa >= r->child_addr &&
+		    pa < (r->child_addr + r->size))
+			return pa - r->child_addr + r->parent_addr;
+	}
+	return pa;
+}
+
+dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, size_t size)
+{
+	return bmips_phys_to_dma(dev, virt_to_phys(addr));
+}
+
+dma_addr_t plat_map_dma_mem_page(struct device *dev, struct page *page)
+{
+	return bmips_phys_to_dma(dev, page_to_phys(page));
+}
+
+unsigned long plat_dma_addr_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+	struct bmips_dma_range *r;
+
+	for (r = bmips_dma_ranges; r && r->size; r++) {
+		if (dma_addr >= r->parent_addr &&
+		    dma_addr < (r->parent_addr + r->size))
+			return dma_addr - r->parent_addr + r->child_addr;
+	}
+	return dma_addr;
+}
+
+static int __init bmips_init_dma_ranges(void)
+{
+	struct device_node *np =
+		of_find_compatible_node(NULL, NULL, "brcm,ubus");
+	const __be32 *data;
+	struct bmips_dma_range *r;
+	int len;
+
+	if (!np)
+		return 0;
+
+	data = of_get_property(np, "dma-ranges", &len);
+	if (!data)
+		goto out_good;
+
+	len /= sizeof(*data) * 3;
+	if (!len)
+		goto out_bad;
+
+	/* add a dummy (zero) entry at the end as a sentinel */
+	bmips_dma_ranges = kzalloc(sizeof(struct bmips_dma_range) * (len + 1),
+				   GFP_KERNEL);
+	if (!bmips_dma_ranges)
+		goto out_bad;
+
+	for (r = bmips_dma_ranges; len; len--, r++) {
+		r->child_addr = be32_to_cpup(data++);
+		r->parent_addr = be32_to_cpup(data++);
+		r->size = be32_to_cpup(data++);
+	}
+
+out_good:
+	of_node_put(np);
+	return 0;
+
+out_bad:
+	pr_err("error parsing dma-ranges property\n");
+	of_node_put(np);
+	return -EINVAL;
+}
+arch_initcall(bmips_init_dma_ranges);
diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c
new file mode 100644
index 000000000000..14552e58ff7e
--- /dev/null
+++ b/arch/mips/bmips/irq.c
@@ -0,0 +1,38 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2014 Broadcom Corporation
+ * Author: Kevin Cernekee <cernekee@gmail.com>
+ */
+
+#include <linux/of.h>
+#include <linux/irqchip.h>
+
+#include <asm/bmips.h>
+#include <asm/irq.h>
+#include <asm/irq_cpu.h>
+#include <asm/time.h>
+
+unsigned int get_c0_compare_int(void)
+{
+	return CP0_LEGACY_COMPARE_IRQ;
+}
+
+void __init arch_init_irq(void)
+{
+	struct device_node *dn;
+
+	/* Only the STB (bcm7038) controller supports SMP IRQ affinity */
+	dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
+	if (dn)
+		of_node_put(dn);
+	else
+		bmips_tp1_irqs = 0;
+
+	irqchip_init();
+}
+
+OF_DECLARE_2(irqchip, mips_cpu_intc, "mti,cpu-interrupt-controller",
+	     mips_cpu_irq_of_init);
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
new file mode 100644
index 000000000000..fae800e8b1e1
--- /dev/null
+++ b/arch/mips/bmips/setup.c
@@ -0,0 +1,194 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Maxime Bizon <mbizon@freebox.fr>
+ * Copyright (C) 2014 Kevin Cernekee <cernekee@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/bootmem.h>
+#include <linux/clk-provider.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/smp.h>
+#include <asm/addrspace.h>
+#include <asm/bmips.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu-type.h>
+#include <asm/mipsregs.h>
+#include <asm/prom.h>
+#include <asm/smp-ops.h>
+#include <asm/time.h>
+#include <asm/traps.h>
+
+#define RELO_NORMAL_VEC		BIT(18)
+
+#define REG_BCM6328_OTP		((void __iomem *)CKSEG1ADDR(0x1000062c))
+#define BCM6328_TP1_DISABLED	BIT(9)
+
+static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000;
+
+struct bmips_quirk {
+	const char		*compatible;
+	void			(*quirk_fn)(void);
+};
+
+static void kbase_setup(void)
+{
+	__raw_writel(kbase | RELO_NORMAL_VEC,
+		     BMIPS_GET_CBR() + BMIPS_RELO_VECTOR_CONTROL_1);
+	ebase = kbase;
+}
+
+static void bcm3384_viper_quirks(void)
+{
+	/*
+	 * Some experimental CM boxes are set up to let CM own the Viper TP0
+	 * and let Linux own TP1.  This requires moving the kernel
+	 * load address to a non-conflicting region (e.g. via
+	 * CONFIG_PHYSICAL_START) and supplying an alternate DTB.
+	 * If we detect this condition, we need to move the MIPS exception
+	 * vectors up to an area that we own.
+	 *
+	 * This is distinct from the OTHER special case mentioned in
+	 * smp-bmips.c (boot on TP1, but enable SMP, then TP0 becomes our
+	 * logical CPU#1).  For the Viper TP1 case, SMP is off limits.
+	 *
+	 * Also note that many BMIPS435x CPUs do not have a
+	 * BMIPS_RELO_VECTOR_CONTROL_1 register, so it isn't safe to just
+	 * write VMLINUX_LOAD_ADDRESS into that register on every SoC.
+	 */
+	board_ebase_setup = &kbase_setup;
+	bmips_smp_enabled = 0;
+}
+
+static void bcm63xx_fixup_cpu1(void)
+{
+	/*
+	 * The bootloader has set up the CPU1 reset vector at
+	 * 0xa000_0200.
+	 * This conflicts with the special interrupt vector (IV).
+	 * The bootloader has also set up CPU1 to respond to the wrong
+	 * IPI interrupt.
+	 * Here we will start up CPU1 in the background and ask it to
+	 * reconfigure itself then go back to sleep.
+	 */
+	memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20);
+	__sync();
+	set_c0_cause(C_SW0);
+	cpumask_set_cpu(1, &bmips_booted_mask);
+}
+
+static void bcm6328_quirks(void)
+{
+	/* Check CPU1 status in OTP (it is usually disabled) */
+	if (__raw_readl(REG_BCM6328_OTP) & BCM6328_TP1_DISABLED)
+		bmips_smp_enabled = 0;
+	else
+		bcm63xx_fixup_cpu1();
+}
+
+static void bcm6368_quirks(void)
+{
+	bcm63xx_fixup_cpu1();
+}
+
+static const struct bmips_quirk bmips_quirk_list[] = {
+	{ "brcm,bcm3384-viper",		&bcm3384_viper_quirks		},
+	{ "brcm,bcm33843-viper",	&bcm3384_viper_quirks		},
+	{ "brcm,bcm6328",		&bcm6328_quirks			},
+	{ "brcm,bcm6368",		&bcm6368_quirks			},
+	{ },
+};
+
+void __init prom_init(void)
+{
+	register_bmips_smp_ops();
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
+
+const char *get_system_type(void)
+{
+	return "Generic BMIPS kernel";
+}
+
+void __init plat_time_init(void)
+{
+	struct device_node *np;
+	u32 freq;
+
+	np = of_find_node_by_name(NULL, "cpus");
+	if (!np)
+		panic("missing 'cpus' DT node");
+	if (of_property_read_u32(np, "mips-hpt-frequency", &freq) < 0)
+		panic("missing 'mips-hpt-frequency' property");
+	of_node_put(np);
+
+	mips_hpt_frequency = freq;
+}
+
+void __init plat_mem_setup(void)
+{
+	void *dtb;
+	const struct bmips_quirk *q;
+
+	set_io_port_base(0);
+	ioport_resource.start = 0;
+	ioport_resource.end = ~0;
+
+	/* intended to somewhat resemble ARM; see Documentation/arm/Booting */
+	if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
+		dtb = phys_to_virt(fw_arg2);
+	else if (__dtb_start != __dtb_end)
+		dtb = (void *)__dtb_start;
+	else
+		panic("no dtb found");
+
+	__dt_setup_arch(dtb);
+	strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+	for (q = bmips_quirk_list; q->quirk_fn; q++) {
+		if (of_flat_dt_is_compatible(of_get_flat_dt_root(),
+					     q->compatible)) {
+			q->quirk_fn();
+		}
+	}
+}
+
+void __init device_tree_init(void)
+{
+	struct device_node *np;
+
+	unflatten_and_copy_device_tree();
+
+	/* Disable SMP boot unless both CPUs are listed in DT and !disabled */
+	np = of_find_node_by_name(NULL, "cpus");
+	if (np && of_get_available_child_count(np) <= 1)
+		bmips_smp_enabled = 0;
+	of_node_put(np);
+}
+
+int __init plat_of_setup(void)
+{
+	return __dt_register_buses("simple-bus", NULL);
+}
+
+arch_initcall(plat_of_setup);
+
+static int __init plat_dev_init(void)
+{
+	of_clk_init(NULL);
+	return 0;
+}
+
+device_initcall(plat_dev_init);
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 61af6b6ab13d..dc91bde10d62 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -12,6 +12,8 @@
 # Author: Wu Zhangjin <wuzhangjin@gmail.com>
 #
 
+include $(srctree)/arch/mips/Kbuild.platforms
+
 # set the default size of the mallocing area for decompressing
 BOOT_HEAP_SIZE := 0x400000
 
@@ -30,9 +32,10 @@ KBUILD_AFLAGS := $(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
 targets := head.o decompress.o string.o dbg.o uart-16550.o uart-alchemy.o
 
 # decompressor objects (linked with vmlinuz)
-vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/dbg.o
+vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 
 ifdef CONFIG_DEBUG_ZBOOT
+vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT)		   += $(obj)/dbg.o
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
 vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)		   += $(obj)/uart-alchemy.o
 endif
@@ -66,8 +69,8 @@ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
 # Calculate the load address of the compressed kernel image
 hostprogs-y := calc_vmlinuz_load_addr
 
-ifeq ($(CONFIG_MACH_JZ4740),y)
-VMLINUZ_LOAD_ADDRESS := 0x80600000
+ifneq ($(zload-y),)
+VMLINUZ_LOAD_ADDRESS := $(zload-y)
 else
 VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \
 		$(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS))
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 31903cf9709d..54831069a206 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -28,8 +28,13 @@ unsigned long free_mem_end_ptr;
 extern unsigned char __image_begin, __image_end;
 
 /* debug interfaces  */
+#ifdef CONFIG_DEBUG_ZBOOT
 extern void puts(const char *s);
 extern void puthex(unsigned long long val);
+#else
+#define puts(s) do {} while (0)
+#define puthex(val) do {} while (0)
+#endif
 
 void error(char *x)
 {
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index 4f49fa477f14..5d95e4bd709a 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -1,21 +1,12 @@
-dtb-$(CONFIG_BCM3384)			+= bcm93384wvg.dtb
-dtb-$(CONFIG_CAVIUM_OCTEON_SOC)		+= octeon_3xxx.dtb octeon_68xx.dtb
-dtb-$(CONFIG_DT_EASY50712)		+= easy50712.dtb
-dtb-$(CONFIG_DT_XLP_EVP)		+= xlp_evp.dtb
-dtb-$(CONFIG_DT_XLP_SVP)		+= xlp_svp.dtb
-dtb-$(CONFIG_DT_XLP_FVP)		+= xlp_fvp.dtb
-dtb-$(CONFIG_DT_XLP_GVP)		+= xlp_gvp.dtb
-dtb-$(CONFIG_DTB_RT2880_EVAL)		+= rt2880_eval.dtb
-dtb-$(CONFIG_DTB_RT305X_EVAL)		+= rt3052_eval.dtb
-dtb-$(CONFIG_DTB_RT3883_EVAL)		+= rt3883_eval.dtb
-dtb-$(CONFIG_DTB_MT7620A_EVAL)		+= mt7620a_eval.dtb
-dtb-$(CONFIG_MIPS_SEAD3)		+= sead3.dtb
-
-obj-y		+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-targets		+= dtbs
-targets		+= $(dtb-y)
-
-dtbs: $(addprefix $(obj)/, $(dtb-y))
-
-clean-files	+= *.dtb *.dtb.S
+dts-dirs	+= brcm
+dts-dirs	+= cavium-octeon
+dts-dirs	+= lantiq
+dts-dirs	+= mti
+dts-dirs	+= netlogic
+dts-dirs	+= ralink
+
+obj-y		:= $(addsuffix /, $(dts-dirs))
+
+always		:= $(dtb-y)
+subdir-y	:= $(dts-dirs)
+clean-files	:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/bcm3384.dtsi b/arch/mips/boot/dts/bcm3384.dtsi
deleted file mode 100644
index 21b074a99c94..000000000000
--- a/arch/mips/boot/dts/bcm3384.dtsi
+++ /dev/null
@@ -1,109 +0,0 @@
-/ {
-	#address-cells = <1>;
-	#size-cells = <1>;
-	compatible = "brcm,bcm3384", "brcm,bcm33843";
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		/* On BMIPS5000 this is 1/8th of the CPU core clock */
-		mips-hpt-frequency = <100000000>;
-
-		cpu@0 {
-			compatible = "brcm,bmips5000";
-			device_type = "cpu";
-			reg = <0>;
-		};
-
-		cpu@1 {
-			compatible = "brcm,bmips5000";
-			device_type = "cpu";
-			reg = <1>;
-		};
-	};
-
-	clocks {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		periph_clk: periph_clk@0 {
-			compatible = "fixed-clock";
-			#clock-cells = <0>;
-			clock-frequency = <54000000>;
-		};
-	};
-
-	aliases {
-		uart0 = &uart0;
-	};
-
-	cpu_intc: cpu_intc@0 {
-		#address-cells = <0>;
-		compatible = "mti,cpu-interrupt-controller";
-
-		interrupt-controller;
-		#interrupt-cells = <1>;
-	};
-
-	periph_intc: periph_intc@14e00038 {
-		compatible = "brcm,bcm3384-intc";
-		reg = <0x14e00038 0x8 0x14e00340 0x8>;
-
-		interrupt-controller;
-		#interrupt-cells = <1>;
-
-		interrupt-parent = <&cpu_intc>;
-		interrupts = <4>;
-	};
-
-	zmips_intc: zmips_intc@104b0060 {
-		compatible = "brcm,bcm3384-intc";
-		reg = <0x104b0060 0x8>;
-
-		interrupt-controller;
-		#interrupt-cells = <1>;
-
-		interrupt-parent = <&periph_intc>;
-		interrupts = <29>;
-	};
-
-	iop_intc: iop_intc@14e00058 {
-		compatible = "brcm,bcm3384-intc";
-		reg = <0x14e00058 0x8>;
-
-		interrupt-controller;
-		#interrupt-cells = <1>;
-
-		interrupt-parent = <&cpu_intc>;
-		interrupts = <6>;
-	};
-
-	uart0: serial@14e00520 {
-		compatible = "brcm,bcm6345-uart";
-		reg = <0x14e00520 0x18>;
-		interrupt-parent = <&periph_intc>;
-		interrupts = <2>;
-		clocks = <&periph_clk>;
-		status = "disabled";
-	};
-
-	ehci0: usb@15400300 {
-		compatible = "brcm,bcm3384-ehci", "generic-ehci";
-		reg = <0x15400300 0x100>;
-		big-endian;
-		interrupt-parent = <&periph_intc>;
-		interrupts = <41>;
-		status = "disabled";
-	};
-
-	ohci0: usb@15400400 {
-		compatible = "brcm,bcm3384-ohci", "generic-ohci";
-		reg = <0x15400400 0x100>;
-		big-endian;
-		no-big-frame-no;
-		interrupt-parent = <&periph_intc>;
-		interrupts = <40>;
-		status = "disabled";
-	};
-};
diff --git a/arch/mips/boot/dts/brcm/Makefile b/arch/mips/boot/dts/brcm/Makefile
new file mode 100644
index 000000000000..1c8353bfe003
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/Makefile
@@ -0,0 +1,19 @@
+dtb-$(CONFIG_DT_BCM93384WVG)		+= bcm93384wvg.dtb
+dtb-$(CONFIG_DT_BCM93384WVG_VIPER)	+= bcm93384wvg_viper.dtb
+dtb-$(CONFIG_DT_BCM96368MVWG)		+= bcm96368mvwg.dtb
+dtb-$(CONFIG_DT_BCM9EJTAGPRB)		+= bcm9ejtagprb.dtb
+dtb-$(CONFIG_DT_BCM97125CBMB)		+= bcm97125cbmb.dtb
+dtb-$(CONFIG_DT_BCM97346DBSMB)		+= bcm97346dbsmb.dtb
+dtb-$(CONFIG_DT_BCM97358SVMB)		+= bcm97358svmb.dtb
+dtb-$(CONFIG_DT_BCM97360SVMB)		+= bcm97360svmb.dtb
+dtb-$(CONFIG_DT_BCM97362SVMB)		+= bcm97362svmb.dtb
+dtb-$(CONFIG_DT_BCM97420C)		+= bcm97420c.dtb
+dtb-$(CONFIG_DT_BCM97425SVMB)		+= bcm97425svmb.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-				+= dummy.o
+
+always				:= $(dtb-y)
+clean-files			:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/brcm/bcm3384_viper.dtsi b/arch/mips/boot/dts/brcm/bcm3384_viper.dtsi
new file mode 100644
index 000000000000..aa406b43c65f
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm3384_viper.dtsi
@@ -0,0 +1,108 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm3384-viper", "brcm,bcm33843-viper";
+
+	memory@0 {
+		device_type = "memory";
+
+		/* Typical ranges.  The bootloader should fill these in. */
+		reg = <0x06000000 0x02000000>,
+		      <0x0e000000 0x02000000>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		/* 1/2 of the CPU core clock (standard MIPS behavior) */
+		mips-hpt-frequency = <300000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <0>;
+		};
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		periph_clk: periph_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <54000000>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	ubus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "brcm,ubus", "simple-bus";
+		ranges;
+		/* No dma-ranges on Viper. */
+
+		periph_intc: periph_intc@14e00048 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x14e00048 0x4 0x14e0004c 0x4>,
+			      <0x14e00350 0x4 0x14e00354 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <4>;
+		};
+
+		cmips_intc: cmips_intc@151f8048 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x151f8048 0x4 0x151f804c 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <30>;
+			brcm,int-map-mask = <0xffffffff>;
+		};
+
+		uart0: serial@14e00520 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0x14e00520 0x18>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <2>;
+			clocks = <&periph_clk>;
+			status = "disabled";
+		};
+
+		ehci0: usb@15400300 {
+			compatible = "brcm,bcm3384-ehci", "generic-ehci";
+			reg = <0x15400300 0x100>;
+			big-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <41>;
+			status = "disabled";
+		};
+
+		ohci0: usb@15400400 {
+			compatible = "brcm,bcm3384-ohci", "generic-ohci";
+			reg = <0x15400400 0x100>;
+			big-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <40>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm3384_zephyr.dtsi b/arch/mips/boot/dts/brcm/bcm3384_zephyr.dtsi
new file mode 100644
index 000000000000..a7bd8564e9f6
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm3384_zephyr.dtsi
@@ -0,0 +1,126 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm3384", "brcm,bcm33843";
+
+	memory@0 {
+		device_type = "memory";
+
+		/* Typical range.  The bootloader should fill this in. */
+		reg = <0x0 0x08000000>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		/* On BMIPS5000 this is 1/8th of the CPU core clock */
+		mips-hpt-frequency = <100000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		periph_clk: periph_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <54000000>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	ubus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "brcm,ubus", "simple-bus";
+		ranges;
+		dma-ranges = <0x00000000 0x08000000 0x08000000>,
+			     <0x08000000 0x00000000 0x08000000>;
+
+		periph_intc: periph_intc@14e00038 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x14e00038 0x4 0x14e0003c 0x4>,
+			      <0x14e00340 0x4 0x14e00344 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <4>;
+		};
+
+		zmips_intc: zmips_intc@104b0060 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x104b0060 0x4 0x104b0064 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <29>;
+			brcm,int-map-mask = <0xffffffff>;
+		};
+
+		iop_intc: iop_intc@14e00058 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x14e00058 0x4 0x14e0005c 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <6>;
+			brcm,int-map-mask = <0xffffffff>;
+		};
+
+		uart0: serial@14e00520 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0x14e00520 0x18>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <2>;
+			clocks = <&periph_clk>;
+			status = "disabled";
+		};
+
+		ehci0: usb@15400300 {
+			compatible = "brcm,bcm3384-ehci", "generic-ehci";
+			reg = <0x15400300 0x100>;
+			big-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <41>;
+			status = "disabled";
+		};
+
+		ohci0: usb@15400400 {
+			compatible = "brcm,bcm3384-ohci", "generic-ohci";
+			reg = <0x15400400 0x100>;
+			big-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <40>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi
new file mode 100644
index 000000000000..41891c1e58bd
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi
@@ -0,0 +1,86 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm6328";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <160000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	clocks {
+		periph_clk: periph_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	ubus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges;
+
+		periph_intc: periph_intc@10000020 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x10000024 0x4 0x1000002c 0x4>,
+			      <0x10000020 0x4 0x10000028 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>;
+		};
+
+		uart0: serial@10000100 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0x10000100 0x18>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <28>;
+			clocks = <&periph_clk>;
+			status = "disabled";
+		};
+
+		timer: timer@10000040 {
+			compatible = "syscon";
+			reg = <0x10000040 0x2c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "syscon-reboot";
+			regmap = <&timer>;
+			offset = <0x28>;
+			mask = <0x1>;
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi
new file mode 100644
index 000000000000..45152bc22117
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi
@@ -0,0 +1,93 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm6368";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <200000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips4350";
+			device_type = "cpu";
+			reg = <1>;
+		};
+
+	};
+
+	clocks {
+		periph_clk: periph_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	ubus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges;
+
+		periph_intc: periph_intc@10000020 {
+			compatible = "brcm,bcm3380-l2-intc";
+			reg = <0x10000024 0x4 0x1000002c 0x4>,
+			      <0x10000020 0x4 0x10000028 0x4>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>;
+		};
+
+		uart0: serial@10000100 {
+			compatible = "brcm,bcm6345-uart";
+			reg = <0x10000100 0x18>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <2>;
+			clocks = <&periph_clk>;
+			status = "disabled";
+		};
+
+		ehci0: usb@10001500 {
+			compatible = "brcm,bcm6368-ehci", "generic-ehci";
+			reg = <0x10001500 0x100>;
+			big-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <7>;
+			status = "disabled";
+		};
+
+		ohci0: usb@10001600 {
+			compatible = "brcm,bcm6368-ohci", "generic-ohci";
+			reg = <0x10001600 0x100>;
+			big-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <5>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
new file mode 100644
index 000000000000..1a7efa883c5e
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -0,0 +1,139 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7125";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <202500000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips4380";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips4380";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@441400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x441400 0x30>, <0x441600 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>, <3>;
+		};
+
+		sun_l2_intc: sun_l2_intc@401800 {
+			compatible = "brcm,l2-intc";
+			reg = <0x401800 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <23>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x2f7>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "pci_0",
+						     "bsp_0", "rdc_0", "rptd_0",
+						     "avd_0", "jtag_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406780 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406780 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <18>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7125-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x60c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,bcm7038-reboot";
+			syscon = <&sun_top_ctrl 0x8 0x14>;
+		};
+
+		uart0: serial@406b00 {
+			compatible = "ns16550a";
+			reg = <0x406b00 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <21>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		ehci0: usb@488300 {
+			compatible = "brcm,bcm7125-ehci", "generic-ehci";
+			reg = <0x488300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <60>;
+			status = "disabled";
+		};
+
+		ohci0: usb@488400 {
+			compatible = "brcm,bcm7125-ohci", "generic-ohci";
+			reg = <0x488400 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
new file mode 100644
index 000000000000..1f30728a3177
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -0,0 +1,224 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7346";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <163125000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@411400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x411400 0x30>, <0x411600 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>, <3>;
+		};
+
+		sun_l2_intc: sun_l2_intc@403000 {
+			compatible = "brcm,l2-intc";
+			reg = <0x403000 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <51>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x673>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "bsp_0",
+						     "rdc_0", "raaga_0",
+						     "jtag_0", "svd_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406780 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406780 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <59>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7346-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x51c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,brcmstb-reboot";
+			syscon = <&sun_top_ctrl 0x304 0x308>;
+		};
+
+		uart0: serial@406900 {
+			compatible = "ns16550a";
+			reg = <0x406900 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <64>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		enet0: ethernet@430000 {
+			phy-mode = "internal";
+			phy-handle = <&phy1>;
+			mac-address = [ 00 10 18 36 23 1a ];
+			compatible = "brcm,genet-v2";
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			reg = <0x430000 0x4c8c>;
+			interrupts = <24>, <25>;
+			interrupt-parent = <&periph_intc>;
+			status = "disabled";
+
+			mdio@e14 {
+				compatible = "brcm,genet-mdio-v2";
+				#address-cells = <0x1>;
+				#size-cells = <0x0>;
+				reg = <0xe14 0x8>;
+
+				phy1: ethernet-phy@1 {
+					max-speed = <100>;
+					reg = <0x1>;
+					compatible = "brcm,40nm-ephy",
+						"ethernet-phy-ieee802.3-c22";
+				};
+			};
+		};
+
+		ehci0: usb@480300 {
+			compatible = "brcm,bcm7346-ehci", "generic-ehci";
+			reg = <0x480300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <68>;
+			status = "disabled";
+		};
+
+		ohci0: usb@480400 {
+			compatible = "brcm,bcm7346-ohci", "generic-ohci";
+			reg = <0x480400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <70>;
+			status = "disabled";
+		};
+
+		ehci1: usb@480500 {
+			compatible = "brcm,bcm7346-ehci", "generic-ehci";
+			reg = <0x480500 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <69>;
+			status = "disabled";
+		};
+
+		ohci1: usb@480600 {
+			compatible = "brcm,bcm7346-ohci", "generic-ohci";
+			reg = <0x480600 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <71>;
+			status = "disabled";
+		};
+
+		ehci2: usb@490300 {
+			compatible = "brcm,bcm7346-ehci", "generic-ehci";
+			reg = <0x490300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <73>;
+			status = "disabled";
+		};
+
+		ohci2: usb@490400 {
+			compatible = "brcm,bcm7346-ohci", "generic-ohci";
+			reg = <0x490400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <75>;
+			status = "disabled";
+		};
+
+		ehci3: usb@490500 {
+			compatible = "brcm,bcm7346-ehci", "generic-ehci";
+			reg = <0x490500 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <74>;
+			status = "disabled";
+		};
+
+		ohci3: usb@490600 {
+			compatible = "brcm,bcm7346-ohci", "generic-ohci";
+			reg = <0x490600 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <76>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
new file mode 100644
index 000000000000..2c2aa9368f76
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -0,0 +1,161 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7358";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <375000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips3300";
+			device_type = "cpu";
+			reg = <0>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@411400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x411400 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>;
+		};
+
+		sun_l2_intc: sun_l2_intc@403000 {
+			compatible = "brcm,l2-intc";
+			reg = <0x403000 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <48>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x2f3>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "bsp_0",
+						     "rdc_0", "raaga_0",
+						     "avd_0", "jtag_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406600 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406600 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <56>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7358-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x51c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,brcmstb-reboot";
+			syscon = <&sun_top_ctrl 0x304 0x308>;
+		};
+
+		uart0: serial@406800 {
+			compatible = "ns16550a";
+			reg = <0x406800 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		enet0: ethernet@430000 {
+			phy-mode = "internal";
+			phy-handle = <&phy1>;
+			mac-address = [ 00 10 18 36 23 1a ];
+			compatible = "brcm,genet-v2";
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			reg = <0x430000 0x4c8c>;
+			interrupts = <24>, <25>;
+			interrupt-parent = <&periph_intc>;
+			status = "disabled";
+
+			mdio@e14 {
+				compatible = "brcm,genet-mdio-v2";
+				#address-cells = <0x1>;
+				#size-cells = <0x0>;
+				reg = <0xe14 0x8>;
+
+				phy1: ethernet-phy@1 {
+					max-speed = <100>;
+					reg = <0x1>;
+					compatible = "brcm,40nm-ephy",
+						"ethernet-phy-ieee802.3-c22";
+				};
+			};
+		};
+
+		ehci0: usb@480300 {
+			compatible = "brcm,bcm7358-ehci", "generic-ehci";
+			reg = <0x480300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <65>;
+			status = "disabled";
+		};
+
+		ohci0: usb@480400 {
+			compatible = "brcm,bcm7358-ohci", "generic-ohci";
+			reg = <0x480400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <66>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
new file mode 100644
index 000000000000..f23b0aed276f
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -0,0 +1,161 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7360";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <375000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips3300";
+			device_type = "cpu";
+			reg = <0>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@411400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x411400 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>;
+		};
+
+		sun_l2_intc: sun_l2_intc@403000 {
+			compatible = "brcm,l2-intc";
+			reg = <0x403000 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <48>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x2f3>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "bsp_0",
+						     "rdc_0", "raaga_0",
+						     "avd_0", "jtag_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406600 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406600 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <56>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7360-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x51c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,brcmstb-reboot";
+			syscon = <&sun_top_ctrl 0x304 0x308>;
+		};
+
+		uart0: serial@406800 {
+			compatible = "ns16550a";
+			reg = <0x406800 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		enet0: ethernet@430000 {
+			phy-mode = "internal";
+			phy-handle = <&phy1>;
+			mac-address = [ 00 10 18 36 23 1a ];
+			compatible = "brcm,genet-v2";
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			reg = <0x430000 0x4c8c>;
+			interrupts = <24>, <25>;
+			interrupt-parent = <&periph_intc>;
+			status = "disabled";
+
+			mdio@e14 {
+				compatible = "brcm,genet-mdio-v2";
+				#address-cells = <0x1>;
+				#size-cells = <0x0>;
+				reg = <0xe14 0x8>;
+
+				phy1: ethernet-phy@1 {
+					max-speed = <100>;
+					reg = <0x1>;
+					compatible = "brcm,40nm-ephy",
+						"ethernet-phy-ieee802.3-c22";
+				};
+			};
+		};
+
+		ehci0: usb@480300 {
+			compatible = "brcm,bcm7360-ehci", "generic-ehci";
+			reg = <0x480300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <65>;
+			status = "disabled";
+		};
+
+		ohci0: usb@480400 {
+			compatible = "brcm,bcm7360-ohci", "generic-ohci";
+			reg = <0x480400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <66>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
new file mode 100644
index 000000000000..da99db665bbc
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -0,0 +1,167 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7362";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <375000000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips4380";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips4380";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@411400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x411400 0x30>, <0x411600 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>, <3>;
+		};
+
+		sun_l2_intc: sun_l2_intc@403000 {
+			compatible = "brcm,l2-intc";
+			reg = <0x403000 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <48>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x2f3>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "bsp_0",
+						     "rdc_0", "raaga_0",
+						     "avd_0", "jtag_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406600 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406600 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <56>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7362-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x51c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,brcmstb-reboot";
+			syscon = <&sun_top_ctrl 0x304 0x308>;
+		};
+
+		uart0: serial@406800 {
+			compatible = "ns16550a";
+			reg = <0x406800 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		enet0: ethernet@430000 {
+			phy-mode = "internal";
+			phy-handle = <&phy1>;
+			mac-address = [ 00 10 18 36 23 1a ];
+			compatible = "brcm,genet-v2";
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			reg = <0x430000 0x4c8c>;
+			interrupts = <24>, <25>;
+			interrupt-parent = <&periph_intc>;
+			status = "disabled";
+
+			mdio@e14 {
+				compatible = "brcm,genet-mdio-v2";
+				#address-cells = <0x1>;
+				#size-cells = <0x0>;
+				reg = <0xe14 0x8>;
+
+				phy1: ethernet-phy@1 {
+					max-speed = <100>;
+					reg = <0x1>;
+					compatible = "brcm,40nm-ephy",
+						"ethernet-phy-ieee802.3-c22";
+				};
+			};
+		};
+
+		ehci0: usb@480300 {
+			compatible = "brcm,bcm7362-ehci", "generic-ehci";
+			reg = <0x480300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <65>;
+			status = "disabled";
+		};
+
+		ohci0: usb@480400 {
+			compatible = "brcm,bcm7362-ohci", "generic-ohci";
+			reg = <0x480400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <66>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
new file mode 100644
index 000000000000..5f55d0a50a28
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -0,0 +1,184 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7420";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <93750000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@441400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x441400 0x30>, <0x441600 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>, <3>;
+		};
+
+		sun_l2_intc: sun_l2_intc@401800 {
+			compatible = "brcm,l2-intc";
+			reg = <0x401800 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <23>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x3ff>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "pci_0",
+						     "pcie_0", "bsp_0", "rdc_0",
+						     "rptd_0", "avd_0", "avd_1",
+						     "jtag_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406780 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406780 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <18>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7420-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x60c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,bcm7038-reboot";
+			syscon = <&sun_top_ctrl 0x8 0x14>;
+		};
+
+		uart0: serial@406b00 {
+			compatible = "ns16550a";
+			reg = <0x406b00 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <21>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		enet0: ethernet@468000 {
+			phy-mode = "internal";
+			phy-handle = <&phy1>;
+			mac-address = [ 00 10 18 36 23 1a ];
+			compatible = "brcm,genet-v1";
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			reg = <0x468000 0x3c8c>;
+			interrupts = <69>, <79>;
+			interrupt-parent = <&periph_intc>;
+			status = "disabled";
+
+			mdio@e14 {
+				compatible = "brcm,genet-mdio-v1";
+				#address-cells = <0x1>;
+				#size-cells = <0x0>;
+				reg = <0xe14 0x8>;
+
+				phy1: ethernet-phy@1 {
+					max-speed = <100>;
+					reg = <0x1>;
+					compatible = "brcm,65nm-ephy",
+						"ethernet-phy-ieee802.3-c22";
+				};
+			};
+		};
+
+		ehci0: usb@488300 {
+			compatible = "brcm,bcm7420-ehci", "generic-ehci";
+			reg = <0x488300 0x100>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <60>;
+			status = "disabled";
+		};
+
+		ohci0: usb@488400 {
+			compatible = "brcm,bcm7420-ohci", "generic-ohci";
+			reg = <0x488400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>;
+			status = "disabled";
+		};
+
+		ehci1: usb@488500 {
+			compatible = "brcm,bcm7420-ehci", "generic-ehci";
+			reg = <0x488500 0x100>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <55>;
+			status = "disabled";
+		};
+
+		ohci1: usb@488600 {
+			compatible = "brcm,bcm7420-ohci", "generic-ohci";
+			reg = <0x488600 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <62>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
new file mode 100644
index 000000000000..5b660b617ead
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -0,0 +1,225 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "brcm,bcm7425";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mips-hpt-frequency = <163125000>;
+
+		cpu@0 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "brcm,bmips5000";
+			device_type = "cpu";
+			reg = <1>;
+		};
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpu_intc: cpu_intc {
+		#address-cells = <0>;
+		compatible = "mti,cpu-interrupt-controller";
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	clocks {
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <81000000>;
+		};
+	};
+
+	rdb {
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		compatible = "simple-bus";
+		ranges = <0 0x10000000 0x01000000>;
+
+		periph_intc: periph_intc@41a400 {
+			compatible = "brcm,bcm7038-l1-intc";
+			reg = <0x41a400 0x30>, <0x41a600 0x30>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&cpu_intc>;
+			interrupts = <2>, <3>;
+		};
+
+		sun_l2_intc: sun_l2_intc@403000 {
+			compatible = "brcm,l2-intc";
+			reg = <0x403000 0x30>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <47>;
+		};
+
+		gisb-arb@400000 {
+			compatible = "brcm,bcm7400-gisb-arb";
+			reg = <0x400000 0xdc>;
+			native-endian;
+			interrupt-parent = <&sun_l2_intc>;
+			interrupts = <0>, <2>;
+			brcm,gisb-arb-master-mask = <0x177b>;
+			brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "pcie_0",
+						     "bsp_0", "rdc_0",
+						     "raaga_0", "avd_1",
+						     "jtag_0", "svd_0",
+						     "vice_0";
+		};
+
+		upg_irq0_intc: upg_irq0_intc@406780 {
+			compatible = "brcm,bcm7120-l2-intc";
+			reg = <0x406780 0x8>;
+
+			brcm,int-map-mask = <0x44>;
+			brcm,int-fwd-mask = <0x70000>;
+
+			interrupt-controller;
+			#interrupt-cells = <1>;
+
+			interrupt-parent = <&periph_intc>;
+			interrupts = <55>;
+		};
+
+		sun_top_ctrl: syscon@404000 {
+			compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
+			reg = <0x404000 0x51c>;
+			little-endian;
+		};
+
+		reboot {
+			compatible = "brcm,brcmstb-reboot";
+			syscon = <&sun_top_ctrl 0x304 0x308>;
+		};
+
+		uart0: serial@406b00 {
+			compatible = "ns16550a";
+			reg = <0x406b00 0x20>;
+			reg-io-width = <0x4>;
+			reg-shift = <0x2>;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <61>;
+			clocks = <&uart_clk>;
+			status = "disabled";
+		};
+
+		enet0: ethernet@b80000 {
+			phy-mode = "internal";
+			phy-handle = <&phy1>;
+			mac-address = [ 00 10 18 36 23 1a ];
+			compatible = "brcm,genet-v3";
+			#address-cells = <0x1>;
+			#size-cells = <0x1>;
+			reg = <0xb80000 0x11c88>;
+			interrupts = <17>, <18>;
+			interrupt-parent = <&periph_intc>;
+			status = "disabled";
+
+			mdio@e14 {
+				compatible = "brcm,genet-mdio-v3";
+				#address-cells = <0x1>;
+				#size-cells = <0x0>;
+				reg = <0xe14 0x8>;
+
+				phy1: ethernet-phy@1 {
+					max-speed = <100>;
+					reg = <0x1>;
+					compatible = "brcm,40nm-ephy",
+						"ethernet-phy-ieee802.3-c22";
+				};
+			};
+		};
+
+		ehci0: usb@480300 {
+			compatible = "brcm,bcm7425-ehci", "generic-ehci";
+			reg = <0x480300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <65>;
+			status = "disabled";
+		};
+
+		ohci0: usb@480400 {
+			compatible = "brcm,bcm7425-ohci", "generic-ohci";
+			reg = <0x480400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <67>;
+			status = "disabled";
+		};
+
+		ehci1: usb@480500 {
+			compatible = "brcm,bcm7425-ehci", "generic-ehci";
+			reg = <0x480500 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <66>;
+			status = "disabled";
+		};
+
+		ohci1: usb@480600 {
+			compatible = "brcm,bcm7425-ohci", "generic-ohci";
+			reg = <0x480600 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <68>;
+			status = "disabled";
+		};
+
+		ehci2: usb@490300 {
+			compatible = "brcm,bcm7425-ehci", "generic-ehci";
+			reg = <0x490300 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <70>;
+			status = "disabled";
+		};
+
+		ohci2: usb@490400 {
+			compatible = "brcm,bcm7425-ohci", "generic-ohci";
+			reg = <0x490400 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <72>;
+			status = "disabled";
+		};
+
+		ehci3: usb@490500 {
+			compatible = "brcm,bcm7425-ehci", "generic-ehci";
+			reg = <0x490500 0x100>;
+			native-endian;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <71>;
+			status = "disabled";
+		};
+
+		ohci3: usb@490600 {
+			compatible = "brcm,bcm7425-ohci", "generic-ohci";
+			reg = <0x490600 0x100>;
+			native-endian;
+			no-big-frame-no;
+			interrupt-parent = <&periph_intc>;
+			interrupts = <73>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/bcm93384wvg.dts b/arch/mips/boot/dts/brcm/bcm93384wvg.dts
index 831741179212..d1e44a17d41a 100644
--- a/arch/mips/boot/dts/bcm93384wvg.dts
+++ b/arch/mips/boot/dts/brcm/bcm93384wvg.dts
@@ -1,6 +1,6 @@
 /dts-v1/;
 
-/include/ "bcm3384.dtsi"
+/include/ "bcm3384_zephyr.dtsi"
 
 / {
 	compatible = "brcm,bcm93384wvg", "brcm,bcm3384";
@@ -10,13 +10,6 @@
 		bootargs = "console=ttyS0,115200";
 		stdout-path = &uart0;
 	};
-
-	memory@0 {
-		device_type = "memory";
-		reg = <0x0 0x04000000>;
-		dma-xor-mask = <0x08000000>;
-		dma-xor-limit = <0x0fffffff>;
-	};
 };
 
 &uart0 {
diff --git a/arch/mips/boot/dts/brcm/bcm93384wvg_viper.dts b/arch/mips/boot/dts/brcm/bcm93384wvg_viper.dts
new file mode 100644
index 000000000000..1ecb2696aca8
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm93384wvg_viper.dts
@@ -0,0 +1,25 @@
+/dts-v1/;
+
+/include/ "bcm3384_viper.dtsi"
+
+/ {
+	compatible = "brcm,bcm93384wvg-viper", "brcm,bcm3384-viper";
+	model = "Broadcom BCM93384WVG-viper";
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm96368mvwg.dts b/arch/mips/boot/dts/brcm/bcm96368mvwg.dts
new file mode 100644
index 000000000000..0e890c28fe5c
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm96368mvwg.dts
@@ -0,0 +1,31 @@
+/dts-v1/;
+
+/include/ "bcm6368.dtsi"
+
+/ {
+	compatible = "brcm,bcm96368mvwg", "brcm,bcm6368";
+	model = "Broadcom BCM96368MVWG";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+/* FIXME: need to set up USB_CTRL registers first */
+&ehci0 {
+	status = "disabled";
+};
+
+&ohci0 {
+	status = "disabled";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
new file mode 100644
index 000000000000..e046b1109eab
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
@@ -0,0 +1,31 @@
+/dts-v1/;
+
+/include/ "bcm7125.dtsi"
+
+/ {
+	compatible = "brcm,bcm97125cbmb", "brcm,bcm7125";
+	model = "Broadcom BCM97125CBMB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+/* FIXME: USB is wonky; disable it for now */
+&ehci0 {
+	status = "disabled";
+};
+
+&ohci0 {
+	status = "disabled";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
new file mode 100644
index 000000000000..70f196d89d26
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
@@ -0,0 +1,58 @@
+/dts-v1/;
+
+/include/ "bcm7346.dtsi"
+
+/ {
+	compatible = "brcm,bcm97346dbsmb", "brcm,bcm7346";
+	model = "Broadcom BCM97346DBSMB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>, <0x20000000 0x30000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&enet0 {
+	status = "okay";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
+
+&ehci1 {
+	status = "okay";
+};
+
+&ohci1 {
+	status = "okay";
+};
+
+&ehci2 {
+	status = "okay";
+};
+
+&ohci2 {
+	status = "okay";
+};
+
+&ehci3 {
+	status = "okay";
+};
+
+&ohci3 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97358svmb.dts b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
new file mode 100644
index 000000000000..d18e6d947739
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
@@ -0,0 +1,34 @@
+/dts-v1/;
+
+/include/ "bcm7358.dtsi"
+
+/ {
+	compatible = "brcm,bcm97358svmb", "brcm,bcm7358";
+	model = "Broadcom BCM97358SVMB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&enet0 {
+	status = "okay";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
new file mode 100644
index 000000000000..4fe515500102
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
@@ -0,0 +1,34 @@
+/dts-v1/;
+
+/include/ "bcm7360.dtsi"
+
+/ {
+	compatible = "brcm,bcm97360svmb", "brcm,bcm7360";
+	model = "Broadcom BCM97360SVMB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&enet0 {
+	status = "okay";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97362svmb.dts b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
new file mode 100644
index 000000000000..b7b88e5dc9e7
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
@@ -0,0 +1,34 @@
+/dts-v1/;
+
+/include/ "bcm7362.dtsi"
+
+/ {
+	compatible = "brcm,bcm97362svmb", "brcm,bcm7362";
+	model = "Broadcom BCM97362SVMB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>, <0x20000000 0x30000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&enet0 {
+	status = "okay";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97420c.dts b/arch/mips/boot/dts/brcm/bcm97420c.dts
new file mode 100644
index 000000000000..67fe1f3a3891
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97420c.dts
@@ -0,0 +1,45 @@
+/dts-v1/;
+
+/include/ "bcm7420.dtsi"
+
+/ {
+	compatible = "brcm,bcm97420c", "brcm,bcm7420";
+	model = "Broadcom BCM97420C";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>,
+		      <0x20000000 0x30000000>,
+		      <0x60000000 0x10000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+/* FIXME: MAC driver comes up but cannot attach to PHY */
+&enet0 {
+	status = "disabled";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
+
+&ehci1 {
+	status = "okay";
+};
+
+&ohci1 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97425svmb.dts b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
new file mode 100644
index 000000000000..689c68a4f9c8
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
@@ -0,0 +1,60 @@
+/dts-v1/;
+
+/include/ "bcm7425.dtsi"
+
+/ {
+	compatible = "brcm,bcm97425svmb", "brcm,bcm7425";
+	model = "Broadcom BCM97425SVMB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>,
+		      <0x20000000 0x30000000>,
+		      <0x90000000 0x40000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&enet0 {
+	status = "okay";
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
+
+&ehci1 {
+	status = "okay";
+};
+
+&ohci1 {
+	status = "okay";
+};
+
+&ehci2 {
+	status = "okay";
+};
+
+&ohci2 {
+	status = "okay";
+};
+
+&ehci3 {
+	status = "okay";
+};
+
+&ohci3 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm9ejtagprb.dts b/arch/mips/boot/dts/brcm/bcm9ejtagprb.dts
new file mode 100644
index 000000000000..1da4608680aa
--- /dev/null
+++ b/arch/mips/boot/dts/brcm/bcm9ejtagprb.dts
@@ -0,0 +1,22 @@
+/dts-v1/;
+
+/include/ "bcm6328.dtsi"
+
+/ {
+	compatible = "brcm,bcm9ejtagprb", "brcm,bcm6328";
+	model = "Broadcom BCM9EJTAGPRB";
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200";
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/cavium-octeon/Makefile b/arch/mips/boot/dts/cavium-octeon/Makefile
new file mode 100644
index 000000000000..5b99c40a058f
--- /dev/null
+++ b/arch/mips/boot/dts/cavium-octeon/Makefile
@@ -0,0 +1,9 @@
+dtb-$(CONFIG_CAVIUM_OCTEON_SOC)	+= octeon_3xxx.dtb octeon_68xx.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-				+= dummy.o
+
+always				:= $(dtb-y)
+clean-files			:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/octeon_3xxx.dts b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
index fa33115bde33..9c48e0586ba7 100644
--- a/arch/mips/boot/dts/octeon_3xxx.dts
+++ b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
@@ -587,4 +587,16 @@
 		usbn = &usbn;
 		led0 = &led0;
 	};
+
+	dsr1000n-leds {
+		compatible = "gpio-leds";
+		usb1 {
+			label = "usb1";
+			gpios = <&gpio 9 1>; /* Active low */
+		};
+		usb2 {
+			label = "usb2";
+			gpios = <&gpio 10 1>; /* Active low */
+		};
+	};
  };
diff --git a/arch/mips/boot/dts/octeon_68xx.dts b/arch/mips/boot/dts/cavium-octeon/octeon_68xx.dts
index 79b46fcb0a11..79b46fcb0a11 100644
--- a/arch/mips/boot/dts/octeon_68xx.dts
+++ b/arch/mips/boot/dts/cavium-octeon/octeon_68xx.dts
diff --git a/arch/mips/boot/dts/lantiq/Makefile b/arch/mips/boot/dts/lantiq/Makefile
new file mode 100644
index 000000000000..0906c62141b9
--- /dev/null
+++ b/arch/mips/boot/dts/lantiq/Makefile
@@ -0,0 +1,9 @@
+dtb-$(CONFIG_DT_EASY50712)	+= easy50712.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-				+= dummy.o
+
+always				:= $(dtb-y)
+clean-files			:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/danube.dtsi b/arch/mips/boot/dts/lantiq/danube.dtsi
index d4c59e003708..d4c59e003708 100644
--- a/arch/mips/boot/dts/danube.dtsi
+++ b/arch/mips/boot/dts/lantiq/danube.dtsi
diff --git a/arch/mips/boot/dts/easy50712.dts b/arch/mips/boot/dts/lantiq/easy50712.dts
index 143b8a37b5e4..143b8a37b5e4 100644
--- a/arch/mips/boot/dts/easy50712.dts
+++ b/arch/mips/boot/dts/lantiq/easy50712.dts
diff --git a/arch/mips/boot/dts/mti/Makefile b/arch/mips/boot/dts/mti/Makefile
new file mode 100644
index 000000000000..ef1f3dbed033
--- /dev/null
+++ b/arch/mips/boot/dts/mti/Makefile
@@ -0,0 +1,9 @@
+dtb-$(CONFIG_MIPS_SEAD3)	+= sead3.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-				+= dummy.o
+
+always				:= $(dtb-y)
+clean-files			:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/sead3.dts b/arch/mips/boot/dts/mti/sead3.dts
index e4b317d414f1..e4b317d414f1 100644
--- a/arch/mips/boot/dts/sead3.dts
+++ b/arch/mips/boot/dts/mti/sead3.dts
diff --git a/arch/mips/boot/dts/netlogic/Makefile b/arch/mips/boot/dts/netlogic/Makefile
new file mode 100644
index 000000000000..9868057140b5
--- /dev/null
+++ b/arch/mips/boot/dts/netlogic/Makefile
@@ -0,0 +1,13 @@
+dtb-$(CONFIG_DT_XLP_EVP)	+= xlp_evp.dtb
+dtb-$(CONFIG_DT_XLP_SVP)	+= xlp_svp.dtb
+dtb-$(CONFIG_DT_XLP_FVP)	+= xlp_fvp.dtb
+dtb-$(CONFIG_DT_XLP_GVP)	+= xlp_gvp.dtb
+dtb-$(CONFIG_DT_XLP_RVP)	+= xlp_rvp.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-				+= dummy.o
+
+always				:= $(dtb-y)
+clean-files			:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/xlp_evp.dts b/arch/mips/boot/dts/netlogic/xlp_evp.dts
index 89ad04808c02..89ad04808c02 100644
--- a/arch/mips/boot/dts/xlp_evp.dts
+++ b/arch/mips/boot/dts/netlogic/xlp_evp.dts
diff --git a/arch/mips/boot/dts/xlp_fvp.dts b/arch/mips/boot/dts/netlogic/xlp_fvp.dts
index 63e62b7bd758..63e62b7bd758 100644
--- a/arch/mips/boot/dts/xlp_fvp.dts
+++ b/arch/mips/boot/dts/netlogic/xlp_fvp.dts
diff --git a/arch/mips/boot/dts/xlp_gvp.dts b/arch/mips/boot/dts/netlogic/xlp_gvp.dts
index bb4ecd1d47fc..bb4ecd1d47fc 100644
--- a/arch/mips/boot/dts/xlp_gvp.dts
+++ b/arch/mips/boot/dts/netlogic/xlp_gvp.dts
diff --git a/arch/mips/boot/dts/netlogic/xlp_rvp.dts b/arch/mips/boot/dts/netlogic/xlp_rvp.dts
new file mode 100644
index 000000000000..7188aed2ea2e
--- /dev/null
+++ b/arch/mips/boot/dts/netlogic/xlp_rvp.dts
@@ -0,0 +1,77 @@
+/*
+ * XLP5XX Device Tree Source for RVP boards
+ */
+
+/dts-v1/;
+/ {
+	model = "netlogic,XLP-RVP";
+	compatible = "netlogic,xlp";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	soc {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		ranges = <0 0  0 0x18000000  0x04000000   // PCIe CFG
+			  1 0  0 0x16000000  0x02000000>; // GBU chipselects
+
+		serial0: serial@30000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0 0x112100 0xa00>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			clock-frequency = <125000000>;
+			interrupt-parent = <&pic>;
+			interrupts = <17>;
+		};
+		pic: pic@110000 {
+			compatible = "netlogic,xlp-pic";
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+			reg = <0 0x110000 0x200>;
+			interrupt-controller;
+		};
+
+		nor_flash@1,0 {
+			compatible = "cfi-flash";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <2>;
+			reg = <1 0 0x1000000>;
+
+			partition@0 {
+				label = "x-loader";
+				reg = <0x0 0x100000>; /* 1M */
+				read-only;
+			};
+
+			partition@100000 {
+				label = "u-boot";
+				reg = <0x100000 0x100000>; /* 1M */
+			};
+
+			partition@200000 {
+				label = "kernel";
+				reg = <0x200000 0x500000>; /* 5M */
+			};
+
+			partition@700000 {
+				label = "rootfs";
+				reg = <0x700000 0x800000>; /* 8M */
+			};
+
+			partition@f00000 {
+				label = "env";
+				reg = <0xf00000 0x100000>; /* 1M */
+				read-only;
+			};
+		};
+
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200 rdinit=/sbin/init";
+	};
+};
diff --git a/arch/mips/boot/dts/xlp_svp.dts b/arch/mips/boot/dts/netlogic/xlp_svp.dts
index 1ebd00edaacc..1ebd00edaacc 100644
--- a/arch/mips/boot/dts/xlp_svp.dts
+++ b/arch/mips/boot/dts/netlogic/xlp_svp.dts
diff --git a/arch/mips/boot/dts/ralink/Makefile b/arch/mips/boot/dts/ralink/Makefile
new file mode 100644
index 000000000000..2a7225954bf6
--- /dev/null
+++ b/arch/mips/boot/dts/ralink/Makefile
@@ -0,0 +1,12 @@
+dtb-$(CONFIG_DTB_RT2880_EVAL)	+= rt2880_eval.dtb
+dtb-$(CONFIG_DTB_RT305X_EVAL)	+= rt3052_eval.dtb
+dtb-$(CONFIG_DTB_RT3883_EVAL)	+= rt3883_eval.dtb
+dtb-$(CONFIG_DTB_MT7620A_EVAL)	+= mt7620a_eval.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-				+= dummy.o
+
+always				:= $(dtb-y)
+clean-files			:= *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/mt7620a.dtsi b/arch/mips/boot/dts/ralink/mt7620a.dtsi
index 08bf24fefe9f..08bf24fefe9f 100644
--- a/arch/mips/boot/dts/mt7620a.dtsi
+++ b/arch/mips/boot/dts/ralink/mt7620a.dtsi
diff --git a/arch/mips/boot/dts/mt7620a_eval.dts b/arch/mips/boot/dts/ralink/mt7620a_eval.dts
index 709f58132f5c..709f58132f5c 100644
--- a/arch/mips/boot/dts/mt7620a_eval.dts
+++ b/arch/mips/boot/dts/ralink/mt7620a_eval.dts
diff --git a/arch/mips/boot/dts/rt2880.dtsi b/arch/mips/boot/dts/ralink/rt2880.dtsi
index 182afde2f2e1..182afde2f2e1 100644
--- a/arch/mips/boot/dts/rt2880.dtsi
+++ b/arch/mips/boot/dts/ralink/rt2880.dtsi
diff --git a/arch/mips/boot/dts/rt2880_eval.dts b/arch/mips/boot/dts/ralink/rt2880_eval.dts
index 0a685db093d4..0a685db093d4 100644
--- a/arch/mips/boot/dts/rt2880_eval.dts
+++ b/arch/mips/boot/dts/ralink/rt2880_eval.dts
diff --git a/arch/mips/boot/dts/rt3050.dtsi b/arch/mips/boot/dts/ralink/rt3050.dtsi
index e3203d414fee..e3203d414fee 100644
--- a/arch/mips/boot/dts/rt3050.dtsi
+++ b/arch/mips/boot/dts/ralink/rt3050.dtsi
diff --git a/arch/mips/boot/dts/rt3052_eval.dts b/arch/mips/boot/dts/ralink/rt3052_eval.dts
index ec9e9a035541..ec9e9a035541 100644
--- a/arch/mips/boot/dts/rt3052_eval.dts
+++ b/arch/mips/boot/dts/ralink/rt3052_eval.dts
diff --git a/arch/mips/boot/dts/rt3883.dtsi b/arch/mips/boot/dts/ralink/rt3883.dtsi
index 3b131dd0d5ac..3b131dd0d5ac 100644
--- a/arch/mips/boot/dts/rt3883.dtsi
+++ b/arch/mips/boot/dts/ralink/rt3883.dtsi
diff --git a/arch/mips/boot/dts/rt3883_eval.dts b/arch/mips/boot/dts/ralink/rt3883_eval.dts
index e8df21a5d10d..e8df21a5d10d 100644
--- a/arch/mips/boot/dts/rt3883_eval.dts
+++ b/arch/mips/boot/dts/ralink/rt3883_eval.dts
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile
index a74f76d85a2f..f7aa9d5d3b87 100644
--- a/arch/mips/cavium-octeon/crypto/Makefile
+++ b/arch/mips/cavium-octeon/crypto/Makefile
@@ -4,4 +4,7 @@
 
 obj-y += octeon-crypto.o
 
-obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
+obj-$(CONFIG_CRYPTO_MD5_OCTEON)		+= octeon-md5.o
+obj-$(CONFIG_CRYPTO_SHA1_OCTEON)	+= octeon-sha1.o
+obj-$(CONFIG_CRYPTO_SHA256_OCTEON)	+= octeon-sha256.o
+obj-$(CONFIG_CRYPTO_SHA512_OCTEON)	+= octeon-sha512.o
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
index 7c82ff463b65..f66bd1adc7ff 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -17,7 +17,7 @@
  * crypto operations in calls to octeon_crypto_enable/disable in order to make
  * sure the state of COP2 isn't corrupted if userspace is also performing
  * hardware crypto operations. Allocate the state parameter on the stack.
- * Preemption must be disabled to prevent context switches.
+ * Returns with preemption disabled.
  *
  * @state: Pointer to state structure to store current COP2 state in.
  *
@@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state)
 	int status;
 	unsigned long flags;
 
+	preempt_disable();
 	local_irq_save(flags);
 	status = read_c0_status();
 	write_c0_status(status | ST0_CU2);
@@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state,
 	else
 		write_c0_status(read_c0_status() & ~ST0_CU2);
 	local_irq_restore(flags);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(octeon_crypto_disable);
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
index e2a4aece9c24..7315cc307397 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
@@ -5,7 +5,8 @@
  *
  * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved.
  *
- * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ * MD5/SHA1/SHA256/SHA512 instruction definitions added by
+ * Aaro Koskinen <aaro.koskinen@iki.fi>.
  *
  */
 #ifndef __LINUX_OCTEON_CRYPTO_H
@@ -21,22 +22,22 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state,
 				  unsigned long flags);
 
 /*
- * Macros needed to implement MD5:
+ * Macros needed to implement MD5/SHA1/SHA256:
  */
 
 /*
- * The index can be 0-1.
+ * The index can be 0-1 (MD5) or 0-2 (SHA1), 0-3 (SHA256).
  */
 #define write_octeon_64bit_hash_dword(value, index)	\
 do {							\
 	__asm__ __volatile__ (				\
 	"dmtc2 %[rt],0x0048+" STR(index)		\
 	:						\
-	: [rt] "d" (value));				\
+	: [rt] "d" (cpu_to_be64(value)));		\
 } while (0)
 
 /*
- * The index can be 0-1.
+ * The index can be 0-1 (MD5) or 0-2 (SHA1), 0-3 (SHA256).
  */
 #define read_octeon_64bit_hash_dword(index)		\
 ({							\
@@ -47,7 +48,7 @@ do {							\
 	: [rt] "=d" (__value)				\
 	: );						\
 							\
-	__value;					\
+	be64_to_cpu(__value);				\
 })
 
 /*
@@ -58,7 +59,7 @@ do {							\
 	__asm__ __volatile__ (				\
 	"dmtc2 %[rt],0x0040+" STR(index)		\
 	:						\
-	: [rt] "d" (value));				\
+	: [rt] "d" (cpu_to_be64(value)));		\
 } while (0)
 
 /*
@@ -69,6 +70,154 @@ do {							\
 	__asm__ __volatile__ (				\
 	"dmtc2 %[rt],0x4047"				\
 	:						\
+	: [rt] "d" (cpu_to_be64(value)));		\
+} while (0)
+
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_sha1_start(value)			\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x4057"				\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_sha256_start(value)			\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x404f"				\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * Macros needed to implement SHA512:
+ */
+
+/*
+ * The index can be 0-7.
+ */
+#define write_octeon_64bit_hash_sha512(value, index)	\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x0250+" STR(index)		\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The index can be 0-7.
+ */
+#define read_octeon_64bit_hash_sha512(index)		\
+({							\
+	u64 __value;					\
+							\
+	__asm__ __volatile__ (				\
+	"dmfc2 %[rt],0x0250+" STR(index)		\
+	: [rt] "=d" (__value)				\
+	: );						\
+							\
+	__value;					\
+})
+
+/*
+ * The index can be 0-14.
+ */
+#define write_octeon_64bit_block_sha512(value, index)	\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x0240+" STR(index)		\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The value is the final block word (64-bit).
+ */
+#define octeon_sha512_start(value)			\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x424f"				\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_sha1_start(value)			\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x4057"				\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_sha256_start(value)			\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x404f"				\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * Macros needed to implement SHA512:
+ */
+
+/*
+ * The index can be 0-7.
+ */
+#define write_octeon_64bit_hash_sha512(value, index)	\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x0250+" STR(index)		\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The index can be 0-7.
+ */
+#define read_octeon_64bit_hash_sha512(index)		\
+({							\
+	u64 __value;					\
+							\
+	__asm__ __volatile__ (				\
+	"dmfc2 %[rt],0x0250+" STR(index)		\
+	: [rt] "=d" (__value)				\
+	: );						\
+							\
+	__value;					\
+})
+
+/*
+ * The index can be 0-14.
+ */
+#define write_octeon_64bit_block_sha512(value, index)	\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x0240+" STR(index)		\
+	:						\
+	: [rt] "d" (value));				\
+} while (0)
+
+/*
+ * The value is the final block word (64-bit).
+ */
+#define octeon_sha512_start(value)			\
+do {							\
+	__asm__ __volatile__ (				\
+	"dmtc2 %[rt],0x424f"				\
+	:						\
 	: [rt] "d" (value));				\
 } while (0)
 
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index b909881ba6c1..12dccdb38286 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
 	memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
 	       avail);
 
-	local_bh_disable();
-	preempt_disable();
 	flags = octeon_crypto_enable(&state);
 	octeon_md5_store_hash(mctx);
 
@@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
 
 	octeon_md5_read_hash(mctx);
 	octeon_crypto_disable(&state, flags);
-	preempt_enable();
-	local_bh_enable();
 
 	memcpy(mctx->block, data, len);
 
@@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
 
 	*p++ = 0x80;
 
-	local_bh_disable();
-	preempt_disable();
 	flags = octeon_crypto_enable(&state);
 	octeon_md5_store_hash(mctx);
 
@@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
 
 	octeon_md5_read_hash(mctx);
 	octeon_crypto_disable(&state, flags);
-	preempt_enable();
-	local_bh_enable();
 
 	memcpy(out, mctx->hash, sizeof(mctx->hash));
 	memset(mctx, 0, sizeof(*mctx));
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
new file mode 100644
index 000000000000..2b74b5b67cae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
@@ -0,0 +1,241 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA1 Secure Hash Algorithm.
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/sha1_generic.c, which is:
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/mm.h>
+#include <crypto/sha.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_sha1_store_hash(struct sha1_state *sctx)
+{
+	u64 *hash = (u64 *)sctx->state;
+	union {
+		u32 word[2];
+		u64 dword;
+	} hash_tail = { { sctx->state[4], } };
+
+	write_octeon_64bit_hash_dword(hash[0], 0);
+	write_octeon_64bit_hash_dword(hash[1], 1);
+	write_octeon_64bit_hash_dword(hash_tail.dword, 2);
+	memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0]));
+}
+
+static void octeon_sha1_read_hash(struct sha1_state *sctx)
+{
+	u64 *hash = (u64 *)sctx->state;
+	union {
+		u32 word[2];
+		u64 dword;
+	} hash_tail;
+
+	hash[0]		= read_octeon_64bit_hash_dword(0);
+	hash[1]		= read_octeon_64bit_hash_dword(1);
+	hash_tail.dword	= read_octeon_64bit_hash_dword(2);
+	sctx->state[4]	= hash_tail.word[0];
+	memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword));
+}
+
+static void octeon_sha1_transform(const void *_block)
+{
+	const u64 *block = _block;
+
+	write_octeon_64bit_block_dword(block[0], 0);
+	write_octeon_64bit_block_dword(block[1], 1);
+	write_octeon_64bit_block_dword(block[2], 2);
+	write_octeon_64bit_block_dword(block[3], 3);
+	write_octeon_64bit_block_dword(block[4], 4);
+	write_octeon_64bit_block_dword(block[5], 5);
+	write_octeon_64bit_block_dword(block[6], 6);
+	octeon_sha1_start(block[7]);
+}
+
+static int octeon_sha1_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA1_H0;
+	sctx->state[1] = SHA1_H1;
+	sctx->state[2] = SHA1_H2;
+	sctx->state[3] = SHA1_H3;
+	sctx->state[4] = SHA1_H4;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data,
+				 unsigned int len)
+{
+	unsigned int partial;
+	unsigned int done;
+	const u8 *src;
+
+	partial = sctx->count % SHA1_BLOCK_SIZE;
+	sctx->count += len;
+	done = 0;
+	src = data;
+
+	if ((partial + len) >= SHA1_BLOCK_SIZE) {
+		if (partial) {
+			done = -partial;
+			memcpy(sctx->buffer + partial, data,
+			       done + SHA1_BLOCK_SIZE);
+			src = sctx->buffer;
+		}
+
+		do {
+			octeon_sha1_transform(src);
+			done += SHA1_BLOCK_SIZE;
+			src = data + done;
+		} while (done + SHA1_BLOCK_SIZE <= len);
+
+		partial = 0;
+	}
+	memcpy(sctx->buffer + partial, src, len - done);
+}
+
+static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	struct octeon_cop2_state state;
+	unsigned long flags;
+
+	/*
+	 * Small updates never reach the crypto engine, so the generic sha1 is
+	 * faster because of the heavyweight octeon_crypto_enable() /
+	 * octeon_crypto_disable().
+	 */
+	if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+		return crypto_sha1_update(desc, data, len);
+
+	flags = octeon_crypto_enable(&state);
+	octeon_sha1_store_hash(sctx);
+
+	__octeon_sha1_update(sctx, data, len);
+
+	octeon_sha1_read_hash(sctx);
+	octeon_crypto_disable(&state, flags);
+
+	return 0;
+}
+
+static int octeon_sha1_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	static const u8 padding[64] = { 0x80, };
+	struct octeon_cop2_state state;
+	__be32 *dst = (__be32 *)out;
+	unsigned int pad_len;
+	unsigned long flags;
+	unsigned int index;
+	__be64 bits;
+	int i;
+
+	/* Save number of bits. */
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64. */
+	index = sctx->count & 0x3f;
+	pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+
+	flags = octeon_crypto_enable(&state);
+	octeon_sha1_store_hash(sctx);
+
+	__octeon_sha1_update(sctx, padding, pad_len);
+
+	/* Append length (before padding). */
+	__octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+	octeon_sha1_read_hash(sctx);
+	octeon_crypto_disable(&state, flags);
+
+	/* Store state in digest */
+	for (i = 0; i < 5; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Zeroize sensitive information. */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int octeon_sha1_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+static int octeon_sha1_import(struct shash_desc *desc, const void *in)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+static struct shash_alg octeon_sha1_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	octeon_sha1_init,
+	.update		=	octeon_sha1_update,
+	.final		=	octeon_sha1_final,
+	.export		=	octeon_sha1_export,
+	.import		=	octeon_sha1_import,
+	.descsize	=	sizeof(struct sha1_state),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"octeon-sha1",
+		.cra_priority	=	OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init octeon_sha1_mod_init(void)
+{
+	if (!octeon_has_crypto())
+		return -ENOTSUPP;
+	return crypto_register_shash(&octeon_sha1_alg);
+}
+
+static void __exit octeon_sha1_mod_fini(void)
+{
+	crypto_unregister_shash(&octeon_sha1_alg);
+}
+
+module_init(octeon_sha1_mod_init);
+module_exit(octeon_sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
new file mode 100644
index 000000000000..97e96fead08a
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -0,0 +1,280 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-224 and SHA-256 Secure Hash Algorithm.
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/sha256_generic.c, which is:
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/mm.h>
+#include <crypto/sha.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_sha256_store_hash(struct sha256_state *sctx)
+{
+	u64 *hash = (u64 *)sctx->state;
+
+	write_octeon_64bit_hash_dword(hash[0], 0);
+	write_octeon_64bit_hash_dword(hash[1], 1);
+	write_octeon_64bit_hash_dword(hash[2], 2);
+	write_octeon_64bit_hash_dword(hash[3], 3);
+}
+
+static void octeon_sha256_read_hash(struct sha256_state *sctx)
+{
+	u64 *hash = (u64 *)sctx->state;
+
+	hash[0] = read_octeon_64bit_hash_dword(0);
+	hash[1] = read_octeon_64bit_hash_dword(1);
+	hash[2] = read_octeon_64bit_hash_dword(2);
+	hash[3] = read_octeon_64bit_hash_dword(3);
+}
+
+static void octeon_sha256_transform(const void *_block)
+{
+	const u64 *block = _block;
+
+	write_octeon_64bit_block_dword(block[0], 0);
+	write_octeon_64bit_block_dword(block[1], 1);
+	write_octeon_64bit_block_dword(block[2], 2);
+	write_octeon_64bit_block_dword(block[3], 3);
+	write_octeon_64bit_block_dword(block[4], 4);
+	write_octeon_64bit_block_dword(block[5], 5);
+	write_octeon_64bit_block_dword(block[6], 6);
+	octeon_sha256_start(block[7]);
+}
+
+static int octeon_sha224_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int octeon_sha256_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data,
+				   unsigned int len)
+{
+	unsigned int partial;
+	unsigned int done;
+	const u8 *src;
+
+	partial = sctx->count % SHA256_BLOCK_SIZE;
+	sctx->count += len;
+	done = 0;
+	src = data;
+
+	if ((partial + len) >= SHA256_BLOCK_SIZE) {
+		if (partial) {
+			done = -partial;
+			memcpy(sctx->buf + partial, data,
+			       done + SHA256_BLOCK_SIZE);
+			src = sctx->buf;
+		}
+
+		do {
+			octeon_sha256_transform(src);
+			done += SHA256_BLOCK_SIZE;
+			src = data + done;
+		} while (done + SHA256_BLOCK_SIZE <= len);
+
+		partial = 0;
+	}
+	memcpy(sctx->buf + partial, src, len - done);
+}
+
+static int octeon_sha256_update(struct shash_desc *desc, const u8 *data,
+				unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct octeon_cop2_state state;
+	unsigned long flags;
+
+	/*
+	 * Small updates never reach the crypto engine, so the generic sha256 is
+	 * faster because of the heavyweight octeon_crypto_enable() /
+	 * octeon_crypto_disable().
+	 */
+	if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+		return crypto_sha256_update(desc, data, len);
+
+	flags = octeon_crypto_enable(&state);
+	octeon_sha256_store_hash(sctx);
+
+	__octeon_sha256_update(sctx, data, len);
+
+	octeon_sha256_read_hash(sctx);
+	octeon_crypto_disable(&state, flags);
+
+	return 0;
+}
+
+static int octeon_sha256_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	static const u8 padding[64] = { 0x80, };
+	struct octeon_cop2_state state;
+	__be32 *dst = (__be32 *)out;
+	unsigned int pad_len;
+	unsigned long flags;
+	unsigned int index;
+	__be64 bits;
+	int i;
+
+	/* Save number of bits. */
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64. */
+	index = sctx->count & 0x3f;
+	pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+
+	flags = octeon_crypto_enable(&state);
+	octeon_sha256_store_hash(sctx);
+
+	__octeon_sha256_update(sctx, padding, pad_len);
+
+	/* Append length (before padding). */
+	__octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+	octeon_sha256_read_hash(sctx);
+	octeon_crypto_disable(&state, flags);
+
+	/* Store state in digest */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Zeroize sensitive information. */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int octeon_sha224_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[SHA256_DIGEST_SIZE];
+
+	octeon_sha256_final(desc, D);
+
+	memcpy(hash, D, SHA224_DIGEST_SIZE);
+	memzero_explicit(D, SHA256_DIGEST_SIZE);
+
+	return 0;
+}
+
+static int octeon_sha256_export(struct shash_desc *desc, void *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+static int octeon_sha256_import(struct shash_desc *desc, const void *in)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+static struct shash_alg octeon_sha256_algs[2] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	octeon_sha256_init,
+	.update		=	octeon_sha256_update,
+	.final		=	octeon_sha256_final,
+	.export		=	octeon_sha256_export,
+	.import		=	octeon_sha256_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name=	"octeon-sha256",
+		.cra_priority	=	OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	octeon_sha224_init,
+	.update		=	octeon_sha256_update,
+	.final		=	octeon_sha224_final,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name=	"octeon-sha224",
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int __init octeon_sha256_mod_init(void)
+{
+	if (!octeon_has_crypto())
+		return -ENOTSUPP;
+	return crypto_register_shashes(octeon_sha256_algs,
+				       ARRAY_SIZE(octeon_sha256_algs));
+}
+
+static void __exit octeon_sha256_mod_fini(void)
+{
+	crypto_unregister_shashes(octeon_sha256_algs,
+				  ARRAY_SIZE(octeon_sha256_algs));
+}
+
+module_init(octeon_sha256_mod_init);
+module_exit(octeon_sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
new file mode 100644
index 000000000000..d5fb3c6f22ae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
@@ -0,0 +1,277 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-512 and SHA-384 Secure Hash Algorithm.
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/sha512_generic.c, which is:
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ */
+
+#include <linux/mm.h>
+#include <crypto/sha.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_sha512_store_hash(struct sha512_state *sctx)
+{
+	write_octeon_64bit_hash_sha512(sctx->state[0], 0);
+	write_octeon_64bit_hash_sha512(sctx->state[1], 1);
+	write_octeon_64bit_hash_sha512(sctx->state[2], 2);
+	write_octeon_64bit_hash_sha512(sctx->state[3], 3);
+	write_octeon_64bit_hash_sha512(sctx->state[4], 4);
+	write_octeon_64bit_hash_sha512(sctx->state[5], 5);
+	write_octeon_64bit_hash_sha512(sctx->state[6], 6);
+	write_octeon_64bit_hash_sha512(sctx->state[7], 7);
+}
+
+static void octeon_sha512_read_hash(struct sha512_state *sctx)
+{
+	sctx->state[0] = read_octeon_64bit_hash_sha512(0);
+	sctx->state[1] = read_octeon_64bit_hash_sha512(1);
+	sctx->state[2] = read_octeon_64bit_hash_sha512(2);
+	sctx->state[3] = read_octeon_64bit_hash_sha512(3);
+	sctx->state[4] = read_octeon_64bit_hash_sha512(4);
+	sctx->state[5] = read_octeon_64bit_hash_sha512(5);
+	sctx->state[6] = read_octeon_64bit_hash_sha512(6);
+	sctx->state[7] = read_octeon_64bit_hash_sha512(7);
+}
+
+static void octeon_sha512_transform(const void *_block)
+{
+	const u64 *block = _block;
+
+	write_octeon_64bit_block_sha512(block[0], 0);
+	write_octeon_64bit_block_sha512(block[1], 1);
+	write_octeon_64bit_block_sha512(block[2], 2);
+	write_octeon_64bit_block_sha512(block[3], 3);
+	write_octeon_64bit_block_sha512(block[4], 4);
+	write_octeon_64bit_block_sha512(block[5], 5);
+	write_octeon_64bit_block_sha512(block[6], 6);
+	write_octeon_64bit_block_sha512(block[7], 7);
+	write_octeon_64bit_block_sha512(block[8], 8);
+	write_octeon_64bit_block_sha512(block[9], 9);
+	write_octeon_64bit_block_sha512(block[10], 10);
+	write_octeon_64bit_block_sha512(block[11], 11);
+	write_octeon_64bit_block_sha512(block[12], 12);
+	write_octeon_64bit_block_sha512(block[13], 13);
+	write_octeon_64bit_block_sha512(block[14], 14);
+	octeon_sha512_start(block[15]);
+}
+
+static int octeon_sha512_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA512_H0;
+	sctx->state[1] = SHA512_H1;
+	sctx->state[2] = SHA512_H2;
+	sctx->state[3] = SHA512_H3;
+	sctx->state[4] = SHA512_H4;
+	sctx->state[5] = SHA512_H5;
+	sctx->state[6] = SHA512_H6;
+	sctx->state[7] = SHA512_H7;
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static int octeon_sha384_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA384_H0;
+	sctx->state[1] = SHA384_H1;
+	sctx->state[2] = SHA384_H2;
+	sctx->state[3] = SHA384_H3;
+	sctx->state[4] = SHA384_H4;
+	sctx->state[5] = SHA384_H5;
+	sctx->state[6] = SHA384_H6;
+	sctx->state[7] = SHA384_H7;
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data,
+				   unsigned int len)
+{
+	unsigned int part_len;
+	unsigned int index;
+	unsigned int i;
+
+	/* Compute number of bytes mod 128. */
+	index = sctx->count[0] % SHA512_BLOCK_SIZE;
+
+	/* Update number of bytes. */
+	if ((sctx->count[0] += len) < len)
+		sctx->count[1]++;
+
+	part_len = SHA512_BLOCK_SIZE - index;
+
+	/* Transform as many times as possible. */
+	if (len >= part_len) {
+		memcpy(&sctx->buf[index], data, part_len);
+		octeon_sha512_transform(sctx->buf);
+
+		for (i = part_len; i + SHA512_BLOCK_SIZE <= len;
+			i += SHA512_BLOCK_SIZE)
+			octeon_sha512_transform(&data[i]);
+
+		index = 0;
+	} else {
+		i = 0;
+	}
+
+	/* Buffer remaining input. */
+	memcpy(&sctx->buf[index], &data[i], len - i);
+}
+
+static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
+				unsigned int len)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	struct octeon_cop2_state state;
+	unsigned long flags;
+
+	/*
+	 * Small updates never reach the crypto engine, so the generic sha512 is
+	 * faster because of the heavyweight octeon_crypto_enable() /
+	 * octeon_crypto_disable().
+	 */
+	if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+		return crypto_sha512_update(desc, data, len);
+
+	flags = octeon_crypto_enable(&state);
+	octeon_sha512_store_hash(sctx);
+
+	__octeon_sha512_update(sctx, data, len);
+
+	octeon_sha512_read_hash(sctx);
+	octeon_crypto_disable(&state, flags);
+
+	return 0;
+}
+
+static int octeon_sha512_final(struct shash_desc *desc, u8 *hash)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	static u8 padding[128] = { 0x80, };
+	struct octeon_cop2_state state;
+	__be64 *dst = (__be64 *)hash;
+	unsigned int pad_len;
+	unsigned long flags;
+	unsigned int index;
+	__be64 bits[2];
+	int i;
+
+	/* Save number of bits. */
+	bits[1] = cpu_to_be64(sctx->count[0] << 3);
+	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+	/* Pad out to 112 mod 128. */
+	index = sctx->count[0] & 0x7f;
+	pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
+
+	flags = octeon_crypto_enable(&state);
+	octeon_sha512_store_hash(sctx);
+
+	__octeon_sha512_update(sctx, padding, pad_len);
+
+	/* Append length (before padding). */
+	__octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits));
+
+	octeon_sha512_read_hash(sctx);
+	octeon_crypto_disable(&state, flags);
+
+	/* Store state in digest. */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be64(sctx->state[i]);
+
+	/* Zeroize sensitive information. */
+	memset(sctx, 0, sizeof(struct sha512_state));
+
+	return 0;
+}
+
+static int octeon_sha384_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[64];
+
+	octeon_sha512_final(desc, D);
+
+	memcpy(hash, D, 48);
+	memzero_explicit(D, 64);
+
+	return 0;
+}
+
+static struct shash_alg octeon_sha512_algs[2] = { {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	octeon_sha512_init,
+	.update		=	octeon_sha512_update,
+	.final		=	octeon_sha512_final,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name=	"octeon-sha512",
+		.cra_priority	=	OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	octeon_sha384_init,
+	.update		=	octeon_sha512_update,
+	.final		=	octeon_sha384_final,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name=	"octeon-sha384",
+		.cra_priority	=	OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int __init octeon_sha512_mod_init(void)
+{
+	if (!octeon_has_crypto())
+		return -ENOTSUPP;
+	return crypto_register_shashes(octeon_sha512_algs,
+				       ARRAY_SIZE(octeon_sha512_algs));
+}
+
+static void __exit octeon_sha512_mod_fini(void)
+{
+	crypto_unregister_shashes(octeon_sha512_algs,
+				  ARRAY_SIZE(octeon_sha512_algs));
+}
+
+module_init(octeon_sha512_mod_init);
+module_exit(octeon_sha512_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 7d8987818ccf..d8960d46417b 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -306,7 +306,7 @@ void __init plat_swiotlb_setup(void)
 		swiotlbsize = 64 * (1<<20);
 	}
 #endif
-#ifdef CONFIG_USB_OCTEON_OHCI
+#ifdef CONFIG_USB_OHCI_HCD_PLATFORM
 	/* OCTEON II ohci is only 32-bit. */
 	if (OCTEON_IS_OCTEON2() && max_addr >= 0x100000000ul)
 		swiotlbsize = 64 * (1<<20);
diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
index 42e38c30b540..89b5273299ab 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
@@ -519,44 +519,89 @@ int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len)
 union __cvmx_l2c_tag {
 	uint64_t u64;
 	struct cvmx_l2c_tag_cn50xx {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved:40;
 		uint64_t V:1;		/* Line valid */
 		uint64_t D:1;		/* Line dirty */
 		uint64_t L:1;		/* Line locked */
 		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:20;	/* Phys mem addr (33..14) */
+#else
+		uint64_t addr:20;	/* Phys mem addr (33..14) */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t L:1;		/* Line locked */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t V:1;		/* Line valid */
+		uint64_t reserved:40;
+#endif
 	} cn50xx;
 	struct cvmx_l2c_tag_cn30xx {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved:41;
 		uint64_t V:1;		/* Line valid */
 		uint64_t D:1;		/* Line dirty */
 		uint64_t L:1;		/* Line locked */
 		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:19;	/* Phys mem addr (33..15) */
+#else
+		uint64_t addr:19;	/* Phys mem addr (33..15) */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t L:1;		/* Line locked */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t V:1;		/* Line valid */
+		uint64_t reserved:41;
+#endif
 	} cn30xx;
 	struct cvmx_l2c_tag_cn31xx {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved:42;
 		uint64_t V:1;		/* Line valid */
 		uint64_t D:1;		/* Line dirty */
 		uint64_t L:1;		/* Line locked */
 		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:18;	/* Phys mem addr (33..16) */
+#else
+		uint64_t addr:18;	/* Phys mem addr (33..16) */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t L:1;		/* Line locked */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t V:1;		/* Line valid */
+		uint64_t reserved:42;
+#endif
 	} cn31xx;
 	struct cvmx_l2c_tag_cn38xx {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved:43;
 		uint64_t V:1;		/* Line valid */
 		uint64_t D:1;		/* Line dirty */
 		uint64_t L:1;		/* Line locked */
 		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:17;	/* Phys mem addr (33..17) */
+#else
+		uint64_t addr:17;	/* Phys mem addr (33..17) */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t L:1;		/* Line locked */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t V:1;		/* Line valid */
+		uint64_t reserved:43;
+#endif
 	} cn38xx;
 	struct cvmx_l2c_tag_cn58xx {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved:44;
 		uint64_t V:1;		/* Line valid */
 		uint64_t D:1;		/* Line dirty */
 		uint64_t L:1;		/* Line locked */
 		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:16;	/* Phys mem addr (33..18) */
+#else
+		uint64_t addr:16;	/* Phys mem addr (33..18) */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t L:1;		/* Line locked */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t V:1;		/* Line valid */
+		uint64_t reserved:44;
+#endif
 	} cn58xx;
 	struct cvmx_l2c_tag_cn58xx cn56xx;	/* 2048 sets */
 	struct cvmx_l2c_tag_cn31xx cn52xx;	/* 512 sets */
diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c
index 237e5b1a72d8..a5e8f4a784af 100644
--- a/arch/mips/cavium-octeon/flash_setup.c
+++ b/arch/mips/cavium-octeon/flash_setup.c
@@ -8,9 +8,11 @@
  * Copyright (C) 2007, 2008 Cavium Networks
  */
 #include <linux/kernel.h>
-#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/semaphore.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
+#include <linux/of_platform.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/octeon/octeon.h>
@@ -25,19 +27,62 @@ static const char *part_probe_types[] = {
 	NULL
 };
 
+static map_word octeon_flash_map_read(struct map_info *map, unsigned long ofs)
+{
+	map_word r;
+
+	down(&octeon_bootbus_sem);
+	r = inline_map_read(map, ofs);
+	up(&octeon_bootbus_sem);
+
+	return r;
+}
+
+static void octeon_flash_map_write(struct map_info *map, const map_word datum,
+				   unsigned long ofs)
+{
+	down(&octeon_bootbus_sem);
+	inline_map_write(map, datum, ofs);
+	up(&octeon_bootbus_sem);
+}
+
+static void octeon_flash_map_copy_from(struct map_info *map, void *to,
+				       unsigned long from, ssize_t len)
+{
+	down(&octeon_bootbus_sem);
+	inline_map_copy_from(map, to, from, len);
+	up(&octeon_bootbus_sem);
+}
+
+static void octeon_flash_map_copy_to(struct map_info *map, unsigned long to,
+				     const void *from, ssize_t len)
+{
+	down(&octeon_bootbus_sem);
+	inline_map_copy_to(map, to, from, len);
+	up(&octeon_bootbus_sem);
+}
+
 /**
  * Module/ driver initialization.
  *
  * Returns Zero on success
  */
-static int __init flash_init(void)
+static int octeon_flash_probe(struct platform_device *pdev)
 {
+	union cvmx_mio_boot_reg_cfgx region_cfg;
+	u32 cs;
+	int r;
+	struct device_node *np = pdev->dev.of_node;
+
+	r = of_property_read_u32(np, "reg", &cs);
+	if (r)
+		return r;
+
 	/*
 	 * Read the bootbus region 0 setup to determine the base
 	 * address of the flash.
 	 */
-	union cvmx_mio_boot_reg_cfgx region_cfg;
-	region_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(0));
+	region_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(cs));
 	if (region_cfg.s.en) {
 		/*
 		 * The bootloader always takes the flash and sets its
@@ -56,7 +101,11 @@ static int __init flash_init(void)
 		flash_map.virt = ioremap(flash_map.phys, flash_map.size);
 		pr_notice("Bootbus flash: Setting flash for %luMB flash at "
 			  "0x%08llx\n", flash_map.size >> 20, flash_map.phys);
-		simple_map_init(&flash_map);
+		WARN_ON(!map_bankwidth_supported(flash_map.bankwidth));
+		flash_map.read = octeon_flash_map_read;
+		flash_map.write = octeon_flash_map_write;
+		flash_map.copy_from = octeon_flash_map_copy_from;
+		flash_map.copy_to = octeon_flash_map_copy_to;
 		mymtd = do_map_probe("cfi_probe", &flash_map);
 		if (mymtd) {
 			mymtd->owner = THIS_MODULE;
@@ -69,4 +118,26 @@ static int __init flash_init(void)
 	return 0;
 }
 
-late_initcall(flash_init);
+static const struct of_device_id of_flash_match[] = {
+	{
+		.compatible	= "cfi-flash",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, of_flash_match);
+
+static struct platform_driver of_flash_driver = {
+	.driver = {
+		.name = "octeon-of-flash",
+		.of_match_table = of_flash_match,
+	},
+	.probe		= octeon_flash_probe,
+};
+
+static int octeon_flash_init(void)
+{
+	return platform_driver_register(&of_flash_driver);
+}
+late_initcall(octeon_flash_init);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 12410a2788d8..d113c8ded6e2 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -325,8 +325,14 @@ static void __init octeon_ehci_hw_start(struct device *dev)
 	/* Use 64-bit addressing. */
 	ehci_ctl.s.ehci_64b_addr_en = 1;
 	ehci_ctl.s.l2c_addr_msb = 0;
+#ifdef __BIG_ENDIAN
 	ehci_ctl.s.l2c_buff_emod = 1; /* Byte swapped. */
 	ehci_ctl.s.l2c_desc_emod = 1; /* Byte swapped. */
+#else
+	ehci_ctl.s.l2c_buff_emod = 0; /* not swapped. */
+	ehci_ctl.s.l2c_desc_emod = 0; /* not swapped. */
+	ehci_ctl.s.inv_reg_a2 = 1;
+#endif
 	cvmx_write_csr(CVMX_UCTLX_EHCI_CTL(0), ehci_ctl.u64);
 
 	octeon2_usb_clocks_stop();
@@ -381,8 +387,14 @@ static void __init octeon_ohci_hw_start(struct device *dev)
 
 	ohci_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_OHCI_CTL(0));
 	ohci_ctl.s.l2c_addr_msb = 0;
+#ifdef __BIG_ENDIAN
 	ohci_ctl.s.l2c_buff_emod = 1; /* Byte swapped. */
 	ohci_ctl.s.l2c_desc_emod = 1; /* Byte swapped. */
+#else
+	ohci_ctl.s.l2c_buff_emod = 0; /* not swapped. */
+	ohci_ctl.s.l2c_desc_emod = 0; /* not swapped. */
+	ohci_ctl.s.inv_reg_a2 = 1;
+#endif
 	cvmx_write_csr(CVMX_UCTLX_OHCI_CTL(0), ohci_ctl.u64);
 
 	octeon2_usb_clocks_stop();
@@ -958,6 +970,13 @@ end_led:
 		}
 	}
 
+	if (octeon_bootinfo->board_type != CVMX_BOARD_TYPE_CUST_DSR1000N) {
+		int dsr1000n_leds = fdt_path_offset(initial_boot_params,
+						    "/dsr1000n-leds");
+		if (dsr1000n_leds >= 0)
+			fdt_nop_node(initial_boot_params, dsr1000n_leds);
+	}
+
 	return 0;
 }
 
diff --git a/arch/mips/cavium-octeon/octeon_boot.h b/arch/mips/cavium-octeon/octeon_boot.h
index 7b066bbca86d..a6ce7c43e0ae 100644
--- a/arch/mips/cavium-octeon/octeon_boot.h
+++ b/arch/mips/cavium-octeon/octeon_boot.h
@@ -37,11 +37,13 @@ struct boot_init_vector {
 
 /* similar to bootloader's linux_app_boot_info but without global data */
 struct linux_app_boot_info {
+#ifdef __BIG_ENDIAN_BITFIELD
 	uint32_t labi_signature;
 	uint32_t start_core0_addr;
 	uint32_t avail_coremask;
 	uint32_t pci_console_active;
 	uint32_t icache_prefetch_disable;
+	uint32_t padding;
 	uint64_t InitTLBStart_addr;
 	uint32_t start_app_addr;
 	uint32_t cur_exception_base;
@@ -49,6 +51,27 @@ struct linux_app_boot_info {
 	uint32_t compact_flash_common_base_addr;
 	uint32_t compact_flash_attribute_base_addr;
 	uint32_t led_display_base_addr;
+#else
+	uint32_t start_core0_addr;
+	uint32_t labi_signature;
+
+	uint32_t pci_console_active;
+	uint32_t avail_coremask;
+
+	uint32_t padding;
+	uint32_t icache_prefetch_disable;
+
+	uint64_t InitTLBStart_addr;
+
+	uint32_t cur_exception_base;
+	uint32_t start_app_addr;
+
+	uint32_t compact_flash_common_base_addr;
+	uint32_t no_mark_private_data;
+
+	uint32_t led_display_base_addr;
+	uint32_t compact_flash_attribute_base_addr;
+#endif
 };
 
 /* If not to copy a lot of bootloader's structures
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index a42110e7edbc..89a628455bc2 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -51,6 +51,9 @@ extern void pci_console_init(const char *arg);
 
 static unsigned long long MAX_MEMORY = 512ull << 20;
 
+DEFINE_SEMAPHORE(octeon_bootbus_sem);
+EXPORT_SYMBOL(octeon_bootbus_sem);
+
 struct octeon_boot_descriptor *octeon_boot_desc_ptr;
 
 struct cvmx_bootinfo *octeon_bootinfo;
@@ -413,7 +416,10 @@ static void octeon_restart(char *command)
 
 	mb();
 	while (1)
-		cvmx_write_csr(CVMX_CIU_SOFT_RST, 1);
+		if (OCTEON_IS_OCTEON3())
+			cvmx_write_csr(CVMX_RST_SOFT_RST, 1);
+		else
+			cvmx_write_csr(CVMX_CIU_SOFT_RST, 1);
 }
 
 
@@ -1043,7 +1049,7 @@ int prom_putchar(char c)
 }
 EXPORT_SYMBOL(prom_putchar);
 
-void prom_free_prom_memory(void)
+void __init prom_free_prom_memory(void)
 {
 	if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR) {
 		/* Check for presence of Core-14449 fix.  */
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 8b1eeffa12ed..56f5d080ef9d 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -72,7 +72,7 @@ static inline void octeon_send_ipi_mask(const struct cpumask *mask,
 {
 	unsigned int i;
 
-	for_each_cpu_mask(i, *mask)
+	for_each_cpu(i, mask)
 		octeon_send_ipi_single(i, action);
 }
 
@@ -239,7 +239,7 @@ static int octeon_cpu_disable(void)
 		return -ENOTSUPP;
 
 	set_cpu_online(cpu, false);
-	cpu_clear(cpu, cpu_callin_map);
+	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	octeon_fixup_irqs();
 
 	flush_cache_all();
diff --git a/arch/mips/configs/bcm3384_defconfig b/arch/mips/configs/bmips_be_defconfig
index 88711c28ff32..f5585c8f35ad 100644
--- a/arch/mips/configs/bcm3384_defconfig
+++ b/arch/mips/configs/bmips_be_defconfig
@@ -1,4 +1,4 @@
-CONFIG_BCM3384=y
+CONFIG_BMIPS_GENERIC=y
 CONFIG_HIGHMEM=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
@@ -33,6 +33,7 @@ CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_BRCMSTB_GISB_ARB=y
 CONFIG_MTD=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -43,15 +44,19 @@ CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
+CONFIG_BCMGENET=y
 CONFIG_USB_USBNET=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
-CONFIG_SERIAL_EARLYCON_FORCE=y
 CONFIG_SERIAL_BCM63XX=y
 CONFIG_SERIAL_BCM63XX_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_BRCMSTB=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
@@ -75,4 +80,6 @@ CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon"
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig
new file mode 100644
index 000000000000..400a47ec1ef1
--- /dev/null
+++ b/arch/mips/configs/bmips_stb_defconfig
@@ -0,0 +1,88 @@
+CONFIG_BMIPS_GENERIC=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_HIGHMEM=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+# CONFIG_SECCOMP is not set
+CONFIG_MIPS_O32_FP64_SUPPORT=y
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_NO_HZ=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+CONFIG_EXPERT=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+CONFIG_CFG80211=y
+CONFIG_NL80211_TESTMODE=y
+CONFIG_MAC80211=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_BRCMSTB_GISB_ARB=y
+CONFIG_MTD=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_BLK_DEV is not set
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+CONFIG_BCMGENET=y
+CONFIG_USB_USBNET=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_BRCMSTB=y
+CONFIG_POWER_RESET_SYSCON=y
+# CONFIG_HWMON is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_DNOTIFY is not set
+CONFIG_FUSE_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_CIFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon"
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 70ffe9b55829..fe48220157a9 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -105,7 +105,8 @@ CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
 # CONFIG_RTC_INTF_SYSFS is not set
 # CONFIG_RTC_INTF_PROC is not set
-CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_DS1685_FAMILY=y
+CONFIG_RTC_DRV_DS1685=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index e51aad9a94b1..0cbc9863c7c8 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -171,6 +171,7 @@ CONFIG_SERIAL_8250_FOURPORT=y
 CONFIG_LEGACY_PTY_COUNT=16
 CONFIG_HW_RANDOM=y
 CONFIG_RTC=y
+CONFIG_GPIO_LOONGSON=y
 CONFIG_THERMAL=y
 CONFIG_MEDIA_SUPPORT=m
 CONFIG_VIDEO_DEV=m
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 7eabcd2031ea..c8442997477b 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -243,6 +243,7 @@ CONFIG_HW_RANDOM=y
 CONFIG_RAW_DRIVER=m
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PIIX4=y
+CONFIG_GPIO_LOONGSON=y
 CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83627HF=m
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
new file mode 100644
index 000000000000..c388bff09148
--- /dev/null
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -0,0 +1,439 @@
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_CPU_MIPS32_R5_FEATURES=y
+CONFIG_CPU_MIPS32_R5_XPA=y
+CONFIG_PAGE_SIZE_16KB=y
+CONFIG_HZ_100=y
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_NAMESPACES=y
+CONFIG_RELAY=y
+CONFIG_EXPERT=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_NETWORK_SECMARK=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_IP_SCTP=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_PHONET=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_CLS_IND=y
+CONFIG_CFG80211=m
+CONFIG_MAC80211=m
+CONFIG_MAC80211_MESH=y
+CONFIG_RFKILL=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=m
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_OOPS=m
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_GLUEBI=m
+CONFIG_BLK_DEV_FD=m
+CONFIG_BLK_DEV_UMEM=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_IT8213=m
+CONFIG_BLK_DEV_TC86C001=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=m
+CONFIG_BLK_DEV_SD=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_BLK_DEV_3W_XXXX_RAID=m
+CONFIG_SCSI_3W_9XXX=m
+CONFIG_SCSI_ACARD=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+# CONFIG_NET_VENDOR_3COM is not set
+CONFIG_PCNET32=y
+CONFIG_CHELSIO_T3=m
+CONFIG_AX88796=m
+CONFIG_NETXEN_NIC=m
+CONFIG_TC35815=m
+CONFIG_MARVELL_PHY=m
+CONFIG_DAVICOM_PHY=m
+CONFIG_QSEMI_PHY=m
+CONFIG_LXT_PHY=m
+CONFIG_CICADA_PHY=m
+CONFIG_VITESSE_PHY=m
+CONFIG_SMSC_PHY=m
+CONFIG_BROADCOM_PHY=m
+CONFIG_ICPLUS_PHY=m
+CONFIG_REALTEK_PHY=m
+CONFIG_ATMEL=m
+CONFIG_PCI_ATMEL=m
+CONFIG_PRISM54=m
+CONFIG_HOSTAP=m
+CONFIG_HOSTAP_FIRMWARE=y
+CONFIG_HOSTAP_FIRMWARE_NVRAM=y
+CONFIG_HOSTAP_PLX=m
+CONFIG_HOSTAP_PCI=m
+CONFIG_IPW2100=m
+CONFIG_IPW2100_MONITOR=y
+CONFIG_LIBERTAS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FB_CIRRUS=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_HID=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_UIO=m
+CONFIG_UIO_CIF=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+CONFIG_REISERFS_FS=m
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_QUOTA=y
+CONFIG_QFMT_V2=y
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_AFFS_FS=m
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_JFFS2_FS=m
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_RUBIN=y
+CONFIG_CRAMFS=m
+CONFIG_VXFS_FS=m
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRC16=m
diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig
new file mode 100644
index 000000000000..f22e92ee7709
--- /dev/null
+++ b/arch/mips/configs/pistachio_defconfig
@@ -0,0 +1,336 @@
+CONFIG_MACH_PISTACHIO=y
+CONFIG_MIPS_MT_SMP=y
+CONFIG_MIPS_CPS=y
+# CONFIG_COMPACTION is not set
+CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
+CONFIG_ZSMALLOC=y
+CONFIG_NR_CPUS=4
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="localhost"
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=m
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_CC_STACKPROTECTOR_STRONG=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PM_DEBUG=y
+CONFIG_PM_ADVANCED_DEBUG=y
+CONFIG_CPU_IDLE=y
+# CONFIG_MIPS_CPS_CPUIDLE is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+# CONFIG_INET_DIAG is not set
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_IPV6_SIT=m
+CONFIG_NETWORK_SECMARK=y
+CONFIG_NETFILTER=y
+# CONFIG_BRIDGE_NETFILTER is not set
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_MARK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_DSCP=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_DSCP=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_MARK=y
+CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_HCIBTUSB=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_CFG80211=m
+CONFIG_NL80211_TESTMODE=y
+CONFIG_CFG80211_DEBUGFS=y
+CONFIG_CFG80211_WEXT=y
+CONFIG_MAC80211=m
+CONFIG_MAC80211_LEDS=y
+CONFIG_MAC80211_DEBUGFS=y
+CONFIG_MAC80211_DEBUG_MENU=y
+CONFIG_MAC80211_VERBOSE_DEBUG=y
+CONFIG_RFKILL=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEBUG_DEVRES=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_BLOCK=y
+CONFIG_ZRAM=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=m
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_VERITY=y
+CONFIG_NETDEVICES=y
+CONFIG_TUN=m
+CONFIG_VETH=m
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROCHIP is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_PPP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_RTL8152=m
+CONFIG_USB_NET_DM9601=m
+CONFIG_USB_NET_SMSC75XX=m
+CONFIG_USB_NET_SMSC95XX=m
+CONFIG_USB_NET_MCS7830=m
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+CONFIG_LIBERTAS_THINFIRM=m
+CONFIG_USB_NET_RNDIS_WLAN=m
+CONFIG_MAC80211_HWSIM=m
+CONFIG_HOSTAP=m
+CONFIG_HOSTAP_FIRMWARE=y
+CONFIG_HOSTAP_FIRMWARE_NVRAM=y
+CONFIG_RT2X00=m
+CONFIG_RT2800USB=m
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=y
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_HW_RANDOM=y
+CONFIG_TCG_TPM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_IMG=y
+CONFIG_I2C_STUB=m
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=m
+CONFIG_SPI_IMG_SPFI=y
+CONFIG_SPI_SPIDEV=y
+CONFIG_DEBUG_GPIO=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+CONFIG_IMGPDC_WDT=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_REGULATOR_GPIO=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_MEDIA_RC_SUPPORT=y
+# CONFIG_RC_DECODERS is not set
+CONFIG_RC_DEVICES=y
+CONFIG_IR_IMG=y
+CONFIG_IR_IMG_NEC=y
+CONFIG_IR_IMG_JVC=y
+CONFIG_IR_IMG_SONY=y
+CONFIG_IR_IMG_SHARP=y
+CONFIG_IR_IMG_SANYO=y
+CONFIG_IR_IMG_RC5=y
+CONFIG_IR_IMG_RC6=y
+# CONFIG_DVB_TUNER_DIB0070 is not set
+# CONFIG_DVB_TUNER_DIB0090 is not set
+CONFIG_FB=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_HRTIMER=m
+CONFIG_SND_DYNAMIC_MINORS=y
+# CONFIG_SND_SPI is not set
+CONFIG_SND_USB_AUDIO=m
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+# CONFIG_USB_DEFAULT_PERSIST is not set
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+CONFIG_USB_ACM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_DWC2=y
+CONFIG_USB_SERIAL=y
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_CP210X=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_OTI6858=m
+CONFIG_USB_SERIAL_QUALCOMM=m
+CONFIG_USB_SERIAL_SIERRAWIRELESS=m
+CONFIG_USB_SERIAL_OPTION=m
+CONFIG_MMC=y
+CONFIG_MMC_BLOCK_MINORS=16
+CONFIG_MMC_TEST=m
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_RTC_CLASS=y
+CONFIG_DMADEVICES=y
+CONFIG_IMG_MDC_DMA=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+# CONFIG_ANDROID_TIMED_OUTPUT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_MEMORY=y
+CONFIG_IIO=y
+CONFIG_CC10001_ADC=y
+CONFIG_PWM=y
+CONFIG_PWM_IMG=y
+CONFIG_ANDROID=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_DNOTIFY is not set
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_ECRYPT_FS=y
+CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_FILE_DIRECT=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_LKDTM=y
+CONFIG_TEST_UDELAY=m
+CONFIG_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_YAMA=y
+CONFIG_SECURITY_YAMA_STACKED=y
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_CRYPTO_AUTHENC=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRC_CCITT=y
+CONFIG_CRC_T10DIF=m
+CONFIG_CRC7=m
+CONFIG_LIBCRC32C=m
+# CONFIG_XZ_DEC_X86 is not set
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index 41a2fa1fa12e..8c6f508e59de 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S
@@ -267,8 +267,13 @@ handle_it:
 
 #ifdef CONFIG_32BIT
 fpu:
+		lw	t0,fpu_kstat_irq
+		nop
+		lw	t1,(t0)
+		nop
+		addu	t1,1
 		j	handle_fpe_int
-		 nop
+		 sw	t1,(t0)
 #endif
 
 spurious:
diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index 41bbffd9cc0e..a0b8943c8f11 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -12,13 +12,15 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/irqnr.h>
 #include <linux/module.h>
 #include <linux/param.h>
+#include <linux/percpu-defs.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/pm.h>
-#include <linux/irq.h>
 
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
@@ -98,6 +100,7 @@ int_ptr asic_mask_nr_tbl[DEC_MAX_ASIC_INTS][2] = {
 	{ { .i = ~0 }, { .p = asic_intr_unimplemented } },
 };
 int cpu_fpu_mask = DEC_CPU_IRQ_MASK(DEC_CPU_INR_FPU);
+int *fpu_kstat_irq;
 
 static struct irqaction ioirq = {
 	.handler = no_action,
@@ -755,8 +758,15 @@ void __init arch_init_irq(void)
 		dec_interrupt[DEC_IRQ_HALT] = -1;
 
 	/* Register board interrupts: FPU and cascade. */
-	if (dec_interrupt[DEC_IRQ_FPU] >= 0)
-		setup_irq(dec_interrupt[DEC_IRQ_FPU], &fpuirq);
+	if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) {
+		struct irq_desc *desc_fpu;
+		int irq_fpu;
+
+		irq_fpu = dec_interrupt[DEC_IRQ_FPU];
+		setup_irq(irq_fpu, &fpuirq);
+		desc_fpu = irq_to_desc(irq_fpu);
+		fpu_kstat_irq = this_cpu_ptr(desc_fpu->kstat_irqs);
+	}
 	if (dec_interrupt[DEC_IRQ_CASCADE] >= 0)
 		setup_irq(dec_interrupt[DEC_IRQ_CASCADE], &ioirq);
 
diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h
index e41c56e375b1..1e38f0e1ea3e 100644
--- a/arch/mips/include/asm/asm-eva.h
+++ b/arch/mips/include/asm/asm-eva.h
@@ -11,6 +11,36 @@
 #define __ASM_ASM_EVA_H
 
 #ifndef __ASSEMBLY__
+
+/* Kernel variants */
+
+#define kernel_cache(op, base)		"cache " op ", " base "\n"
+#define kernel_ll(reg, addr)		"ll " reg ", " addr "\n"
+#define kernel_sc(reg, addr)		"sc " reg ", " addr "\n"
+#define kernel_lw(reg, addr)		"lw " reg ", " addr "\n"
+#define kernel_lwl(reg, addr)		"lwl " reg ", " addr "\n"
+#define kernel_lwr(reg, addr)		"lwr " reg ", " addr "\n"
+#define kernel_lh(reg, addr)		"lh " reg ", " addr "\n"
+#define kernel_lb(reg, addr)		"lb " reg ", " addr "\n"
+#define kernel_lbu(reg, addr)		"lbu " reg ", " addr "\n"
+#define kernel_sw(reg, addr)		"sw " reg ", " addr "\n"
+#define kernel_swl(reg, addr)		"swl " reg ", " addr "\n"
+#define kernel_swr(reg, addr)		"swr " reg ", " addr "\n"
+#define kernel_sh(reg, addr)		"sh " reg ", " addr "\n"
+#define kernel_sb(reg, addr)		"sb " reg ", " addr "\n"
+
+#ifdef CONFIG_32BIT
+/*
+ * No 'sd' or 'ld' instructions in 32-bit but the code will
+ * do the correct thing
+ */
+#define kernel_sd(reg, addr)		user_sw(reg, addr)
+#define kernel_ld(reg, addr)		user_lw(reg, addr)
+#else
+#define kernel_sd(reg, addr)		"sd " reg", " addr "\n"
+#define kernel_ld(reg, addr)		"ld " reg", " addr "\n"
+#endif /* CONFIG_32BIT */
+
 #ifdef CONFIG_EVA
 
 #define __BUILD_EVA_INSN(insn, reg, addr)				\
@@ -41,37 +71,60 @@
 
 #else
 
-#define user_cache(op, base)		"cache " op ", " base "\n"
-#define user_ll(reg, addr)		"ll " reg ", " addr "\n"
-#define user_sc(reg, addr)		"sc " reg ", " addr "\n"
-#define user_lw(reg, addr)		"lw " reg ", " addr "\n"
-#define user_lwl(reg, addr)		"lwl " reg ", " addr "\n"
-#define user_lwr(reg, addr)		"lwr " reg ", " addr "\n"
-#define user_lh(reg, addr)		"lh " reg ", " addr "\n"
-#define user_lb(reg, addr)		"lb " reg ", " addr "\n"
-#define user_lbu(reg, addr)		"lbu " reg ", " addr "\n"
-#define user_sw(reg, addr)		"sw " reg ", " addr "\n"
-#define user_swl(reg, addr)		"swl " reg ", " addr "\n"
-#define user_swr(reg, addr)		"swr " reg ", " addr "\n"
-#define user_sh(reg, addr)		"sh " reg ", " addr "\n"
-#define user_sb(reg, addr)		"sb " reg ", " addr "\n"
+#define user_cache(op, base)		kernel_cache(op, base)
+#define user_ll(reg, addr)		kernel_ll(reg, addr)
+#define user_sc(reg, addr)		kernel_sc(reg, addr)
+#define user_lw(reg, addr)		kernel_lw(reg, addr)
+#define user_lwl(reg, addr)		kernel_lwl(reg, addr)
+#define user_lwr(reg, addr)		kernel_lwr(reg, addr)
+#define user_lh(reg, addr)		kernel_lh(reg, addr)
+#define user_lb(reg, addr)		kernel_lb(reg, addr)
+#define user_lbu(reg, addr)		kernel_lbu(reg, addr)
+#define user_sw(reg, addr)		kernel_sw(reg, addr)
+#define user_swl(reg, addr)		kernel_swl(reg, addr)
+#define user_swr(reg, addr)		kernel_swr(reg, addr)
+#define user_sh(reg, addr)		kernel_sh(reg, addr)
+#define user_sb(reg, addr)		kernel_sb(reg, addr)
 
 #ifdef CONFIG_32BIT
-/*
- * No 'sd' or 'ld' instructions in 32-bit but the code will
- * do the correct thing
- */
-#define user_sd(reg, addr)		user_sw(reg, addr)
-#define user_ld(reg, addr)		user_lw(reg, addr)
+#define user_sd(reg, addr)		kernel_sw(reg, addr)
+#define user_ld(reg, addr)		kernel_lw(reg, addr)
 #else
-#define user_sd(reg, addr)		"sd " reg", " addr "\n"
-#define user_ld(reg, addr)		"ld " reg", " addr "\n"
+#define user_sd(reg, addr)		kernel_sd(reg, addr)
+#define user_ld(reg, addr)		kernel_ld(reg, addr)
 #endif /* CONFIG_32BIT */
 
 #endif /* CONFIG_EVA */
 
 #else /* __ASSEMBLY__ */
 
+#define kernel_cache(op, base)		cache op, base
+#define kernel_ll(reg, addr)		ll reg, addr
+#define kernel_sc(reg, addr)		sc reg, addr
+#define kernel_lw(reg, addr)		lw reg, addr
+#define kernel_lwl(reg, addr)		lwl reg, addr
+#define kernel_lwr(reg, addr)		lwr reg, addr
+#define kernel_lh(reg, addr)		lh reg, addr
+#define kernel_lb(reg, addr)		lb reg, addr
+#define kernel_lbu(reg, addr)		lbu reg, addr
+#define kernel_sw(reg, addr)		sw reg, addr
+#define kernel_swl(reg, addr)		swl reg, addr
+#define kernel_swr(reg, addr)		swr reg, addr
+#define kernel_sh(reg, addr)		sh reg, addr
+#define kernel_sb(reg, addr)		sb reg, addr
+
+#ifdef CONFIG_32BIT
+/*
+ * No 'sd' or 'ld' instructions in 32-bit but the code will
+ * do the correct thing
+ */
+#define kernel_sd(reg, addr)		user_sw(reg, addr)
+#define kernel_ld(reg, addr)		user_lw(reg, addr)
+#else
+#define kernel_sd(reg, addr)		sd reg, addr
+#define kernel_ld(reg, addr)		ld reg, addr
+#endif /* CONFIG_32BIT */
+
 #ifdef CONFIG_EVA
 
 #define __BUILD_EVA_INSN(insn, reg, addr)			\
@@ -101,31 +154,27 @@
 #define user_sd(reg, addr)		user_sw(reg, addr)
 #else
 
-#define user_cache(op, base)		cache op, base
-#define user_ll(reg, addr)		ll reg, addr
-#define user_sc(reg, addr)		sc reg, addr
-#define user_lw(reg, addr)		lw reg, addr
-#define user_lwl(reg, addr)		lwl reg, addr
-#define user_lwr(reg, addr)		lwr reg, addr
-#define user_lh(reg, addr)		lh reg, addr
-#define user_lb(reg, addr)		lb reg, addr
-#define user_lbu(reg, addr)		lbu reg, addr
-#define user_sw(reg, addr)		sw reg, addr
-#define user_swl(reg, addr)		swl reg, addr
-#define user_swr(reg, addr)		swr reg, addr
-#define user_sh(reg, addr)		sh reg, addr
-#define user_sb(reg, addr)		sb reg, addr
+#define user_cache(op, base)		kernel_cache(op, base)
+#define user_ll(reg, addr)		kernel_ll(reg, addr)
+#define user_sc(reg, addr)		kernel_sc(reg, addr)
+#define user_lw(reg, addr)		kernel_lw(reg, addr)
+#define user_lwl(reg, addr)		kernel_lwl(reg, addr)
+#define user_lwr(reg, addr)		kernel_lwr(reg, addr)
+#define user_lh(reg, addr)		kernel_lh(reg, addr)
+#define user_lb(reg, addr)		kernel_lb(reg, addr)
+#define user_lbu(reg, addr)		kernel_lbu(reg, addr)
+#define user_sw(reg, addr)		kernel_sw(reg, addr)
+#define user_swl(reg, addr)		kernel_swl(reg, addr)
+#define user_swr(reg, addr)		kernel_swr(reg, addr)
+#define user_sh(reg, addr)		kernel_sh(reg, addr)
+#define user_sb(reg, addr)		kernel_sb(reg, addr)
 
 #ifdef CONFIG_32BIT
-/*
- * No 'sd' or 'ld' instructions in 32-bit but the code will
- * do the correct thing
- */
-#define user_sd(reg, addr)		user_sw(reg, addr)
-#define user_ld(reg, addr)		user_lw(reg, addr)
+#define user_sd(reg, addr)		kernel_sw(reg, addr)
+#define user_ld(reg, addr)		kernel_lw(reg, addr)
 #else
-#define user_sd(reg, addr)		sd reg, addr
-#define user_ld(reg, addr)		ld reg, addr
+#define user_sd(reg, addr)		kernel_sd(reg, addr)
+#define user_ld(reg, addr)		kernel_sd(reg, addr)
 #endif /* CONFIG_32BIT */
 
 #endif /* CONFIG_EVA */
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index cdac7b3eeaf7..0ef39ad0f2d4 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -16,38 +16,22 @@
 	.set push
 	SET_HARDFLOAT
 	cfc1	\tmp,  fcr31
-	swc1	$f0,  THREAD_FPR0_LS64(\thread)
-	swc1	$f1,  THREAD_FPR1_LS64(\thread)
-	swc1	$f2,  THREAD_FPR2_LS64(\thread)
-	swc1	$f3,  THREAD_FPR3_LS64(\thread)
-	swc1	$f4,  THREAD_FPR4_LS64(\thread)
-	swc1	$f5,  THREAD_FPR5_LS64(\thread)
-	swc1	$f6,  THREAD_FPR6_LS64(\thread)
-	swc1	$f7,  THREAD_FPR7_LS64(\thread)
-	swc1	$f8,  THREAD_FPR8_LS64(\thread)
-	swc1	$f9,  THREAD_FPR9_LS64(\thread)
-	swc1	$f10, THREAD_FPR10_LS64(\thread)
-	swc1	$f11, THREAD_FPR11_LS64(\thread)
-	swc1	$f12, THREAD_FPR12_LS64(\thread)
-	swc1	$f13, THREAD_FPR13_LS64(\thread)
-	swc1	$f14, THREAD_FPR14_LS64(\thread)
-	swc1	$f15, THREAD_FPR15_LS64(\thread)
-	swc1	$f16, THREAD_FPR16_LS64(\thread)
-	swc1	$f17, THREAD_FPR17_LS64(\thread)
-	swc1	$f18, THREAD_FPR18_LS64(\thread)
-	swc1	$f19, THREAD_FPR19_LS64(\thread)
-	swc1	$f20, THREAD_FPR20_LS64(\thread)
-	swc1	$f21, THREAD_FPR21_LS64(\thread)
-	swc1	$f22, THREAD_FPR22_LS64(\thread)
-	swc1	$f23, THREAD_FPR23_LS64(\thread)
-	swc1	$f24, THREAD_FPR24_LS64(\thread)
-	swc1	$f25, THREAD_FPR25_LS64(\thread)
-	swc1	$f26, THREAD_FPR26_LS64(\thread)
-	swc1	$f27, THREAD_FPR27_LS64(\thread)
-	swc1	$f28, THREAD_FPR28_LS64(\thread)
-	swc1	$f29, THREAD_FPR29_LS64(\thread)
-	swc1	$f30, THREAD_FPR30_LS64(\thread)
-	swc1	$f31, THREAD_FPR31_LS64(\thread)
+	s.d	$f0,  THREAD_FPR0(\thread)
+	s.d	$f2,  THREAD_FPR2(\thread)
+	s.d	$f4,  THREAD_FPR4(\thread)
+	s.d	$f6,  THREAD_FPR6(\thread)
+	s.d	$f8,  THREAD_FPR8(\thread)
+	s.d	$f10, THREAD_FPR10(\thread)
+	s.d	$f12, THREAD_FPR12(\thread)
+	s.d	$f14, THREAD_FPR14(\thread)
+	s.d	$f16, THREAD_FPR16(\thread)
+	s.d	$f18, THREAD_FPR18(\thread)
+	s.d	$f20, THREAD_FPR20(\thread)
+	s.d	$f22, THREAD_FPR22(\thread)
+	s.d	$f24, THREAD_FPR24(\thread)
+	s.d	$f26, THREAD_FPR26(\thread)
+	s.d	$f28, THREAD_FPR28(\thread)
+	s.d	$f30, THREAD_FPR30(\thread)
 	sw	\tmp, THREAD_FCR31(\thread)
 	.set pop
 	.endm
@@ -56,38 +40,22 @@
 	.set push
 	SET_HARDFLOAT
 	lw	\tmp, THREAD_FCR31(\thread)
-	lwc1	$f0,  THREAD_FPR0_LS64(\thread)
-	lwc1	$f1,  THREAD_FPR1_LS64(\thread)
-	lwc1	$f2,  THREAD_FPR2_LS64(\thread)
-	lwc1	$f3,  THREAD_FPR3_LS64(\thread)
-	lwc1	$f4,  THREAD_FPR4_LS64(\thread)
-	lwc1	$f5,  THREAD_FPR5_LS64(\thread)
-	lwc1	$f6,  THREAD_FPR6_LS64(\thread)
-	lwc1	$f7,  THREAD_FPR7_LS64(\thread)
-	lwc1	$f8,  THREAD_FPR8_LS64(\thread)
-	lwc1	$f9,  THREAD_FPR9_LS64(\thread)
-	lwc1	$f10, THREAD_FPR10_LS64(\thread)
-	lwc1	$f11, THREAD_FPR11_LS64(\thread)
-	lwc1	$f12, THREAD_FPR12_LS64(\thread)
-	lwc1	$f13, THREAD_FPR13_LS64(\thread)
-	lwc1	$f14, THREAD_FPR14_LS64(\thread)
-	lwc1	$f15, THREAD_FPR15_LS64(\thread)
-	lwc1	$f16, THREAD_FPR16_LS64(\thread)
-	lwc1	$f17, THREAD_FPR17_LS64(\thread)
-	lwc1	$f18, THREAD_FPR18_LS64(\thread)
-	lwc1	$f19, THREAD_FPR19_LS64(\thread)
-	lwc1	$f20, THREAD_FPR20_LS64(\thread)
-	lwc1	$f21, THREAD_FPR21_LS64(\thread)
-	lwc1	$f22, THREAD_FPR22_LS64(\thread)
-	lwc1	$f23, THREAD_FPR23_LS64(\thread)
-	lwc1	$f24, THREAD_FPR24_LS64(\thread)
-	lwc1	$f25, THREAD_FPR25_LS64(\thread)
-	lwc1	$f26, THREAD_FPR26_LS64(\thread)
-	lwc1	$f27, THREAD_FPR27_LS64(\thread)
-	lwc1	$f28, THREAD_FPR28_LS64(\thread)
-	lwc1	$f29, THREAD_FPR29_LS64(\thread)
-	lwc1	$f30, THREAD_FPR30_LS64(\thread)
-	lwc1	$f31, THREAD_FPR31_LS64(\thread)
+	l.d	$f0,  THREAD_FPR0(\thread)
+	l.d	$f2,  THREAD_FPR2(\thread)
+	l.d	$f4,  THREAD_FPR4(\thread)
+	l.d	$f6,  THREAD_FPR6(\thread)
+	l.d	$f8,  THREAD_FPR8(\thread)
+	l.d	$f10, THREAD_FPR10(\thread)
+	l.d	$f12, THREAD_FPR12(\thread)
+	l.d	$f14, THREAD_FPR14(\thread)
+	l.d	$f16, THREAD_FPR16(\thread)
+	l.d	$f18, THREAD_FPR18(\thread)
+	l.d	$f20, THREAD_FPR20(\thread)
+	l.d	$f22, THREAD_FPR22(\thread)
+	l.d	$f24, THREAD_FPR24(\thread)
+	l.d	$f26, THREAD_FPR26(\thread)
+	l.d	$f28, THREAD_FPR28(\thread)
+	l.d	$f30, THREAD_FPR30(\thread)
 	ctc1	\tmp, fcr31
 	.set pop
 	.endm
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 0cae4595e985..6156ac8c4cfb 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -60,22 +60,22 @@
 	.set	push
 	SET_HARDFLOAT
 	cfc1	\tmp, fcr31
-	sdc1	$f0,  THREAD_FPR0_LS64(\thread)
-	sdc1	$f2,  THREAD_FPR2_LS64(\thread)
-	sdc1	$f4,  THREAD_FPR4_LS64(\thread)
-	sdc1	$f6,  THREAD_FPR6_LS64(\thread)
-	sdc1	$f8,  THREAD_FPR8_LS64(\thread)
-	sdc1	$f10, THREAD_FPR10_LS64(\thread)
-	sdc1	$f12, THREAD_FPR12_LS64(\thread)
-	sdc1	$f14, THREAD_FPR14_LS64(\thread)
-	sdc1	$f16, THREAD_FPR16_LS64(\thread)
-	sdc1	$f18, THREAD_FPR18_LS64(\thread)
-	sdc1	$f20, THREAD_FPR20_LS64(\thread)
-	sdc1	$f22, THREAD_FPR22_LS64(\thread)
-	sdc1	$f24, THREAD_FPR24_LS64(\thread)
-	sdc1	$f26, THREAD_FPR26_LS64(\thread)
-	sdc1	$f28, THREAD_FPR28_LS64(\thread)
-	sdc1	$f30, THREAD_FPR30_LS64(\thread)
+	sdc1	$f0,  THREAD_FPR0(\thread)
+	sdc1	$f2,  THREAD_FPR2(\thread)
+	sdc1	$f4,  THREAD_FPR4(\thread)
+	sdc1	$f6,  THREAD_FPR6(\thread)
+	sdc1	$f8,  THREAD_FPR8(\thread)
+	sdc1	$f10, THREAD_FPR10(\thread)
+	sdc1	$f12, THREAD_FPR12(\thread)
+	sdc1	$f14, THREAD_FPR14(\thread)
+	sdc1	$f16, THREAD_FPR16(\thread)
+	sdc1	$f18, THREAD_FPR18(\thread)
+	sdc1	$f20, THREAD_FPR20(\thread)
+	sdc1	$f22, THREAD_FPR22(\thread)
+	sdc1	$f24, THREAD_FPR24(\thread)
+	sdc1	$f26, THREAD_FPR26(\thread)
+	sdc1	$f28, THREAD_FPR28(\thread)
+	sdc1	$f30, THREAD_FPR30(\thread)
 	sw	\tmp, THREAD_FCR31(\thread)
 	.set	pop
 	.endm
@@ -84,22 +84,22 @@
 	.set	push
 	.set	mips64r2
 	SET_HARDFLOAT
-	sdc1	$f1,  THREAD_FPR1_LS64(\thread)
-	sdc1	$f3,  THREAD_FPR3_LS64(\thread)
-	sdc1	$f5,  THREAD_FPR5_LS64(\thread)
-	sdc1	$f7,  THREAD_FPR7_LS64(\thread)
-	sdc1	$f9,  THREAD_FPR9_LS64(\thread)
-	sdc1	$f11, THREAD_FPR11_LS64(\thread)
-	sdc1	$f13, THREAD_FPR13_LS64(\thread)
-	sdc1	$f15, THREAD_FPR15_LS64(\thread)
-	sdc1	$f17, THREAD_FPR17_LS64(\thread)
-	sdc1	$f19, THREAD_FPR19_LS64(\thread)
-	sdc1	$f21, THREAD_FPR21_LS64(\thread)
-	sdc1	$f23, THREAD_FPR23_LS64(\thread)
-	sdc1	$f25, THREAD_FPR25_LS64(\thread)
-	sdc1	$f27, THREAD_FPR27_LS64(\thread)
-	sdc1	$f29, THREAD_FPR29_LS64(\thread)
-	sdc1	$f31, THREAD_FPR31_LS64(\thread)
+	sdc1	$f1,  THREAD_FPR1(\thread)
+	sdc1	$f3,  THREAD_FPR3(\thread)
+	sdc1	$f5,  THREAD_FPR5(\thread)
+	sdc1	$f7,  THREAD_FPR7(\thread)
+	sdc1	$f9,  THREAD_FPR9(\thread)
+	sdc1	$f11, THREAD_FPR11(\thread)
+	sdc1	$f13, THREAD_FPR13(\thread)
+	sdc1	$f15, THREAD_FPR15(\thread)
+	sdc1	$f17, THREAD_FPR17(\thread)
+	sdc1	$f19, THREAD_FPR19(\thread)
+	sdc1	$f21, THREAD_FPR21(\thread)
+	sdc1	$f23, THREAD_FPR23(\thread)
+	sdc1	$f25, THREAD_FPR25(\thread)
+	sdc1	$f27, THREAD_FPR27(\thread)
+	sdc1	$f29, THREAD_FPR29(\thread)
+	sdc1	$f31, THREAD_FPR31(\thread)
 	.set	pop
 	.endm
 
@@ -118,22 +118,22 @@
 	.set	push
 	SET_HARDFLOAT
 	lw	\tmp, THREAD_FCR31(\thread)
-	ldc1	$f0,  THREAD_FPR0_LS64(\thread)
-	ldc1	$f2,  THREAD_FPR2_LS64(\thread)
-	ldc1	$f4,  THREAD_FPR4_LS64(\thread)
-	ldc1	$f6,  THREAD_FPR6_LS64(\thread)
-	ldc1	$f8,  THREAD_FPR8_LS64(\thread)
-	ldc1	$f10, THREAD_FPR10_LS64(\thread)
-	ldc1	$f12, THREAD_FPR12_LS64(\thread)
-	ldc1	$f14, THREAD_FPR14_LS64(\thread)
-	ldc1	$f16, THREAD_FPR16_LS64(\thread)
-	ldc1	$f18, THREAD_FPR18_LS64(\thread)
-	ldc1	$f20, THREAD_FPR20_LS64(\thread)
-	ldc1	$f22, THREAD_FPR22_LS64(\thread)
-	ldc1	$f24, THREAD_FPR24_LS64(\thread)
-	ldc1	$f26, THREAD_FPR26_LS64(\thread)
-	ldc1	$f28, THREAD_FPR28_LS64(\thread)
-	ldc1	$f30, THREAD_FPR30_LS64(\thread)
+	ldc1	$f0,  THREAD_FPR0(\thread)
+	ldc1	$f2,  THREAD_FPR2(\thread)
+	ldc1	$f4,  THREAD_FPR4(\thread)
+	ldc1	$f6,  THREAD_FPR6(\thread)
+	ldc1	$f8,  THREAD_FPR8(\thread)
+	ldc1	$f10, THREAD_FPR10(\thread)
+	ldc1	$f12, THREAD_FPR12(\thread)
+	ldc1	$f14, THREAD_FPR14(\thread)
+	ldc1	$f16, THREAD_FPR16(\thread)
+	ldc1	$f18, THREAD_FPR18(\thread)
+	ldc1	$f20, THREAD_FPR20(\thread)
+	ldc1	$f22, THREAD_FPR22(\thread)
+	ldc1	$f24, THREAD_FPR24(\thread)
+	ldc1	$f26, THREAD_FPR26(\thread)
+	ldc1	$f28, THREAD_FPR28(\thread)
+	ldc1	$f30, THREAD_FPR30(\thread)
 	ctc1	\tmp, fcr31
 	.endm
 
@@ -141,22 +141,22 @@
 	.set	push
 	.set	mips64r2
 	SET_HARDFLOAT
-	ldc1	$f1,  THREAD_FPR1_LS64(\thread)
-	ldc1	$f3,  THREAD_FPR3_LS64(\thread)
-	ldc1	$f5,  THREAD_FPR5_LS64(\thread)
-	ldc1	$f7,  THREAD_FPR7_LS64(\thread)
-	ldc1	$f9,  THREAD_FPR9_LS64(\thread)
-	ldc1	$f11, THREAD_FPR11_LS64(\thread)
-	ldc1	$f13, THREAD_FPR13_LS64(\thread)
-	ldc1	$f15, THREAD_FPR15_LS64(\thread)
-	ldc1	$f17, THREAD_FPR17_LS64(\thread)
-	ldc1	$f19, THREAD_FPR19_LS64(\thread)
-	ldc1	$f21, THREAD_FPR21_LS64(\thread)
-	ldc1	$f23, THREAD_FPR23_LS64(\thread)
-	ldc1	$f25, THREAD_FPR25_LS64(\thread)
-	ldc1	$f27, THREAD_FPR27_LS64(\thread)
-	ldc1	$f29, THREAD_FPR29_LS64(\thread)
-	ldc1	$f31, THREAD_FPR31_LS64(\thread)
+	ldc1	$f1,  THREAD_FPR1(\thread)
+	ldc1	$f3,  THREAD_FPR3(\thread)
+	ldc1	$f5,  THREAD_FPR5(\thread)
+	ldc1	$f7,  THREAD_FPR7(\thread)
+	ldc1	$f9,  THREAD_FPR9(\thread)
+	ldc1	$f11, THREAD_FPR11(\thread)
+	ldc1	$f13, THREAD_FPR13(\thread)
+	ldc1	$f15, THREAD_FPR15(\thread)
+	ldc1	$f17, THREAD_FPR17(\thread)
+	ldc1	$f19, THREAD_FPR19(\thread)
+	ldc1	$f21, THREAD_FPR21(\thread)
+	ldc1	$f23, THREAD_FPR23(\thread)
+	ldc1	$f25, THREAD_FPR25(\thread)
+	ldc1	$f27, THREAD_FPR27(\thread)
+	ldc1	$f29, THREAD_FPR29(\thread)
+	ldc1	$f31, THREAD_FPR31(\thread)
 	.set	pop
 	.endm
 
@@ -211,6 +211,22 @@
 	.endm
 
 #ifdef TOOLCHAIN_SUPPORTS_MSA
+	.macro	_cfcmsa	rd, cs
+	.set	push
+	.set	mips32r2
+	.set	msa
+	cfcmsa	\rd, $\cs
+	.set	pop
+	.endm
+
+	.macro	_ctcmsa	cd, rs
+	.set	push
+	.set	mips32r2
+	.set	msa
+	ctcmsa	$\cd, \rs
+	.set	pop
+	.endm
+
 	.macro	ld_d	wd, off, base
 	.set	push
 	.set	mips32r2
@@ -227,35 +243,35 @@
 	.set	pop
 	.endm
 
-	.macro	copy_u_w	rd, ws, n
+	.macro	copy_u_w	ws, n
 	.set	push
 	.set	mips32r2
 	.set	msa
-	copy_u.w \rd, $w\ws[\n]
+	copy_u.w $1, $w\ws[\n]
 	.set	pop
 	.endm
 
-	.macro	copy_u_d	rd, ws, n
+	.macro	copy_u_d	ws, n
 	.set	push
 	.set	mips64r2
 	.set	msa
-	copy_u.d \rd, $w\ws[\n]
+	copy_u.d $1, $w\ws[\n]
 	.set	pop
 	.endm
 
-	.macro	insert_w	wd, n, rs
+	.macro	insert_w	wd, n
 	.set	push
 	.set	mips32r2
 	.set	msa
-	insert.w $w\wd[\n], \rs
+	insert.w $w\wd[\n], $1
 	.set	pop
 	.endm
 
-	.macro	insert_d	wd, n, rs
+	.macro	insert_d	wd, n
 	.set	push
 	.set	mips64r2
 	.set	msa
-	insert.d $w\wd[\n], \rs
+	insert.d $w\wd[\n], $1
 	.set	pop
 	.endm
 #else
@@ -283,7 +299,7 @@
 	/*
 	 * Temporary until all toolchains in use include MSA support.
 	 */
-	.macro	cfcmsa	rd, cs
+	.macro	_cfcmsa	rd, cs
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
@@ -293,7 +309,7 @@
 	.set	pop
 	.endm
 
-	.macro	ctcmsa	cd, rs
+	.macro	_ctcmsa	cd, rs
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
@@ -320,44 +336,36 @@
 	.set	pop
 	.endm
 
-	.macro	copy_u_w	rd, ws, n
+	.macro	copy_u_w	ws, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
 	.insn
 	.word	COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
-	/* move triggers an assembler bug... */
-	or	\rd, $1, zero
 	.set	pop
 	.endm
 
-	.macro	copy_u_d	rd, ws, n
+	.macro	copy_u_d	ws, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
 	.insn
 	.word	COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
-	/* move triggers an assembler bug... */
-	or	\rd, $1, zero
 	.set	pop
 	.endm
 
-	.macro	insert_w	wd, n, rs
+	.macro	insert_w	wd, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	/* move triggers an assembler bug... */
-	or	$1, \rs, zero
 	.word	INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
 	.set	pop
 	.endm
 
-	.macro	insert_d	wd, n, rs
+	.macro	insert_d	wd, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	/* move triggers an assembler bug... */
-	or	$1, \rs, zero
 	.word	INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
 	.set	pop
 	.endm
@@ -399,7 +407,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	cfcmsa	$1, MSA_CSR
+	_cfcmsa	$1, MSA_CSR
 	sw	$1, THREAD_MSA_CSR(\thread)
 	.set	pop
 	.endm
@@ -409,7 +417,7 @@
 	.set	noat
 	SET_HARDFLOAT
 	lw	$1, THREAD_MSA_CSR(\thread)
-	ctcmsa	MSA_CSR, $1
+	_ctcmsa	MSA_CSR, $1
 	.set	pop
 	ld_d	0, THREAD_FPR0, \thread
 	ld_d	1, THREAD_FPR1, \thread
@@ -452,9 +460,6 @@
 	insert_w \wd, 2
 	insert_w \wd, 3
 #endif
-	.if	31-\wd
-	msa_init_upper	(\wd+1)
-	.endif
 	.endm
 
 	.macro	msa_init_all_upper
@@ -463,6 +468,37 @@
 	SET_HARDFLOAT
 	not	$1, zero
 	msa_init_upper	0
+	msa_init_upper	1
+	msa_init_upper	2
+	msa_init_upper	3
+	msa_init_upper	4
+	msa_init_upper	5
+	msa_init_upper	6
+	msa_init_upper	7
+	msa_init_upper	8
+	msa_init_upper	9
+	msa_init_upper	10
+	msa_init_upper	11
+	msa_init_upper	12
+	msa_init_upper	13
+	msa_init_upper	14
+	msa_init_upper	15
+	msa_init_upper	16
+	msa_init_upper	17
+	msa_init_upper	18
+	msa_init_upper	19
+	msa_init_upper	20
+	msa_init_upper	21
+	msa_init_upper	22
+	msa_init_upper	23
+	msa_init_upper	24
+	msa_init_upper	25
+	msa_init_upper	26
+	msa_init_upper	27
+	msa_init_upper	28
+	msa_init_upper	29
+	msa_init_upper	30
+	msa_init_upper	31
 	.set	pop
 	.endm
 
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 9f935f6aa996..0cf29bd5dc5c 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -481,7 +481,7 @@ static inline unsigned long __fls(unsigned long word)
 {
 	int num;
 
-	if (BITS_PER_LONG == 32 &&
+	if (BITS_PER_LONG == 32 && !__builtin_constant_p(word) &&
 	    __builtin_constant_p(cpu_has_clo_clz) && cpu_has_clo_clz) {
 		__asm__(
 		"	.set	push					\n"
@@ -494,7 +494,7 @@ static inline unsigned long __fls(unsigned long word)
 		return 31 - num;
 	}
 
-	if (BITS_PER_LONG == 64 &&
+	if (BITS_PER_LONG == 64 && !__builtin_constant_p(word) &&
 	    __builtin_constant_p(cpu_has_mips64) && cpu_has_mips64) {
 		__asm__(
 		"	.set	push					\n"
@@ -559,7 +559,8 @@ static inline int fls(int x)
 {
 	int r;
 
-	if (__builtin_constant_p(cpu_has_clo_clz) && cpu_has_clo_clz) {
+	if (!__builtin_constant_p(x) &&
+	    __builtin_constant_p(cpu_has_clo_clz) && cpu_has_clo_clz) {
 		__asm__(
 		"	.set	push					\n"
 		"	.set	"MIPS_ISA_LEVEL"			\n"
diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h
index 30939b02e3ff..6d25ad33ec78 100644
--- a/arch/mips/include/asm/bmips.h
+++ b/arch/mips/include/asm/bmips.h
@@ -122,6 +122,22 @@ static inline void bmips_write_zscm_reg(unsigned int offset, unsigned long data)
 	barrier();
 }
 
+static inline void bmips_post_dma_flush(struct device *dev)
+{
+	void __iomem *cbr = BMIPS_GET_CBR();
+	u32 cfg;
+
+	if (boot_cpu_type() != CPU_BMIPS3300 &&
+	    boot_cpu_type() != CPU_BMIPS4350 &&
+	    boot_cpu_type() != CPU_BMIPS4380)
+		return;
+
+	/* Flush stale data out of the readahead cache */
+	cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
+	__raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
+	__raw_readl(cbr + BMIPS_RAC_CONFIG);
+}
+
 #endif /* !defined(__ASSEMBLY__) */
 
 #endif /* _ASM_BMIPS_H */
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index e08381a37f8b..723229f4cf27 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -29,6 +29,20 @@
  *  - flush_icache_all() flush the entire instruction cache
  *  - flush_data_cache_page() flushes a page from the data cache
  */
+
+ /*
+ * This flag is used to indicate that the page pointed to by a pte
+ * is dirty and requires cleaning before returning it to the user.
+ */
+#define PG_dcache_dirty			PG_arch_1
+
+#define Page_dcache_dirty(page)		\
+	test_bit(PG_dcache_dirty, &(page)->flags)
+#define SetPageDcacheDirty(page)	\
+	set_bit(PG_dcache_dirty, &(page)->flags)
+#define ClearPageDcacheDirty(page)	\
+	clear_bit(PG_dcache_dirty, &(page)->flags)
+
 extern void (*flush_cache_all)(void);
 extern void (*__flush_cache_all)(void);
 extern void (*flush_cache_mm)(struct mm_struct *mm);
@@ -37,13 +51,15 @@ extern void (*flush_cache_range)(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end);
 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
 extern void __flush_dcache_page(struct page *page);
+extern void __flush_icache_page(struct vm_area_struct *vma, struct page *page);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 static inline void flush_dcache_page(struct page *page)
 {
-	if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc)
+	if (cpu_has_dc_aliases)
 		__flush_dcache_page(page);
-
+	else if (!cpu_has_ic_fills_f_dc)
+		SetPageDcacheDirty(page);
 }
 
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
@@ -61,6 +77,11 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 static inline void flush_icache_page(struct vm_area_struct *vma,
 	struct page *page)
 {
+	if (!cpu_has_ic_fills_f_dc && (vma->vm_flags & VM_EXEC) &&
+	    Page_dcache_dirty(page)) {
+		__flush_icache_page(vma, page);
+		ClearPageDcacheDirty(page);
+	}
 }
 
 extern void (*flush_icache_range)(unsigned long start, unsigned long end);
@@ -95,19 +116,6 @@ extern void (*flush_icache_all)(void);
 extern void (*local_flush_data_cache_page)(void * addr);
 extern void (*flush_data_cache_page)(unsigned long addr);
 
-/*
- * This flag is used to indicate that the page pointed to by a pte
- * is dirty and requires cleaning before returning it to the user.
- */
-#define PG_dcache_dirty			PG_arch_1
-
-#define Page_dcache_dirty(page)		\
-	test_bit(PG_dcache_dirty, &(page)->flags)
-#define SetPageDcacheDirty(page)	\
-	set_bit(PG_dcache_dirty, &(page)->flags)
-#define ClearPageDcacheDirty(page)	\
-	clear_bit(PG_dcache_dirty, &(page)->flags)
-
 /* Run kernel code uncached, useful for cache probing functions. */
 unsigned long run_uncached(void *func);
 
diff --git a/arch/mips/include/asm/cdmm.h b/arch/mips/include/asm/cdmm.h
new file mode 100644
index 000000000000..16e22ce9719f
--- /dev/null
+++ b/arch/mips/include/asm/cdmm.h
@@ -0,0 +1,98 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2014 Imagination Technologies Ltd.
+ */
+#ifndef __ASM_CDMM_H
+#define __ASM_CDMM_H
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+
+/**
+ * struct mips_cdmm_device - Represents a single device on a CDMM bus.
+ * @dev:	Driver model device object.
+ * @cpu:	CPU which can access this device.
+ * @res:	MMIO resource.
+ * @type:	Device type identifier.
+ * @rev:	Device revision number.
+ */
+struct mips_cdmm_device {
+	struct device		dev;
+	unsigned int		cpu;
+	struct resource		res;
+	unsigned int		type;
+	unsigned int		rev;
+};
+
+/**
+ * struct mips_cdmm_driver - Represents a driver for a CDMM device.
+ * @drv:	Driver model driver object.
+ * @probe	Callback for probing newly discovered devices.
+ * @remove:	Callback to remove the device.
+ * @shutdown:	Callback on system shutdown.
+ * @cpu_down:	Callback when the parent CPU is going down.
+ *		Any CPU pinned threads/timers should be disabled.
+ * @cpu_up:	Callback when the parent CPU is coming back up again.
+ *		CPU pinned threads/timers can be restarted.
+ * @id_table:	Table for CDMM IDs to match against.
+ */
+struct mips_cdmm_driver {
+	struct device_driver	drv;
+	int			(*probe)(struct mips_cdmm_device *);
+	int			(*remove)(struct mips_cdmm_device *);
+	void			(*shutdown)(struct mips_cdmm_device *);
+	int			(*cpu_down)(struct mips_cdmm_device *);
+	int			(*cpu_up)(struct mips_cdmm_device *);
+	const struct mips_cdmm_device_id *id_table;
+};
+
+/**
+ * mips_cdmm_phys_base() - Choose a physical base address for CDMM region.
+ *
+ * Picking a suitable physical address at which to map the CDMM region is
+ * platform specific, so this weak function can be defined by platform code to
+ * pick a suitable value if none is configured by the bootloader.
+ *
+ * This address must be 32kB aligned, and the region occupies a maximum of 32kB
+ * of physical address space which must not be used for anything else.
+ *
+ * Returns:	Physical base address for CDMM region, or 0 on failure.
+ */
+phys_addr_t __weak mips_cdmm_phys_base(void);
+
+extern struct bus_type mips_cdmm_bustype;
+void __iomem *mips_cdmm_early_probe(unsigned int dev_type);
+
+#define to_mips_cdmm_device(d)	container_of(d, struct mips_cdmm_device, dev)
+
+#define mips_cdmm_get_drvdata(d)	dev_get_drvdata(&d->dev)
+#define mips_cdmm_set_drvdata(d, p)	dev_set_drvdata(&d->dev, p)
+
+int mips_cdmm_driver_register(struct mips_cdmm_driver *);
+void mips_cdmm_driver_unregister(struct mips_cdmm_driver *);
+
+/*
+ * module_mips_cdmm_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_mips_cdmm_driver(__mips_cdmm_driver) \
+	module_driver(__mips_cdmm_driver, mips_cdmm_driver_register, \
+			mips_cdmm_driver_unregister)
+
+/* drivers/tty/mips_ejtag_fdc.c */
+
+#ifdef CONFIG_MIPS_EJTAG_FDC_EARLYCON
+int setup_early_fdc_console(void);
+#else
+static inline int setup_early_fdc_console(void)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif /* __ASM_CDMM_H */
diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h
index 65f9bdd02f1f..f0edf6fcd002 100644
--- a/arch/mips/include/asm/cevt-r4k.h
+++ b/arch/mips/include/asm/cevt-r4k.h
@@ -27,23 +27,4 @@ irqreturn_t c0_compare_interrupt(int, void *);
 extern struct irqaction c0_compare_irqaction;
 extern int cp0_timer_irq_installed;
 
-/*
- * Possibly handle a performance counter interrupt.
- * Return true if the timer interrupt should not be checked
- */
-
-static inline int handle_perf_irq(int r2)
-{
-	/*
-	 * The performance counter overflow interrupt may be shared with the
-	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
-	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
-	 * and we can't reliably determine if a counter interrupt has also
-	 * happened (!r2) then don't check for a timer interrupt.
-	 */
-	return (cp0_perfcount_irq < 0) &&
-		perf_irq() == IRQ_HANDLED &&
-		!r2;
-}
-
 #endif /* __ASM_CEVT_R4K_H */
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 5c585c5c1c3e..3ceacde5eb6e 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -218,6 +218,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 					  __u32 len, unsigned short proto,
 					  __wsum sum)
 {
+	__wsum tmp;
+
 	__asm__(
 	"	.set	push		# csum_ipv6_magic\n"
 	"	.set	noreorder	\n"
@@ -270,9 +272,9 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 
 	"	addu	%0, $1		# Add final carry\n"
 	"	.set	pop"
-	: "=r" (sum), "=r" (proto)
+	: "=&r" (sum), "=&r" (tmp)
 	: "r" (saddr), "r" (daddr),
-	  "0" (htonl(len)), "1" (htonl(proto)), "r" (sum));
+	  "0" (htonl(len)), "r" (htonl(proto)), "r" (sum));
 
 	return csum_fold(sum);
 }
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index d0a2a68ca600..412f945f1f5e 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -229,21 +229,22 @@ extern void __cmpxchg_called_with_bad_pointer(void);
 #define cmpxchg(ptr, old, new)		__cmpxchg(ptr, old, new, smp_mb__before_llsc(), smp_llsc_mb())
 #define cmpxchg_local(ptr, old, new)	__cmpxchg(ptr, old, new, , )
 
-#define cmpxchg64(ptr, o, n)						\
+#ifdef CONFIG_64BIT
+#define cmpxchg64_local(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
+	cmpxchg_local((ptr), (o), (n));					\
   })
 
-#ifdef CONFIG_64BIT
-#define cmpxchg64_local(ptr, o, n)					\
+#define cmpxchg64(ptr, o, n)						\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	cmpxchg((ptr), (o), (n));					\
   })
 #else
 #include <asm-generic/cmpxchg-local.h>
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n))
 #endif
 
 #endif /* __ASM_CMPXCHG_H */
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 0d8208de9a3f..5aeaf19c26b0 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -68,6 +68,7 @@
 #ifndef cpu_has_octeon_cache
 #define cpu_has_octeon_cache	0
 #endif
+/* Don't override `cpu_has_fpu' to 1 or the "nofpu" option won't work.  */
 #ifndef cpu_has_fpu
 #define cpu_has_fpu		(current_cpu_data.options & MIPS_CPU_FPU)
 #define raw_cpu_has_fpu		(raw_current_cpu_data.options & MIPS_CPU_FPU)
@@ -139,6 +140,9 @@
 # endif
 #endif
 
+#ifndef cpu_has_xpa
+#define cpu_has_xpa		(cpu_data[0].options & MIPS_CPU_XPA)
+#endif
 #ifndef cpu_has_vtag_icache
 #define cpu_has_vtag_icache	(cpu_data[0].icache.flags & MIPS_CACHE_VTAG)
 #endif
@@ -220,8 +224,11 @@
 #define cpu_has_mips_4_5_r	(cpu_has_mips_4 | cpu_has_mips_5_r)
 #define cpu_has_mips_5_r	(cpu_has_mips_5 | cpu_has_mips_r)
 
-#define cpu_has_mips_4_5_r2_r6	(cpu_has_mips_4_5 | cpu_has_mips_r2 | \
-				 cpu_has_mips_r6)
+#define cpu_has_mips_3_4_5_64_r2_r6					\
+				(cpu_has_mips_3 | cpu_has_mips_4_5_64_r2_r6)
+#define cpu_has_mips_4_5_64_r2_r6					\
+				(cpu_has_mips_4_5 | cpu_has_mips64r1 |	\
+				 cpu_has_mips_r2 | cpu_has_mips_r6)
 
 #define cpu_has_mips32	(cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6)
 #define cpu_has_mips64	(cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6)
@@ -235,8 +242,39 @@
 /* MIPSR2 and MIPSR6 have a lot of similarities */
 #define cpu_has_mips_r2_r6	(cpu_has_mips_r2 | cpu_has_mips_r6)
 
+/*
+ * cpu_has_mips_r2_exec_hazard - return if IHB is required on current processor
+ *
+ * Returns non-zero value if the current processor implementation requires
+ * an IHB instruction to deal with an instruction hazard as per MIPS R2
+ * architecture specification, zero otherwise.
+ */
 #ifndef cpu_has_mips_r2_exec_hazard
-#define cpu_has_mips_r2_exec_hazard (cpu_has_mips_r2 | cpu_has_mips_r6)
+#define cpu_has_mips_r2_exec_hazard					\
+({									\
+	int __res;							\
+									\
+	switch (current_cpu_type()) {					\
+	case CPU_M14KC:							\
+	case CPU_74K:							\
+	case CPU_1074K:							\
+	case CPU_PROAPTIV:						\
+	case CPU_P5600:							\
+	case CPU_M5150:							\
+	case CPU_QEMU_GENERIC:						\
+	case CPU_CAVIUM_OCTEON:						\
+	case CPU_CAVIUM_OCTEON_PLUS:					\
+	case CPU_CAVIUM_OCTEON2:					\
+	case CPU_CAVIUM_OCTEON3:					\
+		__res = 0;						\
+		break;							\
+									\
+	default:							\
+		__res = 1;						\
+	}								\
+									\
+	__res;								\
+})
 #endif
 
 /*
@@ -366,4 +404,8 @@
 # define cpu_has_fre		(cpu_data[0].options & MIPS_CPU_FRE)
 #endif
 
+#ifndef cpu_has_cdmm
+# define cpu_has_cdmm		(cpu_data[0].options & MIPS_CPU_CDMM)
+#endif
+
 #endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index c3f4f2d2e108..e7dc785a91ca 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -49,6 +49,8 @@ struct cpuinfo_mips {
 	unsigned int		udelay_val;
 	unsigned int		processor_id;
 	unsigned int		fpu_id;
+	unsigned int		fpu_csr31;
+	unsigned int		fpu_msk31;
 	unsigned int		msa_id;
 	unsigned int		cputype;
 	int			isa_level;
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 8245875f8b33..33f3cab9e689 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -157,6 +157,7 @@ static inline int __pure __get_cpu_type(const int cpu_type)
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 #endif
 #ifdef CONFIG_SYS_HAS_CPU_RM7000
 	case CPU_RM7000:
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 15687234d70a..e3adca1d0b99 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -67,7 +67,7 @@
 #define PRID_IMP_R4300		0x0b00
 #define PRID_IMP_VR41XX		0x0c00
 #define PRID_IMP_R12000		0x0e00
-#define PRID_IMP_R14000		0x0f00
+#define PRID_IMP_R14000		0x0f00		/* R14K && R16K */
 #define PRID_IMP_R8000		0x1000
 #define PRID_IMP_PR4450		0x1200
 #define PRID_IMP_R4600		0x2000
@@ -284,8 +284,8 @@ enum cpu_type_enum {
 	CPU_R4000PC, CPU_R4000SC, CPU_R4000MC, CPU_R4200, CPU_R4300, CPU_R4310,
 	CPU_R4400PC, CPU_R4400SC, CPU_R4400MC, CPU_R4600, CPU_R4640, CPU_R4650,
 	CPU_R4700, CPU_R5000, CPU_R5500, CPU_NEVADA, CPU_R5432, CPU_R10000,
-	CPU_R12000, CPU_R14000, CPU_VR41XX, CPU_VR4111, CPU_VR4121, CPU_VR4122,
-	CPU_VR4131, CPU_VR4133, CPU_VR4181, CPU_VR4181A, CPU_RM7000,
+	CPU_R12000, CPU_R14000, CPU_R16000, CPU_VR41XX, CPU_VR4111, CPU_VR4121,
+	CPU_VR4122, CPU_VR4131, CPU_VR4133, CPU_VR4181, CPU_VR4181A, CPU_RM7000,
 	CPU_SR71000, CPU_TX49XX,
 
 	/*
@@ -377,6 +377,8 @@ enum cpu_type_enum {
 #define MIPS_CPU_MAAR		0x400000000ull /* MAAR(I) registers are present */
 #define MIPS_CPU_FRE		0x800000000ull /* FRE & UFE bits implemented */
 #define MIPS_CPU_RW_LLB		0x1000000000ull /* LLADDR/LLB writes are allowed */
+#define MIPS_CPU_XPA		0x2000000000ull /* CPU supports Extended Physical Addressing */
+#define MIPS_CPU_CDMM		0x4000000000ull	/* CPU has Common Device Memory Map */
 
 /*
  * CPU ASE encodings
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 06412aa9e3fb..fd1b4a150759 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -23,7 +23,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	if (!dev->dma_mask)
-		return 0;
+		return false;
 
 	return addr + size <= *dev->dma_mask;
 }
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index 535f196ffe02..a594d8ed9698 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -11,6 +11,9 @@
 #include <linux/fs.h>
 #include <uapi/linux/elf.h>
 
+#include <asm/cpu-info.h>
+#include <asm/current.h>
+
 /* ELF header e_flags defines. */
 /* MIPS architecture level. */
 #define EF_MIPS_ARCH_1		0x00000000	/* -mips1 code.	 */
@@ -294,9 +297,14 @@ do {									\
 	if (personality(current->personality) != PER_LINUX)		\
 		set_personality(PER_LINUX);				\
 									\
+	clear_thread_flag(TIF_HYBRID_FPREGS);				\
+	set_thread_flag(TIF_32BIT_FPREGS);				\
+									\
 	mips_set_personality_fp(state);					\
 									\
 	current->thread.abi = &mips_abi;				\
+									\
+	current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31;		\
 } while (0)
 
 #endif /* CONFIG_32BIT */
@@ -319,6 +327,8 @@ do {									\
 	do {								\
 		set_thread_flag(TIF_32BIT_REGS);			\
 		set_thread_flag(TIF_32BIT_ADDR);			\
+		clear_thread_flag(TIF_HYBRID_FPREGS);			\
+		set_thread_flag(TIF_32BIT_FPREGS);			\
 									\
 		mips_set_personality_fp(state);				\
 									\
@@ -356,6 +366,8 @@ do {									\
 	else								\
 		current->thread.abi = &mips_abi;			\
 									\
+	current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31;		\
+									\
 	p = personality(current->personality);				\
 	if (p != PER_LINUX32 && p != PER_LINUX)				\
 		set_personality(PER_LINUX);				\
@@ -410,10 +422,6 @@ struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int uses_interp);
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 struct arch_elf_state {
 	int fp_abi;
 	int interp_fp_abi;
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index dd083e999b08..084780b355aa 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -30,7 +30,7 @@
 struct sigcontext;
 struct sigcontext32;
 
-extern void _init_fpu(void);
+extern void _init_fpu(unsigned int);
 extern void _save_fp(struct task_struct *);
 extern void _restore_fp(struct task_struct *);
 
@@ -48,6 +48,12 @@ enum fpu_mode {
 #define FPU_FR_MASK		0x1
 };
 
+#define __disable_fpu()							\
+do {									\
+	clear_c0_status(ST0_CU1);					\
+	disable_fpu_hazard();						\
+} while (0)
+
 static inline int __enable_fpu(enum fpu_mode mode)
 {
 	int fr;
@@ -86,7 +92,12 @@ fr_common:
 		enable_fpu_hazard();
 
 		/* check FR has the desired value */
-		return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+		if (!!(read_c0_status() & ST0_FR) == !!fr)
+			return 0;
+
+		/* unsupported FR value */
+		__disable_fpu();
+		return SIGFPE;
 
 	default:
 		BUG();
@@ -95,12 +106,6 @@ fr_common:
 	return SIGFPE;
 }
 
-#define __disable_fpu()							\
-do {									\
-	clear_c0_status(ST0_CU1);					\
-	disable_fpu_hazard();						\
-} while (0)
-
 #define clear_fpu_owner()	clear_thread_flag(TIF_USEDFPU)
 
 static inline int __is_fpu_owner(void)
@@ -170,6 +175,7 @@ static inline void lose_fpu(int save)
 		}
 		disable_msa();
 		clear_thread_flag(TIF_USEDMSA);
+		__disable_fpu();
 	} else if (is_fpu_owner()) {
 		if (save)
 			_save_fp(current);
@@ -182,6 +188,7 @@ static inline void lose_fpu(int save)
 
 static inline int init_fpu(void)
 {
+	unsigned int fcr31 = current->thread.fpu.fcr31;
 	int ret = 0;
 
 	if (cpu_has_fpu) {
@@ -192,7 +199,7 @@ static inline int init_fpu(void)
 			return ret;
 
 		if (!cpu_has_fre) {
-			_init_fpu();
+			_init_fpu(fcr31);
 
 			return 0;
 		}
@@ -206,7 +213,7 @@ static inline int init_fpu(void)
 		config5 = clear_c0_config5(MIPS_CONF5_FRE);
 		enable_fpu_hazard();
 
-		_init_fpu();
+		_init_fpu(fcr31);
 
 		/* Restore FRE */
 		write_c0_config5(config5);
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index 3ee347713307..2f021cdfba4f 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -44,6 +44,7 @@ struct mips_fpu_emulator_stats {
 	unsigned long ieee754_overflow;
 	unsigned long ieee754_zerodiv;
 	unsigned long ieee754_invalidop;
+	unsigned long ds_emul;
 };
 
 DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
@@ -65,7 +66,8 @@ extern int do_dsemulret(struct pt_regs *xcp);
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
 				    struct mips_fpu_struct *ctx, int has_fpu,
 				    void *__user *fault_addr);
-int process_fpemu_return(int sig, void __user *fault_addr);
+int process_fpemu_return(int sig, void __user *fault_addr,
+			 unsigned long fcr31);
 int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 		     unsigned long *contpc);
 
@@ -86,8 +88,6 @@ static inline void fpu_emulator_init_fpu(void)
 	struct task_struct *t = current;
 	int i;
 
-	t->thread.fpu.fcr31 = 0;
-
 	for (i = 0; i < 32; i++)
 		set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
 }
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 5a4e1bb8fb1b..f0db99f8defe 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -47,6 +47,9 @@ extern void free_irqno(unsigned int irq);
 extern int cp0_compare_irq;
 extern int cp0_compare_irq_shift;
 extern int cp0_perfcount_irq;
+extern int cp0_fdc_irq;
+
+extern int __weak get_c0_fdc_int(void);
 
 void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index fdbff44e5482..608aa57799c8 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -8,9 +8,9 @@
 #ifndef _ASM_MIPS_JUMP_LABEL_H
 #define _ASM_MIPS_JUMP_LABEL_H
 
-#include <linux/types.h>
+#ifndef __ASSEMBLY__
 
-#ifdef __KERNEL__
+#include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
@@ -39,8 +39,6 @@ l_yes:
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_64BIT
 typedef u64 jump_label_t;
 #else
@@ -53,4 +51,5 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_JUMP_LABEL_H */
diff --git a/arch/mips/include/asm/kdebug.h b/arch/mips/include/asm/kdebug.h
index 6a9af5fcb5d7..cba22ab7ad4d 100644
--- a/arch/mips/include/asm/kdebug.h
+++ b/arch/mips/include/asm/kdebug.h
@@ -10,7 +10,8 @@ enum die_val {
 	DIE_RI,
 	DIE_PAGE_FAULT,
 	DIE_BREAK,
-	DIE_SSTEPBP
+	DIE_SSTEPBP,
+	DIE_MSAFP
 };
 
 #endif /* _ASM_MIPS_KDEBUG_H */
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index ac4fc716062b..4c25823563fe 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -21,10 +21,10 @@
 
 /* MIPS KVM register ids */
 #define MIPS_CP0_32(_R, _S)					\
-	(KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+	(KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
 
 #define MIPS_CP0_64(_R, _S)					\
-	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+	(KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S)))
 
 #define KVM_REG_MIPS_CP0_INDEX		MIPS_CP0_32(0, 0)
 #define KVM_REG_MIPS_CP0_ENTRYLO0	MIPS_CP0_64(2, 0)
@@ -42,11 +42,14 @@
 #define KVM_REG_MIPS_CP0_STATUS		MIPS_CP0_32(12, 0)
 #define KVM_REG_MIPS_CP0_CAUSE		MIPS_CP0_32(13, 0)
 #define KVM_REG_MIPS_CP0_EPC		MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_PRID		MIPS_CP0_32(15, 0)
 #define KVM_REG_MIPS_CP0_EBASE		MIPS_CP0_64(15, 1)
 #define KVM_REG_MIPS_CP0_CONFIG		MIPS_CP0_32(16, 0)
 #define KVM_REG_MIPS_CP0_CONFIG1	MIPS_CP0_32(16, 1)
 #define KVM_REG_MIPS_CP0_CONFIG2	MIPS_CP0_32(16, 2)
 #define KVM_REG_MIPS_CP0_CONFIG3	MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG4	MIPS_CP0_32(16, 4)
+#define KVM_REG_MIPS_CP0_CONFIG5	MIPS_CP0_32(16, 5)
 #define KVM_REG_MIPS_CP0_CONFIG7	MIPS_CP0_32(16, 7)
 #define KVM_REG_MIPS_CP0_XCONTEXT	MIPS_CP0_64(20, 0)
 #define KVM_REG_MIPS_CP0_ERROREPC	MIPS_CP0_64(30, 0)
@@ -119,6 +122,10 @@ struct kvm_vcpu_stat {
 	u32 syscall_exits;
 	u32 resvd_inst_exits;
 	u32 break_inst_exits;
+	u32 trap_inst_exits;
+	u32 msa_fpe_exits;
+	u32 fpe_exits;
+	u32 msa_disabled_exits;
 	u32 flush_dcache_exits;
 	u32 halt_successful_poll;
 	u32 halt_wakeup;
@@ -138,6 +145,10 @@ enum kvm_mips_exit_types {
 	SYSCALL_EXITS,
 	RESVD_INST_EXITS,
 	BREAK_INST_EXITS,
+	TRAP_INST_EXITS,
+	MSA_FPE_EXITS,
+	FPE_EXITS,
+	MSA_DISABLED_EXITS,
 	FLUSH_DCACHE_EXITS,
 	MAX_KVM_MIPS_EXIT_TYPES
 };
@@ -206,6 +217,8 @@ struct mips_coproc {
 #define MIPS_CP0_CONFIG1_SEL	1
 #define MIPS_CP0_CONFIG2_SEL	2
 #define MIPS_CP0_CONFIG3_SEL	3
+#define MIPS_CP0_CONFIG4_SEL	4
+#define MIPS_CP0_CONFIG5_SEL	5
 
 /* Config0 register bits */
 #define CP0C0_M			31
@@ -262,31 +275,6 @@ struct mips_coproc {
 #define CP0C3_SM		1
 #define CP0C3_TL		0
 
-/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0						\
-  ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
-
-/* Have config2, no coprocessor2 attached, no MDMX support attached,
-   no performance counters, watch registers present,
-   no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1						\
-((1 << CP0C1_M) |						\
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) |		\
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) |		\
- (0 << CP0C1_FP))
-
-/* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2						\
-((1 << CP0C2_M))
-
-/* No config4, no DSP ASE, no large physaddr (PABITS),
-   no external interrupt controller, no vectored interrupts,
-   no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3						\
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) |	\
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) |	\
- (0 << CP0C3_SM) | (0 << CP0C3_TL))
-
 /* MMU types, the first four entries have the same layout as the
    CP0C0_MT field.  */
 enum mips_mmu_types {
@@ -321,7 +309,9 @@ enum mips_mmu_types {
  */
 #define T_TRAP			13	/* Trap instruction */
 #define T_VCEI			14	/* Virtual coherency exception */
+#define T_MSAFPE		14	/* MSA floating point exception */
 #define T_FPE			15	/* Floating point exception */
+#define T_MSADIS		21	/* MSA disabled exception */
 #define T_WATCH			23	/* Watch address reference */
 #define T_VCED			31	/* Virtual coherency data */
 
@@ -374,6 +364,9 @@ struct kvm_mips_tlb {
 	long tlb_lo1;
 };
 
+#define KVM_MIPS_FPU_FPU	0x1
+#define KVM_MIPS_FPU_MSA	0x2
+
 #define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
 	void *host_ebase, *guest_ebase;
@@ -395,6 +388,8 @@ struct kvm_vcpu_arch {
 
 	/* FPU State */
 	struct mips_fpu_struct fpu;
+	/* Which FPU state is loaded (KVM_MIPS_FPU_*) */
+	unsigned int fpu_inuse;
 
 	/* COP0 State */
 	struct mips_coproc *cop0;
@@ -441,6 +436,9 @@ struct kvm_vcpu_arch {
 
 	/* WAIT executed */
 	int wait;
+
+	u8 fpu_enabled;
+	u8 msa_enabled;
 };
 
 
@@ -482,11 +480,15 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_config1(cop0)		(cop0->reg[MIPS_CP0_CONFIG][1])
 #define kvm_read_c0_guest_config2(cop0)		(cop0->reg[MIPS_CP0_CONFIG][2])
 #define kvm_read_c0_guest_config3(cop0)		(cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config4(cop0)		(cop0->reg[MIPS_CP0_CONFIG][4])
+#define kvm_read_c0_guest_config5(cop0)		(cop0->reg[MIPS_CP0_CONFIG][5])
 #define kvm_read_c0_guest_config7(cop0)		(cop0->reg[MIPS_CP0_CONFIG][7])
 #define kvm_write_c0_guest_config(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][0] = (val))
 #define kvm_write_c0_guest_config1(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][1] = (val))
 #define kvm_write_c0_guest_config2(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][2] = (val))
 #define kvm_write_c0_guest_config3(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config4(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][4] = (val))
+#define kvm_write_c0_guest_config5(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][5] = (val))
 #define kvm_write_c0_guest_config7(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][7] = (val))
 #define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
@@ -567,6 +569,31 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
 	kvm_set_c0_guest_ebase(cop0, ((val) & (change)));		\
 }
 
+/* Helpers */
+
+static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+{
+	return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+		vcpu->fpu_enabled;
+}
+
+static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+{
+	return kvm_mips_guest_can_have_fpu(vcpu) &&
+		kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
+}
+
+static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+{
+	return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) &&
+		vcpu->msa_enabled;
+}
+
+static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu)
+{
+	return kvm_mips_guest_can_have_msa(vcpu) &&
+		kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA;
+}
 
 struct kvm_mips_callbacks {
 	int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
@@ -578,6 +605,10 @@ struct kvm_mips_callbacks {
 	int (*handle_syscall)(struct kvm_vcpu *vcpu);
 	int (*handle_res_inst)(struct kvm_vcpu *vcpu);
 	int (*handle_break)(struct kvm_vcpu *vcpu);
+	int (*handle_trap)(struct kvm_vcpu *vcpu);
+	int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
+	int (*handle_fpe)(struct kvm_vcpu *vcpu);
+	int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
 	int (*vm_init)(struct kvm *kvm);
 	int (*vcpu_init)(struct kvm_vcpu *vcpu);
 	int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -596,6 +627,8 @@ struct kvm_mips_callbacks {
 			   const struct kvm_one_reg *reg, s64 *v);
 	int (*set_one_reg)(struct kvm_vcpu *vcpu,
 			   const struct kvm_one_reg *reg, s64 v);
+	int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
+	int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
 };
 extern struct kvm_mips_callbacks *kvm_mips_callbacks;
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -606,6 +639,19 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
 /* Trampoline ASM routine to start running in "Guest" context */
 extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
 
+/* FPU/MSA context management */
+void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu);
+void __kvm_save_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu);
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_msa(struct kvm_vcpu *vcpu);
+void kvm_drop_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+
 /* TLB handling */
 uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
 
@@ -711,6 +757,26 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
 						     struct kvm_run *run,
 						     struct kvm_vcpu *vcpu);
 
+extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+						       uint32_t *opc,
+						       struct kvm_run *run,
+						       struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+							 uint32_t *opc,
+							 struct kvm_run *run,
+							 struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+						      uint32_t *opc,
+						      struct kvm_run *run,
+						      struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+							 uint32_t *opc,
+							 struct kvm_run *run,
+							 struct kvm_vcpu *vcpu);
+
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 							 struct kvm_run *run);
 
@@ -749,6 +815,11 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst,
 					    struct kvm_run *run,
 					    struct kvm_vcpu *vcpu);
 
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+
 /* Dynamic binary translation */
 extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
 				      struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/mach-ar7/war.h b/arch/mips/include/asm/mach-ar7/war.h
deleted file mode 100644
index 99071e50faab..000000000000
--- a/arch/mips/include/asm/mach-ar7/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_AR7_WAR_H
-#define __ASM_MIPS_MACH_AR7_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_AR7_WAR_H */
diff --git a/arch/mips/include/asm/mach-ath25/dma-coherence.h b/arch/mips/include/asm/mach-ath25/dma-coherence.h
index d8009c93a465..d5defdde32db 100644
--- a/arch/mips/include/asm/mach-ath25/dma-coherence.h
+++ b/arch/mips/include/asm/mach-ath25/dma-coherence.h
@@ -59,16 +59,6 @@ static inline int plat_dma_supported(struct device *dev, u64 mask)
 	return 1;
 }
 
-static inline void plat_extra_sync_for_device(struct device *dev)
-{
-}
-
-static inline int plat_dma_mapping_error(struct device *dev,
-					 dma_addr_t dma_addr)
-{
-	return 0;
-}
-
 static inline int plat_device_is_coherent(struct device *dev)
 {
 #ifdef CONFIG_DMA_COHERENT
@@ -79,4 +69,8 @@ static inline int plat_device_is_coherent(struct device *dev)
 #endif
 }
 
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+
 #endif /* __ASM_MACH_ATH25_DMA_COHERENCE_H */
diff --git a/arch/mips/include/asm/mach-ath25/war.h b/arch/mips/include/asm/mach-ath25/war.h
deleted file mode 100644
index e3a5250ebd67..000000000000
--- a/arch/mips/include/asm/mach-ath25/war.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
- */
-#ifndef __ASM_MACH_ATH25_WAR_H
-#define __ASM_MACH_ATH25_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define RM9000_CDEX_SMP_WAR		0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MACH_ATH25_WAR_H */
diff --git a/arch/mips/include/asm/mach-au1x00/war.h b/arch/mips/include/asm/mach-au1x00/war.h
deleted file mode 100644
index 72e260d24e59..000000000000
--- a/arch/mips/include/asm/mach-au1x00/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_AU1X00_WAR_H
-#define __ASM_MIPS_MACH_AU1X00_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_AU1X00_WAR_H */
diff --git a/arch/mips/include/asm/mach-bcm3384/war.h b/arch/mips/include/asm/mach-bcm3384/war.h
deleted file mode 100644
index 59d7599059b0..000000000000
--- a/arch/mips/include/asm/mach-bcm3384/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_BCM3384_WAR_H
-#define __ASM_MIPS_MACH_BCM3384_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_BCM3384_WAR_H */
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
index 7527c1d33d02..8ed77f618940 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
@@ -22,6 +22,7 @@
 #include <linux/ssb/ssb.h>
 #include <linux/bcma/bcma.h>
 #include <linux/bcma/bcma_soc.h>
+#include <linux/bcm47xx_nvram.h>
 
 enum bcm47xx_bus_type {
 #ifdef CONFIG_BCM47XX_SSB
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
index 1f5643b89a91..c41d1dce1062 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
@@ -67,6 +67,7 @@ enum bcm47xx_board {
 	BCM47XX_BOARD_LINKSYS_WRT150NV11,
 	BCM47XX_BOARD_LINKSYS_WRT160NV1,
 	BCM47XX_BOARD_LINKSYS_WRT160NV3,
+	BCM47XX_BOARD_LINKSYS_WRT300N_V1,
 	BCM47XX_BOARD_LINKSYS_WRT300NV11,
 	BCM47XX_BOARD_LINKSYS_WRT310NV1,
 	BCM47XX_BOARD_LINKSYS_WRT310NV2,
@@ -74,6 +75,7 @@ enum bcm47xx_board {
 	BCM47XX_BOARD_LINKSYS_WRT54G_TYPE_0101,
 	BCM47XX_BOARD_LINKSYS_WRT54G_TYPE_0467,
 	BCM47XX_BOARD_LINKSYS_WRT54G_TYPE_0708,
+	BCM47XX_BOARD_LINKSYS_WRT600N_V11,
 	BCM47XX_BOARD_LINKSYS_WRT610NV1,
 	BCM47XX_BOARD_LINKSYS_WRT610NV2,
 	BCM47XX_BOARD_LINKSYS_WRTSL54GS,
@@ -86,9 +88,11 @@ enum bcm47xx_board {
 
 	BCM47XX_BOARD_NETGEAR_WGR614V8,
 	BCM47XX_BOARD_NETGEAR_WGR614V9,
+	BCM47XX_BOARD_NETGEAR_WGR614_V10,
 	BCM47XX_BOARD_NETGEAR_WNDR3300,
 	BCM47XX_BOARD_NETGEAR_WNDR3400V1,
 	BCM47XX_BOARD_NETGEAR_WNDR3400V2,
+	BCM47XX_BOARD_NETGEAR_WNDR3400_V3,
 	BCM47XX_BOARD_NETGEAR_WNDR3400VCNA,
 	BCM47XX_BOARD_NETGEAR_WNDR3700V3,
 	BCM47XX_BOARD_NETGEAR_WNDR4000,
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_nvram.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_nvram.h
deleted file mode 100644
index ee59ffe99922..000000000000
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_nvram.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- *  Copyright (C) 2005, Broadcom Corporation
- *  Copyright (C) 2006, Felix Fietkau <nbd@openwrt.org>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- */
-
-#ifndef __BCM47XX_NVRAM_H
-#define __BCM47XX_NVRAM_H
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-
-int bcm47xx_nvram_init_from_mem(u32 base, u32 lim);
-int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len);
-int bcm47xx_nvram_gpio_pin(const char *name);
-
-#endif /* __BCM47XX_NVRAM_H */
diff --git a/arch/mips/include/asm/mach-bcm47xx/war.h b/arch/mips/include/asm/mach-bcm47xx/war.h
deleted file mode 100644
index a3d2f448b10e..000000000000
--- a/arch/mips/include/asm/mach-bcm47xx/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_BCM47XX_WAR_H
-#define __ASM_MIPS_MACH_BCM47XX_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_BCM47XX_WAR_H */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 4794067cb5a7..5035f09c5427 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -1259,20 +1259,6 @@
 #define M2M_DSTID_REG(x)		((x) * 0x40 + 0x18)
 
 /*************************************************************************
- * _REG relative to RSET_RNG
- *************************************************************************/
-
-#define RNG_CTRL			0x00
-#define RNG_EN				(1 << 0)
-
-#define RNG_STAT			0x04
-#define RNG_AVAIL_MASK			(0xff000000)
-
-#define RNG_DATA			0x08
-#define RNG_THRES			0x0c
-#define RNG_MASK			0x10
-
-/*************************************************************************
  * _REG relative to RSET_SPI
  *************************************************************************/
 
diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h
new file mode 100644
index 000000000000..11d3b572b1b3
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H
+#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H
+
+#include <asm/bmips.h>
+
+#define plat_post_dma_flush	bmips_post_dma_flush
+
+#include <asm/mach-generic/dma-coherence.h>
+
+#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */
diff --git a/arch/mips/include/asm/mach-bcm63xx/war.h b/arch/mips/include/asm/mach-bcm63xx/war.h
deleted file mode 100644
index 05ee8671bef1..000000000000
--- a/arch/mips/include/asm/mach-bcm63xx/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_BCM63XX_WAR_H
-#define __ASM_MIPS_MACH_BCM63XX_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_BCM63XX_WAR_H */
diff --git a/arch/mips/include/asm/mach-bcm3384/dma-coherence.h b/arch/mips/include/asm/mach-bmips/dma-coherence.h
index a3be8e50e1f0..d29781f02285 100644
--- a/arch/mips/include/asm/mach-bcm3384/dma-coherence.h
+++ b/arch/mips/include/asm/mach-bmips/dma-coherence.h
@@ -12,8 +12,12 @@
  * GNU General Public License for more details.
  */
 
-#ifndef __ASM_MACH_BCM3384_DMA_COHERENCE_H
-#define __ASM_MACH_BCM3384_DMA_COHERENCE_H
+#ifndef __ASM_MACH_BMIPS_DMA_COHERENCE_H
+#define __ASM_MACH_BMIPS_DMA_COHERENCE_H
+
+#include <asm/bmips.h>
+#include <asm/cpu-type.h>
+#include <asm/cpu.h>
 
 struct device;
 
@@ -45,4 +49,6 @@ static inline int plat_device_is_coherent(struct device *dev)
 	return 0;
 }
 
-#endif /* __ASM_MACH_BCM3384_DMA_COHERENCE_H */
+#define plat_post_dma_flush	bmips_post_dma_flush
+
+#endif /* __ASM_MACH_BMIPS_DMA_COHERENCE_H */
diff --git a/arch/mips/include/asm/mach-bmips/spaces.h b/arch/mips/include/asm/mach-bmips/spaces.h
new file mode 100644
index 000000000000..1b05bddc8ec5
--- /dev/null
+++ b/arch/mips/include/asm/mach-bmips/spaces.h
@@ -0,0 +1,18 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle
+ * Copyright (C) 2000, 2002  Maciej W. Rozycki
+ * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc.
+ */
+#ifndef _ASM_BMIPS_SPACES_H
+#define _ASM_BMIPS_SPACES_H
+
+/* Avoid collisions with system base register (SBR) region on BMIPS3300 */
+#define FIXADDR_TOP		((unsigned long)(long)(int)0xff000000)
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_BMIPS_SPACES_H */
diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index fa1f3cfbae8d..d68e685cde60 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@@ -50,7 +50,6 @@
 #define cpu_has_mips32r2	0
 #define cpu_has_mips64r1	0
 #define cpu_has_mips64r2	1
-#define cpu_has_mips_r2_exec_hazard 0
 #define cpu_has_dsp		0
 #define cpu_has_dsp2		0
 #define cpu_has_mipsmt		0
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index f9f448650505..460042ee5d6f 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -57,6 +57,10 @@ static inline int plat_device_is_coherent(struct device *dev)
 	return 1;
 }
 
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+
 dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
 
diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
new file mode 100644
index 000000000000..374eefafb320
--- /dev/null
+++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
@@ -0,0 +1,74 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 2004 Ralf Baechle
+ */
+#ifndef __ASM_MACH_GENERIC_MANGLE_PORT_H
+#define __ASM_MACH_GENERIC_MANGLE_PORT_H
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN
+
+# define __swizzle_addr_b(port)	(port)
+# define __swizzle_addr_w(port)	(port)
+# define __swizzle_addr_l(port)	(port)
+# define __swizzle_addr_q(port)	(port)
+
+#else /* __LITTLE_ENDIAN */
+
+static inline bool __should_swizzle_addr(unsigned long p)
+{
+	/* boot bus? */
+	return ((p >> 40) & 0xff) == 0;
+}
+
+# define __swizzle_addr_b(port)	\
+	(__should_swizzle_addr(port) ? (port) ^ 7 : (port))
+# define __swizzle_addr_w(port)	\
+	(__should_swizzle_addr(port) ? (port) ^ 6 : (port))
+# define __swizzle_addr_l(port)	\
+	(__should_swizzle_addr(port) ? (port) ^ 4 : (port))
+# define __swizzle_addr_q(port)	(port)
+
+#endif /* __BIG_ENDIAN */
+
+/*
+ * Sane hardware offers swapping of PCI/ISA I/O space accesses in hardware;
+ * less sane hardware forces software to fiddle with this...
+ *
+ * Regardless, if the host bus endianness mismatches that of PCI/ISA, then
+ * you can't have the numerical value of data and byte addresses within
+ * multibyte quantities both preserved at the same time.  Hence two
+ * variations of functions: non-prefixed ones that preserve the value
+ * and prefixed ones that preserve byte addresses.  The latters are
+ * typically used for moving raw data between a peripheral and memory (cf.
+ * string I/O functions), hence the "__mem_" prefix.
+ */
+#if defined(CONFIG_SWAP_IO_SPACE)
+
+# define ioswabb(a, x)		(x)
+# define __mem_ioswabb(a, x)	(x)
+# define ioswabw(a, x)		le16_to_cpu(x)
+# define __mem_ioswabw(a, x)	(x)
+# define ioswabl(a, x)		le32_to_cpu(x)
+# define __mem_ioswabl(a, x)	(x)
+# define ioswabq(a, x)		le64_to_cpu(x)
+# define __mem_ioswabq(a, x)	(x)
+
+#else
+
+# define ioswabb(a, x)		(x)
+# define __mem_ioswabb(a, x)	(x)
+# define ioswabw(a, x)		(x)
+# define __mem_ioswabw(a, x)	cpu_to_le16(x)
+# define ioswabl(a, x)		(x)
+# define __mem_ioswabl(a, x)	cpu_to_le32(x)
+# define ioswabq(a, x)		(x)
+# define __mem_ioswabq(a, x)	cpu_to_le32(x)
+
+#endif
+
+#endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */
diff --git a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h
index 71d4bface1dc..30c5cd9fd973 100644
--- a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h
@@ -14,7 +14,6 @@
 #define cpu_has_3k_cache	0
 #define cpu_has_4k_cache	1
 #define cpu_has_tx39_cache	0
-#define cpu_has_fpu		1
 #define cpu_has_32fpr		1
 #define cpu_has_counter		1
 #define cpu_has_watch		0
diff --git a/arch/mips/include/asm/mach-cobalt/war.h b/arch/mips/include/asm/mach-cobalt/war.h
deleted file mode 100644
index 34ae4046541e..000000000000
--- a/arch/mips/include/asm/mach-cobalt/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_COBALT_WAR_H
-#define __ASM_MIPS_MACH_COBALT_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_COBALT_WAR_H */
diff --git a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
index acce27fd2bb8..bdf045fb00c8 100644
--- a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
@@ -15,7 +15,6 @@
 /* Generic ones first.  */
 #define cpu_has_tlb			1
 #define cpu_has_tx39_cache		0
-#define cpu_has_fpu			1
 #define cpu_has_divec			0
 #define cpu_has_prefetch		0
 #define cpu_has_mcheck			0
diff --git a/arch/mips/include/asm/mach-dec/war.h b/arch/mips/include/asm/mach-dec/war.h
deleted file mode 100644
index d29996feb3e7..000000000000
--- a/arch/mips/include/asm/mach-dec/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_DEC_WAR_H
-#define __ASM_MIPS_MACH_DEC_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_DEC_WAR_H */
diff --git a/arch/mips/include/asm/mach-emma2rh/war.h b/arch/mips/include/asm/mach-emma2rh/war.h
deleted file mode 100644
index 79ae82da3ec7..000000000000
--- a/arch/mips/include/asm/mach-emma2rh/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_EMMA2RH_WAR_H
-#define __ASM_MIPS_MACH_EMMA2RH_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_EMMA2RH_WAR_H */
diff --git a/arch/mips/include/asm/mach-generic/dma-coherence.h b/arch/mips/include/asm/mach-generic/dma-coherence.h
index 7629c35986f7..0f8a354fd468 100644
--- a/arch/mips/include/asm/mach-generic/dma-coherence.h
+++ b/arch/mips/include/asm/mach-generic/dma-coherence.h
@@ -52,6 +52,12 @@ static inline int plat_device_is_coherent(struct device *dev)
 	return coherentio;
 }
 
+#ifndef plat_post_dma_flush
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+#endif
+
 #ifdef CONFIG_SWIOTLB
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
diff --git a/arch/mips/include/asm/mach-ath79/war.h b/arch/mips/include/asm/mach-generic/war.h
index 0bb30905fd5b..a1bc2e71f983 100644
--- a/arch/mips/include/asm/mach-ath79/war.h
+++ b/arch/mips/include/asm/mach-generic/war.h
@@ -5,8 +5,8 @@
  *
  * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
  */
-#ifndef __ASM_MACH_ATH79_WAR_H
-#define __ASM_MACH_ATH79_WAR_H
+#ifndef __ASM_MACH_GENERIC_WAR_H
+#define __ASM_MACH_GENERIC_WAR_H
 
 #define R4600_V1_INDEX_ICACHEOP_WAR	0
 #define R4600_V1_HIT_CACHEOP_WAR	0
@@ -21,4 +21,4 @@
 #define R10000_LLSC_WAR			0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
-#endif /* __ASM_MACH_ATH79_WAR_H */
+#endif /* __ASM_MACH_GENERIC_WAR_H */
diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
index 1dfe47453ea4..9b19b72dba56 100644
--- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
@@ -16,7 +16,6 @@
 #define cpu_has_tlb		1
 #define cpu_has_4kex		1
 #define cpu_has_4k_cache	1
-#define cpu_has_fpu		1
 #define cpu_has_32fpr		1
 #define cpu_has_counter		1
 #define cpu_has_mips16		0
diff --git a/arch/mips/include/asm/mach-ip27/dma-coherence.h b/arch/mips/include/asm/mach-ip27/dma-coherence.h
index 4ffddfdb5062..1daa64412569 100644
--- a/arch/mips/include/asm/mach-ip27/dma-coherence.h
+++ b/arch/mips/include/asm/mach-ip27/dma-coherence.h
@@ -58,6 +58,10 @@ static inline int plat_dma_supported(struct device *dev, u64 mask)
 	return 1;
 }
 
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+
 static inline int plat_device_is_coherent(struct device *dev)
 {
 	return 1;		/* IP27 non-cohernet mode is unsupported */
diff --git a/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h
index 2e1ec6cfedd5..241409b78ff1 100644
--- a/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h
@@ -26,7 +26,6 @@
 /* Settings which are common for all ip32 CPUs */
 #define cpu_has_tlb		1
 #define cpu_has_4kex		1
-#define cpu_has_fpu		1
 #define cpu_has_32fpr		1
 #define cpu_has_counter		1
 #define cpu_has_mips16		0
diff --git a/arch/mips/include/asm/mach-ip32/dma-coherence.h b/arch/mips/include/asm/mach-ip32/dma-coherence.h
index 104cfbc3ed63..0a0b0e2ced60 100644
--- a/arch/mips/include/asm/mach-ip32/dma-coherence.h
+++ b/arch/mips/include/asm/mach-ip32/dma-coherence.h
@@ -80,6 +80,10 @@ static inline int plat_dma_supported(struct device *dev, u64 mask)
 	return 1;
 }
 
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+
 static inline int plat_device_is_coherent(struct device *dev)
 {
 	return 0;		/* IP32 is non-cohernet */
diff --git a/arch/mips/include/asm/mach-ip32/mc146818rtc.h b/arch/mips/include/asm/mach-ip32/mc146818rtc.h
deleted file mode 100644
index 6b6bab43d5c1..000000000000
--- a/arch/mips/include/asm/mach-ip32/mc146818rtc.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998, 2001, 03 by Ralf Baechle
- * Copyright (C) 2000 Harald Koerfgen
- *
- * RTC routines for IP32 style attached Dallas chip.
- */
-#ifndef __ASM_MACH_IP32_MC146818RTC_H
-#define __ASM_MACH_IP32_MC146818RTC_H
-
-#include <asm/ip32/mace.h>
-
-#define RTC_PORT(x)	(0x70 + (x))
-
-static unsigned char CMOS_READ(unsigned long addr)
-{
-	return mace->isa.rtc[addr << 8];
-}
-
-static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
-{
-	mace->isa.rtc[addr << 8] = data;
-}
-
-/*
- * FIXME: Do it right. For now just assume that no one lives in 20th century
- * and no O2 user in 22th century ;-)
- */
-#define mc146818_decode_year(year) ((year) + 2000)
-
-#define RTC_ALWAYS_BCD	0
-
-#endif /* __ASM_MACH_IP32_MC146818RTC_H */
diff --git a/arch/mips/include/asm/mach-jazz/dma-coherence.h b/arch/mips/include/asm/mach-jazz/dma-coherence.h
index 949003ef97b3..dc347c25c343 100644
--- a/arch/mips/include/asm/mach-jazz/dma-coherence.h
+++ b/arch/mips/include/asm/mach-jazz/dma-coherence.h
@@ -48,6 +48,10 @@ static inline int plat_dma_supported(struct device *dev, u64 mask)
 	return 1;
 }
 
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+
 static inline int plat_device_is_coherent(struct device *dev)
 {
 	return 0;
diff --git a/arch/mips/include/asm/mach-jazz/war.h b/arch/mips/include/asm/mach-jazz/war.h
deleted file mode 100644
index 5b18b9a3d0ec..000000000000
--- a/arch/mips/include/asm/mach-jazz/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_JAZZ_WAR_H
-#define __ASM_MIPS_MACH_JAZZ_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_JAZZ_WAR_H */
diff --git a/arch/mips/include/asm/mach-jz4740/war.h b/arch/mips/include/asm/mach-jz4740/war.h
deleted file mode 100644
index 9b511d323838..000000000000
--- a/arch/mips/include/asm/mach-jz4740/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_JZ4740_WAR_H
-#define __ASM_MIPS_MACH_JZ4740_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_JZ4740_WAR_H */
diff --git a/arch/mips/include/asm/mach-lantiq/war.h b/arch/mips/include/asm/mach-lantiq/war.h
deleted file mode 100644
index 358ca979c1bd..000000000000
--- a/arch/mips/include/asm/mach-lantiq/war.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- */
-#ifndef __ASM_MIPS_MACH_LANTIQ_WAR_H
-#define __ASM_MIPS_MACH_LANTIQ_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif
diff --git a/arch/mips/include/asm/mach-lasat/war.h b/arch/mips/include/asm/mach-lasat/war.h
deleted file mode 100644
index 741ae724adc6..000000000000
--- a/arch/mips/include/asm/mach-lasat/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_LASAT_WAR_H
-#define __ASM_MIPS_MACH_LASAT_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_LASAT_WAR_H */
diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
index 6d69332f21ec..acc376897e46 100644
--- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
@@ -34,7 +34,6 @@
 #define cpu_has_dsp		0
 #define cpu_has_dsp2		0
 #define cpu_has_ejtag		0
-#define cpu_has_fpu		1
 #define cpu_has_ic_fills_f_dc	0
 #define cpu_has_inclusive_pcaches	1
 #define cpu_has_llsc		1
diff --git a/arch/mips/include/asm/mach-loongson/dma-coherence.h b/arch/mips/include/asm/mach-loongson/dma-coherence.h
index a90534161bd2..4bf4e19f72e8 100644
--- a/arch/mips/include/asm/mach-loongson/dma-coherence.h
+++ b/arch/mips/include/asm/mach-loongson/dma-coherence.h
@@ -78,4 +78,8 @@ static inline int plat_device_is_coherent(struct device *dev)
 #endif /* CONFIG_DMA_NONCOHERENT */
 }
 
+static inline void plat_post_dma_flush(struct device *dev)
+{
+}
+
 #endif /* __ASM_MACH_LOONGSON_DMA_COHERENCE_H */
diff --git a/arch/mips/include/asm/mach-loongson/gpio.h b/arch/mips/include/asm/mach-loongson/gpio.h
index 211a7b7138fe..b3b216904a9a 100644
--- a/arch/mips/include/asm/mach-loongson/gpio.h
+++ b/arch/mips/include/asm/mach-loongson/gpio.h
@@ -1,8 +1,9 @@
 /*
- * STLS2F GPIO Support
+ * Loongson GPIO Support
  *
  * Copyright (c) 2008  Richard Liu, STMicroelectronics <richard.liu@st.com>
  * Copyright (c) 2008-2010  Arnaud Patard <apatard@mandriva.com>
+ * Copyright (c) 2014  Huacai Chen <chenhc@lemote.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -10,14 +11,14 @@
  * (at your option) any later version.
  */
 
-#ifndef __STLS2F_GPIO_H
-#define __STLS2F_GPIO_H
+#ifndef __LOONGSON_GPIO_H
+#define __LOONGSON_GPIO_H
 
 #include <asm-generic/gpio.h>
 
-extern void gpio_set_value(unsigned gpio, int value);
-extern int gpio_get_value(unsigned gpio);
-extern int gpio_cansleep(unsigned gpio);
+#define gpio_get_value __gpio_get_value
+#define gpio_set_value __gpio_set_value
+#define gpio_cansleep __gpio_cansleep
 
 /* The chip can do interrupt
  * but it has not been tested and doc not clear
@@ -32,4 +33,4 @@ static inline int irq_to_gpio(int gpio)
 	return -EINVAL;
 }
 
-#endif				/* __STLS2F_GPIO_H */
+#endif	/* __LOONGSON_GPIO_H */
diff --git a/arch/mips/include/asm/mach-loongson/loongson.h b/arch/mips/include/asm/mach-loongson/loongson.h
index 5459ac09679f..9783103fd6f6 100644
--- a/arch/mips/include/asm/mach-loongson/loongson.h
+++ b/arch/mips/include/asm/mach-loongson/loongson.h
@@ -255,6 +255,10 @@ static inline void do_perfcnt_IRQ(void)
 extern u64 loongson_chipcfg[MAX_PACKAGES];
 #define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id]))
 
+/* Chip Temperature registor of each physical cpu package, PRid >= Loongson-3A */
+extern u64 loongson_chiptemp[MAX_PACKAGES];
+#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id]))
+
 /* Freq Control register of each physical cpu package, PRid >= Loongson-3B */
 extern u64 loongson_freqctrl[MAX_PACKAGES];
 #define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id]))
diff --git a/arch/mips/include/asm/mach-loongson/war.h b/arch/mips/include/asm/mach-loongson/war.h
deleted file mode 100644
index f2570df66bb5..000000000000
--- a/arch/mips/include/asm/mach-loongson/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MACH_LOONGSON_WAR_H
-#define __ASM_MACH_LOONGSON_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MACH_LEMOTE_WAR_H */
diff --git a/arch/mips/include/asm/mach-loongson1/war.h b/arch/mips/include/asm/mach-loongson1/war.h
deleted file mode 100644
index 8fb50d008131..000000000000
--- a/arch/mips/include/asm/mach-loongson1/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MACH_LOONGSON1_WAR_H
-#define __ASM_MACH_LOONGSON1_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MACH_LOONGSON1_WAR_H */
diff --git a/arch/mips/include/asm/mach-netlogic/multi-node.h b/arch/mips/include/asm/mach-netlogic/multi-node.h
index 9ed8dacdc37c..8bdf47e29145 100644
--- a/arch/mips/include/asm/mach-netlogic/multi-node.h
+++ b/arch/mips/include/asm/mach-netlogic/multi-node.h
@@ -48,15 +48,6 @@
 #endif
 
 #define NLM_THREADS_PER_CORE	4
-#ifdef CONFIG_CPU_XLR
-#define nlm_cores_per_node()	8
-#else
-extern unsigned int xlp_cores_per_node;
-#define nlm_cores_per_node()	xlp_cores_per_node
-#endif
-
-#define nlm_threads_per_node()	(nlm_cores_per_node() * NLM_THREADS_PER_CORE)
-#define nlm_cpuid_to_node(c)	((c) / nlm_threads_per_node())
 
 struct nlm_soc_info {
 	unsigned long	coremask;	/* cores enabled on the soc */
diff --git a/arch/mips/include/asm/mach-netlogic/topology.h b/arch/mips/include/asm/mach-netlogic/topology.h
deleted file mode 100644
index 0eb43c832b25..000000000000
--- a/arch/mips/include/asm/mach-netlogic/topology.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2013 Broadcom Corporation
- */
-#ifndef _ASM_MACH_NETLOGIC_TOPOLOGY_H
-#define _ASM_MACH_NETLOGIC_TOPOLOGY_H
-
-#include <asm/mach-netlogic/multi-node.h>
-
-#include <asm-generic/topology.h>
-
-#endif /* _ASM_MACH_NETLOGIC_TOPOLOGY_H */
diff --git a/arch/mips/include/asm/mach-netlogic/war.h b/arch/mips/include/asm/mach-netlogic/war.h
deleted file mode 100644
index 2c7216840e18..000000000000
--- a/arch/mips/include/asm/mach-netlogic/war.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2011 Netlogic Microsystems.
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_NLM_WAR_H
-#define __ASM_MIPS_MACH_NLM_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_NLM_WAR_H */
diff --git a/arch/mips/include/asm/mach-paravirt/war.h b/arch/mips/include/asm/mach-paravirt/war.h
deleted file mode 100644
index 36d3afb98451..000000000000
--- a/arch/mips/include/asm/mach-paravirt/war.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- * Copyright (C) 2013 Cavium Networks <support@caviumnetworks.com>
- */
-#ifndef __ASM_MIPS_MACH_PARAVIRT_WAR_H
-#define __ASM_MIPS_MACH_PARAVIRT_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_PARAVIRT_WAR_H */
diff --git a/arch/mips/include/asm/mach-pistachio/gpio.h b/arch/mips/include/asm/mach-pistachio/gpio.h
new file mode 100644
index 000000000000..6c1649c27b8d
--- /dev/null
+++ b/arch/mips/include/asm/mach-pistachio/gpio.h
@@ -0,0 +1,21 @@
+/*
+ * Pistachio IRQ setup
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MACH_PISTACHIO_GPIO_H
+#define __ASM_MACH_PISTACHIO_GPIO_H
+
+#include <asm-generic/gpio.h>
+
+#define gpio_get_value	__gpio_get_value
+#define gpio_set_value	__gpio_set_value
+#define gpio_cansleep	__gpio_cansleep
+#define gpio_to_irq	__gpio_to_irq
+
+#endif /* __ASM_MACH_PISTACHIO_GPIO_H */
diff --git a/arch/mips/include/asm/mach-pistachio/irq.h b/arch/mips/include/asm/mach-pistachio/irq.h
new file mode 100644
index 000000000000..b94a09a54221
--- /dev/null
+++ b/arch/mips/include/asm/mach-pistachio/irq.h
@@ -0,0 +1,18 @@
+/*
+ * Pistachio IRQ setup
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MACH_PISTACHIO_IRQ_H
+#define __ASM_MACH_PISTACHIO_IRQ_H
+
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif /* __ASM_MACH_PISTACHIO_IRQ_H */
diff --git a/arch/mips/include/asm/mach-pnx833x/war.h b/arch/mips/include/asm/mach-pnx833x/war.h
deleted file mode 100644
index e410df4e1b3a..000000000000
--- a/arch/mips/include/asm/mach-pnx833x/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_PNX833X_WAR_H
-#define __ASM_MIPS_MACH_PNX833X_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_PNX833X_WAR_H */
diff --git a/arch/mips/include/asm/mach-ralink/war.h b/arch/mips/include/asm/mach-ralink/war.h
deleted file mode 100644
index c074b5dc1f82..000000000000
--- a/arch/mips/include/asm/mach-ralink/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MACH_RALINK_WAR_H
-#define __ASM_MACH_RALINK_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MACH_RALINK_WAR_H */
diff --git a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
index f095c529c48c..98cf40417c5d 100644
--- a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
@@ -15,7 +15,6 @@
 #define cpu_has_tlb		1
 #define cpu_has_4kex		1
 #define cpu_has_4k_cache	1
-#define cpu_has_fpu		1
 #define cpu_has_32fpr		1
 #define cpu_has_counter		1
 #define cpu_has_watch		0
diff --git a/arch/mips/include/asm/mach-tx39xx/war.h b/arch/mips/include/asm/mach-tx39xx/war.h
deleted file mode 100644
index 6a52e6534776..000000000000
--- a/arch/mips/include/asm/mach-tx39xx/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_TX39XX_WAR_H
-#define __ASM_MIPS_MACH_TX39XX_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_TX39XX_WAR_H */
diff --git a/arch/mips/include/asm/mach-vr41xx/war.h b/arch/mips/include/asm/mach-vr41xx/war.h
deleted file mode 100644
index ffe31e736009..000000000000
--- a/arch/mips/include/asm/mach-vr41xx/war.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_VR41XX_WAR_H
-#define __ASM_MIPS_MACH_VR41XX_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR	0
-#define R4600_V1_HIT_CACHEOP_WAR	0
-#define R4600_V2_HIT_CACHEOP_WAR	0
-#define R5432_CP0_INTERRUPT_WAR		0
-#define BCM1250_M3_WAR			0
-#define SIBYTE_1956_WAR			0
-#define MIPS4K_ICACHE_REFILL_WAR	0
-#define MIPS_CACHE_SYNC_WAR		0
-#define TX49XX_ICACHE_INDEX_INV_WAR	0
-#define ICACHE_REFILLS_WORKAROUND_WAR	0
-#define R10000_LLSC_WAR			0
-#define MIPS34K_MISSED_ITLB_WAR		0
-
-#endif /* __ASM_MIPS_MACH_VR41XX_WAR_H */
diff --git a/arch/mips/include/asm/mips-boards/sead3-addr.h b/arch/mips/include/asm/mips-boards/sead3-addr.h
new file mode 100644
index 000000000000..c0db57802f7c
--- /dev/null
+++ b/arch/mips/include/asm/mips-boards/sead3-addr.h
@@ -0,0 +1,83 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2015 Imagination Technologies, Inc.
+ *   written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __ASM_MIPS_BOARDS_SEAD3_ADDR_H
+#define __ASM_MIPS_BOARDS_SEAD3_ADDR_H
+
+/*
+ * Target #0 Register Decode
+ */
+#define SEAD3_SD_SPDCNF				0xbb000040
+#define SEAD3_SD_SPADDR				0xbb000048
+#define SEAD3_SD_DATA				0xbb000050
+
+/*
+ * Target #1 Register Decode
+ */
+#define SEAD3_CFG				0xbb100110
+#define SEAD3_GIC_BASE_ADDRESS			0xbb1c0000
+#define SEAD3_SHARED_SECTION			0xbb1c0000
+#define SEAD3_VPE_LOCAL_SECTION			0xbb1c8000
+#define SEAD3_VPE_OTHER_SECTION			0xbb1cc000
+#define SEAD3_USER_MODE_VISIBLE_SECTION		0xbb1d0000
+
+/*
+ * Target #3 Register Decode
+ */
+#define SEAD3_USB_HS_BASE			0xbb200000
+#define SEAD3_USB_HS_IDENTIFICATION_REGS	0xbb200000
+#define SEAD3_USB_HS_CAPABILITY_REGS		0xbb200100
+#define SEAD3_USB_HS_OPERATIONAL_REGS		0xbb200140
+#define SEAD3_RESERVED				0xbe800000
+
+/*
+ * Target #3 Register Decode
+ */
+#define SEAD3_SRAM				0xbe000000
+#define SEAD3_OPTIONAL_SRAM			0xbe400000
+#define SEAD3_FPGA				0xbf000000
+
+#define SEAD3_PI_PIC32_USB_STATUS		0xbf000060
+#define   SEAD3_PI_PIC32_USB_STATUS_IO_RDY	(1 << 0)
+#define   SEAD3_PI_PIC32_USB_STATUS_SPL_INT	(1 << 1)
+#define   SEAD3_PI_PIC32_USB_STATUS_GPIOA_INT	(1 << 2)
+#define   SEAD3_PI_PIC32_USB_STATUS_GPIOB_INT	(1 << 3)
+
+#define SEAD3_PI_SOFT_ENDIAN			0xbf000070
+
+#define SEAD3_CPLD_P_SWITCH			0xbf000200
+#define SEAD3_CPLD_F_SWITCH			0xbf000208
+#define SEAD3_CPLD_P_LED			0xbf000210
+#define SEAD3_CPLD_F_LED			0xbf000218
+#define SEAD3_NEWSC_LIVE			0xbf000220
+#define SEAD3_NEWSC_REG				0xbf000228
+#define SEAD3_NEWSC_CTRL			0xbf000230
+
+#define SEAD3_LCD_CONTROL			0xbf000400
+#define SEAD3_LCD_DATA				0xbf000408
+#define SEAD3_CPLD_LCD_STATUS			0xbf000410
+#define SEAD3_CPLD_LCD_DATA			0xbf000418
+
+#define SEAD3_CPLD_PI_DEVRST			0xbf000480
+#define SEAD3_CPLD_PI_DEVRST_IC32_RST		(1 << 0)
+#define SEAD3_RESERVED_0			0xbf000500
+
+#define SEAD3_PIC32_REGISTERS			0xbf000600
+#define SEAD3_RESERVED_1			0xbf000700
+#define SEAD3_UART_CH_0				0xbf000800
+#define SEAD3_UART_CH_1				0xbf000900
+#define SEAD3_RESERVED_2			0xbf000a00
+#define SEAD3_ETHERNET				0xbf010000
+#define SEAD3_RESERVED_3			0xbf020000
+#define SEAD3_USER_EXPANSION			0xbf400000
+#define SEAD3_RESERVED_4			0xbf800000
+#define SEAD3_BOOT_FLASH_EXTENSION		0xbfa00000
+#define SEAD3_BOOT_FLASH			0xbfc00000
+#define SEAD3_REVISION_REGISTER			0xbfc00010
+
+#endif /* __ASM_MIPS_BOARDS_SEAD3_ADDR_H  */
diff --git a/arch/mips/include/asm/mips-r2-to-r6-emul.h b/arch/mips/include/asm/mips-r2-to-r6-emul.h
index 60570f2c3ba2..4b89f28047f7 100644
--- a/arch/mips/include/asm/mips-r2-to-r6-emul.h
+++ b/arch/mips/include/asm/mips-r2-to-r6-emul.h
@@ -84,11 +84,16 @@ extern void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
 
 #ifndef CONFIG_MIPSR2_TO_R6_EMULATOR
 static int mipsr2_emulation;
-static __maybe_unused int mipsr2_decoder(struct pt_regs *regs, u32 inst) { return 0; };
+static inline int mipsr2_decoder(struct pt_regs *regs, u32 inst,
+				 unsigned long *fcr31)
+{
+	return 0;
+};
 #else
 /* MIPS R2 Emulator ON/OFF */
 extern int mipsr2_emulation;
-extern int mipsr2_decoder(struct pt_regs *regs, u32 inst);
+extern int mipsr2_decoder(struct pt_regs *regs, u32 inst,
+			  unsigned long *fcr31);
 #endif /* CONFIG_MIPSR2_TO_R6_EMULATOR */
 
 #define NO_R6EMU	(cpu_has_mips_r6 && !mipsr2_emulation)
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index fef004434096..764e2756b54d 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -111,70 +111,6 @@
  */
 #define CP0_TX39_CACHE	$7
 
-/*
- * Coprocessor 1 (FPU) register names
- */
-#define CP1_REVISION   $0
-#define CP1_STATUS     $31
-
-/*
- * FPU Status Register Values
- */
-/*
- * Status Register Values
- */
-
-#define FPU_CSR_FLUSH	0x01000000	/* flush denormalised results to 0 */
-#define FPU_CSR_COND	0x00800000	/* $fcc0 */
-#define FPU_CSR_COND0	0x00800000	/* $fcc0 */
-#define FPU_CSR_COND1	0x02000000	/* $fcc1 */
-#define FPU_CSR_COND2	0x04000000	/* $fcc2 */
-#define FPU_CSR_COND3	0x08000000	/* $fcc3 */
-#define FPU_CSR_COND4	0x10000000	/* $fcc4 */
-#define FPU_CSR_COND5	0x20000000	/* $fcc5 */
-#define FPU_CSR_COND6	0x40000000	/* $fcc6 */
-#define FPU_CSR_COND7	0x80000000	/* $fcc7 */
-
-/*
- * Bits 18 - 20 of the FPU Status Register will be read as 0,
- * and should be written as zero.
- */
-#define FPU_CSR_RSVD	0x001c0000
-
-/*
- * X the exception cause indicator
- * E the exception enable
- * S the sticky/flag bit
-*/
-#define FPU_CSR_ALL_X	0x0003f000
-#define FPU_CSR_UNI_X	0x00020000
-#define FPU_CSR_INV_X	0x00010000
-#define FPU_CSR_DIV_X	0x00008000
-#define FPU_CSR_OVF_X	0x00004000
-#define FPU_CSR_UDF_X	0x00002000
-#define FPU_CSR_INE_X	0x00001000
-
-#define FPU_CSR_ALL_E	0x00000f80
-#define FPU_CSR_INV_E	0x00000800
-#define FPU_CSR_DIV_E	0x00000400
-#define FPU_CSR_OVF_E	0x00000200
-#define FPU_CSR_UDF_E	0x00000100
-#define FPU_CSR_INE_E	0x00000080
-
-#define FPU_CSR_ALL_S	0x0000007c
-#define FPU_CSR_INV_S	0x00000040
-#define FPU_CSR_DIV_S	0x00000020
-#define FPU_CSR_OVF_S	0x00000010
-#define FPU_CSR_UDF_S	0x00000008
-#define FPU_CSR_INE_S	0x00000004
-
-/* Bits 0 and 1 of FPU Status Register specify the rounding mode */
-#define FPU_CSR_RM	0x00000003
-#define FPU_CSR_RN	0x0	/* nearest */
-#define FPU_CSR_RZ	0x1	/* towards zero */
-#define FPU_CSR_RU	0x2	/* towards +Infinity */
-#define FPU_CSR_RD	0x3	/* towards -Infinity */
-
 
 /*
  * Values for PageMask register
@@ -341,39 +277,6 @@
 #define ST0_MX			0x01000000
 
 /*
- * Bitfields in the TX39 family CP0 Configuration Register 3
- */
-#define TX39_CONF_ICS_SHIFT	19
-#define TX39_CONF_ICS_MASK	0x00380000
-#define TX39_CONF_ICS_1KB	0x00000000
-#define TX39_CONF_ICS_2KB	0x00080000
-#define TX39_CONF_ICS_4KB	0x00100000
-#define TX39_CONF_ICS_8KB	0x00180000
-#define TX39_CONF_ICS_16KB	0x00200000
-
-#define TX39_CONF_DCS_SHIFT	16
-#define TX39_CONF_DCS_MASK	0x00070000
-#define TX39_CONF_DCS_1KB	0x00000000
-#define TX39_CONF_DCS_2KB	0x00010000
-#define TX39_CONF_DCS_4KB	0x00020000
-#define TX39_CONF_DCS_8KB	0x00030000
-#define TX39_CONF_DCS_16KB	0x00040000
-
-#define TX39_CONF_CWFON		0x00004000
-#define TX39_CONF_WBON		0x00002000
-#define TX39_CONF_RF_SHIFT	10
-#define TX39_CONF_RF_MASK	0x00000c00
-#define TX39_CONF_DOZE		0x00000200
-#define TX39_CONF_HALT		0x00000100
-#define TX39_CONF_LOCK		0x00000080
-#define TX39_CONF_ICE		0x00000020
-#define TX39_CONF_DCE		0x00000010
-#define TX39_CONF_IRSIZE_SHIFT	2
-#define TX39_CONF_IRSIZE_MASK	0x0000000c
-#define TX39_CONF_DRSIZE_SHIFT	0
-#define TX39_CONF_DRSIZE_MASK	0x00000003
-
-/*
  * Status register bits available in all MIPS CPUs.
  */
 #define ST0_IM			0x0000ff00
@@ -425,9 +328,9 @@
 
 /*
  * Bitfields and bit numbers in the coprocessor 0 IntCtl register. (MIPSR2)
- *
- * Refer to your MIPS R4xx0 manual, chapter 5 for explanation.
  */
+#define INTCTLB_IPFDC		23
+#define INTCTLF_IPFDC		(_ULCAST_(7) << INTCTLB_IPFDC)
 #define INTCTLB_IPPCI		26
 #define INTCTLF_IPPCI		(_ULCAST_(7) << INTCTLB_IPPCI)
 #define INTCTLB_IPTI		29
@@ -438,10 +341,10 @@
  *
  * Refer to your MIPS R4xx0 manual, chapter 5 for explanation.
  */
-#define	 CAUSEB_EXCCODE		2
-#define	 CAUSEF_EXCCODE		(_ULCAST_(31)  <<  2)
-#define	 CAUSEB_IP		8
-#define	 CAUSEF_IP		(_ULCAST_(255) <<  8)
+#define CAUSEB_EXCCODE		2
+#define CAUSEF_EXCCODE		(_ULCAST_(31)  <<  2)
+#define CAUSEB_IP		8
+#define CAUSEF_IP		(_ULCAST_(255) <<  8)
 #define	 CAUSEB_IP0		8
 #define	 CAUSEF_IP0		(_ULCAST_(1)   <<  8)
 #define	 CAUSEB_IP1		9
@@ -458,16 +361,18 @@
 #define	 CAUSEF_IP6		(_ULCAST_(1)   << 14)
 #define	 CAUSEB_IP7		15
 #define	 CAUSEF_IP7		(_ULCAST_(1)   << 15)
-#define	 CAUSEB_IV		23
-#define	 CAUSEF_IV		(_ULCAST_(1)   << 23)
-#define	 CAUSEB_PCI		26
-#define	 CAUSEF_PCI		(_ULCAST_(1)   << 26)
-#define	 CAUSEB_CE		28
-#define	 CAUSEF_CE		(_ULCAST_(3)   << 28)
-#define	 CAUSEB_TI		30
-#define	 CAUSEF_TI		(_ULCAST_(1)   << 30)
-#define	 CAUSEB_BD		31
-#define	 CAUSEF_BD		(_ULCAST_(1)   << 31)
+#define CAUSEB_FDCI		21
+#define CAUSEF_FDCI		(_ULCAST_(1)   << 21)
+#define CAUSEB_IV		23
+#define CAUSEF_IV		(_ULCAST_(1)   << 23)
+#define CAUSEB_PCI		26
+#define CAUSEF_PCI		(_ULCAST_(1)   << 26)
+#define CAUSEB_CE		28
+#define CAUSEF_CE		(_ULCAST_(3)   << 28)
+#define CAUSEB_TI		30
+#define CAUSEF_TI		(_ULCAST_(1)   << 30)
+#define CAUSEB_BD		31
+#define CAUSEF_BD		(_ULCAST_(1)   << 31)
 
 /*
  * Bits in the coprocessor 0 config register.
@@ -689,18 +594,6 @@
 #define MIPS_CMGCRF_BASE	(~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1))
 
 /*
- * Bits in the MIPS32/64 coprocessor 1 (FPU) revision register.
- */
-#define MIPS_FPIR_S		(_ULCAST_(1) << 16)
-#define MIPS_FPIR_D		(_ULCAST_(1) << 17)
-#define MIPS_FPIR_PS		(_ULCAST_(1) << 18)
-#define MIPS_FPIR_3D		(_ULCAST_(1) << 19)
-#define MIPS_FPIR_W		(_ULCAST_(1) << 20)
-#define MIPS_FPIR_L		(_ULCAST_(1) << 21)
-#define MIPS_FPIR_F64		(_ULCAST_(1) << 22)
-#define MIPS_FPIR_FREP		(_ULCAST_(1) << 29)
-
-/*
  * Bits in the MIPS32 Memory Segmentation registers.
  */
 #define MIPS_SEGCFG_PA_SHIFT	9
@@ -751,6 +644,172 @@
 #define MIPS_PWCTL_PSN_SHIFT	0
 #define MIPS_PWCTL_PSN_MASK	0x0000003f
 
+/* CDMMBase register bit definitions */
+#define MIPS_CDMMBASE_SIZE_SHIFT 0
+#define MIPS_CDMMBASE_SIZE	(_ULCAST_(511) << MIPS_CDMMBASE_SIZE_SHIFT)
+#define MIPS_CDMMBASE_CI	(_ULCAST_(1) << 9)
+#define MIPS_CDMMBASE_EN	(_ULCAST_(1) << 10)
+#define MIPS_CDMMBASE_ADDR_SHIFT 11
+#define MIPS_CDMMBASE_ADDR_START 15
+
+/*
+ * Bitfields in the TX39 family CP0 Configuration Register 3
+ */
+#define TX39_CONF_ICS_SHIFT	19
+#define TX39_CONF_ICS_MASK	0x00380000
+#define TX39_CONF_ICS_1KB	0x00000000
+#define TX39_CONF_ICS_2KB	0x00080000
+#define TX39_CONF_ICS_4KB	0x00100000
+#define TX39_CONF_ICS_8KB	0x00180000
+#define TX39_CONF_ICS_16KB	0x00200000
+
+#define TX39_CONF_DCS_SHIFT	16
+#define TX39_CONF_DCS_MASK	0x00070000
+#define TX39_CONF_DCS_1KB	0x00000000
+#define TX39_CONF_DCS_2KB	0x00010000
+#define TX39_CONF_DCS_4KB	0x00020000
+#define TX39_CONF_DCS_8KB	0x00030000
+#define TX39_CONF_DCS_16KB	0x00040000
+
+#define TX39_CONF_CWFON		0x00004000
+#define TX39_CONF_WBON		0x00002000
+#define TX39_CONF_RF_SHIFT	10
+#define TX39_CONF_RF_MASK	0x00000c00
+#define TX39_CONF_DOZE		0x00000200
+#define TX39_CONF_HALT		0x00000100
+#define TX39_CONF_LOCK		0x00000080
+#define TX39_CONF_ICE		0x00000020
+#define TX39_CONF_DCE		0x00000010
+#define TX39_CONF_IRSIZE_SHIFT	2
+#define TX39_CONF_IRSIZE_MASK	0x0000000c
+#define TX39_CONF_DRSIZE_SHIFT	0
+#define TX39_CONF_DRSIZE_MASK	0x00000003
+
+
+/*
+ * Coprocessor 1 (FPU) register names
+ */
+#define CP1_REVISION	$0
+#define CP1_UFR		$1
+#define CP1_UNFR	$4
+#define CP1_FCCR	$25
+#define CP1_FEXR	$26
+#define CP1_FENR	$28
+#define CP1_STATUS	$31
+
+
+/*
+ * Bits in the MIPS32/64 coprocessor 1 (FPU) revision register.
+ */
+#define MIPS_FPIR_S		(_ULCAST_(1) << 16)
+#define MIPS_FPIR_D		(_ULCAST_(1) << 17)
+#define MIPS_FPIR_PS		(_ULCAST_(1) << 18)
+#define MIPS_FPIR_3D		(_ULCAST_(1) << 19)
+#define MIPS_FPIR_W		(_ULCAST_(1) << 20)
+#define MIPS_FPIR_L		(_ULCAST_(1) << 21)
+#define MIPS_FPIR_F64		(_ULCAST_(1) << 22)
+#define MIPS_FPIR_HAS2008	(_ULCAST_(1) << 23)
+#define MIPS_FPIR_UFRP		(_ULCAST_(1) << 28)
+#define MIPS_FPIR_FREP		(_ULCAST_(1) << 29)
+
+/*
+ * Bits in the MIPS32/64 coprocessor 1 (FPU) condition codes register.
+ */
+#define MIPS_FCCR_CONDX_S	0
+#define MIPS_FCCR_CONDX		(_ULCAST_(255) << MIPS_FCCR_CONDX_S)
+#define MIPS_FCCR_COND0_S	0
+#define MIPS_FCCR_COND0		(_ULCAST_(1) << MIPS_FCCR_COND0_S)
+#define MIPS_FCCR_COND1_S	1
+#define MIPS_FCCR_COND1		(_ULCAST_(1) << MIPS_FCCR_COND1_S)
+#define MIPS_FCCR_COND2_S	2
+#define MIPS_FCCR_COND2		(_ULCAST_(1) << MIPS_FCCR_COND2_S)
+#define MIPS_FCCR_COND3_S	3
+#define MIPS_FCCR_COND3		(_ULCAST_(1) << MIPS_FCCR_COND3_S)
+#define MIPS_FCCR_COND4_S	4
+#define MIPS_FCCR_COND4		(_ULCAST_(1) << MIPS_FCCR_COND4_S)
+#define MIPS_FCCR_COND5_S	5
+#define MIPS_FCCR_COND5		(_ULCAST_(1) << MIPS_FCCR_COND5_S)
+#define MIPS_FCCR_COND6_S	6
+#define MIPS_FCCR_COND6		(_ULCAST_(1) << MIPS_FCCR_COND6_S)
+#define MIPS_FCCR_COND7_S	7
+#define MIPS_FCCR_COND7		(_ULCAST_(1) << MIPS_FCCR_COND7_S)
+
+/*
+ * Bits in the MIPS32/64 coprocessor 1 (FPU) enables register.
+ */
+#define MIPS_FENR_FS_S		2
+#define MIPS_FENR_FS		(_ULCAST_(1) << MIPS_FENR_FS_S)
+
+/*
+ * FPU Status Register Values
+ */
+#define FPU_CSR_COND_S	23					/* $fcc0 */
+#define FPU_CSR_COND	(_ULCAST_(1) << FPU_CSR_COND_S)
+
+#define FPU_CSR_FS_S	24		/* flush denormalised results to 0 */
+#define FPU_CSR_FS	(_ULCAST_(1) << FPU_CSR_FS_S)
+
+#define FPU_CSR_CONDX_S	25					/* $fcc[7:1] */
+#define FPU_CSR_CONDX	(_ULCAST_(127) << FPU_CSR_CONDX_S)
+#define FPU_CSR_COND1_S	25					/* $fcc1 */
+#define FPU_CSR_COND1	(_ULCAST_(1) << FPU_CSR_COND1_S)
+#define FPU_CSR_COND2_S	26					/* $fcc2 */
+#define FPU_CSR_COND2	(_ULCAST_(1) << FPU_CSR_COND2_S)
+#define FPU_CSR_COND3_S	27					/* $fcc3 */
+#define FPU_CSR_COND3	(_ULCAST_(1) << FPU_CSR_COND3_S)
+#define FPU_CSR_COND4_S	28					/* $fcc4 */
+#define FPU_CSR_COND4	(_ULCAST_(1) << FPU_CSR_COND4_S)
+#define FPU_CSR_COND5_S	29					/* $fcc5 */
+#define FPU_CSR_COND5	(_ULCAST_(1) << FPU_CSR_COND5_S)
+#define FPU_CSR_COND6_S	30					/* $fcc6 */
+#define FPU_CSR_COND6	(_ULCAST_(1) << FPU_CSR_COND6_S)
+#define FPU_CSR_COND7_S	31					/* $fcc7 */
+#define FPU_CSR_COND7	(_ULCAST_(1) << FPU_CSR_COND7_S)
+
+/*
+ * Bits 22:20 of the FPU Status Register will be read as 0,
+ * and should be written as zero.
+ */
+#define FPU_CSR_RSVD	(_ULCAST_(7) << 20)
+
+#define FPU_CSR_ABS2008	(_ULCAST_(1) << 19)
+#define FPU_CSR_NAN2008	(_ULCAST_(1) << 18)
+
+/*
+ * X the exception cause indicator
+ * E the exception enable
+ * S the sticky/flag bit
+*/
+#define FPU_CSR_ALL_X	0x0003f000
+#define FPU_CSR_UNI_X	0x00020000
+#define FPU_CSR_INV_X	0x00010000
+#define FPU_CSR_DIV_X	0x00008000
+#define FPU_CSR_OVF_X	0x00004000
+#define FPU_CSR_UDF_X	0x00002000
+#define FPU_CSR_INE_X	0x00001000
+
+#define FPU_CSR_ALL_E	0x00000f80
+#define FPU_CSR_INV_E	0x00000800
+#define FPU_CSR_DIV_E	0x00000400
+#define FPU_CSR_OVF_E	0x00000200
+#define FPU_CSR_UDF_E	0x00000100
+#define FPU_CSR_INE_E	0x00000080
+
+#define FPU_CSR_ALL_S	0x0000007c
+#define FPU_CSR_INV_S	0x00000040
+#define FPU_CSR_DIV_S	0x00000020
+#define FPU_CSR_OVF_S	0x00000010
+#define FPU_CSR_UDF_S	0x00000008
+#define FPU_CSR_INE_S	0x00000004
+
+/* Bits 0 and 1 of FPU Status Register specify the rounding mode */
+#define FPU_CSR_RM	0x00000003
+#define FPU_CSR_RN	0x0	/* nearest */
+#define FPU_CSR_RZ	0x1	/* towards zero */
+#define FPU_CSR_RU	0x2	/* towards +Infinity */
+#define FPU_CSR_RD	0x3	/* towards -Infinity */
+
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -1282,6 +1341,9 @@ do {									\
 #define read_c0_ebase()		__read_32bit_c0_register($15, 1)
 #define write_c0_ebase(val)	__write_32bit_c0_register($15, 1, val)
 
+#define read_c0_cdmmbase()	__read_ulong_c0_register($15, 2)
+#define write_c0_cdmmbase(val)	__write_ulong_c0_register($15, 2, val)
+
 /* MIPSR3 */
 #define read_c0_segctl0()	__read_32bit_c0_register($5, 2)
 #define write_c0_segctl0(val)	__write_32bit_c0_register($5, 2, val)
diff --git a/arch/mips/include/asm/netlogic/common.h b/arch/mips/include/asm/netlogic/common.h
index c281f03eb312..2a4c128277e4 100644
--- a/arch/mips/include/asm/netlogic/common.h
+++ b/arch/mips/include/asm/netlogic/common.h
@@ -111,6 +111,25 @@ static inline int nlm_irq_to_xirq(int node, int irq)
 	return node * NR_IRQS / NLM_NR_NODES + irq;
 }
 
-extern int nlm_cpu_ready[];
+#ifdef CONFIG_CPU_XLR
+#define nlm_cores_per_node()	8
+#else
+static inline int nlm_cores_per_node(void)
+{
+	return ((read_c0_prid() & PRID_IMP_MASK)
+				== PRID_IMP_NETLOGIC_XLP9XX) ? 32 : 8;
+}
 #endif
+static inline int nlm_threads_per_node(void)
+{
+	return nlm_cores_per_node() * NLM_THREADS_PER_CORE;
+}
+
+static inline int nlm_hwtid_to_node(int hwtid)
+{
+	return hwtid / nlm_threads_per_node();
+}
+
+extern int nlm_cpu_ready[];
+#endif /* __ASSEMBLY__ */
 #endif /* _NETLOGIC_COMMON_H_ */
diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h
index 06f1f75bfa9b..788baf399e69 100644
--- a/arch/mips/include/asm/netlogic/mips-extns.h
+++ b/arch/mips/include/asm/netlogic/mips-extns.h
@@ -157,7 +157,13 @@ static inline int nlm_nodeid(void)
 
 static inline unsigned int nlm_core_id(void)
 {
-	return (read_c0_ebase() & 0x1c) >> 2;
+	uint32_t prid = read_c0_prid() & PRID_IMP_MASK;
+
+	if ((prid == PRID_IMP_NETLOGIC_XLP9XX) ||
+			(prid == PRID_IMP_NETLOGIC_XLP5XX))
+		return (read_c0_ebase() & 0x7c) >> 2;
+	else
+		return (read_c0_ebase() & 0x1c) >> 2;
 }
 
 static inline unsigned int nlm_thread_id(void)
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h b/arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h
index 6d2e58a9a542..a06b59292153 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h
@@ -46,6 +46,8 @@
 #define CPU_BLOCKID_FPU		9
 #define CPU_BLOCKID_MAP		10
 
+#define IFU_BRUB_RESERVE	0x007
+
 #define ICU_DEFEATURE		0x100
 
 #define LSU_DEFEATURE		0x304
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/sys.h b/arch/mips/include/asm/netlogic/xlp-hal/sys.h
index bc7bddf25be9..6bcf3952e556 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/sys.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/sys.h
@@ -177,6 +177,9 @@
 #define SYS_9XX_CLK_DEV_DIV			0x18d
 #define SYS_9XX_CLK_DEV_CHG			0x18f
 
+#define SYS_9XX_CLK_DEV_SEL_REG			0x1a4
+#define SYS_9XX_CLK_DEV_DIV_REG			0x1a6
+
 /* Registers changed on 9XX */
 #define SYS_9XX_POWER_ON_RESET_CFG		0x00
 #define SYS_9XX_CHIP_RESET			0x01
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
index a862b93223cc..feb6ed807ec6 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
@@ -52,6 +52,7 @@
 #define PIC_2XX_XHCI_2_IRQ		25
 #define PIC_9XX_XHCI_0_IRQ		23
 #define PIC_9XX_XHCI_1_IRQ		24
+#define PIC_9XX_XHCI_2_IRQ		25
 
 #define PIC_MMC_IRQ			29
 #define PIC_I2C_0_IRQ			30
@@ -89,7 +90,7 @@ void xlp_wakeup_secondary_cpus(void);
 
 void xlp_mmu_init(void);
 void nlm_hal_init(void);
-int xlp_get_dram_map(int n, uint64_t *dram_map);
+int nlm_get_dram_map(int node, uint64_t *dram_map, int nentries);
 
 struct pci_dev;
 int xlp_socdev_to_node(const struct pci_dev *dev);
diff --git a/arch/mips/include/asm/octeon/cvmx-address.h b/arch/mips/include/asm/octeon/cvmx-address.h
index e2d874e681f6..e4444f8c4a61 100644
--- a/arch/mips/include/asm/octeon/cvmx-address.h
+++ b/arch/mips/include/asm/octeon/cvmx-address.h
@@ -104,6 +104,7 @@ typedef enum {
 typedef union {
 
 	uint64_t u64;
+#ifdef __BIG_ENDIAN_BITFIELD
 	/* mapped or unmapped virtual address */
 	struct {
 		uint64_t R:2;
@@ -202,6 +203,72 @@ typedef union {
 		uint64_t didspace:24;
 		uint64_t unused:40;
 	} sfilldidspace;
+#else
+	struct {
+		uint64_t offset:62;
+		uint64_t R:2;
+	} sva;
+
+	struct {
+		uint64_t offset:31;
+		uint64_t zeroes:33;
+	} suseg;
+
+	struct {
+		uint64_t offset:29;
+		uint64_t sp:2;
+		uint64_t ones:33;
+	} sxkseg;
+
+	struct {
+		uint64_t pa:49;
+		uint64_t mbz:10;
+		uint64_t cca:3;
+		uint64_t R:2;
+	} sxkphys;
+
+	struct {
+		uint64_t offset:36;
+		uint64_t unaddr:4;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t mbz:15;
+	} sphys;
+
+	struct {
+		uint64_t offset:36;
+		uint64_t unaddr:4;
+		uint64_t zeroes:24;
+	} smem;
+
+	struct {
+		uint64_t offset:36;
+		uint64_t unaddr:4;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t mbz:13;
+		uint64_t mem_region:2;
+	} sio;
+
+	struct {
+		uint64_t addr:13;
+		cvmx_add_win_dec_t csrdec:2;
+		uint64_t ones:49;
+	} sscr;
+
+	struct {
+		uint64_t addr:7;
+		uint64_t type:3;
+		uint64_t unused2:3;
+		uint64_t csrdec:2;
+		uint64_t ones:49;
+	} sdma;
+
+	struct {
+		uint64_t unused:40;
+		uint64_t didspace:24;
+	} sfilldidspace;
+#endif
 
 } cvmx_addr_t;
 
diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
index 2298199a287e..c373d95b5e2c 100644
--- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h
+++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
@@ -53,6 +53,7 @@
  * to 0.
  */
 struct cvmx_bootinfo {
+#ifdef __BIG_ENDIAN_BITFIELD
 	uint32_t major_version;
 	uint32_t minor_version;
 
@@ -123,6 +124,60 @@ struct cvmx_bootinfo {
 	 */
 	uint64_t fdt_addr;
 #endif
+#else				/* __BIG_ENDIAN */
+	/*
+	 * Little-Endian: When the CPU mode is switched to
+	 * little-endian, the view of the structure has some of the
+	 * fields swapped.
+	 */
+	uint32_t minor_version;
+	uint32_t major_version;
+
+	uint64_t stack_top;
+	uint64_t heap_base;
+	uint64_t heap_end;
+	uint64_t desc_vaddr;
+
+	uint32_t stack_size;
+	uint32_t exception_base_addr;
+
+	uint32_t core_mask;
+	uint32_t flags;
+
+	uint32_t phy_mem_desc_addr;
+	uint32_t dram_size;
+
+	uint32_t eclock_hz;
+	uint32_t debugger_flags_base_addr;
+
+	uint32_t reserved0;
+	uint32_t dclock_hz;
+
+	uint8_t reserved3;
+	uint8_t reserved2;
+	uint16_t reserved1;
+	uint8_t board_rev_minor;
+	uint8_t board_rev_major;
+	uint16_t board_type;
+
+	char board_serial_number[CVMX_BOOTINFO_OCTEON_SERIAL_LEN];
+	uint8_t mac_addr_base[6];
+	uint8_t mac_addr_count;
+	uint8_t pad[5];
+
+#if (CVMX_BOOTINFO_MIN_VER >= 1)
+	uint64_t compact_flash_common_base_addr;
+	uint64_t compact_flash_attribute_base_addr;
+	uint64_t led_display_base_addr;
+#endif
+#if (CVMX_BOOTINFO_MIN_VER >= 2)
+	uint32_t config_flags;
+	uint32_t dfa_ref_clock_hz;
+#endif
+#if (CVMX_BOOTINFO_MIN_VER >= 3)
+	uint64_t fdt_addr;
+#endif
+#endif
 };
 
 #define CVMX_BOOTINFO_CFG_FLAG_PCI_HOST			(1ull << 0)
diff --git a/arch/mips/include/asm/octeon/cvmx-bootmem.h b/arch/mips/include/asm/octeon/cvmx-bootmem.h
index 352f1dc2508b..374562507d0b 100644
--- a/arch/mips/include/asm/octeon/cvmx-bootmem.h
+++ b/arch/mips/include/asm/octeon/cvmx-bootmem.h
@@ -95,6 +95,7 @@ struct cvmx_bootmem_named_block_desc {
  * positions for backwards compatibility.
  */
 struct cvmx_bootmem_desc {
+#if defined(__BIG_ENDIAN_BITFIELD) || defined(CVMX_BUILD_FOR_LINUX_HOST)
 	/* spinlock to control access to list */
 	uint32_t lock;
 	/* flags for indicating various conditions */
@@ -120,7 +121,20 @@ struct cvmx_bootmem_desc {
 	uint32_t named_block_name_len;
 	/* address of named memory block descriptors */
 	uint64_t named_block_array_addr;
+#else                           /* __LITTLE_ENDIAN */
+	uint32_t flags;
+	uint32_t lock;
+	uint64_t head_addr;
 
+	uint32_t minor_version;
+	uint32_t major_version;
+	uint64_t app_data_addr;
+	uint64_t app_data_size;
+
+	uint32_t named_block_name_len;
+	uint32_t named_block_num_blocks;
+	uint64_t named_block_array_addr;
+#endif
 };
 
 /**
diff --git a/arch/mips/include/asm/octeon/cvmx-fau.h b/arch/mips/include/asm/octeon/cvmx-fau.h
index ef98f7fc102f..dafeae300c97 100644
--- a/arch/mips/include/asm/octeon/cvmx-fau.h
+++ b/arch/mips/include/asm/octeon/cvmx-fau.h
@@ -105,6 +105,16 @@ typedef union {
 	} s;
 } cvmx_fau_async_tagwait_result_t;
 
+#ifdef __BIG_ENDIAN_BITFIELD
+#define SWIZZLE_8  0
+#define SWIZZLE_16 0
+#define SWIZZLE_32 0
+#else
+#define SWIZZLE_8  0x7
+#define SWIZZLE_16 0x6
+#define SWIZZLE_32 0x4
+#endif
+
 /**
  * Builds a store I/O address for writing to the FAU
  *
@@ -175,6 +185,7 @@ static inline int64_t cvmx_fau_fetch_and_add64(cvmx_fau_reg_64_t reg,
 static inline int32_t cvmx_fau_fetch_and_add32(cvmx_fau_reg_32_t reg,
 					       int32_t value)
 {
+	reg ^= SWIZZLE_32;
 	return cvmx_read64_int32(__cvmx_fau_atomic_address(0, reg, value));
 }
 
@@ -189,6 +200,7 @@ static inline int32_t cvmx_fau_fetch_and_add32(cvmx_fau_reg_32_t reg,
 static inline int16_t cvmx_fau_fetch_and_add16(cvmx_fau_reg_16_t reg,
 					       int16_t value)
 {
+	reg ^= SWIZZLE_16;
 	return cvmx_read64_int16(__cvmx_fau_atomic_address(0, reg, value));
 }
 
@@ -201,6 +213,7 @@ static inline int16_t cvmx_fau_fetch_and_add16(cvmx_fau_reg_16_t reg,
  */
 static inline int8_t cvmx_fau_fetch_and_add8(cvmx_fau_reg_8_t reg, int8_t value)
 {
+	reg ^= SWIZZLE_8;
 	return cvmx_read64_int8(__cvmx_fau_atomic_address(0, reg, value));
 }
 
@@ -247,6 +260,7 @@ cvmx_fau_tagwait_fetch_and_add32(cvmx_fau_reg_32_t reg, int32_t value)
 		uint64_t i32;
 		cvmx_fau_tagwait32_t t;
 	} result;
+	reg ^= SWIZZLE_32;
 	result.i32 =
 	    cvmx_read64_int32(__cvmx_fau_atomic_address(1, reg, value));
 	return result.t;
@@ -270,6 +284,7 @@ cvmx_fau_tagwait_fetch_and_add16(cvmx_fau_reg_16_t reg, int16_t value)
 		uint64_t i16;
 		cvmx_fau_tagwait16_t t;
 	} result;
+	reg ^= SWIZZLE_16;
 	result.i16 =
 	    cvmx_read64_int16(__cvmx_fau_atomic_address(1, reg, value));
 	return result.t;
@@ -292,6 +307,7 @@ cvmx_fau_tagwait_fetch_and_add8(cvmx_fau_reg_8_t reg, int8_t value)
 		uint64_t i8;
 		cvmx_fau_tagwait8_t t;
 	} result;
+	reg ^= SWIZZLE_8;
 	result.i8 = cvmx_read64_int8(__cvmx_fau_atomic_address(1, reg, value));
 	return result.t;
 }
@@ -521,6 +537,7 @@ static inline void cvmx_fau_atomic_add64(cvmx_fau_reg_64_t reg, int64_t value)
  */
 static inline void cvmx_fau_atomic_add32(cvmx_fau_reg_32_t reg, int32_t value)
 {
+	reg ^= SWIZZLE_32;
 	cvmx_write64_int32(__cvmx_fau_store_address(0, reg), value);
 }
 
@@ -533,6 +550,7 @@ static inline void cvmx_fau_atomic_add32(cvmx_fau_reg_32_t reg, int32_t value)
  */
 static inline void cvmx_fau_atomic_add16(cvmx_fau_reg_16_t reg, int16_t value)
 {
+	reg ^= SWIZZLE_16;
 	cvmx_write64_int16(__cvmx_fau_store_address(0, reg), value);
 }
 
@@ -544,6 +562,7 @@ static inline void cvmx_fau_atomic_add16(cvmx_fau_reg_16_t reg, int16_t value)
  */
 static inline void cvmx_fau_atomic_add8(cvmx_fau_reg_8_t reg, int8_t value)
 {
+	reg ^= SWIZZLE_8;
 	cvmx_write64_int8(__cvmx_fau_store_address(0, reg), value);
 }
 
@@ -568,6 +587,7 @@ static inline void cvmx_fau_atomic_write64(cvmx_fau_reg_64_t reg, int64_t value)
  */
 static inline void cvmx_fau_atomic_write32(cvmx_fau_reg_32_t reg, int32_t value)
 {
+	reg ^= SWIZZLE_32;
 	cvmx_write64_int32(__cvmx_fau_store_address(1, reg), value);
 }
 
@@ -580,6 +600,7 @@ static inline void cvmx_fau_atomic_write32(cvmx_fau_reg_32_t reg, int32_t value)
  */
 static inline void cvmx_fau_atomic_write16(cvmx_fau_reg_16_t reg, int16_t value)
 {
+	reg ^= SWIZZLE_16;
 	cvmx_write64_int16(__cvmx_fau_store_address(1, reg), value);
 }
 
@@ -591,6 +612,7 @@ static inline void cvmx_fau_atomic_write16(cvmx_fau_reg_16_t reg, int16_t value)
  */
 static inline void cvmx_fau_atomic_write8(cvmx_fau_reg_8_t reg, int8_t value)
 {
+	reg ^= SWIZZLE_8;
 	cvmx_write64_int8(__cvmx_fau_store_address(1, reg), value);
 }
 
diff --git a/arch/mips/include/asm/octeon/cvmx-fpa.h b/arch/mips/include/asm/octeon/cvmx-fpa.h
index aa26a2ce5a0e..c00501d0f7ae 100644
--- a/arch/mips/include/asm/octeon/cvmx-fpa.h
+++ b/arch/mips/include/asm/octeon/cvmx-fpa.h
@@ -49,6 +49,7 @@
 typedef union {
 	uint64_t u64;
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/*
 		 * the (64-bit word) location in scratchpad to write
 		 * to (if len != 0)
@@ -63,6 +64,12 @@ typedef union {
 		 * the NCB bus.
 		 */
 		uint64_t addr:40;
+#else
+		uint64_t addr:40;
+		uint64_t did:8;
+		uint64_t len:8;
+		uint64_t scraddr:8;
+#endif
 	} s;
 } cvmx_fpa_iobdma_data_t;
 
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c.h b/arch/mips/include/asm/octeon/cvmx-l2c.h
index 11c0a8fa8eb5..ddb429210a0e 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c.h
@@ -53,12 +53,21 @@
 union cvmx_l2c_tag {
 	uint64_t u64;
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved:28;
 		uint64_t V:1;		/* Line valid */
 		uint64_t D:1;		/* Line dirty */
 		uint64_t L:1;		/* Line locked */
 		uint64_t U:1;		/* Use, LRU eviction */
 		uint64_t addr:32;	/* Phys mem (not all bits valid) */
+#else
+		uint64_t addr:32;	/* Phys mem (not all bits valid) */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t L:1;		/* Line locked */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t V:1;		/* Line valid */
+		uint64_t reserved:28;
+#endif
 	} s;
 };
 
diff --git a/arch/mips/include/asm/octeon/cvmx-packet.h b/arch/mips/include/asm/octeon/cvmx-packet.h
index 38aefa1bab9d..895e93d682c2 100644
--- a/arch/mips/include/asm/octeon/cvmx-packet.h
+++ b/arch/mips/include/asm/octeon/cvmx-packet.h
@@ -39,6 +39,7 @@ union cvmx_buf_ptr {
 	void *ptr;
 	uint64_t u64;
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* if set, invert the "free" pick of the overall
 		 * packet. HW always sets this bit to 0 on inbound
 		 * packet */
@@ -55,6 +56,13 @@ union cvmx_buf_ptr {
 		uint64_t size:16;
 		/* Pointer to the first byte of the data, NOT buffer */
 		uint64_t addr:40;
+#else
+	        uint64_t addr:40;
+	        uint64_t size:16;
+	        uint64_t pool:3;
+	        uint64_t back:4;
+	        uint64_t i:1;
+#endif
 	} s;
 };
 
diff --git a/arch/mips/include/asm/octeon/cvmx-pko.h b/arch/mips/include/asm/octeon/cvmx-pko.h
index f7d2a6718849..3da59bb8ce24 100644
--- a/arch/mips/include/asm/octeon/cvmx-pko.h
+++ b/arch/mips/include/asm/octeon/cvmx-pko.h
@@ -127,6 +127,7 @@ typedef struct {
 typedef union {
 	uint64_t u64;
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Must CVMX_IO_SEG */
 		uint64_t mem_space:2;
 		/* Must be zero */
@@ -151,6 +152,17 @@ typedef union {
 		uint64_t queue:9;
 		/* Must be zero */
 		uint64_t reserved4:3;
+#else
+	        uint64_t reserved4:3;
+	        uint64_t queue:9;
+	        uint64_t port:9;
+	        uint64_t reserved3:15;
+	        uint64_t reserved2:4;
+	        uint64_t did:8;
+	        uint64_t is_io:1;
+	        uint64_t reserved:13;
+	        uint64_t mem_space:2;
+#endif
 	} s;
 } cvmx_pko_doorbell_address_t;
 
@@ -160,6 +172,7 @@ typedef union {
 typedef union {
 	uint64_t u64;
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/*
 		 * The size of the reg1 operation - could be 8, 16,
 		 * 32, or 64 bits.
@@ -229,6 +242,24 @@ typedef union {
 		uint64_t segs:6;
 		/* Including L2, but no trailing CRC */
 		uint64_t total_bytes:16;
+#else
+	        uint64_t total_bytes:16;
+	        uint64_t segs:6;
+	        uint64_t dontfree:1;
+	        uint64_t ignore_i:1;
+	        uint64_t ipoffp1:7;
+	        uint64_t gather:1;
+	        uint64_t rsp:1;
+	        uint64_t wqp:1;
+	        uint64_t n2:1;
+	        uint64_t le:1;
+	        uint64_t reg0:11;
+	        uint64_t subone0:1;
+	        uint64_t reg1:11;
+	        uint64_t subone1:1;
+	        uint64_t size0:2;
+	        uint64_t size1:2;
+#endif
 	} s;
 } cvmx_pko_command_word0_t;
 
diff --git a/arch/mips/include/asm/octeon/cvmx-pow.h b/arch/mips/include/asm/octeon/cvmx-pow.h
index 2188e65afb86..d5565d758ddd 100644
--- a/arch/mips/include/asm/octeon/cvmx-pow.h
+++ b/arch/mips/include/asm/octeon/cvmx-pow.h
@@ -178,6 +178,7 @@ typedef enum {
 typedef union {
 	uint64_t u64;
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/*
 		 * Don't reschedule this entry. no_sched is used for
 		 * CVMX_POW_TAG_OP_SWTAG_DESCH and
@@ -217,6 +218,17 @@ typedef union {
 		 * CVMX_POW_TAG_OP_*_NSCHED
 		 */
 		uint64_t tag:32;
+#else
+		uint64_t tag:32;
+		uint64_t type:3;
+		uint64_t grp:4;
+		uint64_t qos:3;
+		uint64_t unused2:2;
+		cvmx_pow_tag_op_t op:4;
+		uint64_t index:13;
+		uint64_t unused:2;
+		uint64_t no_sched:1;
+#endif
 	} s;
 } cvmx_pow_tag_req_t;
 
@@ -230,6 +242,7 @@ typedef union {
      * Address for new work request loads (did<2:0> == 0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Mips64 address region. Should be CVMX_IO_SEG */
 		uint64_t mem_region:2;
 		/* Must be zero */
@@ -247,12 +260,22 @@ typedef union {
 		uint64_t wait:1;
 		/* Must be zero */
 		uint64_t reserved_0_2:3;
+#else
+		uint64_t reserved_0_2:3;
+		uint64_t wait:1;
+		uint64_t reserved_4_39:36;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t reserved_49_61:13;
+		uint64_t mem_region:2;
+#endif
 	} swork;
 
     /**
      * Address for loads to get POW internal status
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Mips64 address region. Should be CVMX_IO_SEG */
 		uint64_t mem_region:2;
 		/* Must be zero */
@@ -282,12 +305,25 @@ typedef union {
 		uint64_t get_wqp:1;
 		/* Must be zero */
 		uint64_t reserved_0_2:3;
+#else
+		uint64_t reserved_0_2:3;
+		uint64_t get_wqp:1;
+		uint64_t get_cur:1;
+		uint64_t get_rev:1;
+		uint64_t coreid:4;
+		uint64_t reserved_10_39:30;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t reserved_49_61:13;
+		uint64_t mem_region:2;
+#endif
 	} sstatus;
 
     /**
      * Address for memory loads to get POW internal state
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Mips64 address region. Should be CVMX_IO_SEG */
 		uint64_t mem_region:2;
 		/* Must be zero */
@@ -314,12 +350,24 @@ typedef union {
 		uint64_t get_wqp:1;
 		/* Must be zero */
 		uint64_t reserved_0_2:3;
+#else
+		uint64_t reserved_0_2:3;
+		uint64_t get_wqp:1;
+		uint64_t get_des:1;
+		uint64_t index:11;
+		uint64_t reserved_16_39:24;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t reserved_49_61:13;
+		uint64_t mem_region:2;
+#endif
 	} smemload;
 
     /**
      * Address for index/pointer loads
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Mips64 address region. Should be CVMX_IO_SEG */
 		uint64_t mem_region:2;
 		/* Must be zero */
@@ -366,6 +414,17 @@ typedef union {
 		uint64_t get_rmt:1;
 		/* Must be zero */
 		uint64_t reserved_0_2:3;
+#else
+		uint64_t reserved_0_2:3;
+		uint64_t get_rmt:1;
+		uint64_t get_des_get_tail:1;
+		uint64_t qosgrp:4;
+		uint64_t reserved_9_39:31;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t reserved_49_61:13;
+		uint64_t mem_region:2;
+#endif
 	} sindexload;
 
     /**
@@ -377,6 +436,7 @@ typedef union {
      * available.)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Mips64 address region. Should be CVMX_IO_SEG */
 		uint64_t mem_region:2;
 		/* Must be zero */
@@ -387,6 +447,13 @@ typedef union {
 		uint64_t did:8;
 		/* Must be zero */
 		uint64_t reserved_0_39:40;
+#else
+		uint64_t reserved_0_39:40;
+		uint64_t did:8;
+		uint64_t is_io:1;
+		uint64_t reserved_49_61:13;
+		uint64_t mem_region:2;
+#endif
 	} snull_rd;
 } cvmx_pow_load_addr_t;
 
@@ -401,6 +468,7 @@ typedef union {
      * Response to new work request loads
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/*
 		 * Set when no new work queue entry was returned.  *
 		 * If there was de-scheduled work, the HW will
@@ -419,12 +487,18 @@ typedef union {
 		uint64_t reserved_40_62:23;
 		/* 36 in O1 -- the work queue pointer */
 		uint64_t addr:40;
+#else
+		uint64_t addr:40;
+		uint64_t reserved_40_62:23;
+		uint64_t no_work:1;
+#endif
 	} s_work;
 
     /**
      * Result for a POW Status Load (when get_cur==0 and get_wqp==0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_62_63:2;
 		/* Set when there is a pending non-NULL SWTAG or
 		 * SWTAG_FULL, and the POW entry has not left the list
@@ -476,12 +550,32 @@ typedef union {
 		 *    AND pend_desched_switch) are set.
 		 */
 		uint64_t pend_tag:32;
+#else
+		uint64_t pend_tag:32;
+		uint64_t pend_type:2;
+		uint64_t reserved_34_35:2;
+		uint64_t pend_grp:4;
+		uint64_t pend_index:11;
+		uint64_t reserved_51:1;
+		uint64_t pend_nosched_clr:1;
+		uint64_t pend_null_rd:1;
+		uint64_t pend_new_work_wait:1;
+		uint64_t pend_new_work:1;
+		uint64_t pend_nosched:1;
+		uint64_t pend_desched_switch:1;
+		uint64_t pend_desched:1;
+		uint64_t pend_switch_null:1;
+		uint64_t pend_switch_full:1;
+		uint64_t pend_switch:1;
+		uint64_t reserved_62_63:2;
+#endif
 	} s_sstatus0;
 
     /**
      * Result for a POW Status Load (when get_cur==0 and get_wqp==1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_62_63:2;
 		/*
 		 * Set when there is a pending non-NULL SWTAG or
@@ -529,6 +623,23 @@ typedef union {
 		uint64_t pend_grp:4;
 		/* This is the wqp when pend_nosched_clr is set. */
 		uint64_t pend_wqp:36;
+#else
+	        uint64_t pend_wqp:36;
+	        uint64_t pend_grp:4;
+	        uint64_t pend_index:11;
+	        uint64_t reserved_51:1;
+	        uint64_t pend_nosched_clr:1;
+	        uint64_t pend_null_rd:1;
+	        uint64_t pend_new_work_wait:1;
+	        uint64_t pend_new_work:1;
+	        uint64_t pend_nosched:1;
+	        uint64_t pend_desched_switch:1;
+	        uint64_t pend_desched:1;
+	        uint64_t pend_switch_null:1;
+	        uint64_t pend_switch_full:1;
+	        uint64_t pend_switch:1;
+	        uint64_t reserved_62_63:2;
+#endif
 	} s_sstatus1;
 
     /**
@@ -536,6 +647,7 @@ typedef union {
      * get_rev==0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_62_63:2;
 		/*
 		 * Points to the next POW entry in the tag list when
@@ -573,12 +685,23 @@ typedef union {
 		 * SWTAG_DESCHED).
 		 */
 		uint64_t tag:32;
+#else
+	        uint64_t tag:32;
+	        uint64_t tag_type:2;
+	        uint64_t tail:1;
+	        uint64_t head:1;
+	        uint64_t grp:4;
+	        uint64_t index:11;
+	        uint64_t link_index:11;
+	        uint64_t reserved_62_63:2;
+#endif
 	} s_sstatus2;
 
     /**
      * Result for a POW Status Load (when get_cur==1, get_wqp==0, and get_rev==1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_62_63:2;
 		/*
 		 * Points to the prior POW entry in the tag list when
@@ -617,6 +740,16 @@ typedef union {
 		 * SWTAG_DESCHED).
 		 */
 		uint64_t tag:32;
+#else
+	        uint64_t tag:32;
+	        uint64_t tag_type:2;
+	        uint64_t tail:1;
+	        uint64_t head:1;
+	        uint64_t grp:4;
+	        uint64_t index:11;
+	        uint64_t revlink_index:11;
+	        uint64_t reserved_62_63:2;
+#endif
 	} s_sstatus3;
 
     /**
@@ -624,6 +757,7 @@ typedef union {
      * get_rev==0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_62_63:2;
 		/*
 		 * Points to the next POW entry in the tag list when
@@ -642,6 +776,13 @@ typedef union {
 		 * list entered on SWTAG_FULL).
 		 */
 		uint64_t wqp:36;
+#else
+	        uint64_t wqp:36;
+	        uint64_t grp:4;
+	        uint64_t index:11;
+	        uint64_t link_index:11;
+	        uint64_t reserved_62_63:2;
+#endif
 	} s_sstatus4;
 
     /**
@@ -649,6 +790,7 @@ typedef union {
      * get_rev==1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_62_63:2;
 		/*
 		 * Points to the prior POW entry in the tag list when
@@ -669,12 +811,20 @@ typedef union {
 		 * list entered on SWTAG_FULL).
 		 */
 		uint64_t wqp:36;
+#else
+	        uint64_t wqp:36;
+	        uint64_t grp:4;
+	        uint64_t index:11;
+	        uint64_t revlink_index:11;
+	        uint64_t reserved_62_63:2;
+#endif
 	} s_sstatus5;
 
     /**
      * Result For POW Memory Load (get_des == 0 and get_wqp == 0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_51_63:13;
 		/*
 		 * The next entry in the input, free, descheduled_head
@@ -695,12 +845,22 @@ typedef union {
 		uint64_t tag_type:2;
 		/* The tag of the POW entry. */
 		uint64_t tag:32;
+#else
+	        uint64_t tag:32;
+	        uint64_t tag_type:2;
+	        uint64_t tail:1;
+	        uint64_t reserved_35:1;
+	        uint64_t grp:4;
+	        uint64_t next_index:11;
+	        uint64_t reserved_51_63:13;
+#endif
 	} s_smemload0;
 
     /**
      * Result For POW Memory Load (get_des == 0 and get_wqp == 1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_51_63:13;
 		/*
 		 * The next entry in the input, free, descheduled_head
@@ -712,12 +872,19 @@ typedef union {
 		uint64_t grp:4;
 		/* The WQP held in the POW entry. */
 		uint64_t wqp:36;
+#else
+	        uint64_t wqp:36;
+	        uint64_t grp:4;
+	        uint64_t next_index:11;
+	        uint64_t reserved_51_63:13;
+#endif
 	} s_smemload1;
 
     /**
      * Result For POW Memory Load (get_des == 1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_51_63:13;
 		/*
 		 * The next entry in the tag list connected to the
@@ -740,12 +907,22 @@ typedef union {
 		 * is set.
 		 */
 		uint64_t pend_tag:32;
+#else
+	        uint64_t pend_tag:32;
+	        uint64_t pend_type:2;
+	        uint64_t pend_switch:1;
+	        uint64_t nosched:1;
+	        uint64_t grp:4;
+	        uint64_t fwd_index:11;
+	        uint64_t reserved_51_63:13;
+#endif
 	} s_smemload2;
 
     /**
      * Result For POW Index/Pointer Load (get_rmt == 0/get_des_get_tail == 0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_52_63:12;
 		/*
 		 * set when there is one or more POW entries on the
@@ -791,12 +968,28 @@ typedef union {
 		 * the input Q list selected by qosgrp.
 		 */
 		uint64_t loc_tail:11;
+#else
+	        uint64_t loc_tail:11;
+	        uint64_t reserved_11:1;
+	        uint64_t loc_head:11;
+	        uint64_t reserved_23:1;
+	        uint64_t loc_one:1;
+	        uint64_t loc_val:1;
+	        uint64_t free_tail:11;
+	        uint64_t reserved_37:1;
+	        uint64_t free_head:11;
+	        uint64_t reserved_49:1;
+	        uint64_t free_one:1;
+	        uint64_t free_val:1;
+	        uint64_t reserved_52_63:12;
+#endif
 	} sindexload0;
 
     /**
      * Result For POW Index/Pointer Load (get_rmt == 0/get_des_get_tail == 1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_52_63:12;
 		/*
 		 * set when there is one or more POW entries on the
@@ -843,12 +1036,28 @@ typedef union {
 		 * head on the descheduled list selected by qosgrp.
 		 */
 		uint64_t des_tail:11;
+#else
+	        uint64_t des_tail:11;
+	        uint64_t reserved_11:1;
+	        uint64_t des_head:11;
+	        uint64_t reserved_23:1;
+	        uint64_t des_one:1;
+	        uint64_t des_val:1;
+	        uint64_t nosched_tail:11;
+	        uint64_t reserved_37:1;
+	        uint64_t nosched_head:11;
+	        uint64_t reserved_49:1;
+	        uint64_t nosched_one:1;
+	        uint64_t nosched_val:1;
+	        uint64_t reserved_52_63:12;
+#endif
 	} sindexload1;
 
     /**
      * Result For POW Index/Pointer Load (get_rmt == 1/get_des_get_tail == 0)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_39_63:25;
 		/*
 		 * Set when this DRAM list is the current head
@@ -877,6 +1086,13 @@ typedef union {
 		 * qosgrp.
 		 */
 		uint64_t rmt_head:36;
+#else
+	        uint64_t rmt_head:36;
+	        uint64_t rmt_one:1;
+	        uint64_t rmt_val:1;
+	        uint64_t rmt_is_head:1;
+	        uint64_t reserved_39_63:25;
+#endif
 	} sindexload2;
 
     /**
@@ -884,6 +1100,7 @@ typedef union {
      * 1/get_des_get_tail == 1)
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t reserved_39_63:25;
 		/*
 		 * set when this DRAM list is the current head
@@ -912,12 +1129,20 @@ typedef union {
 		 * qosgrp.
 		 */
 		uint64_t rmt_tail:36;
+#else
+	        uint64_t rmt_tail:36;
+	        uint64_t rmt_one:1;
+	        uint64_t rmt_val:1;
+	        uint64_t rmt_is_head:1;
+	        uint64_t reserved_39_63:25;
+#endif
 	} sindexload3;
 
     /**
      * Response to NULL_RD request loads
      */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t unused:62;
 		/* of type cvmx_pow_tag_type_t. state is one of the
 		 * following:
@@ -928,6 +1153,10 @@ typedef union {
 		 * - CVMX_POW_TAG_TYPE_NULL_NULL
 		 */
 		uint64_t state:2;
+#else
+	        uint64_t state:2;
+	        uint64_t unused:62;
+#endif
 	} s_null_rd;
 
 } cvmx_pow_tag_load_resp_t;
@@ -962,6 +1191,7 @@ typedef union {
 	uint64_t u64;
 
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* Memory region.  Should be CVMX_IO_SEG in most cases */
 		uint64_t mem_reg:2;
 		uint64_t reserved_49_61:13;	/* Must be zero */
@@ -971,6 +1201,14 @@ typedef union {
 		uint64_t reserved_36_39:4;	/* Must be zero */
 		/* Address field. addr<2:0> must be zero */
 		uint64_t addr:36;
+#else
+	        uint64_t addr:36;
+	        uint64_t reserved_36_39:4;
+	        uint64_t did:8;
+	        uint64_t is_io:1;
+	        uint64_t reserved_49_61:13;
+	        uint64_t mem_reg:2;
+#endif
 	} stag;
 } cvmx_pow_tag_store_addr_t;
 
@@ -981,6 +1219,7 @@ typedef union {
 	uint64_t u64;
 
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/*
 		 * the (64-bit word) location in scratchpad to write
 		 * to (if len != 0)
@@ -994,6 +1233,14 @@ typedef union {
 		/* if set, don't return load response until work is available */
 		uint64_t wait:1;
 		uint64_t unused2:3;
+#else
+	        uint64_t unused2:3;
+	        uint64_t wait:1;
+	        uint64_t unused:36;
+	        uint64_t did:8;
+	        uint64_t len:8;
+	        uint64_t scraddr:8;
+#endif
 	} s;
 
 } cvmx_pow_iobdma_store_t;
diff --git a/arch/mips/include/asm/octeon/cvmx-wqe.h b/arch/mips/include/asm/octeon/cvmx-wqe.h
index aa0d3d0de75c..2d6d0c7127a7 100644
--- a/arch/mips/include/asm/octeon/cvmx-wqe.h
+++ b/arch/mips/include/asm/octeon/cvmx-wqe.h
@@ -57,6 +57,7 @@ typedef union {
 
 	/* Use this struct if the hardware determines that the packet is IP */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/* HW sets this to the number of buffers used by this packet */
 		uint64_t bufs:8;
 		/* HW sets to the number of L2 bytes prior to the IP */
@@ -166,13 +167,45 @@ typedef union {
 		 * the slow path */
 		/* type is cvmx_pip_err_t */
 		uint64_t err_code:8;
+#else
+	        uint64_t err_code:8;
+	        uint64_t rcv_error:1;
+	        uint64_t not_IP:1;
+	        uint64_t is_mcast:1;
+	        uint64_t is_bcast:1;
+	        uint64_t IP_exc:1;
+	        uint64_t is_frag:1;
+	        uint64_t L4_error:1;
+	        uint64_t software:1;
+	        uint64_t is_v6:1;
+	        uint64_t dec_ipsec:1;
+	        uint64_t tcp_or_udp:1;
+	        uint64_t dec_ipcomp:1;
+	        uint64_t unassigned2:4;
+	        uint64_t unassigned2a:4;
+	        uint64_t pr:4;
+	        uint64_t vlan_id:12;
+	        uint64_t vlan_cfi:1;
+	        uint64_t unassigned:1;
+	        uint64_t vlan_stacked:1;
+	        uint64_t vlan_valid:1;
+	        uint64_t ip_offset:8;
+	        uint64_t bufs:8;
+#endif
 	} s;
 
 	/* use this to get at the 16 vlan bits */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		uint64_t unused1:16;
 		uint64_t vlan:16;
 		uint64_t unused2:32;
+#else
+	        uint64_t unused2:32;
+	        uint64_t vlan:16;
+	        uint64_t unused1:16;
+
+#endif
 	} svlan;
 
 	/*
@@ -180,6 +213,7 @@ typedef union {
 	 * the packet is ip.
 	 */
 	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
 		/*
 		 * HW sets this to the number of buffers used by this
 		 * packet.
@@ -296,6 +330,27 @@ typedef union {
 		 */
 		/* type is cvmx_pip_err_t (union, so can't use directly */
 		uint64_t err_code:8;
+#else
+	        uint64_t err_code:8;
+	        uint64_t rcv_error:1;
+	        uint64_t not_IP:1;
+	        uint64_t is_mcast:1;
+	        uint64_t is_bcast:1;
+	        uint64_t is_arp:1;
+	        uint64_t is_rarp:1;
+	        uint64_t unassigned3:1;
+	        uint64_t software:1;
+	        uint64_t unassigned2:4;
+	        uint64_t unassigned2a:8;
+	        uint64_t pr:4;
+	        uint64_t vlan_id:12;
+	        uint64_t vlan_cfi:1;
+	        uint64_t unassigned:1;
+	        uint64_t vlan_stacked:1;
+	        uint64_t vlan_valid:1;
+	        uint64_t unused:8;
+	        uint64_t bufs:8;
+#endif
 	} snoip;
 
 } cvmx_pip_wqe_word2;
@@ -312,6 +367,7 @@ typedef struct {
      *	HW WRITE: the following 64 bits are filled by HW when a packet arrives
      */
 
+#ifdef __BIG_ENDIAN_BITFIELD
     /**
      * raw chksum result generated by the HW
      */
@@ -327,12 +383,18 @@ typedef struct {
      * (Only 36 bits used in Octeon 1)
      */
 	uint64_t next_ptr:40;
+#else
+	uint64_t next_ptr:40;
+	uint8_t unused;
+	uint16_t hw_chksum;
+#endif
 
     /*****************************************************************
      * WORD 1
      *	HW WRITE: the following 64 bits are filled by HW when a packet arrives
      */
 
+#ifdef __BIG_ENDIAN_BITFIELD
     /**
      * HW sets to the total number of bytes in the packet
      */
@@ -359,6 +421,15 @@ typedef struct {
      * the synchronization/ordering tag
      */
 	uint64_t tag:32;
+#else
+	uint64_t tag:32;
+	uint64_t tag_type:2;
+	uint64_t zero_2:1;
+	uint64_t grp:4;
+	uint64_t qos:3;
+	uint64_t ipprt:6;
+	uint64_t len:16;
+#endif
 
     /**
      * WORD 2 HW WRITE: the following 64-bits are filled in by
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 33db1c806b01..774bb45834cb 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -436,14 +436,6 @@ static inline uint64_t cvmx_get_cycle_global(void)
 
 /***************************************************************************/
 
-static inline void cvmx_reset_octeon(void)
-{
-	union cvmx_ciu_soft_rst ciu_soft_rst;
-	ciu_soft_rst.u64 = 0;
-	ciu_soft_rst.s.soft_rst = 1;
-	cvmx_write_csr(CVMX_CIU_SOFT_RST, ciu_soft_rst.u64);
-}
-
 /* Return the number of cores available in the chip */
 static inline uint32_t cvmx_octeon_num_cores(void)
 {
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index 041596570856..de9f74ee5dd0 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -335,4 +335,6 @@ void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t);
 
 extern void octeon_fixup_irqs(void);
 
+extern struct semaphore octeon_bootbus_sem;
+
 #endif /* __ASM_OCTEON_OCTEON_H */
diff --git a/arch/mips/include/asm/octeon/pci-octeon.h b/arch/mips/include/asm/octeon/pci-octeon.h
index 64ba56a02843..1884609741a8 100644
--- a/arch/mips/include/asm/octeon/pci-octeon.h
+++ b/arch/mips/include/asm/octeon/pci-octeon.h
@@ -11,9 +11,6 @@
 
 #include <linux/pci.h>
 
-/* Some PCI cards require delays when accessing config space. */
-#define PCI_CONFIG_SPACE_DELAY 10000
-
 /*
  * The physical memory base mapped by BAR1.  256MB at the end of the
  * first 4GB.
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 154b70a10483..89dd7fed1a57 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -105,8 +105,6 @@ static inline void clear_user_page(void *addr, unsigned long vaddr,
 		flush_data_cache_page((unsigned long)addr);
 }
 
-extern void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-	struct page *to);
 struct vm_area_struct;
 extern void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma);
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 193b4c6b7541..d9692993fc83 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -35,6 +35,8 @@ struct pci_controller {
 	struct resource *io_resource;
 	unsigned long io_offset;
 	unsigned long io_map_base;
+	struct resource *busn_resource;
+	unsigned long busn_offset;
 
 	unsigned int index;
 	/* For compatibility with current (as of July 2003) pciutils
diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h
index af2c8a351ca7..8d7a63b52ac7 100644
--- a/arch/mips/include/asm/pci/bridge.h
+++ b/arch/mips/include/asm/pci/bridge.h
@@ -835,6 +835,7 @@ struct bridge_controller {
 	struct pci_controller	pc;
 	struct resource		mem;
 	struct resource		io;
+	struct resource		busn;
 	bridge_t		*base;
 	nasid_t			nasid;
 	unsigned int		widget_id;
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index a6be006b6f75..7d56686c0e62 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -105,13 +105,16 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
-#define pte_pfn(x)		((unsigned long)((x).pte_high >> 6))
+#define pte_pfn(x)		(((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
 static inline pte_t
 pfn_pte(unsigned long pfn, pgprot_t prot)
 {
 	pte_t pte;
-	pte.pte_high = (pfn << 6) | (pgprot_val(prot) & 0x3f);
-	pte.pte_low = pgprot_val(prot);
+
+	pte.pte_low = (pfn >> _PAGE_PRESENT_SHIFT) |
+				(pgprot_val(prot) & ~_PFNX_MASK);
+	pte.pte_high = (pfn << _PFN_SHIFT) |
+				(pgprot_val(prot) & ~_PFN_MASK);
 	return pte;
 }
 
@@ -166,9 +169,9 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
 /* Swap entries must have VALID and GLOBAL bits cleared. */
-#define __swp_type(x)			(((x).val >> 2) & 0x1f)
-#define __swp_offset(x)			 ((x).val >> 7)
-#define __swp_entry(type,offset)	((swp_entry_t)	{ ((type) << 2) | ((offset) << 7) })
+#define __swp_type(x)			(((x).val >> 4) & 0x1f)
+#define __swp_offset(x)			 ((x).val >> 9)
+#define __swp_entry(type,offset)	((swp_entry_t)  { ((type) << 4) | ((offset) << 9) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t) { 0, (x).val })
 
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 1659bb91ae21..cf661a2fb141 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -279,14 +279,14 @@ extern void pgd_init(unsigned long page);
 extern void pmd_init(unsigned long page, unsigned long pagetable);
 
 /*
- * Non-present pages:  high 24 bits are offset, next 8 bits type,
- * low 32 bits zero.
+ * Non-present pages:  high 40 bits are offset, next 8 bits type,
+ * low 16 bits zero.
  */
 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
-{ pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; }
+{ pte_t pte; pte_val(pte) = (type << 16) | (offset << 24); return pte; }
 
-#define __swp_type(x)		(((x).val >> 32) & 0xff)
-#define __swp_offset(x)		((x).val >> 40)
+#define __swp_type(x)		(((x).val >> 16) & 0xff)
+#define __swp_offset(x)		((x).val >> 24)
 #define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type), (offset))) })
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 91747c282bb3..18ae5ddef118 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -37,7 +37,11 @@
 /*
  * The following bits are implemented by the TLB hardware
  */
-#define _PAGE_GLOBAL_SHIFT	0
+#define _PAGE_NO_EXEC_SHIFT	0
+#define _PAGE_NO_EXEC		(1 << _PAGE_NO_EXEC_SHIFT)
+#define _PAGE_NO_READ_SHIFT	(_PAGE_NO_EXEC_SHIFT + 1)
+#define _PAGE_NO_READ		(1 << _PAGE_NO_READ_SHIFT)
+#define _PAGE_GLOBAL_SHIFT	(_PAGE_NO_READ_SHIFT + 1)
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
 #define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
 #define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
@@ -49,7 +53,7 @@
 /*
  * The following bits are implemented in software
  */
-#define _PAGE_PRESENT_SHIFT	(_CACHE_SHIFT + 3)
+#define _PAGE_PRESENT_SHIFT	(24)
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
 #define _PAGE_READ_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
 #define _PAGE_READ		(1 << _PAGE_READ_SHIFT)
@@ -62,6 +66,11 @@
 
 #define _PFN_SHIFT		(PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
 
+/*
+ * Bits for extended EntryLo0/EntryLo1 registers
+ */
+#define _PFNX_MASK		0xffffff
+
 #elif defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 
 /*
@@ -95,11 +104,7 @@
 
 #else
 /*
- * When using the RI/XI bit support, we have 13 bits of flags below
- * the physical address. The RI/XI bits are placed such that a SRL 5
- * can strip off the software bits, then a ROTR 2 can move the RI/XI
- * into bits [63:62]. This also limits physical address to 56 bits,
- * which is more than we need right now.
+ * Below are the "Normal" R4K cases
  */
 
 /*
@@ -107,38 +112,59 @@
  */
 #define _PAGE_PRESENT_SHIFT	0
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
-#define _PAGE_READ_SHIFT	(cpu_has_rixi ? _PAGE_PRESENT_SHIFT : _PAGE_PRESENT_SHIFT + 1)
-#define _PAGE_READ ({BUG_ON(cpu_has_rixi); 1 << _PAGE_READ_SHIFT; })
+/* R2 or later cores check for RI/XI support to determine _PAGE_READ */
+#ifdef CONFIG_CPU_MIPSR2
+#define _PAGE_WRITE_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
+#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
+#else
+#define _PAGE_READ_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
+#define _PAGE_READ		(1 << _PAGE_READ_SHIFT)
 #define _PAGE_WRITE_SHIFT	(_PAGE_READ_SHIFT + 1)
 #define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
+#endif
 #define _PAGE_ACCESSED_SHIFT	(_PAGE_WRITE_SHIFT + 1)
 #define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
 #define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
 #define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
 
-#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
-/* huge tlb page */
+#if defined(CONFIG_64BIT) && defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
+/* Huge TLB page */
 #define _PAGE_HUGE_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
 #define _PAGE_HUGE		(1 << _PAGE_HUGE_SHIFT)
 #define _PAGE_SPLITTING_SHIFT	(_PAGE_HUGE_SHIFT + 1)
 #define _PAGE_SPLITTING		(1 << _PAGE_SPLITTING_SHIFT)
+
+/* Only R2 or newer cores have the XI bit */
+#ifdef CONFIG_CPU_MIPSR2
+#define _PAGE_NO_EXEC_SHIFT	(_PAGE_SPLITTING_SHIFT + 1)
 #else
-#define _PAGE_HUGE_SHIFT	(_PAGE_MODIFIED_SHIFT)
-#define _PAGE_HUGE		({BUG(); 1; })	/* Dummy value */
-#define _PAGE_SPLITTING_SHIFT	(_PAGE_HUGE_SHIFT)
-#define _PAGE_SPLITTING		({BUG(); 1; })	/* Dummy value */
-#endif
+#define _PAGE_GLOBAL_SHIFT	(_PAGE_SPLITTING_SHIFT + 1)
+#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
+#endif	/* CONFIG_CPU_MIPSR2 */
 
-/* Page cannot be executed */
-#define _PAGE_NO_EXEC_SHIFT	(cpu_has_rixi ? _PAGE_SPLITTING_SHIFT + 1 : _PAGE_SPLITTING_SHIFT)
-#define _PAGE_NO_EXEC		({BUG_ON(!cpu_has_rixi); 1 << _PAGE_NO_EXEC_SHIFT; })
+#endif	/* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */
 
-/* Page cannot be read */
-#define _PAGE_NO_READ_SHIFT	(cpu_has_rixi ? _PAGE_NO_EXEC_SHIFT + 1 : _PAGE_NO_EXEC_SHIFT)
-#define _PAGE_NO_READ		({BUG_ON(!cpu_has_rixi); 1 << _PAGE_NO_READ_SHIFT; })
+#ifdef CONFIG_CPU_MIPSR2
+/* XI - page cannot be executed */
+#ifndef _PAGE_NO_EXEC_SHIFT
+#define _PAGE_NO_EXEC_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
+#endif
+#define _PAGE_NO_EXEC		(cpu_has_rixi ? (1 << _PAGE_NO_EXEC_SHIFT) : 0)
+
+/* RI - page cannot be read */
+#define _PAGE_READ_SHIFT	(_PAGE_NO_EXEC_SHIFT + 1)
+#define _PAGE_READ		(cpu_has_rixi ? 0 : (1 << _PAGE_READ_SHIFT))
+#define _PAGE_NO_READ_SHIFT	_PAGE_READ_SHIFT
+#define _PAGE_NO_READ		(cpu_has_rixi ? (1 << _PAGE_READ_SHIFT) : 0)
 
 #define _PAGE_GLOBAL_SHIFT	(_PAGE_NO_READ_SHIFT + 1)
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
+
+#else	/* !CONFIG_CPU_MIPSR2 */
+#define _PAGE_GLOBAL_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
+#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
+#endif	/* CONFIG_CPU_MIPSR2 */
+
 #define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
 #define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
 #define _PAGE_DIRTY_SHIFT	(_PAGE_VALID_SHIFT + 1)
@@ -150,18 +176,26 @@
 
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT && defined(CONFIG_CPU_MIPS32) */
 
+#ifndef _PAGE_NO_EXEC
+#define _PAGE_NO_EXEC		0
+#endif
+#ifndef _PAGE_NO_READ
+#define _PAGE_NO_READ		0
+#endif
+
 #define _PAGE_SILENT_READ	_PAGE_VALID
 #define _PAGE_SILENT_WRITE	_PAGE_DIRTY
 
 #define _PFN_MASK		(~((1 << (_PFN_SHIFT)) - 1))
 
-#ifndef _PAGE_NO_READ
-#define _PAGE_NO_READ ({BUG(); 0; })
-#define _PAGE_NO_READ_SHIFT ({BUG(); 0; })
-#endif
-#ifndef _PAGE_NO_EXEC
-#define _PAGE_NO_EXEC ({BUG(); 0; })
-#endif
+/*
+ * The final layouts of the PTE bits are:
+ *
+ *   64-bit, R1 or earlier:     CCC D V G [S H] M A W R P
+ *   32-bit, R1 or earler:      CCC D V G M A W R P
+ *   64-bit, R2 or later:       CCC D V G RI/R XI [S H] M A W P
+ *   32-bit, R2 or later:       CCC D V G RI/R XI M A W P
+ */
 
 
 #ifndef __ASSEMBLY__
@@ -171,6 +205,7 @@
  */
 static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 {
+#ifdef CONFIG_CPU_MIPSR2
 	if (cpu_has_rixi) {
 		int sa;
 #ifdef CONFIG_32BIT
@@ -186,6 +221,7 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 		return (pte_val >> _PAGE_GLOBAL_SHIFT) |
 			((pte_val & (_PAGE_NO_EXEC | _PAGE_NO_READ)) << sa);
 	}
+#endif
 
 	return pte_val >> _PAGE_GLOBAL_SHIFT;
 }
@@ -245,7 +281,7 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 #define _CACHE_UNCACHED_ACCELERATED	(7<<_CACHE_SHIFT)
 #endif
 
-#define __READABLE	(_PAGE_SILENT_READ | _PAGE_ACCESSED | (cpu_has_rixi ? 0 : _PAGE_READ))
+#define __READABLE	(_PAGE_SILENT_READ | _PAGE_READ | _PAGE_ACCESSED)
 #define __WRITEABLE	(_PAGE_SILENT_WRITE | _PAGE_WRITE | _PAGE_MODIFIED)
 
 #define _PAGE_CHG_MASK	(_PAGE_ACCESSED | _PAGE_MODIFIED |	\
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index bef782c4a44b..819af9d057a8 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -24,17 +24,17 @@ struct mm_struct;
 struct vm_area_struct;
 
 #define PAGE_NONE	__pgprot(_PAGE_PRESENT | _CACHE_CACHABLE_NONCOHERENT)
-#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | (cpu_has_rixi ? 0 : _PAGE_READ) | \
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_READ | \
 				 _page_cachable_default)
-#define PAGE_COPY	__pgprot(_PAGE_PRESENT | (cpu_has_rixi ? 0 : _PAGE_READ) | \
-				 (cpu_has_rixi ?  _PAGE_NO_EXEC : 0) | _page_cachable_default)
-#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | (cpu_has_rixi ? 0 : _PAGE_READ) | \
+#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_NO_EXEC | \
+				 _page_cachable_default)
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_READ | \
 				 _page_cachable_default)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
 				 _PAGE_GLOBAL | _page_cachable_default)
 #define PAGE_KERNEL_NC	__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
 				 _PAGE_GLOBAL | _CACHE_CACHABLE_NONCOHERENT)
-#define PAGE_USERIO	__pgprot(_PAGE_PRESENT | (cpu_has_rixi ? 0 : _PAGE_READ) | _PAGE_WRITE | \
+#define PAGE_USERIO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 				 _page_cachable_default)
 #define PAGE_KERNEL_UNCACHED __pgprot(_PAGE_PRESENT | __READABLE | \
 			__WRITEABLE | _PAGE_GLOBAL | _CACHE_UNCACHED)
@@ -127,13 +127,9 @@ do {									\
 	}								\
 } while(0)
 
-
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-	pte_t pteval);
-
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
-#define pte_none(pte)		(!(((pte).pte_low | (pte).pte_high) & ~_PAGE_GLOBAL))
+#define pte_none(pte)		(!(((pte).pte_high) & ~_PAGE_GLOBAL))
 #define pte_present(pte)	((pte).pte_low & _PAGE_PRESENT)
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
@@ -142,18 +138,17 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
 
-	if (pte.pte_low & _PAGE_GLOBAL) {
+	if (pte.pte_high & _PAGE_GLOBAL) {
 		pte_t *buddy = ptep_buddy(ptep);
 		/*
 		 * Make sure the buddy is global too (if it's !none,
 		 * it better already be global)
 		 */
-		if (pte_none(*buddy)) {
-			buddy->pte_low	|= _PAGE_GLOBAL;
+		if (pte_none(*buddy))
 			buddy->pte_high |= _PAGE_GLOBAL;
-		}
 	}
 }
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
@@ -161,8 +156,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 
 	htw_stop();
 	/* Preserve global status for the pair */
-	if (ptep_buddy(ptep)->pte_low & _PAGE_GLOBAL)
-		null.pte_low = null.pte_high = _PAGE_GLOBAL;
+	if (ptep_buddy(ptep)->pte_high & _PAGE_GLOBAL)
+		null.pte_high = _PAGE_GLOBAL;
 
 	set_pte_at(mm, addr, ptep, null);
 	htw_start();
@@ -192,6 +187,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 	}
 #endif
 }
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
@@ -242,21 +238,21 @@ static inline int pte_young(pte_t pte)	{ return pte.pte_low & _PAGE_ACCESSED; }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	pte.pte_low  &= ~(_PAGE_WRITE | _PAGE_SILENT_WRITE);
+	pte.pte_low  &= ~_PAGE_WRITE;
 	pte.pte_high &= ~_PAGE_SILENT_WRITE;
 	return pte;
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	pte.pte_low  &= ~(_PAGE_MODIFIED | _PAGE_SILENT_WRITE);
+	pte.pte_low  &= ~_PAGE_MODIFIED;
 	pte.pte_high &= ~_PAGE_SILENT_WRITE;
 	return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
 {
-	pte.pte_low  &= ~(_PAGE_ACCESSED | _PAGE_SILENT_READ);
+	pte.pte_low  &= ~_PAGE_ACCESSED;
 	pte.pte_high &= ~_PAGE_SILENT_READ;
 	return pte;
 }
@@ -264,30 +260,24 @@ static inline pte_t pte_mkold(pte_t pte)
 static inline pte_t pte_mkwrite(pte_t pte)
 {
 	pte.pte_low |= _PAGE_WRITE;
-	if (pte.pte_low & _PAGE_MODIFIED) {
-		pte.pte_low  |= _PAGE_SILENT_WRITE;
+	if (pte.pte_low & _PAGE_MODIFIED)
 		pte.pte_high |= _PAGE_SILENT_WRITE;
-	}
 	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
 	pte.pte_low |= _PAGE_MODIFIED;
-	if (pte.pte_low & _PAGE_WRITE) {
-		pte.pte_low  |= _PAGE_SILENT_WRITE;
+	if (pte.pte_low & _PAGE_WRITE)
 		pte.pte_high |= _PAGE_SILENT_WRITE;
-	}
 	return pte;
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
 	pte.pte_low |= _PAGE_ACCESSED;
-	if (pte.pte_low & _PAGE_READ) {
-		pte.pte_low  |= _PAGE_SILENT_READ;
+	if (pte.pte_low & _PAGE_READ)
 		pte.pte_high |= _PAGE_SILENT_READ;
-	}
 	return pte;
 }
 #else
@@ -332,13 +322,13 @@ static inline pte_t pte_mkdirty(pte_t pte)
 static inline pte_t pte_mkyoung(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_ACCESSED;
-	if (cpu_has_rixi) {
-		if (!(pte_val(pte) & _PAGE_NO_READ))
-			pte_val(pte) |= _PAGE_SILENT_READ;
-	} else {
-		if (pte_val(pte) & _PAGE_READ)
-			pte_val(pte) |= _PAGE_SILENT_READ;
-	}
+#ifdef CONFIG_CPU_MIPSR2
+	if (!(pte_val(pte) & _PAGE_NO_READ))
+		pte_val(pte) |= _PAGE_SILENT_READ;
+	else
+#endif
+	if (pte_val(pte) & _PAGE_READ)
+		pte_val(pte) |= _PAGE_SILENT_READ;
 	return pte;
 }
 
@@ -391,10 +381,10 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pte.pte_low  &= _PAGE_CHG_MASK;
+	pte.pte_low  &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
 	pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
-	pte.pte_low  |= pgprot_val(newprot);
-	pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
+	pte.pte_low  |= pgprot_val(newprot) & ~_PFNX_MASK;
+	pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
 	return pte;
 }
 #else
@@ -407,12 +397,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
 	pte_t pte);
+extern void __update_cache(struct vm_area_struct *vma, unsigned long address,
+	pte_t pte);
 
 static inline void update_mmu_cache(struct vm_area_struct *vma,
 	unsigned long address, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	__update_tlb(vma, address, pte);
+	__update_cache(vma, address, pte);
 }
 
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
@@ -534,13 +527,13 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
 	pmd_val(pmd) |= _PAGE_ACCESSED;
 
-	if (cpu_has_rixi) {
-		if (!(pmd_val(pmd) & _PAGE_NO_READ))
-			pmd_val(pmd) |= _PAGE_SILENT_READ;
-	} else {
-		if (pmd_val(pmd) & _PAGE_READ)
-			pmd_val(pmd) |= _PAGE_SILENT_READ;
-	}
+#ifdef CONFIG_CPU_MIPSR2
+	if (!(pmd_val(pmd) & _PAGE_NO_READ))
+		pmd_val(pmd) |= _PAGE_SILENT_READ;
+	else
+#endif
+	if (pmd_val(pmd) & _PAGE_READ)
+		pmd_val(pmd) |= _PAGE_SILENT_READ;
 
 	return pmd;
 }
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index b5dcbee01fd7..9b3b48e21c22 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -105,7 +105,7 @@ union fpureg {
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 # define FPR_IDX(width, idx)	(idx)
 #else
-# define FPR_IDX(width, idx)	((FPU_REG_WIDTH / (width)) - 1 - (idx))
+# define FPR_IDX(width, idx)	((idx) ^ ((64 / (width)) - 1))
 #endif
 
 #define BUILD_FPR_ACCESS(width) \
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 1b22d2da88a1..38902bf97adc 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -12,6 +12,8 @@
 #ifndef _ASM_R4KCACHE_H
 #define _ASM_R4KCACHE_H
 
+#include <linux/stringify.h>
+
 #include <asm/asm.h>
 #include <asm/cacheops.h>
 #include <asm/compiler.h>
@@ -344,7 +346,7 @@ static inline void invalidate_tcache_page(unsigned long addr)
 	"	cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n"	\
 	"	cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n"	\
 	"	cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n"	\
-	"	addiu $1, $0, 0x100			\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100	\n"	\
 	"	cache %1, 0x000($1); cache %1, 0x010($1)\n"	\
 	"	cache %1, 0x020($1); cache %1, 0x030($1)\n"	\
 	"	cache %1, 0x040($1); cache %1, 0x050($1)\n"	\
@@ -368,17 +370,17 @@ static inline void invalidate_tcache_page(unsigned long addr)
 	"	cache %1, 0x040(%0); cache %1, 0x060(%0)\n"	\
 	"	cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n"	\
 	"	cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
 	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
 	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
-	"	addiu $1, $1, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
 	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
 	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
-	"	addiu $1, $1, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100\n"	\
 	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
 	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
@@ -396,25 +398,25 @@ static inline void invalidate_tcache_page(unsigned long addr)
 	"	.set noat\n"					\
 	"	cache %1, 0x000(%0); cache %1, 0x040(%0)\n"	\
 	"	cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
 	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
 	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
 	"	.set pop\n"					\
@@ -429,39 +431,38 @@ static inline void invalidate_tcache_page(unsigned long addr)
 	"	.set mips64r6\n"				\
 	"	.set noat\n"					\
 	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
-	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
-	"	addiu $1, %0, 0x100\n"				\
+	"	"__stringify(LONG_ADDIU)" $1, %0, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
+	"	"__stringify(LONG_ADDIU)" $1, $1, 0x100 \n"	\
+	"	cache %1, 0x000($1); cache %1, 0x080($1)\n"	\
 	"	.set pop\n"					\
 		:						\
 		: "r" (base),					\
diff --git a/arch/mips/include/asm/seccomp.h b/arch/mips/include/asm/seccomp.h
index f29c75cf83c6..1d8a2e2c75c1 100644
--- a/arch/mips/include/asm/seccomp.h
+++ b/arch/mips/include/asm/seccomp.h
@@ -2,11 +2,6 @@
 
 #include <linux/unistd.h>
 
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
 /*
  * Kludge alert:
  *
@@ -29,4 +24,6 @@
 
 #endif /* CONFIG_MIPS32_O32 */
 
+#include <asm-generic/seccomp.h>
+
 #endif /* __ASM_SECCOMP_H */
diff --git a/arch/mips/include/asm/sgi/sgi.h b/arch/mips/include/asm/sgi/sgi.h
index 645cea7c0f8e..b61557151e3f 100644
--- a/arch/mips/include/asm/sgi/sgi.h
+++ b/arch/mips/include/asm/sgi/sgi.h
@@ -22,14 +22,15 @@ enum sgi_mach {
 	ip17,	/* R4K UP */
 	ip19,	/* R4K MP */
 	ip20,	/* R4K UP, Indigo */
-	ip21,	/* TFP MP */
-	ip22,	/* R4x00 UP, Indigo2 */
+	ip21,	/* R8k/TFP MP */
+	ip22,	/* R4x00 UP, Indy, Indigo2 */
 	ip25,	/* R10k MP */
-	ip26,	/* TFP UP, Indigo2 */
-	ip27,	/* R10k MP, R12k MP, Origin */
-	ip28,	/* R10k UP, Indigo2 */
-	ip30,	/* Octane */
-	ip32,	/* O2 */
+	ip26,	/* R8k/TFP UP, Indigo2 */
+	ip27,	/* R10k MP, R12k MP, R14k MP, Origin 200/2k, Onyx2 */
+	ip28,	/* R10k UP, Indigo2 Impact R10k */
+	ip30,	/* R10k MP, R12k MP, R14k MP, Octane */
+	ip32,	/* R5k UP, RM5200 UP, RM7k UP, R10k UP, R12k UP, O2 */
+	ip35,   /* R14k MP, R16k MP, Origin 300/3k, Onyx3, Fuel, Tezro */
 };
 
 extern enum sgi_mach sgimach;
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index eacf865d21c2..bb02fac9b4fa 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -88,7 +88,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
 {
 	extern struct plat_smp_ops *mp_ops;	/* private */
 
-	mp_ops->send_ipi_mask(&cpumask_of_cpu(cpu), SMP_CALL_FUNCTION);
+	mp_ops->send_ipi_mask(cpumask_of(cpu), SMP_CALL_FUNCTION);
 }
 
 static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index b4548690ade9..1fca2e0793dc 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -263,7 +263,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
 		"1:	ll	%1, %2		# arch_read_unlock	\n"
-		"	addiu	%1, 1					\n"
+		"	addiu	%1, -1					\n"
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 55ed6602204c..9c0014e87c17 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -23,7 +23,6 @@
  */
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		tp_value;	/* thread pointer */
 	__u32			cpu;		/* current CPU */
@@ -44,7 +43,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain, \
 	.flags		= _TIF_FIXADE,		\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
@@ -55,10 +53,10 @@ struct thread_info {
 #define init_stack		(init_thread_union.stack)
 
 /* How to get the thread information struct from C.  */
+register struct thread_info *__current_thread_info __asm__("$28");
+
 static inline struct thread_info *current_thread_info(void)
 {
-	register struct thread_info *__current_thread_info __asm__("$28");
-
 	return __current_thread_info;
 }
 
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index 2c04b6d9ff85..6985eb59b085 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -36,77 +36,85 @@ struct kvm_regs {
 
 /*
  * for KVM_GET_FPU and KVM_SET_FPU
- *
- * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs
- * are zero filled.
  */
 struct kvm_fpu {
-	__u64 fpr[32];
-	__u32 fir;
-	__u32 fccr;
-	__u32 fexr;
-	__u32 fenr;
-	__u32 fcsr;
-	__u32 pad;
 };
 
 
 /*
- * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0
+ * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
  * registers.  The id field is broken down as follows:
  *
- *  bits[2..0]   - Register 'sel' index.
- *  bits[7..3]   - Register 'rd'  index.
- *  bits[15..8]  - Must be zero.
- *  bits[31..16] - 1 -> CP0 registers.
- *  bits[51..32] - Must be zero.
  *  bits[63..52] - As per linux/kvm.h
+ *  bits[51..32] - Must be zero.
+ *  bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CP0 registers.
+ *  bits[15..8]  - Must be zero.
+ *  bits[7..3]   - Register 'rd'  index.
+ *  bits[2..0]   - Register 'sel' index.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / MSA registers (see definitions below).
  *
  * Other sets registers may be added in the future.  Each set would
  * have its own identifier in bits[31..16].
- *
- * The registers defined in struct kvm_regs are also accessible, the
- * id values for these are below.
  */
 
-#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0)
-#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1)
-#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2)
-#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3)
-#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4)
-#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5)
-#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6)
-#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7)
-#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8)
-#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9)
-#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10)
-#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11)
-#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12)
-#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13)
-#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14)
-#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15)
-#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16)
-#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17)
-#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18)
-#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19)
-#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20)
-#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21)
-#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22)
-#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23)
-#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24)
-#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25)
-#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26)
-#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27)
-#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28)
-#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29)
-#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30)
-#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31)
-
-#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32)
-#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
-#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
-
-/* KVM specific control registers */
+#define KVM_REG_MIPS_GP		(KVM_REG_MIPS | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_CP0	(KVM_REG_MIPS | 0x0000000000010000ULL)
+#define KVM_REG_MIPS_KVM	(KVM_REG_MIPS | 0x0000000000020000ULL)
+#define KVM_REG_MIPS_FPU	(KVM_REG_MIPS | 0x0000000000030000ULL)
+
+
+/*
+ * KVM_REG_MIPS_GP - General purpose registers from kvm_regs.
+ */
+
+#define KVM_REG_MIPS_R0		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  0)
+#define KVM_REG_MIPS_R1		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  1)
+#define KVM_REG_MIPS_R2		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  2)
+#define KVM_REG_MIPS_R3		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  3)
+#define KVM_REG_MIPS_R4		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  4)
+#define KVM_REG_MIPS_R5		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  5)
+#define KVM_REG_MIPS_R6		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  6)
+#define KVM_REG_MIPS_R7		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  7)
+#define KVM_REG_MIPS_R8		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  8)
+#define KVM_REG_MIPS_R9		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  9)
+#define KVM_REG_MIPS_R10	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10)
+#define KVM_REG_MIPS_R11	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11)
+#define KVM_REG_MIPS_R12	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12)
+#define KVM_REG_MIPS_R13	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13)
+#define KVM_REG_MIPS_R14	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14)
+#define KVM_REG_MIPS_R15	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15)
+#define KVM_REG_MIPS_R16	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16)
+#define KVM_REG_MIPS_R17	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17)
+#define KVM_REG_MIPS_R18	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18)
+#define KVM_REG_MIPS_R19	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19)
+#define KVM_REG_MIPS_R20	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20)
+#define KVM_REG_MIPS_R21	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21)
+#define KVM_REG_MIPS_R22	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22)
+#define KVM_REG_MIPS_R23	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23)
+#define KVM_REG_MIPS_R24	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24)
+#define KVM_REG_MIPS_R25	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25)
+#define KVM_REG_MIPS_R26	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26)
+#define KVM_REG_MIPS_R27	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27)
+#define KVM_REG_MIPS_R28	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28)
+#define KVM_REG_MIPS_R29	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29)
+#define KVM_REG_MIPS_R30	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30)
+#define KVM_REG_MIPS_R31	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31)
+
+#define KVM_REG_MIPS_HI		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32)
+#define KVM_REG_MIPS_LO		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33)
+#define KVM_REG_MIPS_PC		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34)
+
+
+/*
+ * KVM_REG_MIPS_KVM - KVM specific control registers.
+ */
 
 /*
  * CP0_Count control
@@ -118,8 +126,7 @@ struct kvm_fpu {
  *        safely without losing time or guest timer interrupts.
  * Other: Reserved, do not change.
  */
-#define KVM_REG_MIPS_COUNT_CTL		(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-					 0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL	    (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0)
 #define KVM_REG_MIPS_COUNT_CTL_DC	0x00000001
 
 /*
@@ -131,15 +138,46 @@ struct kvm_fpu {
  * emulated.
  * Modifications to times in the future are rejected.
  */
-#define KVM_REG_MIPS_COUNT_RESUME	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-					 0x20000 | 1)
+#define KVM_REG_MIPS_COUNT_RESUME   (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1)
 /*
  * CP0_Count rate in Hz
  * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
  * discontinuities in CP0_Count.
  */
-#define KVM_REG_MIPS_COUNT_HZ		(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-					 0x20000 | 2)
+#define KVM_REG_MIPS_COUNT_HZ	    (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2)
+
+
+/*
+ * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers.
+ *
+ *  bits[15..8]  - Register subset (see definitions below).
+ *  bits[7..5]   - Must be zero.
+ *  bits[4..0]   - Register number within register subset.
+ */
+
+#define KVM_REG_MIPS_FPR	(KVM_REG_MIPS_FPU | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_FCR	(KVM_REG_MIPS_FPU | 0x0000000000000100ULL)
+#define KVM_REG_MIPS_MSACR	(KVM_REG_MIPS_FPU | 0x0000000000000200ULL)
+
+/*
+ * KVM_REG_MIPS_FPR - Floating point / Vector registers.
+ */
+#define KVM_REG_MIPS_FPR_32(n)	(KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32  | (n))
+#define KVM_REG_MIPS_FPR_64(n)	(KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64  | (n))
+#define KVM_REG_MIPS_VEC_128(n)	(KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n))
+
+/*
+ * KVM_REG_MIPS_FCR - Floating point control registers.
+ */
+#define KVM_REG_MIPS_FCR_IR	(KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_FCR_CSR	(KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31)
+
+/*
+ * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers.
+ */
+#define KVM_REG_MIPS_MSA_IR	 (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_MSA_CSR	 (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  1)
+
 
 /*
  * KVM MIPS specific structures and definitions
diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform
index ba91be9c21ef..c41d30080098 100644
--- a/arch/mips/jz4740/Platform
+++ b/arch/mips/jz4740/Platform
@@ -1,3 +1,4 @@
 platform-$(CONFIG_MACH_JZ4740)	+= jz4740/
 cflags-$(CONFIG_MACH_JZ4740)	+= -I$(srctree)/arch/mips/include/asm/mach-jz4740
 load-$(CONFIG_MACH_JZ4740)	+= 0xffffffff80010000
+zload-$(CONFIG_MACH_JZ4740)	+= 0xffffffff80600000
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index 5e430ce9ac7e..72b0cecbc17c 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -18,6 +18,7 @@
 #include <linux/time.h>
 
 #include <linux/clockchips.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach-jz4740/irq.h>
 #include <asm/mach-jz4740/timer.h>
@@ -43,6 +44,11 @@ static struct clocksource jz4740_clocksource = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static u64 notrace jz4740_read_sched_clock(void)
+{
+	return jz4740_timer_get_count(TIMER_CLOCKSOURCE);
+}
+
 static irqreturn_t jz4740_clockevent_irq(int irq, void *devid)
 {
 	struct clock_event_device *cd = devid;
@@ -126,6 +132,8 @@ void __init plat_time_init(void)
 	if (ret)
 		printk(KERN_ERR "Failed to register clocksource: %d\n", ret);
 
+	sched_clock_register(jz4740_read_sched_clock, 16, clk_rate);
+
 	setup_irq(JZ4740_IRQ_TCU0, &timer_irqaction);
 
 	ctrl = JZ_TIMER_CTRL_PRESCALE_16 | JZ_TIMER_CTRL_SRC_EXT;
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 750d67ac41e9..beabe19ff8e5 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -92,7 +92,6 @@ void output_thread_info_defines(void)
 {
 	COMMENT("MIPS thread_info offsets.");
 	OFFSET(TI_TASK, thread_info, task);
-	OFFSET(TI_EXEC_DOMAIN, thread_info, exec_domain);
 	OFFSET(TI_FLAGS, thread_info, flags);
 	OFFSET(TI_TP_VALUE, thread_info, tp_value);
 	OFFSET(TI_CPU, thread_info, cpu);
@@ -167,72 +166,6 @@ void output_thread_fpu_defines(void)
 	OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
 	OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
 
-	/* the least significant 64 bits of each FP register */
-	OFFSET(THREAD_FPR0_LS64, task_struct,
-	       thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR1_LS64, task_struct,
-	       thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR2_LS64, task_struct,
-	       thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR3_LS64, task_struct,
-	       thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR4_LS64, task_struct,
-	       thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR5_LS64, task_struct,
-	       thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR6_LS64, task_struct,
-	       thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR7_LS64, task_struct,
-	       thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR8_LS64, task_struct,
-	       thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR9_LS64, task_struct,
-	       thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR10_LS64, task_struct,
-	       thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR11_LS64, task_struct,
-	       thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR12_LS64, task_struct,
-	       thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR13_LS64, task_struct,
-	       thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR14_LS64, task_struct,
-	       thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR15_LS64, task_struct,
-	       thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR16_LS64, task_struct,
-	       thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR17_LS64, task_struct,
-	       thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR18_LS64, task_struct,
-	       thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR19_LS64, task_struct,
-	       thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR20_LS64, task_struct,
-	       thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR21_LS64, task_struct,
-	       thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR22_LS64, task_struct,
-	       thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR23_LS64, task_struct,
-	       thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR24_LS64, task_struct,
-	       thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR25_LS64, task_struct,
-	       thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR26_LS64, task_struct,
-	       thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR27_LS64, task_struct,
-	       thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR28_LS64, task_struct,
-	       thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR29_LS64, task_struct,
-	       thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR30_LS64, task_struct,
-	       thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR31_LS64, task_struct,
-	       thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
-
 	OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
 	OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr);
 	BLANK();
@@ -470,6 +403,45 @@ void output_kvm_defines(void)
 	OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
 	OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
 	OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
+	BLANK();
+
+	OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
+	OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
+	OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]);
+	OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]);
+	OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]);
+	OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]);
+	OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]);
+	OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]);
+	OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]);
+	OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]);
+	OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]);
+	OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]);
+	OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]);
+	OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]);
+	OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]);
+	OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]);
+	OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]);
+	OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]);
+	OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]);
+	OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]);
+	OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]);
+	OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]);
+	OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]);
+	OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]);
+	OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]);
+	OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]);
+	OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]);
+	OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]);
+	OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]);
+	OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]);
+	OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]);
+	OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]);
+
+	OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
+	OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
+	BLANK();
+
 	OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
 	OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
 	OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index c2e0f45ddf6c..c0c5e5972256 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -36,8 +36,10 @@ int __isa_exception_epc(struct pt_regs *regs)
 		return epc;
 	}
 	if (cpu_has_mips16) {
-		if (((union mips16e_instruction)inst).ri.opcode
-				== MIPS16e_jal_op)
+		union mips16e_instruction inst_mips16e;
+
+		inst_mips16e.full = inst;
+		if (inst_mips16e.ri.opcode == MIPS16e_jal_op)
 			epc += 4;
 		else
 			epc += 2;
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 82bd2b278a24..d70c4d893219 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -37,6 +37,24 @@ void mips_set_clock_mode(enum clock_event_mode mode,
 DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 int cp0_timer_irq_installed;
 
+/*
+ * Possibly handle a performance counter interrupt.
+ * Return true if the timer interrupt should not be checked
+ */
+static inline int handle_perf_irq(int r2)
+{
+	/*
+	 * The performance counter overflow interrupt may be shared with the
+	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
+	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
+	 * and we can't reliably determine if a counter interrupt has also
+	 * happened (!r2) then don't check for a timer interrupt.
+	 */
+	return (cp0_perfcount_irq < 0) &&
+		perf_irq() == IRQ_HANDLED &&
+		!r2;
+}
+
 irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
 	const int r2 = cpu_has_mips_r2_r6;
@@ -50,27 +68,32 @@ irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 	 * the performance counter interrupt handler anyway.
 	 */
 	if (handle_perf_irq(r2))
-		goto out;
+		return IRQ_HANDLED;
 
 	/*
 	 * The same applies to performance counter interrupts.	But with the
 	 * above we now know that the reason we got here must be a timer
 	 * interrupt.  Being the paranoiacs we are we check anyway.
 	 */
-	if (!r2 || (read_c0_cause() & (1 << 30))) {
+	if (!r2 || (read_c0_cause() & CAUSEF_TI)) {
 		/* Clear Count/Compare Interrupt */
 		write_c0_compare(read_c0_compare());
 		cd = &per_cpu(mips_clockevent_device, cpu);
 		cd->event_handler(cd);
+
+		return IRQ_HANDLED;
 	}
 
-out:
-	return IRQ_HANDLED;
+	return IRQ_NONE;
 }
 
 struct irqaction c0_compare_irqaction = {
 	.handler = c0_compare_interrupt,
-	.flags = IRQF_PERCPU | IRQF_TIMER,
+	/*
+	 * IRQF_SHARED: The timer interrupt may be shared with other interrupts
+	 * such as perf counter and FDC interrupts.
+	 */
+	.flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED,
 	.name = "timer",
 };
 
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index 2ae08462e46e..723932441ecc 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/sched_clock.h>
 #include <asm/time.h>
 #include <asm/txx9tmr.h>
 
@@ -46,6 +47,11 @@ static struct txx9_clocksource txx9_clocksource = {
 	},
 };
 
+static u64 notrace txx9_read_sched_clock(void)
+{
+	return __raw_readl(&txx9_clocksource.tmrptr->trr);
+}
+
 void __init txx9_clocksource_init(unsigned long baseaddr,
 				  unsigned int imbusclk)
 {
@@ -61,6 +67,9 @@ void __init txx9_clocksource_init(unsigned long baseaddr,
 	__raw_writel(1 << TXX9_CLOCKSOURCE_BITS, &tmrptr->cpra);
 	__raw_writel(TCR_BASE | TXx9_TMTCR_TCE, &tmrptr->tcr);
 	txx9_clocksource.tmrptr = tmrptr;
+
+	sched_clock_register(txx9_read_sched_clock, TXX9_CLOCKSOURCE_BITS,
+			     TIMER_CLK(imbusclk));
 }
 
 struct txx9_clock_event_device {
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 48dfb9de853d..e36515dcd3b2 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -20,6 +20,7 @@
 
 #include <asm/bugs.h>
 #include <asm/cpu.h>
+#include <asm/cpu-features.h>
 #include <asm/cpu-type.h>
 #include <asm/fpu.h>
 #include <asm/mipsregs.h>
@@ -31,11 +32,127 @@
 #include <asm/spram.h>
 #include <asm/uaccess.h>
 
+/*
+ * Get the FPU Implementation/Revision.
+ */
+static inline unsigned long cpu_get_fpu_id(void)
+{
+	unsigned long tmp, fpu_id;
+
+	tmp = read_c0_status();
+	__enable_fpu(FPU_AS_IS);
+	fpu_id = read_32bit_cp1_register(CP1_REVISION);
+	write_c0_status(tmp);
+	return fpu_id;
+}
+
+/*
+ * Check if the CPU has an external FPU.
+ */
+static inline int __cpu_has_fpu(void)
+{
+	return (cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE;
+}
+
+static inline unsigned long cpu_get_msa_id(void)
+{
+	unsigned long status, msa_id;
+
+	status = read_c0_status();
+	__enable_fpu(FPU_64BIT);
+	enable_msa();
+	msa_id = read_msa_ir();
+	disable_msa();
+	write_c0_status(status);
+	return msa_id;
+}
+
+/*
+ * Determine the FCSR mask for FPU hardware.
+ */
+static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
+{
+	unsigned long sr, mask, fcsr, fcsr0, fcsr1;
+
+	mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM;
+
+	sr = read_c0_status();
+	__enable_fpu(FPU_AS_IS);
+
+	fcsr = read_32bit_cp1_register(CP1_STATUS);
+
+	fcsr0 = fcsr & mask;
+	write_32bit_cp1_register(CP1_STATUS, fcsr0);
+	fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+	fcsr1 = fcsr | ~mask;
+	write_32bit_cp1_register(CP1_STATUS, fcsr1);
+	fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+	write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+	write_c0_status(sr);
+
+	c->fpu_msk31 = ~(fcsr0 ^ fcsr1) & ~mask;
+}
+
+/*
+ * Set the FIR feature flags for the FPU emulator.
+ */
+static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
+{
+	u32 value;
+
+	value = 0;
+	if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+			    MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+			    MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
+		value |= MIPS_FPIR_D | MIPS_FPIR_S;
+	if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+			    MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
+		value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
+	c->fpu_id = value;
+}
+
+/* Determined FPU emulator mask to use for the boot CPU with "nofpu".  */
+static unsigned int mips_nofpu_msk31;
+
+/*
+ * Set options for FPU hardware.
+ */
+static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
+{
+	c->fpu_id = cpu_get_fpu_id();
+	mips_nofpu_msk31 = c->fpu_msk31;
+
+	if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+			    MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+			    MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+		if (c->fpu_id & MIPS_FPIR_3D)
+			c->ases |= MIPS_ASE_MIPS3D;
+		if (c->fpu_id & MIPS_FPIR_FREP)
+			c->options |= MIPS_CPU_FRE;
+	}
+
+	cpu_set_fpu_fcsr_mask(c);
+}
+
+/*
+ * Set options for the FPU emulator.
+ */
+static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
+{
+	c->options &= ~MIPS_CPU_FPU;
+	c->fpu_msk31 = mips_nofpu_msk31;
+
+	cpu_set_nofpu_id(c);
+}
+
 static int mips_fpu_disabled;
 
 static int __init fpu_disable(char *s)
 {
-	cpu_data[0].options &= ~MIPS_CPU_FPU;
+	cpu_set_nofpu_opts(&boot_cpu_data);
 	mips_fpu_disabled = 1;
 
 	return 1;
@@ -178,41 +295,6 @@ static inline void set_elf_platform(int cpu, const char *plat)
 		__elf_platform = plat;
 }
 
-/*
- * Get the FPU Implementation/Revision.
- */
-static inline unsigned long cpu_get_fpu_id(void)
-{
-	unsigned long tmp, fpu_id;
-
-	tmp = read_c0_status();
-	__enable_fpu(FPU_AS_IS);
-	fpu_id = read_32bit_cp1_register(CP1_REVISION);
-	write_c0_status(tmp);
-	return fpu_id;
-}
-
-/*
- * Check the CPU has an FPU the official way.
- */
-static inline int __cpu_has_fpu(void)
-{
-	return (cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE;
-}
-
-static inline unsigned long cpu_get_msa_id(void)
-{
-	unsigned long status, msa_id;
-
-	status = read_c0_status();
-	__enable_fpu(FPU_64BIT);
-	enable_msa();
-	msa_id = read_msa_ir();
-	disable_msa();
-	write_c0_status(status);
-	return msa_id;
-}
-
 static inline void cpu_probe_vmbits(struct cpuinfo_mips *c)
 {
 #ifdef __NEED_VMBITS_PROBE
@@ -441,6 +523,8 @@ static inline unsigned int decode_config3(struct cpuinfo_mips *c)
 		c->htw_seq = 0;
 		c->options |= MIPS_CPU_HTW;
 	}
+	if (config3 & MIPS_CONF3_CDMM)
+		c->options |= MIPS_CPU_CDMM;
 
 	return config3 & MIPS_CONF_M;
 }
@@ -516,6 +600,10 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
 		c->options |= MIPS_CPU_MAAR;
 	if (config5 & MIPS_CONF5_LLB)
 		c->options |= MIPS_CPU_RW_LLB;
+#ifdef CONFIG_XPA
+	if (config5 & MIPS_CONF5_MVH)
+		c->options |= MIPS_CPU_XPA;
+#endif
 
 	return config5 & MIPS_CONF_M;
 }
@@ -575,6 +663,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_R2000:
 		c->cputype = CPU_R2000;
 		__cpu_name[cpu] = "R2000";
+		c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS;
 		c->options = MIPS_CPU_TLB | MIPS_CPU_3K_CACHE |
 			     MIPS_CPU_NOFPUEX;
 		if (__cpu_has_fpu())
@@ -594,6 +683,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			c->cputype = CPU_R3000;
 			__cpu_name[cpu] = "R3000";
 		}
+		c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS;
 		c->options = MIPS_CPU_TLB | MIPS_CPU_3K_CACHE |
 			     MIPS_CPU_NOFPUEX;
 		if (__cpu_has_fpu())
@@ -642,6 +732,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		}
 
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
 			     MIPS_CPU_WATCH | MIPS_CPU_VCE |
 			     MIPS_CPU_LLSC;
@@ -649,6 +740,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		break;
 	case PRID_IMP_VR41XX:
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS;
 		c->tlbsize = 32;
 		switch (c->processor_id & 0xf0) {
@@ -690,6 +782,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_R4300;
 		__cpu_name[cpu] = "R4300";
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
 			     MIPS_CPU_LLSC;
 		c->tlbsize = 32;
@@ -698,6 +791,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_R4600;
 		__cpu_name[cpu] = "R4600";
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
 			     MIPS_CPU_LLSC;
 		c->tlbsize = 48;
@@ -713,11 +807,13 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_R4650;
 		__cpu_name[cpu] = "R4650";
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_LLSC;
 		c->tlbsize = 48;
 		break;
 	#endif
 	case PRID_IMP_TX39:
+		c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS;
 		c->options = MIPS_CPU_TLB | MIPS_CPU_TX39_CACHE;
 
 		if ((c->processor_id & 0xf0) == (PRID_REV_TX3927 & 0xf0)) {
@@ -743,6 +839,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_R4700;
 		__cpu_name[cpu] = "R4700";
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_32FPR |
 			     MIPS_CPU_LLSC;
 		c->tlbsize = 48;
@@ -751,6 +848,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_TX49XX;
 		__cpu_name[cpu] = "R49XX";
 		set_isa(c, MIPS_CPU_ISA_III);
+		c->fpu_msk31 |= FPU_CSR_CONDX;
 		c->options = R4K_OPTS | MIPS_CPU_LLSC;
 		if (!(c->processor_id & 0x08))
 			c->options |= MIPS_CPU_FPU | MIPS_CPU_32FPR;
@@ -792,6 +890,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_R6000;
 		__cpu_name[cpu] = "R6000";
 		set_isa(c, MIPS_CPU_ISA_II);
+		c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS;
 		c->options = MIPS_CPU_TLB | MIPS_CPU_FPU |
 			     MIPS_CPU_LLSC;
 		c->tlbsize = 32;
@@ -800,6 +899,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_R6000A;
 		__cpu_name[cpu] = "R6000A";
 		set_isa(c, MIPS_CPU_ISA_II);
+		c->fpu_msk31 |= FPU_CSR_CONDX | FPU_CSR_FS;
 		c->options = MIPS_CPU_TLB | MIPS_CPU_FPU |
 			     MIPS_CPU_LLSC;
 		c->tlbsize = 32;
@@ -850,8 +950,13 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 		c->tlbsize = 64;
 		break;
 	case PRID_IMP_R14000:
-		c->cputype = CPU_R14000;
-		__cpu_name[cpu] = "R14000";
+		if (((c->processor_id >> 4) & 0x0f) > 2) {
+			c->cputype = CPU_R16000;
+			__cpu_name[cpu] = "R16000";
+		} else {
+			c->cputype = CPU_R14000;
+			__cpu_name[cpu] = "R14000";
+		}
 		set_isa(c, MIPS_CPU_ISA_IV);
 		c->options = MIPS_CPU_TLB | MIPS_CPU_4K_CACHE | MIPS_CPU_4KEX |
 			     MIPS_CPU_FPU | MIPS_CPU_32FPR |
@@ -866,12 +971,14 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 			__cpu_name[cpu] = "ICT Loongson-2";
 			set_elf_platform(cpu, "loongson2e");
 			set_isa(c, MIPS_CPU_ISA_III);
+			c->fpu_msk31 |= FPU_CSR_CONDX;
 			break;
 		case PRID_REV_LOONGSON2F:
 			c->cputype = CPU_LOONGSON2;
 			__cpu_name[cpu] = "ICT Loongson-2";
 			set_elf_platform(cpu, "loongson2f");
 			set_isa(c, MIPS_CPU_ISA_III);
+			c->fpu_msk31 |= FPU_CSR_CONDX;
 			break;
 		case PRID_REV_LOONGSON3A:
 			c->cputype = CPU_LOONGSON3;
@@ -1308,6 +1415,9 @@ void cpu_probe(void)
 	c->cputype	= CPU_UNKNOWN;
 	c->writecombine = _CACHE_UNCACHED;
 
+	c->fpu_csr31	= FPU_CSR_RN;
+	c->fpu_msk31	= FPU_CSR_RSVD | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+
 	c->processor_id = read_c0_prid();
 	switch (c->processor_id & PRID_COMP_MASK) {
 	case PRID_COMP_LEGACY:
@@ -1364,16 +1474,10 @@ void cpu_probe(void)
 			       ~(1 << MIPS_PWCTL_PWEN_SHIFT));
 	}
 
-	if (c->options & MIPS_CPU_FPU) {
-		c->fpu_id = cpu_get_fpu_id();
-
-		if (c->isa_level & cpu_has_mips_r) {
-			if (c->fpu_id & MIPS_FPIR_3D)
-				c->ases |= MIPS_ASE_MIPS3D;
-			if (c->fpu_id & MIPS_FPIR_FREP)
-				c->options |= MIPS_CPU_FRE;
-		}
-	}
+	if (c->options & MIPS_CPU_FPU)
+		cpu_set_fpu_opts(c);
+	else
+		cpu_set_nofpu_opts(c);
 
 	if (cpu_has_mips_r2_r6) {
 		c->srsets = ((read_c0_srsctl() >> 26) & 0x0f) + 1;
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index d21264681e97..d434d5d5ae6e 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -25,9 +25,9 @@ static void crash_shutdown_secondary(void *ignore)
 		return;
 
 	local_irq_disable();
-	if (!cpu_isset(cpu, cpus_in_crash))
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
 		crash_save_cpu(regs, cpu);
-	cpu_set(cpu, cpus_in_crash);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
 
 	while (!atomic_read(&kexec_ready_to_reboot))
 		cpu_relax();
@@ -50,7 +50,7 @@ static void crash_kexec_prepare_cpus(void)
 	 */
 	pr_emerg("Sending IPI to other cpus...\n");
 	msecs = 10000;
-	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
 		cpu_relax();
 		mdelay(1);
 	}
@@ -66,5 +66,5 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crashing_cpu = smp_processor_id();
 	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus();
-	cpu_set(crashing_cpu, cpus_in_crash);
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 }
diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c
index 468f3eba4132..7f65b53d1b24 100644
--- a/arch/mips/kernel/csrc-bcm1480.c
+++ b/arch/mips/kernel/csrc-bcm1480.c
@@ -10,12 +10,9 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/clocksource.h>
+#include <linux/sched_clock.h>
 
 #include <asm/addrspace.h>
 #include <asm/io.h>
@@ -41,6 +38,11 @@ struct clocksource bcm1480_clocksource = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static u64 notrace sb1480_read_sched_clock(void)
+{
+	return __raw_readq(IOADDR(A_SCD_ZBBUS_CYCLE_COUNT));
+}
+
 void __init sb1480_clocksource_init(void)
 {
 	struct clocksource *cs = &bcm1480_clocksource;
@@ -50,4 +52,6 @@ void __init sb1480_clocksource_init(void)
 	plldiv = G_BCM1480_SYS_PLL_DIV(__raw_readq(IOADDR(A_SCD_SYSTEM_CFG)));
 	zbbus = ((plldiv >> 1) * 50000000) + ((plldiv & 1) * 25000000);
 	clocksource_register_hz(cs, zbbus);
+
+	sched_clock_register(sb1480_read_sched_clock, 64, zbbus);
 }
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
index 6cbbf6e106b9..722f5589cd1d 100644
--- a/arch/mips/kernel/csrc-ioasic.c
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -12,12 +12,9 @@
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <linux/clocksource.h>
+#include <linux/sched_clock.h>
 #include <linux/init.h>
 
 #include <asm/ds1287.h>
@@ -37,6 +34,11 @@ static struct clocksource clocksource_dec = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static u64 notrace dec_ioasic_read_sched_clock(void)
+{
+	return ioasic_read(IO_REG_FCTR);
+}
+
 int __init dec_ioasic_clocksource_init(void)
 {
 	unsigned int freq;
@@ -65,5 +67,8 @@ int __init dec_ioasic_clocksource_init(void)
 
 	clocksource_dec.rating = 200 + freq / 10000000;
 	clocksource_register_hz(&clocksource_dec, freq);
+
+	sched_clock_register(dec_ioasic_read_sched_clock, 32, freq);
+
 	return 0;
 }
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index decd1fa38d55..e5ed7ada1433 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -7,6 +7,7 @@
  */
 #include <linux/clocksource.h>
 #include <linux/init.h>
+#include <linux/sched_clock.h>
 
 #include <asm/time.h>
 
@@ -22,6 +23,11 @@ static struct clocksource clocksource_mips = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static u64 notrace r4k_read_sched_clock(void)
+{
+	return read_c0_count();
+}
+
 int __init init_r4k_clocksource(void)
 {
 	if (!cpu_has_counter || !mips_hpt_frequency)
@@ -32,5 +38,7 @@ int __init init_r4k_clocksource(void)
 
 	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 
+	sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
+
 	return 0;
 }
diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c
index 6ecb77d82063..d915652b4d56 100644
--- a/arch/mips/kernel/csrc-sb1250.c
+++ b/arch/mips/kernel/csrc-sb1250.c
@@ -10,12 +10,9 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/clocksource.h>
+#include <linux/sched_clock.h>
 
 #include <asm/addrspace.h>
 #include <asm/io.h>
@@ -33,15 +30,22 @@
  * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over
  * again.
  */
-static cycle_t sb1250_hpt_read(struct clocksource *cs)
+static inline cycle_t sb1250_hpt_get_cycles(void)
 {
 	unsigned int count;
+	void __iomem *addr;
 
-	count = G_SCD_TIMER_CNT(__raw_readq(IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM, R_SCD_TIMER_CNT))));
+	addr = IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM, R_SCD_TIMER_CNT));
+	count = G_SCD_TIMER_CNT(__raw_readq(addr));
 
 	return SB1250_HPT_VALUE - count;
 }
 
+static cycle_t sb1250_hpt_read(struct clocksource *cs)
+{
+	return sb1250_hpt_get_cycles();
+}
+
 struct clocksource bcm1250_clocksource = {
 	.name	= "bcm1250-counter-3",
 	.rating = 200,
@@ -50,6 +54,11 @@ struct clocksource bcm1250_clocksource = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static u64 notrace sb1250_read_sched_clock(void)
+{
+	return sb1250_hpt_get_cycles();
+}
+
 void __init sb1250_clocksource_init(void)
 {
 	struct clocksource *cs = &bcm1250_clocksource;
@@ -66,4 +75,6 @@ void __init sb1250_clocksource_init(void)
 						 R_SCD_TIMER_CFG)));
 
 	clocksource_register_hz(cs, V_SCD_TIMER_FREQ);
+
+	sched_clock_register(sb1250_read_sched_clock, 23, V_SCD_TIMER_FREQ);
 }
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index d2c09f6475c5..be4899f3c393 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -131,16 +131,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
 	return 0;
 }
 
-static inline unsigned get_fp_abi(int in_abi)
-{
-	/* If the ABI requirement is provided, simply return that */
-	if (in_abi != MIPS_ABI_FP_UNKNOWN)
-		return in_abi;
-
-	/* Unknown ABI */
-	return MIPS_ABI_FP_UNKNOWN;
-}
-
 int arch_check_elf(void *_ehdr, bool has_interpreter,
 		   struct arch_elf_state *state)
 {
@@ -151,10 +141,10 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
 	if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
 		return 0;
 
-	fp_abi = get_fp_abi(state->fp_abi);
+	fp_abi = state->fp_abi;
 
 	if (has_interpreter) {
-		interp_fp_abi = get_fp_abi(state->interp_fp_abi);
+		interp_fp_abi = state->interp_fp_abi;
 
 		abi0 = min(fp_abi, interp_fp_abi);
 		abi1 = max(fp_abi, interp_fp_abi);
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index af41ba6db960..7791840cf22c 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -10,6 +10,7 @@
 
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/compiler.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -185,7 +186,7 @@ syscall_exit_work:
  * For C code use the inline version named instruction_hazard().
  */
 LEAF(mips_ihb)
-	.set	mips32r2
+	.set	MIPS_ISA_LEVEL_RAW
 	jr.hb	ra
 	nop
 	END(mips_ihb)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 2ebaabe3af15..af42e7003f12 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -360,12 +360,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	.set	mips1
 	SET_HARDFLOAT
 	cfc1	a1, fcr31
-	li	a2, ~(0x3f << 12)
-	and	a2, a1
-	ctc1	a2, fcr31
 	.set	pop
-	TRACE_IRQS_ON
-	STI
+	CLI
+	TRACE_IRQS_OFF
+	.endm
+
+	.macro	__build_clear_msa_fpe
+	_cfcmsa	a1, MSA_CSR
+	CLI
+	TRACE_IRQS_OFF
 	.endm
 
 	.macro	__build_clear_ade
@@ -426,7 +429,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	BUILD_HANDLER cpu cpu sti silent		/* #11 */
 	BUILD_HANDLER ov ov sti silent			/* #12 */
 	BUILD_HANDLER tr tr sti silent			/* #13 */
-	BUILD_HANDLER msa_fpe msa_fpe sti silent	/* #14 */
+	BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent	/* #14 */
 	BUILD_HANDLER fpe fpe fpe silent		/* #15 */
 	BUILD_HANDLER ftlb ftlb none silent		/* #16 */
 	BUILD_HANDLER msa msa sti silent		/* #21 */
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 368c88b7eb6c..e4f62b7875d2 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -176,6 +176,17 @@ void __init check_wait(void)
 		cpu_wait = rm7k_wait_irqoff;
 		break;
 
+	case CPU_PROAPTIV:
+	case CPU_P5600:
+		/*
+		 * Incoming Fast Debug Channel (FDC) data during a wait
+		 * instruction causes the wait never to resume, even if an
+		 * interrupt is received. Avoid using wait at all if FDC data is
+		 * likely to be received.
+		 */
+		if (IS_ENABLED(CONFIG_MIPS_EJTAG_FDC_TTY))
+			break;
+		/* fall through */
 	case CPU_M14KC:
 	case CPU_M14KEC:
 	case CPU_24K:
@@ -183,8 +194,6 @@ void __init check_wait(void)
 	case CPU_1004K:
 	case CPU_1074K:
 	case CPU_INTERAPTIV:
-	case CPU_PROAPTIV:
-	case CPU_P5600:
 	case CPU_M5150:
 	case CPU_QEMU_GENERIC:
 		cpu_wait = r4k_wait;
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 362bb3707e62..3e4491aa6d6b 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -114,8 +114,8 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
 	/* Compute new global allowed CPU set if necessary */
 	ti = task_thread_info(p);
 	if (test_ti_thread_flag(ti, TIF_FPUBOUND) &&
-	    cpus_intersects(*new_mask, mt_fpu_cpumask)) {
-		cpus_and(*effective_mask, *new_mask, mt_fpu_cpumask);
+	    cpumask_intersects(new_mask, &mt_fpu_cpumask)) {
+		cpumask_and(effective_mask, new_mask, &mt_fpu_cpumask);
 		retval = set_cpus_allowed_ptr(p, effective_mask);
 	} else {
 		cpumask_copy(effective_mask, new_mask);
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 64d17e41093b..f2977f00911b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -187,7 +187,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
 }
 
 /**
- * movt_func - Emulate a MOVT instruction
+ * movf_func - Emulate a MOVF instruction
  * @regs: Process register set
  * @ir: Instruction
  *
@@ -200,9 +200,12 @@ static int movf_func(struct pt_regs *regs, u32 ir)
 
 	csr = current->thread.fpu.fcr31;
 	cond = fpucondbit[MIPSInst_RT(ir) >> 2];
+
 	if (((csr & cond) == 0) && MIPSInst_RD(ir))
 		regs->regs[MIPSInst_RD(ir)] = regs->regs[MIPSInst_RS(ir)];
+
 	MIPS_R2_STATS(movs);
+
 	return 0;
 }
 
@@ -895,8 +898,9 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst,
  * mipsr2_decoder: Decode and emulate a MIPS R2 instruction
  * @regs: Process register set
  * @inst: Instruction to decode and emulate
+ * @fcr31: Floating Point Control and Status Register returned
  */
-int mipsr2_decoder(struct pt_regs *regs, u32 inst)
+int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31)
 {
 	int err = 0;
 	unsigned long vaddr;
@@ -1165,6 +1169,13 @@ fpu_emul:
 
 		err = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
 					       &fault_addr);
+		*fcr31 = current->thread.fpu.fcr31;
+
+		/*
+		 * We can't allow the emulated instruction to leave any of
+		 * the cause bits set in $fcr31.
+		 */
+		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
 
 		/*
 		 * this is a tricky issue - lose_fpu() uses LL/SC atomics
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 9466184d0039..cc1b6fadf089 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -558,8 +558,10 @@ static int mipspmu_get_irq(void)
 	if (mipspmu.irq >= 0) {
 		/* Request my own irq handler. */
 		err = request_irq(mipspmu.irq, mipsxx_pmu_handle_irq,
-			IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD,
-			"mips_perf_pmu", NULL);
+				  IRQF_PERCPU | IRQF_NOBALANCING |
+				  IRQF_NO_THREAD | IRQF_NO_SUSPEND |
+				  IRQF_SHARED,
+				  "mips_perf_pmu", &mipspmu);
 		if (err) {
 			pr_warn("Unable to request IRQ%d for MIPS performance counters!\n",
 				mipspmu.irq);
@@ -582,7 +584,7 @@ static int mipspmu_get_irq(void)
 static void mipspmu_free_irq(void)
 {
 	if (mipspmu.irq >= 0)
-		free_irq(mipspmu.irq, NULL);
+		free_irq(mipspmu.irq, &mipspmu);
 	else if (cp0_perfcount_irq < 0)
 		perf_irq = save_perf_irq;
 }
@@ -775,6 +777,7 @@ static int n_counters(void)
 
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 		counters = 4;
 		break;
 
@@ -822,6 +825,13 @@ static const struct mips_perf_event mipsxxcore_event_map2
 	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x27, CNTR_ODD, T },
 };
 
+static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x01, CNTR_EVEN },
+	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x01, CNTR_ODD },
+};
+
 static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL },
 	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL },
@@ -1005,6 +1015,61 @@ static const struct mips_perf_event mipsxxcore_cache_map2
 },
 };
 
+static const struct mips_perf_event loongson3_cache_map
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+	/*
+	 * Like some other architectures (e.g. ARM), the performance
+	 * counters don't differentiate between read and write
+	 * accesses/misses, so this isn't strictly correct, but it's the
+	 * best we can do. Writes and reads get combined.
+	 */
+	[C(OP_READ)] = {
+		[C(RESULT_MISS)]        = { 0x04, CNTR_ODD },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_MISS)]        = { 0x04, CNTR_ODD },
+	},
+},
+[C(L1I)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_MISS)]        = { 0x04, CNTR_EVEN },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_MISS)]        = { 0x04, CNTR_EVEN },
+	},
+},
+[C(DTLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_MISS)]        = { 0x09, CNTR_ODD },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_MISS)]        = { 0x09, CNTR_ODD },
+	},
+},
+[C(ITLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_MISS)]        = { 0x0c, CNTR_ODD },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_MISS)]        = { 0x0c, CNTR_ODD },
+	},
+},
+[C(BPU)] = {
+	/* Using the same code for *HW_BRANCH* */
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]      = { 0x02, CNTR_EVEN },
+		[C(RESULT_MISS)]        = { 0x02, CNTR_ODD },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]      = { 0x02, CNTR_EVEN },
+		[C(RESULT_MISS)]        = { 0x02, CNTR_ODD },
+	},
+},
+};
+
 /* BMIPS5000 */
 static const struct mips_perf_event bmips5000_cache_map
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -1539,6 +1604,10 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 		else
 			raw_event.cntr_mask =
 				raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
+		break;
+	case CPU_LOONGSON3:
+		raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
+	break;
 	}
 
 	raw_event.event_id = base_id;
@@ -1615,8 +1684,7 @@ init_hw_perf_events(void)
 
 	if (get_c0_perfcount_int)
 		irq = get_c0_perfcount_int();
-	else if ((cp0_perfcount_irq >= 0) &&
-		 (cp0_compare_irq != cp0_perfcount_irq))
+	else if (cp0_perfcount_irq >= 0)
 		irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
 	else
 		irq = -1;
@@ -1669,6 +1737,11 @@ init_hw_perf_events(void)
 		mipspmu.general_event_map = &mipsxxcore_event_map;
 		mipspmu.cache_event_map = &mipsxxcore_cache_map;
 		break;
+	case CPU_LOONGSON3:
+		mipspmu.name = "mips/loongson3";
+		mipspmu.general_event_map = &loongson3_event_map;
+		mipspmu.cache_event_map = &loongson3_cache_map;
+		break;
 	case CPU_CAVIUM_OCTEON:
 	case CPU_CAVIUM_OCTEON_PLUS:
 	case CPU_CAVIUM_OCTEON2:
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 130af7d26a9c..298b2b773d12 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -120,6 +120,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (cpu_has_msa)	seq_printf(m, "%s", " msa");
 	if (cpu_has_eva)	seq_printf(m, "%s", " eva");
 	if (cpu_has_htw)	seq_printf(m, "%s", " htw");
+	if (cpu_has_xpa)	seq_printf(m, "%s", " xpa");
 	seq_printf(m, "\n");
 
 	if (cpu_has_mmips) {
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index bf85cc180d91..f2975d4d1e44 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -49,7 +49,7 @@
 void arch_cpu_idle_dead(void)
 {
 	/* What the heck is this check doing ? */
-	if (!cpu_isset(smp_processor_id(), cpu_callin_map))
+	if (!cpumask_test_cpu(smp_processor_id(), &cpu_callin_map))
 		play_dead();
 }
 #endif
@@ -107,8 +107,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+/*
+ * Copy architecture-specific thread state
+ */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long arg, struct task_struct *p)
+	unsigned long kthread_arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs, *regs = current_pt_regs();
@@ -123,11 +126,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	childksp = (unsigned long) childregs;
 	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
 	if (unlikely(p->flags & PF_KTHREAD)) {
+		/* kernel thread */
 		unsigned long status = p->thread.cp0_status;
 		memset(childregs, 0, sizeof(struct pt_regs));
 		ti->addr_limit = KERNEL_DS;
 		p->thread.reg16 = usp; /* fn */
-		p->thread.reg17 = arg;
+		p->thread.reg17 = kthread_arg;
 		p->thread.reg29 = childksp;
 		p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
@@ -139,6 +143,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		childregs->cp0_status = status;
 		return 0;
 	}
+
+	/* user thread */
 	*childregs = *regs;
 	childregs->regs[7] = 0; /* Clear error flag */
 	childregs->regs[2] = 0; /* Child gets zero as return value */
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 452d4350ce42..e303cb1ef2f4 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -64,7 +64,10 @@ int __init __dt_register_buses(const char *bus0, const char *bus1)
 		panic("device tree not present");
 
 	strlcpy(of_ids[0].compatible, bus0, sizeof(of_ids[0].compatible));
-	strlcpy(of_ids[1].compatible, bus1, sizeof(of_ids[1].compatible));
+	if (bus1) {
+		strlcpy(of_ids[1].compatible, bus1,
+			sizeof(of_ids[1].compatible));
+	}
 
 	if (of_platform_populate(NULL, of_ids, NULL, NULL))
 		panic("failed to populate DT");
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 510452812594..d544e774eea6 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -32,6 +32,7 @@
 
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
+#include <asm/cpu-info.h>
 #include <asm/dsp.h>
 #include <asm/fpu.h>
 #include <asm/mipsregs.h>
@@ -46,6 +47,26 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+static void init_fp_ctx(struct task_struct *target)
+{
+	/* If FP has been used then the target already has context */
+	if (tsk_used_math(target))
+		return;
+
+	/* Begin with data registers set to all 1s... */
+	memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+	/* ...and FCSR zeroed */
+	target->thread.fpu.fcr31 = 0;
+
+	/*
+	 * Record that the target has "used" math, such that the context
+	 * just initialised, and any modifications made by the caller,
+	 * aren't discarded.
+	 */
+	set_stopped_child_used_math(target);
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -137,11 +158,15 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
 {
 	union fpureg *fregs;
 	u64 fpr_val;
+	u32 fcr31;
+	u32 value;
+	u32 mask;
 	int i;
 
 	if (!access_ok(VERIFY_READ, data, 33 * 8))
 		return -EIO;
 
+	init_fp_ctx(child);
 	fregs = get_fpu_regs(child);
 
 	for (i = 0; i < 32; i++) {
@@ -149,8 +174,10 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
 		set_fpr64(&fregs[i], 0, fpr_val);
 	}
 
-	__get_user(child->thread.fpu.fcr31, data + 64);
-	child->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+	__get_user(value, data + 64);
+	fcr31 = child->thread.fpu.fcr31;
+	mask = current_cpu_data.fpu_msk31;
+	child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
 
 	/* FIR may not be written.  */
 
@@ -439,6 +466,8 @@ static int fpr_set(struct task_struct *target,
 
 	/* XXX fcr31  */
 
+	init_fp_ctx(target);
+
 	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 					  &target->thread.fpu,
@@ -660,12 +689,7 @@ long arch_ptrace(struct task_struct *child, long request,
 		case FPR_BASE ... FPR_BASE + 31: {
 			union fpureg *fregs = get_fpu_regs(child);
 
-			if (!tsk_used_math(child)) {
-				/* FP not yet used  */
-				memset(&child->thread.fpu, ~0,
-				       sizeof(child->thread.fpu));
-				child->thread.fpu.fcr31 = 0;
-			}
+			init_fp_ctx(child);
 #ifdef CONFIG_32BIT
 			if (test_thread_flag(TIF_32BIT_FPREGS)) {
 				/*
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 435ea652f5fa..5087a4b72e6b 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -115,11 +115,9 @@ LEAF(_restore_fp)
  * the property that no matter whether considered as single or as double
  * precision represents signaling NANS.
  *
- * We initialize fcr31 to rounding to nearest, no exceptions.
+ * The value to initialize fcr31 to comes in $a0.
  */
 
-#define FPU_DEFAULT  0x00000000
-
 	.set push
 	SET_HARDFLOAT
 
@@ -129,8 +127,7 @@ LEAF(_init_fpu)
 	or	t0, t1
 	mtc0	t0, CP0_STATUS
 
-	li	t1, FPU_DEFAULT
-	ctc1	t1, fcr31
+	ctc1	a0, fcr31
 
 	li	t0, -1
 
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 676c5030a953..1d88af26ba82 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -34,7 +34,6 @@
 	.endm
 
 	.set	noreorder
-	.set	MIPS_ISA_ARCH_LEVEL_RAW
 
 LEAF(_save_fp_context)
 	.set	push
@@ -103,6 +102,7 @@ LEAF(_save_fp_context)
 	/* Save 32-bit process floating point context */
 LEAF(_save_fp_context32)
 	.set push
+	.set MIPS_ISA_ARCH_LEVEL_RAW
 	SET_HARDFLOAT
 	cfc1	t1, fcr31
 
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 3b1a36f13a7d..04cbbde3521b 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -165,11 +165,9 @@ LEAF(_init_msa_upper)
  * the property that no matter whether considered as single or as double
  * precision represents signaling NANS.
  *
- * We initialize fcr31 to rounding to nearest, no exceptions.
+ * The value to initialize fcr31 to comes in $a0.
  */
 
-#define FPU_DEFAULT  0x00000000
-
 	.set push
 	SET_HARDFLOAT
 
@@ -180,8 +178,7 @@ LEAF(_init_fpu)
 	mtc0	t0, CP0_STATUS
 	enable_fpu_hazard
 
-	li	t1, FPU_DEFAULT
-	ctc1	t1, fcr31
+	ctc1	a0, fcr31
 
 	li	t1, -1				# SNaN
 
diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c
index 07fc5244aed4..7c746d3458e7 100644
--- a/arch/mips/kernel/reset.c
+++ b/arch/mips/kernel/reset.c
@@ -11,6 +11,7 @@
 #include <linux/pm.h>
 #include <linux/types.h>
 #include <linux/reboot.h>
+#include <linux/delay.h>
 
 #include <asm/reboot.h>
 
@@ -29,16 +30,40 @@ void machine_restart(char *command)
 {
 	if (_machine_restart)
 		_machine_restart(command);
+
+#ifdef CONFIG_SMP
+	preempt_disable();
+	smp_send_stop();
+#endif
+	do_kernel_restart(command);
+	mdelay(1000);
+	pr_emerg("Reboot failed -- System halted\n");
+	local_irq_disable();
+	while (1);
 }
 
 void machine_halt(void)
 {
 	if (_machine_halt)
 		_machine_halt();
+
+#ifdef CONFIG_SMP
+	preempt_disable();
+	smp_send_stop();
+#endif
+	local_irq_disable();
+	while (1);
 }
 
 void machine_power_off(void)
 {
 	if (pm_power_off)
 		pm_power_off();
+
+#ifdef CONFIG_SMP
+	preempt_disable();
+	smp_send_stop();
+#endif
+	local_irq_disable();
+	while (1);
 }
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 058929041368..be73c491182b 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -31,6 +31,7 @@
 #include <asm/bootinfo.h>
 #include <asm/bugs.h>
 #include <asm/cache.h>
+#include <asm/cdmm.h>
 #include <asm/cpu.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -763,6 +764,7 @@ void __init setup_arch(char **cmdline_p)
 	cpu_probe();
 	prom_init();
 
+	setup_early_fdc_console();
 #ifdef CONFIG_EARLY_PRINTK
 	setup_early_printk();
 #endif
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index b8bd9340c9c7..fd528d7ea278 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -362,7 +362,7 @@ static int bmips_cpu_disable(void)
 	pr_info("SMP: CPU%d is offline\n", cpu);
 
 	set_cpu_online(cpu, false);
-	cpu_clear(cpu, cpu_callin_map);
+	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	clear_c0_status(IE_IRQ5);
 
 	local_flush_tlb_all();
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index e36a859af666..d5e0f949dc48 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -66,7 +66,7 @@ static void cmp_smp_finish(void)
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* If we have an FPU, enroll ourselves in the FPU-full mask */
 	if (cpu_has_fpu)
-		cpu_set(smp_processor_id(), mt_fpu_cpumask);
+		cpumask_set_cpu(smp_processor_id(), &mt_fpu_cpumask);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	local_irq_enable();
@@ -110,7 +110,7 @@ void __init cmp_smp_setup(void)
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* If we have an FPU, enroll ourselves in the FPU-full mask */
 	if (cpu_has_fpu)
-		cpu_set(0, mt_fpu_cpumask);
+		cpumask_set_cpu(0, &mt_fpu_cpumask);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	for (i = 1; i < NR_CPUS; i++) {
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index bed7590e475f..7e011f95bb8e 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -88,6 +88,12 @@ static void __init cps_smp_setup(void)
 
 	/* Make core 0 coherent with everything */
 	write_gcr_cl_coherence(0xff);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
 }
 
 static void __init cps_prepare_cpus(unsigned int max_cpus)
@@ -284,7 +290,7 @@ static void cps_smp_finish(void)
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* If we have an FPU, enroll ourselves in the FPU-full mask */
 	if (cpu_has_fpu)
-		cpu_set(smp_processor_id(), mt_fpu_cpumask);
+		cpumask_set_cpu(smp_processor_id(), &mt_fpu_cpumask);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	local_irq_enable();
@@ -307,7 +313,7 @@ static int cps_cpu_disable(void)
 	atomic_sub(1 << cpu_vpe_id(&current_cpu_data), &core_cfg->vpe_mask);
 	smp_mb__after_atomic();
 	set_cpu_online(cpu, false);
-	cpu_clear(cpu, cpu_callin_map);
+	cpumask_clear_cpu(cpu, &cpu_callin_map);
 
 	return 0;
 }
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 17ea705f6c40..86311a164ef1 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -178,7 +178,7 @@ static void vsmp_smp_finish(void)
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* If we have an FPU, enroll ourselves in the FPU-full mask */
 	if (cpu_has_fpu)
-		cpu_set(smp_processor_id(), mt_fpu_cpumask);
+		cpumask_set_cpu(smp_processor_id(), &mt_fpu_cpumask);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	local_irq_enable();
@@ -239,7 +239,7 @@ static void __init vsmp_smp_setup(void)
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* If we have an FPU, enroll ourselves in the FPU-full mask */
 	if (cpu_has_fpu)
-		cpu_set(0, mt_fpu_cpumask);
+		cpumask_set_cpu(0, &mt_fpu_cpumask);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 	if (!cpu_has_mipsmt)
 		return;
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 1c0d8c50b7e1..193ace7955fb 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -75,30 +75,30 @@ static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
 
-	cpu_set(cpu, cpu_sibling_setup_map);
+	cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
-		for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		for_each_cpu(i, &cpu_sibling_setup_map) {
 			if (cpu_data[cpu].package == cpu_data[i].package &&
 				    cpu_data[cpu].core == cpu_data[i].core) {
-				cpu_set(i, cpu_sibling_map[cpu]);
-				cpu_set(cpu, cpu_sibling_map[i]);
+				cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
+				cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
 			}
 		}
 	} else
-		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpumask_set_cpu(cpu, &cpu_sibling_map[cpu]);
 }
 
 static inline void set_cpu_core_map(int cpu)
 {
 	int i;
 
-	cpu_set(cpu, cpu_core_setup_map);
+	cpumask_set_cpu(cpu, &cpu_core_setup_map);
 
-	for_each_cpu_mask(i, cpu_core_setup_map) {
+	for_each_cpu(i, &cpu_core_setup_map) {
 		if (cpu_data[cpu].package == cpu_data[i].package) {
-			cpu_set(i, cpu_core_map[cpu]);
-			cpu_set(cpu, cpu_core_map[i]);
+			cpumask_set_cpu(i, &cpu_core_map[cpu]);
+			cpumask_set_cpu(cpu, &cpu_core_map[i]);
 		}
 	}
 }
@@ -138,7 +138,7 @@ asmlinkage void start_secondary(void)
 	cpu = smp_processor_id();
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 
-	cpu_set(cpu, cpu_coherent_mask);
+	cpumask_set_cpu(cpu, &cpu_coherent_mask);
 	notify_cpu_starting(cpu);
 
 	set_cpu_online(cpu, true);
@@ -146,7 +146,7 @@ asmlinkage void start_secondary(void)
 	set_cpu_sibling_map(cpu);
 	set_cpu_core_map(cpu);
 
-	cpu_set(cpu, cpu_callin_map);
+	cpumask_set_cpu(cpu, &cpu_callin_map);
 
 	synchronise_count_slave(cpu);
 
@@ -176,10 +176,8 @@ static void stop_this_cpu(void *dummy)
 	 * Remove this CPU:
 	 */
 	set_cpu_online(smp_processor_id(), false);
-	for (;;) {
-		if (cpu_wait)
-			(*cpu_wait)();		/* Wait if available. */
-	}
+	local_irq_disable();
+	while (1);
 }
 
 void smp_send_stop(void)
@@ -210,7 +208,7 @@ void smp_prepare_boot_cpu(void)
 {
 	set_cpu_possible(0, true);
 	set_cpu_online(0, true);
-	cpu_set(0, cpu_callin_map);
+	cpumask_set_cpu(0, &cpu_callin_map);
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
@@ -220,7 +218,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	/*
 	 * Trust is futile.  We should really have timeouts ...
 	 */
-	while (!cpu_isset(cpu, cpu_callin_map))
+	while (!cpumask_test_cpu(cpu, &cpu_callin_map))
 		udelay(100);
 
 	synchronise_count_master(cpu);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 33984c04b60b..ba32e48d4697 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -12,6 +12,7 @@
  * Copyright (C) 2000, 2001, 2012 MIPS Technologies, Inc.  All rights reserved.
  * Copyright (C) 2014, Imagination Technologies Ltd.
  */
+#include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/context_tracking.h>
@@ -699,29 +700,60 @@ asmlinkage void do_ov(struct pt_regs *regs)
 	exception_exit(prev_state);
 }
 
-int process_fpemu_return(int sig, void __user *fault_addr)
+int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
-	if (sig == SIGSEGV || sig == SIGBUS) {
-		struct siginfo si = {0};
+	struct siginfo si = { 0 };
+
+	switch (sig) {
+	case 0:
+		return 0;
+
+	case SIGFPE:
 		si.si_addr = fault_addr;
 		si.si_signo = sig;
-		if (sig == SIGSEGV) {
-			down_read(&current->mm->mmap_sem);
-			if (find_vma(current->mm, (unsigned long)fault_addr))
-				si.si_code = SEGV_ACCERR;
-			else
-				si.si_code = SEGV_MAPERR;
-			up_read(&current->mm->mmap_sem);
-		} else {
-			si.si_code = BUS_ADRERR;
-		}
+		/*
+		 * Inexact can happen together with Overflow or Underflow.
+		 * Respect the mask to deliver the correct exception.
+		 */
+		fcr31 &= (fcr31 & FPU_CSR_ALL_E) <<
+			 (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E));
+		if (fcr31 & FPU_CSR_INV_X)
+			si.si_code = FPE_FLTINV;
+		else if (fcr31 & FPU_CSR_DIV_X)
+			si.si_code = FPE_FLTDIV;
+		else if (fcr31 & FPU_CSR_OVF_X)
+			si.si_code = FPE_FLTOVF;
+		else if (fcr31 & FPU_CSR_UDF_X)
+			si.si_code = FPE_FLTUND;
+		else if (fcr31 & FPU_CSR_INE_X)
+			si.si_code = FPE_FLTRES;
+		else
+			si.si_code = __SI_FAULT;
+		force_sig_info(sig, &si, current);
+		return 1;
+
+	case SIGBUS:
+		si.si_addr = fault_addr;
+		si.si_signo = sig;
+		si.si_code = BUS_ADRERR;
 		force_sig_info(sig, &si, current);
 		return 1;
-	} else if (sig) {
+
+	case SIGSEGV:
+		si.si_addr = fault_addr;
+		si.si_signo = sig;
+		down_read(&current->mm->mmap_sem);
+		if (find_vma(current->mm, (unsigned long)fault_addr))
+			si.si_code = SEGV_ACCERR;
+		else
+			si.si_code = SEGV_MAPERR;
+		up_read(&current->mm->mmap_sem);
+		force_sig_info(sig, &si, current);
+		return 1;
+
+	default:
 		force_sig(sig, current);
 		return 1;
-	} else {
-		return 0;
 	}
 }
 
@@ -729,7 +761,8 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode,
 		       unsigned long old_epc, unsigned long old_ra)
 {
 	union mips_instruction inst = { .word = opcode };
-	void __user *fault_addr = NULL;
+	void __user *fault_addr;
+	unsigned long fcr31;
 	int sig;
 
 	/* If it's obviously not an FP instruction, skip it */
@@ -759,13 +792,20 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode,
 	/* Run the emulator */
 	sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 				       &fault_addr);
+	fcr31 = current->thread.fpu.fcr31;
 
-	/* If something went wrong, signal */
-	process_fpemu_return(sig, fault_addr);
+	/*
+	 * We can't allow the emulated instruction to leave any of
+	 * the cause bits set in $fcr31.
+	 */
+	current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
 
 	/* Restore the hardware register state */
 	own_fpu(1);
 
+	/* Send a signal if required.  */
+	process_fpemu_return(sig, fault_addr, fcr31);
+
 	return 0;
 }
 
@@ -775,18 +815,21 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode,
 asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
 {
 	enum ctx_state prev_state;
-	siginfo_t info = {0};
+	void __user *fault_addr;
+	int sig;
 
 	prev_state = exception_enter();
 	if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs),
 		       SIGFPE) == NOTIFY_STOP)
 		goto out;
+
+	/* Clear FCSR.Cause before enabling interrupts */
+	write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+	local_irq_enable();
+
 	die_if_kernel("FP exception in kernel code", regs);
 
 	if (fcr31 & FPU_CSR_UNI_X) {
-		int sig;
-		void __user *fault_addr = NULL;
-
 		/*
 		 * Unimplemented operation exception.  If we've got the full
 		 * software emulator on-board, let's use it...
@@ -803,36 +846,23 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
 		/* Run the emulator */
 		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 					       &fault_addr);
+		fcr31 = current->thread.fpu.fcr31;
 
 		/*
 		 * We can't allow the emulated instruction to leave any of
-		 * the cause bit set in $fcr31.
+		 * the cause bits set in $fcr31.
 		 */
 		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
 
 		/* Restore the hardware register state */
 		own_fpu(1);	/* Using the FPU again.	 */
+	} else {
+		sig = SIGFPE;
+		fault_addr = (void __user *) regs->cp0_epc;
+	}
 
-		/* If something went wrong, signal */
-		process_fpemu_return(sig, fault_addr);
-
-		goto out;
-	} else if (fcr31 & FPU_CSR_INV_X)
-		info.si_code = FPE_FLTINV;
-	else if (fcr31 & FPU_CSR_DIV_X)
-		info.si_code = FPE_FLTDIV;
-	else if (fcr31 & FPU_CSR_OVF_X)
-		info.si_code = FPE_FLTOVF;
-	else if (fcr31 & FPU_CSR_UDF_X)
-		info.si_code = FPE_FLTUND;
-	else if (fcr31 & FPU_CSR_INE_X)
-		info.si_code = FPE_FLTRES;
-	else
-		info.si_code = __SI_FAULT;
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_addr = (void __user *) regs->cp0_epc;
-	force_sig_info(SIGFPE, &info, current);
+	/* Send a signal if required.  */
+	process_fpemu_return(sig, fault_addr, fcr31);
 
 out:
 	exception_exit(prev_state);
@@ -879,9 +909,9 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
 		break;
 	case BRK_MEMU:
 		/*
-		 * Address errors may be deliberately induced by the FPU
-		 * emulator to retake control of the CPU after executing the
-		 * instruction in the delay slot of an emulated branch.
+		 * This breakpoint code is used by the FPU emulator to retake
+		 * control of the CPU after executing the instruction from the
+		 * delay slot of an emulated branch.
 		 *
 		 * Terminate if exception was recognized as a delay slot return
 		 * otherwise handle as normal.
@@ -901,10 +931,9 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
 
 asmlinkage void do_bp(struct pt_regs *regs)
 {
+	unsigned long epc = msk_isa16_mode(exception_epc(regs));
 	unsigned int opcode, bcode;
 	enum ctx_state prev_state;
-	unsigned long epc;
-	u16 instr[2];
 	mm_segment_t seg;
 
 	seg = get_fs();
@@ -913,26 +942,28 @@ asmlinkage void do_bp(struct pt_regs *regs)
 
 	prev_state = exception_enter();
 	if (get_isa16_mode(regs->cp0_epc)) {
-		/* Calculate EPC. */
-		epc = exception_epc(regs);
-		if (cpu_has_mmips) {
-			if ((__get_user(instr[0], (u16 __user *)msk_isa16_mode(epc)) ||
-			    (__get_user(instr[1], (u16 __user *)msk_isa16_mode(epc + 2)))))
-				goto out_sigsegv;
-			opcode = (instr[0] << 16) | instr[1];
-		} else {
+		u16 instr[2];
+
+		if (__get_user(instr[0], (u16 __user *)epc))
+			goto out_sigsegv;
+
+		if (!cpu_has_mmips) {
 			/* MIPS16e mode */
-			if (__get_user(instr[0],
-				       (u16 __user *)msk_isa16_mode(epc)))
+			bcode = (instr[0] >> 5) & 0x3f;
+		} else if (mm_insn_16bit(instr[0])) {
+			/* 16-bit microMIPS BREAK */
+			bcode = instr[0] & 0xf;
+		} else {
+			/* 32-bit microMIPS BREAK */
+			if (__get_user(instr[1], (u16 __user *)(epc + 2)))
 				goto out_sigsegv;
-			bcode = (instr[0] >> 6) & 0x3f;
-			do_trap_or_bp(regs, bcode, "Break");
-			goto out;
+			opcode = (instr[0] << 16) | instr[1];
+			bcode = (opcode >> 6) & ((1 << 20) - 1);
 		}
 	} else {
-		if (__get_user(opcode,
-			       (unsigned int __user *) exception_epc(regs)))
+		if (__get_user(opcode, (unsigned int __user *)epc))
 			goto out_sigsegv;
+		bcode = (opcode >> 6) & ((1 << 20) - 1);
 	}
 
 	/*
@@ -941,9 +972,8 @@ asmlinkage void do_bp(struct pt_regs *regs)
 	 * Gas is bug-compatible, but not always, grrr...
 	 * We handle both cases with a simple heuristics.  --macro
 	 */
-	bcode = ((opcode >> 6) & ((1 << 20) - 1));
 	if (bcode >= (1 << 10))
-		bcode >>= 10;
+		bcode = ((bcode & ((1 << 10) - 1)) << 10) | (bcode >> 10);
 
 	/*
 	 * notify the kprobe handlers, if instruction is likely to
@@ -1033,22 +1063,24 @@ asmlinkage void do_ri(struct pt_regs *regs)
 	 * as quickly as possible.
 	 */
 	if (mipsr2_emulation && cpu_has_mips_r6 &&
-	    likely(user_mode(regs))) {
-		if (likely(get_user(opcode, epc) >= 0)) {
-			status = mipsr2_decoder(regs, opcode);
-			switch (status) {
-			case 0:
-			case SIGEMT:
-				task_thread_info(current)->r2_emul_return = 1;
-				return;
-			case SIGILL:
-				goto no_r2_instr;
-			default:
-				process_fpemu_return(status,
-						     &current->thread.cp0_baduaddr);
-				task_thread_info(current)->r2_emul_return = 1;
-				return;
-			}
+	    likely(user_mode(regs)) &&
+	    likely(get_user(opcode, epc) >= 0)) {
+		unsigned long fcr31 = 0;
+
+		status = mipsr2_decoder(regs, opcode, &fcr31);
+		switch (status) {
+		case 0:
+		case SIGEMT:
+			task_thread_info(current)->r2_emul_return = 1;
+			return;
+		case SIGILL:
+			goto no_r2_instr;
+		default:
+			process_fpemu_return(status,
+					     &current->thread.cp0_baduaddr,
+					     fcr31);
+			task_thread_info(current)->r2_emul_return = 1;
+			return;
 		}
 	}
 
@@ -1121,13 +1153,13 @@ static void mt_ase_fp_affinity(void)
 		 * restricted the allowed set to exclude any CPUs with FPUs,
 		 * we'll skip the procedure.
 		 */
-		if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) {
+		if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
 			cpumask_t tmask;
 
 			current->thread.user_cpus_allowed
 				= current->cpus_allowed;
-			cpus_and(tmask, current->cpus_allowed,
-				mt_fpu_cpumask);
+			cpumask_and(&tmask, &current->cpus_allowed,
+				    &mt_fpu_cpumask);
 			set_cpus_allowed_ptr(current, &tmask);
 			set_thread_flag(TIF_FPUBOUND);
 		}
@@ -1293,10 +1325,13 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 	enum ctx_state prev_state;
 	unsigned int __user *epc;
 	unsigned long old_epc, old31;
+	void __user *fault_addr;
 	unsigned int opcode;
+	unsigned long fcr31;
 	unsigned int cpid;
 	int status, err;
 	unsigned long __maybe_unused flags;
+	int sig;
 
 	prev_state = exception_enter();
 	cpid = (regs->cp0_cause >> CAUSEB_CE) & 3;
@@ -1313,7 +1348,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 		status = -1;
 
 		if (unlikely(compute_return_epc(regs) < 0))
-			goto out;
+			break;
 
 		if (get_isa16_mode(regs->cp0_epc)) {
 			unsigned short mmop[2] = { 0 };
@@ -1346,59 +1381,73 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 			force_sig(status, current);
 		}
 
-		goto out;
+		break;
 
 	case 3:
 		/*
-		 * Old (MIPS I and MIPS II) processors will set this code
-		 * for COP1X opcode instructions that replaced the original
-		 * COP3 space.	We don't limit COP1 space instructions in
-		 * the emulator according to the CPU ISA, so we want to
-		 * treat COP1X instructions consistently regardless of which
-		 * code the CPU chose.	Therefore we redirect this trap to
-		 * the FP emulator too.
-		 *
-		 * Then some newer FPU-less processors use this code
-		 * erroneously too, so they are covered by this choice
-		 * as well.
+		 * The COP3 opcode space and consequently the CP0.Status.CU3
+		 * bit and the CP0.Cause.CE=3 encoding have been removed as
+		 * of the MIPS III ISA.  From the MIPS IV and MIPS32r2 ISAs
+		 * up the space has been reused for COP1X instructions, that
+		 * are enabled by the CP0.Status.CU1 bit and consequently
+		 * use the CP0.Cause.CE=1 encoding for Coprocessor Unusable
+		 * exceptions.  Some FPU-less processors that implement one
+		 * of these ISAs however use this code erroneously for COP1X
+		 * instructions.  Therefore we redirect this trap to the FP
+		 * emulator too.
 		 */
-		if (raw_cpu_has_fpu)
+		if (raw_cpu_has_fpu || !cpu_has_mips_4_5_64_r2_r6) {
+			force_sig(SIGILL, current);
 			break;
+		}
 		/* Fall through.  */
 
 	case 1:
 		err = enable_restore_fp_context(0);
 
-		if (!raw_cpu_has_fpu || err) {
-			int sig;
-			void __user *fault_addr = NULL;
-			sig = fpu_emulator_cop1Handler(regs,
-						       &current->thread.fpu,
-						       0, &fault_addr);
-			if (!process_fpemu_return(sig, fault_addr) && !err)
-				mt_ase_fp_affinity();
-		}
+		if (raw_cpu_has_fpu && !err)
+			break;
 
-		goto out;
+		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
+					       &fault_addr);
+		fcr31 = current->thread.fpu.fcr31;
+
+		/*
+		 * We can't allow the emulated instruction to leave
+		 * any of the cause bits set in $fcr31.
+		 */
+		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+
+		/* Send a signal if required.  */
+		if (!process_fpemu_return(sig, fault_addr, fcr31) && !err)
+			mt_ase_fp_affinity();
+
+		break;
 
 	case 2:
 		raw_notifier_call_chain(&cu2_chain, CU2_EXCEPTION, regs);
-		goto out;
+		break;
 	}
 
-	force_sig(SIGILL, current);
-
-out:
 	exception_exit(prev_state);
 }
 
-asmlinkage void do_msa_fpe(struct pt_regs *regs)
+asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
 {
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
+	if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0,
+		       regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP)
+		goto out;
+
+	/* Clear MSACSR.Cause before enabling interrupts */
+	write_msa_csr(msacsr & ~MSA_CSR_CAUSEF);
+	local_irq_enable();
+
 	die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
 	force_sig(SIGFPE, current);
+out:
 	exception_exit(prev_state);
 }
 
@@ -1969,6 +2018,12 @@ int cp0_compare_irq_shift;
 int cp0_perfcount_irq;
 EXPORT_SYMBOL_GPL(cp0_perfcount_irq);
 
+/*
+ * Fast debug channel IRQ or -1 if not present
+ */
+int cp0_fdc_irq;
+EXPORT_SYMBOL_GPL(cp0_fdc_irq);
+
 static int noulri;
 
 static int __init ulri_disable(char *s)
@@ -2050,17 +2105,21 @@ void per_cpu_trap_init(bool is_boot_cpu)
 	 *
 	 *  o read IntCtl.IPTI to determine the timer interrupt
 	 *  o read IntCtl.IPPCI to determine the performance counter interrupt
+	 *  o read IntCtl.IPFDC to determine the fast debug channel interrupt
 	 */
 	if (cpu_has_mips_r2_r6) {
 		cp0_compare_irq_shift = CAUSEB_TI - CAUSEB_IP;
 		cp0_compare_irq = (read_c0_intctl() >> INTCTLB_IPTI) & 7;
 		cp0_perfcount_irq = (read_c0_intctl() >> INTCTLB_IPPCI) & 7;
-		if (cp0_perfcount_irq == cp0_compare_irq)
-			cp0_perfcount_irq = -1;
+		cp0_fdc_irq = (read_c0_intctl() >> INTCTLB_IPFDC) & 7;
+		if (!cp0_fdc_irq)
+			cp0_fdc_irq = -1;
+
 	} else {
 		cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ;
 		cp0_compare_irq_shift = CP0_LEGACY_PERFCNT_IRQ;
 		cp0_perfcount_irq = -1;
+		cp0_fdc_irq = -1;
 	}
 
 	if (!cpu_data[cpu].asid_cache)
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index bbb69695a0a1..af84bef0c90d 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -89,8 +89,6 @@
 #include <asm/fpu_emulator.h>
 #include <asm/inst.h>
 #include <asm/uaccess.h>
-#include <asm/fpu.h>
-#include <asm/fpu_emulator.h>
 
 #define STR(x)	__STR(x)
 #define __STR(x)  #x
@@ -109,10 +107,11 @@ static u32 unaligned_action;
 extern void show_registers(struct pt_regs *regs);
 
 #ifdef __BIG_ENDIAN
-#define     LoadHW(addr, value, res)  \
+#define     _LoadHW(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (".set\tnoat\n"        \
-			"1:\t"user_lb("%0", "0(%2)")"\n"    \
-			"2:\t"user_lbu("$1", "1(%2)")"\n\t" \
+			"1:\t"type##_lb("%0", "0(%2)")"\n"  \
+			"2:\t"type##_lbu("$1", "1(%2)")"\n\t"\
 			"sll\t%0, 0x8\n\t"                  \
 			"or\t%0, $1\n\t"                    \
 			"li\t%1, 0\n"                       \
@@ -127,13 +126,15 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
 #ifndef CONFIG_CPU_MIPSR6
-#define     LoadW(addr, value, res)   \
+#define     _LoadW(addr, value, res, type)   \
+do {                                                        \
 		__asm__ __volatile__ (                      \
-			"1:\t"user_lwl("%0", "(%2)")"\n"    \
-			"2:\t"user_lwr("%0", "3(%2)")"\n\t" \
+			"1:\t"type##_lwl("%0", "(%2)")"\n"   \
+			"2:\t"type##_lwr("%0", "3(%2)")"\n\t"\
 			"li\t%1, 0\n"                       \
 			"3:\n\t"                            \
 			".insn\n\t"                         \
@@ -146,21 +147,24 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #else
 /* MIPSR6 has no lwl instruction */
-#define     LoadW(addr, value, res) \
+#define     _LoadW(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (			    \
 			".set\tpush\n"			    \
 			".set\tnoat\n\t"		    \
-			"1:"user_lb("%0", "0(%2)")"\n\t"    \
-			"2:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"1:"type##_lb("%0", "0(%2)")"\n\t"  \
+			"2:"type##_lbu("$1", "1(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"3:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"3:"type##_lbu("$1", "2(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"4:"user_lbu("$1", "3(%2)")"\n\t"   \
+			"4:"type##_lbu("$1", "3(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
 			"li\t%1, 0\n"			    \
@@ -178,14 +182,17 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t4b, 11b\n\t"		    \
 			".previous"			    \
 			: "=&r" (value), "=r" (res)	    \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #endif /* CONFIG_CPU_MIPSR6 */
 
-#define     LoadHWU(addr, value, res) \
+#define     _LoadHWU(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tnoat\n"                      \
-			"1:\t"user_lbu("%0", "0(%2)")"\n"   \
-			"2:\t"user_lbu("$1", "1(%2)")"\n\t" \
+			"1:\t"type##_lbu("%0", "0(%2)")"\n" \
+			"2:\t"type##_lbu("$1", "1(%2)")"\n\t"\
 			"sll\t%0, 0x8\n\t"                  \
 			"or\t%0, $1\n\t"                    \
 			"li\t%1, 0\n"                       \
@@ -201,13 +208,15 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
 #ifndef CONFIG_CPU_MIPSR6
-#define     LoadWU(addr, value, res)  \
+#define     _LoadWU(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
-			"1:\t"user_lwl("%0", "(%2)")"\n"    \
-			"2:\t"user_lwr("%0", "3(%2)")"\n\t" \
+			"1:\t"type##_lwl("%0", "(%2)")"\n"  \
+			"2:\t"type##_lwr("%0", "3(%2)")"\n\t"\
 			"dsll\t%0, %0, 32\n\t"              \
 			"dsrl\t%0, %0, 32\n\t"              \
 			"li\t%1, 0\n"                       \
@@ -222,9 +231,11 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
-#define     LoadDW(addr, value, res)  \
+#define     _LoadDW(addr, value, res)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			"1:\tldl\t%0, (%2)\n"               \
 			"2:\tldr\t%0, 7(%2)\n\t"            \
@@ -240,21 +251,24 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #else
 /* MIPSR6 has not lwl and ldl instructions */
-#define	    LoadWU(addr, value, res) \
+#define	    _LoadWU(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (			    \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
-			"1:"user_lbu("%0", "0(%2)")"\n\t"   \
-			"2:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"1:"type##_lbu("%0", "0(%2)")"\n\t" \
+			"2:"type##_lbu("$1", "1(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"3:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"3:"type##_lbu("$1", "2(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"4:"user_lbu("$1", "3(%2)")"\n\t"   \
+			"4:"type##_lbu("$1", "3(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
 			"li\t%1, 0\n"			    \
@@ -272,9 +286,11 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t4b, 11b\n\t"		    \
 			".previous"			    \
 			: "=&r" (value), "=r" (res)	    \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
-#define     LoadDW(addr, value, res)  \
+#define     _LoadDW(addr, value, res)  \
+do {                                                        \
 		__asm__ __volatile__ (			    \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
@@ -319,16 +335,19 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t8b, 11b\n\t"		    \
 			".previous"			    \
 			: "=&r" (value), "=r" (res)	    \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #endif /* CONFIG_CPU_MIPSR6 */
 
 
-#define     StoreHW(addr, value, res) \
+#define     _StoreHW(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tnoat\n"                      \
-			"1:\t"user_sb("%1", "1(%2)")"\n"    \
+			"1:\t"type##_sb("%1", "1(%2)")"\n"  \
 			"srl\t$1, %1, 0x8\n"                \
-			"2:\t"user_sb("$1", "0(%2)")"\n"    \
+			"2:\t"type##_sb("$1", "0(%2)")"\n"  \
 			".set\tat\n\t"                      \
 			"li\t%0, 0\n"                       \
 			"3:\n\t"                            \
@@ -342,13 +361,15 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=r" (res)                        \
-			: "r" (value), "r" (addr), "i" (-EFAULT));
+			: "r" (value), "r" (addr), "i" (-EFAULT));\
+} while(0)
 
 #ifndef CONFIG_CPU_MIPSR6
-#define     StoreW(addr, value, res)  \
+#define     _StoreW(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
-			"1:\t"user_swl("%1", "(%2)")"\n"    \
-			"2:\t"user_swr("%1", "3(%2)")"\n\t" \
+			"1:\t"type##_swl("%1", "(%2)")"\n"  \
+			"2:\t"type##_swr("%1", "3(%2)")"\n\t"\
 			"li\t%0, 0\n"                       \
 			"3:\n\t"                            \
 			".insn\n\t"                         \
@@ -361,9 +382,11 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 		: "=r" (res)                                \
-		: "r" (value), "r" (addr), "i" (-EFAULT));
+		: "r" (value), "r" (addr), "i" (-EFAULT));  \
+} while(0)
 
-#define     StoreDW(addr, value, res) \
+#define     _StoreDW(addr, value, res) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			"1:\tsdl\t%1,(%2)\n"                \
 			"2:\tsdr\t%1, 7(%2)\n\t"            \
@@ -379,20 +402,23 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 		: "=r" (res)                                \
-		: "r" (value), "r" (addr), "i" (-EFAULT));
+		: "r" (value), "r" (addr), "i" (-EFAULT));  \
+} while(0)
+
 #else
 /* MIPSR6 has no swl and sdl instructions */
-#define     StoreW(addr, value, res)  \
+#define     _StoreW(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
-			"1:"user_sb("%1", "3(%2)")"\n\t"    \
+			"1:"type##_sb("%1", "3(%2)")"\n\t"  \
 			"srl\t$1, %1, 0x8\n\t"		    \
-			"2:"user_sb("$1", "2(%2)")"\n\t"    \
+			"2:"type##_sb("$1", "2(%2)")"\n\t"  \
 			"srl\t$1, $1,  0x8\n\t"		    \
-			"3:"user_sb("$1", "1(%2)")"\n\t"    \
+			"3:"type##_sb("$1", "1(%2)")"\n\t"  \
 			"srl\t$1, $1, 0x8\n\t"		    \
-			"4:"user_sb("$1", "0(%2)")"\n\t"    \
+			"4:"type##_sb("$1", "0(%2)")"\n\t"  \
 			".set\tpop\n\t"			    \
 			"li\t%0, 0\n"			    \
 			"10:\n\t"			    \
@@ -409,9 +435,11 @@ extern void show_registers(struct pt_regs *regs);
 			".previous"			    \
 		: "=&r" (res)			    	    \
 		: "r" (value), "r" (addr), "i" (-EFAULT)    \
-		: "memory");
+		: "memory");                                \
+} while(0)
 
 #define     StoreDW(addr, value, res) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
@@ -451,15 +479,18 @@ extern void show_registers(struct pt_regs *regs);
 			".previous"			    \
 		: "=&r" (res)			    	    \
 		: "r" (value), "r" (addr), "i" (-EFAULT)    \
-		: "memory");
+		: "memory");                                \
+} while(0)
+
 #endif /* CONFIG_CPU_MIPSR6 */
 
 #else /* __BIG_ENDIAN */
 
-#define     LoadHW(addr, value, res)  \
+#define     _LoadHW(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (".set\tnoat\n"        \
-			"1:\t"user_lb("%0", "1(%2)")"\n"    \
-			"2:\t"user_lbu("$1", "0(%2)")"\n\t" \
+			"1:\t"type##_lb("%0", "1(%2)")"\n"  \
+			"2:\t"type##_lbu("$1", "0(%2)")"\n\t"\
 			"sll\t%0, 0x8\n\t"                  \
 			"or\t%0, $1\n\t"                    \
 			"li\t%1, 0\n"                       \
@@ -474,13 +505,15 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
 #ifndef CONFIG_CPU_MIPSR6
-#define     LoadW(addr, value, res)   \
+#define     _LoadW(addr, value, res, type)   \
+do {                                                        \
 		__asm__ __volatile__ (                      \
-			"1:\t"user_lwl("%0", "3(%2)")"\n"   \
-			"2:\t"user_lwr("%0", "(%2)")"\n\t"  \
+			"1:\t"type##_lwl("%0", "3(%2)")"\n" \
+			"2:\t"type##_lwr("%0", "(%2)")"\n\t"\
 			"li\t%1, 0\n"                       \
 			"3:\n\t"                            \
 			".insn\n\t"                         \
@@ -493,21 +526,24 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #else
 /* MIPSR6 has no lwl instruction */
-#define     LoadW(addr, value, res) \
+#define     _LoadW(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (			    \
 			".set\tpush\n"			    \
 			".set\tnoat\n\t"		    \
-			"1:"user_lb("%0", "3(%2)")"\n\t"    \
-			"2:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"1:"type##_lb("%0", "3(%2)")"\n\t"  \
+			"2:"type##_lbu("$1", "2(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"3:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"3:"type##_lbu("$1", "1(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"4:"user_lbu("$1", "0(%2)")"\n\t"   \
+			"4:"type##_lbu("$1", "0(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
 			"li\t%1, 0\n"			    \
@@ -525,15 +561,18 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t4b, 11b\n\t"		    \
 			".previous"			    \
 			: "=&r" (value), "=r" (res)	    \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #endif /* CONFIG_CPU_MIPSR6 */
 
 
-#define     LoadHWU(addr, value, res) \
+#define     _LoadHWU(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tnoat\n"                      \
-			"1:\t"user_lbu("%0", "1(%2)")"\n"   \
-			"2:\t"user_lbu("$1", "0(%2)")"\n\t" \
+			"1:\t"type##_lbu("%0", "1(%2)")"\n" \
+			"2:\t"type##_lbu("$1", "0(%2)")"\n\t"\
 			"sll\t%0, 0x8\n\t"                  \
 			"or\t%0, $1\n\t"                    \
 			"li\t%1, 0\n"                       \
@@ -549,13 +588,15 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
 #ifndef CONFIG_CPU_MIPSR6
-#define     LoadWU(addr, value, res)  \
+#define     _LoadWU(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
-			"1:\t"user_lwl("%0", "3(%2)")"\n"   \
-			"2:\t"user_lwr("%0", "(%2)")"\n\t"  \
+			"1:\t"type##_lwl("%0", "3(%2)")"\n" \
+			"2:\t"type##_lwr("%0", "(%2)")"\n\t"\
 			"dsll\t%0, %0, 32\n\t"              \
 			"dsrl\t%0, %0, 32\n\t"              \
 			"li\t%1, 0\n"                       \
@@ -570,9 +611,11 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
-#define     LoadDW(addr, value, res)  \
+#define     _LoadDW(addr, value, res)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			"1:\tldl\t%0, 7(%2)\n"              \
 			"2:\tldr\t%0, (%2)\n\t"             \
@@ -588,21 +631,24 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
+
 #else
 /* MIPSR6 has not lwl and ldl instructions */
-#define	    LoadWU(addr, value, res) \
+#define	    _LoadWU(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (			    \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
-			"1:"user_lbu("%0", "3(%2)")"\n\t"   \
-			"2:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"1:"type##_lbu("%0", "3(%2)")"\n\t" \
+			"2:"type##_lbu("$1", "2(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"3:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"3:"type##_lbu("$1", "1(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
-			"4:"user_lbu("$1", "0(%2)")"\n\t"   \
+			"4:"type##_lbu("$1", "0(%2)")"\n\t" \
 			"sll\t%0, 0x8\n\t"		    \
 			"or\t%0, $1\n\t"		    \
 			"li\t%1, 0\n"			    \
@@ -620,9 +666,11 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t4b, 11b\n\t"		    \
 			".previous"			    \
 			: "=&r" (value), "=r" (res)	    \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 
-#define     LoadDW(addr, value, res)  \
+#define     _LoadDW(addr, value, res)  \
+do {                                                        \
 		__asm__ __volatile__ (			    \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
@@ -667,15 +715,17 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t8b, 11b\n\t"		    \
 			".previous"			    \
 			: "=&r" (value), "=r" (res)	    \
-			: "r" (addr), "i" (-EFAULT));
+			: "r" (addr), "i" (-EFAULT));       \
+} while(0)
 #endif /* CONFIG_CPU_MIPSR6 */
 
-#define     StoreHW(addr, value, res) \
+#define     _StoreHW(addr, value, res, type) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tnoat\n"                      \
-			"1:\t"user_sb("%1", "0(%2)")"\n"    \
+			"1:\t"type##_sb("%1", "0(%2)")"\n"  \
 			"srl\t$1,%1, 0x8\n"                 \
-			"2:\t"user_sb("$1", "1(%2)")"\n"    \
+			"2:\t"type##_sb("$1", "1(%2)")"\n"  \
 			".set\tat\n\t"                      \
 			"li\t%0, 0\n"                       \
 			"3:\n\t"                            \
@@ -689,12 +739,15 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 			: "=r" (res)                        \
-			: "r" (value), "r" (addr), "i" (-EFAULT));
+			: "r" (value), "r" (addr), "i" (-EFAULT));\
+} while(0)
+
 #ifndef CONFIG_CPU_MIPSR6
-#define     StoreW(addr, value, res)  \
+#define     _StoreW(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
-			"1:\t"user_swl("%1", "3(%2)")"\n"   \
-			"2:\t"user_swr("%1", "(%2)")"\n\t"  \
+			"1:\t"type##_swl("%1", "3(%2)")"\n" \
+			"2:\t"type##_swr("%1", "(%2)")"\n\t"\
 			"li\t%0, 0\n"                       \
 			"3:\n\t"                            \
 			".insn\n\t"                         \
@@ -707,9 +760,11 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 		: "=r" (res)                                \
-		: "r" (value), "r" (addr), "i" (-EFAULT));
+		: "r" (value), "r" (addr), "i" (-EFAULT));  \
+} while(0)
 
-#define     StoreDW(addr, value, res) \
+#define     _StoreDW(addr, value, res) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			"1:\tsdl\t%1, 7(%2)\n"              \
 			"2:\tsdr\t%1, (%2)\n\t"             \
@@ -725,20 +780,23 @@ extern void show_registers(struct pt_regs *regs);
 			STR(PTR)"\t2b, 4b\n\t"              \
 			".previous"                         \
 		: "=r" (res)                                \
-		: "r" (value), "r" (addr), "i" (-EFAULT));
+		: "r" (value), "r" (addr), "i" (-EFAULT));  \
+} while(0)
+
 #else
 /* MIPSR6 has no swl and sdl instructions */
-#define     StoreW(addr, value, res)  \
+#define     _StoreW(addr, value, res, type)  \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
-			"1:"user_sb("%1", "0(%2)")"\n\t"    \
+			"1:"type##_sb("%1", "0(%2)")"\n\t"  \
 			"srl\t$1, %1, 0x8\n\t"		    \
-			"2:"user_sb("$1", "1(%2)")"\n\t"    \
+			"2:"type##_sb("$1", "1(%2)")"\n\t"  \
 			"srl\t$1, $1,  0x8\n\t"		    \
-			"3:"user_sb("$1", "2(%2)")"\n\t"    \
+			"3:"type##_sb("$1", "2(%2)")"\n\t"  \
 			"srl\t$1, $1, 0x8\n\t"		    \
-			"4:"user_sb("$1", "3(%2)")"\n\t"    \
+			"4:"type##_sb("$1", "3(%2)")"\n\t"  \
 			".set\tpop\n\t"			    \
 			"li\t%0, 0\n"			    \
 			"10:\n\t"			    \
@@ -755,9 +813,11 @@ extern void show_registers(struct pt_regs *regs);
 			".previous"			    \
 		: "=&r" (res)			    	    \
 		: "r" (value), "r" (addr), "i" (-EFAULT)    \
-		: "memory");
+		: "memory");                                \
+} while(0)
 
-#define     StoreDW(addr, value, res) \
+#define     _StoreDW(addr, value, res) \
+do {                                                        \
 		__asm__ __volatile__ (                      \
 			".set\tpush\n\t"		    \
 			".set\tnoat\n\t"		    \
@@ -797,10 +857,28 @@ extern void show_registers(struct pt_regs *regs);
 			".previous"			    \
 		: "=&r" (res)			    	    \
 		: "r" (value), "r" (addr), "i" (-EFAULT)    \
-		: "memory");
+		: "memory");                                \
+} while(0)
+
 #endif /* CONFIG_CPU_MIPSR6 */
 #endif
 
+#define LoadHWU(addr, value, res)	_LoadHWU(addr, value, res, kernel)
+#define LoadHWUE(addr, value, res)	_LoadHWU(addr, value, res, user)
+#define LoadWU(addr, value, res)	_LoadWU(addr, value, res, kernel)
+#define LoadWUE(addr, value, res)	_LoadWU(addr, value, res, user)
+#define LoadHW(addr, value, res)	_LoadHW(addr, value, res, kernel)
+#define LoadHWE(addr, value, res)	_LoadHW(addr, value, res, user)
+#define LoadW(addr, value, res)		_LoadW(addr, value, res, kernel)
+#define LoadWE(addr, value, res)	_LoadW(addr, value, res, user)
+#define LoadDW(addr, value, res)	_LoadDW(addr, value, res)
+
+#define StoreHW(addr, value, res)	_StoreHW(addr, value, res, kernel)
+#define StoreHWE(addr, value, res)	_StoreHW(addr, value, res, user)
+#define StoreW(addr, value, res)	_StoreW(addr, value, res, kernel)
+#define StoreWE(addr, value, res)	_StoreW(addr, value, res, user)
+#define StoreDW(addr, value, res)	_StoreDW(addr, value, res)
+
 static void emulate_load_store_insn(struct pt_regs *regs,
 	void __user *addr, unsigned int __user *pc)
 {
@@ -872,7 +950,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 				set_fs(seg);
 				goto sigbus;
 			}
-			LoadHW(addr, value, res);
+			LoadHWE(addr, value, res);
 			if (res) {
 				set_fs(seg);
 				goto fault;
@@ -885,7 +963,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 				set_fs(seg);
 				goto sigbus;
 			}
-				LoadW(addr, value, res);
+				LoadWE(addr, value, res);
 			if (res) {
 				set_fs(seg);
 				goto fault;
@@ -898,7 +976,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 				set_fs(seg);
 				goto sigbus;
 			}
-			LoadHWU(addr, value, res);
+			LoadHWUE(addr, value, res);
 			if (res) {
 				set_fs(seg);
 				goto fault;
@@ -913,7 +991,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 			}
 			compute_return_epc(regs);
 			value = regs->regs[insn.spec3_format.rt];
-			StoreHW(addr, value, res);
+			StoreHWE(addr, value, res);
 			if (res) {
 				set_fs(seg);
 				goto fault;
@@ -926,7 +1004,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 			}
 			compute_return_epc(regs);
 			value = regs->regs[insn.spec3_format.rt];
-			StoreW(addr, value, res);
+			StoreWE(addr, value, res);
 			if (res) {
 				set_fs(seg);
 				goto fault;
@@ -943,7 +1021,15 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		if (!access_ok(VERIFY_READ, addr, 2))
 			goto sigbus;
 
-		LoadHW(addr, value, res);
+		if (config_enabled(CONFIG_EVA)) {
+			if (segment_eq(get_fs(), get_ds()))
+				LoadHW(addr, value, res);
+			else
+				LoadHWE(addr, value, res);
+		} else {
+			LoadHW(addr, value, res);
+		}
+
 		if (res)
 			goto fault;
 		compute_return_epc(regs);
@@ -954,7 +1040,15 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		if (!access_ok(VERIFY_READ, addr, 4))
 			goto sigbus;
 
-		LoadW(addr, value, res);
+		if (config_enabled(CONFIG_EVA)) {
+			if (segment_eq(get_fs(), get_ds()))
+				LoadW(addr, value, res);
+			else
+				LoadWE(addr, value, res);
+		} else {
+			LoadW(addr, value, res);
+		}
+
 		if (res)
 			goto fault;
 		compute_return_epc(regs);
@@ -965,7 +1059,15 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		if (!access_ok(VERIFY_READ, addr, 2))
 			goto sigbus;
 
-		LoadHWU(addr, value, res);
+		if (config_enabled(CONFIG_EVA)) {
+			if (segment_eq(get_fs(), get_ds()))
+				LoadHWU(addr, value, res);
+			else
+				LoadHWUE(addr, value, res);
+		} else {
+			LoadHWU(addr, value, res);
+		}
+
 		if (res)
 			goto fault;
 		compute_return_epc(regs);
@@ -1024,7 +1126,16 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 
 		compute_return_epc(regs);
 		value = regs->regs[insn.i_format.rt];
-		StoreHW(addr, value, res);
+
+		if (config_enabled(CONFIG_EVA)) {
+			if (segment_eq(get_fs(), get_ds()))
+				StoreHW(addr, value, res);
+			else
+				StoreHWE(addr, value, res);
+		} else {
+			StoreHW(addr, value, res);
+		}
+
 		if (res)
 			goto fault;
 		break;
@@ -1035,7 +1146,16 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 
 		compute_return_epc(regs);
 		value = regs->regs[insn.i_format.rt];
-		StoreW(addr, value, res);
+
+		if (config_enabled(CONFIG_EVA)) {
+			if (segment_eq(get_fs(), get_ds()))
+				StoreW(addr, value, res);
+			else
+				StoreWE(addr, value, res);
+		} else {
+			StoreW(addr, value, res);
+		}
+
 		if (res)
 			goto fault;
 		break;
@@ -1076,7 +1196,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		own_fpu(1);	/* Restore FPU state. */
 
 		/* Signal if something went wrong. */
-		process_fpemu_return(res, fault_addr);
+		process_fpemu_return(res, fault_addr, 0);
 
 		if (res == 0)
 			break;
@@ -1511,7 +1631,7 @@ fpu_emul:
 		own_fpu(1);	/* restore FPU state */
 
 		/* If something went wrong, signal */
-		process_fpemu_return(res, fault_addr);
+		process_fpemu_return(res, fault_addr, 0);
 
 		if (res == 0)
 			goto success;
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 401fe027c261..637ebbebd549 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -1,13 +1,15 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
-kvm-objs := $(common-objs) mips.o emulate.o locore.o \
+common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+
+kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
 	    interrupt.o stats.o commpage.o \
-	    dyntrans.o trap_emul.o
+	    dyntrans.o trap_emul.o fpu.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-y			+= callback.o tlb.o
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index fb3e8dfd1ff6..6230f376a44e 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -884,6 +884,84 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+/**
+ * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config1 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int mask = 0;
+
+	/* Permit FPU to be present if FPU is supported */
+	if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
+		mask |= MIPS_CONF1_FP;
+
+	return mask;
+}
+
+/**
+ * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config3 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
+{
+	/* Config4 is optional */
+	unsigned int mask = MIPS_CONF_M;
+
+	/* Permit MSA to be present if MSA is supported */
+	if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+		mask |= MIPS_CONF3_MSA;
+
+	return mask;
+}
+
+/**
+ * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config4 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
+{
+	/* Config5 is optional */
+	return MIPS_CONF_M;
+}
+
+/**
+ * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config5 CP0
+ * register, by the guest itself.
+ */
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int mask = 0;
+
+	/* Permit MSAEn changes if MSA supported and enabled */
+	if (kvm_mips_guest_has_msa(&vcpu->arch))
+		mask |= MIPS_CONF5_MSAEN;
+
+	/*
+	 * Permit guest FPU mode changes if FPU is enabled and the relevant
+	 * feature exists according to FIR register.
+	 */
+	if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+		if (cpu_has_fre)
+			mask |= MIPS_CONF5_FRE;
+		/* We don't support UFR or UFE */
+	}
+
+	return mask;
+}
+
 enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 					   uint32_t cause, struct kvm_run *run,
 					   struct kvm_vcpu *vcpu)
@@ -1021,18 +1099,114 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				kvm_mips_write_compare(vcpu,
 						       vcpu->arch.gprs[rt]);
 			} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
-				kvm_write_c0_guest_status(cop0,
-							  vcpu->arch.gprs[rt]);
+				unsigned int old_val, val, change;
+
+				old_val = kvm_read_c0_guest_status(cop0);
+				val = vcpu->arch.gprs[rt];
+				change = val ^ old_val;
+
+				/* Make sure that the NMI bit is never set */
+				val &= ~ST0_NMI;
+
+				/*
+				 * Don't allow CU1 or FR to be set unless FPU
+				 * capability enabled and exists in guest
+				 * configuration.
+				 */
+				if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+					val &= ~(ST0_CU1 | ST0_FR);
+
+				/*
+				 * Also don't allow FR to be set if host doesn't
+				 * support it.
+				 */
+				if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+					val &= ~ST0_FR;
+
+
+				/* Handle changes in FPU mode */
+				preempt_disable();
+
+				/*
+				 * FPU and Vector register state is made
+				 * UNPREDICTABLE by a change of FR, so don't
+				 * even bother saving it.
+				 */
+				if (change & ST0_FR)
+					kvm_drop_fpu(vcpu);
+
+				/*
+				 * If MSA state is already live, it is undefined
+				 * how it interacts with FR=0 FPU state, and we
+				 * don't want to hit reserved instruction
+				 * exceptions trying to save the MSA state later
+				 * when CU=1 && FR=1, so play it safe and save
+				 * it first.
+				 */
+				if (change & ST0_CU1 && !(val & ST0_FR) &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+					kvm_lose_fpu(vcpu);
+
 				/*
-				 * Make sure that CU1 and NMI bits are
-				 * never set
+				 * Propagate CU1 (FPU enable) changes
+				 * immediately if the FPU context is already
+				 * loaded. When disabling we leave the context
+				 * loaded so it can be quickly enabled again in
+				 * the near future.
 				 */
-				kvm_clear_c0_guest_status(cop0,
-							  (ST0_CU1 | ST0_NMI));
+				if (change & ST0_CU1 &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+					change_c0_status(ST0_CU1, val);
+
+				preempt_enable();
+
+				kvm_write_c0_guest_status(cop0, val);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
-				kvm_mips_trans_mtc0(inst, opc, vcpu);
+				/*
+				 * If FPU present, we need CU1/FR bits to take
+				 * effect fairly soon.
+				 */
+				if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+					kvm_mips_trans_mtc0(inst, opc, vcpu);
 #endif
+			} else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+				unsigned int old_val, val, change, wrmask;
+
+				old_val = kvm_read_c0_guest_config5(cop0);
+				val = vcpu->arch.gprs[rt];
+
+				/* Only a few bits are writable in Config5 */
+				wrmask = kvm_mips_config5_wrmask(vcpu);
+				change = (val ^ old_val) & wrmask;
+				val = old_val ^ change;
+
+
+				/* Handle changes in FPU/MSA modes */
+				preempt_disable();
+
+				/*
+				 * Propagate FRE changes immediately if the FPU
+				 * context is already loaded.
+				 */
+				if (change & MIPS_CONF5_FRE &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+					change_c0_config5(MIPS_CONF5_FRE, val);
+
+				/*
+				 * Propagate MSAEn changes immediately if the
+				 * MSA context is already loaded. When disabling
+				 * we leave the context loaded so it can be
+				 * quickly enabled again in the near future.
+				 */
+				if (change & MIPS_CONF5_MSAEN &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+					change_c0_config5(MIPS_CONF5_MSAEN,
+							  val);
+
+				preempt_enable();
+
+				kvm_write_c0_guest_config5(cop0, val);
 			} else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
 				uint32_t old_cause, new_cause;
 
@@ -1970,6 +2144,146 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
 	return er;
 }
 
+enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+						uint32_t *opc,
+						struct kvm_run *run,
+						struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_TRAP << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver TRAP when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
+enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+						  uint32_t *opc,
+						  struct kvm_run *run,
+						  struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_MSAFPE << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
+enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+					       uint32_t *opc,
+					       struct kvm_run *run,
+					       struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_FPE << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver FPE when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
+enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+						  uint32_t *opc,
+						  struct kvm_run *run,
+						  struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_MSADIS << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver MSADIS when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
 /* ll/sc, rdhwr, sync emulation */
 
 #define OPCODE 0xfc000000
@@ -2176,6 +2490,10 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
 		case T_SYSCALL:
 		case T_BREAK:
 		case T_RES_INST:
+		case T_TRAP:
+		case T_MSAFPE:
+		case T_FPE:
+		case T_MSADIS:
 			break;
 
 		case T_COP_UNUSABLE:
diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S
new file mode 100644
index 000000000000..531fbf5131c0
--- /dev/null
+++ b/arch/mips/kvm/fpu.S
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * FPU context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+	.set	noreorder
+	.set	noat
+
+LEAF(__kvm_save_fpu)
+	.set	push
+	.set	mips64r2
+	SET_HARDFLOAT
+	mfc0	t0, CP0_STATUS
+	sll     t0, t0, 5			# is Status.FR set?
+	bgez    t0, 1f				# no: skip odd doubles
+	 nop
+	sdc1	$f1,  VCPU_FPR1(a0)
+	sdc1	$f3,  VCPU_FPR3(a0)
+	sdc1	$f5,  VCPU_FPR5(a0)
+	sdc1	$f7,  VCPU_FPR7(a0)
+	sdc1	$f9,  VCPU_FPR9(a0)
+	sdc1	$f11, VCPU_FPR11(a0)
+	sdc1	$f13, VCPU_FPR13(a0)
+	sdc1	$f15, VCPU_FPR15(a0)
+	sdc1	$f17, VCPU_FPR17(a0)
+	sdc1	$f19, VCPU_FPR19(a0)
+	sdc1	$f21, VCPU_FPR21(a0)
+	sdc1	$f23, VCPU_FPR23(a0)
+	sdc1	$f25, VCPU_FPR25(a0)
+	sdc1	$f27, VCPU_FPR27(a0)
+	sdc1	$f29, VCPU_FPR29(a0)
+	sdc1	$f31, VCPU_FPR31(a0)
+1:	sdc1	$f0,  VCPU_FPR0(a0)
+	sdc1	$f2,  VCPU_FPR2(a0)
+	sdc1	$f4,  VCPU_FPR4(a0)
+	sdc1	$f6,  VCPU_FPR6(a0)
+	sdc1	$f8,  VCPU_FPR8(a0)
+	sdc1	$f10, VCPU_FPR10(a0)
+	sdc1	$f12, VCPU_FPR12(a0)
+	sdc1	$f14, VCPU_FPR14(a0)
+	sdc1	$f16, VCPU_FPR16(a0)
+	sdc1	$f18, VCPU_FPR18(a0)
+	sdc1	$f20, VCPU_FPR20(a0)
+	sdc1	$f22, VCPU_FPR22(a0)
+	sdc1	$f24, VCPU_FPR24(a0)
+	sdc1	$f26, VCPU_FPR26(a0)
+	sdc1	$f28, VCPU_FPR28(a0)
+	jr	ra
+	 sdc1	$f30, VCPU_FPR30(a0)
+	.set	pop
+	END(__kvm_save_fpu)
+
+LEAF(__kvm_restore_fpu)
+	.set	push
+	.set	mips64r2
+	SET_HARDFLOAT
+	mfc0	t0, CP0_STATUS
+	sll     t0, t0, 5			# is Status.FR set?
+	bgez    t0, 1f				# no: skip odd doubles
+	 nop
+	ldc1	$f1,  VCPU_FPR1(a0)
+	ldc1	$f3,  VCPU_FPR3(a0)
+	ldc1	$f5,  VCPU_FPR5(a0)
+	ldc1	$f7,  VCPU_FPR7(a0)
+	ldc1	$f9,  VCPU_FPR9(a0)
+	ldc1	$f11, VCPU_FPR11(a0)
+	ldc1	$f13, VCPU_FPR13(a0)
+	ldc1	$f15, VCPU_FPR15(a0)
+	ldc1	$f17, VCPU_FPR17(a0)
+	ldc1	$f19, VCPU_FPR19(a0)
+	ldc1	$f21, VCPU_FPR21(a0)
+	ldc1	$f23, VCPU_FPR23(a0)
+	ldc1	$f25, VCPU_FPR25(a0)
+	ldc1	$f27, VCPU_FPR27(a0)
+	ldc1	$f29, VCPU_FPR29(a0)
+	ldc1	$f31, VCPU_FPR31(a0)
+1:	ldc1	$f0,  VCPU_FPR0(a0)
+	ldc1	$f2,  VCPU_FPR2(a0)
+	ldc1	$f4,  VCPU_FPR4(a0)
+	ldc1	$f6,  VCPU_FPR6(a0)
+	ldc1	$f8,  VCPU_FPR8(a0)
+	ldc1	$f10, VCPU_FPR10(a0)
+	ldc1	$f12, VCPU_FPR12(a0)
+	ldc1	$f14, VCPU_FPR14(a0)
+	ldc1	$f16, VCPU_FPR16(a0)
+	ldc1	$f18, VCPU_FPR18(a0)
+	ldc1	$f20, VCPU_FPR20(a0)
+	ldc1	$f22, VCPU_FPR22(a0)
+	ldc1	$f24, VCPU_FPR24(a0)
+	ldc1	$f26, VCPU_FPR26(a0)
+	ldc1	$f28, VCPU_FPR28(a0)
+	jr	ra
+	 ldc1	$f30, VCPU_FPR30(a0)
+	.set	pop
+	END(__kvm_restore_fpu)
+
+LEAF(__kvm_restore_fcsr)
+	.set	push
+	SET_HARDFLOAT
+	lw	t0, VCPU_FCR31(a0)
+	/*
+	 * The ctc1 must stay at this offset in __kvm_restore_fcsr.
+	 * See kvm_mips_csr_die_notify() which handles t0 containing a value
+	 * which triggers an FP Exception, which must be stepped over and
+	 * ignored since the set cause bits must remain there for the guest.
+	 */
+	ctc1	t0, fcr31
+	jr	ra
+	 nop
+	.set	pop
+	END(__kvm_restore_fcsr)
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 4a68b176d6e4..c567240386a0 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -36,6 +36,8 @@
 #define PT_HOST_USERLOCAL   PT_EPC
 
 #define CP0_DDATA_LO        $28,3
+#define CP0_CONFIG3         $16,3
+#define CP0_CONFIG5         $16,5
 #define CP0_EBASE           $15,1
 
 #define CP0_INTCTL          $12,1
@@ -353,6 +355,42 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
 	LONG_L	k0, VCPU_HOST_EBASE(k1)
 	mtc0	k0,CP0_EBASE
 
+	/*
+	 * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
+	 * trigger FPE for pending exceptions.
+	 */
+	.set	at
+	and	v1, v0, ST0_CU1
+	beqz	v1, 1f
+	 nop
+	.set	push
+	SET_HARDFLOAT
+	cfc1	t0, fcr31
+	sw	t0, VCPU_FCR31(k1)
+	ctc1	zero,fcr31
+	.set	pop
+	.set	noat
+1:
+
+#ifdef CONFIG_CPU_HAS_MSA
+	/*
+	 * If MSA is enabled, save MSACSR and clear it so that later
+	 * instructions don't trigger MSAFPE for pending exceptions.
+	 */
+	mfc0	t0, CP0_CONFIG3
+	ext	t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
+	beqz	t0, 1f
+	 nop
+	mfc0	t0, CP0_CONFIG5
+	ext	t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
+	beqz	t0, 1f
+	 nop
+	_cfcmsa	t0, MSA_CSR
+	sw	t0, VCPU_MSA_CSR(k1)
+	_ctcmsa	MSA_CSR, zero
+1:
+#endif
+
 	/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
 	.set	at
 	and	v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index c9eccf5df912..bb68e8d520e8 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -11,6 +11,7 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
@@ -48,6 +49,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "syscall",	  VCPU_STAT(syscall_exits),	 KVM_STAT_VCPU },
 	{ "resvd_inst",	  VCPU_STAT(resvd_inst_exits),	 KVM_STAT_VCPU },
 	{ "break_inst",	  VCPU_STAT(break_inst_exits),	 KVM_STAT_VCPU },
+	{ "trap_inst",	  VCPU_STAT(trap_inst_exits),	 KVM_STAT_VCPU },
+	{ "msa_fpe",	  VCPU_STAT(msa_fpe_exits),	 KVM_STAT_VCPU },
+	{ "fpe",	  VCPU_STAT(fpe_exits),		 KVM_STAT_VCPU },
+	{ "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
 	{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
 	{ "halt_wakeup",  VCPU_STAT(halt_wakeup),	 KVM_STAT_VCPU },
@@ -504,10 +509,13 @@ static u64 kvm_mips_get_one_regs[] = {
 	KVM_REG_MIPS_CP0_STATUS,
 	KVM_REG_MIPS_CP0_CAUSE,
 	KVM_REG_MIPS_CP0_EPC,
+	KVM_REG_MIPS_CP0_PRID,
 	KVM_REG_MIPS_CP0_CONFIG,
 	KVM_REG_MIPS_CP0_CONFIG1,
 	KVM_REG_MIPS_CP0_CONFIG2,
 	KVM_REG_MIPS_CP0_CONFIG3,
+	KVM_REG_MIPS_CP0_CONFIG4,
+	KVM_REG_MIPS_CP0_CONFIG5,
 	KVM_REG_MIPS_CP0_CONFIG7,
 	KVM_REG_MIPS_CP0_ERROREPC,
 
@@ -520,10 +528,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 			    const struct kvm_one_reg *reg)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
 	int ret;
 	s64 v;
+	s64 vs[2];
+	unsigned int idx;
 
 	switch (reg->id) {
+	/* General purpose registers */
 	case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
 		v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
 		break;
@@ -537,6 +549,67 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 		v = (long)vcpu->arch.pc;
 		break;
 
+	/* Floating point registers */
+	case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+		/* Odd singles in top of even double when FR=0 */
+		if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+			v = get_fpr32(&fpu->fpr[idx], 0);
+		else
+			v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1);
+		break;
+	case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+		/* Can't access odd doubles in FR=0 mode */
+		if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+			return -EINVAL;
+		v = get_fpr64(&fpu->fpr[idx], 0);
+		break;
+	case KVM_REG_MIPS_FCR_IR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		v = boot_cpu_data.fpu_id;
+		break;
+	case KVM_REG_MIPS_FCR_CSR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		v = fpu->fcr31;
+		break;
+
+	/* MIPS SIMD Architecture (MSA) registers */
+	case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		/* Can't access MSA registers in FR=0 mode */
+		if (!(kvm_read_c0_guest_status(cop0) & ST0_FR))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+		/* least significant byte first */
+		vs[0] = get_fpr64(&fpu->fpr[idx], 0);
+		vs[1] = get_fpr64(&fpu->fpr[idx], 1);
+#else
+		/* most significant byte first */
+		vs[0] = get_fpr64(&fpu->fpr[idx], 1);
+		vs[1] = get_fpr64(&fpu->fpr[idx], 0);
+#endif
+		break;
+	case KVM_REG_MIPS_MSA_IR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		v = boot_cpu_data.msa_id;
+		break;
+	case KVM_REG_MIPS_MSA_CSR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		v = fpu->msacsr;
+		break;
+
+	/* Co-processor 0 registers */
 	case KVM_REG_MIPS_CP0_INDEX:
 		v = (long)kvm_read_c0_guest_index(cop0);
 		break;
@@ -573,8 +646,8 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_EPC:
 		v = (long)kvm_read_c0_guest_epc(cop0);
 		break;
-	case KVM_REG_MIPS_CP0_ERROREPC:
-		v = (long)kvm_read_c0_guest_errorepc(cop0);
+	case KVM_REG_MIPS_CP0_PRID:
+		v = (long)kvm_read_c0_guest_prid(cop0);
 		break;
 	case KVM_REG_MIPS_CP0_CONFIG:
 		v = (long)kvm_read_c0_guest_config(cop0);
@@ -588,9 +661,18 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_CONFIG3:
 		v = (long)kvm_read_c0_guest_config3(cop0);
 		break;
+	case KVM_REG_MIPS_CP0_CONFIG4:
+		v = (long)kvm_read_c0_guest_config4(cop0);
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG5:
+		v = (long)kvm_read_c0_guest_config5(cop0);
+		break;
 	case KVM_REG_MIPS_CP0_CONFIG7:
 		v = (long)kvm_read_c0_guest_config7(cop0);
 		break;
+	case KVM_REG_MIPS_CP0_ERROREPC:
+		v = (long)kvm_read_c0_guest_errorepc(cop0);
+		break;
 	/* registers to be handled specially */
 	case KVM_REG_MIPS_CP0_COUNT:
 	case KVM_REG_MIPS_COUNT_CTL:
@@ -612,6 +694,10 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 		u32 v32 = (u32)v;
 
 		return put_user(v32, uaddr32);
+	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+		void __user *uaddr = (void __user *)(long)reg->addr;
+
+		return copy_to_user(uaddr, vs, 16);
 	} else {
 		return -EINVAL;
 	}
@@ -621,7 +707,10 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 			    const struct kvm_one_reg *reg)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	u64 v;
+	struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+	s64 v;
+	s64 vs[2];
+	unsigned int idx;
 
 	if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
 		u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
@@ -635,11 +724,16 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 		if (get_user(v32, uaddr32) != 0)
 			return -EFAULT;
 		v = (s64)v32;
+	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+		void __user *uaddr = (void __user *)(long)reg->addr;
+
+		return copy_from_user(vs, uaddr, 16);
 	} else {
 		return -EINVAL;
 	}
 
 	switch (reg->id) {
+	/* General purpose registers */
 	case KVM_REG_MIPS_R0:
 		/* Silently ignore requests to set $0 */
 		break;
@@ -656,6 +750,64 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 		vcpu->arch.pc = v;
 		break;
 
+	/* Floating point registers */
+	case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+		/* Odd singles in top of even double when FR=0 */
+		if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+			set_fpr32(&fpu->fpr[idx], 0, v);
+		else
+			set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v);
+		break;
+	case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+		/* Can't access odd doubles in FR=0 mode */
+		if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+			return -EINVAL;
+		set_fpr64(&fpu->fpr[idx], 0, v);
+		break;
+	case KVM_REG_MIPS_FCR_IR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		/* Read-only */
+		break;
+	case KVM_REG_MIPS_FCR_CSR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		fpu->fcr31 = v;
+		break;
+
+	/* MIPS SIMD Architecture (MSA) registers */
+	case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+		/* least significant byte first */
+		set_fpr64(&fpu->fpr[idx], 0, vs[0]);
+		set_fpr64(&fpu->fpr[idx], 1, vs[1]);
+#else
+		/* most significant byte first */
+		set_fpr64(&fpu->fpr[idx], 1, vs[0]);
+		set_fpr64(&fpu->fpr[idx], 0, vs[1]);
+#endif
+		break;
+	case KVM_REG_MIPS_MSA_IR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		/* Read-only */
+		break;
+	case KVM_REG_MIPS_MSA_CSR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		fpu->msacsr = v;
+		break;
+
+	/* Co-processor 0 registers */
 	case KVM_REG_MIPS_CP0_INDEX:
 		kvm_write_c0_guest_index(cop0, v);
 		break;
@@ -686,6 +838,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_EPC:
 		kvm_write_c0_guest_epc(cop0, v);
 		break;
+	case KVM_REG_MIPS_CP0_PRID:
+		kvm_write_c0_guest_prid(cop0, v);
+		break;
 	case KVM_REG_MIPS_CP0_ERROREPC:
 		kvm_write_c0_guest_errorepc(cop0, v);
 		break;
@@ -693,6 +848,12 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_COUNT:
 	case KVM_REG_MIPS_CP0_COMPARE:
 	case KVM_REG_MIPS_CP0_CAUSE:
+	case KVM_REG_MIPS_CP0_CONFIG:
+	case KVM_REG_MIPS_CP0_CONFIG1:
+	case KVM_REG_MIPS_CP0_CONFIG2:
+	case KVM_REG_MIPS_CP0_CONFIG3:
+	case KVM_REG_MIPS_CP0_CONFIG4:
+	case KVM_REG_MIPS_CP0_CONFIG5:
 	case KVM_REG_MIPS_COUNT_CTL:
 	case KVM_REG_MIPS_COUNT_RESUME:
 	case KVM_REG_MIPS_COUNT_HZ:
@@ -703,6 +864,33 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	int r = 0;
+
+	if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap))
+		return -EINVAL;
+	if (cap->flags)
+		return -EINVAL;
+	if (cap->args[0])
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_MIPS_FPU:
+		vcpu->arch.fpu_enabled = true;
+		break;
+	case KVM_CAP_MIPS_MSA:
+		vcpu->arch.msa_enabled = true;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 			 unsigned long arg)
 {
@@ -760,6 +948,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 			r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
 			break;
 		}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
 	default:
 		r = -ENOIOCTLCMD;
 	}
@@ -868,11 +1065,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
 	switch (ext) {
 	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ENABLE_CAP:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+	case KVM_CAP_MIPS_FPU:
+		r = !!cpu_has_fpu;
+		break;
+	case KVM_CAP_MIPS_MSA:
+		/*
+		 * We don't support MSA vector partitioning yet:
+		 * 1) It would require explicit support which can't be tested
+		 *    yet due to lack of support in current hardware.
+		 * 2) It extends the state that would need to be saved/restored
+		 *    by e.g. QEMU for migration.
+		 *
+		 * When vector partitioning hardware becomes available, support
+		 * could be added by requiring a flag when enabling
+		 * KVM_CAP_MIPS_MSA capability to indicate that userland knows
+		 * to save/restore the appropriate extra state.
+		 */
+		r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
+		break;
 	default:
 		r = 0;
 		break;
@@ -1119,6 +1335,30 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		ret = kvm_mips_callbacks->handle_break(vcpu);
 		break;
 
+	case T_TRAP:
+		++vcpu->stat.trap_inst_exits;
+		trace_kvm_exit(vcpu, TRAP_INST_EXITS);
+		ret = kvm_mips_callbacks->handle_trap(vcpu);
+		break;
+
+	case T_MSAFPE:
+		++vcpu->stat.msa_fpe_exits;
+		trace_kvm_exit(vcpu, MSA_FPE_EXITS);
+		ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
+		break;
+
+	case T_FPE:
+		++vcpu->stat.fpe_exits;
+		trace_kvm_exit(vcpu, FPE_EXITS);
+		ret = kvm_mips_callbacks->handle_fpe(vcpu);
+		break;
+
+	case T_MSADIS:
+		++vcpu->stat.msa_disabled_exits;
+		trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
+		ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
+		break;
+
 	default:
 		kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x  BadVaddr: %#lx Status: %#lx\n",
 			exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
@@ -1146,12 +1386,233 @@ skip_emul:
 		}
 	}
 
+	if (ret == RESUME_GUEST) {
+		/*
+		 * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
+		 * is live), restore FCR31 / MSACSR.
+		 *
+		 * This should be before returning to the guest exception
+		 * vector, as it may well cause an [MSA] FP exception if there
+		 * are pending exception bits unmasked. (see
+		 * kvm_mips_csr_die_notifier() for how that is handled).
+		 */
+		if (kvm_mips_guest_has_fpu(&vcpu->arch) &&
+		    read_c0_status() & ST0_CU1)
+			__kvm_restore_fcsr(&vcpu->arch);
+
+		if (kvm_mips_guest_has_msa(&vcpu->arch) &&
+		    read_c0_config5() & MIPS_CONF5_MSAEN)
+			__kvm_restore_msacsr(&vcpu->arch);
+	}
+
 	/* Disable HTW before returning to guest or host */
 	htw_stop();
 
 	return ret;
 }
 
+/* Enable FPU for guest and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned int sr, cfg5;
+
+	preempt_disable();
+
+	sr = kvm_read_c0_guest_status(cop0);
+
+	/*
+	 * If MSA state is already live, it is undefined how it interacts with
+	 * FR=0 FPU state, and we don't want to hit reserved instruction
+	 * exceptions trying to save the MSA state later when CU=1 && FR=1, so
+	 * play it safe and save it first.
+	 *
+	 * In theory we shouldn't ever hit this case since kvm_lose_fpu() should
+	 * get called when guest CU1 is set, however we can't trust the guest
+	 * not to clobber the status register directly via the commpage.
+	 */
+	if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) &&
+	    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+		kvm_lose_fpu(vcpu);
+
+	/*
+	 * Enable FPU for guest
+	 * We set FR and FRE according to guest context
+	 */
+	change_c0_status(ST0_CU1 | ST0_FR, sr);
+	if (cpu_has_fre) {
+		cfg5 = kvm_read_c0_guest_config5(cop0);
+		change_c0_config5(MIPS_CONF5_FRE, cfg5);
+	}
+	enable_fpu_hazard();
+
+	/* If guest FPU state not active, restore it now */
+	if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+		__kvm_restore_fpu(&vcpu->arch);
+		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+	}
+
+	preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_MSA
+/* Enable MSA for guest and restore context */
+void kvm_own_msa(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned int sr, cfg5;
+
+	preempt_disable();
+
+	/*
+	 * Enable FPU if enabled in guest, since we're restoring FPU context
+	 * anyway. We set FR and FRE according to guest context.
+	 */
+	if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+		sr = kvm_read_c0_guest_status(cop0);
+
+		/*
+		 * If FR=0 FPU state is already live, it is undefined how it
+		 * interacts with MSA state, so play it safe and save it first.
+		 */
+		if (!(sr & ST0_FR) &&
+		    (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU |
+				KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU)
+			kvm_lose_fpu(vcpu);
+
+		change_c0_status(ST0_CU1 | ST0_FR, sr);
+		if (sr & ST0_CU1 && cpu_has_fre) {
+			cfg5 = kvm_read_c0_guest_config5(cop0);
+			change_c0_config5(MIPS_CONF5_FRE, cfg5);
+		}
+	}
+
+	/* Enable MSA for guest */
+	set_c0_config5(MIPS_CONF5_MSAEN);
+	enable_fpu_hazard();
+
+	switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) {
+	case KVM_MIPS_FPU_FPU:
+		/*
+		 * Guest FPU state already loaded, only restore upper MSA state
+		 */
+		__kvm_restore_msa_upper(&vcpu->arch);
+		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+		break;
+	case 0:
+		/* Neither FPU or MSA already active, restore full MSA state */
+		__kvm_restore_msa(&vcpu->arch);
+		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+		if (kvm_mips_guest_has_fpu(&vcpu->arch))
+			vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+		break;
+	default:
+		break;
+	}
+
+	preempt_enable();
+}
+#endif
+
+/* Drop FPU & MSA without saving it */
+void kvm_drop_fpu(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+		disable_msa();
+		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA;
+	}
+	if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+		clear_c0_status(ST0_CU1 | ST0_FR);
+		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+	}
+	preempt_enable();
+}
+
+/* Save and disable FPU & MSA */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * FPU & MSA get disabled in root context (hardware) when it is disabled
+	 * in guest context (software), but the register state in the hardware
+	 * may still be in use. This is why we explicitly re-enable the hardware
+	 * before saving.
+	 */
+
+	preempt_disable();
+	if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+		set_c0_config5(MIPS_CONF5_MSAEN);
+		enable_fpu_hazard();
+
+		__kvm_save_msa(&vcpu->arch);
+
+		/* Disable MSA & FPU */
+		disable_msa();
+		if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+			clear_c0_status(ST0_CU1 | ST0_FR);
+		vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
+	} else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+		set_c0_status(ST0_CU1);
+		enable_fpu_hazard();
+
+		__kvm_save_fpu(&vcpu->arch);
+		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+
+		/* Disable FPU */
+		clear_c0_status(ST0_CU1 | ST0_FR);
+	}
+	preempt_enable();
+}
+
+/*
+ * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are
+ * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP
+ * exception if cause bits are set in the value being written.
+ */
+static int kvm_mips_csr_die_notify(struct notifier_block *self,
+				   unsigned long cmd, void *ptr)
+{
+	struct die_args *args = (struct die_args *)ptr;
+	struct pt_regs *regs = args->regs;
+	unsigned long pc;
+
+	/* Only interested in FPE and MSAFPE */
+	if (cmd != DIE_FP && cmd != DIE_MSAFP)
+		return NOTIFY_DONE;
+
+	/* Return immediately if guest context isn't active */
+	if (!(current->flags & PF_VCPU))
+		return NOTIFY_DONE;
+
+	/* Should never get here from user mode */
+	BUG_ON(user_mode(regs));
+
+	pc = instruction_pointer(regs);
+	switch (cmd) {
+	case DIE_FP:
+		/* match 2nd instruction in __kvm_restore_fcsr */
+		if (pc != (unsigned long)&__kvm_restore_fcsr + 4)
+			return NOTIFY_DONE;
+		break;
+	case DIE_MSAFP:
+		/* match 2nd/3rd instruction in __kvm_restore_msacsr */
+		if (!cpu_has_msa ||
+		    pc < (unsigned long)&__kvm_restore_msacsr + 4 ||
+		    pc > (unsigned long)&__kvm_restore_msacsr + 8)
+			return NOTIFY_DONE;
+		break;
+	}
+
+	/* Move PC forward a little and continue executing */
+	instruction_pointer(regs) += 4;
+
+	return NOTIFY_STOP;
+}
+
+static struct notifier_block kvm_mips_csr_die_notifier = {
+	.notifier_call = kvm_mips_csr_die_notify,
+};
+
 int __init kvm_mips_init(void)
 {
 	int ret;
@@ -1161,6 +1622,8 @@ int __init kvm_mips_init(void)
 	if (ret)
 		return ret;
 
+	register_die_notifier(&kvm_mips_csr_die_notifier);
+
 	/*
 	 * On MIPS, kernel modules are executed from "mapped space", which
 	 * requires TLBs. The TLB handling code is statically linked with
@@ -1173,7 +1636,6 @@ int __init kvm_mips_init(void)
 	kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
 	kvm_mips_is_error_pfn = is_error_pfn;
 
-	pr_info("KVM/MIPS Initialized\n");
 	return 0;
 }
 
@@ -1185,7 +1647,7 @@ void __exit kvm_mips_exit(void)
 	kvm_mips_release_pfn_clean = NULL;
 	kvm_mips_is_error_pfn = NULL;
 
-	pr_info("KVM/MIPS unloaded\n");
+	unregister_die_notifier(&kvm_mips_csr_die_notifier);
 }
 
 module_init(kvm_mips_init);
diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S
new file mode 100644
index 000000000000..d02f0c6cc2cc
--- /dev/null
+++ b/arch/mips/kvm/msa.S
@@ -0,0 +1,161 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * MIPS SIMD Architecture (MSA) context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+
+	.set	noreorder
+	.set	noat
+
+LEAF(__kvm_save_msa)
+	st_d	0,  VCPU_FPR0,  a0
+	st_d	1,  VCPU_FPR1,  a0
+	st_d	2,  VCPU_FPR2,  a0
+	st_d	3,  VCPU_FPR3,  a0
+	st_d	4,  VCPU_FPR4,  a0
+	st_d	5,  VCPU_FPR5,  a0
+	st_d	6,  VCPU_FPR6,  a0
+	st_d	7,  VCPU_FPR7,  a0
+	st_d	8,  VCPU_FPR8,  a0
+	st_d	9,  VCPU_FPR9,  a0
+	st_d	10, VCPU_FPR10, a0
+	st_d	11, VCPU_FPR11, a0
+	st_d	12, VCPU_FPR12, a0
+	st_d	13, VCPU_FPR13, a0
+	st_d	14, VCPU_FPR14, a0
+	st_d	15, VCPU_FPR15, a0
+	st_d	16, VCPU_FPR16, a0
+	st_d	17, VCPU_FPR17, a0
+	st_d	18, VCPU_FPR18, a0
+	st_d	19, VCPU_FPR19, a0
+	st_d	20, VCPU_FPR20, a0
+	st_d	21, VCPU_FPR21, a0
+	st_d	22, VCPU_FPR22, a0
+	st_d	23, VCPU_FPR23, a0
+	st_d	24, VCPU_FPR24, a0
+	st_d	25, VCPU_FPR25, a0
+	st_d	26, VCPU_FPR26, a0
+	st_d	27, VCPU_FPR27, a0
+	st_d	28, VCPU_FPR28, a0
+	st_d	29, VCPU_FPR29, a0
+	st_d	30, VCPU_FPR30, a0
+	st_d	31, VCPU_FPR31, a0
+	jr	ra
+	 nop
+	END(__kvm_save_msa)
+
+LEAF(__kvm_restore_msa)
+	ld_d	0,  VCPU_FPR0,  a0
+	ld_d	1,  VCPU_FPR1,  a0
+	ld_d	2,  VCPU_FPR2,  a0
+	ld_d	3,  VCPU_FPR3,  a0
+	ld_d	4,  VCPU_FPR4,  a0
+	ld_d	5,  VCPU_FPR5,  a0
+	ld_d	6,  VCPU_FPR6,  a0
+	ld_d	7,  VCPU_FPR7,  a0
+	ld_d	8,  VCPU_FPR8,  a0
+	ld_d	9,  VCPU_FPR9,  a0
+	ld_d	10, VCPU_FPR10, a0
+	ld_d	11, VCPU_FPR11, a0
+	ld_d	12, VCPU_FPR12, a0
+	ld_d	13, VCPU_FPR13, a0
+	ld_d	14, VCPU_FPR14, a0
+	ld_d	15, VCPU_FPR15, a0
+	ld_d	16, VCPU_FPR16, a0
+	ld_d	17, VCPU_FPR17, a0
+	ld_d	18, VCPU_FPR18, a0
+	ld_d	19, VCPU_FPR19, a0
+	ld_d	20, VCPU_FPR20, a0
+	ld_d	21, VCPU_FPR21, a0
+	ld_d	22, VCPU_FPR22, a0
+	ld_d	23, VCPU_FPR23, a0
+	ld_d	24, VCPU_FPR24, a0
+	ld_d	25, VCPU_FPR25, a0
+	ld_d	26, VCPU_FPR26, a0
+	ld_d	27, VCPU_FPR27, a0
+	ld_d	28, VCPU_FPR28, a0
+	ld_d	29, VCPU_FPR29, a0
+	ld_d	30, VCPU_FPR30, a0
+	ld_d	31, VCPU_FPR31, a0
+	jr	ra
+	 nop
+	END(__kvm_restore_msa)
+
+	.macro	kvm_restore_msa_upper	wr, off, base
+	.set	push
+	.set	noat
+#ifdef CONFIG_64BIT
+	ld	$1, \off(\base)
+	insert_d \wr, 1
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+	lw	$1, \off(\base)
+	insert_w \wr, 2
+	lw	$1, (\off+4)(\base)
+	insert_w \wr, 3
+#else /* CONFIG_CPU_BIG_ENDIAN */
+	lw	$1, (\off+4)(\base)
+	insert_w \wr, 2
+	lw	$1, \off(\base)
+	insert_w \wr, 3
+#endif
+	.set	pop
+	.endm
+
+LEAF(__kvm_restore_msa_upper)
+	kvm_restore_msa_upper	0,  VCPU_FPR0 +8, a0
+	kvm_restore_msa_upper	1,  VCPU_FPR1 +8, a0
+	kvm_restore_msa_upper	2,  VCPU_FPR2 +8, a0
+	kvm_restore_msa_upper	3,  VCPU_FPR3 +8, a0
+	kvm_restore_msa_upper	4,  VCPU_FPR4 +8, a0
+	kvm_restore_msa_upper	5,  VCPU_FPR5 +8, a0
+	kvm_restore_msa_upper	6,  VCPU_FPR6 +8, a0
+	kvm_restore_msa_upper	7,  VCPU_FPR7 +8, a0
+	kvm_restore_msa_upper	8,  VCPU_FPR8 +8, a0
+	kvm_restore_msa_upper	9,  VCPU_FPR9 +8, a0
+	kvm_restore_msa_upper	10, VCPU_FPR10+8, a0
+	kvm_restore_msa_upper	11, VCPU_FPR11+8, a0
+	kvm_restore_msa_upper	12, VCPU_FPR12+8, a0
+	kvm_restore_msa_upper	13, VCPU_FPR13+8, a0
+	kvm_restore_msa_upper	14, VCPU_FPR14+8, a0
+	kvm_restore_msa_upper	15, VCPU_FPR15+8, a0
+	kvm_restore_msa_upper	16, VCPU_FPR16+8, a0
+	kvm_restore_msa_upper	17, VCPU_FPR17+8, a0
+	kvm_restore_msa_upper	18, VCPU_FPR18+8, a0
+	kvm_restore_msa_upper	19, VCPU_FPR19+8, a0
+	kvm_restore_msa_upper	20, VCPU_FPR20+8, a0
+	kvm_restore_msa_upper	21, VCPU_FPR21+8, a0
+	kvm_restore_msa_upper	22, VCPU_FPR22+8, a0
+	kvm_restore_msa_upper	23, VCPU_FPR23+8, a0
+	kvm_restore_msa_upper	24, VCPU_FPR24+8, a0
+	kvm_restore_msa_upper	25, VCPU_FPR25+8, a0
+	kvm_restore_msa_upper	26, VCPU_FPR26+8, a0
+	kvm_restore_msa_upper	27, VCPU_FPR27+8, a0
+	kvm_restore_msa_upper	28, VCPU_FPR28+8, a0
+	kvm_restore_msa_upper	29, VCPU_FPR29+8, a0
+	kvm_restore_msa_upper	30, VCPU_FPR30+8, a0
+	kvm_restore_msa_upper	31, VCPU_FPR31+8, a0
+	jr	ra
+	 nop
+	END(__kvm_restore_msa_upper)
+
+LEAF(__kvm_restore_msacsr)
+	lw	t0, VCPU_MSA_CSR(a0)
+	/*
+	 * The ctcmsa must stay at this offset in __kvm_restore_msacsr.
+	 * See kvm_mips_csr_die_notify() which handles t0 containing a value
+	 * which triggers an MSA FP Exception, which must be stepped over and
+	 * ignored since the set cause bits must remain there for the guest.
+	 */
+	_ctcmsa	MSA_CSR, t0
+	jr	ra
+	 nop
+	END(__kvm_restore_msacsr)
diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c
index a74d6024c5ad..888bb67070ac 100644
--- a/arch/mips/kvm/stats.c
+++ b/arch/mips/kvm/stats.c
@@ -25,6 +25,10 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = {
 	"System Call",
 	"Reserved Inst",
 	"Break Inst",
+	"Trap Inst",
+	"MSA FPE",
+	"FPE",
+	"MSA Disabled",
 	"D-Cache Flushes",
 };
 
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index b6beb0e07b1b..aed0ac2a4972 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -733,6 +733,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		}
 	}
 
+	/* restore guest state to registers */
+	kvm_mips_callbacks->vcpu_set_regs(vcpu);
+
 	local_irq_restore(flags);
 
 }
@@ -751,6 +754,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->arch.preempt_entryhi = read_c0_entryhi();
 	vcpu->arch.last_sched_cpu = cpu;
 
+	/* save guest state in registers */
+	kvm_mips_callbacks->vcpu_get_regs(vcpu);
+
 	if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
 	     ASID_VERSION_MASK)) {
 		kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index fd7257b70e65..d836ed5b0bc7 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -39,16 +39,30 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
 
 static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
 {
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_run *run = vcpu->run;
 	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
 	unsigned long cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
-	if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1)
-		er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
-	else
+	if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+		/* FPU Unusable */
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+		    (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) {
+			/*
+			 * Unusable/no FPU in guest:
+			 * deliver guest COP1 Unusable Exception
+			 */
+			er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
+		} else {
+			/* Restore FPU state */
+			kvm_own_fpu(vcpu);
+			er = EMULATE_DONE;
+		}
+	} else {
 		er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
+	}
 
 	switch (er) {
 	case EMULATE_DONE:
@@ -330,6 +344,107 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu);
+	if (er == EMULATE_DONE) {
+		ret = RESUME_GUEST;
+	} else {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+	}
+	return ret;
+}
+
+static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu);
+	if (er == EMULATE_DONE) {
+		ret = RESUME_GUEST;
+	} else {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+	}
+	return ret;
+}
+
+static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu);
+	if (er == EMULATE_DONE) {
+		ret = RESUME_GUEST;
+	} else {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+	}
+	return ret;
+}
+
+/**
+ * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use MSA when it is disabled.
+ */
+static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
+	    (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) {
+		/*
+		 * No MSA in guest, or FPU enabled and not in FR=1 mode,
+		 * guest reserved instruction exception
+		 */
+		er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
+	} else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) {
+		/* MSA disabled by guest, guest MSA disabled exception */
+		er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu);
+	} else {
+		/* Restore MSA/FPU state */
+		kvm_own_msa(vcpu);
+		er = EMULATE_DONE;
+	}
+
+	switch (er) {
+	case EMULATE_DONE:
+		ret = RESUME_GUEST;
+		break;
+
+	case EMULATE_FAIL:
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+		break;
+
+	default:
+		BUG();
+	}
+	return ret;
+}
+
 static int kvm_trap_emul_vm_init(struct kvm *kvm)
 {
 	return 0;
@@ -351,8 +466,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * guest will come up as expected, for now we simulate a MIPS 24kc
 	 */
 	kvm_write_c0_guest_prid(cop0, 0x00019300);
-	kvm_write_c0_guest_config(cop0,
-				  MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+	/* Have config1, Cacheable, noncoherent, write-back, write allocate */
+	kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) |
+				  (0x1 << CP0C0_AR) |
 				  (MMU_TYPE_R4000 << CP0C0_MT));
 
 	/* Read the cache characteristics from the host Config1 Register */
@@ -368,10 +484,18 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
 	      (1 << CP0C1_WR) | (1 << CP0C1_CA));
 	kvm_write_c0_guest_config1(cop0, config1);
 
-	kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2);
-	/* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */
-	kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) |
-					 (1 << CP0C3_ULRI));
+	/* Have config3, no tertiary/secondary caches implemented */
+	kvm_write_c0_guest_config2(cop0, MIPS_CONF_M);
+	/* MIPS_CONF_M | (read_c0_config2() & 0xfff) */
+
+	/* Have config4, UserLocal */
+	kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI);
+
+	/* Have config5 */
+	kvm_write_c0_guest_config4(cop0, MIPS_CONF_M);
+
+	/* No config6 */
+	kvm_write_c0_guest_config5(cop0, 0);
 
 	/* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
 	kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
@@ -416,6 +540,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	int ret = 0;
+	unsigned int cur, change;
 
 	switch (reg->id) {
 	case KVM_REG_MIPS_CP0_COUNT:
@@ -444,6 +569,44 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
 			kvm_write_c0_guest_cause(cop0, v);
 		}
 		break;
+	case KVM_REG_MIPS_CP0_CONFIG:
+		/* read-only for now */
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG1:
+		cur = kvm_read_c0_guest_config1(cop0);
+		change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config1(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG2:
+		/* read-only for now */
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG3:
+		cur = kvm_read_c0_guest_config3(cop0);
+		change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config3(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG4:
+		cur = kvm_read_c0_guest_config4(cop0);
+		change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config4(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG5:
+		cur = kvm_read_c0_guest_config5(cop0);
+		change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config5(cop0, v);
+		}
+		break;
 	case KVM_REG_MIPS_COUNT_CTL:
 		ret = kvm_mips_set_count_ctl(vcpu, v);
 		break;
@@ -459,6 +622,18 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
+static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
+{
+	kvm_lose_fpu(vcpu);
+
+	return 0;
+}
+
+static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
 	/* exit handlers */
 	.handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -470,6 +645,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
 	.handle_syscall = kvm_trap_emul_handle_syscall,
 	.handle_res_inst = kvm_trap_emul_handle_res_inst,
 	.handle_break = kvm_trap_emul_handle_break,
+	.handle_trap = kvm_trap_emul_handle_trap,
+	.handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
+	.handle_fpe = kvm_trap_emul_handle_fpe,
+	.handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
 
 	.vm_init = kvm_trap_emul_vm_init,
 	.vcpu_init = kvm_trap_emul_vcpu_init,
@@ -483,6 +662,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
 	.irq_clear = kvm_mips_irq_clear_cb,
 	.get_one_reg = kvm_trap_emul_get_one_reg,
 	.set_one_reg = kvm_trap_emul_set_one_reg,
+	.vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
+	.vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
 };
 
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index 39ab3e786e59..0db099ecc016 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -41,7 +41,7 @@ int ltq_soc_type(void)
 	return soc_info.type;
 }
 
-void prom_free_prom_memory(void)
+void __init prom_free_prom_memory(void)
 {
 }
 
diff --git a/arch/mips/lantiq/xway/vmmc.c b/arch/mips/lantiq/xway/vmmc.c
index 696cd57f6f13..d001bc38908a 100644
--- a/arch/mips/lantiq/xway/vmmc.c
+++ b/arch/mips/lantiq/xway/vmmc.c
@@ -61,7 +61,6 @@ static struct platform_driver vmmc_driver = {
 	.probe = vmmc_probe,
 	.driver = {
 		.name = "lantiq,vmmc",
-		.owner = THIS_MODULE,
 		.of_match_table = vmmc_match,
 	},
 };
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 3b7f65cc4218..a57959e648a6 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -53,21 +53,6 @@ int proc_dolasatstring(struct ctl_table *table, int write,
 	return 0;
 }
 
-/* proc function to write EEPROM after changing int entry */
-int proc_dolasatint(struct ctl_table *table, int write,
-		       void *buffer, size_t *lenp, loff_t *ppos)
-{
-	int r;
-
-	r = proc_dointvec(table, write, buffer, lenp, ppos);
-	if ((!write) || r)
-		return r;
-
-	lasat_write_eeprom_info();
-
-	return 0;
-}
-
 #ifdef CONFIG_DS1603
 static int rtctmp;
 
@@ -75,11 +60,11 @@ static int rtctmp;
 int proc_dolasatrtc(struct ctl_table *table, int write,
 		       void *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 	int r;
 
 	if (!write) {
-		read_persistent_clock(&ts);
+		read_persistent_clock64(&ts);
 		rtctmp = ts.tv_sec;
 		/* check for time < 0 and set to 0 */
 		if (rtctmp < 0)
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 4c721e247ac9..ed88647b57e2 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -76,10 +76,10 @@
 	LOAD	_t1, (offset + UNIT(1))(src);			\
 	LOAD	_t2, (offset + UNIT(2))(src);			\
 	LOAD	_t3, (offset + UNIT(3))(src);			\
+	ADDC(_t0, _t1);						\
+	ADDC(_t2, _t3);						\
 	ADDC(sum, _t0);						\
-	ADDC(sum, _t1);						\
-	ADDC(sum, _t2);						\
-	ADDC(sum, _t3)
+	ADDC(sum, _t2)
 
 #ifdef USE_DOUBLE
 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
@@ -504,21 +504,21 @@ LEAF(csum_partial)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
 	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)
-	ADDC(sum, t0)
+	ADDC(t0, t1)
 	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)
-	ADDC(sum, t1)
+	ADDC(sum, t0)
 	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)
-	ADDC(sum, t2)
+	ADDC(t2, t3)
 	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)
-	ADDC(sum, t3)
+	ADDC(sum, t2)
 	STORE(t4, UNIT(4)(dst),	.Ls_exc\@)
-	ADDC(sum, t4)
+	ADDC(t4, t5)
 	STORE(t5, UNIT(5)(dst),	.Ls_exc\@)
-	ADDC(sum, t5)
+	ADDC(sum, t4)
 	STORE(t6, UNIT(6)(dst),	.Ls_exc\@)
-	ADDC(sum, t6)
+	ADDC(t6, t7)
 	STORE(t7, UNIT(7)(dst),	.Ls_exc\@)
-	ADDC(sum, t7)
+	ADDC(sum, t6)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
 	bgez	len, 1b
@@ -544,13 +544,13 @@ LEAF(csum_partial)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
 	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)
-	ADDC(sum, t0)
+	ADDC(t0, t1)
 	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)
-	ADDC(sum, t1)
+	ADDC(sum, t0)
 	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)
-	ADDC(sum, t2)
+	ADDC(t2, t3)
 	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)
-	ADDC(sum, t3)
+	ADDC(sum, t2)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	beqz	len, .Ldone\@
@@ -649,13 +649,13 @@ LEAF(csum_partial)
 	nop				# improves slotting
 #endif
 	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)
-	ADDC(sum, t0)
+	ADDC(t0, t1)
 	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)
-	ADDC(sum, t1)
+	ADDC(sum, t0)
 	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)
-	ADDC(sum, t2)
+	ADDC(t2, t3)
 	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)
-	ADDC(sum, t3)
+	ADDC(sum, t2)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile
index d87e03330b29..e70c33fdb881 100644
--- a/arch/mips/loongson/common/Makefile
+++ b/arch/mips/loongson/common/Makefile
@@ -4,7 +4,6 @@
 
 obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
     bonito-irq.o mem.o machtype.o platform.o
-obj-$(CONFIG_GPIOLIB) += gpio.o
 obj-$(CONFIG_PCI) += pci.o
 
 #
diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c
index 045ea3d47c87..22f04ca2ff3e 100644
--- a/arch/mips/loongson/common/env.c
+++ b/arch/mips/loongson/common/env.c
@@ -29,6 +29,7 @@ struct efi_memory_map_loongson *loongson_memmap;
 struct loongson_system_configuration loongson_sysconf;
 
 u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180};
+u64 loongson_chiptemp[MAX_PACKAGES];
 u64 loongson_freqctrl[MAX_PACKAGES];
 
 unsigned long long smp_group[4];
@@ -97,6 +98,10 @@ void __init prom_init_env(void)
 		loongson_chipcfg[1] = 0x900010001fe00180;
 		loongson_chipcfg[2] = 0x900020001fe00180;
 		loongson_chipcfg[3] = 0x900030001fe00180;
+		loongson_chiptemp[0] = 0x900000001fe0019c;
+		loongson_chiptemp[1] = 0x900010001fe0019c;
+		loongson_chiptemp[2] = 0x900020001fe0019c;
+		loongson_chiptemp[3] = 0x900030001fe0019c;
 		loongson_sysconf.ht_control_base = 0x90000EFDFB000000;
 		loongson_sysconf.workarounds = WORKAROUND_CPUFREQ;
 	} else if (ecpu->cputype == Loongson_3B) {
@@ -110,6 +115,10 @@ void __init prom_init_env(void)
 		loongson_chipcfg[1] = 0x900020001fe00180;
 		loongson_chipcfg[2] = 0x900040001fe00180;
 		loongson_chipcfg[3] = 0x900060001fe00180;
+		loongson_chiptemp[0] = 0x900000001fe0019c;
+		loongson_chiptemp[1] = 0x900020001fe0019c;
+		loongson_chiptemp[2] = 0x900040001fe0019c;
+		loongson_chiptemp[3] = 0x900060001fe0019c;
 		loongson_freqctrl[0] = 0x900000001fe001d0;
 		loongson_freqctrl[1] = 0x900020001fe001d0;
 		loongson_freqctrl[2] = 0x900040001fe001d0;
diff --git a/arch/mips/loongson/common/gpio.c b/arch/mips/loongson/common/gpio.c
deleted file mode 100644
index 29dbaa253061..000000000000
--- a/arch/mips/loongson/common/gpio.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- *  STLS2F GPIO Support
- *
- *  Copyright (c) 2008 Richard Liu,  STMicroelectronics	 <richard.liu@st.com>
- *  Copyright (c) 2008-2010 Arnaud Patard <apatard@mandriva.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/err.h>
-#include <asm/types.h>
-#include <loongson.h>
-#include <linux/gpio.h>
-
-#define STLS2F_N_GPIO		4
-#define STLS2F_GPIO_IN_OFFSET	16
-
-static DEFINE_SPINLOCK(gpio_lock);
-
-int gpio_get_value(unsigned gpio)
-{
-	u32 val;
-	u32 mask;
-
-	if (gpio >= STLS2F_N_GPIO)
-		return __gpio_get_value(gpio);
-
-	mask = 1 << (gpio + STLS2F_GPIO_IN_OFFSET);
-	spin_lock(&gpio_lock);
-	val = LOONGSON_GPIODATA;
-	spin_unlock(&gpio_lock);
-
-	return (val & mask) != 0;
-}
-EXPORT_SYMBOL(gpio_get_value);
-
-void gpio_set_value(unsigned gpio, int state)
-{
-	u32 val;
-	u32 mask;
-
-	if (gpio >= STLS2F_N_GPIO) {
-		__gpio_set_value(gpio, state);
-		return ;
-	}
-
-	mask = 1 << gpio;
-
-	spin_lock(&gpio_lock);
-	val = LOONGSON_GPIODATA;
-	if (state)
-		val |= mask;
-	else
-		val &= (~mask);
-	LOONGSON_GPIODATA = val;
-	spin_unlock(&gpio_lock);
-}
-EXPORT_SYMBOL(gpio_set_value);
-
-int gpio_cansleep(unsigned gpio)
-{
-	if (gpio < STLS2F_N_GPIO)
-		return 0;
-	else
-		return __gpio_cansleep(gpio);
-}
-EXPORT_SYMBOL(gpio_cansleep);
-
-static int ls2f_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
-{
-	u32 temp;
-	u32 mask;
-
-	if (gpio >= STLS2F_N_GPIO)
-		return -EINVAL;
-
-	spin_lock(&gpio_lock);
-	mask = 1 << gpio;
-	temp = LOONGSON_GPIOIE;
-	temp |= mask;
-	LOONGSON_GPIOIE = temp;
-	spin_unlock(&gpio_lock);
-
-	return 0;
-}
-
-static int ls2f_gpio_direction_output(struct gpio_chip *chip,
-		unsigned gpio, int level)
-{
-	u32 temp;
-	u32 mask;
-
-	if (gpio >= STLS2F_N_GPIO)
-		return -EINVAL;
-
-	gpio_set_value(gpio, level);
-	spin_lock(&gpio_lock);
-	mask = 1 << gpio;
-	temp = LOONGSON_GPIOIE;
-	temp &= (~mask);
-	LOONGSON_GPIOIE = temp;
-	spin_unlock(&gpio_lock);
-
-	return 0;
-}
-
-static int ls2f_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
-{
-	return gpio_get_value(gpio);
-}
-
-static void ls2f_gpio_set_value(struct gpio_chip *chip,
-		unsigned gpio, int value)
-{
-	gpio_set_value(gpio, value);
-}
-
-static struct gpio_chip ls2f_chip = {
-	.label			= "ls2f",
-	.direction_input	= ls2f_gpio_direction_input,
-	.get			= ls2f_gpio_get_value,
-	.direction_output	= ls2f_gpio_direction_output,
-	.set			= ls2f_gpio_set_value,
-	.base			= 0,
-	.ngpio			= STLS2F_N_GPIO,
-};
-
-static int __init ls2f_gpio_setup(void)
-{
-	return gpiochip_add(&ls2f_chip);
-}
-arch_initcall(ls2f_gpio_setup);
diff --git a/arch/mips/loongson/common/pci.c b/arch/mips/loongson/common/pci.c
index 003ab4e618b3..4e2575643781 100644
--- a/arch/mips/loongson/common/pci.c
+++ b/arch/mips/loongson/common/pci.c
@@ -78,6 +78,8 @@ static void __init setup_pcimap(void)
 #endif
 }
 
+extern int sbx00_acpi_init(void);
+
 static int __init pcibios_init(void)
 {
 	setup_pcimap();
@@ -89,6 +91,10 @@ static int __init pcibios_init(void)
 #endif
 	register_pci_controller(&loongson_pci_controller);
 
+#ifdef CONFIG_CPU_LOONGSON3
+	sbx00_acpi_init();
+#endif
+
 	return 0;
 }
 
diff --git a/arch/mips/loongson/loongson-3/cop2-ex.c b/arch/mips/loongson/loongson-3/cop2-ex.c
index b03e37d2071a..ea13764d0a03 100644
--- a/arch/mips/loongson/loongson-3/cop2-ex.c
+++ b/arch/mips/loongson/loongson-3/cop2-ex.c
@@ -43,7 +43,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 		if (!fpu_owned) {
 			set_thread_flag(TIF_USEDFPU);
 			if (!used_math()) {
-				_init_fpu();
+				_init_fpu(current->thread.fpu.fcr31);
 				set_used_math();
 			} else
 				_restore_fp(current);
diff --git a/arch/mips/loongson/loongson-3/hpet.c b/arch/mips/loongson/loongson-3/hpet.c
index e898d68668a9..5c21cd3bd339 100644
--- a/arch/mips/loongson/loongson-3/hpet.c
+++ b/arch/mips/loongson/loongson-3/hpet.c
@@ -162,7 +162,7 @@ static irqreturn_t hpet_irq_handler(int irq, void *data)
 
 static struct irqaction hpet_irq = {
 	.handler = hpet_irq_handler,
-	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
+	.flags = IRQF_NOBALANCING | IRQF_TIMER,
 	.name = "hpet",
 };
 
diff --git a/arch/mips/loongson/loongson-3/irq.c b/arch/mips/loongson/loongson-3/irq.c
index 21221edda7a9..0f75b6b3d218 100644
--- a/arch/mips/loongson/loongson-3/irq.c
+++ b/arch/mips/loongson/loongson-3/irq.c
@@ -44,6 +44,7 @@ void mach_irq_dispatch(unsigned int pending)
 
 static struct irqaction cascade_irqaction = {
 	.handler = no_action,
+	.flags = IRQF_NO_SUSPEND,
 	.name = "cascade",
 };
 
diff --git a/arch/mips/loongson/loongson-3/numa.c b/arch/mips/loongson/loongson-3/numa.c
index 6cae0e75de27..12d14ed48778 100644
--- a/arch/mips/loongson/loongson-3/numa.c
+++ b/arch/mips/loongson/loongson-3/numa.c
@@ -233,7 +233,7 @@ static __init void prom_meminit(void)
 		if (node_online(node)) {
 			szmem(node);
 			node_mem_init(node);
-			cpus_clear(__node_data[(node)]->cpumask);
+			cpumask_clear(&__node_data[(node)]->cpumask);
 		}
 	}
 	for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) {
@@ -244,7 +244,7 @@ static __init void prom_meminit(void)
 		if (loongson_sysconf.reserved_cpus_mask & (1<<cpu))
 			continue;
 
-		cpu_set(active_cpu, __node_data[(node)]->cpumask);
+		cpumask_set_cpu(active_cpu, &__node_data[(node)]->cpumask);
 		pr_info("NUMA: set cpumask cpu %d on node %d\n", active_cpu, node);
 
 		active_cpu++;
diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c
index e2eb688b5434..e3c68b5da18d 100644
--- a/arch/mips/loongson/loongson-3/smp.c
+++ b/arch/mips/loongson/loongson-3/smp.c
@@ -408,7 +408,7 @@ static int loongson3_cpu_disable(void)
 		return -EBUSY;
 
 	set_cpu_online(cpu, false);
-	cpu_clear(cpu, cpu_callin_map);
+	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	local_irq_save(flags);
 	fixup_irqs();
 	local_irq_restore(flags);
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index 619cfc1a2442..2e5f96275c38 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile
@@ -2,12 +2,15 @@
 # Makefile for the Linux/MIPS kernel FPU emulation.
 #
 
-obj-y	+= cp1emu.o ieee754dp.o ieee754sp.o ieee754.o dp_div.o dp_mul.o \
-	   dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o dp_tint.o \
-	   dp_fint.o dp_tlong.o dp_flong.o sp_div.o sp_mul.o sp_sub.o \
-	   sp_add.o sp_fdp.o sp_cmp.o sp_simple.o sp_tint.o sp_fint.o \
-	   sp_tlong.o sp_flong.o dsemul.o
+obj-y	+= cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
+	   dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
+	   dp_tint.o dp_fint.o \
+	   sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
+	   sp_tint.o sp_fint.o \
+	   dsemul.o
 
-lib-y	+= ieee754d.o dp_sqrt.o sp_sqrt.o
+lib-y	+= ieee754d.o \
+	   dp_tlong.o dp_flong.o dp_sqrt.o \
+	   sp_tlong.o sp_flong.o sp_sqrt.o
 
 obj-$(CONFIG_DEBUG_FS) += me-debugfs.o
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index b30bf65c7d7d..d31c537ace1d 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -45,6 +45,7 @@
 #include <asm/signal.h>
 #include <asm/uaccess.h>
 
+#include <asm/cpu-info.h>
 #include <asm/processor.h>
 #include <asm/fpu_emulator.h>
 #include <asm/fpu.h>
@@ -63,14 +64,14 @@ static int fpux_emu(struct pt_regs *,
 /* Control registers */
 
 #define FPCREG_RID	0	/* $0  = revision id */
+#define FPCREG_FCCR	25	/* $25 = fccr */
+#define FPCREG_FEXR	26	/* $26 = fexr */
+#define FPCREG_FENR	28	/* $28 = fenr */
 #define FPCREG_CSR	31	/* $31 = csr */
 
-/* Determine rounding mode from the RM bits of the FCSR */
-#define modeindex(v) ((v) & FPU_CSR_RM)
-
 /* convert condition code register number to csr bit */
 const unsigned int fpucondbit[8] = {
-	FPU_CSR_COND0,
+	FPU_CSR_COND,
 	FPU_CSR_COND1,
 	FPU_CSR_COND2,
 	FPU_CSR_COND3,
@@ -843,6 +844,127 @@ do {									\
 #define DPTOREG(dp, x)	DITOREG((dp).bits, x)
 
 /*
+ * Emulate a CFC1 instruction.
+ */
+static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
+			    mips_instruction ir)
+{
+	u32 fcr31 = ctx->fcr31;
+	u32 value = 0;
+
+	switch (MIPSInst_RD(ir)) {
+	case FPCREG_CSR:
+		value = fcr31;
+		pr_debug("%p gpr[%d]<-csr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		break;
+
+	case FPCREG_FENR:
+		if (!cpu_has_mips_r)
+			break;
+		value = (fcr31 >> (FPU_CSR_FS_S - MIPS_FENR_FS_S)) &
+			MIPS_FENR_FS;
+		value |= fcr31 & (FPU_CSR_ALL_E | FPU_CSR_RM);
+		pr_debug("%p gpr[%d]<-enr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		break;
+
+	case FPCREG_FEXR:
+		if (!cpu_has_mips_r)
+			break;
+		value = fcr31 & (FPU_CSR_ALL_X | FPU_CSR_ALL_S);
+		pr_debug("%p gpr[%d]<-exr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		break;
+
+	case FPCREG_FCCR:
+		if (!cpu_has_mips_r)
+			break;
+		value = (fcr31 >> (FPU_CSR_COND_S - MIPS_FCCR_COND0_S)) &
+			MIPS_FCCR_COND0;
+		value |= (fcr31 >> (FPU_CSR_COND1_S - MIPS_FCCR_COND1_S)) &
+			 (MIPS_FCCR_CONDX & ~MIPS_FCCR_COND0);
+		pr_debug("%p gpr[%d]<-ccr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		break;
+
+	case FPCREG_RID:
+		value = current_cpu_data.fpu_id;
+		break;
+
+	default:
+		break;
+	}
+
+	if (MIPSInst_RT(ir))
+		xcp->regs[MIPSInst_RT(ir)] = value;
+}
+
+/*
+ * Emulate a CTC1 instruction.
+ */
+static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
+			    mips_instruction ir)
+{
+	u32 fcr31 = ctx->fcr31;
+	u32 value;
+	u32 mask;
+
+	if (MIPSInst_RT(ir) == 0)
+		value = 0;
+	else
+		value = xcp->regs[MIPSInst_RT(ir)];
+
+	switch (MIPSInst_RD(ir)) {
+	case FPCREG_CSR:
+		pr_debug("%p gpr[%d]->csr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+
+		/* Preserve read-only bits.  */
+		mask = current_cpu_data.fpu_msk31;
+		fcr31 = (value & ~mask) | (fcr31 & mask);
+		break;
+
+	case FPCREG_FENR:
+		if (!cpu_has_mips_r)
+			break;
+		pr_debug("%p gpr[%d]->enr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		fcr31 &= ~(FPU_CSR_FS | FPU_CSR_ALL_E | FPU_CSR_RM);
+		fcr31 |= (value << (FPU_CSR_FS_S - MIPS_FENR_FS_S)) &
+			 FPU_CSR_FS;
+		fcr31 |= value & (FPU_CSR_ALL_E | FPU_CSR_RM);
+		break;
+
+	case FPCREG_FEXR:
+		if (!cpu_has_mips_r)
+			break;
+		pr_debug("%p gpr[%d]->exr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		fcr31 &= ~(FPU_CSR_ALL_X | FPU_CSR_ALL_S);
+		fcr31 |= value & (FPU_CSR_ALL_X | FPU_CSR_ALL_S);
+		break;
+
+	case FPCREG_FCCR:
+		if (!cpu_has_mips_r)
+			break;
+		pr_debug("%p gpr[%d]->ccr=%08x\n",
+			 (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
+		fcr31 &= ~(FPU_CSR_CONDX | FPU_CSR_COND);
+		fcr31 |= (value << (FPU_CSR_COND_S - MIPS_FCCR_COND0_S)) &
+			 FPU_CSR_COND;
+		fcr31 |= (value << (FPU_CSR_COND1_S - MIPS_FCCR_COND1_S)) &
+			 FPU_CSR_CONDX;
+		break;
+
+	default:
+		break;
+	}
+
+	ctx->fcr31 = fcr31;
+}
+
+/*
  * Emulate the single floating point instruction pointed at by EPC.
  * Two instructions if the instruction is in a branch delay slot.
  */
@@ -856,7 +978,6 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	int likely, pc_inc;
 	u32 __user *wva;
 	u64 __user *dva;
-	u32 value;
 	u32 wval;
 	u64 dval;
 	int sig;
@@ -1049,42 +1170,12 @@ emul:
 
 		case cfc_op:
 			/* cop control register rd -> gpr[rt] */
-			if (MIPSInst_RD(ir) == FPCREG_CSR) {
-				value = ctx->fcr31;
-				value = (value & ~FPU_CSR_RM) | modeindex(value);
-				pr_debug("%p gpr[%d]<-csr=%08x\n",
-					 (void *) (xcp->cp0_epc),
-					 MIPSInst_RT(ir), value);
-			}
-			else if (MIPSInst_RD(ir) == FPCREG_RID)
-				value = 0;
-			else
-				value = 0;
-			if (MIPSInst_RT(ir))
-				xcp->regs[MIPSInst_RT(ir)] = value;
+			cop1_cfc(xcp, ctx, ir);
 			break;
 
 		case ctc_op:
 			/* copregister rd <- rt */
-			if (MIPSInst_RT(ir) == 0)
-				value = 0;
-			else
-				value = xcp->regs[MIPSInst_RT(ir)];
-
-			/* we only have one writable control reg
-			 */
-			if (MIPSInst_RD(ir) == FPCREG_CSR) {
-				pr_debug("%p gpr[%d]->csr=%08x\n",
-					 (void *) (xcp->cp0_epc),
-					 MIPSInst_RT(ir), value);
-
-				/*
-				 * Don't write reserved bits,
-				 * and convert to ieee library modes
-				 */
-				ctx->fcr31 = (value & ~(FPU_CSR_RSVD | FPU_CSR_RM)) |
-					     modeindex(value);
-			}
+			cop1_ctc(xcp, ctx, ir);
 			if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
 				return SIGFPE;
 			}
@@ -1103,17 +1194,18 @@ emul:
 			likely = 0;
 			switch (MIPSInst_RT(ir) & 3) {
 			case bcfl_op:
-				likely = 1;
+				if (cpu_has_mips_2_3_4_5_r)
+					likely = 1;
+				/* Fall through */
 			case bcf_op:
 				cond = !cond;
 				break;
 			case bctl_op:
-				likely = 1;
+				if (cpu_has_mips_2_3_4_5_r)
+					likely = 1;
+				/* Fall through */
 			case bct_op:
 				break;
-			default:
-				/* thats an illegal instruction */
-				return SIGILL;
 			}
 
 			set_delay_slot(xcp);
@@ -1121,6 +1213,14 @@ emul:
 				/*
 				 * Branch taken: emulate dslot instruction
 				 */
+				unsigned long bcpc;
+
+				/*
+				 * Remember EPC at the branch to point back
+				 * at so that any delay-slot instruction
+				 * signal is not silently ignored.
+				 */
+				bcpc = xcp->cp0_epc;
 				xcp->cp0_epc += dec_insn.pc_inc;
 
 				contpc = MIPSInst_SIMM(ir);
@@ -1146,63 +1246,77 @@ emul:
 						 * Single step the non-CP1
 						 * instruction in the dslot.
 						 */
-						return mips_dsemul(xcp, ir, contpc);
+						sig = mips_dsemul(xcp, ir,
+								  contpc);
+						if (sig)
+							xcp->cp0_epc = bcpc;
+						/*
+						 * SIGILL forces out of
+						 * the emulation loop.
+						 */
+						return sig ? sig : SIGILL;
 					}
 				} else
 					contpc = (xcp->cp0_epc + (contpc << 2));
 
 				switch (MIPSInst_OPCODE(ir)) {
 				case lwc1_op:
-					goto emul;
-
 				case swc1_op:
 					goto emul;
 
 				case ldc1_op:
 				case sdc1_op:
-					if (cpu_has_mips_2_3_4_5 ||
-					    cpu_has_mips64)
+					if (cpu_has_mips_2_3_4_5_r)
 						goto emul;
 
-					return SIGILL;
-					goto emul;
+					goto bc_sigill;
 
 				case cop1_op:
 					goto emul;
 
 				case cop1x_op:
-					if (cpu_has_mips_4_5 || cpu_has_mips64 || cpu_has_mips32r2)
+					if (cpu_has_mips_4_5_64_r2_r6)
 						/* its one of ours */
 						goto emul;
 
-					return SIGILL;
+					goto bc_sigill;
 
 				case spec_op:
-					if (!cpu_has_mips_4_5_r)
-						return SIGILL;
+					switch (MIPSInst_FUNC(ir)) {
+					case movc_op:
+						if (cpu_has_mips_4_5_r)
+							goto emul;
 
-					if (MIPSInst_FUNC(ir) == movc_op)
-						goto emul;
+						goto bc_sigill;
+					}
 					break;
+
+				bc_sigill:
+					xcp->cp0_epc = bcpc;
+					return SIGILL;
 				}
 
 				/*
 				 * Single step the non-cp1
 				 * instruction in the dslot
 				 */
-				return mips_dsemul(xcp, ir, contpc);
+				sig = mips_dsemul(xcp, ir, contpc);
+				if (sig)
+					xcp->cp0_epc = bcpc;
+				/* SIGILL forces out of the emulation loop.  */
+				return sig ? sig : SIGILL;
 			} else if (likely) {	/* branch not taken */
-					/*
-					 * branch likely nullifies
-					 * dslot if not taken
-					 */
-					xcp->cp0_epc += dec_insn.pc_inc;
-					contpc += dec_insn.pc_inc;
-					/*
-					 * else continue & execute
-					 * dslot as normal insn
-					 */
-				}
+				/*
+				 * branch likely nullifies
+				 * dslot if not taken
+				 */
+				xcp->cp0_epc += dec_insn.pc_inc;
+				contpc += dec_insn.pc_inc;
+				/*
+				 * else continue & execute
+				 * dslot as normal insn
+				 */
+			}
 			break;
 
 		default:
@@ -1216,7 +1330,7 @@ emul:
 		break;
 
 	case cop1x_op:
-		if (!cpu_has_mips_4_5 && !cpu_has_mips64 && !cpu_has_mips32r2)
+		if (!cpu_has_mips_4_5_64_r2_r6)
 			return SIGILL;
 
 		sig = fpux_emu(xcp, ctx, ir, fault_addr);
@@ -1549,7 +1663,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 
 			/* unary  ops */
 		case fsqrt_op:
-			if (!cpu_has_mips_4_5_r)
+			if (!cpu_has_mips_2_3_4_5_r)
 				return SIGILL;
 
 			handler.u = ieee754sp_sqrt;
@@ -1561,14 +1675,14 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 		 * achieve full IEEE-754 accuracy - however this emulator does.
 		 */
 		case frsqrt_op:
-			if (!cpu_has_mips_4_5_r2_r6)
+			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_sp_rsqrt;
 			goto scopuop;
 
 		case frecip_op:
-			if (!cpu_has_mips_4_5_r2_r6)
+			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_sp_recip;
@@ -1670,19 +1784,19 @@ copcsr:
 		case ftrunc_op:
 		case fceil_op:
 		case ffloor_op:
-			if (!cpu_has_mips_2_3_4_5 && !cpu_has_mips64)
+			if (!cpu_has_mips_2_3_4_5_r)
 				return SIGILL;
 
 			oldrm = ieee754_csr.rm;
 			SPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
+			ieee754_csr.rm = MIPSInst_FUNC(ir);
 			rv.w = ieee754sp_tint(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = w_fmt;
 			goto copcsr;
 
 		case fcvtl_op:
-			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
 			SPFROMREG(fs, MIPSInst_FS(ir));
@@ -1694,12 +1808,12 @@ copcsr:
 		case ftruncl_op:
 		case fceill_op:
 		case ffloorl_op:
-			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
 			oldrm = ieee754_csr.rm;
 			SPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
+			ieee754_csr.rm = MIPSInst_FUNC(ir);
 			rv.l = ieee754sp_tlong(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = l_fmt;
@@ -1763,13 +1877,13 @@ copcsr:
 		 * achieve full IEEE-754 accuracy - however this emulator does.
 		 */
 		case frsqrt_op:
-			if (!cpu_has_mips_4_5_r2_r6)
+			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_dp_rsqrt;
 			goto dcopuop;
 		case frecip_op:
-			if (!cpu_has_mips_4_5_r2_r6)
+			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_dp_recip;
@@ -1852,14 +1966,14 @@ dcopuop:
 
 			oldrm = ieee754_csr.rm;
 			DPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
+			ieee754_csr.rm = MIPSInst_FUNC(ir);
 			rv.w = ieee754dp_tint(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = w_fmt;
 			goto copcsr;
 
 		case fcvtl_op:
-			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
 			DPFROMREG(fs, MIPSInst_FS(ir));
@@ -1871,12 +1985,12 @@ dcopuop:
 		case ftruncl_op:
 		case fceill_op:
 		case ffloorl_op:
-			if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
 			oldrm = ieee754_csr.rm;
 			DPFROMREG(fs, MIPSInst_FS(ir));
-			ieee754_csr.rm = modeindex(MIPSInst_FUNC(ir));
+			ieee754_csr.rm = MIPSInst_FUNC(ir);
 			rv.l = ieee754dp_tlong(fs);
 			ieee754_csr.rm = oldrm;
 			rfmt = l_fmt;
@@ -1930,7 +2044,7 @@ dcopuop:
 
 	case l_fmt:
 
-		if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+		if (!cpu_has_mips_3_4_5_64_r2_r6)
 			return SIGILL;
 
 		DIFROMREG(bits, MIPSInst_FS(ir));
@@ -1994,7 +2108,7 @@ dcopuop:
 		SITOREG(rv.w, MIPSInst_FD(ir));
 		break;
 	case l_fmt:
-		if (!cpu_has_mips_3_4_5 && !cpu_has_mips64)
+		if (!cpu_has_mips_3_4_5_64_r2_r6)
 			return SIGILL;
 
 		DITOREG(rv.l, MIPSInst_FD(ir));
@@ -2081,10 +2195,8 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			xcp->cp0_epc += dec_insn.pc_inc;	/* Skip NOPs */
 		else {
 			/*
-			 * The 'ieee754_csr' is an alias of
-			 * ctx->fcr31.	No need to copy ctx->fcr31 to
-			 * ieee754_csr.	 But ieee754_csr.rm is ieee
-			 * library modes. (not mips rounding mode)
+			 * The 'ieee754_csr' is an alias of ctx->fcr31.
+			 * No need to copy ctx->fcr31 to ieee754_csr.
 			 */
 			sig = cop1Emulate(xcp, ctx, dec_insn, fault_addr);
 		}
diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c
index 7f64577df984..8954ef031f84 100644
--- a/arch/mips/math-emu/dp_add.c
+++ b/arch/mips/math-emu/dp_add.c
@@ -37,19 +37,20 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
 	FLUSHYDP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754dp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
+		return ieee754dp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -150,8 +151,6 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
 		 * leaving result in xm, xs and xe.
 		 */
 		xm = xm + ym;
-		xe = xe;
-		xs = xs;
 
 		if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */
 			xm = XDPSRS1(xm);
@@ -160,11 +159,8 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
 	} else {
 		if (xm >= ym) {
 			xm = xm - ym;
-			xe = xe;
-			xs = xs;
 		} else {
 			xm = ym - xm;
-			xe = xe;
 			xs = ys;
 		}
 		if (xm == 0)
diff --git a/arch/mips/math-emu/dp_cmp.c b/arch/mips/math-emu/dp_cmp.c
index 30f95f6e9ac4..a29880e29ae4 100644
--- a/arch/mips/math-emu/dp_cmp.c
+++ b/arch/mips/math-emu/dp_cmp.c
@@ -35,16 +35,11 @@ int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cmp, int sig)
 	FLUSHYDP;
 	ieee754_clearcx();	/* Even clear inexact flag here */
 
-	if (ieee754dp_isnan(x) || ieee754dp_isnan(y)) {
-		if (sig || xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN)
+	if (ieee754_class_nan(xc) || ieee754_class_nan(yc)) {
+		if (sig ||
+		    xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN)
 			ieee754_setcx(IEEE754_INVALID_OPERATION);
-		if (cmp & IEEE754_CUN)
-			return 1;
-		if (cmp & (IEEE754_CLT | IEEE754_CGT)) {
-			if (sig && ieee754_setandtestcx(IEEE754_INVALID_OPERATION))
-				return 0;
-		}
-		return 0;
+		return (cmp & IEEE754_CUN) != 0;
 	} else {
 		vx = x.bits;
 		vy = y.bits;
diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c
index bef0e55e5938..f4746f7c5f63 100644
--- a/arch/mips/math-emu/dp_div.c
+++ b/arch/mips/math-emu/dp_div.c
@@ -39,19 +39,20 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y)
 	FLUSHYDP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754dp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
+		return ieee754dp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
diff --git a/arch/mips/math-emu/dp_fsp.c b/arch/mips/math-emu/dp_fsp.c
index ffb69c5830b0..57d09ca5403a 100644
--- a/arch/mips/math-emu/dp_fsp.c
+++ b/arch/mips/math-emu/dp_fsp.c
@@ -22,6 +22,12 @@
 #include "ieee754sp.h"
 #include "ieee754dp.h"
 
+static inline union ieee754dp ieee754dp_nan_fsp(int xs, u64 xm)
+{
+	return builddp(xs, DP_EMAX + 1 + DP_EBIAS,
+		       xm << (DP_FBITS - SP_FBITS));
+}
+
 union ieee754dp ieee754dp_fsp(union ieee754sp x)
 {
 	COMPXSP;
@@ -34,15 +40,11 @@ union ieee754dp ieee754dp_fsp(union ieee754sp x)
 
 	switch (xc) {
 	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
+		return ieee754dp_nanxcpt(ieee754dp_nan_fsp(xs, xm));
 
 	case IEEE754_CLASS_QNAN:
-		return ieee754dp_nanxcpt(builddp(xs,
-						 DP_EMAX + 1 + DP_EBIAS,
-						 ((u64) xm
-						  << (DP_FBITS -
-						      SP_FBITS))));
+		return ieee754dp_nan_fsp(xs, xm);
+
 	case IEEE754_CLASS_INF:
 		return ieee754dp_inf(xs);
 
diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c
index d3acdedb5b9d..d0901f03fa19 100644
--- a/arch/mips/math-emu/dp_mul.c
+++ b/arch/mips/math-emu/dp_mul.c
@@ -47,19 +47,20 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
 	FLUSHYDP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754dp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
+		return ieee754dp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
diff --git a/arch/mips/math-emu/dp_simple.c b/arch/mips/math-emu/dp_simple.c
index bccbe90efceb..926d56bf37f2 100644
--- a/arch/mips/math-emu/dp_simple.c
+++ b/arch/mips/math-emu/dp_simple.c
@@ -23,44 +23,27 @@
 
 union ieee754dp ieee754dp_neg(union ieee754dp x)
 {
-	COMPXDP;
-
-	EXPLODEXDP;
-	ieee754_clearcx();
-	FLUSHXDP;
-
-	/*
-	 * Invert the sign ALWAYS to prevent an endless recursion on
-	 * pow() in libc.
-	 */
-	/* quick fix up */
-	DPSIGN(x) ^= 1;
-
-	if (xc == IEEE754_CLASS_SNAN) {
-		union ieee754dp y = ieee754dp_indef();
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		DPSIGN(y) = DPSIGN(x);
-		return ieee754dp_nanxcpt(y);
-	}
-
-	return x;
+	unsigned int oldrm;
+	union ieee754dp y;
+
+	oldrm = ieee754_csr.rm;
+	ieee754_csr.rm = FPU_CSR_RD;
+	y = ieee754dp_sub(ieee754dp_zero(0), x);
+	ieee754_csr.rm = oldrm;
+	return y;
 }
 
 union ieee754dp ieee754dp_abs(union ieee754dp x)
 {
-	COMPXDP;
-
-	EXPLODEXDP;
-	ieee754_clearcx();
-	FLUSHXDP;
-
-	/* Clear sign ALWAYS, irrespective of NaN */
-	DPSIGN(x) = 0;
-
-	if (xc == IEEE754_CLASS_SNAN) {
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
-	}
-
-	return x;
+	unsigned int oldrm;
+	union ieee754dp y;
+
+	oldrm = ieee754_csr.rm;
+	ieee754_csr.rm = FPU_CSR_RD;
+	if (DPSIGN(x))
+		y = ieee754dp_sub(ieee754dp_zero(0), x);
+	else
+		y = ieee754dp_add(ieee754dp_zero(0), x);
+	ieee754_csr.rm = oldrm;
+	return y;
 }
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c
index 041bbb6124bb..cd5bc083001e 100644
--- a/arch/mips/math-emu/dp_sqrt.c
+++ b/arch/mips/math-emu/dp_sqrt.c
@@ -42,13 +42,12 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
 
 	/* x == INF or NAN? */
 	switch (xc) {
-	case IEEE754_CLASS_QNAN:
-		/* sqrt(Nan) = Nan */
+	case IEEE754_CLASS_SNAN:
 		return ieee754dp_nanxcpt(x);
 
-	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
+	case IEEE754_CLASS_QNAN:
+		/* sqrt(Nan) = Nan */
+		return x;
 
 	case IEEE754_CLASS_ZERO:
 		/* sqrt(0) = 0 */
@@ -58,7 +57,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
 		if (xs) {
 			/* sqrt(-Inf) = Nan */
 			ieee754_setcx(IEEE754_INVALID_OPERATION);
-			return ieee754dp_nanxcpt(ieee754dp_indef());
+			return ieee754dp_indef();
 		}
 		/* sqrt(+Inf) = Inf */
 		return x;
@@ -71,7 +70,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
 		if (xs) {
 			/* sqrt(-x) = Nan */
 			ieee754_setcx(IEEE754_INVALID_OPERATION);
-			return ieee754dp_nanxcpt(ieee754dp_indef());
+			return ieee754dp_indef();
 		}
 		break;
 	}
diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c
index 7a174029043a..fc17a781b9ae 100644
--- a/arch/mips/math-emu/dp_sub.c
+++ b/arch/mips/math-emu/dp_sub.c
@@ -37,19 +37,20 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
 	FLUSHYDP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754dp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754dp_nanxcpt(ieee754dp_indef());
+		return ieee754dp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -153,8 +154,6 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
 		/* generate 28 bit result of adding two 27 bit numbers
 		 */
 		xm = xm + ym;
-		xe = xe;
-		xs = xs;
 
 		if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */
 			xm = XDPSRS1(xm);	/* shift preserving sticky */
@@ -163,11 +162,8 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
 	} else {
 		if (xm >= ym) {
 			xm = xm - ym;
-			xe = xe;
-			xs = xs;
 		} else {
 			xm = ym - xm;
-			xe = xe;
 			xs = ys;
 		}
 		if (xm == 0) {
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 4f514f3724cb..e0b5cc27d78b 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -94,9 +94,9 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 	regs->cp0_epc = ((unsigned long) &fr->emul) |
 		get_isa16_mode(regs->cp0_epc);
 
-	flush_cache_sigtramp((unsigned long)&fr->badinst);
+	flush_cache_sigtramp((unsigned long)&fr->emul);
 
-	return SIGILL;		/* force out of emulation loop */
+	return 0;
 }
 
 int do_dsemulret(struct pt_regs *xcp)
@@ -158,6 +158,6 @@ int do_dsemulret(struct pt_regs *xcp)
 
 	/* Set EPC to return to post-branch instruction */
 	xcp->cp0_epc = epc;
-
+	MIPS_FPU_EMU_INC_STATS(ds_emul);
 	return 1;
 }
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index 43c4fb522ac2..a5ca108ce467 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -126,84 +126,21 @@ enum {
 #define IEEE754_CGT	0x04
 #define IEEE754_CUN	0x08
 
-/* "normal" comparisons
-*/
-static inline int ieee754sp_eq(union ieee754sp x, union ieee754sp y)
-{
-	return ieee754sp_cmp(x, y, IEEE754_CEQ, 0);
-}
-
-static inline int ieee754sp_ne(union ieee754sp x, union ieee754sp y)
-{
-	return ieee754sp_cmp(x, y,
-			     IEEE754_CLT | IEEE754_CGT | IEEE754_CUN, 0);
-}
-
-static inline int ieee754sp_lt(union ieee754sp x, union ieee754sp y)
-{
-	return ieee754sp_cmp(x, y, IEEE754_CLT, 0);
-}
-
-static inline int ieee754sp_le(union ieee754sp x, union ieee754sp y)
-{
-	return ieee754sp_cmp(x, y, IEEE754_CLT | IEEE754_CEQ, 0);
-}
-
-static inline int ieee754sp_gt(union ieee754sp x, union ieee754sp y)
-{
-	return ieee754sp_cmp(x, y, IEEE754_CGT, 0);
-}
-
-
-static inline int ieee754sp_ge(union ieee754sp x, union ieee754sp y)
-{
-	return ieee754sp_cmp(x, y, IEEE754_CGT | IEEE754_CEQ, 0);
-}
-
-static inline int ieee754dp_eq(union ieee754dp x, union ieee754dp y)
-{
-	return ieee754dp_cmp(x, y, IEEE754_CEQ, 0);
-}
-
-static inline int ieee754dp_ne(union ieee754dp x, union ieee754dp y)
-{
-	return ieee754dp_cmp(x, y,
-			     IEEE754_CLT | IEEE754_CGT | IEEE754_CUN, 0);
-}
-
-static inline int ieee754dp_lt(union ieee754dp x, union ieee754dp y)
-{
-	return ieee754dp_cmp(x, y, IEEE754_CLT, 0);
-}
-
-static inline int ieee754dp_le(union ieee754dp x, union ieee754dp y)
-{
-	return ieee754dp_cmp(x, y, IEEE754_CLT | IEEE754_CEQ, 0);
-}
-
-static inline int ieee754dp_gt(union ieee754dp x, union ieee754dp y)
-{
-	return ieee754dp_cmp(x, y, IEEE754_CGT, 0);
-}
-
-static inline int ieee754dp_ge(union ieee754dp x, union ieee754dp y)
-{
-	return ieee754dp_cmp(x, y, IEEE754_CGT | IEEE754_CEQ, 0);
-}
-
 /*
  * The control status register
  */
 struct _ieee754_csr {
-	__BITFIELD_FIELD(unsigned pad0:7,
-	__BITFIELD_FIELD(unsigned nod:1,	/* set 1 for no denormalised numbers */
-	__BITFIELD_FIELD(unsigned c:1,		/* condition */
-	__BITFIELD_FIELD(unsigned pad1:5,
+	__BITFIELD_FIELD(unsigned fcc:7,	/* condition[7:1] */
+	__BITFIELD_FIELD(unsigned nod:1,	/* set 1 for no denormals */
+	__BITFIELD_FIELD(unsigned c:1,		/* condition[0] */
+	__BITFIELD_FIELD(unsigned pad0:3,
+	__BITFIELD_FIELD(unsigned abs2008:1,	/* IEEE 754-2008 ABS/NEG.fmt */
+	__BITFIELD_FIELD(unsigned nan2008:1,	/* IEEE 754-2008 NaN mode */
 	__BITFIELD_FIELD(unsigned cx:6,		/* exceptions this operation */
 	__BITFIELD_FIELD(unsigned mx:5,		/* exception enable  mask */
 	__BITFIELD_FIELD(unsigned sx:5,		/* exceptions total */
 	__BITFIELD_FIELD(unsigned rm:2,		/* current rounding mode */
-	;))))))))
+	;))))))))))
 };
 #define ieee754_csr (*(struct _ieee754_csr *)(&current->thread.fpu.fcr31))
 
@@ -257,23 +194,23 @@ static inline int ieee754_sxtest(unsigned n)
 union ieee754sp ieee754sp_dump(char *s, union ieee754sp x);
 union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
 
-#define IEEE754_SPCVAL_PZERO	0
-#define IEEE754_SPCVAL_NZERO	1
-#define IEEE754_SPCVAL_PONE	2
-#define IEEE754_SPCVAL_NONE	3
-#define IEEE754_SPCVAL_PTEN	4
-#define IEEE754_SPCVAL_NTEN	5
-#define IEEE754_SPCVAL_PINFINITY	6
-#define IEEE754_SPCVAL_NINFINITY	7
-#define IEEE754_SPCVAL_INDEF	8
-#define IEEE754_SPCVAL_PMAX	9	/* +max norm */
-#define IEEE754_SPCVAL_NMAX	10	/* -max norm */
-#define IEEE754_SPCVAL_PMIN	11	/* +min norm */
-#define IEEE754_SPCVAL_NMIN	12	/* +min norm */
-#define IEEE754_SPCVAL_PMIND	13	/* +min denorm */
-#define IEEE754_SPCVAL_NMIND	14	/* +min denorm */
-#define IEEE754_SPCVAL_P1E31	15	/* + 1.0e31 */
-#define IEEE754_SPCVAL_P1E63	16	/* + 1.0e63 */
+#define IEEE754_SPCVAL_PZERO		0	/* +0.0 */
+#define IEEE754_SPCVAL_NZERO		1	/* -0.0 */
+#define IEEE754_SPCVAL_PONE		2	/* +1.0 */
+#define IEEE754_SPCVAL_NONE		3	/* -1.0 */
+#define IEEE754_SPCVAL_PTEN		4	/* +10.0 */
+#define IEEE754_SPCVAL_NTEN		5	/* -10.0 */
+#define IEEE754_SPCVAL_PINFINITY	6	/* +inf */
+#define IEEE754_SPCVAL_NINFINITY	7	/* -inf */
+#define IEEE754_SPCVAL_INDEF		8	/* quiet NaN */
+#define IEEE754_SPCVAL_PMAX		9	/* +max norm */
+#define IEEE754_SPCVAL_NMAX		10	/* -max norm */
+#define IEEE754_SPCVAL_PMIN		11	/* +min norm */
+#define IEEE754_SPCVAL_NMIN		12	/* -min norm */
+#define IEEE754_SPCVAL_PMIND		13	/* +min denorm */
+#define IEEE754_SPCVAL_NMIND		14	/* -min denorm */
+#define IEEE754_SPCVAL_P1E31		15	/* + 1.0e31 */
+#define IEEE754_SPCVAL_P1E63		16	/* + 1.0e63 */
 
 extern const union ieee754dp __ieee754dp_spcvals[];
 extern const union ieee754sp __ieee754sp_spcvals[];
diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c
index 068f45a415fc..522d843f2ffd 100644
--- a/arch/mips/math-emu/ieee754dp.c
+++ b/arch/mips/math-emu/ieee754dp.c
@@ -30,9 +30,9 @@ int ieee754dp_class(union ieee754dp x)
 	return xc;
 }
 
-int ieee754dp_isnan(union ieee754dp x)
+static inline int ieee754dp_isnan(union ieee754dp x)
 {
-	return ieee754dp_class(x) >= IEEE754_CLASS_SNAN;
+	return ieee754_class_nan(ieee754dp_class(x));
 }
 
 static inline int ieee754dp_issnan(union ieee754dp x)
@@ -42,23 +42,16 @@ static inline int ieee754dp_issnan(union ieee754dp x)
 }
 
 
+/*
+ * Raise the Invalid Operation IEEE 754 exception
+ * and convert the signaling NaN supplied to a quiet NaN.
+ */
 union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
 {
-	assert(ieee754dp_isnan(r));
-
-	if (!ieee754dp_issnan(r))	/* QNAN does not cause invalid op !! */
-		return r;
-
-	if (!ieee754_setandtestcx(IEEE754_INVALID_OPERATION)) {
-		/* not enabled convert to a quiet NaN */
-		DPMANT(r) &= (~DP_MBIT(DP_FBITS-1));
-		if (ieee754dp_isnan(r))
-			return r;
-		else
-			return ieee754dp_indef();
-	}
+	assert(ieee754dp_issnan(r));
 
-	return r;
+	ieee754_setcx(IEEE754_INVALID_OPERATION);
+	return ieee754dp_indef();
 }
 
 static u64 ieee754dp_get_rounding(int sn, u64 xm)
diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index 61fd6fd31350..e2babd98fee3 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h
@@ -77,6 +77,5 @@ static inline union ieee754dp builddp(int s, int bx, u64 m)
 	return r;
 }
 
-extern int ieee754dp_isnan(union ieee754dp);
 extern union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp);
 extern union ieee754dp ieee754dp_format(int, int, u64);
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index f0365bb86747..05389d5e3a93 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -44,6 +44,11 @@ static inline int ieee754_setandtestcx(const unsigned int x)
 	return ieee754_csr.mx & x;
 }
 
+static inline int ieee754_class_nan(int xc)
+{
+	return xc >= IEEE754_CLASS_SNAN;
+}
+
 #define COMPXSP \
 	unsigned xm; int xe; int xs __maybe_unused; int xc
 
diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c
index ba88301579c2..ca8e35e33bf7 100644
--- a/arch/mips/math-emu/ieee754sp.c
+++ b/arch/mips/math-emu/ieee754sp.c
@@ -30,9 +30,9 @@ int ieee754sp_class(union ieee754sp x)
 	return xc;
 }
 
-int ieee754sp_isnan(union ieee754sp x)
+static inline int ieee754sp_isnan(union ieee754sp x)
 {
-	return ieee754sp_class(x) >= IEEE754_CLASS_SNAN;
+	return ieee754_class_nan(ieee754sp_class(x));
 }
 
 static inline int ieee754sp_issnan(union ieee754sp x)
@@ -42,23 +42,16 @@ static inline int ieee754sp_issnan(union ieee754sp x)
 }
 
 
+/*
+ * Raise the Invalid Operation IEEE 754 exception
+ * and convert the signaling NaN supplied to a quiet NaN.
+ */
 union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
 {
-	assert(ieee754sp_isnan(r));
-
-	if (!ieee754sp_issnan(r))	/* QNAN does not cause invalid op !! */
-		return r;
-
-	if (!ieee754_setandtestcx(IEEE754_INVALID_OPERATION)) {
-		/* not enabled convert to a quiet NaN */
-		SPMANT(r) &= (~SP_MBIT(SP_FBITS-1));
-		if (ieee754sp_isnan(r))
-			return r;
-		else
-			return ieee754sp_indef();
-	}
+	assert(ieee754sp_issnan(r));
 
-	return r;
+	ieee754_setcx(IEEE754_INVALID_OPERATION);
+	return ieee754sp_indef();
 }
 
 static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index ad268e332318..374a3f00a589 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -82,6 +82,5 @@ static inline union ieee754sp buildsp(int s, int bx, unsigned m)
 	return r;
 }
 
-extern int ieee754sp_isnan(union ieee754sp);
 extern union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp);
 extern union ieee754sp ieee754sp_format(int, int, unsigned);
diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
index becdd63e14a9..f308e0f05fc5 100644
--- a/arch/mips/math-emu/me-debugfs.c
+++ b/arch/mips/math-emu/me-debugfs.c
@@ -61,6 +61,7 @@ do {									\
 	FPU_STAT_CREATE(ieee754_overflow);
 	FPU_STAT_CREATE(ieee754_zerodiv);
 	FPU_STAT_CREATE(ieee754_invalidop);
+	FPU_STAT_CREATE(ds_emul);
 
 	return 0;
 }
diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c
index 2d84d460cb67..f1c87b07d3b4 100644
--- a/arch/mips/math-emu/sp_add.c
+++ b/arch/mips/math-emu/sp_add.c
@@ -37,19 +37,20 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
 	FLUSHYSP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754sp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
+		return ieee754sp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -148,8 +149,6 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
 		 * leaving result in xm, xs and xe.
 		 */
 		xm = xm + ym;
-		xe = xe;
-		xs = xs;
 
 		if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */
 			SPXSRSX1();
@@ -157,11 +156,8 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
 	} else {
 		if (xm >= ym) {
 			xm = xm - ym;
-			xe = xe;
-			xs = xs;
 		} else {
 			xm = ym - xm;
-			xe = xe;
 			xs = ys;
 		}
 		if (xm == 0)
diff --git a/arch/mips/math-emu/sp_cmp.c b/arch/mips/math-emu/sp_cmp.c
index addbccb2f556..67b82f1e2c4a 100644
--- a/arch/mips/math-emu/sp_cmp.c
+++ b/arch/mips/math-emu/sp_cmp.c
@@ -35,16 +35,11 @@ int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cmp, int sig)
 	FLUSHYSP;
 	ieee754_clearcx();	/* Even clear inexact flag here */
 
-	if (ieee754sp_isnan(x) || ieee754sp_isnan(y)) {
-		if (sig || xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN)
+	if (ieee754_class_nan(xc) || ieee754_class_nan(yc)) {
+		if (sig ||
+		    xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN)
 			ieee754_setcx(IEEE754_INVALID_OPERATION);
-		if (cmp & IEEE754_CUN)
-			return 1;
-		if (cmp & (IEEE754_CLT | IEEE754_CGT)) {
-			if (sig && ieee754_setandtestcx(IEEE754_INVALID_OPERATION))
-				return 0;
-		}
-		return 0;
+		return (cmp & IEEE754_CUN) != 0;
 	} else {
 		vx = x.bits;
 		vy = y.bits;
diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c
index 721f317aa877..27f6db3a0a4c 100644
--- a/arch/mips/math-emu/sp_div.c
+++ b/arch/mips/math-emu/sp_div.c
@@ -39,19 +39,20 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y)
 	FLUSHYSP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754sp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
+		return ieee754sp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c
index 1b266fb16973..3797148893ad 100644
--- a/arch/mips/math-emu/sp_fdp.c
+++ b/arch/mips/math-emu/sp_fdp.c
@@ -22,12 +22,19 @@
 #include "ieee754sp.h"
 #include "ieee754dp.h"
 
+static inline union ieee754sp ieee754sp_nan_fdp(int xs, u64 xm)
+{
+	return buildsp(xs, SP_EMAX + 1 + SP_EBIAS,
+		       xm >> (DP_FBITS - SP_FBITS));
+}
+
 union ieee754sp ieee754sp_fdp(union ieee754dp x)
 {
+	union ieee754sp y;
 	u32 rm;
 
 	COMPXDP;
-	union ieee754sp nan;
+	COMPYSP;
 
 	EXPLODEXDP;
 
@@ -37,15 +44,14 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
 
 	switch (xc) {
 	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
+		return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm));
 
 	case IEEE754_CLASS_QNAN:
-		nan = buildsp(xs, SP_EMAX + 1 + SP_EBIAS, (u32)
-				(xm >> (DP_FBITS - SP_FBITS)));
-		if (!ieee754sp_isnan(nan))
-			nan = ieee754sp_indef();
-		return ieee754sp_nanxcpt(nan);
+		y = ieee754sp_nan_fdp(xs, xm);
+		EXPLODEYSP;
+		if (!ieee754_class_nan(yc))
+			y = ieee754sp_indef();
+		return y;
 
 	case IEEE754_CLASS_INF:
 		return ieee754sp_inf(xs);
diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c
index 890c13a2965e..d910c43a6f30 100644
--- a/arch/mips/math-emu/sp_mul.c
+++ b/arch/mips/math-emu/sp_mul.c
@@ -47,19 +47,20 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y)
 	FLUSHYSP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754sp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
+		return ieee754sp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
diff --git a/arch/mips/math-emu/sp_simple.c b/arch/mips/math-emu/sp_simple.c
index f1ffaa9a17e0..c50e9451f2d2 100644
--- a/arch/mips/math-emu/sp_simple.c
+++ b/arch/mips/math-emu/sp_simple.c
@@ -23,44 +23,27 @@
 
 union ieee754sp ieee754sp_neg(union ieee754sp x)
 {
-	COMPXSP;
-
-	EXPLODEXSP;
-	ieee754_clearcx();
-	FLUSHXSP;
-
-	/*
-	 * Invert the sign ALWAYS to prevent an endless recursion on
-	 * pow() in libc.
-	 */
-	/* quick fix up */
-	SPSIGN(x) ^= 1;
-
-	if (xc == IEEE754_CLASS_SNAN) {
-		union ieee754sp y = ieee754sp_indef();
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		SPSIGN(y) = SPSIGN(x);
-		return ieee754sp_nanxcpt(y);
-	}
-
-	return x;
+	unsigned int oldrm;
+	union ieee754sp y;
+
+	oldrm = ieee754_csr.rm;
+	ieee754_csr.rm = FPU_CSR_RD;
+	y = ieee754sp_sub(ieee754sp_zero(0), x);
+	ieee754_csr.rm = oldrm;
+	return y;
 }
 
 union ieee754sp ieee754sp_abs(union ieee754sp x)
 {
-	COMPXSP;
-
-	EXPLODEXSP;
-	ieee754_clearcx();
-	FLUSHXSP;
-
-	/* Clear sign ALWAYS, irrespective of NaN */
-	SPSIGN(x) = 0;
-
-	if (xc == IEEE754_CLASS_SNAN) {
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
-	}
-
-	return x;
+	unsigned int oldrm;
+	union ieee754sp y;
+
+	oldrm = ieee754_csr.rm;
+	ieee754_csr.rm = FPU_CSR_RD;
+	if (SPSIGN(x))
+		y = ieee754sp_sub(ieee754sp_zero(0), x);
+	else
+		y = ieee754sp_add(ieee754sp_zero(0), x);
+	ieee754_csr.rm = oldrm;
+	return y;
 }
diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c
index b7c098a86f95..67059c33a250 100644
--- a/arch/mips/math-emu/sp_sqrt.c
+++ b/arch/mips/math-emu/sp_sqrt.c
@@ -35,13 +35,12 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x)
 
 	/* x == INF or NAN? */
 	switch (xc) {
-	case IEEE754_CLASS_QNAN:
-		/* sqrt(Nan) = Nan */
+	case IEEE754_CLASS_SNAN:
 		return ieee754sp_nanxcpt(x);
 
-	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
+	case IEEE754_CLASS_QNAN:
+		/* sqrt(Nan) = Nan */
+		return x;
 
 	case IEEE754_CLASS_ZERO:
 		/* sqrt(0) = 0 */
@@ -51,7 +50,7 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x)
 		if (xs) {
 			/* sqrt(-Inf) = Nan */
 			ieee754_setcx(IEEE754_INVALID_OPERATION);
-			return ieee754sp_nanxcpt(ieee754sp_indef());
+			return ieee754sp_indef();
 		}
 		/* sqrt(+Inf) = Inf */
 		return x;
@@ -61,7 +60,7 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x)
 		if (xs) {
 			/* sqrt(-x) = Nan */
 			ieee754_setcx(IEEE754_INVALID_OPERATION);
-			return ieee754sp_nanxcpt(ieee754sp_indef());
+			return ieee754sp_indef();
 		}
 		break;
 	}
diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c
index 8592e49032b8..ec5f937a8b3e 100644
--- a/arch/mips/math-emu/sp_sub.c
+++ b/arch/mips/math-emu/sp_sub.c
@@ -37,19 +37,20 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
 	FLUSHYSP;
 
 	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+		return ieee754sp_nanxcpt(y);
+
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
+	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
-		return ieee754sp_nanxcpt(ieee754sp_indef());
+		return ieee754sp_nanxcpt(x);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
@@ -148,8 +149,6 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
 		/* generate 28 bit result of adding two 27 bit numbers
 		 */
 		xm = xm + ym;
-		xe = xe;
-		xs = xs;
 
 		if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */
 			SPXSRSX1();	/* shift preserving sticky */
@@ -157,11 +156,8 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
 	} else {
 		if (xm >= ym) {
 			xm = xm - ym;
-			xe = xe;
-			xs = xs;
 		} else {
 			xm = ym - xm;
-			xe = xe;
 			xs = ys;
 		}
 		if (xm == 0) {
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 3f8059602765..0dbb65a51ce5 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -430,6 +430,7 @@ static inline void local_r4k___flush_cache_all(void * args)
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 		/*
 		 * These caches are inclusive caches, that is, if something
 		 * is not cached in the S-cache, we know it also won't be
@@ -506,7 +507,7 @@ static inline void local_r4k_flush_cache_mm(void * args)
 
 	/*
 	 * Kludge alert.  For obscure reasons R4000SC and R4400SC go nuts if we
-	 * only flush the primary caches but R10000 and R12000 behave sane ...
+	 * only flush the primary caches but R1x000 behave sane ...
 	 * R4000SC and R4400SC indexed S-cache ops also invalidate primary
 	 * caches, so we can bail out early.
 	 */
@@ -888,33 +889,39 @@ static inline void rm7k_erratum31(void)
 	}
 }
 
-static inline void alias_74k_erratum(struct cpuinfo_mips *c)
+static inline int alias_74k_erratum(struct cpuinfo_mips *c)
 {
 	unsigned int imp = c->processor_id & PRID_IMP_MASK;
 	unsigned int rev = c->processor_id & PRID_REV_MASK;
+	int present = 0;
 
 	/*
 	 * Early versions of the 74K do not update the cache tags on a
 	 * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG
-	 * aliases. In this case it is better to treat the cache as always
-	 * having aliases.
+	 * aliases.  In this case it is better to treat the cache as always
+	 * having aliases.  Also disable the synonym tag update feature
+	 * where available.  In this case no opportunistic tag update will
+	 * happen where a load causes a virtual address miss but a physical
+	 * address hit during a D-cache look-up.
 	 */
 	switch (imp) {
 	case PRID_IMP_74K:
 		if (rev <= PRID_REV_ENCODE_332(2, 4, 0))
-			c->dcache.flags |= MIPS_CACHE_VTAG;
+			present = 1;
 		if (rev == PRID_REV_ENCODE_332(2, 4, 0))
 			write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
 		break;
 	case PRID_IMP_1074K:
 		if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) {
-			c->dcache.flags |= MIPS_CACHE_VTAG;
+			present = 1;
 			write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
 		}
 		break;
 	default:
 		BUG();
 	}
+
+	return present;
 }
 
 static void b5k_instruction_hazard(void)
@@ -938,6 +945,7 @@ static void probe_pcache(void)
 	struct cpuinfo_mips *c = &current_cpu_data;
 	unsigned int config = read_c0_config();
 	unsigned int prid = read_c0_prid();
+	int has_74k_erratum = 0;
 	unsigned long config1;
 	unsigned int lsize;
 
@@ -1012,6 +1020,7 @@ static void probe_pcache(void)
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 		icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29));
 		c->icache.linesz = 64;
 		c->icache.ways = 2;
@@ -1223,8 +1232,8 @@ static void probe_pcache(void)
 		dcache_size / (c->dcache.linesz * c->dcache.ways) : 0;
 
 	/*
-	 * R10000 and R12000 P-caches are odd in a positive way.  They're 32kB
-	 * 2-way virtually indexed so normally would suffer from aliases.  So
+	 * R1x000 P-caches are odd in a positive way.  They're 32kB 2-way
+	 * virtually indexed so normally would suffer from aliases.  So
 	 * normally they'd suffer from aliases but magic in the hardware deals
 	 * with that for us so we don't need to take care ourselves.
 	 */
@@ -1240,11 +1249,12 @@ static void probe_pcache(void)
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 		break;
 
 	case CPU_74K:
 	case CPU_1074K:
-		alias_74k_erratum(c);
+		has_74k_erratum = alias_74k_erratum(c);
 		/* Fall through. */
 	case CPU_M14KC:
 	case CPU_M14KEC:
@@ -1259,7 +1269,7 @@ static void probe_pcache(void)
 		if (!(read_c0_config7() & MIPS_CONF7_IAR) &&
 		    (c->icache.waysize > PAGE_SIZE))
 			c->icache.flags |= MIPS_CACHE_ALIASES;
-		if (read_c0_config7() & MIPS_CONF7_AR) {
+		if (!has_74k_erratum && (read_c0_config7() & MIPS_CONF7_AR)) {
 			/*
 			 * Effectively physically indexed dcache,
 			 * thus no virtual aliases.
@@ -1268,7 +1278,7 @@ static void probe_pcache(void)
 			break;
 		}
 	default:
-		if (c->dcache.waysize > PAGE_SIZE)
+		if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE)
 			c->dcache.flags |= MIPS_CACHE_ALIASES;
 	}
 
@@ -1438,6 +1448,7 @@ static void setup_scache(void)
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 		scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16);
 		c->scache.linesz = 64 << ((config >> 13) & 1);
 		c->scache.ways = 2;
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 7e3ea7766822..77d96db8253c 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -119,36 +119,37 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr)
 
 EXPORT_SYMBOL(__flush_anon_page);
 
-static void mips_flush_dcache_from_pte(pte_t pteval, unsigned long address)
+void __flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	unsigned long addr;
+
+	if (PageHighMem(page))
+		return;
+
+	addr = (unsigned long) page_address(page);
+	flush_data_cache_page(addr);
+}
+EXPORT_SYMBOL_GPL(__flush_icache_page);
+
+void __update_cache(struct vm_area_struct *vma, unsigned long address,
+	pte_t pte)
 {
 	struct page *page;
-	unsigned long pfn = pte_pfn(pteval);
+	unsigned long pfn, addr;
+	int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc;
 
+	pfn = pte_pfn(pte);
 	if (unlikely(!pfn_valid(pfn)))
 		return;
-
 	page = pfn_to_page(pfn);
 	if (page_mapping(page) && Page_dcache_dirty(page)) {
-		unsigned long page_addr = (unsigned long) page_address(page);
-
-		if (!cpu_has_ic_fills_f_dc ||
-		    pages_do_alias(page_addr, address & PAGE_MASK))
-			flush_data_cache_page(page_addr);
+		addr = (unsigned long) page_address(page);
+		if (exec || pages_do_alias(addr, address & PAGE_MASK))
+			flush_data_cache_page(addr);
 		ClearPageDcacheDirty(page);
 	}
 }
 
-void set_pte_at(struct mm_struct *mm, unsigned long addr,
-        pte_t *ptep, pte_t pteval)
-{
-        if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc) {
-                if (pte_present(pteval))
-                        mips_flush_dcache_from_pte(pteval, addr);
-        }
-
-        set_pte(ptep, pteval);
-}
-
 unsigned long _page_cachable_default;
 EXPORT_SYMBOL(_page_cachable_default);
 
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index af5f046e627e..609d1241b0c4 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -258,7 +258,7 @@ static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
 	if (cpu_needs_post_dma_flush(dev))
 		__dma_sync(dma_addr_to_page(dev, dma_addr),
 			   dma_addr & ~PAGE_MASK, size, direction);
-
+	plat_post_dma_flush(dev);
 	plat_unmap_dma_mem(dev, dma_addr, size, direction);
 }
 
@@ -312,6 +312,7 @@ static void mips_dma_sync_single_for_cpu(struct device *dev,
 	if (cpu_needs_post_dma_flush(dev))
 		__dma_sync(dma_addr_to_page(dev, dma_handle),
 			   dma_handle & ~PAGE_MASK, size, direction);
+	plat_post_dma_flush(dev);
 }
 
 static void mips_dma_sync_single_for_device(struct device *dev,
@@ -331,6 +332,7 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev,
 		for (i = 0; i < nelems; i++, sg++)
 			__dma_sync(sg_page(sg), sg->offset, sg->length,
 				   direction);
+	plat_post_dma_flush(dev);
 }
 
 static void mips_dma_sync_sg_for_device(struct device *dev,
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 448cde372af0..faa5c9822ecc 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -96,7 +96,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
 	pte = mk_pte(page, prot);
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
-	entrylo = pte.pte_high;
+	entrylo = pte_to_entrylo(pte.pte_high);
 #else
 	entrylo = pte_to_entrylo(pte_val(pte));
 #endif
@@ -106,6 +106,11 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
 	write_c0_entryhi(vaddr & (PAGE_MASK << 1));
 	write_c0_entrylo0(entrylo);
 	write_c0_entrylo1(entrylo);
+#ifdef CONFIG_XPA
+	entrylo = (pte.pte_low & _PFNX_MASK);
+	writex_c0_entrylo0(entrylo);
+	writex_c0_entrylo1(entrylo);
+#endif
 	tlbidx = read_c0_wired();
 	write_c0_wired(tlbidx + 1);
 	write_c0_index(tlbidx);
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index f1baadd56e82..5c81fdd032c3 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -142,18 +142,26 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 			addr0, len, pgoff, flags, DOWN);
 }
 
+unsigned long arch_mmap_rnd(void)
+{
+	unsigned long rnd;
+
+	rnd = (unsigned long)get_random_int();
+	rnd <<= PAGE_SHIFT;
+	if (TASK_IS_32BIT_ADDR)
+		rnd &= 0xfffffful;
+	else
+		rnd &= 0xffffffful;
+
+	return rnd;
+}
+
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	unsigned long random_factor = 0UL;
 
-	if (current->flags & PF_RANDOMIZE) {
-		random_factor = get_random_int();
-		random_factor = random_factor << PAGE_SHIFT;
-		if (TASK_IS_32BIT_ADDR)
-			random_factor &= 0xfffffful;
-		else
-			random_factor &= 0xffffffful;
-	}
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 3f85f921801b..885d73ffd6fb 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -157,6 +157,7 @@ static void set_prefetch_parameters(void)
 		case CPU_R10000:
 		case CPU_R12000:
 		case CPU_R14000:
+		case CPU_R16000:
 			/*
 			 * Those values have been experimentally tuned for an
 			 * Origin 200.
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index b2afa49beab0..a27a088e6f9f 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -333,9 +333,17 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
 		ptep = pte_offset_map(pmdp, address);
 
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+#ifdef CONFIG_XPA
+		write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
+		writex_c0_entrylo0(ptep->pte_low & _PFNX_MASK);
+		ptep++;
+		write_c0_entrylo1(pte_to_entrylo(ptep->pte_high));
+		writex_c0_entrylo1(ptep->pte_low & _PFNX_MASK);
+#else
 		write_c0_entrylo0(ptep->pte_high);
 		ptep++;
 		write_c0_entrylo1(ptep->pte_high);
+#endif
 #else
 		write_c0_entrylo0(pte_to_entrylo(pte_val(*ptep++)));
 		write_c0_entrylo1(pte_to_entrylo(pte_val(*ptep)));
@@ -355,6 +363,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
 void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
 		     unsigned long entryhi, unsigned long pagemask)
 {
+#ifdef CONFIG_XPA
+	panic("Broken for XPA kernels");
+#else
 	unsigned long flags;
 	unsigned long wired;
 	unsigned long old_pagemask;
@@ -383,6 +394,7 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
 	write_c0_pagemask(old_pagemask);
 	local_flush_tlb_all();
 	local_irq_restore(flags);
+#endif
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -477,7 +489,8 @@ static void r4k_tlb_configure(void)
 	write_c0_wired(0);
 	if (current_cpu_type() == CPU_R10000 ||
 	    current_cpu_type() == CPU_R12000 ||
-	    current_cpu_type() == CPU_R14000)
+	    current_cpu_type() == CPU_R14000 ||
+	    current_cpu_type() == CPU_R16000)
 		write_c0_framemask(0);
 
 	if (cpu_has_rixi) {
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index d75ff73a2012..97c87027c17f 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -35,6 +35,17 @@
 #include <asm/uasm.h>
 #include <asm/setup.h>
 
+static int __cpuinitdata mips_xpa_disabled;
+
+static int __init xpa_disable(char *s)
+{
+	mips_xpa_disabled = 1;
+
+	return 1;
+}
+
+__setup("noxpa", xpa_disable);
+
 /*
  * TLB load/store/modify handlers.
  *
@@ -231,14 +242,14 @@ static void output_pgtable_bits_defines(void)
 	pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
 	pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT);
 #endif
+#ifdef CONFIG_CPU_MIPSR2
 	if (cpu_has_rixi) {
 #ifdef _PAGE_NO_EXEC_SHIFT
 		pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
-#endif
-#ifdef _PAGE_NO_READ_SHIFT
 		pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT);
 #endif
 	}
+#endif
 	pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT);
 	pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT);
 	pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT);
@@ -501,26 +512,9 @@ static void build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case tlb_indexed: tlbw = uasm_i_tlbwi; break;
 	}
 
-	if (cpu_has_mips_r2_exec_hazard) {
-		/*
-		 * The architecture spec says an ehb is required here,
-		 * but a number of cores do not have the hazard and
-		 * using an ehb causes an expensive pipeline stall.
-		 */
-		switch (current_cpu_type()) {
-		case CPU_M14KC:
-		case CPU_74K:
-		case CPU_1074K:
-		case CPU_PROAPTIV:
-		case CPU_P5600:
-		case CPU_M5150:
-		case CPU_QEMU_GENERIC:
-			break;
-
-		default:
+	if (cpu_has_mips_r2_r6) {
+		if (cpu_has_mips_r2_exec_hazard)
 			uasm_i_ehb(p);
-			break;
-		}
 		tlbw(p);
 		return;
 	}
@@ -569,6 +563,7 @@ static void build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 	case CPU_4KC:
 	case CPU_4KEC:
 	case CPU_M14KC:
@@ -1027,12 +1022,27 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
 	} else {
 		int pte_off_even = sizeof(pte_t) / 2;
 		int pte_off_odd = pte_off_even + sizeof(pte_t);
+#ifdef CONFIG_XPA
+		const int scratch = 1; /* Our extra working register */
 
-		/* The pte entries are pre-shifted */
-		uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
-		UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */
-		uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
-		UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
+		uasm_i_addu(p, scratch, 0, ptep);
+#endif
+		uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */
+		uasm_i_lw(p, ptep, pte_off_odd, ptep); /* odd pte */
+		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
+		UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL));
+		UASM_i_MTC0(p, tmp, C0_ENTRYLO0);
+		UASM_i_MTC0(p, ptep, C0_ENTRYLO1);
+#ifdef CONFIG_XPA
+		uasm_i_lw(p, tmp, 0, scratch);
+		uasm_i_lw(p, ptep, sizeof(pte_t), scratch);
+		uasm_i_lui(p, scratch, 0xff);
+		uasm_i_ori(p, scratch, scratch, 0xffff);
+		uasm_i_and(p, tmp, scratch, tmp);
+		uasm_i_and(p, ptep, scratch, ptep);
+		uasm_i_mthc0(p, tmp, C0_ENTRYLO0);
+		uasm_i_mthc0(p, ptep, C0_ENTRYLO1);
+#endif
 	}
 #else
 	UASM_i_LW(p, tmp, 0, ptep); /* get even pte */
@@ -1533,8 +1543,14 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
 {
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 	unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY);
-#endif
 
+	if (!cpu_has_64bits) {
+		const int scratch = 1; /* Our extra working register */
+
+		uasm_i_lui(p, scratch, (mode >> 16));
+		uasm_i_or(p, pte, pte, scratch);
+	} else
+#endif
 	uasm_i_ori(p, pte, pte, mode);
 #ifdef CONFIG_SMP
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
@@ -1598,15 +1614,17 @@ build_pte_present(u32 **p, struct uasm_reloc **r,
 			uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
 			uasm_i_nop(p);
 		} else {
-			uasm_i_andi(p, t, pte, _PAGE_PRESENT);
+			uasm_i_srl(p, t, pte, _PAGE_PRESENT_SHIFT);
+			uasm_i_andi(p, t, t, 1);
 			uasm_il_beqz(p, r, t, lid);
 			if (pte == t)
 				/* You lose the SMP race :-(*/
 				iPTE_LW(p, pte, ptr);
 		}
 	} else {
-		uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ);
-		uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ);
+		uasm_i_srl(p, t, pte, _PAGE_PRESENT_SHIFT);
+		uasm_i_andi(p, t, t, 3);
+		uasm_i_xori(p, t, t, 3);
 		uasm_il_bnez(p, r, t, lid);
 		if (pte == t)
 			/* You lose the SMP race :-(*/
@@ -1635,8 +1653,9 @@ build_pte_writable(u32 **p, struct uasm_reloc **r,
 {
 	int t = scratch >= 0 ? scratch : pte;
 
-	uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE);
-	uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE);
+	uasm_i_srl(p, t, pte, _PAGE_PRESENT_SHIFT);
+	uasm_i_andi(p, t, t, 5);
+	uasm_i_xori(p, t, t, 5);
 	uasm_il_bnez(p, r, t, lid);
 	if (pte == t)
 		/* You lose the SMP race :-(*/
@@ -1672,7 +1691,8 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r,
 		uasm_i_nop(p);
 	} else {
 		int t = scratch >= 0 ? scratch : pte;
-		uasm_i_andi(p, t, pte, _PAGE_WRITE);
+		uasm_i_srl(p, t, pte, _PAGE_WRITE_SHIFT);
+		uasm_i_andi(p, t, t, 1);
 		uasm_il_beqz(p, r, t, lid);
 		if (pte == t)
 			/* You lose the SMP race :-(*/
@@ -2285,6 +2305,11 @@ static void config_htw_params(void)
 
 	pwsize = ilog2(PTRS_PER_PGD) << MIPS_PWSIZE_GDW_SHIFT;
 	pwsize |= ilog2(PTRS_PER_PTE) << MIPS_PWSIZE_PTW_SHIFT;
+
+	/* If XPA has been enabled, PTEs are 64-bit in size. */
+	if (read_c0_pagegrain() & PG_ELPA)
+		pwsize |= 1;
+
 	write_c0_pwsize(pwsize);
 
 	/* Make sure everything is set before we enable the HTW */
@@ -2298,6 +2323,28 @@ static void config_htw_params(void)
 	print_htw_config();
 }
 
+static void config_xpa_params(void)
+{
+#ifdef CONFIG_XPA
+	unsigned int pagegrain;
+
+	if (mips_xpa_disabled) {
+		pr_info("Extended Physical Addressing (XPA) disabled\n");
+		return;
+	}
+
+	pagegrain = read_c0_pagegrain();
+	write_c0_pagegrain(pagegrain | PG_ELPA);
+	back_to_back_c0_hazard();
+	pagegrain = read_c0_pagegrain();
+
+	if (pagegrain & PG_ELPA)
+		pr_info("Extended Physical Addressing (XPA) enabled\n");
+	else
+		panic("Extended Physical Addressing (XPA) disabled");
+#endif
+}
+
 void build_tlb_refill_handler(void)
 {
 	/*
@@ -2362,8 +2409,9 @@ void build_tlb_refill_handler(void)
 		}
 		if (cpu_has_local_ebase)
 			build_r4000_tlb_refill_handler();
+		if (cpu_has_xpa)
+			config_xpa_params();
 		if (cpu_has_htw)
 			config_htw_params();
-
 	}
 }
diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index 6849f533154f..cec3e187c48f 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
-#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
 
 #include <asm/cacheflush.h>
 #include <asm/smp-ops.h>
@@ -75,7 +75,7 @@ static void __init console_config(void)
 	if ((strstr(fw_getcmdline(), "earlycon=")) == NULL) {
 		sprintf(console_string, "uart8250,io,0x3f8,%d%c%c", baud,
 			parity, bits);
-		setup_early_serial8250_console(console_string);
+		setup_earlycon(console_string);
 	}
 
 	if ((strstr(fw_getcmdline(), "console=")) == NULL) {
diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c
index 8fddd2cdbff7..b769657be4d4 100644
--- a/arch/mips/mti-malta/malta-memory.c
+++ b/arch/mips/mti-malta/malta-memory.c
@@ -16,6 +16,7 @@
 #include <linux/string.h>
 
 #include <asm/bootinfo.h>
+#include <asm/cdmm.h>
 #include <asm/maar.h>
 #include <asm/sections.h>
 #include <asm/fw/fw.h>
@@ -53,6 +54,12 @@ fw_memblock_t * __init fw_getmdesc(int eva)
 		pr_warn("memsize not set in YAMON, set to default (32Mb)\n");
 		physical_memsize = 0x02000000;
 	} else {
+		if (memsize > (256 << 20)) { /* memsize should be capped to 256M */
+			pr_warn("Unsupported memsize value (0x%lx) detected! "
+				"Using 0x10000000 (256M) instead\n",
+				memsize);
+			memsize = 256 << 20;
+		}
 		/* If ememsize is set, then set physical_memsize to that */
 		physical_memsize = ememsize ? : memsize;
 	}
@@ -196,3 +203,9 @@ unsigned platform_maar_init(unsigned num_pairs)
 
 	return maar_config(cfg, num_cfg, num_pairs);
 }
+
+phys_addr_t mips_cdmm_phys_base(void)
+{
+	/* This address is "typically unused" */
+	return 0x1fc10000;
+}
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index ce02dbdedc62..185e68261f45 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -87,8 +87,10 @@ static void __init estimate_frequencies(void)
 
 	/* Initialize counters. */
 	start = read_c0_count();
-	if (gic_present)
+	if (gic_present) {
+		gic_start_count();
 		gicstart = gic_read_count();
+	}
 
 	/* Read counter exactly on falling edge of update flag. */
 	while (CMOS_READ(RTC_REG_A) & RTC_UIP);
@@ -115,6 +117,22 @@ void read_persistent_clock(struct timespec *ts)
 	ts->tv_nsec = 0;
 }
 
+int get_c0_fdc_int(void)
+{
+	int mips_cpu_fdc_irq;
+
+	if (cpu_has_veic)
+		mips_cpu_fdc_irq = -1;
+	else if (gic_present)
+		mips_cpu_fdc_irq = gic_get_c0_fdc_int();
+	else if (cp0_fdc_irq >= 0)
+		mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
+	else
+		mips_cpu_fdc_irq = -1;
+
+	return mips_cpu_fdc_irq;
+}
+
 int get_c0_perfcount_int(void)
 {
 	if (cpu_has_veic) {
diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile
index 2ae49e99eb67..ecd71db6258b 100644
--- a/arch/mips/mti-sead3/Makefile
+++ b/arch/mips/mti-sead3/Makefile
@@ -9,14 +9,11 @@
 # Steven J. Hill <sjhill@mips.com>
 #
 obj-y				:= sead3-lcd.o sead3-display.o sead3-init.o \
-				   sead3-int.o sead3-mtd.o sead3-net.o \
-				   sead3-platform.o sead3-reset.o \
+				   sead3-int.o sead3-platform.o sead3-reset.o \
 				   sead3-setup.o sead3-time.o
 
-obj-y				+= sead3-i2c-dev.o sead3-i2c.o \
-				   leds-sead3.o sead3-leds.o
+obj-y				+= leds-sead3.o
 
 obj-$(CONFIG_EARLY_PRINTK)	+= sead3-console.o
-obj-$(CONFIG_USB_EHCI_HCD)	+= sead3-ehci.o
 
 CFLAGS_sead3-setup.o = -I$(src)/../../../scripts/dtc/libfdt
diff --git a/arch/mips/mti-sead3/leds-sead3.c b/arch/mips/mti-sead3/leds-sead3.c
index 3abe47b316aa..c938ceeb8848 100644
--- a/arch/mips/mti-sead3/leds-sead3.c
+++ b/arch/mips/mti-sead3/leds-sead3.c
@@ -4,6 +4,7 @@
  * for more details.
  *
  * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2015 Imagination Technologies, Inc.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -13,22 +14,18 @@
 #include <linux/err.h>
 #include <linux/io.h>
 
-#define DRVNAME "sead3-led"
-
-static struct platform_device *pdev;
+#include <asm/mips-boards/sead3-addr.h>
 
 static void sead3_pled_set(struct led_classdev *led_cdev,
 		enum led_brightness value)
 {
-	pr_debug("sead3_pled_set\n");
-	writel(value, (void __iomem *)0xBF000210);	/* FIXME */
+	writel(value, (void __iomem *)SEAD3_CPLD_P_LED);
 }
 
 static void sead3_fled_set(struct led_classdev *led_cdev,
 		enum led_brightness value)
 {
-	pr_debug("sead3_fled_set\n");
-	writel(value, (void __iomem *)0xBF000218);	/* FIXME */
+	writel(value, (void __iomem *)SEAD3_CPLD_F_LED);
 }
 
 static struct led_classdev sead3_pled = {
@@ -69,37 +66,11 @@ static struct platform_driver sead3_led_driver = {
 	.probe		= sead3_led_probe,
 	.remove		= sead3_led_remove,
 	.driver		= {
-		.name		= DRVNAME,
+		.name		= "sead3-led",
 	},
 };
 
-static int __init sead3_led_init(void)
-{
-	int ret;
-
-	ret = platform_driver_register(&sead3_led_driver);
-	if (ret < 0)
-		goto out;
-
-	pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0);
-	if (IS_ERR(pdev)) {
-		ret = PTR_ERR(pdev);
-		platform_driver_unregister(&sead3_led_driver);
-		goto out;
-	}
-
-out:
-	return ret;
-}
-
-static void __exit sead3_led_exit(void)
-{
-	platform_device_unregister(pdev);
-	platform_driver_unregister(&sead3_led_driver);
-}
-
-module_init(sead3_led_init);
-module_exit(sead3_led_exit);
+module_platform_driver(sead3_led_driver);
 
 MODULE_AUTHOR("Kristian Kielhofner <kris@krisk.org>");
 MODULE_DESCRIPTION("SEAD3 LED driver");
diff --git a/arch/mips/mti-sead3/sead3-ehci.c b/arch/mips/mti-sead3/sead3-ehci.c
deleted file mode 100644
index 014dd7ba4d68..000000000000
--- a/arch/mips/mti-sead3/sead3-ehci.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/dma-mapping.h>
-#include <linux/platform_device.h>
-#include <linux/irqchip/mips-gic.h>
-
-#include <asm/mips-boards/sead3int.h>
-
-struct resource ehci_resources[] = {
-	{
-		.start			= 0x1b200000,
-		.end			= 0x1b200fff,
-		.flags			= IORESOURCE_MEM
-	},
-	{
-		.flags			= IORESOURCE_IRQ
-	}
-};
-
-u64 sead3_usbdev_dma_mask = DMA_BIT_MASK(32);
-
-static struct platform_device ehci_device = {
-	.name		= "sead3-ehci",
-	.id		= 0,
-	.dev		= {
-		.dma_mask		= &sead3_usbdev_dma_mask,
-		.coherent_dma_mask	= DMA_BIT_MASK(32)
-	},
-	.num_resources	= ARRAY_SIZE(ehci_resources),
-	.resource	= ehci_resources
-};
-
-static int __init ehci_init(void)
-{
-	if (gic_present)
-		ehci_resources[1].start = MIPS_GIC_IRQ_BASE + GIC_INT_EHCI;
-	else
-		ehci_resources[1].start = MIPS_CPU_IRQ_BASE + CPU_INT_EHCI;
-	return platform_device_register(&ehci_device);
-}
-
-module_init(ehci_init);
-
-MODULE_AUTHOR("Chris Dearman <chris@mips.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EHCI probe driver for SEAD3");
diff --git a/arch/mips/mti-sead3/sead3-i2c-dev.c b/arch/mips/mti-sead3/sead3-i2c-dev.c
deleted file mode 100644
index eca0b53a71dd..000000000000
--- a/arch/mips/mti-sead3/sead3-i2c-dev.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/i2c.h>
-
-static struct i2c_board_info __initdata sead3_i2c_devices[] = {
-	{
-		I2C_BOARD_INFO("adt7476", 0x2c),
-		.irq = 0,
-	},
-	{
-		I2C_BOARD_INFO("m41t80", 0x68),
-		.irq = 0,
-	},
-};
-
-static int __init sead3_i2c_init(void)
-{
-	int err;
-
-	err = i2c_register_board_info(0, sead3_i2c_devices,
-					ARRAY_SIZE(sead3_i2c_devices));
-	if (err < 0)
-		pr_err("sead3-i2c-dev: cannot register board I2C devices\n");
-	return err;
-}
-
-arch_initcall(sead3_i2c_init);
diff --git a/arch/mips/mti-sead3/sead3-i2c-drv.c b/arch/mips/mti-sead3/sead3-i2c-drv.c
deleted file mode 100644
index 2bebf0974e39..000000000000
--- a/arch/mips/mti-sead3/sead3-i2c-drv.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/i2c.h>
-#include <linux/platform_device.h>
-
-#define PIC32_I2CxCON		0x0000
-#define	 PIC32_I2CCON_ON	(1<<15)
-#define	 PIC32_I2CCON_ACKDT	(1<<5)
-#define	 PIC32_I2CCON_ACKEN	(1<<4)
-#define	 PIC32_I2CCON_RCEN	(1<<3)
-#define	 PIC32_I2CCON_PEN	(1<<2)
-#define	 PIC32_I2CCON_RSEN	(1<<1)
-#define	 PIC32_I2CCON_SEN	(1<<0)
-#define PIC32_I2CxCONCLR	0x0004
-#define PIC32_I2CxCONSET	0x0008
-#define PIC32_I2CxSTAT		0x0010
-#define PIC32_I2CxSTATCLR	0x0014
-#define	 PIC32_I2CSTAT_ACKSTAT	(1<<15)
-#define	 PIC32_I2CSTAT_TRSTAT	(1<<14)
-#define	 PIC32_I2CSTAT_BCL	(1<<10)
-#define	 PIC32_I2CSTAT_IWCOL	(1<<7)
-#define	 PIC32_I2CSTAT_I2COV	(1<<6)
-#define PIC32_I2CxBRG		0x0040
-#define PIC32_I2CxTRN		0x0050
-#define PIC32_I2CxRCV		0x0060
-
-static DEFINE_SPINLOCK(pic32_bus_lock);
-
-static void __iomem *bus_xfer	= (void __iomem *)0xbf000600;
-static void __iomem *bus_status = (void __iomem *)0xbf000060;
-
-#define DELAY() udelay(100)
-
-static inline unsigned int ioready(void)
-{
-	return readl(bus_status) & 1;
-}
-
-static inline void wait_ioready(void)
-{
-	do { } while (!ioready());
-}
-
-static inline void wait_ioclear(void)
-{
-	do { } while (ioready());
-}
-
-static inline void check_ioclear(void)
-{
-	if (ioready()) {
-		do {
-			(void) readl(bus_xfer);
-			DELAY();
-		} while (ioready());
-	}
-}
-
-static u32 pic32_bus_readl(u32 reg)
-{
-	unsigned long flags;
-	u32 status, val;
-
-	spin_lock_irqsave(&pic32_bus_lock, flags);
-
-	check_ioclear();
-	writel((0x01 << 24) | (reg & 0x00ffffff), bus_xfer);
-	DELAY();
-	wait_ioready();
-	status = readl(bus_xfer);
-	DELAY();
-	val = readl(bus_xfer);
-	wait_ioclear();
-
-	spin_unlock_irqrestore(&pic32_bus_lock, flags);
-
-	return val;
-}
-
-static void pic32_bus_writel(u32 val, u32 reg)
-{
-	unsigned long flags;
-	u32 status;
-
-	spin_lock_irqsave(&pic32_bus_lock, flags);
-
-	check_ioclear();
-	writel((0x10 << 24) | (reg & 0x00ffffff), bus_xfer);
-	DELAY();
-	writel(val, bus_xfer);
-	DELAY();
-	wait_ioready();
-	status = readl(bus_xfer);
-	wait_ioclear();
-
-	spin_unlock_irqrestore(&pic32_bus_lock, flags);
-}
-
-struct pic32_i2c_platform_data {
-	u32	base;
-	struct i2c_adapter adap;
-	u32	xfer_timeout;
-	u32	ack_timeout;
-	u32	ctl_timeout;
-};
-
-static inline void pic32_i2c_start(struct pic32_i2c_platform_data *adap)
-{
-	pic32_bus_writel(PIC32_I2CCON_SEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline void pic32_i2c_stop(struct pic32_i2c_platform_data *adap)
-{
-	pic32_bus_writel(PIC32_I2CCON_PEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline void pic32_i2c_ack(struct pic32_i2c_platform_data *adap)
-{
-	pic32_bus_writel(PIC32_I2CCON_ACKDT, adap->base + PIC32_I2CxCONCLR);
-	pic32_bus_writel(PIC32_I2CCON_ACKEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline void pic32_i2c_nack(struct pic32_i2c_platform_data *adap)
-{
-	pic32_bus_writel(PIC32_I2CCON_ACKDT, adap->base + PIC32_I2CxCONSET);
-	pic32_bus_writel(PIC32_I2CCON_ACKEN, adap->base + PIC32_I2CxCONSET);
-}
-
-static inline int pic32_i2c_idle(struct pic32_i2c_platform_data *adap)
-{
-	int i;
-
-	for (i = 0; i < adap->ctl_timeout; i++) {
-		if (((pic32_bus_readl(adap->base + PIC32_I2CxCON) &
-		      (PIC32_I2CCON_ACKEN | PIC32_I2CCON_RCEN |
-		       PIC32_I2CCON_PEN | PIC32_I2CCON_RSEN |
-		       PIC32_I2CCON_SEN)) == 0) &&
-		    ((pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
-		      (PIC32_I2CSTAT_TRSTAT)) == 0))
-			return 0;
-		udelay(1);
-	}
-	return -ETIMEDOUT;
-}
-
-static inline u32 pic32_i2c_master_write(struct pic32_i2c_platform_data *adap,
-		u32 byte)
-{
-	pic32_bus_writel(byte, adap->base + PIC32_I2CxTRN);
-	return pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
-			PIC32_I2CSTAT_IWCOL;
-}
-
-static inline u32 pic32_i2c_master_read(struct pic32_i2c_platform_data *adap)
-{
-	pic32_bus_writel(PIC32_I2CCON_RCEN, adap->base + PIC32_I2CxCONSET);
-	while (pic32_bus_readl(adap->base + PIC32_I2CxCON) & PIC32_I2CCON_RCEN)
-		;
-	pic32_bus_writel(PIC32_I2CSTAT_I2COV, adap->base + PIC32_I2CxSTATCLR);
-	return pic32_bus_readl(adap->base + PIC32_I2CxRCV);
-}
-
-static int pic32_i2c_address(struct pic32_i2c_platform_data *adap,
-		unsigned int addr, int rd)
-{
-	pic32_i2c_idle(adap);
-	pic32_i2c_start(adap);
-	pic32_i2c_idle(adap);
-
-	addr <<= 1;
-	if (rd)
-		addr |= 1;
-
-	if (pic32_i2c_master_write(adap, addr))
-		return -EIO;
-	pic32_i2c_idle(adap);
-	if (pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
-			PIC32_I2CSTAT_ACKSTAT)
-		return -EIO;
-	return 0;
-}
-
-static int sead3_i2c_read(struct pic32_i2c_platform_data *adap,
-		unsigned char *buf, unsigned int len)
-{
-	u32 data;
-	int i;
-
-	i = 0;
-	while (i < len) {
-		data = pic32_i2c_master_read(adap);
-		buf[i++] = data;
-		if (i < len)
-			pic32_i2c_ack(adap);
-		else
-			pic32_i2c_nack(adap);
-	}
-
-	pic32_i2c_stop(adap);
-	pic32_i2c_idle(adap);
-	return 0;
-}
-
-static int sead3_i2c_write(struct pic32_i2c_platform_data *adap,
-		unsigned char *buf, unsigned int len)
-{
-	int i;
-	u32 data;
-
-	i = 0;
-	while (i < len) {
-		data = buf[i];
-		if (pic32_i2c_master_write(adap, data))
-			return -EIO;
-		pic32_i2c_idle(adap);
-		if (pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
-					PIC32_I2CSTAT_ACKSTAT)
-			return -EIO;
-		i++;
-	}
-
-	pic32_i2c_stop(adap);
-	pic32_i2c_idle(adap);
-	return 0;
-}
-
-static int sead3_pic32_platform_xfer(struct i2c_adapter *i2c_adap,
-		struct i2c_msg *msgs, int num)
-{
-	struct pic32_i2c_platform_data *adap = i2c_adap->algo_data;
-	struct i2c_msg *p;
-	int i, err = 0;
-
-	for (i = 0; i < num; i++) {
-#define __BUFSIZE 80
-		int ii;
-		static char buf[__BUFSIZE];
-		char *b = buf;
-
-		p = &msgs[i];
-		b += sprintf(buf, " [%d bytes]", p->len);
-		if ((p->flags & I2C_M_RD) == 0) {
-			for (ii = 0; ii < p->len; ii++) {
-				if (b < &buf[__BUFSIZE-4]) {
-					b += sprintf(b, " %02x", p->buf[ii]);
-				} else {
-					strcat(b, "...");
-					break;
-				}
-			}
-		}
-	}
-
-	for (i = 0; !err && i < num; i++) {
-		p = &msgs[i];
-		err = pic32_i2c_address(adap, p->addr, p->flags & I2C_M_RD);
-		if (err || !p->len)
-			continue;
-		if (p->flags & I2C_M_RD)
-			err = sead3_i2c_read(adap, p->buf, p->len);
-		else
-			err = sead3_i2c_write(adap, p->buf, p->len);
-	}
-
-	/* Return the number of messages processed, or the error code. */
-	if (err == 0)
-		err = num;
-
-	return err;
-}
-
-static u32 sead3_pic32_platform_func(struct i2c_adapter *adap)
-{
-	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-}
-
-static const struct i2c_algorithm sead3_platform_algo = {
-	.master_xfer	= sead3_pic32_platform_xfer,
-	.functionality	= sead3_pic32_platform_func,
-};
-
-static void sead3_i2c_platform_setup(struct pic32_i2c_platform_data *priv)
-{
-	pic32_bus_writel(500, priv->base + PIC32_I2CxBRG);
-	pic32_bus_writel(PIC32_I2CCON_ON, priv->base + PIC32_I2CxCONCLR);
-	pic32_bus_writel(PIC32_I2CCON_ON, priv->base + PIC32_I2CxCONSET);
-	pic32_bus_writel(PIC32_I2CSTAT_BCL | PIC32_I2CSTAT_IWCOL,
-		priv->base + PIC32_I2CxSTATCLR);
-}
-
-static int sead3_i2c_platform_probe(struct platform_device *pdev)
-{
-	struct pic32_i2c_platform_data *priv;
-	struct resource *r;
-	int ret;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	priv = kzalloc(sizeof(struct pic32_i2c_platform_data), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	priv->base = r->start;
-	if (!priv->base) {
-		ret = -EBUSY;
-		goto out_mem;
-	}
-
-	priv->xfer_timeout = 200;
-	priv->ack_timeout = 200;
-	priv->ctl_timeout = 200;
-
-	priv->adap.nr = pdev->id;
-	priv->adap.algo = &sead3_platform_algo;
-	priv->adap.algo_data = priv;
-	priv->adap.dev.parent = &pdev->dev;
-	strlcpy(priv->adap.name, "SEAD3 PIC32", sizeof(priv->adap.name));
-
-	sead3_i2c_platform_setup(priv);
-
-	ret = i2c_add_numbered_adapter(&priv->adap);
-	if (ret == 0) {
-		platform_set_drvdata(pdev, priv);
-		return 0;
-	}
-
-out_mem:
-	kfree(priv);
-out:
-	return ret;
-}
-
-static int sead3_i2c_platform_remove(struct platform_device *pdev)
-{
-	struct pic32_i2c_platform_data *priv = platform_get_drvdata(pdev);
-
-	platform_set_drvdata(pdev, NULL);
-	i2c_del_adapter(&priv->adap);
-	kfree(priv);
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int sead3_i2c_platform_suspend(struct platform_device *pdev,
-		pm_message_t state)
-{
-	dev_dbg(&pdev->dev, "i2c_platform_disable\n");
-	return 0;
-}
-
-static int sead3_i2c_platform_resume(struct platform_device *pdev)
-{
-	struct pic32_i2c_platform_data *priv = platform_get_drvdata(pdev);
-
-	dev_dbg(&pdev->dev, "sead3_i2c_platform_setup\n");
-	sead3_i2c_platform_setup(priv);
-
-	return 0;
-}
-#else
-#define sead3_i2c_platform_suspend	NULL
-#define sead3_i2c_platform_resume	NULL
-#endif
-
-static struct platform_driver sead3_i2c_platform_driver = {
-	.driver = {
-		.name	= "sead3-i2c",
-	},
-	.probe		= sead3_i2c_platform_probe,
-	.remove		= sead3_i2c_platform_remove,
-	.suspend	= sead3_i2c_platform_suspend,
-	.resume		= sead3_i2c_platform_resume,
-};
-
-static int __init sead3_i2c_platform_init(void)
-{
-	return platform_driver_register(&sead3_i2c_platform_driver);
-}
-module_init(sead3_i2c_platform_init);
-
-static void __exit sead3_i2c_platform_exit(void)
-{
-	platform_driver_unregister(&sead3_i2c_platform_driver);
-}
-module_exit(sead3_i2c_platform_exit);
-
-MODULE_AUTHOR("Chris Dearman, MIPS Technologies INC.");
-MODULE_DESCRIPTION("SEAD3 PIC32 I2C driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/mips/mti-sead3/sead3-i2c.c b/arch/mips/mti-sead3/sead3-i2c.c
deleted file mode 100644
index 795ae83894e0..000000000000
--- a/arch/mips/mti-sead3/sead3-i2c.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/platform_device.h>
-
-struct resource sead3_i2c_resources[] = {
-	{
-		.start	= 0x805200,
-		.end	= 0x8053ff,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct platform_device sead3_i2c_device = {
-	.name		= "sead3-i2c",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(sead3_i2c_resources),
-	.resource	= sead3_i2c_resources,
-};
-
-static int __init sead3_i2c_init(void)
-{
-	return platform_device_register(&sead3_i2c_device);
-}
-
-device_initcall(sead3_i2c_init);
diff --git a/arch/mips/mti-sead3/sead3-init.c b/arch/mips/mti-sead3/sead3-init.c
index bfbd17b120a2..3572ea30173e 100644
--- a/arch/mips/mti-sead3/sead3-init.c
+++ b/arch/mips/mti-sead3/sead3-init.c
@@ -147,6 +147,6 @@ void __init prom_init(void)
 #endif
 }
 
-void prom_free_prom_memory(void)
+void __init prom_free_prom_memory(void)
 {
 }
diff --git a/arch/mips/mti-sead3/sead3-leds.c b/arch/mips/mti-sead3/sead3-leds.c
deleted file mode 100644
index c427c5778186..000000000000
--- a/arch/mips/mti-sead3/sead3-leds.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/leds.h>
-#include <linux/platform_device.h>
-
-#define LEDFLAGS(bits, shift)		\
-	((bits << 8) | (shift << 8))
-
-#define LEDBITS(id, shift, bits)	\
-	.name = id #shift,		\
-	.flags = LEDFLAGS(bits, shift)
-
-struct led_info led_data_info[] = {
-	{ LEDBITS("bit", 0, 1) },
-	{ LEDBITS("bit", 1, 1) },
-	{ LEDBITS("bit", 2, 1) },
-	{ LEDBITS("bit", 3, 1) },
-	{ LEDBITS("bit", 4, 1) },
-	{ LEDBITS("bit", 5, 1) },
-	{ LEDBITS("bit", 6, 1) },
-	{ LEDBITS("bit", 7, 1) },
-	{ LEDBITS("all", 0, 8) },
-};
-
-static struct led_platform_data led_data = {
-	.num_leds	= ARRAY_SIZE(led_data_info),
-	.leds		= led_data_info
-};
-
-static struct resource pled_resources[] = {
-	{
-		.start	= 0x1f000210,
-		.end	= 0x1f000217,
-		.flags	= IORESOURCE_MEM
-	}
-};
-
-static struct platform_device pled_device = {
-	.name			= "sead3::pled",
-	.id			= 0,
-	.dev			= {
-		.platform_data	= &led_data,
-	},
-	.num_resources		= ARRAY_SIZE(pled_resources),
-	.resource		= pled_resources
-};
-
-
-static struct resource fled_resources[] = {
-	{
-		.start			= 0x1f000218,
-		.end			= 0x1f00021f,
-		.flags			= IORESOURCE_MEM
-	}
-};
-
-static struct platform_device fled_device = {
-	.name			= "sead3::fled",
-	.id			= 0,
-	.dev			= {
-		.platform_data	= &led_data,
-	},
-	.num_resources		= ARRAY_SIZE(fled_resources),
-	.resource		= fled_resources
-};
-
-static int __init led_init(void)
-{
-	platform_device_register(&pled_device);
-	return platform_device_register(&fled_device);
-}
-
-device_initcall(led_init);
diff --git a/arch/mips/mti-sead3/sead3-mtd.c b/arch/mips/mti-sead3/sead3-mtd.c
deleted file mode 100644
index f9c890d72677..000000000000
--- a/arch/mips/mti-sead3/sead3-mtd.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/mtd/physmap.h>
-
-static struct mtd_partition sead3_mtd_partitions[] = {
-	{
-		.name =		"User FS",
-		.offset =	0x00000000,
-		.size =		0x01fc0000,
-	}, {
-		.name =		"Board Config",
-		.offset =	0x01fc0000,
-		.size =		0x00040000,
-		.mask_flags =	MTD_WRITEABLE
-	},
-};
-
-static struct physmap_flash_data sead3_flash_data = {
-	.width		= 4,
-	.nr_parts	= ARRAY_SIZE(sead3_mtd_partitions),
-	.parts		= sead3_mtd_partitions
-};
-
-static struct resource sead3_flash_resource = {
-	.start		= 0x1c000000,
-	.end		= 0x1dffffff,
-	.flags		= IORESOURCE_MEM
-};
-
-static struct platform_device sead3_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &sead3_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &sead3_flash_resource,
-};
-
-static int __init sead3_mtd_init(void)
-{
-	platform_device_register(&sead3_flash);
-
-	return 0;
-}
-device_initcall(sead3_mtd_init);
diff --git a/arch/mips/mti-sead3/sead3-net.c b/arch/mips/mti-sead3/sead3-net.c
deleted file mode 100644
index 46176b804576..000000000000
--- a/arch/mips/mti-sead3/sead3-net.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/irqchip/mips-gic.h>
-#include <linux/platform_device.h>
-#include <linux/smsc911x.h>
-
-#include <asm/mips-boards/sead3int.h>
-
-static struct smsc911x_platform_config sead3_smsc911x_data = {
-	.irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
-	.irq_type = SMSC911X_IRQ_TYPE_PUSH_PULL,
-	.flags	= SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS,
-	.phy_interface = PHY_INTERFACE_MODE_MII,
-};
-
-struct resource sead3_net_resources[] = {
-	{
-		.start			= 0x1f010000,
-		.end			= 0x1f01ffff,
-		.flags			= IORESOURCE_MEM
-	},
-	{
-		.flags			= IORESOURCE_IRQ
-	}
-};
-
-static struct platform_device sead3_net_device = {
-	.name			= "smsc911x",
-	.id			= 0,
-	.dev			= {
-		.platform_data	= &sead3_smsc911x_data,
-	},
-	.num_resources		= ARRAY_SIZE(sead3_net_resources),
-	.resource		= sead3_net_resources
-};
-
-static int __init sead3_net_init(void)
-{
-	if (gic_present)
-		sead3_net_resources[1].start = MIPS_GIC_IRQ_BASE + GIC_INT_NET;
-	else
-		sead3_net_resources[1].start = MIPS_CPU_IRQ_BASE + CPU_INT_NET;
-	return platform_device_register(&sead3_net_device);
-}
-
-module_init(sead3_net_init);
-
-MODULE_AUTHOR("Chris Dearman <chris@mips.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Network probe driver for SEAD-3");
diff --git a/arch/mips/mti-sead3/sead3-platform.c b/arch/mips/mti-sead3/sead3-platform.c
index 53ee6f1f018d..73b73efbfb05 100644
--- a/arch/mips/mti-sead3/sead3-platform.c
+++ b/arch/mips/mti-sead3/sead3-platform.c
@@ -5,10 +5,15 @@
  *
  * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
  */
-#include <linux/module.h>
+#include <linux/dma-mapping.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/irqchip/mips-gic.h>
+#include <linux/leds.h>
+#include <linux/mtd/physmap.h>
+#include <linux/platform_device.h>
 #include <linux/serial_8250.h>
+#include <linux/smsc911x.h>
 
 #include <asm/mips-boards/sead3int.h>
 
@@ -36,20 +41,183 @@ static struct platform_device uart8250_device = {
 	},
 };
 
-static int __init uart8250_init(void)
+static struct smsc911x_platform_config sead3_smsc911x_data = {
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
+	.flags		= SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+};
+
+static struct resource sead3_net_resources[] = {
+	{
+		.start			= 0x1f010000,
+		.end			= 0x1f01ffff,
+		.flags			= IORESOURCE_MEM
+	}, {
+		.flags			= IORESOURCE_IRQ
+	}
+};
+
+static struct platform_device sead3_net_device = {
+	.name			= "smsc911x",
+	.id			= 0,
+	.dev			= {
+		.platform_data	= &sead3_smsc911x_data,
+	},
+	.num_resources		= ARRAY_SIZE(sead3_net_resources),
+	.resource		= sead3_net_resources
+};
+
+static struct mtd_partition sead3_mtd_partitions[] = {
+	{
+		.name =		"User FS",
+		.offset =	0x00000000,
+		.size =		0x01fc0000,
+	}, {
+		.name =		"Board Config",
+		.offset =	0x01fc0000,
+		.size =		0x00040000,
+		.mask_flags =	MTD_WRITEABLE
+	},
+};
+
+static struct physmap_flash_data sead3_flash_data = {
+	.width		= 4,
+	.nr_parts	= ARRAY_SIZE(sead3_mtd_partitions),
+	.parts		= sead3_mtd_partitions
+};
+
+static struct resource sead3_flash_resource = {
+	.start		= 0x1c000000,
+	.end		= 0x1dffffff,
+	.flags		= IORESOURCE_MEM
+};
+
+static struct platform_device sead3_flash = {
+	.name		= "physmap-flash",
+	.id		= 0,
+	.dev		= {
+		.platform_data	= &sead3_flash_data,
+	},
+	.num_resources	= 1,
+	.resource	= &sead3_flash_resource,
+};
+
+#define LEDFLAGS(bits, shift)		\
+	((bits << 8) | (shift << 8))
+
+#define LEDBITS(id, shift, bits)	\
+	.name = id #shift,		\
+	.flags = LEDFLAGS(bits, shift)
+
+static struct led_info led_data_info[] = {
+	{ LEDBITS("bit", 0, 1) },
+	{ LEDBITS("bit", 1, 1) },
+	{ LEDBITS("bit", 2, 1) },
+	{ LEDBITS("bit", 3, 1) },
+	{ LEDBITS("bit", 4, 1) },
+	{ LEDBITS("bit", 5, 1) },
+	{ LEDBITS("bit", 6, 1) },
+	{ LEDBITS("bit", 7, 1) },
+	{ LEDBITS("all", 0, 8) },
+};
+
+static struct led_platform_data led_data = {
+	.num_leds	= ARRAY_SIZE(led_data_info),
+	.leds		= led_data_info
+};
+
+static struct resource pled_resources[] = {
+	{
+		.start	= 0x1f000210,
+		.end	= 0x1f000217,
+		.flags	= IORESOURCE_MEM
+	}
+};
+
+static struct platform_device pled_device = {
+	.name			= "sead3::pled",
+	.id			= 0,
+	.dev			= {
+		.platform_data	= &led_data,
+	},
+	.num_resources		= ARRAY_SIZE(pled_resources),
+	.resource		= pled_resources
+};
+
+
+static struct resource fled_resources[] = {
+	{
+		.start			= 0x1f000218,
+		.end			= 0x1f00021f,
+		.flags			= IORESOURCE_MEM
+	}
+};
+
+static struct platform_device fled_device = {
+	.name			= "sead3::fled",
+	.id			= 0,
+	.dev			= {
+		.platform_data	= &led_data,
+	},
+	.num_resources		= ARRAY_SIZE(fled_resources),
+	.resource		= fled_resources
+};
+
+static struct platform_device sead3_led_device = {
+        .name   = "sead3-led",
+        .id     = -1,
+};
+
+static struct resource ehci_resources[] = {
+	{
+		.start			= 0x1b200000,
+		.end			= 0x1b200fff,
+		.flags			= IORESOURCE_MEM
+	}, {
+		.flags			= IORESOURCE_IRQ
+	}
+};
+
+static u64 sead3_usbdev_dma_mask = DMA_BIT_MASK(32);
+
+static struct platform_device ehci_device = {
+	.name		= "sead3-ehci",
+	.id		= 0,
+	.dev		= {
+		.dma_mask		= &sead3_usbdev_dma_mask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32)
+	},
+	.num_resources	= ARRAY_SIZE(ehci_resources),
+	.resource	= ehci_resources
+};
+
+static struct platform_device *sead3_platform_devices[] __initdata = {
+	&uart8250_device,
+	&sead3_flash,
+	&pled_device,
+	&fled_device,
+	&sead3_led_device,
+	&ehci_device,
+	&sead3_net_device,
+};
+
+static int __init sead3_platforms_device_init(void)
 {
 	if (gic_present) {
 		uart8250_data[0].irq = MIPS_GIC_IRQ_BASE + GIC_INT_UART0;
 		uart8250_data[1].irq = MIPS_GIC_IRQ_BASE + GIC_INT_UART1;
+		ehci_resources[1].start = MIPS_GIC_IRQ_BASE + GIC_INT_EHCI;
+		sead3_net_resources[1].start = MIPS_GIC_IRQ_BASE + GIC_INT_NET;
 	} else {
 		uart8250_data[0].irq = MIPS_CPU_IRQ_BASE + CPU_INT_UART0;
 		uart8250_data[1].irq = MIPS_CPU_IRQ_BASE + CPU_INT_UART1;
+		ehci_resources[1].start = MIPS_CPU_IRQ_BASE + CPU_INT_EHCI;
+		sead3_net_resources[1].start = MIPS_CPU_IRQ_BASE + CPU_INT_NET;
 	}
-	return platform_device_register(&uart8250_device);
-}
 
-module_init(uart8250_init);
+	return platform_add_devices(sead3_platform_devices,
+				    ARRAY_SIZE(sead3_platform_devices));
+}
 
-MODULE_AUTHOR("Chris Dearman <chris@mips.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("8250 UART probe driver for SEAD3");
+device_initcall(sead3_platforms_device_init);
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
index 0823321c10e0..fb00606e352d 100644
--- a/arch/mips/netlogic/Kconfig
+++ b/arch/mips/netlogic/Kconfig
@@ -41,6 +41,15 @@ config DT_XLP_GVP
 	  pointer to the kernel.  The corresponding DTS file is at
 	  arch/mips/netlogic/dts/xlp_gvp.dts
 
+config DT_XLP_RVP
+	bool "Built-in device tree for XLP RVP boards"
+	default y
+	help
+	  Add an FDT blob for XLP RVP board into the kernel.
+	  This DTB will be used if the firmware does not pass in a DTB
+	  pointer to the kernel.  The corresponding DTS file is at
+	  arch/mips/netlogic/dts/xlp_rvp.dts
+
 config NLM_MULTINODE
 	bool "Support for multi-chip boards"
 	depends on NLM_XLP_BOARD
diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c
index c100b9afa0ab..5f5d18b0e94d 100644
--- a/arch/mips/netlogic/common/irq.c
+++ b/arch/mips/netlogic/common/irq.c
@@ -230,16 +230,16 @@ static void nlm_init_node_irqs(int node)
 	}
 }
 
-void nlm_smp_irq_init(int hwcpuid)
+void nlm_smp_irq_init(int hwtid)
 {
-	int node, cpu;
+	int cpu, node;
 
-	node = nlm_cpuid_to_node(hwcpuid);
-	cpu  = hwcpuid % nlm_threads_per_node();
+	cpu = hwtid % nlm_threads_per_node();
+	node = hwtid / nlm_threads_per_node();
 
 	if (cpu == 0 && node != 0)
 		nlm_init_node_irqs(node);
-	write_c0_eimr(nlm_current_node()->irqmask);
+	write_c0_eimr(nlm_get_node(node)->irqmask);
 }
 
 asmlinkage void plat_irq_dispatch(void)
diff --git a/arch/mips/netlogic/common/reset.S b/arch/mips/netlogic/common/reset.S
index 701c4bcb9e47..edbab9b8691f 100644
--- a/arch/mips/netlogic/common/reset.S
+++ b/arch/mips/netlogic/common/reset.S
@@ -60,7 +60,7 @@
 	li	t0, LSU_DEFEATURE
 	mfcr	t1, t0
 
-	lui	t2, 0xc080	/* SUE, Enable Unaligned Access, L2HPE */
+	lui	t2, 0x4080	/* Enable Unaligned Access, L2HPE */
 	or	t1, t1, t2
 	mtcr	t1, t0
 
@@ -235,6 +235,26 @@ EXPORT(nlm_boot_siblings)
 	mfc0	v0, CP0_EBASE, 1
 	andi	v0, 0x3ff		/* v0 <- node/core */
 
+	/*
+	 * Errata: to avoid potential live lock, setup IFU_BRUB_RESERVE
+	 * when running 4 threads per core
+	 */
+	andi	v1, v0, 0x3             /* v1 <- thread id */
+	bnez	v1, 2f
+	nop
+
+	/* thread 0 of each core. */
+	li	t0, CKSEG1ADDR(RESET_DATA_PHYS)
+	lw	t1, BOOT_THREAD_MODE(t0)        /* t1 <- thread mode */
+	subu	t1, 0x3				/* 4-thread per core mode? */
+	bnez	t1, 2f
+	nop
+
+	li	t0, IFU_BRUB_RESERVE
+	li	t1, 0x55
+	mtcr	t1, t0
+	_ehb
+2:
 	beqz	v0, 4f		/* boot cpu (cpuid == 0)? */
 	nop
 
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index e743bdd6e20c..dc3e327fbbac 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -59,17 +59,17 @@
 
 void nlm_send_ipi_single(int logical_cpu, unsigned int action)
 {
-	int cpu, node;
+	unsigned int hwtid;
 	uint64_t picbase;
 
-	cpu = cpu_logical_map(logical_cpu);
-	node = nlm_cpuid_to_node(cpu);
-	picbase = nlm_get_node(node)->picbase;
+	/* node id is part of hwtid, and needed for send_ipi */
+	hwtid = cpu_logical_map(logical_cpu);
+	picbase = nlm_get_node(nlm_hwtid_to_node(hwtid))->picbase;
 
 	if (action & SMP_CALL_FUNCTION)
-		nlm_pic_send_ipi(picbase, cpu, IRQ_IPI_SMP_FUNCTION, 0);
+		nlm_pic_send_ipi(picbase, hwtid, IRQ_IPI_SMP_FUNCTION, 0);
 	if (action & SMP_RESCHEDULE_YOURSELF)
-		nlm_pic_send_ipi(picbase, cpu, IRQ_IPI_SMP_RESCHEDULE, 0);
+		nlm_pic_send_ipi(picbase, hwtid, IRQ_IPI_SMP_RESCHEDULE, 0);
 }
 
 void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action)
@@ -120,6 +120,7 @@ static void nlm_init_secondary(void)
 
 	hwtid = hard_smp_processor_id();
 	current_cpu_data.core = hwtid / NLM_THREADS_PER_CORE;
+	current_cpu_data.package = nlm_nodeid();
 	nlm_percpu_init(hwtid);
 	nlm_smp_irq_init(hwtid);
 }
@@ -145,16 +146,18 @@ static cpumask_t phys_cpu_present_mask;
 
 void nlm_boot_secondary(int logical_cpu, struct task_struct *idle)
 {
-	int cpu, node;
+	uint64_t picbase;
+	int hwtid;
+
+	hwtid = cpu_logical_map(logical_cpu);
+	picbase = nlm_get_node(nlm_hwtid_to_node(hwtid))->picbase;
 
-	cpu = cpu_logical_map(logical_cpu);
-	node = nlm_cpuid_to_node(logical_cpu);
 	nlm_next_sp = (unsigned long)__KSTK_TOS(idle);
 	nlm_next_gp = (unsigned long)task_thread_info(idle);
 
 	/* barrier for sp/gp store above */
 	__sync();
-	nlm_pic_send_ipi(nlm_get_node(node)->picbase, cpu, 1, 1);  /* NMI */
+	nlm_pic_send_ipi(picbase, hwtid, 1, 1);  /* NMI */
 }
 
 void __init nlm_smp_setup(void)
@@ -182,7 +185,7 @@ void __init nlm_smp_setup(void)
 			__cpu_number_map[i] = num_cpus;
 			__cpu_logical_map[num_cpus] = i;
 			set_cpu_possible(num_cpus, true);
-			node = nlm_cpuid_to_node(i);
+			node = nlm_hwtid_to_node(i);
 			cpumask_set_cpu(num_cpus, &nlm_get_node(node)->cpumask);
 			++num_cpus;
 		}
diff --git a/arch/mips/netlogic/common/time.c b/arch/mips/netlogic/common/time.c
index 0c0a1a606f73..5873c83e65be 100644
--- a/arch/mips/netlogic/common/time.c
+++ b/arch/mips/netlogic/common/time.c
@@ -40,7 +40,6 @@
 #include <asm/netlogic/interrupt.h>
 #include <asm/netlogic/common.h>
 #include <asm/netlogic/haldefs.h>
-#include <asm/netlogic/common.h>
 
 #if defined(CONFIG_CPU_XLP)
 #include <asm/netlogic/xlp-hal/iomap.h>
diff --git a/arch/mips/netlogic/xlp/ahci-init-xlp2.c b/arch/mips/netlogic/xlp/ahci-init-xlp2.c
index c83dbf3689e2..7b066a44e679 100644
--- a/arch/mips/netlogic/xlp/ahci-init-xlp2.c
+++ b/arch/mips/netlogic/xlp/ahci-init-xlp2.c
@@ -203,6 +203,7 @@ static u8 read_phy_reg(u64 regbase, u32 addr, u32 physel)
 static void config_sata_phy(u64 regbase)
 {
 	u32 port, i, reg;
+	u8 val;
 
 	for (port = 0; port < 2; port++) {
 		for (i = 0, reg = RXCDRCALFOSC0; reg <= CALDUTY; reg++, i++)
@@ -210,6 +211,18 @@ static void config_sata_phy(u64 regbase)
 
 		for (i = 0, reg = RXDPIF; reg <= PPMDRIFTMAX_HI; reg++, i++)
 			write_phy_reg(regbase, reg, port, sata_phy_config2[i]);
+
+		/* Fix for PHY link up failures at lower temperatures */
+		write_phy_reg(regbase, 0x800F, port, 0x1f);
+
+		val = read_phy_reg(regbase, 0x0029, port);
+		write_phy_reg(regbase, 0x0029, port, val | (0x7 << 1));
+
+		val = read_phy_reg(regbase, 0x0056, port);
+		write_phy_reg(regbase, 0x0056, port, val & ~(1 << 3));
+
+		val = read_phy_reg(regbase, 0x0018, port);
+		write_phy_reg(regbase, 0x0018, port, val & ~(0x7 << 0));
 	}
 }
 
diff --git a/arch/mips/netlogic/xlp/ahci-init.c b/arch/mips/netlogic/xlp/ahci-init.c
index a9d0fae02103..92be1a3258b1 100644
--- a/arch/mips/netlogic/xlp/ahci-init.c
+++ b/arch/mips/netlogic/xlp/ahci-init.c
@@ -151,7 +151,7 @@ static void nlm_sata_firmware_init(int node)
 static int __init nlm_ahci_init(void)
 {
 	int node = 0;
-	int chip = read_c0_prid() & PRID_REV_MASK;
+	int chip = read_c0_prid() & PRID_IMP_MASK;
 
 	if (chip == PRID_IMP_NETLOGIC_XLP3XX)
 		nlm_sata_firmware_init(node);
diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c
index 7cc46032b28e..a625bdb6d6aa 100644
--- a/arch/mips/netlogic/xlp/dt.c
+++ b/arch/mips/netlogic/xlp/dt.c
@@ -41,17 +41,21 @@
 
 #include <asm/prom.h>
 
-extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[],
-	__dtb_xlp_fvp_begin[], __dtb_xlp_gvp_begin[];
+extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[], __dtb_xlp_fvp_begin[],
+	__dtb_xlp_gvp_begin[], __dtb_xlp_rvp_begin[];
 static void *xlp_fdt_blob;
 
 void __init *xlp_dt_init(void *fdtp)
 {
 	if (!fdtp) {
 		switch (current_cpu_data.processor_id & PRID_IMP_MASK) {
+#ifdef CONFIG_DT_XLP_RVP
+		case PRID_IMP_NETLOGIC_XLP5XX:
+			fdtp = __dtb_xlp_rvp_begin;
+			break;
+#endif
 #ifdef CONFIG_DT_XLP_GVP
 		case PRID_IMP_NETLOGIC_XLP9XX:
-		case PRID_IMP_NETLOGIC_XLP5XX:
 			fdtp = __dtb_xlp_gvp_begin;
 			break;
 #endif
diff --git a/arch/mips/netlogic/xlp/nlm_hal.c b/arch/mips/netlogic/xlp/nlm_hal.c
index bc24beb3a426..a8f4144a0297 100644
--- a/arch/mips/netlogic/xlp/nlm_hal.c
+++ b/arch/mips/netlogic/xlp/nlm_hal.c
@@ -71,10 +71,20 @@ static int xlp9xx_irq_to_irt(int irq)
 	switch (irq) {
 	case PIC_GPIO_IRQ:
 		return 12;
+	case PIC_I2C_0_IRQ:
+		return 125;
+	case PIC_I2C_1_IRQ:
+		return 126;
+	case PIC_I2C_2_IRQ:
+		return 127;
+	case PIC_I2C_3_IRQ:
+		return 128;
 	case PIC_9XX_XHCI_0_IRQ:
 		return 114;
 	case PIC_9XX_XHCI_1_IRQ:
 		return 115;
+	case PIC_9XX_XHCI_2_IRQ:
+		return 116;
 	case PIC_UART_0_IRQ:
 		return 133;
 	case PIC_UART_1_IRQ:
@@ -170,16 +180,23 @@ static int xlp_irq_to_irt(int irq)
 	}
 
 	if (devoff != 0) {
+		uint32_t val;
+
 		pcibase = nlm_pcicfg_base(devoff);
-		irt = nlm_read_reg(pcibase, XLP_PCI_IRTINFO_REG) & 0xffff;
-		/* HW weirdness, I2C IRT entry has to be fixed up */
-		switch (irq) {
-		case PIC_I2C_1_IRQ:
-			irt = irt + 1; break;
-		case PIC_I2C_2_IRQ:
-			irt = irt + 2; break;
-		case PIC_I2C_3_IRQ:
-			irt = irt + 3; break;
+		val = nlm_read_reg(pcibase, XLP_PCI_IRTINFO_REG);
+		if (val == 0xffffffff) {
+			irt = -1;
+		} else {
+			irt = val & 0xffff;
+			/* HW weirdness, I2C IRT entry has to be fixed up */
+			switch (irq) {
+			case PIC_I2C_1_IRQ:
+				irt = irt + 1; break;
+			case PIC_I2C_2_IRQ:
+				irt = irt + 2; break;
+			case PIC_I2C_3_IRQ:
+				irt = irt + 3; break;
+			}
 		}
 	} else if (irq >= PIC_PCIE_LINK_LEGACY_IRQ(0) &&
 			irq <= PIC_PCIE_LINK_LEGACY_IRQ(3)) {
@@ -325,7 +342,7 @@ static unsigned int nlm_xlp2_get_pic_frequency(int node)
 	/* Find the clock source PLL device for PIC */
 	if (cpu_xlp9xx) {
 		reg_select = nlm_read_sys_reg(clockbase,
-				SYS_9XX_CLK_DEV_SEL) & 0x3;
+				SYS_9XX_CLK_DEV_SEL_REG) & 0x3;
 		switch (reg_select) {
 		case 0:
 			ctrl_val0 = nlm_read_sys_reg(clockbase,
@@ -354,7 +371,7 @@ static unsigned int nlm_xlp2_get_pic_frequency(int node)
 		}
 	} else {
 		reg_select = (nlm_read_sys_reg(sysbase,
-					SYS_CLK_DEV_SEL) >> 22) & 0x3;
+					SYS_CLK_DEV_SEL_REG) >> 22) & 0x3;
 		switch (reg_select) {
 		case 0:
 			ctrl_val0 = nlm_read_sys_reg(sysbase,
@@ -410,7 +427,7 @@ static unsigned int nlm_xlp2_get_pic_frequency(int node)
 
 	fdiv = fdiv/(1 << 13);
 	pll_out_freq_num = ((ref_clk >> 1) * (6 + mdiv)) + fdiv;
-	pll_out_freq_den = (1 << vco_post_div) * pll_post_div * 3;
+	pll_out_freq_den = (1 << vco_post_div) * pll_post_div * ref_div;
 
 	if (pll_out_freq_den > 0)
 		do_div(pll_out_freq_num, pll_out_freq_den);
@@ -418,10 +435,10 @@ static unsigned int nlm_xlp2_get_pic_frequency(int node)
 	/* PIC post divider, which happens after PLL */
 	if (cpu_xlp9xx)
 		pic_div = nlm_read_sys_reg(clockbase,
-				SYS_9XX_CLK_DEV_DIV) & 0x3;
+				SYS_9XX_CLK_DEV_DIV_REG) & 0x3;
 	else
 		pic_div = (nlm_read_sys_reg(sysbase,
-					SYS_CLK_DEV_DIV) >> 22) & 0x3;
+					SYS_CLK_DEV_DIV_REG) >> 22) & 0x3;
 	do_div(pll_out_freq_num, 1 << pic_div);
 
 	return pll_out_freq_num;
@@ -442,19 +459,21 @@ unsigned int nlm_get_cpu_frequency(void)
 
 /*
  * Fills upto 8 pairs of entries containing the DRAM map of a node
- * if n < 0, get dram map for all nodes
+ * if node < 0, get dram map for all nodes
  */
-int xlp_get_dram_map(int n, uint64_t *dram_map)
+int nlm_get_dram_map(int node, uint64_t *dram_map, int nentries)
 {
 	uint64_t bridgebase, base, lim;
 	uint32_t val;
 	unsigned int barreg, limreg, xlatreg;
-	int i, node, rv;
+	int i, n, rv;
 
 	/* Look only at mapping on Node 0, we don't handle crazy configs */
 	bridgebase = nlm_get_bridge_regbase(0);
 	rv = 0;
 	for (i = 0; i < 8; i++) {
+		if (rv + 1 >= nentries)
+			break;
 		if (cpu_is_xlp9xx()) {
 			barreg = BRIDGE_9XX_DRAM_BAR(i);
 			limreg = BRIDGE_9XX_DRAM_LIMIT(i);
@@ -464,10 +483,10 @@ int xlp_get_dram_map(int n, uint64_t *dram_map)
 			limreg = BRIDGE_DRAM_LIMIT(i);
 			xlatreg = BRIDGE_DRAM_NODE_TRANSLN(i);
 		}
-		if (n >= 0) {
+		if (node >= 0) {
 			/* node specified, get node mapping of BAR */
 			val = nlm_read_bridge_reg(bridgebase, xlatreg);
-			node = (val >> 1) & 0x3;
+			n = (val >> 1) & 0x3;
 			if (n != node)
 				continue;
 		}
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 4fdd9fd29d1d..f743fd9da323 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -51,7 +51,6 @@ uint64_t nlm_io_base;
 struct nlm_soc_info nlm_nodes[NLM_NR_NODES];
 cpumask_t nlm_cpumask = CPU_MASK_CPU0;
 unsigned int nlm_threads_per_core;
-unsigned int xlp_cores_per_node;
 
 static void nlm_linux_exit(void)
 {
@@ -82,7 +81,7 @@ static void __init xlp_init_mem_from_bars(void)
 	uint64_t map[16];
 	int i, n;
 
-	n = xlp_get_dram_map(-1, map);	/* -1: info for all nodes */
+	n = nlm_get_dram_map(-1, map, ARRAY_SIZE(map));	/* -1 : all nodes */
 	for (i = 0; i < n; i += 2) {
 		/* exclude 0x1000_0000-0x2000_0000, u-boot device */
 		if (map[i] <= 0x10000000 && map[i+1] > 0x10000000)
@@ -163,10 +162,6 @@ void __init prom_init(void)
 	void *reset_vec;
 
 	nlm_io_base = CKSEG1ADDR(XLP_DEFAULT_IO_BASE);
-	if (cpu_is_xlp9xx())
-		xlp_cores_per_node = 32;
-	else
-		xlp_cores_per_node = 8;
 	nlm_init_boot_cpu();
 	xlp_mmu_init();
 	nlm_node_init(0);
diff --git a/arch/mips/netlogic/xlp/usb-init-xlp2.c b/arch/mips/netlogic/xlp/usb-init-xlp2.c
index 17ade1ce5dfd..2524939a5e3a 100644
--- a/arch/mips/netlogic/xlp/usb-init-xlp2.c
+++ b/arch/mips/netlogic/xlp/usb-init-xlp2.c
@@ -128,6 +128,9 @@ static void xlp9xx_usb_ack(struct irq_data *data)
 	case PIC_9XX_XHCI_1_IRQ:
 		port_addr = nlm_xlpii_get_usb_regbase(node, 2);
 		break;
+	case PIC_9XX_XHCI_2_IRQ:
+		port_addr = nlm_xlpii_get_usb_regbase(node, 3);
+		break;
 	default:
 		pr_err("No matching USB irq %d node  %d!\n", irq, node);
 		return;
@@ -222,14 +225,16 @@ static int __init nlm_platform_xlpii_usb_init(void)
 	}
 
 	/* XLP 9XX, multi-node */
-	pr_info("Initializing 9XX USB Interface\n");
+	pr_info("Initializing 9XX/5XX USB Interface\n");
 	for (node = 0; node < NLM_NR_NODES; node++) {
 		if (!nlm_node_present(node))
 			continue;
 		nlm_xlpii_usb_hw_reset(node, 1);
 		nlm_xlpii_usb_hw_reset(node, 2);
+		nlm_xlpii_usb_hw_reset(node, 3);
 		nlm_set_pic_extra_ack(node, PIC_9XX_XHCI_0_IRQ, xlp9xx_usb_ack);
 		nlm_set_pic_extra_ack(node, PIC_9XX_XHCI_1_IRQ, xlp9xx_usb_ack);
+		nlm_set_pic_extra_ack(node, PIC_9XX_XHCI_2_IRQ, xlp9xx_usb_ack);
 	}
 	return 0;
 }
@@ -253,6 +258,9 @@ static void nlm_xlp9xx_usb_fixup_final(struct pci_dev *dev)
 	case 0x22:
 		dev->irq = nlm_irq_to_xirq(node, PIC_9XX_XHCI_1_IRQ);
 		break;
+	case 0x23:
+		dev->irq = nlm_irq_to_xirq(node, PIC_9XX_XHCI_2_IRQ);
+		break;
 	}
 }
 
diff --git a/arch/mips/netlogic/xlp/wakeup.c b/arch/mips/netlogic/xlp/wakeup.c
index e5f44d2605a8..87d7846af2d0 100644
--- a/arch/mips/netlogic/xlp/wakeup.c
+++ b/arch/mips/netlogic/xlp/wakeup.c
@@ -99,7 +99,7 @@ static int wait_for_cpus(int cpu, int bootcpu)
 	do {
 		notready = nlm_threads_per_core;
 		for (i = 0; i < nlm_threads_per_core; i++)
-			if (cpu_ready[cpu + i] || cpu == bootcpu)
+			if (cpu_ready[cpu + i] || (cpu + i) == bootcpu)
 				--notready;
 	} while (notready != 0 && --count > 0);
 
@@ -111,7 +111,7 @@ static void xlp_enable_secondary_cores(const cpumask_t *wakeup_mask)
 	struct nlm_soc_info *nodep;
 	uint64_t syspcibase, fusebase;
 	uint32_t syscoremask, mask, fusemask;
-	int core, n, cpu;
+	int core, n, cpu, ncores;
 
 	for (n = 0; n < NLM_NR_NODES; n++) {
 		if (n != 0) {
@@ -168,7 +168,8 @@ static void xlp_enable_secondary_cores(const cpumask_t *wakeup_mask)
 		syscoremask = (1 << hweight32(~fusemask & mask)) - 1;
 
 		pr_info("Node %d - SYS/FUSE coremask %x\n", n, syscoremask);
-		for (core = 0; core < nlm_cores_per_node(); core++) {
+		ncores = nlm_cores_per_node();
+		for (core = 0; core < ncores; core++) {
 			/* we will be on node 0 core 0 */
 			if (n == 0 && core == 0)
 				continue;
@@ -178,8 +179,7 @@ static void xlp_enable_secondary_cores(const cpumask_t *wakeup_mask)
 				continue;
 
 			/* see if at least the first hw thread is enabled */
-			cpu = (n * nlm_cores_per_node() + core)
-						* NLM_THREADS_PER_CORE;
+			cpu = (n * ncores + core) * NLM_THREADS_PER_CORE;
 			if (!cpumask_test_cpu(cpu, wakeup_mask))
 				continue;
 
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index a26cbe372e06..81f58958cf08 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -98,6 +98,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	case CPU_R10000:
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 	case CPU_XLR:
 		lmodel = &op_model_mipsxx_ops;
 		break;
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index 01f721a85c5b..6a6e2cc55b89 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -246,7 +246,7 @@ static int mipsxx_perfcount_handler(void)
 	unsigned int counter;
 	int handled = IRQ_NONE;
 
-	if (cpu_has_mips_r2 && !(read_c0_cause() & (1 << 26)))
+	if (cpu_has_mips_r2 && !(read_c0_cause() & CAUSEF_PCI))
 		return handled;
 
 	switch (counters) {
@@ -296,6 +296,7 @@ static inline int n_counters(void)
 
 	case CPU_R12000:
 	case CPU_R14000:
+	case CPU_R16000:
 		counters = 4;
 		break;
 
@@ -411,6 +412,10 @@ static int __init mipsxx_init(void)
 		op_model_mipsxx_ops.cpu_type = "mips/r12000";
 		break;
 
+	case CPU_R16000:
+		op_model_mipsxx_ops.cpu_type = "mips/r16000";
+		break;
+
 	case CPU_SB1:
 	case CPU_SB1A:
 		op_model_mipsxx_ops.cpu_type = "mips/sb1";
@@ -435,15 +440,17 @@ static int __init mipsxx_init(void)
 
 	if (get_c0_perfcount_int)
 		perfcount_irq = get_c0_perfcount_int();
-	else if ((cp0_perfcount_irq >= 0) &&
-		 (cp0_compare_irq != cp0_perfcount_irq))
+	else if (cp0_perfcount_irq >= 0)
 		perfcount_irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
 	else
 		perfcount_irq = -1;
 
 	if (perfcount_irq >= 0)
 		return request_irq(perfcount_irq, mipsxx_perfcount_int,
-			0, "Perfcounter", save_perf_irq);
+				   IRQF_PERCPU | IRQF_NOBALANCING |
+				   IRQF_NO_THREAD | IRQF_NO_SUSPEND |
+				   IRQF_SHARED,
+				   "Perfcounter", save_perf_irq);
 
 	return 0;
 }
diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c
index 0164b0c48352..42181c7105df 100644
--- a/arch/mips/paravirt/paravirt-smp.c
+++ b/arch/mips/paravirt/paravirt-smp.c
@@ -75,7 +75,7 @@ static void paravirt_send_ipi_mask(const struct cpumask *mask, unsigned int acti
 {
 	unsigned int cpu;
 
-	for_each_cpu_mask(cpu, *mask)
+	for_each_cpu(cpu, mask)
 		paravirt_send_ipi_single(cpu, action);
 }
 
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 300591c6278d..2eda01e6e08f 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80)	+= pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)		+= fixup-sni.o ops-sni.o
 obj-$(CONFIG_LANTIQ)		+= fixup-lantiq.o
 obj-$(CONFIG_PCI_LANTIQ)	+= pci-lantiq.o ops-lantiq.o
-obj-$(CONFIG_SOC_RT2880)	+= pci-rt2880.o
+obj-$(CONFIG_SOC_RT288X)	+= pci-rt2880.o
 obj-$(CONFIG_SOC_RT3883)	+= pci-rt3883.o
 obj-$(CONFIG_TANBAC_TB0219)	+= fixup-tb0219.o
 obj-$(CONFIG_TANBAC_TB0226)	+= fixup-tb0226.o
diff --git a/arch/mips/pci/msi-xlp.c b/arch/mips/pci/msi-xlp.c
index 6a40f24c91b4..3407495fcbe2 100644
--- a/arch/mips/pci/msi-xlp.c
+++ b/arch/mips/pci/msi-xlp.c
@@ -178,13 +178,6 @@ static void xlp_msi_mask_ack(struct irq_data *d)
 	else
 		nlm_write_reg(md->lnkbase, PCIE_MSI_STATUS, 1u << vec);
 
-	/* Ack at eirr and PIC */
-	ack_c0_eirr(PIC_PCIE_LINK_MSI_IRQ(link));
-	if (cpu_is_xlp9xx())
-		nlm_pic_ack(md->node->picbase,
-				PIC_9XX_IRT_PCIE_LINK_INDEX(link));
-	else
-		nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_LINK_INDEX(link));
 }
 
 static struct irq_chip xlp_msi_chip = {
@@ -230,8 +223,6 @@ static void xlp_msix_mask_ack(struct irq_data *d)
 	}
 	nlm_write_reg(md->lnkbase, status_reg, 1u << bit);
 
-	/* Ack at eirr and PIC */
-	ack_c0_eirr(PIC_PCIE_MSIX_IRQ(link));
 	if (!cpu_is_xlp9xx())
 		nlm_pic_ack(md->node->picbase,
 				PIC_IRT_PCIE_MSIX_INDEX(msixvec));
@@ -541,6 +532,14 @@ void nlm_dispatch_msi(int node, int lirq)
 		do_IRQ(irqbase + i);
 		status &= status - 1;
 	}
+
+	/* Ack at eirr and PIC */
+	ack_c0_eirr(PIC_PCIE_LINK_MSI_IRQ(link));
+	if (cpu_is_xlp9xx())
+		nlm_pic_ack(md->node->picbase,
+				PIC_9XX_IRT_PCIE_LINK_INDEX(link));
+	else
+		nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_LINK_INDEX(link));
 }
 
 void nlm_dispatch_msix(int node, int lirq)
@@ -567,4 +566,6 @@ void nlm_dispatch_msix(int node, int lirq)
 		do_IRQ(irqbase + i);
 		status &= status - 1;
 	}
+	/* Ack at eirr and PIC */
+	ack_c0_eirr(PIC_PCIE_MSIX_IRQ(link));
 }
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index bd2b3b60da83..07a18228e63a 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -488,7 +488,6 @@ static struct platform_driver ar2315_pci_driver = {
 	.probe = ar2315_pci_probe,
 	.driver = {
 		.name = "ar2315-pci",
-		.owner = THIS_MODULE,
 	},
 };
 
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index a04af55d89f1..c258cd406fbb 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -214,6 +214,8 @@ const char *octeon_get_pci_interrupts(void)
 		return "AAABAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
 	case CVMX_BOARD_TYPE_BBGW_REF:
 		return "AABCD";
+	case CVMX_BOARD_TYPE_CUST_DSR1000N:
+		return "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC";
 	case CVMX_BOARD_TYPE_THUNDER:
 	case CVMX_BOARD_TYPE_EBH3000:
 	default:
@@ -271,9 +273,6 @@ static int octeon_read_config(struct pci_bus *bus, unsigned int devfn,
 	pci_addr.s.func = devfn & 0x7;
 	pci_addr.s.reg = reg;
 
-#if PCI_CONFIG_SPACE_DELAY
-	udelay(PCI_CONFIG_SPACE_DELAY);
-#endif
 	switch (size) {
 	case 4:
 		*val = le32_to_cpu(cvmx_read64_uint32(pci_addr.u64));
@@ -308,9 +307,6 @@ static int octeon_write_config(struct pci_bus *bus, unsigned int devfn,
 	pci_addr.s.func = devfn & 0x7;
 	pci_addr.s.reg = reg;
 
-#if PCI_CONFIG_SPACE_DELAY
-	udelay(PCI_CONFIG_SPACE_DELAY);
-#endif
 	switch (size) {
 	case 4:
 		cvmx_write64_uint32(pci_addr.u64, cpu_to_le32(val));
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index a4574947e698..8a978022630b 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -267,7 +267,6 @@ static struct platform_driver rt288x_pci_driver = {
 	.probe = rt288x_pci_probe,
 	.driver = {
 		.name = "rt288x-pci",
-		.owner = THIS_MODULE,
 		.of_match_table = rt288x_pci_match,
 	},
 };
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 1bf60b127377..b8a0bf5766f2 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -91,30 +91,35 @@ static void pcibios_scanbus(struct pci_controller *hose)
 
 	pci_add_resource_offset(&resources,
 				hose->mem_resource, hose->mem_offset);
-	pci_add_resource_offset(&resources, hose->io_resource, hose->io_offset);
+	pci_add_resource_offset(&resources,
+				hose->io_resource, hose->io_offset);
+	pci_add_resource_offset(&resources,
+				hose->busn_resource, hose->busn_offset);
 	bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
 				&resources);
-	if (!bus)
-		pci_free_resource_list(&resources);
-
 	hose->bus = bus;
 
 	need_domain_info = need_domain_info || hose->index;
 	hose->need_domain_info = need_domain_info;
-	if (bus) {
-		next_busno = bus->busn_res.end + 1;
-		/* Don't allow 8-bit bus number overflow inside the hose -
-		   reserve some space for bridges. */
-		if (next_busno > 224) {
-			next_busno = 0;
-			need_domain_info = 1;
-		}
 
-		if (!pci_has_flag(PCI_PROBE_ONLY)) {
-			pci_bus_size_bridges(bus);
-			pci_bus_assign_resources(bus);
-		}
+	if (!bus) {
+		pci_free_resource_list(&resources);
+		return;
+	}
+
+	next_busno = bus->busn_res.end + 1;
+	/* Don't allow 8-bit bus number overflow inside the hose -
+	   reserve some space for bridges. */
+	if (next_busno > 224) {
+		next_busno = 0;
+		need_domain_info = 1;
+	}
+
+	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
 	}
+	pci_bus_add_devices(bus);
 }
 
 #ifdef CONFIG_OF
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index 1bb0b2bf8d6e..99f3db4f0a9b 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -1762,14 +1762,6 @@ static int octeon_pcie_write_config(unsigned int pcie_port, struct pci_bus *bus,
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
-#if PCI_CONFIG_SPACE_DELAY
-	/*
-	 * Delay on writes so that devices have time to come up. Some
-	 * bridges need this to allow time for the secondary busses to
-	 * work
-	 */
-	udelay(PCI_CONFIG_SPACE_DELAY);
-#endif
 	return PCIBIOS_SUCCESSFUL;
 }
 
diff --git a/arch/mips/pistachio/Makefile b/arch/mips/pistachio/Makefile
new file mode 100644
index 000000000000..32189c6ebea5
--- /dev/null
+++ b/arch/mips/pistachio/Makefile
@@ -0,0 +1 @@
+obj-y	+= init.o irq.o time.o
diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform
new file mode 100644
index 000000000000..d80cd612df1f
--- /dev/null
+++ b/arch/mips/pistachio/Platform
@@ -0,0 +1,8 @@
+#
+# IMG Pistachio SoC
+#
+platform-$(CONFIG_MACH_PISTACHIO)	+= pistachio/
+cflags-$(CONFIG_MACH_PISTACHIO)		+=				\
+		-I$(srctree)/arch/mips/include/asm/mach-pistachio
+load-$(CONFIG_MACH_PISTACHIO)		+= 0xffffffff80400000
+zload-$(CONFIG_MACH_PISTACHIO)		+= 0xffffffff81000000
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
new file mode 100644
index 000000000000..d2dc836523a3
--- /dev/null
+++ b/arch/mips/pistachio/init.c
@@ -0,0 +1,131 @@
+/*
+ * Pistachio platform setup
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/cacheflush.h>
+#include <asm/dma-coherence.h>
+#include <asm/fw/fw.h>
+#include <asm/mips-boards/generic.h>
+#include <asm/mips-cm.h>
+#include <asm/mips-cpc.h>
+#include <asm/prom.h>
+#include <asm/smp-ops.h>
+#include <asm/traps.h>
+
+const char *get_system_type(void)
+{
+	return "IMG Pistachio SoC";
+}
+
+static void __init plat_setup_iocoherency(void)
+{
+	/*
+	 * Kernel has been configured with software coherency
+	 * but we might choose to turn it off and use hardware
+	 * coherency instead.
+	 */
+	if (mips_cm_numiocu() != 0) {
+		/* Nothing special needs to be done to enable coherency */
+		pr_info("CMP IOCU detected\n");
+		hw_coherentio = 1;
+		if (coherentio == 0)
+			pr_info("Hardware DMA cache coherency disabled\n");
+		else
+			pr_info("Hardware DMA cache coherency enabled\n");
+	} else {
+		if (coherentio == 1)
+			pr_info("Hardware DMA cache coherency unsupported, but enabled from command line!\n");
+		else
+			pr_info("Software DMA cache coherency enabled\n");
+	}
+}
+
+void __init plat_mem_setup(void)
+{
+	if (fw_arg0 != -2)
+		panic("Device-tree not present");
+
+	__dt_setup_arch((void *)fw_arg1);
+	strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+	plat_setup_iocoherency();
+}
+
+#define DEFAULT_CPC_BASE_ADDR 0x1bde0000
+
+phys_addr_t mips_cpc_default_phys_base(void)
+{
+	return DEFAULT_CPC_BASE_ADDR;
+}
+
+static void __init mips_nmi_setup(void)
+{
+	void *base;
+	extern char except_vec_nmi;
+
+	base = cpu_has_veic ?
+		(void *)(CAC_BASE + 0xa80) :
+		(void *)(CAC_BASE + 0x380);
+	memcpy(base, &except_vec_nmi, 0x80);
+	flush_icache_range((unsigned long)base,
+			   (unsigned long)base + 0x80);
+}
+
+static void __init mips_ejtag_setup(void)
+{
+	void *base;
+	extern char except_vec_ejtag_debug;
+
+	base = cpu_has_veic ?
+		(void *)(CAC_BASE + 0xa00) :
+		(void *)(CAC_BASE + 0x300);
+	memcpy(base, &except_vec_ejtag_debug, 0x80);
+	flush_icache_range((unsigned long)base,
+			   (unsigned long)base + 0x80);
+}
+
+void __init prom_init(void)
+{
+	board_nmi_handler_setup = mips_nmi_setup;
+	board_ejtag_handler_setup = mips_ejtag_setup;
+
+	mips_cm_probe();
+	mips_cpc_probe();
+	register_cps_smp_ops();
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
+
+void __init device_tree_init(void)
+{
+	if (!initial_boot_params)
+		return;
+
+	unflatten_and_copy_device_tree();
+}
+
+static int __init plat_of_setup(void)
+{
+	if (!of_have_populated_dt())
+		panic("Device tree not present");
+
+	if (of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL))
+		panic("Failed to populate DT");
+
+	return 0;
+}
+arch_initcall(plat_of_setup);
diff --git a/arch/mips/pistachio/irq.c b/arch/mips/pistachio/irq.c
new file mode 100644
index 000000000000..0a6b24c24652
--- /dev/null
+++ b/arch/mips/pistachio/irq.c
@@ -0,0 +1,28 @@
+/*
+ * Pistachio IRQ setup
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/mips-gic.h>
+#include <linux/kernel.h>
+
+#include <asm/cpu-features.h>
+#include <asm/irq_cpu.h>
+
+void __init arch_init_irq(void)
+{
+	pr_info("EIC is %s\n", cpu_has_veic ? "on" : "off");
+	pr_info("VINT is %s\n", cpu_has_vint ? "on" : "off");
+
+	if (!cpu_has_veic)
+		mips_cpu_irq_init();
+
+	irqchip_init();
+}
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
new file mode 100644
index 000000000000..67889fcea8aa
--- /dev/null
+++ b/arch/mips/pistachio/time.c
@@ -0,0 +1,52 @@
+/*
+ * Pistachio clocksource/timer setup
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/irqchip/mips-gic.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+
+unsigned int get_c0_compare_int(void)
+{
+	return gic_get_c0_compare_int();
+}
+
+int get_c0_perfcount_int(void)
+{
+	return gic_get_c0_perfcount_int();
+}
+
+void __init plat_time_init(void)
+{
+	struct device_node *np;
+	struct clk *clk;
+
+	of_clk_init(NULL);
+	clocksource_of_init();
+
+	np = of_get_cpu_node(0, NULL);
+	if (!np) {
+		pr_err("Failed to get CPU node\n");
+		return;
+	}
+
+	clk = of_clk_get(np, 0);
+	if (IS_ERR(clk)) {
+		pr_err("Failed to get CPU clock: %ld\n", PTR_ERR(clk));
+		return;
+	}
+
+	mips_hpt_frequency = clk_get_rate(clk) / 2;
+	clk_put(clk);
+}
diff --git a/arch/mips/power/Makefile b/arch/mips/power/Makefile
index 73d56b87cb9b..70bd7883bc1b 100644
--- a/arch/mips/power/Makefile
+++ b/arch/mips/power/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_HIBERNATION) += cpu.o hibernate.o
+obj-$(CONFIG_HIBERNATION) += cpu.o hibernate.o hibernate_asm.o
diff --git a/arch/mips/power/hibernate.c b/arch/mips/power/hibernate.c
new file mode 100644
index 000000000000..19a9af68bcdb
--- /dev/null
+++ b/arch/mips/power/hibernate.c
@@ -0,0 +1,10 @@
+#include <asm/tlbflush.h>
+
+extern int restore_image(void);
+
+int swsusp_arch_resume(void)
+{
+	/* Avoid TLB mismatch during and after kernel resume */
+	local_flush_tlb_all();
+	return restore_image();
+}
diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate_asm.S
index 32a7c828f073..b1fab951100f 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate_asm.S
@@ -29,7 +29,7 @@ LEAF(swsusp_arch_suspend)
 	j swsusp_save
 END(swsusp_arch_suspend)
 
-LEAF(swsusp_arch_resume)
+LEAF(restore_image)
 	PTR_L t0, restore_pblist
 0:
 	PTR_L t1, PBE_ADDRESS(t0)   /* source */
@@ -43,7 +43,6 @@ LEAF(swsusp_arch_resume)
 	bne t1, t3, 1b
 	PTR_L t0, PBE_NEXT(t0)
 	bnez t0, 0b
-	jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */
 	PTR_LA t0, saved_regs
 	PTR_L ra, PT_R31(t0)
 	PTR_L sp, PT_R29(t0)
@@ -59,4 +58,4 @@ LEAF(swsusp_arch_resume)
 	PTR_L s7, PT_R23(t0)
 	PTR_LI v0, 0x0
 	jr ra
-END(swsusp_arch_resume)
+END(restore_image)
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index b1c52ca580f9..e9bc8c96174e 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -7,6 +7,11 @@ config CLKEVT_RT3352
 	select CLKSRC_OF
 	select CLKSRC_MMIO
 
+config RALINK_ILL_ACC
+	bool
+	depends on SOC_RT305X
+	default y
+
 choice
 	prompt "Ralink SoC selection"
 	default SOC_RT305X
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index ee736bd103f8..570098bfdf87 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -60,7 +60,7 @@ static void per_hub_init(cnodeid_t cnode)
 	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
 	int i;
 
-	cpu_set(smp_processor_id(), hub->h_cpus);
+	cpumask_set_cpu(smp_processor_id(), &hub->h_cpus);
 
 	if (test_and_set_bit(cnode, hub_init_mask))
 		return;
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index ecbb62f339c5..bda90cf87e8c 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -29,8 +29,8 @@ static cpumask_t ktext_repmask;
 void __init setup_replication_mask(void)
 {
 	/* Set only the master cnode's bit.  The master cnode is always 0. */
-	cpus_clear(ktext_repmask);
-	cpu_set(0, ktext_repmask);
+	cpumask_clear(&ktext_repmask);
+	cpumask_set_cpu(0, &ktext_repmask);
 
 #ifdef CONFIG_REPLICATE_KTEXT
 #ifndef CONFIG_MAPPED_KERNEL
@@ -43,7 +43,7 @@ void __init setup_replication_mask(void)
 			if (cnode == 0)
 				continue;
 			/* Advertise that we have a copy of the kernel */
-			cpu_set(cnode, ktext_repmask);
+			cpumask_set_cpu(cnode, &ktext_repmask);
 		}
 	}
 #endif
@@ -99,7 +99,7 @@ void __init replicate_kernel_text()
 		client_nasid = COMPACT_TO_NASID_NODEID(cnode);
 
 		/* Check if this node should get a copy of the kernel */
-		if (cpu_isset(cnode, ktext_repmask)) {
+		if (cpumask_test_cpu(cnode, &ktext_repmask)) {
 			server_nasid = client_nasid;
 			copy_kernel(server_nasid);
 		}
@@ -124,7 +124,7 @@ unsigned long node_getfirstfree(cnodeid_t cnode)
 	loadbase += 16777216;
 #endif
 	offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase;
-	if ((cnode == 0) || (cpu_isset(cnode, ktext_repmask)))
+	if ((cnode == 0) || (cpumask_test_cpu(cnode, &ktext_repmask)))
 		return TO_NODE(nasid, offset) >> PAGE_SHIFT;
 	else
 		return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 0b68469e063f..8d0eb2643248 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -404,7 +404,7 @@ static void __init node_mem_init(cnodeid_t node)
 	NODE_DATA(node)->node_start_pfn = start_pfn;
 	NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
 
-	cpus_clear(hub_data(node)->h_cpus);
+	cpumask_clear(&hub_data(node)->h_cpus);
 
 	slot_freepfn += PFN_UP(sizeof(struct pglist_data) +
 			       sizeof(struct hub_data));
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 1d97eaba0c5f..a6d10f607f34 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/sched_clock.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/param.h>
@@ -159,11 +160,18 @@ struct clocksource hub_rt_clocksource = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static u64 notrace hub_rt_read_sched_clock(void)
+{
+	return REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT);
+}
+
 static void __init hub_rt_clocksource_init(void)
 {
 	struct clocksource *cs = &hub_rt_clocksource;
 
 	clocksource_register_hz(cs, CYCLES_PER_SEC);
+
+	sched_clock_register(hub_rt_read_sched_clock, 52, CYCLES_PER_SEC);
 }
 
 void __init plat_time_init(void)
diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c
index 511e9ff2acfd..0134db2ad0a8 100644
--- a/arch/mips/sgi-ip32/ip32-platform.c
+++ b/arch/mips/sgi-ip32/ip32-platform.c
@@ -5,14 +5,16 @@
  *
  * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
  */
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
+#include <linux/rtc/ds1685.h>
 
 #include <asm/ip32/mace.h>
 #include <asm/ip32/ip32_ints.h>
 
+extern void ip32_prepare_poweroff(void);
+
 #define MACEISA_SERIAL1_OFFS   offsetof(struct sgi_mace, isa.serial1)
 #define MACEISA_SERIAL2_OFFS   offsetof(struct sgi_mace, isa.serial2)
 
@@ -90,22 +92,47 @@ static __init int sgio2btns_devinit(void)
 
 device_initcall(sgio2btns_devinit);
 
-static struct resource sgio2_cmos_rsrc[] = {
+#define MACE_RTC_RES_START (MACE_BASE + offsetof(struct sgi_mace, isa.rtc))
+#define MACE_RTC_RES_END (MACE_RTC_RES_START + 32767)
+
+static struct resource ip32_rtc_resources[] = {
 	{
-		.start = 0x70,
-		.end   = 0x71,
-		.flags = IORESOURCE_IO
+		.start	= MACEISA_RTC_IRQ,
+		.end	= MACEISA_RTC_IRQ,
+		.flags	= IORESOURCE_IRQ
+	}, {
+		.start	= MACE_RTC_RES_START,
+		.end	= MACE_RTC_RES_END,
+		.flags	= IORESOURCE_MEM,
 	}
 };
 
-static __init int sgio2_cmos_devinit(void)
+/* RTC registers on IP32 are each padded by 256 bytes (0x100). */
+static struct ds1685_rtc_platform_data
+ip32_rtc_platform_data[] = {
+	{
+		.regstep = 0x100,
+		.bcd_mode = true,
+		.no_irq = false,
+		.uie_unsupported = false,
+		.alloc_io_resources = true,
+		.plat_prepare_poweroff = ip32_prepare_poweroff,
+	},
+};
+
+struct platform_device ip32_rtc_device = {
+	.name			= "rtc-ds1685",
+	.id			= -1,
+	.dev			= {
+		.platform_data	= ip32_rtc_platform_data,
+	},
+	.num_resources		= ARRAY_SIZE(ip32_rtc_resources),
+	.resource		= ip32_rtc_resources,
+};
+
++static int __init sgio2_rtc_devinit(void)
 {
-	return IS_ERR(platform_device_register_simple("rtc_cmos", -1,
-						      sgio2_cmos_rsrc, 1));
+	return platform_device_register(&ip32_rtc_device);
 }
 
 device_initcall(sgio2_cmos_devinit);
-
-MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("8250 UART probe driver for SGI IP32 aka O2");
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 44b3470a0bbb..8bd415c8729f 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -11,10 +11,11 @@
 #include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
 #include <linux/delay.h>
-#include <linux/ds17287rtc.h>
+#include <linux/rtc/ds1685.h>
 #include <linux/interrupt.h>
 #include <linux/pm.h>
 
@@ -33,53 +34,40 @@
 #define POWERDOWN_FREQ		(HZ / 4)
 #define PANIC_FREQ		(HZ / 8)
 
-static struct timer_list power_timer, blink_timer, debounce_timer;
-static int has_panicked, shuting_down;
+extern struct platform_device ip32_rtc_device;
 
-static void ip32_machine_restart(char *command) __noreturn;
-static void ip32_machine_halt(void) __noreturn;
-static void ip32_machine_power_off(void) __noreturn;
+static struct timer_list power_timer, blink_timer;
+static int has_panicked, shutting_down;
 
-static void ip32_machine_restart(char *cmd)
+static __noreturn void ip32_poweroff(void *data)
 {
-	crime->control = CRIME_CONTROL_HARD_RESET;
-	while (1);
-}
+	void (*poweroff_func)(struct platform_device *) =
+		symbol_get(ds1685_rtc_poweroff);
+
+#ifdef CONFIG_MODULES
+	/* If the first __symbol_get failed, our module wasn't loaded. */
+	if (!poweroff_func) {
+		request_module("rtc-ds1685");
+		poweroff_func = symbol_get(ds1685_rtc_poweroff);
+	}
+#endif
 
-static inline void ip32_machine_halt(void)
-{
-	ip32_machine_power_off();
-}
+	if (!poweroff_func)
+		pr_emerg("RTC not available for power-off.  Spinning forever ...\n");
+	else {
+		(*poweroff_func)((struct platform_device *)data);
+		symbol_put(ds1685_rtc_poweroff);
+	}
 
-static void ip32_machine_power_off(void)
-{
-	unsigned char reg_a, xctrl_a, xctrl_b;
-
-	disable_irq(MACEISA_RTC_IRQ);
-	reg_a = CMOS_READ(RTC_REG_A);
-
-	/* setup for kickstart & wake-up (DS12287 Ref. Man. p. 19) */
-	reg_a &= ~DS_REGA_DV2;
-	reg_a |= DS_REGA_DV1;
-
-	CMOS_WRITE(reg_a | DS_REGA_DV0, RTC_REG_A);
-	wbflush();
-	xctrl_b = CMOS_READ(DS_B1_XCTRL4B)
-		   | DS_XCTRL4B_ABE | DS_XCTRL4B_KFE;
-	CMOS_WRITE(xctrl_b, DS_B1_XCTRL4B);
-	xctrl_a = CMOS_READ(DS_B1_XCTRL4A) & ~DS_XCTRL4A_IFS;
-	CMOS_WRITE(xctrl_a, DS_B1_XCTRL4A);
-	wbflush();
-	/* adios amigos... */
-	CMOS_WRITE(xctrl_a | DS_XCTRL4A_PAB, DS_B1_XCTRL4A);
-	CMOS_WRITE(reg_a, RTC_REG_A);
-	wbflush();
-	while (1);
+	unreachable();
 }
 
-static void power_timeout(unsigned long data)
+static void ip32_machine_restart(char *cmd) __noreturn;
+static void ip32_machine_restart(char *cmd)
 {
-	ip32_machine_power_off();
+	msleep(20);
+	crime->control = CRIME_CONTROL_HARD_RESET;
+	unreachable();
 }
 
 static void blink_timeout(unsigned long data)
@@ -89,44 +77,27 @@ static void blink_timeout(unsigned long data)
 	mod_timer(&blink_timer, jiffies + data);
 }
 
-static void debounce(unsigned long data)
+static void ip32_machine_halt(void)
 {
-	unsigned char reg_a, reg_c, xctrl_a;
-
-	reg_c = CMOS_READ(RTC_INTR_FLAGS);
-	reg_a = CMOS_READ(RTC_REG_A);
-	CMOS_WRITE(reg_a | DS_REGA_DV0, RTC_REG_A);
-	wbflush();
-	xctrl_a = CMOS_READ(DS_B1_XCTRL4A);
-	if ((xctrl_a & DS_XCTRL4A_IFS) || (reg_c & RTC_IRQF )) {
-		/* Interrupt still being sent. */
-		debounce_timer.expires = jiffies + 50;
-		add_timer(&debounce_timer);
-
-		/* clear interrupt source */
-		CMOS_WRITE(xctrl_a & ~DS_XCTRL4A_IFS, DS_B1_XCTRL4A);
-		CMOS_WRITE(reg_a & ~DS_REGA_DV0, RTC_REG_A);
-		return;
-	}
-	CMOS_WRITE(reg_a & ~DS_REGA_DV0, RTC_REG_A);
-
-	if (has_panicked)
-		ip32_machine_restart(NULL);
+	ip32_poweroff(&ip32_rtc_device);
+}
 
-	enable_irq(MACEISA_RTC_IRQ);
+static void power_timeout(unsigned long data)
+{
+	ip32_poweroff(&ip32_rtc_device);
 }
 
-static inline void ip32_power_button(void)
+void ip32_prepare_poweroff(void)
 {
 	if (has_panicked)
 		return;
 
-	if (shuting_down || kill_cad_pid(SIGINT, 1)) {
+	if (shutting_down || kill_cad_pid(SIGINT, 1)) {
 		/* No init process or button pressed twice.  */
-		ip32_machine_power_off();
+		ip32_poweroff(&ip32_rtc_device);
 	}
 
-	shuting_down = 1;
+	shutting_down = 1;
 	blink_timer.data = POWERDOWN_FREQ;
 	blink_timeout(POWERDOWN_FREQ);
 
@@ -136,27 +107,6 @@ static inline void ip32_power_button(void)
 	add_timer(&power_timer);
 }
 
-static irqreturn_t ip32_rtc_int(int irq, void *dev_id)
-{
-	unsigned char reg_c;
-
-	reg_c = CMOS_READ(RTC_INTR_FLAGS);
-	if (!(reg_c & RTC_IRQF)) {
-		printk(KERN_WARNING
-			"%s: RTC IRQ without RTC_IRQF\n", __func__);
-	}
-	/* Wait until interrupt goes away */
-	disable_irq_nosync(MACEISA_RTC_IRQ);
-	init_timer(&debounce_timer);
-	debounce_timer.function = debounce;
-	debounce_timer.expires = jiffies + 50;
-	add_timer(&debounce_timer);
-
-	printk(KERN_DEBUG "Power button pressed\n");
-	ip32_power_button();
-	return IRQ_HANDLED;
-}
-
 static int panic_event(struct notifier_block *this, unsigned long event,
 		       void *ptr)
 {
@@ -190,15 +140,12 @@ static __init int ip32_reboot_setup(void)
 
 	_machine_restart = ip32_machine_restart;
 	_machine_halt = ip32_machine_halt;
-	pm_power_off = ip32_machine_power_off;
+	pm_power_off = ip32_machine_halt;
 
 	init_timer(&blink_timer);
 	blink_timer.function = blink_timeout;
 	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
-	if (request_irq(MACEISA_RTC_IRQ, ip32_rtc_int, 0, "rtc", NULL))
-		panic("Can't allocate MACEISA RTC IRQ");
-
 	return 0;
 }
 
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index c1c374f0ec12..4861a78c7160 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -40,7 +40,6 @@ typedef struct {
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	struct pt_regs		*frame;		/* current exception frame */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
@@ -74,7 +73,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c
index d780670cbaf3..e5a61c659b5a 100644
--- a/arch/mn10300/kernel/asm-offsets.c
+++ b/arch/mn10300/kernel/asm-offsets.c
@@ -22,7 +22,6 @@ void foo(void)
 	BLANK();
 
 	OFFSET(TI_task,			thread_info, task);
-	OFFSET(TI_exec_domain,		thread_info, exec_domain);
 	OFFSET(TI_frame,		thread_info, frame);
 	OFFSET(TI_flags,		thread_info, flags);
 	OFFSET(TI_cpu,			thread_info, cpu);
@@ -85,7 +84,6 @@ void foo(void)
 	DEFINE(SIGCHLD_asm,		SIGCHLD);
 	BLANK();
 
-	OFFSET(EXEC_DOMAIN_handler,	exec_domain, handler);
 	OFFSET(RT_SIGFRAME_sigcontext,	rt_sigframe, uc.uc_mcontext);
 
 	DEFINE(PAGE_SIZE_asm,		PAGE_SIZE);
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 8609845f12c5..dfd0301cf200 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -202,20 +202,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 		       struct pt_regs *regs)
 {
 	struct sigframe __user *frame;
-	int rsig, sig = ksig->sig;
+	int sig = ksig->sig;
 
 	frame = get_sigframe(ksig, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	rsig = sig;
-	if (sig < 32 &&
-	    current_thread_info()->exec_domain &&
-	    current_thread_info()->exec_domain->signal_invmap)
-		rsig = current_thread_info()->exec_domain->signal_invmap[sig];
-
-	if (__put_user(rsig, &frame->sig) < 0 ||
+	if (__put_user(sig, &frame->sig) < 0 ||
 	    __put_user(&frame->sc, &frame->psc) < 0)
 		return -EFAULT;
 
@@ -270,20 +264,14 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 			  struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	int rsig, sig = ksig->sig;
+	int sig = ksig->sig;
 
 	frame = get_sigframe(ksig, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	rsig = sig;
-	if (sig < 32 &&
-	    current_thread_info()->exec_domain &&
-	    current_thread_info()->exec_domain->signal_invmap)
-		rsig = current_thread_info()->exec_domain->signal_invmap[sig];
-
-	if (__put_user(rsig, &frame->sig) ||
+	if (__put_user(sig, &frame->sig) ||
 	    __put_user(&frame->info, &frame->pinfo) ||
 	    __put_user(&frame->uc, &frame->puc) ||
 	    copy_siginfo_to_user(&frame->info, &ksig->info))
diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c
index 613ca1e55b4b..3dfe2d31c67b 100644
--- a/arch/mn10300/unit-asb2305/pci.c
+++ b/arch/mn10300/unit-asb2305/pci.c
@@ -342,6 +342,7 @@ static int __init pcibios_init(void)
 {
 	resource_size_t io_offset, mem_offset;
 	LIST_HEAD(resources);
+	struct pci_bus *bus;
 
 	ioport_resource.start	= 0xA0000000;
 	ioport_resource.end	= 0xDFFFFFFF;
@@ -371,11 +372,14 @@ static int __init pcibios_init(void)
 
 	pci_add_resource_offset(&resources, &pci_ioport_resource, io_offset);
 	pci_add_resource_offset(&resources, &pci_iomem_resource, mem_offset);
-	pci_scan_root_bus(NULL, 0, &pci_direct_ampci, NULL, &resources);
+	bus = pci_scan_root_bus(NULL, 0, &pci_direct_ampci, NULL, &resources);
+	if (!bus)
+		return 0;
 
 	pcibios_irq_init();
 	pcibios_fixup_irqs();
 	pcibios_resource_survey();
+	pci_bus_add_devices(bus);
 	return 0;
 }
 
diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h
index 20fb1cf2dab6..642462144872 100644
--- a/arch/nios2/include/asm/ptrace.h
+++ b/arch/nios2/include/asm/ptrace.h
@@ -15,7 +15,54 @@
 
 #include <uapi/asm/ptrace.h>
 
+/* This struct defines the way the registers are stored on the
+   stack during a system call.  */
+
 #ifndef __ASSEMBLY__
+struct pt_regs {
+	unsigned long  r8;	/* r8-r15 Caller-saved GP registers */
+	unsigned long  r9;
+	unsigned long  r10;
+	unsigned long  r11;
+	unsigned long  r12;
+	unsigned long  r13;
+	unsigned long  r14;
+	unsigned long  r15;
+	unsigned long  r1;	/* Assembler temporary */
+	unsigned long  r2;	/* Retval LS 32bits */
+	unsigned long  r3;	/* Retval MS 32bits */
+	unsigned long  r4;	/* r4-r7 Register arguments */
+	unsigned long  r5;
+	unsigned long  r6;
+	unsigned long  r7;
+	unsigned long  orig_r2;	/* Copy of r2 ?? */
+	unsigned long  ra;	/* Return address */
+	unsigned long  fp;	/* Frame pointer */
+	unsigned long  sp;	/* Stack pointer */
+	unsigned long  gp;	/* Global pointer */
+	unsigned long  estatus;
+	unsigned long  ea;	/* Exception return address (pc) */
+	unsigned long  orig_r7;
+};
+
+/*
+ * This is the extended stack used by signal handlers and the context
+ * switcher: it's pushed after the normal "struct pt_regs".
+ */
+struct switch_stack {
+	unsigned long  r16;	/* r16-r23 Callee-saved GP registers */
+	unsigned long  r17;
+	unsigned long  r18;
+	unsigned long  r19;
+	unsigned long  r20;
+	unsigned long  r21;
+	unsigned long  r22;
+	unsigned long  r23;
+	unsigned long  fp;
+	unsigned long  gp;
+	unsigned long  ra;
+};
+
 #define user_mode(regs)	(((regs)->estatus & ESTATUS_EU))
 
 #define instruction_pointer(regs)	((regs)->ra)
diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h
index 1f266575beb5..d69c338bd19c 100644
--- a/arch/nios2/include/asm/thread_info.h
+++ b/arch/nios2/include/asm/thread_info.h
@@ -39,7 +39,6 @@ typedef struct {
  */
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable,<0 => BUG */
@@ -47,7 +46,6 @@ struct thread_info {
 						  0-0x7FFFFFFF for user-thead
 						  0-0xFFFFFFFF for kernel-thread
 						*/
-	struct restart_block	restart_block;
 	struct pt_regs		*regs;
 };
 
@@ -59,14 +57,10 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
-	.restart_block	= {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
diff --git a/arch/nios2/include/asm/ucontext.h b/arch/nios2/include/asm/ucontext.h
deleted file mode 100644
index 2c87614b0f6e..000000000000
--- a/arch/nios2/include/asm/ucontext.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2010 Tobias Klauser <tklauser@distanz.ch>
- * Copyright (C) 2004 Microtronix Datacom Ltd
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_NIOS2_UCONTEXT_H
-#define _ASM_NIOS2_UCONTEXT_H
-
-typedef int greg_t;
-#define NGREG 32
-typedef greg_t gregset_t[NGREG];
-
-struct mcontext {
-	int version;
-	gregset_t gregs;
-};
-
-#define MCONTEXT_VERSION 2
-
-struct ucontext {
-	unsigned long	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct mcontext	  uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 4f07ca3f8d10..e0bb972a50d7 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 header-y += elf.h
-header-y += ucontext.h
+
+generic-y += ucontext.h
diff --git a/arch/nios2/include/uapi/asm/elf.h b/arch/nios2/include/uapi/asm/elf.h
index a5b91ae5cf56..6f06d3b2949e 100644
--- a/arch/nios2/include/uapi/asm/elf.h
+++ b/arch/nios2/include/uapi/asm/elf.h
@@ -50,9 +50,7 @@
 
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG	\
-	((sizeof(struct pt_regs) + sizeof(struct switch_stack)) /	\
-		sizeof(elf_greg_t))
+#define ELF_NGREG		49
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 typedef unsigned long elf_fpregset_t;
diff --git a/arch/nios2/include/uapi/asm/ptrace.h b/arch/nios2/include/uapi/asm/ptrace.h
index e83a7c9d1c36..eff00e67c0a2 100644
--- a/arch/nios2/include/uapi/asm/ptrace.h
+++ b/arch/nios2/include/uapi/asm/ptrace.h
@@ -60,60 +60,21 @@
 #define PTR_IPENDING	37
 #define PTR_CPUID	38
 #define PTR_CTL6	39
-#define PTR_CTL7	40
+#define PTR_EXCEPTION	40
 #define PTR_PTEADDR	41
 #define PTR_TLBACC	42
 #define PTR_TLBMISC	43
+#define PTR_ECCINJ	44
+#define PTR_BADADDR	45
+#define PTR_CONFIG	46
+#define PTR_MPUBASE	47
+#define PTR_MPUACC	48
 
-#define NUM_PTRACE_REG (PTR_TLBMISC + 1)
+#define NUM_PTRACE_REG (PTR_MPUACC + 1)
 
-/* this struct defines the way the registers are stored on the
-   stack during a system call.
-
-   There is a fake_regs in setup.c that has to match pt_regs.*/
-
-struct pt_regs {
-	unsigned long  r8;		/* r8-r15 Caller-saved GP registers */
-	unsigned long  r9;
-	unsigned long  r10;
-	unsigned long  r11;
-	unsigned long  r12;
-	unsigned long  r13;
-	unsigned long  r14;
-	unsigned long  r15;
-	unsigned long  r1;		/* Assembler temporary */
-	unsigned long  r2;		/* Retval LS 32bits */
-	unsigned long  r3;		/* Retval MS 32bits */
-	unsigned long  r4;		/* r4-r7 Register arguments */
-	unsigned long  r5;
-	unsigned long  r6;
-	unsigned long  r7;
-	unsigned long  orig_r2;		/* Copy of r2 ?? */
-	unsigned long  ra;		/* Return address */
-	unsigned long  fp;		/* Frame pointer */
-	unsigned long  sp;		/* Stack pointer */
-	unsigned long  gp;		/* Global pointer */
-	unsigned long  estatus;
-	unsigned long  ea;		/* Exception return address (pc) */
-	unsigned long  orig_r7;
-};
-
-/*
- * This is the extended stack used by signal handlers and the context
- * switcher: it's pushed after the normal "struct pt_regs".
- */
-struct switch_stack {
-	unsigned long  r16;		/* r16-r23 Callee-saved GP registers */
-	unsigned long  r17;
-	unsigned long  r18;
-	unsigned long  r19;
-	unsigned long  r20;
-	unsigned long  r21;
-	unsigned long  r22;
-	unsigned long  r23;
-	unsigned long  fp;
-	unsigned long  gp;
-	unsigned long  ra;
+/* User structures for general purpose registers.  */
+struct user_pt_regs {
+	__u32		regs[49];
 };
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/nios2/include/uapi/asm/sigcontext.h b/arch/nios2/include/uapi/asm/sigcontext.h
index 7b8bb41867d4..b67944a50927 100644
--- a/arch/nios2/include/uapi/asm/sigcontext.h
+++ b/arch/nios2/include/uapi/asm/sigcontext.h
@@ -15,14 +15,16 @@
  * details.
  */
 
-#ifndef _ASM_NIOS2_SIGCONTEXT_H
-#define _ASM_NIOS2_SIGCONTEXT_H
+#ifndef _UAPI__ASM_SIGCONTEXT_H
+#define _UAPI__ASM_SIGCONTEXT_H
 
-#include <asm/ptrace.h>
+#include <linux/types.h>
+
+#define MCONTEXT_VERSION 2
 
 struct sigcontext {
-	struct pt_regs regs;
-	unsigned long  sc_mask;	/* old sigmask */
+	int version;
+	unsigned long gregs[32];
 };
 
 #endif
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index a223691dff4f..1d96de0bd4aa 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -126,47 +126,46 @@ void __init setup_cpuinfo(void)
  */
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	int count = 0;
 	const u32 clockfreq = cpuinfo.cpu_clock_freq;
 
-	count = seq_printf(m,
-			"CPU:\t\tNios II/%s\n"
-			"MMU:\t\t%s\n"
-			"FPU:\t\tnone\n"
-			"Clocking:\t%u.%02u MHz\n"
-			"BogoMips:\t%lu.%02lu\n"
-			"Calibration:\t%lu loops\n",
-			cpuinfo.cpu_impl,
-			cpuinfo.mmu ? "present" : "none",
-			clockfreq / 1000000, (clockfreq / 100000) % 10,
-			(loops_per_jiffy * HZ) / 500000,
-			((loops_per_jiffy * HZ) / 5000) % 100,
-			(loops_per_jiffy * HZ));
-
-	count += seq_printf(m,
-			"HW:\n"
-			" MUL:\t\t%s\n"
-			" MULX:\t\t%s\n"
-			" DIV:\t\t%s\n",
-			cpuinfo.has_mul ? "yes" : "no",
-			cpuinfo.has_mulx ? "yes" : "no",
-			cpuinfo.has_div ? "yes" : "no");
-
-	count += seq_printf(m,
-			"Icache:\t\t%ukB, line length: %u\n",
-			cpuinfo.icache_size >> 10,
-			cpuinfo.icache_line_size);
-
-	count += seq_printf(m,
-			"Dcache:\t\t%ukB, line length: %u\n",
-			cpuinfo.dcache_size >> 10,
-			cpuinfo.dcache_line_size);
-
-	count += seq_printf(m,
-			"TLB:\t\t%u ways, %u entries, %u PID bits\n",
-			cpuinfo.tlb_num_ways,
-			cpuinfo.tlb_num_entries,
-			cpuinfo.tlb_pid_num_bits);
+	seq_printf(m,
+		   "CPU:\t\tNios II/%s\n"
+		   "MMU:\t\t%s\n"
+		   "FPU:\t\tnone\n"
+		   "Clocking:\t%u.%02u MHz\n"
+		   "BogoMips:\t%lu.%02lu\n"
+		   "Calibration:\t%lu loops\n",
+		   cpuinfo.cpu_impl,
+		   cpuinfo.mmu ? "present" : "none",
+		   clockfreq / 1000000, (clockfreq / 100000) % 10,
+		   (loops_per_jiffy * HZ) / 500000,
+		   ((loops_per_jiffy * HZ) / 5000) % 100,
+		   (loops_per_jiffy * HZ));
+
+	seq_printf(m,
+		   "HW:\n"
+		   " MUL:\t\t%s\n"
+		   " MULX:\t\t%s\n"
+		   " DIV:\t\t%s\n",
+		   cpuinfo.has_mul ? "yes" : "no",
+		   cpuinfo.has_mulx ? "yes" : "no",
+		   cpuinfo.has_div ? "yes" : "no");
+
+	seq_printf(m,
+		   "Icache:\t\t%ukB, line length: %u\n",
+		   cpuinfo.icache_size >> 10,
+		   cpuinfo.icache_line_size);
+
+	seq_printf(m,
+		   "Dcache:\t\t%ukB, line length: %u\n",
+		   cpuinfo.dcache_size >> 10,
+		   cpuinfo.dcache_line_size);
+
+	seq_printf(m,
+		   "TLB:\t\t%u ways, %u entries, %u PID bits\n",
+		   cpuinfo.tlb_num_ways,
+		   cpuinfo.tlb_num_entries,
+		   cpuinfo.tlb_pid_num_bits);
 
 	return 0;
 }
diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S
index 7729bd3f2e79..27b006c52e12 100644
--- a/arch/nios2/kernel/entry.S
+++ b/arch/nios2/kernel/entry.S
@@ -161,7 +161,7 @@ ENTRY(inthandler)
  ***********************************************************************
  */
 ENTRY(handle_trap)
-	ldw	r24, -4(ea)	/* instruction that caused the exception */
+	ldwio	r24, -4(ea)	/* instruction that caused the exception */
 	srli	r24, r24, 4
 	andi	r24, r24, 0x7c
 	movia	r9,trap_table
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 0e075b5ad2a5..2f8c74f93e70 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -94,7 +94,6 @@ void show_regs(struct pt_regs *regs)
 
 void flush_thread(void)
 {
-	set_fs(USER_DS);
 }
 
 int copy_thread(unsigned long clone_flags,
diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
index 2d0ea25be171..20662b0f6c9e 100644
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@@ -39,11 +39,11 @@ static inline int rt_restore_ucontext(struct pt_regs *regs,
 					struct ucontext *uc, int *pr2)
 {
 	int temp;
-	greg_t *gregs = uc->uc_mcontext.gregs;
+	unsigned long *gregs = uc->uc_mcontext.gregs;
 	int err;
 
 	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+	current->restart_block.fn = do_no_restart_syscall;
 
 	err = __get_user(temp, &uc->uc_mcontext.version);
 	if (temp != MCONTEXT_VERSION)
@@ -127,7 +127,7 @@ badframe:
 static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs)
 {
 	struct switch_stack *sw = (struct switch_stack *)regs - 1;
-	greg_t *gregs = uc->uc_mcontext.gregs;
+	unsigned long *gregs = uc->uc_mcontext.gregs;
 	int err = 0;
 
 	err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version);
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 2ae482b42669..796642932e2e 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -23,9 +23,6 @@ static void __flush_dcache(unsigned long start, unsigned long end)
 	end += (cpuinfo.dcache_line_size - 1);
 	end &= ~(cpuinfo.dcache_line_size - 1);
 
-	if (end > start + cpuinfo.dcache_size)
-		end = start + cpuinfo.dcache_size;
-
 	for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
 		__asm__ __volatile__ ("   flushda 0(%0)\n"
 					: /* Outputs */
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 0d231adfe576..0c9b6afe69e9 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -126,7 +126,6 @@ good_area:
 		break;
 	}
 
-survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -220,11 +219,6 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
 	if (!user_mode(regs))
 		goto no_context;
 	pagefault_out_of_memory();
diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h
index 875f0845a707..6e619a79a401 100644
--- a/arch/openrisc/include/asm/thread_info.h
+++ b/arch/openrisc/include/asm/thread_info.h
@@ -48,7 +48,6 @@ typedef unsigned long mm_segment_t;
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
 	__s32			preempt_count; /* 0 => preemptable, <0 => BUG */
@@ -73,7 +72,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)				\
 {							\
 	.task		= &tsk,				\
-	.exec_domain	= &default_exec_domain,		\
 	.flags		= 0,				\
 	.cpu		= 0,				\
 	.preempt_count	= 1,				\
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 386af258591d..7095dfe7666b 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -197,7 +197,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
 	unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM;
 
-	set_fs(USER_DS);
 	memset(regs, 0, sizeof(struct pt_regs));
 
 	regs->pc = pc;
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 4fc7ccc0a2cf..b4ed8b36e078 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -329,30 +329,32 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	version = (vr & SPR_VR_VER) >> 24;
 	revision = vr & SPR_VR_REV;
 
-	return seq_printf(m,
-			  "cpu\t\t: OpenRISC-%x\n"
-			  "revision\t: %d\n"
-			  "frequency\t: %ld\n"
-			  "dcache size\t: %d bytes\n"
-			  "dcache block size\t: %d bytes\n"
-			  "icache size\t: %d bytes\n"
-			  "icache block size\t: %d bytes\n"
-			  "immu\t\t: %d entries, %lu ways\n"
-			  "dmmu\t\t: %d entries, %lu ways\n"
-			  "bogomips\t: %lu.%02lu\n",
-			  version,
-			  revision,
-			  loops_per_jiffy * HZ,
-			  cpuinfo.dcache_size,
-			  cpuinfo.dcache_block_size,
-			  cpuinfo.icache_size,
-			  cpuinfo.icache_block_size,
-			  1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
-			  1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
-			  1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
-			  1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
-			  (loops_per_jiffy * HZ) / 500000,
-			  ((loops_per_jiffy * HZ) / 5000) % 100);
+	seq_printf(m,
+		   "cpu\t\t: OpenRISC-%x\n"
+		   "revision\t: %d\n"
+		   "frequency\t: %ld\n"
+		   "dcache size\t: %d bytes\n"
+		   "dcache block size\t: %d bytes\n"
+		   "icache size\t: %d bytes\n"
+		   "icache block size\t: %d bytes\n"
+		   "immu\t\t: %d entries, %lu ways\n"
+		   "dmmu\t\t: %d entries, %lu ways\n"
+		   "bogomips\t: %lu.%02lu\n",
+		   version,
+		   revision,
+		   loops_per_jiffy * HZ,
+		   cpuinfo.dcache_size,
+		   cpuinfo.dcache_block_size,
+		   cpuinfo.icache_size,
+		   cpuinfo.icache_block_size,
+		   1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
+		   1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
+		   1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
+		   1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
+		   (loops_per_jiffy * HZ) / 500000,
+		   ((loops_per_jiffy * HZ) / 5000) % 100);
+
+	return 0;
 }
 
 static void *c_start(struct seq_file *m, loff_t * pos)
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 4112175bf803..c82be69b43c6 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -193,8 +193,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	if (err)
 		return -EFAULT;
 
-	/* TODO what is the current->exec_domain stuff and invmap ? */
-
 	/* Set up registers for signal handler */
 	regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* what we enter NOW */
 	regs->gpr[9] = (unsigned long)return_ip;     /* what we enter LATER */
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8014727a2743..c36546959e86 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -103,6 +103,11 @@ config ARCH_MAY_HAVE_PC_FDC
 	depends on BROKEN
 	default y
 
+config PGTABLE_LEVELS
+	int
+	default 3 if 64BIT && PARISC_PAGE_SIZE_4KB
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 8686237a3c3c..7a4bcc36303d 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -20,6 +20,8 @@ generic-y += param.h
 generic-y += percpu.h
 generic-y += poll.h
 generic-y += preempt.h
+generic-y += scatterlist.h
+generic-y += seccomp.h
 generic-y += segment.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index f213f5b4c423..3a08eae3318f 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -26,7 +26,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	if (likely(pgd != NULL)) {
 		memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
-#ifdef CONFIG_64BIT
+#if CONFIG_PGTABLE_LEVELS == 3
 		actual_pgd += PTRS_PER_PGD;
 		/* Populate first pmd with allocated memory.  We mark it
 		 * with PxD_FLAG_ATTACHED as a signal to the system that this
@@ -45,13 +45,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-#ifdef CONFIG_64BIT
+#if CONFIG_PGTABLE_LEVELS == 3
 	pgd -= PTRS_PER_PGD;
 #endif
 	free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
 }
 
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 
 /* Three Level Page Table Support for pmd's */
 
@@ -72,12 +72,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-#ifdef CONFIG_64BIT
 	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pgd;
-		 * cannot free it */
+		/*
+		 * This is the permanent pmd attached to the pgd;
+		 * cannot free it.
+		 * Increment the counter to compensate for the decrement
+		 * done by generic mm code.
+		 */
+		mm_inc_nr_pmds(mm);
 		return;
-#endif
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
@@ -99,7 +102,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
-#ifdef CONFIG_64BIT
+#if CONFIG_PGTABLE_LEVELS == 3
 	/* preserve the gateway marker if this is the beginning of
 	 * the permanent pmd */
 	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 15207b9362bf..0a183756d6ec 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -68,13 +68,11 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
 #define KERNEL_INITIAL_ORDER	24	/* 0 to 1<<24 = 16MB */
 #define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
 
-#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-#define PT_NLEVELS	3
+#if CONFIG_PGTABLE_LEVELS == 3
 #define PGD_ORDER	1 /* Number of pages per pgd */
 #define PMD_ORDER	1 /* Number of pages per pmd */
 #define PGD_ALLOC_ORDER	2 /* first pgd contains pmd */
 #else
-#define PT_NLEVELS	2
 #define PGD_ORDER	1 /* Number of pages per pgd */
 #define PGD_ALLOC_ORDER	PGD_ORDER
 #endif
@@ -93,7 +91,7 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
 #else
 #define __PAGETABLE_PMD_FOLDED
@@ -277,7 +275,7 @@ extern unsigned long *empty_zero_page;
 #define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
 #define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
 
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 /* The first entry of the permanent pmd is not there if it contains
  * the gateway marker */
 #define pmd_none(x)	(!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED)
@@ -287,7 +285,7 @@ extern unsigned long *empty_zero_page;
 #define pmd_bad(x)	(!(pmd_flag(x) & PxD_FLAG_VALID))
 #define pmd_present(x)	(pmd_flag(x) & PxD_FLAG_PRESENT)
 static inline void pmd_clear(pmd_t *pmd) {
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
 		/* This is the entry pointing to the permanent pmd
 		 * attached to the pgd; cannot clear it */
@@ -299,7 +297,7 @@ static inline void pmd_clear(pmd_t *pmd) {
 
 
 
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 #define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
 #define pgd_page(pgd)	virt_to_page((void *)pgd_page_vaddr(pgd))
 
@@ -309,7 +307,7 @@ static inline void pmd_clear(pmd_t *pmd) {
 #define pgd_bad(x)      (!(pgd_flag(x) & PxD_FLAG_VALID))
 #define pgd_present(x)  (pgd_flag(x) & PxD_FLAG_PRESENT)
 static inline void pgd_clear(pgd_t *pgd) {
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 	if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
 		/* This is the permanent pmd attached to the pgd; cannot
 		 * free it */
@@ -393,7 +391,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 /* Find an entry in the second-level page table.. */
 
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 #define pmd_offset(dir,address) \
 ((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
 #else
diff --git a/arch/parisc/include/asm/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
deleted file mode 100644
index 8bf1f0dd1f15..000000000000
--- a/arch/parisc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_PARISC_SCATTERLIST_H
-#define _ASM_PARISC_SCATTERLIST_H
-
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
-
-#endif /* _ASM_PARISC_SCATTERLIST_H */
diff --git a/arch/parisc/include/asm/seccomp.h b/arch/parisc/include/asm/seccomp.h
deleted file mode 100644
index 015f7887aa29..000000000000
--- a/arch/parisc/include/asm/seccomp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_PARISC_SECCOMP_H
-#define _ASM_PARISC_SECCOMP_H
-
-#include <linux/unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#define __NR_seccomp_read_32 __NR_read
-#define __NR_seccomp_write_32 __NR_write
-#define __NR_seccomp_exit_32 __NR_exit
-#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn
-
-#endif	/* _ASM_PARISC_SECCOMP_H */
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index fb13e3865563..e96e693fd58c 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -9,7 +9,6 @@
 
 struct thread_info {
 	struct task_struct *task;	/* main task structure */
-	struct exec_domain *exec_domain;/* execution domain */
 	unsigned long flags;		/* thread_info flags (see TIF_*) */
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	__u32 cpu;			/* current CPU */
@@ -19,7 +18,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.addr_limit	= KERNEL_DS,		\
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index dcd55103a4bb..59001cea13f9 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -242,7 +242,6 @@ int main(void)
 	DEFINE(PT_SZ_ALGN, align_frame(sizeof(struct pt_regs), FRAME_ALIGN));
 	BLANK();
 	DEFINE(TI_TASK, offsetof(struct thread_info, task));
-	DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_SEGMENT, offsetof(struct thread_info, addr_limit));
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 2ab16bb160a8..75819617f93b 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -398,7 +398,7 @@
 	 * can address up to 1TB
 	 */
 	.macro		L2_ptep	pmd,pte,index,va,fault
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 	extru		\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
 #else
 # if defined(CONFIG_64BIT)
@@ -436,7 +436,7 @@
 	 * all ILP32 processes and all the kernel for machines with
 	 * under 4GB of memory) */
 	.macro		L3_ptep pgd,pte,index,va,fault
-#if PT_NLEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */
+#if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */
 	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
 	copy		%r0,\pte
 	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index d4dc588c0dc1..e7d64527aff9 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -74,7 +74,7 @@ $bss_loop:
 	mtctl		%r4,%cr24	/* Initialize kernel root pointer */
 	mtctl		%r4,%cr25	/* Initialize user root pointer */
 
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 	/* Set pmd in pgd */
 	load32		PA(pmd0),%r5
 	shrd            %r5,PxD_VALUE_SHIFT,%r3	
@@ -97,7 +97,7 @@ $bss_loop:
 	stw		%r3,0(%r4)
 	ldo		(PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3
 	addib,>		-1,%r1,1b
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 	ldo             ASM_PMD_ENTRY_SIZE(%r4),%r4
 #else
 	ldo             ASM_PGD_ENTRY_SIZE(%r4),%r4
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index cfe056fe7f5c..f3191db6e2e9 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -525,8 +525,8 @@ void do_cpu_irq_mask(struct pt_regs *regs)
 	desc = irq_to_desc(irq);
 	cpumask_copy(&dest, desc->irq_data.affinity);
 	if (irqd_is_per_cpu(&desc->irq_data) &&
-	    !cpu_isset(smp_processor_id(), dest)) {
-		int cpu = first_cpu(dest);
+	    !cpumask_test_cpu(smp_processor_id(), &dest)) {
+		int cpu = cpumask_first(&dest);
 
 		printk(KERN_DEBUG "redirecting irq %d from CPU %d to %d\n",
 		       irq, smp_processor_id(), cpu);
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index d87d1c476d85..ff834fd67478 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -482,7 +482,7 @@ static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist, int n
 	BUG_ON(direction == DMA_NONE);
 
 	for (i = 0; i < nents; i++, sglist++ ) {
-		unsigned long vaddr = sg_virt_addr(sglist);
+		unsigned long vaddr = (unsigned long)sg_virt(sglist);
 		sg_dma_address(sglist) = (dma_addr_t) virt_to_phys(vaddr);
 		sg_dma_len(sglist) = sglist->length;
 		flush_kernel_dcache_range(vaddr, sglist->length);
@@ -502,7 +502,7 @@ static void pa11_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, in
 	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
 
 	for (i = 0; i < nents; i++, sglist++ )
-		flush_kernel_dcache_range(sg_virt_addr(sglist), sglist->length);
+		flush_kernel_vmap_range(sg_virt(sglist), sglist->length);
 	return;
 }
 
@@ -527,7 +527,7 @@ static void pa11_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl
 	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
 
 	for (i = 0; i < nents; i++, sglist++ )
-		flush_kernel_dcache_range(sg_virt_addr(sglist), sglist->length);
+		flush_kernel_vmap_range(sg_virt(sglist), sglist->length);
 }
 
 static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
@@ -537,7 +537,7 @@ static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *
 	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
 
 	for (i = 0; i < nents; i++, sglist++ )
-		flush_kernel_dcache_range(sg_virt_addr(sglist), sglist->length);
+		flush_kernel_vmap_range(sg_virt(sglist), sglist->length);
 }
 
 struct hppa_dma_ops pcxl_dma_ops = {
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 5a8997d63899..8eefb12d1d33 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -55,8 +55,8 @@
 #define ENTRY_COMP(_name_) .word sys_##_name_
 #endif
 
-	ENTRY_SAME(restart_syscall)	/* 0 */
-	ENTRY_SAME(exit)
+90:	ENTRY_SAME(restart_syscall)	/* 0 */
+91:	ENTRY_SAME(exit)
 	ENTRY_SAME(fork_wrapper)
 	ENTRY_SAME(read)
 	ENTRY_SAME(write)
@@ -439,7 +439,10 @@
 	ENTRY_SAME(bpf)
 	ENTRY_COMP(execveat)
 
-	/* Nothing yet */
+
+.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
+.error "size of syscall table does not fit value of __NR_Linux_syscalls"
+.endif
 
 #undef ENTRY_SAME
 #undef ENTRY_DIFF
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 15dbe81cf5f3..c229427fa546 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -34,7 +34,7 @@
 extern int  data_start;
 extern void parisc_kernel_start(void);	/* Kernel entry point in head.S */
 
-#if PT_NLEVELS == 3
+#if CONFIG_PGTABLE_LEVELS == 3
 /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout
  * with the first pmd adjacent to the pgd and below it. gcc doesn't actually
  * guarantee that global objects will be laid out in memory in the same order
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 22b0940494bb..190cc48abc0c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -88,7 +88,7 @@ config PPC
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select BINFMT_ELF
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
@@ -126,7 +126,7 @@ config PPC
 	select IRQ_FORCED_THREADING
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_BPF_JIT if PPC64
+	select HAVE_BPF_JIT
 	select HAVE_ARCH_JUMP_LABEL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_HAS_GCOV_PROFILE_ALL
@@ -152,6 +152,7 @@ config PPC
 	select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
 	select NO_BOOTMEM
 	select HAVE_GENERIC_RCU_GUP
+	select HAVE_PERF_EVENTS_NMI if PPC64
 
 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
@@ -189,9 +190,6 @@ config ARCH_MAY_HAVE_PC_FDC
 	bool
 	default PCI
 
-config PPC_OF
-	def_bool y
-
 config PPC_UDBG_16550
 	bool
 	default n
@@ -297,6 +295,12 @@ config ZONE_DMA32
 	bool
 	default y if PPC64
 
+config PGTABLE_LEVELS
+	int
+	default 2 if !PPC64
+	default 3 if PPC_64K_PAGES
+	default 4
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index ec2e40f2cc11..0efa8f90a8f1 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -117,7 +117,7 @@ config BDI_SWITCH
 
 config BOOTX_TEXT
 	bool "Support for early boot text console (BootX or OpenFirmware only)"
-	depends on PPC_OF && PPC_BOOK3S
+	depends on PPC_BOOK3S
 	help
 	  Say Y here to see progress messages from the boot firmware in text
 	  mode. Requires either BootX or Open Firmware.
@@ -193,13 +193,6 @@ config PPC_EARLY_DEBUG_PAS_REALMODE
 	  Select this to enable early debugging for PA Semi.
 	  Output will be on UART0.
 
-config PPC_EARLY_DEBUG_BEAT
-	bool "Beat HV Console"
-	depends on PPC_CELLEB
-	select PPC_UDBG_BEAT
-	help
-	  Select this to enable early debugging for Celleb with Beat.
-
 config PPC_EARLY_DEBUG_44x
 	bool "Early serial debugging for IBM/AMCC 44x CPUs"
 	depends on 44x
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index fc502e042438..07a480861f78 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -248,10 +248,10 @@ boot := arch/$(ARCH)/boot
 
 ifeq ($(CONFIG_RELOCATABLE),y)
 quiet_cmd_relocs_check = CALL    $<
-      cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux"
+      cmd_relocs_check = $(CONFIG_SHELL) $< "$(OBJDUMP)" "$(obj)/vmlinux"
 
 PHONY += relocs_check
-relocs_check: arch/powerpc/relocs_check.pl vmlinux
+relocs_check: arch/powerpc/relocs_check.sh vmlinux
 	$(call cmd,relocs_check)
 
 zImage: relocs_check
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 8a5bc1cfc6aa..73eddda53b8e 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -110,7 +110,6 @@ src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
 src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
 src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
 src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
-src-plat-$(CONFIG_PPC_CELLEB) += pseries-head.S
 src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S
 
 src-wlib := $(sort $(src-wlib-y))
@@ -215,7 +214,6 @@ image-$(CONFIG_PPC_POWERNV)		+= zImage.pseries
 image-$(CONFIG_PPC_MAPLE)		+= zImage.maple
 image-$(CONFIG_PPC_IBM_CELL_BLADE)	+= zImage.pseries
 image-$(CONFIG_PPC_PS3)			+= dtbImage.ps3
-image-$(CONFIG_PPC_CELLEB)		+= zImage.pseries
 image-$(CONFIG_PPC_CELL_QPACE)		+= zImage.pseries
 image-$(CONFIG_PPC_CHRP)		+= zImage.chrp
 image-$(CONFIG_PPC_EFIKA)		+= zImage.chrp
@@ -317,7 +315,7 @@ endif
 # Allow extra targets to be added to the defconfig
 image-y	+= $(subst ",,$(CONFIG_EXTRA_TARGETS))
 
-initrd-  := $(patsubst zImage%, zImage.initrd%, $(image-n) $(image-))
+initrd-  := $(patsubst zImage%, zImage.initrd%, $(image-))
 initrd-y := $(patsubst zImage%, zImage.initrd%, \
 		$(patsubst dtbImage%, dtbImage.initrd%, \
 		$(patsubst simpleImage%, simpleImage.initrd%, \
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 14de4f8778a7..12866ccb5694 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -155,29 +155,29 @@ p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
 	ld	r9,(p_rela-p_base)(r10)
 	add	r9,r9,r10
 
-	li	r7,0
+	li	r13,0
 	li	r8,0
-9:	ld	r6,0(r11)       /* get tag */
-	cmpdi	r6,0
+9:	ld	r12,0(r11)       /* get tag */
+	cmpdi	r12,0
 	beq	12f              /* end of list */
-	cmpdi	r6,RELA
+	cmpdi	r12,RELA
 	bne	10f
-	ld	r7,8(r11)       /* get RELA pointer in r7 */
+	ld	r13,8(r11)       /* get RELA pointer in r13 */
 	b	11f
-10:	addis	r6,r6,(-RELACOUNT)@ha
-	cmpdi	r6,RELACOUNT@l
+10:	addis	r12,r12,(-RELACOUNT)@ha
+	cmpdi	r12,RELACOUNT@l
 	bne	11f
 	ld	r8,8(r11)       /* get RELACOUNT value in r8 */
 11:	addi	r11,r11,16
 	b	9b
 12:
-	cmpdi	r7,0            /* check we have both RELA and RELACOUNT */
+	cmpdi	r13,0            /* check we have both RELA and RELACOUNT */
 	cmpdi	cr1,r8,0
 	beq	3f
 	beq	cr1,3f
 
 	/* Calcuate the runtime offset. */
-	subf	r7,r7,r9
+	subf	r13,r13,r9
 
 	/* Run through the list of relocations and process the
 	 * R_PPC64_RELATIVE ones. */
@@ -185,10 +185,10 @@ p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
 13:	ld	r0,8(r9)        /* ELF64_R_TYPE(reloc->r_info) */
 	cmpdi	r0,22           /* R_PPC64_RELATIVE */
 	bne	3f
-	ld	r6,0(r9)        /* reloc->r_offset */
+	ld	r12,0(r9)        /* reloc->r_offset */
 	ld	r0,16(r9)       /* reloc->r_addend */
-	add	r0,r0,r7
-	stdx	r0,r7,r6
+	add	r0,r0,r13
+	stdx	r0,r13,r12
 	addi	r9,r9,24
 	bdnz	13b
 
@@ -218,7 +218,7 @@ p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
 	beq	6f
 	ld	r1,0(r8)
 	li	r0,0
-	stdu	r0,-16(r1)	/* establish a stack frame */
+	stdu	r0,-112(r1)	/* establish a stack frame */
 6:
 #endif  /* __powerpc64__ */
 	/* Call platform_init() */
diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts
deleted file mode 100644
index 2aa5cd318ce8..000000000000
--- a/arch/powerpc/boot/dts/b4860emu.dts
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * B4860 emulator Device Tree Source
- *
- * Copyright 2013 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in the
- *       documentation and/or other materials provided with the distribution.
- *     * Neither the name of Freescale Semiconductor nor the
- *       names of its contributors may be used to endorse or promote products
- *       derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * This software is provided by Freescale Semiconductor "as is" and any
- * express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are
- * disclaimed. In no event shall Freescale Semiconductor be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused and
- * on any theory of liability, whether in contract, strict liability, or tort
- * (including negligence or otherwise) arising in any way out of the use of
- * this software, even if advised of the possibility of such damage.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e6500_power_isa.dtsi"
-
-/ {
-	compatible = "fsl,B4860";
-	#address-cells = <2>;
-	#size-cells = <2>;
-	interrupt-parent = <&mpic>;
-
-	aliases {
-		ccsr = &soc;
-
-		serial0 = &serial0;
-		serial1 = &serial1;
-		serial2 = &serial2;
-		serial3 = &serial3;
-		dma0 = &dma0;
-		dma1 = &dma1;
-	};
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		cpu0: PowerPC,e6500@0 {
-			device_type = "cpu";
-			reg = <0 1>;
-			next-level-cache = <&L2>;
-			fsl,portid-mapping = <0x80000000>;
-		};
-		cpu1: PowerPC,e6500@2 {
-			device_type = "cpu";
-			reg = <2 3>;
-			next-level-cache = <&L2>;
-			fsl,portid-mapping = <0x80000000>;
-		};
-		cpu2: PowerPC,e6500@4 {
-			device_type = "cpu";
-			reg = <4 5>;
-			next-level-cache = <&L2>;
-			fsl,portid-mapping = <0x80000000>;
-		};
-		cpu3: PowerPC,e6500@6 {
-			device_type = "cpu";
-			reg = <6 7>;
-			next-level-cache = <&L2>;
-			fsl,portid-mapping = <0x80000000>;
-		};
-	};
-};
-
-/ {
-	model = "fsl,B4860QDS";
-	compatible = "fsl,B4860EMU", "fsl,B4860QDS";
-	#address-cells = <2>;
-	#size-cells = <2>;
-	interrupt-parent = <&mpic>;
-
-	ifc: localbus@ffe124000 {
-		reg = <0xf 0xfe124000 0 0x2000>;
-		ranges = <0 0 0xf 0xe8000000 0x08000000
-			  2 0 0xf 0xff800000 0x00010000
-			  3 0 0xf 0xffdf0000 0x00008000>;
-
-		nor@0,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "cfi-flash";
-			reg = <0x0 0x0 0x8000000>;
-			bank-width = <2>;
-			device-width = <1>;
-		};
-	};
-
-	memory {
-		device_type = "memory";
-	};
-
-	soc: soc@ffe000000 {
-		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
-		reg = <0xf 0xfe000000 0 0x00001000>;
-	};
-};
-
-&ifc {
-	#address-cells = <2>;
-	#size-cells = <1>;
-	compatible = "fsl,ifc", "simple-bus";
-	interrupts = <25 2 0 0>;
-};
-
-&soc {
-	#address-cells = <1>;
-	#size-cells = <1>;
-	device_type = "soc";
-	compatible = "simple-bus";
-
-	soc-sram-error {
-		compatible = "fsl,soc-sram-error";
-		interrupts = <16 2 1 2>;
-	};
-
-	corenet-law@0 {
-		compatible = "fsl,corenet-law";
-		reg = <0x0 0x1000>;
-		fsl,num-laws = <32>;
-	};
-
-	ddr1: memory-controller@8000 {
-		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
-		reg = <0x8000 0x1000>;
-		interrupts = <16 2 1 8>;
-	};
-
-	ddr2: memory-controller@9000 {
-		compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller";
-		reg = <0x9000 0x1000>;
-		interrupts = <16 2 1 9>;
-	};
-
-	cpc: l3-cache-controller@10000 {
-		compatible = "fsl,b4-l3-cache-controller", "cache";
-		reg = <0x10000 0x1000
-		       0x11000 0x1000>;
-		interrupts = <16 2 1 4>;
-	};
-
-	corenet-cf@18000 {
-		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
-		reg = <0x18000 0x1000>;
-		interrupts = <16 2 1 0>;
-		fsl,ccf-num-csdids = <32>;
-		fsl,ccf-num-snoopids = <32>;
-	};
-
-	iommu@20000 {
-		compatible = "fsl,pamu-v1.0", "fsl,pamu";
-		reg = <0x20000 0x4000>;
-		fsl,portid-mapping = <0x8000>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		interrupts = <
-			24 2 0 0
-			16 2 1 1>;
-		pamu0: pamu@0 {
-			reg = <0 0x1000>;
-			fsl,primary-cache-geometry = <8 1>;
-			fsl,secondary-cache-geometry = <32 2>;
-		};
-	};
-
-/include/ "fsl/qoriq-mpic.dtsi"
-
-	guts: global-utilities@e0000 {
-		compatible = "fsl,b4-device-config";
-		reg = <0xe0000 0xe00>;
-		fsl,has-rstcr;
-		fsl,liodn-bits = <12>;
-	};
-
-/include/ "fsl/qoriq-clockgen2.dtsi"
-	global-utilities@e1000 {
-		compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0";
-	};
-
-/include/ "fsl/qoriq-dma-0.dtsi"
-	dma@100300 {
-		fsl,iommu-parent = <&pamu0>;
-		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
-	};
-
-/include/ "fsl/qoriq-dma-1.dtsi"
-	dma@101300 {
-		fsl,iommu-parent = <&pamu0>;
-		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
-	};
-
-/include/ "fsl/qoriq-i2c-0.dtsi"
-/include/ "fsl/qoriq-i2c-1.dtsi"
-/include/ "fsl/qoriq-duart-0.dtsi"
-/include/ "fsl/qoriq-duart-1.dtsi"
-
-	L2: l2-cache-controller@c20000 {
-		compatible = "fsl,b4-l2-cache-controller";
-		reg = <0xc20000 0x1000>;
-		next-level-cache = <&cpc>;
-	};
-};
diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi
index e5bde0b85135..24ed80dc2120 100644
--- a/arch/powerpc/boot/dts/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/b4qds.dtsi
@@ -1,7 +1,7 @@
 /*
  * B4420DS Device Tree Source
  *
- * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -97,10 +97,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01052000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
index 65100b9636b7..f35e9e0a5445 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * B4860 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -109,6 +109,64 @@
 	};
 };
 
+&bportals {
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+	bman-portal@48000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <141 2 0 0>;
+	};
+	bman-portal@4c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <143 2 0 0>;
+	};
+	bman-portal@50000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <145 2 0 0>;
+	};
+	bman-portal@54000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <147 2 0 0>;
+	};
+	bman-portal@58000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <149 2 0 0>;
+	};
+	bman-portal@5c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <151 2 0 0>;
+	};
+	bman-portal@60000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <153 2 0 0>;
+	};
+};
+
 &soc {
 	ddr2: memory-controller@9000 {
 		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 1a54ba71f685..73136c0029d2 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * B4420 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * this software, even if advised of the possibility of such damage.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -128,6 +133,83 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -261,6 +343,11 @@
 /include/ "qoriq-duart-1.dtsi"
 /include/ "qoriq-sec5.3-0.dtsi"
 
+/include/ "qoriq-bman1.dtsi"
+	bman: bman@31a000 {
+		interrupts = <16 2 1 29>;
+	};
+
 	L2: l2-cache-controller@c20000 {
 		compatible = "fsl,b4-l2-cache-controller";
 		reg = <0xc20000 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
index 81437fdf1db4..7780f21430cb 100644
--- a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P1023/P1017 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -97,6 +102,28 @@
 	};
 };
 
+&bportals {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x100000 0x1000>;
+		interrupts = <30 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x101000 0x1000>;
+		interrupts = <32 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x102000 0x1000>;
+		interrupts = <34 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -221,6 +248,14 @@
 /include/ "pq3-mpic.dtsi"
 /include/ "pq3-mpic-timer-B.dtsi"
 
+	bman: bman@8a000 {
+		compatible = "fsl,bman";
+		reg = <0x8a000 0x1000>;
+		interrupts = <16 2 0 0>;
+		fsl,bman-portals = <&bportals>;
+		memory-region = <&bman_fbpr>;
+	};
+
 	global-utilities@e0000 {
 		compatible = "fsl,p1023-guts";
 		reg = <0xe0000 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index efd74db4f9b0..f2feacfd9a25 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P2041/P2040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -216,6 +221,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -407,4 +414,6 @@
 crypto: crypto@300000 {
 		fsl,iommu-parent = <&pamu1>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
index d7425ef1ae41..d6fea37395ad 100644
--- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P3041 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -243,6 +248,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -434,4 +441,6 @@
 crypto: crypto@300000 {
 		fsl,iommu-parent = <&pamu1>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
index 7005a4a4cef0..89482c9b2301 100644
--- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P4080/P4040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -243,6 +248,8 @@
 
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -490,4 +497,6 @@
 crypto: crypto@300000 {
 		fsl,iommu-parent = <&pamu1>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
index 55834211bd28..6e04851e2fc9 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P5020/5010 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &lbc {
 	compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -240,6 +245,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -421,6 +428,8 @@
 		fsl,iommu-parent = <&pamu1>;
 	};
 
+/include/ "qoriq-bman1.dtsi"
+
 /include/ "qoriq-raid1.0-0.dtsi"
 	raideng@320000 {
 		fsl,iommu-parent = <&pamu1>;
diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
index 6e4cd6ce363c..5e44dfa1e1a5 100644
--- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P5040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * software, even if advised of the possibility of such damage.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &lbc {
 	compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -195,6 +200,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -399,4 +406,6 @@
 	crypto@300000 {
 		fsl,iommu-parent = <&pamu4>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index 15ae462e758f..5cc01be5b152 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * T1040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -218,6 +223,63 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -401,4 +463,5 @@
 		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
 	};
 /include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
index 1ce91e3485a9..86bdaf6cbd14 100644
--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * T2081 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -224,6 +229,103 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -400,6 +502,7 @@
 		phy_type = "utmi";
 	};
 /include/ "qoriq-sec5.2-0.dtsi"
+/include/ "qoriq-bman1.dtsi"
 
 	L2_1: l2-cache-controller@c20000 {
 		/* Cluster 0 L2 cache */
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index 0e96fcabe812..4d4f25895d8c 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * T4240 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -294,6 +299,263 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+	bman-portal@48000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <141 2 0 0>;
+	};
+	bman-portal@4c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <143 2 0 0>;
+	};
+	bman-portal@50000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <145 2 0 0>;
+	};
+	bman-portal@54000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <147 2 0 0>;
+	};
+	bman-portal@58000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <149 2 0 0>;
+	};
+	bman-portal@5c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <151 2 0 0>;
+	};
+	bman-portal@60000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <153 2 0 0>;
+	};
+	bman-portal@64000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+		interrupts = <155 2 0 0>;
+	};
+	bman-portal@68000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+		interrupts = <157 2 0 0>;
+	};
+	bman-portal@6c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+		interrupts = <159 2 0 0>;
+	};
+	bman-portal@70000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+		interrupts = <161 2 0 0>;
+	};
+	bman-portal@74000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+		interrupts = <163 2 0 0>;
+	};
+	bman-portal@78000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+		interrupts = <165 2 0 0>;
+	};
+	bman-portal@7c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+		interrupts = <167 2 0 0>;
+	};
+	bman-portal@80000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+		interrupts = <169 2 0 0>;
+	};
+	bman-portal@84000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+		interrupts = <171 2 0 0>;
+	};
+	bman-portal@88000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+		interrupts = <173 2 0 0>;
+	};
+	bman-portal@8c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+		interrupts = <175 2 0 0>;
+	};
+	bman-portal@90000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+		interrupts = <385 2 0 0>;
+	};
+	bman-portal@94000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+		interrupts = <387 2 0 0>;
+	};
+	bman-portal@98000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+		interrupts = <389 2 0 0>;
+	};
+	bman-portal@9c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+		interrupts = <391 2 0 0>;
+	};
+	bman-portal@a0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+		interrupts = <393 2 0 0>;
+	};
+	bman-portal@a4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+		interrupts = <395 2 0 0>;
+	};
+	bman-portal@a8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+		interrupts = <397 2 0 0>;
+	};
+	bman-portal@ac000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+		interrupts = <399 2 0 0>;
+	};
+	bman-portal@b0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+		interrupts = <401 2 0 0>;
+	};
+	bman-portal@b4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+		interrupts = <403 2 0 0>;
+	};
+	bman-portal@b8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+		interrupts = <405 2 0 0>;
+	};
+	bman-portal@bc000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+		interrupts = <407 2 0 0>;
+	};
+	bman-portal@c0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+		interrupts = <409 2 0 0>;
+	};
+	bman-portal@c4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+		interrupts = <411 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -486,6 +748,7 @@
 /include/ "qoriq-sata2-0.dtsi"
 /include/ "qoriq-sata2-1.dtsi"
 /include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-bman1.dtsi"
 
 	L2_1: l2-cache-controller@c20000 {
 		compatible = "fsl,t4240-l2-cache-controller";
diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts
index 89b4119f3b19..97e6d11d1e6d 100644
--- a/arch/powerpc/boot/dts/kmcoge4.dts
+++ b/arch/powerpc/boot/dts/kmcoge4.dts
@@ -25,10 +25,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/oca4080.dts b/arch/powerpc/boot/dts/oca4080.dts
index 3d4c751d1608..eb76caae11d9 100644
--- a/arch/powerpc/boot/dts/oca4080.dts
+++ b/arch/powerpc/boot/dts/oca4080.dts
@@ -49,10 +49,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p1023rdb.dts b/arch/powerpc/boot/dts/p1023rdb.dts
index 0a06a88ddbd5..9236e3742a23 100644
--- a/arch/powerpc/boot/dts/p1023rdb.dts
+++ b/arch/powerpc/boot/dts/p1023rdb.dts
@@ -1,7 +1,7 @@
 /*
  * P1023 RDB Device Tree Source
  *
- *    Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Author: Chunhe Lan <Chunhe.Lan@freescale.com>
  *
@@ -47,6 +47,21 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
+	bportals: bman-portals@ff200000 {
+		ranges = <0x0 0xf 0xff200000 0x200000>;
+	};
+
 	soc: soc@ff600000 {
 		ranges = <0x0 0x0 0xff600000 0x200000>;
 
@@ -228,7 +243,6 @@
 				  0x0 0x100000>;
 		};
 	};
-
 };
 
 /include/ "fsl/p1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/p2041rdb.dts
index d97ad74c7279..c1e69dc7188e 100644
--- a/arch/powerpc/boot/dts/p2041rdb.dts
+++ b/arch/powerpc/boot/dts/p2041rdb.dts
@@ -1,7 +1,7 @@
 /*
  * P2041RDB Device Tree Source
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts
index 394ea9c943c9..2192fe94866d 100644
--- a/arch/powerpc/boot/dts/p3041ds.dts
+++ b/arch/powerpc/boot/dts/p3041ds.dts
@@ -1,7 +1,7 @@
 /*
  * P3041DS Device Tree Source
  *
- * Copyright 2010-2011 Freescale Semiconductor Inc.
+ * Copyright 2010 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 1cf6148b8b05..fad441654642 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -1,7 +1,7 @@
 /*
  * P4080DS Device Tree Source
  *
- * Copyright 2009-2011 Freescale Semiconductor Inc.
+ * Copyright 2009 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts
index b7f3057cd894..7382636dc560 100644
--- a/arch/powerpc/boot/dts/p5020ds.dts
+++ b/arch/powerpc/boot/dts/p5020ds.dts
@@ -1,7 +1,7 @@
 /*
  * P5020DS Device Tree Source
  *
- * Copyright 2010-2011 Freescale Semiconductor Inc.
+ * Copyright 2010 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts
index 7e04bf487c04..35dabf5b6098 100644
--- a/arch/powerpc/boot/dts/p5040ds.dts
+++ b/arch/powerpc/boot/dts/p5040ds.dts
@@ -1,7 +1,7 @@
 /*
  * P5040DS Device Tree Source
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t104xqds.dtsi b/arch/powerpc/boot/dts/t104xqds.dtsi
index 234f4b596c5b..f7e9bfbeefc7 100644
--- a/arch/powerpc/boot/dts/t104xqds.dtsi
+++ b/arch/powerpc/boot/dts/t104xqds.dtsi
@@ -1,7 +1,7 @@
 /*
  * T104xQDS Device Tree Source
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,6 +38,17 @@
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
 		ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -77,6 +88,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi
index 187add885cae..76e07a3f2ca8 100644
--- a/arch/powerpc/boot/dts/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/t104xrdb.dtsi
@@ -33,6 +33,16 @@
  */
 
 / {
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
 
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
@@ -69,6 +79,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi
index 59061834d54e..c42e07f4f648 100644
--- a/arch/powerpc/boot/dts/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/t208xqds.dtsi
@@ -1,7 +1,7 @@
 /*
  * T2080/T2081 QDS Device Tree Source
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,6 +39,17 @@
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
 		ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -78,6 +89,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
@@ -137,7 +152,7 @@
 					rtc@68 {
 						compatible = "dallas,ds3232";
 						reg = <0x68>;
-						interrupts = <0x1 0x1 0 0>;
+						interrupts = <0xb 0x1 0 0>;
 					};
 				};
 
diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/t208xrdb.dtsi
index 1481e192e783..e1463b165d0e 100644
--- a/arch/powerpc/boot/dts/t208xrdb.dtsi
+++ b/arch/powerpc/boot/dts/t208xrdb.dtsi
@@ -39,6 +39,17 @@
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
 		ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -79,6 +90,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t4240qds.dts b/arch/powerpc/boot/dts/t4240qds.dts
index 97683f6a2936..6df77766410b 100644
--- a/arch/powerpc/boot/dts/t4240qds.dts
+++ b/arch/powerpc/boot/dts/t4240qds.dts
@@ -1,7 +1,7 @@
 /*
  * T4240QDS Device Tree Source
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -100,10 +100,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/t4240rdb.dts
index 53761d4e8c51..46049cf37f02 100644
--- a/arch/powerpc/boot/dts/t4240rdb.dts
+++ b/arch/powerpc/boot/dts/t4240rdb.dts
@@ -69,10 +69,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c
index bb8b9b3505ee..535e8fd8900d 100644
--- a/arch/powerpc/boot/libfdt-wrapper.c
+++ b/arch/powerpc/boot/libfdt-wrapper.c
@@ -44,12 +44,12 @@
 
 #define offset_devp(off)	\
 	({ \
-		int _offset = (off); \
+		unsigned long _offset = (off); \
 		check_err(_offset) ? NULL : (void *)(_offset+1); \
 	})
 
-#define devp_offset_find(devp)	(((int)(devp))-1)
-#define devp_offset(devp)	(devp ? ((int)(devp))-1 : 0)
+#define devp_offset_find(devp)	(((unsigned long)(devp))-1)
+#define devp_offset(devp)	(devp ? ((unsigned long)(devp))-1 : 0)
 
 static void *fdt;
 static void *buf; /* = NULL */
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index c89fdb1b80e1..8dcd744e5728 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -4,15 +4,17 @@
 #include <types.h>
 #include <string.h>
 
+#include "of.h"
+
 typedef u32 uint32_t;
 typedef u64 uint64_t;
 typedef unsigned long uintptr_t;
 
-#define fdt16_to_cpu(x)		(x)
-#define cpu_to_fdt16(x)		(x)
-#define fdt32_to_cpu(x)		(x)
-#define cpu_to_fdt32(x)		(x)
-#define fdt64_to_cpu(x)		(x)
-#define cpu_to_fdt64(x)		(x)
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
 
 #endif /* _ARCH_POWERPC_BOOT_LIBFDT_ENV_H */
diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index c8c1750aba0c..5603320dce07 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -24,11 +24,19 @@ void of_console_init(void);
 typedef u32			__be32;
 
 #ifdef __LITTLE_ENDIAN__
+#define cpu_to_be16(x) swab16(x)
+#define be16_to_cpu(x) swab16(x)
 #define cpu_to_be32(x) swab32(x)
 #define be32_to_cpu(x) swab32(x)
+#define cpu_to_be64(x) swab64(x)
+#define be64_to_cpu(x) swab64(x)
 #else
+#define cpu_to_be16(x) (x)
+#define be16_to_cpu(x) (x)
 #define cpu_to_be32(x) (x)
 #define be32_to_cpu(x) (x)
+#define cpu_to_be64(x) (x)
+#define be64_to_cpu(x) (x)
 #endif
 
 #define PROM_ERROR (-1u)
diff --git a/arch/powerpc/boot/planetcore.c b/arch/powerpc/boot/planetcore.c
index 0d8558a475bb..75117e63e6db 100644
--- a/arch/powerpc/boot/planetcore.c
+++ b/arch/powerpc/boot/planetcore.c
@@ -131,36 +131,3 @@ void planetcore_set_stdout_path(const char *table)
 
 	setprop_str(chosen, "linux,stdout-path", path);
 }
-
-void planetcore_set_serial_speed(const char *table)
-{
-	void *chosen, *stdout;
-	u64 baud;
-	u32 baud32;
-	int len;
-
-	chosen = finddevice("/chosen");
-	if (!chosen)
-		return;
-
-	len = getprop(chosen, "linux,stdout-path", prop_buf, MAX_PROP_LEN);
-	if (len <= 0)
-		return;
-
-	stdout = finddevice(prop_buf);
-	if (!stdout) {
-		printf("planetcore_set_serial_speed: "
-		       "Bad /chosen/linux,stdout-path.\r\n");
-
-		return;
-	}
-
-	if (!planetcore_get_decimal(table, PLANETCORE_KEY_SERIAL_BAUD,
-	                            &baud)) {
-		printf("planetcore_set_serial_speed: No SB tag.\r\n");
-		return;
-	}
-
-	baud32 = baud;
-	setprop(stdout, "current-speed", &baud32, 4);
-}
diff --git a/arch/powerpc/boot/planetcore.h b/arch/powerpc/boot/planetcore.h
index 0d4094f1771c..d53c733cc463 100644
--- a/arch/powerpc/boot/planetcore.h
+++ b/arch/powerpc/boot/planetcore.h
@@ -43,7 +43,4 @@ void planetcore_set_mac_addrs(const char *table);
  */
 void planetcore_set_stdout_path(const char *table);
 
-/* Sets the current-speed property in the serial node. */
-void planetcore_set_serial_speed(const char *table);
-
 #endif
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ae0f88ec4a32..3f50c27ed8f8 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -277,7 +277,7 @@ treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
 epapr)
-    platformo="$object/epapr.o $object/epapr-wrapper.o"
+    platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
     link_address='0x20000000'
     pie=-pie
     ;;
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 9788b3c2d563..9227b517560a 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -28,7 +28,6 @@ CONFIG_PS3_ROM=m
 CONFIG_PS3_FLASH=m
 CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELLEB=y
 CONFIG_RTAS_FLASH=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
@@ -113,7 +112,6 @@ CONFIG_IDE=y
 CONFIG_BLK_DEV_GENERIC=y
 CONFIG_BLK_DEV_AEC62XX=y
 CONFIG_BLK_DEV_SIIMAGE=y
-CONFIG_BLK_DEV_CELLEB=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=m
 CONFIG_CHR_DEV_SG=y
@@ -156,7 +154,6 @@ CONFIG_SERIAL_TXX9_NR_UARTS=2
 CONFIG_SERIAL_TXX9_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
 CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_DEVICE_INTERFACE=m
 CONFIG_IPMI_SI=m
diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig
deleted file mode 100644
index ff454dcd2dd3..000000000000
--- a/arch/powerpc/configs/celleb_defconfig
+++ /dev/null
@@ -1,152 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_TUNE_CELL=y
-CONFIG_ALTIVEC=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4
-CONFIG_SYSVIPC=y
-CONFIG_FHANDLE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=15
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_PPC_POWERNV is not set
-# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_CELLEB=y
-CONFIG_SPU_FS=y
-# CONFIG_CBE_THERM is not set
-CONFIG_UDBG_RTAS_CONSOLE=y
-# CONFIG_RTAS_PROC is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_KEXEC=y
-CONFIG_NUMA=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_SYN_COOKIES=y
-CONFIG_IPV6=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=m
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_CELLEB=y
-CONFIG_SCSI=m
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=m
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_NETDEVICES=y
-CONFIG_SPIDER_NET=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_SERIAL_TXX9_NR_UARTS=3
-CONFIG_SERIAL_TXX9_CONSOLE=y
-CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_I2C=y
-# CONFIG_HWMON is not set
-CONFIG_WATCHDOG=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_STORAGE=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_LIBCRC32C=m
-CONFIG_DEBUG_FS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index ca7957b09a3c..37659937bd12 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -99,6 +99,8 @@ CONFIG_E1000E=y
 CONFIG_AT803X_PHY=y
 CONFIG_VITESSE_PHY=y
 CONFIG_FIXED_PHY=y
+CONFIG_MDIO_BUS_MUX_GPIO=y
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -114,11 +116,14 @@ CONFIG_NVRAM=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_PCA954x=y
 CONFIG_SPI=y
 CONFIG_SPI_GPIO=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_INA2XX=y
 CONFIG_USB_HID=m
 CONFIG_USB=y
 CONFIG_USB_MON=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 04737aaa8b6b..33cd1df818ad 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -12,6 +12,10 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_KALLSYMS_ALL=y
@@ -75,6 +79,10 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
 CONFIG_EEPROM_LEGACY=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
 CONFIG_ATA=y
 CONFIG_SATA_FSL=y
 CONFIG_SATA_SIL24=y
@@ -85,6 +93,8 @@ CONFIG_FSL_XGMAC_MDIO=y
 CONFIG_E1000E=y
 CONFIG_VITESSE_PHY=y
 CONFIG_FIXED_PHY=y
+CONFIG_MDIO_BUS_MUX_GPIO=y
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -99,11 +109,14 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_PCA954x=y
 CONFIG_SPI=y
 CONFIG_SPI_GPIO=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_INA2XX=y
 CONFIG_USB_HID=m
 CONFIG_USB=y
 CONFIG_USB_MON=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index 8535c343dd57..6ecf7bdbc2f9 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -150,8 +150,7 @@ CONFIG_SPI=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
 CONFIG_GPIO_MPC8XXX=y
-CONFIG_HWMON=m
-CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM90=y
 CONFIG_FB=y
 CONFIG_FB_FSL_DIU=y
 # CONFIG_VGA_CONSOLE is not set
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index c45ad2e01b0c..b6c7111ea913 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -143,7 +143,7 @@ CONFIG_SPI=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
 CONFIG_GPIO_MPC8XXX=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM90=y
 CONFIG_FB=y
 CONFIG_FB_FSL_DIU=y
 # CONFIG_VGA_CONSOLE is not set
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 3315c9f0828a..aad501ae3834 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -36,7 +36,6 @@ CONFIG_PS3_ROM=m
 CONFIG_PS3_FLASH=m
 CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELLEB=y
 CONFIG_PPC_CELL_QPACE=y
 CONFIG_RTAS_FLASH=m
 CONFIG_IBMEBUS=y
@@ -89,7 +88,6 @@ CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_GENERIC=y
 CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_CELLEB=y
 CONFIG_BLK_DEV_IDE_PMAC=y
 CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
 CONFIG_BLK_DEV_SD=y
@@ -196,7 +194,6 @@ CONFIG_SERIAL_TXX9_CONSOLE=y
 CONFIG_SERIAL_JSM=m
 CONFIG_HVC_CONSOLE=y
 CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
 CONFIG_HVCS=m
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 2926fb9c570a..9c221b69c181 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -4,6 +4,14 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
+obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
+obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
+obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
 
+aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
+md5-ppc-y := md5-asm.o md5-glue.o
 sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
+sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
+sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
new file mode 100644
index 000000000000..5dc6bce90a77
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -0,0 +1,351 @@
+/*
+ * Fast AES implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#define	EAD(in, bpos) \
+	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;
+
+#define DAD(in, bpos) \
+	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;
+
+#define LWH(out, off) \
+	evlwwsplat	out,off(rT0);	/* load word high		*/
+
+#define LWL(out, off) \
+	lwz		out,off(rT0);	/* load word low		*/
+
+#define LBZ(out, tab, off) \
+	lbz		out,off(tab);	/* load byte			*/
+
+#define LAH(out, in, bpos, off) \
+	EAD(in, bpos)			/* calc addr + load word high	*/ \
+	LWH(out, off)
+
+#define LAL(out, in, bpos, off) \
+	EAD(in, bpos)			/* calc addr + load word low	*/ \
+	LWL(out, off)
+
+#define LAE(out, in, bpos) \
+	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
+	LBZ(out, rT0, 8)
+
+#define LBE(out) \
+	LBZ(out, rT0, 8)		/* load enc byte		*/
+
+#define LAD(out, in, bpos) \
+	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
+	LBZ(out, rT1, 0)
+
+#define LBD(out) \
+	LBZ(out, rT1, 0)
+
+/*
+ * ppc_encrypt_block: The central encryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_encrypt_block)
+	LAH(rW4, rD1, 2, 4)
+	LAH(rW6, rD0, 3, 0)
+	LAH(rW3, rD0, 1, 8)
+ppc_encrypt_block_loop:
+	LAH(rW0, rD3, 0, 12)
+	LAL(rW0, rD0, 0, 12)
+	LAH(rW1, rD1, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAL(rW3, rD1, 1, 8)
+	LAL(rW4, rD2, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD3, 2, 4)
+	LAL(rW5, rD0, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	evldw		rD1,16(rKP)
+	EAD(rD3, 3)
+	evxor		rW2,rW2,rW4
+	LWL(rW7, 0)
+	evxor		rW2,rW2,rW6
+	EAD(rD2, 0)
+	evxor		rD1,rD1,rW2
+	LWL(rW1, 12)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 2)
+	evxor		rW3,rW3,rW5
+	LWH(rW4, 4)
+	evxor		rW3,rW3,rW7
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW1
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	LAH(rW0, rD3, 0, 12)
+	LAL(rW0, rD0, 0, 12)
+	LAH(rW1, rD1, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAL(rW3, rD1, 1, 8)
+	LAL(rW4, rD2, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD3, 2, 4)
+	LAL(rW5, rD0, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	evldw		rD1,32(rKP)
+	EAD(rD3, 3)
+	evxor		rW2,rW2,rW4
+	LWL(rW7, 0)
+	evxor		rW2,rW2,rW6
+	EAD(rD2, 0)
+	evxor		rD1,rD1,rW2
+	LWL(rW1, 12)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,40(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 2)
+	evxor		rW3,rW3,rW5
+	LWH(rW4, 4)
+	evxor		rW3,rW3,rW7
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW1
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	addi		rKP,rKP,32
+	bdnz		ppc_encrypt_block_loop
+	LAH(rW0, rD3, 0, 12)
+	LAL(rW0, rD0, 0, 12)
+	LAH(rW1, rD1, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAL(rW3, rD1, 1, 8)
+	LAL(rW4, rD2, 2, 4)
+	LAH(rW5, rD3, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAL(rW5, rD0, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	evldw		rD1,16(rKP)
+	EAD(rD3, 3)
+	evxor		rW2,rW2,rW4
+	LWL(rW7, 0)
+	evxor		rW2,rW2,rW6
+	EAD(rD2, 0)
+	evxor		rD1,rD1,rW2
+	LWL(rW1, 12)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 0)
+	evxor		rW3,rW3,rW5
+	LBE(rW2)
+	evxor		rW3,rW3,rW7
+	EAD(rD0, 1)
+	evxor		rD3,rD3,rW3
+	LBE(rW6)
+	evxor		rD3,rD3,rW1
+	EAD(rD0, 0)
+	evmergehi	rD2,rD2,rD3
+	LBE(rW1)
+	LAE(rW0, rD3, 0)
+	LAE(rW1, rD0, 0)
+	LAE(rW4, rD2, 1)
+	LAE(rW5, rD3, 1)
+	LAE(rW3, rD2, 0)
+	LAE(rW7, rD1, 1)
+	rlwimi		rW0,rW4,8,16,23
+	rlwimi		rW1,rW5,8,16,23
+	LAE(rW4, rD1, 2)
+	LAE(rW5, rD2, 2)
+	rlwimi		rW2,rW6,8,16,23
+	rlwimi		rW3,rW7,8,16,23
+	LAE(rW6, rD3, 2)
+	LAE(rW7, rD0, 2)
+	rlwimi		rW0,rW4,16,8,15
+	rlwimi		rW1,rW5,16,8,15
+	LAE(rW4, rD0, 3)
+	LAE(rW5, rD1, 3)
+	rlwimi		rW2,rW6,16,8,15
+	lwz		rD0,32(rKP)
+	rlwimi		rW3,rW7,16,8,15
+	lwz		rD1,36(rKP)
+	LAE(rW6, rD2, 3)
+	LAE(rW7, rD3, 3)
+	rlwimi		rW0,rW4,24,0,7
+	lwz		rD2,40(rKP)
+	rlwimi		rW1,rW5,24,0,7
+	lwz		rD3,44(rKP)
+	rlwimi		rW2,rW6,24,0,7
+	rlwimi		rW3,rW7,24,0,7
+	blr
+
+/*
+ * ppc_decrypt_block: The central decryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_decrypt_block)
+	LAH(rW0, rD1, 0, 12)
+	LAH(rW6, rD0, 3, 0)
+	LAH(rW3, rD0, 1, 8)
+ppc_decrypt_block_loop:
+	LAH(rW1, rD3, 0, 12)
+	LAL(rW0, rD2, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAH(rW4, rD3, 2, 4)
+	LAL(rW4, rD0, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD1, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	LAL(rW7, rD3, 3, 0)
+	LAL(rW3, rD1, 1, 8)
+	evldw		rD1,16(rKP)
+	EAD(rD0, 0)
+	evxor		rW4,rW4,rW6
+	LWL(rW1, 12)
+	evxor		rW0,rW0,rW4
+	EAD(rD2, 2)
+	evxor		rW0,rW0,rW2
+	LWL(rW5, 4)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 0)
+	evxor		rW3,rW3,rW7
+	LWH(rW0, 12)
+	evxor		rW3,rW3,rW1
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW5
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	LAH(rW1, rD3, 0, 12)
+	LAL(rW0, rD2, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAH(rW4, rD3, 2, 4)
+	LAL(rW4, rD0, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD1, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	LAL(rW7, rD3, 3, 0)
+	LAL(rW3, rD1, 1, 8)
+	evldw		 rD1,32(rKP)
+	EAD(rD0, 0)
+	evxor		rW4,rW4,rW6
+	LWL(rW1, 12)
+	evxor		rW0,rW0,rW4
+	EAD(rD2, 2)
+	evxor		rW0,rW0,rW2
+	LWL(rW5, 4)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,40(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 0)
+	evxor		rW3,rW3,rW7
+	LWH(rW0, 12)
+	evxor		rW3,rW3,rW1
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW5
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	addi		rKP,rKP,32
+	bdnz		ppc_decrypt_block_loop
+	LAH(rW1, rD3, 0, 12)
+	LAL(rW0, rD2, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAH(rW4, rD3, 2, 4)
+	LAL(rW4, rD0, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD1, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	LAL(rW7, rD3, 3, 0)
+	LAL(rW3, rD1, 1, 8)
+	evldw		 rD1,16(rKP)
+	EAD(rD0, 0)
+	evxor		rW4,rW4,rW6
+	LWL(rW1, 12)
+	evxor		rW0,rW0,rW4
+	EAD(rD2, 2)
+	evxor		rW0,rW0,rW2
+	LWL(rW5, 4)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	DAD(rD1, 0)
+	evxor		rW3,rW3,rW7
+	LBD(rW0)
+	evxor		rW3,rW3,rW1
+	DAD(rD0, 1)
+	evxor		rD3,rD3,rW3
+	LBD(rW6)
+	evxor		rD3,rD3,rW5
+	DAD(rD0, 0)
+	evmergehi	rD2,rD2,rD3
+	LBD(rW3)
+	LAD(rW2, rD3, 0)
+	LAD(rW1, rD2, 0)
+	LAD(rW4, rD2, 1)
+	LAD(rW5, rD3, 1)
+	LAD(rW7, rD1, 1)
+	rlwimi		rW0,rW4,8,16,23
+	rlwimi		rW1,rW5,8,16,23
+	LAD(rW4, rD3, 2)
+	LAD(rW5, rD0, 2)
+	rlwimi		rW2,rW6,8,16,23
+	rlwimi		rW3,rW7,8,16,23
+	LAD(rW6, rD1, 2)
+	LAD(rW7, rD2, 2)
+	rlwimi		rW0,rW4,16,8,15
+	rlwimi		rW1,rW5,16,8,15
+	LAD(rW4, rD0, 3)
+	LAD(rW5, rD1, 3)
+	rlwimi		rW2,rW6,16,8,15
+	lwz		rD0,32(rKP)
+	rlwimi		rW3,rW7,16,8,15
+	lwz		rD1,36(rKP)
+	LAD(rW6, rD2, 3)
+	LAD(rW7, rD3, 3)
+	rlwimi		rW0,rW4,24,0,7
+	lwz		rD2,40(rKP)
+	rlwimi		rW1,rW5,24,0,7
+	lwz		rD3,44(rKP)
+	rlwimi		rW2,rW6,24,0,7
+	rlwimi		rW3,rW7,24,0,7
+	blr
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644
index 000000000000..bd5e63f72ad4
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -0,0 +1,512 @@
+/*
+ * Glue code for AES implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/aes.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <crypto/algapi.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). e500 cores can issue two
+ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
+ * bit unit (SU2). One of these can be a memory access that is executed via
+ * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
+ * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data
+ * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
+ * included. Even with the low end model clocked at 667 MHz this equals to a
+ * critical time window of less than 30us. The value has been choosen to
+ * process a 512 byte disk block in one or a large 1400 bytes IPsec network
+ * packet in two runs.
+ *
+ */
+#define MAX_BYTES 768
+
+struct ppc_aes_ctx {
+	u32 key_enc[AES_MAX_KEYLENGTH_U32];
+	u32 key_dec[AES_MAX_KEYLENGTH_U32];
+	u32 rounds;
+};
+
+struct ppc_xts_ctx {
+	u32 key_enc[AES_MAX_KEYLENGTH_U32];
+	u32 key_dec[AES_MAX_KEYLENGTH_U32];
+	u32 key_twk[AES_MAX_KEYLENGTH_U32];
+	u32 rounds;
+};
+
+extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
+extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
+extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes);
+extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+			    u32 bytes);
+extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes, u8 *iv);
+extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+			    u32 bytes, u8 *iv);
+extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes, u8 *iv);
+extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes, u8 *iv, u32 *key_twk);
+extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+			    u32 bytes, u8 *iv, u32 *key_twk);
+
+extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
+
+extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
+				     unsigned int key_len);
+
+static void spe_begin(void)
+{
+	/* disable preemption and save users SPE registers if required */
+	preempt_disable();
+	enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+	/* reenable preemption */
+	preempt_enable();
+}
+
+static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		unsigned int key_len)
+{
+	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->rounds = 4;
+		ppc_expand_key_128(ctx->key_enc, in_key);
+		break;
+	case AES_KEYSIZE_192:
+		ctx->rounds = 5;
+		ppc_expand_key_192(ctx->key_enc, in_key);
+		break;
+	case AES_KEYSIZE_256:
+		ctx->rounds = 6;
+		ppc_expand_key_256(ctx->key_enc, in_key);
+		break;
+	}
+
+	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+	return 0;
+}
+
+static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		   unsigned int key_len)
+{
+	struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	key_len >>= 1;
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->rounds = 4;
+		ppc_expand_key_128(ctx->key_enc, in_key);
+		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
+		break;
+	case AES_KEYSIZE_192:
+		ctx->rounds = 5;
+		ppc_expand_key_192(ctx->key_enc, in_key);
+		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
+		break;
+	case AES_KEYSIZE_256:
+		ctx->rounds = 6;
+		ppc_expand_key_256(ctx->key_enc, in_key);
+		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
+		break;
+	}
+
+	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+	return 0;
+}
+
+static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	spe_begin();
+	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
+	spe_end();
+}
+
+static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	spe_begin();
+	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
+	spe_end();
+}
+
+static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			   struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int ubytes;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		ubytes = nbytes > MAX_BYTES ?
+			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+		nbytes -= ubytes;
+
+		spe_begin();
+		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key_enc, ctx->rounds, nbytes);
+		spe_end();
+
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			   struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int ubytes;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		ubytes = nbytes > MAX_BYTES ?
+			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+		nbytes -= ubytes;
+
+		spe_begin();
+		ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key_dec, ctx->rounds, nbytes);
+		spe_end();
+
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			   struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int ubytes;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		ubytes = nbytes > MAX_BYTES ?
+			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+		nbytes -= ubytes;
+
+		spe_begin();
+		ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+		spe_end();
+
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			   struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int ubytes;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		ubytes = nbytes > MAX_BYTES ?
+			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+		nbytes -= ubytes;
+
+		spe_begin();
+		ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key_dec, ctx->rounds, nbytes, walk.iv);
+		spe_end();
+
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			 struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int pbytes, ubytes;
+	int err;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+	while ((pbytes = walk.nbytes)) {
+		pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
+		pbytes = pbytes == nbytes ?
+			 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
+		ubytes = walk.nbytes - pbytes;
+
+		spe_begin();
+		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
+			      ctx->key_enc, ctx->rounds, pbytes , walk.iv);
+		spe_end();
+
+		nbytes -= pbytes;
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			   struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int ubytes;
+	int err;
+	u32 *twk;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	twk = ctx->key_twk;
+
+	while ((nbytes = walk.nbytes)) {
+		ubytes = nbytes > MAX_BYTES ?
+			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+		nbytes -= ubytes;
+
+		spe_begin();
+		ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
+		spe_end();
+
+		twk = NULL;
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			   struct scatterlist *src, unsigned int nbytes)
+{
+	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	unsigned int ubytes;
+	int err;
+	u32 *twk;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	twk = ctx->key_twk;
+
+	while ((nbytes = walk.nbytes)) {
+		ubytes = nbytes > MAX_BYTES ?
+			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+		nbytes -= ubytes;
+
+		spe_begin();
+		ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
+		spe_end();
+
+		twk = NULL;
+		err = blkcipher_walk_done(desc, &walk, ubytes);
+	}
+
+	return err;
+}
+
+/*
+ * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
+ * because the e500 platform can handle unaligned reads/writes very efficently.
+ * This improves IPsec thoughput by another few percent. Additionally we assume
+ * that AES context is always aligned to at least 8 bytes because it is created
+ * with kmalloc() in the crypto infrastructure
+ *
+ */
+static struct crypto_alg aes_algs[] = { {
+	.cra_name		=	"aes",
+	.cra_driver_name	=	"aes-ppc-spe",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
+	.cra_alignmask		=	0,
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
+			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
+			.cia_setkey		=	ppc_aes_setkey,
+			.cia_encrypt		=	ppc_aes_encrypt,
+			.cia_decrypt		=	ppc_aes_decrypt
+		}
+	}
+}, {
+	.cra_name		=	"ecb(aes)",
+	.cra_driver_name	=	"ecb-ppc-spe",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
+	.cra_alignmask		=	0,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	ppc_aes_setkey,
+			.encrypt		=	ppc_ecb_encrypt,
+			.decrypt		=	ppc_ecb_decrypt,
+		}
+	}
+}, {
+	.cra_name		=	"cbc(aes)",
+	.cra_driver_name	=	"cbc-ppc-spe",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
+	.cra_alignmask		=	0,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	ppc_aes_setkey,
+			.encrypt		=	ppc_cbc_encrypt,
+			.decrypt		=	ppc_cbc_decrypt,
+		}
+	}
+}, {
+	.cra_name		=	"ctr(aes)",
+	.cra_driver_name	=	"ctr-ppc-spe",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
+	.cra_alignmask		=	0,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	ppc_aes_setkey,
+			.encrypt		=	ppc_ctr_crypt,
+			.decrypt		=	ppc_ctr_crypt,
+		}
+	}
+}, {
+	.cra_name		=	"xts(aes)",
+	.cra_driver_name	=	"xts-ppc-spe",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct ppc_xts_ctx),
+	.cra_alignmask		=	0,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE * 2,
+			.max_keysize		=	AES_MAX_KEY_SIZE * 2,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	ppc_xts_setkey,
+			.encrypt		=	ppc_xts_encrypt,
+			.decrypt		=	ppc_xts_decrypt,
+		}
+	}
+} };
+
+static int __init ppc_aes_mod_init(void)
+{
+	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit ppc_aes_mod_fini(void)
+{
+	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(ppc_aes_mod_init);
+module_exit(ppc_aes_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644
index 000000000000..be8090f3d700
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-keys.S
@@ -0,0 +1,283 @@
+/*
+ * Key handling functions for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_KEY(d, s, off) \
+	lwz		d,off(s);
+#else
+#define LOAD_KEY(d, s, off) \
+	li		r0,off; \
+	lwbrx		d,s,r0;
+#endif
+
+#define INITIALIZE_KEY \
+	stwu		r1,-32(r1);	/* create stack frame		*/ \
+	stw		r14,8(r1);	/* save registers		*/ \
+	stw		r15,12(r1);					   \
+	stw		r16,16(r1);
+
+#define FINALIZE_KEY \
+	lwz		r14,8(r1);	/* restore registers		*/ \
+	lwz		r15,12(r1);					   \
+	lwz		r16,16(r1);					   \
+	xor		r5,r5,r5;	/* clear sensitive data		*/ \
+	xor		r6,r6,r6;					   \
+	xor		r7,r7,r7;					   \
+	xor		r8,r8,r8;					   \
+	xor		r9,r9,r9;					   \
+	xor		r10,r10,r10;					   \
+	xor		r11,r11,r11;					   \
+	xor		r12,r12,r12;					   \
+	addi		r1,r1,32;	/* cleanup stack		*/
+
+#define LS_BOX(r, t1, t2) \
+	lis		t2,PPC_AES_4K_ENCTAB@h;				   \
+	ori		t2,t2,PPC_AES_4K_ENCTAB@l;			   \
+	rlwimi		t2,r,4,20,27;					   \
+	lbz		t1,8(t2);					   \
+	rlwimi		r,t1,0,24,31;					   \
+	rlwimi		t2,r,28,20,27;					   \
+	lbz		t1,8(t2);					   \
+	rlwimi		r,t1,8,16,23;					   \
+	rlwimi		t2,r,20,20,27;					   \
+	lbz		t1,8(t2);					   \
+	rlwimi		r,t1,16,8,15;					   \
+	rlwimi		t2,r,12,20,27;					   \
+	lbz		t1,8(t2);					   \
+	rlwimi		r,t1,24,0,7;
+
+#define GF8_MUL(out, in, t1, t2) \
+	lis t1,0x8080;			/* multiplication in GF8	*/ \
+	ori t1,t1,0x8080; 						   \
+	and t1,t1,in; 							   \
+	srwi t1,t1,7; 							   \
+	mulli t1,t1,0x1b; 						   \
+	lis t2,0x7f7f; 							   \
+	ori t2,t2,0x7f7f; 						   \
+	and t2,t2,in; 							   \
+	slwi t2,t2,1; 							   \
+	xor out,t1,t2;
+
+/*
+ * ppc_expand_key_128(u32 *key_enc, const u8 *key)
+ *
+ * Expand 128 bit key into 176 bytes encryption key. It consists of
+ * key itself plus 10 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_128)
+	INITIALIZE_KEY
+	LOAD_KEY(r5,r4,0)
+	LOAD_KEY(r6,r4,4)
+	LOAD_KEY(r7,r4,8)
+	LOAD_KEY(r8,r4,12)
+	stw		r5,0(r3)	/* key[0..3] = input data	*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	li		r16,10		/* 10 expansion rounds		*/
+	lis		r0,0x0100	/* RCO(1)			*/
+ppc_expand_128_loop:
+	addi		r3,r3,16
+	mr		r14,r8		/* apply LS_BOX to 4th temp	*/
+	rotlwi		r14,r14,8
+	LS_BOX(r14, r15, r4)
+	xor		r14,r14,r0
+	xor		r5,r5,r14	/* xor next 4 keys		*/
+	xor		r6,r6,r5
+	xor		r7,r7,r6
+	xor		r8,r8,r7
+	stw		r5,0(r3)	/* store next 4 keys		*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO by 2 in GF	*/
+	subi		r16,r16,1
+	cmpwi		r16,0
+	bt		eq,ppc_expand_128_end
+	b		ppc_expand_128_loop
+ppc_expand_128_end:
+	FINALIZE_KEY
+	blr
+
+/*
+ * ppc_expand_key_192(u32 *key_enc, const u8 *key)
+ *
+ * Expand 192 bit key into 208 bytes encryption key. It consists of key
+ * itself plus 12 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_192)
+	INITIALIZE_KEY
+	LOAD_KEY(r5,r4,0)
+	LOAD_KEY(r6,r4,4)
+	LOAD_KEY(r7,r4,8)
+	LOAD_KEY(r8,r4,12)
+	LOAD_KEY(r9,r4,16)
+	LOAD_KEY(r10,r4,20)
+	stw		r5,0(r3)
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	stw		r9,16(r3)
+	stw		r10,20(r3)
+	li		r16,8		/* 8 expansion rounds		*/
+	lis		r0,0x0100	/* RCO(1)			*/
+ppc_expand_192_loop:
+	addi		r3,r3,24
+	mr		r14,r10		/* apply LS_BOX to 6th temp	*/
+	rotlwi		r14,r14,8
+	LS_BOX(r14, r15, r4)
+	xor		r14,r14,r0
+	xor		r5,r5,r14	/* xor next 6 keys		*/
+	xor		r6,r6,r5
+	xor		r7,r7,r6
+	xor		r8,r8,r7
+	xor		r9,r9,r8
+	xor		r10,r10,r9
+	stw		r5,0(r3)
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	subi		r16,r16,1
+	cmpwi		r16,0		/* last round early kick out	*/
+	bt		eq,ppc_expand_192_end
+	stw		r9,16(r3)
+	stw		r10,20(r3)
+	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO GF8		*/
+	b		ppc_expand_192_loop
+ppc_expand_192_end:
+	FINALIZE_KEY
+	blr
+
+/*
+ * ppc_expand_key_256(u32 *key_enc, const u8 *key)
+ *
+ * Expand 256 bit key into 240 bytes encryption key. It consists of key
+ * itself plus 14 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_256)
+	INITIALIZE_KEY
+	LOAD_KEY(r5,r4,0)
+	LOAD_KEY(r6,r4,4)
+	LOAD_KEY(r7,r4,8)
+	LOAD_KEY(r8,r4,12)
+	LOAD_KEY(r9,r4,16)
+	LOAD_KEY(r10,r4,20)
+	LOAD_KEY(r11,r4,24)
+	LOAD_KEY(r12,r4,28)
+	stw		r5,0(r3)
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	stw		r9,16(r3)
+	stw		r10,20(r3)
+	stw		r11,24(r3)
+	stw		r12,28(r3)
+	li		r16,7		/* 7 expansion rounds		*/
+	lis		r0,0x0100	/* RCO(1)			*/
+ppc_expand_256_loop:
+	addi		r3,r3,32
+	mr		r14,r12		/* apply LS_BOX to 8th temp	*/
+	rotlwi		r14,r14,8
+	LS_BOX(r14, r15, r4)
+	xor		r14,r14,r0
+	xor		r5,r5,r14	/* xor 4 keys			*/
+	xor		r6,r6,r5
+	xor		r7,r7,r6
+	xor		r8,r8,r7
+	mr		r14,r8
+	LS_BOX(r14, r15, r4)		/* apply LS_BOX to 4th temp	*/
+	xor		r9,r9,r14	/* xor 4 keys			*/
+	xor		r10,r10,r9
+	xor		r11,r11,r10
+	xor		r12,r12,r11
+	stw		r5,0(r3)
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	subi		r16,r16,1
+	cmpwi		r16,0		/* last round early kick out	*/
+	bt		eq,ppc_expand_256_end
+	stw		r9,16(r3)
+	stw		r10,20(r3)
+	stw		r11,24(r3)
+	stw		r12,28(r3)
+	GF8_MUL(r0, r0, r4, r14)
+	b		ppc_expand_256_loop
+ppc_expand_256_end:
+	FINALIZE_KEY
+	blr
+
+/*
+ * ppc_generate_decrypt_key: derive decryption key from encryption key
+ * number of bytes to handle are calculated from length of key (16/24/32)
+ *
+ */
+_GLOBAL(ppc_generate_decrypt_key)
+	addi		r6,r5,24
+	slwi		r6,r6,2
+	lwzx		r7,r4,r6	/* first/last 4 words are same	*/
+	stw		r7,0(r3)
+	lwz		r7,0(r4)
+	stwx		r7,r3,r6
+	addi		r6,r6,4
+	lwzx		r7,r4,r6
+	stw		r7,4(r3)
+	lwz		r7,4(r4)
+	stwx		r7,r3,r6
+	addi		r6,r6,4
+	lwzx		r7,r4,r6
+	stw		r7,8(r3)
+	lwz		r7,8(r4)
+	stwx		r7,r3,r6
+	addi		r6,r6,4
+	lwzx		r7,r4,r6
+	stw		r7,12(r3)
+	lwz		r7,12(r4)
+	stwx		r7,r3,r6
+	addi		r3,r3,16
+	add		r4,r4,r6
+	subi		r4,r4,28
+	addi		r5,r5,20
+	srwi		r5,r5,2
+ppc_generate_decrypt_block:
+	li	r6,4
+	mtctr	r6
+ppc_generate_decrypt_word:
+	lwz		r6,0(r4)
+	GF8_MUL(r7, r6, r0, r7)
+	GF8_MUL(r8, r7, r0, r8)
+	GF8_MUL(r9, r8, r0, r9)
+	xor		r10,r9,r6
+	xor		r11,r7,r8
+	xor		r11,r11,r9
+	xor		r12,r7,r10
+	rotrwi		r12,r12,24
+	xor		r11,r11,r12
+	xor		r12,r8,r10
+	rotrwi		r12,r12,16
+	xor		r11,r11,r12
+	rotrwi		r12,r10,8
+	xor		r11,r11,r12
+	stw		r11,0(r3)
+	addi		r3,r3,4
+	addi		r4,r4,4
+	bdnz		ppc_generate_decrypt_word
+	subi		r4,r4,32
+	subi		r5,r5,1
+	cmpwi		r5,0
+	bt		gt,ppc_generate_decrypt_block
+	blr
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S
new file mode 100644
index 000000000000..ad48032ca8e0
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-modes.S
@@ -0,0 +1,630 @@
+/*
+ * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/
+
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rSP);	/* load with offset		*/
+#define SAVE_DATA(reg, off) \
+	stw		reg,off(rDP);	/* save with offset		*/
+#define NEXT_BLOCK \
+	addi		rSP,rSP,16;	/* increment pointers per bloc	*/ \
+	addi		rDP,rDP,16;
+#define LOAD_IV(reg, off) \
+	lwz		reg,off(rIP);	/* IV loading with offset	*/
+#define SAVE_IV(reg, off) \
+	stw		reg,off(rIP);	/* IV saving with offset	*/
+#define START_IV			/* nothing to reset		*/
+#define CBC_DEC 16			/* CBC decrement per block	*/
+#define CTR_DEC 1			/* CTR decrement one byte	*/
+
+#else					/* Macros for little endian	*/
+
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rSP;	/* load reversed		*/ \
+	addi		rSP,rSP,4;	/* and increment pointer	*/
+#define SAVE_DATA(reg, off) \
+	stwbrx		reg,0,rDP;	/* save reversed		*/ \
+	addi		rDP,rDP,4;	/* and increment pointer	*/
+#define NEXT_BLOCK			/* nothing todo			*/
+#define LOAD_IV(reg, off) \
+	lwbrx		reg,0,rIP;	/* load reversed		*/ \
+	addi		rIP,rIP,4;	/* and increment pointer	*/
+#define SAVE_IV(reg, off) \
+	stwbrx		reg,0,rIP;	/* load reversed		*/ \
+	addi		rIP,rIP,4;	/* and increment pointer	*/
+#define START_IV \
+	subi		rIP,rIP,16;	/* must reset pointer		*/
+#define CBC_DEC 32			/* 2 blocks because of incs	*/
+#define CTR_DEC 17			/* 1 block because of incs	*/
+
+#endif
+
+#define SAVE_0_REGS
+#define LOAD_0_REGS
+
+#define SAVE_4_REGS \
+	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
+	stw		rI1,100(r1);					   \
+	stw		rI2,104(r1);					   \
+	stw		rI3,108(r1);
+
+#define LOAD_4_REGS \
+	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
+	lwz		rI1,100(r1);					   \
+	lwz		rI2,104(r1);					   \
+	lwz		rI3,108(r1);
+
+#define SAVE_8_REGS \
+	SAVE_4_REGS							   \
+	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
+	stw		rG1,116(r1);					   \
+	stw		rG2,120(r1);					   \
+	stw		rG3,124(r1);
+
+#define LOAD_8_REGS \
+	LOAD_4_REGS							   \
+	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
+	lwz		rG1,116(r1);					   \
+	lwz		rG2,120(r1);					   \
+	lwz		rG3,124(r1);
+
+#define INITIALIZE_CRYPT(tab,nr32bitregs) \
+	mflr		r0;						   \
+	stwu		r1,-160(r1);	/* create stack frame		*/ \
+	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
+	stw		r0,8(r1);	/* save link register		*/ \
+	ori		rT0,rT0,tab@l;					   \
+	evstdw		r14,16(r1);					   \
+	mr		rKS,rKP;					   \
+	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
+	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
+	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
+	evstdw		r18,48(r1);					   \
+	evstdw		r19,56(r1);					   \
+	evstdw		r20,64(r1);					   \
+	evstdw		r21,72(r1);					   \
+	evstdw		r22,80(r1);					   \
+	evstdw		r23,88(r1);					   \
+	SAVE_##nr32bitregs##_REGS
+
+#define FINALIZE_CRYPT(nr32bitregs) \
+	lwz		r0,8(r1);					   \
+	evldw		r14,16(r1);	/* restore SPE registers	*/ \
+	evldw		r15,24(r1);					   \
+	evldw		r16,32(r1);					   \
+	evldw		r17,40(r1);					   \
+	evldw		r18,48(r1);					   \
+	evldw		r19,56(r1);					   \
+	evldw		r20,64(r1);					   \
+	evldw		r21,72(r1);					   \
+	evldw		r22,80(r1);					   \
+	evldw		r23,88(r1);					   \
+	LOAD_##nr32bitregs##_REGS					   \
+	mtlr		r0;		/* restore link register	*/ \
+	xor		r0,r0,r0;					   \
+	stw		r0,16(r1);	/* delete sensitive data	*/ \
+	stw		r0,24(r1);	/* that we might have pushed	*/ \
+	stw		r0,32(r1);	/* from other context that runs	*/ \
+	stw		r0,40(r1);	/* the same code		*/ \
+	stw		r0,48(r1);					   \
+	stw		r0,56(r1);					   \
+	stw		r0,64(r1);					   \
+	stw		r0,72(r1);					   \
+	stw		r0,80(r1);					   \
+	stw		r0,88(r1);					   \
+	addi		r1,r1,160;	/* cleanup stack frame		*/
+
+#define ENDIAN_SWAP(t0, t1, s0, s1) \
+	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
+	rotrwi		t1,s1,8;					   \
+	rlwimi		t0,s0,8,8,15;					   \
+	rlwimi		t1,s1,8,8,15;					   \
+	rlwimi		t0,s0,8,24,31;					   \
+	rlwimi		t1,s1,8,24,31;
+
+#define GF128_MUL(d0, d1, d2, d3, t0) \
+	li		t0,0x87;	/* multiplication in GF128	*/ \
+	cmpwi		d3,-1;						   \
+	iselgt		t0,0,t0;					   \
+	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
+	rotlwi		d3,d3,1;					   \
+	rlwimi		d2,d1,0,0,0;					   \
+	rotlwi		d2,d2,1;					   \
+	rlwimi		d1,d0,0,0,0;					   \
+	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
+	rotlwi		d1,d1,1;					   \
+	xor		d0,d0,t0;
+
+#define START_KEY(d0, d1, d2, d3) \
+	lwz		rW0,0(rKP);					   \
+	mtctr		rRR;						   \
+	lwz		rW1,4(rKP);					   \
+	lwz		rW2,8(rKP);					   \
+	lwz		rW3,12(rKP);					   \
+	xor		rD0,d0,rW0;					   \
+	xor		rD1,d1,rW1;					   \
+	xor		rD2,d2,rW2;					   \
+	xor		rD3,d3,rW3;
+
+/*
+ * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds)
+ *
+ * called from glue layer to encrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_aes)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+	LOAD_DATA(rD0, 0)
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds)
+ *
+ * called from glue layer to decrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_aes)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
+	LOAD_DATA(rD0, 0)
+	addi		rT1,rT0,4096
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds, u32 bytes);
+ *
+ * called from glue layer to encrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_ecb)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ppc_encrypt_ecb_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16
+	LOAD_DATA(rD2, 8)
+	cmpwi		rLN,15
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	NEXT_BLOCK
+	bt		gt,ppc_encrypt_ecb_loop
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds, u32 bytes);
+ *
+ * called from glue layer to decrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_ecb)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
+	addi		rT1,rT0,4096
+ppc_decrypt_ecb_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16
+	LOAD_DATA(rD2, 8)
+	cmpwi		rLN,15
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	NEXT_BLOCK
+	bt		gt,ppc_decrypt_ecb_loop
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
+ *		   32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt multiple blocks via CBC
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_cbc)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+	LOAD_IV(rI0, 0)
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	LOAD_IV(rI3, 12)
+ppc_encrypt_cbc_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16
+	LOAD_DATA(rD2, 8)
+	cmpwi		rLN,15
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rI0
+	xor		rD1,rD1,rI1
+	xor		rD2,rD2,rI2
+	xor		rD3,rD3,rI3
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rI0,rD0,rW0
+	SAVE_DATA(rI0, 0)
+	xor		rI1,rD1,rW1
+	SAVE_DATA(rI1, 4)
+	xor		rI2,rD2,rW2
+	SAVE_DATA(rI2, 8)
+	xor		rI3,rD3,rW3
+	SAVE_DATA(rI3, 12)
+	NEXT_BLOCK
+	bt		gt,ppc_encrypt_cbc_loop
+	START_IV
+	SAVE_IV(rI0, 0)
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(4)
+	blr
+
+/*
+ * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to decrypt multiple blocks via CBC
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_cbc)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
+	li		rT1,15
+	LOAD_IV(rI0, 0)
+	andc		rLN,rLN,rT1
+	LOAD_IV(rI1, 4)
+	subi		rLN,rLN,16
+	LOAD_IV(rI2, 8)
+	add		rSP,rSP,rLN	/* reverse processing		*/
+	LOAD_IV(rI3, 12)
+	add		rDP,rDP,rLN
+	LOAD_DATA(rD0, 0)
+	addi		rT1,rT0,4096
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	START_IV
+	SAVE_IV(rD0, 0)
+	SAVE_IV(rD1, 4)
+	SAVE_IV(rD2, 8)
+	cmpwi		rLN,16
+	SAVE_IV(rD3, 12)
+	bt		lt,ppc_decrypt_cbc_end
+ppc_decrypt_cbc_loop:
+	mr		rKP,rKS
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	subi		rLN,rLN,16
+	subi		rSP,rSP,CBC_DEC
+	xor		rW0,rD0,rW0
+	LOAD_DATA(rD0, 0)
+	xor		rW1,rD1,rW1
+	LOAD_DATA(rD1, 4)
+	xor		rW2,rD2,rW2
+	LOAD_DATA(rD2, 8)
+	xor		rW3,rD3,rW3
+	LOAD_DATA(rD3, 12)
+	xor		rW0,rW0,rD0
+	SAVE_DATA(rW0, 0)
+	xor		rW1,rW1,rD1
+	SAVE_DATA(rW1, 4)
+	xor		rW2,rW2,rD2
+	SAVE_DATA(rW2, 8)
+	xor		rW3,rW3,rD3
+	SAVE_DATA(rW3, 12)
+	cmpwi		rLN,15
+	subi		rDP,rDP,CBC_DEC
+	bt		gt,ppc_decrypt_cbc_loop
+ppc_decrypt_cbc_end:
+	mr		rKP,rKS
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rW0,rW0,rD0
+	xor		rW1,rW1,rD1
+	xor		rW2,rW2,rD2
+	xor		rW3,rW3,rD3
+	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
+	SAVE_DATA(rW0, 0)
+	xor		rW1,rW1,rI1
+	SAVE_DATA(rW1, 4)
+	xor		rW2,rW2,rI2
+	SAVE_DATA(rW2, 8)
+	xor		rW3,rW3,rI3
+	SAVE_DATA(rW3, 12)
+	FINALIZE_CRYPT(4)
+	blr
+
+/*
+ * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
+ *		 u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt/decrypt multiple blocks
+ * via CTR. Number of bytes does not need to be a multiple of
+ * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_crypt_ctr)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+	LOAD_IV(rI0, 0)
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	cmpwi		rLN,16
+	LOAD_IV(rI3, 12)
+	START_IV
+	bt		lt,ppc_crypt_ctr_partial
+ppc_crypt_ctr_loop:
+	mr		rKP,rKS
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block
+	xor		rW0,rD0,rW0
+	xor		rW1,rD1,rW1
+	xor		rW2,rD2,rW2
+	xor		rW3,rD3,rW3
+	LOAD_DATA(rD0, 0)
+	subi		rLN,rLN,16
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	addic		rI3,rI3,1	/* increase counter			*/
+	addze		rI2,rI2
+	addze		rI1,rI1
+	addze		rI0,rI0
+	NEXT_BLOCK
+	cmpwi		rLN,15
+	bt		gt,ppc_crypt_ctr_loop
+ppc_crypt_ctr_partial:
+	cmpwi		rLN,0
+	bt		eq,ppc_crypt_ctr_end
+	mr		rKP,rKS
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block
+	xor		rW0,rD0,rW0
+	SAVE_IV(rW0, 0)
+	xor		rW1,rD1,rW1
+	SAVE_IV(rW1, 4)
+	xor		rW2,rD2,rW2
+	SAVE_IV(rW2, 8)
+	xor		rW3,rD3,rW3
+	SAVE_IV(rW3, 12)
+	mtctr		rLN
+	subi		rIP,rIP,CTR_DEC
+	subi		rSP,rSP,1
+	subi		rDP,rDP,1
+ppc_crypt_ctr_xorbyte:
+	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
+	lbzu		rW5,1(rSP)
+	xor		rW4,rW4,rW5
+	stbu		rW4,1(rDP)
+	bdnz		ppc_crypt_ctr_xorbyte
+	subf		rIP,rLN,rIP
+	addi		rIP,rIP,1
+	addic		rI3,rI3,1
+	addze		rI2,rI2
+	addze		rI1,rI1
+	addze		rI0,rI0
+ppc_crypt_ctr_end:
+	SAVE_IV(rI0, 0)
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(4)
+	blr
+
+/*
+ * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to encrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_xts)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
+	LOAD_IV(rI0, 0)
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	cmpwi		rKT,0
+	LOAD_IV(rI3, 12)
+	bt		eq,ppc_encrypt_xts_notweak
+	mr		rKP,rKT
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block
+	xor		rI0,rD0,rW0
+	xor		rI1,rD1,rW1
+	xor		rI2,rD2,rW2
+	xor		rI3,rD3,rW3
+ppc_encrypt_xts_notweak:
+	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_encrypt_xts_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rI0
+	xor		rD1,rD1,rI1
+	xor		rD2,rD2,rI2
+	xor		rD3,rD3,rI3
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rD0,rD0,rW0
+	xor		rD1,rD1,rW1
+	xor		rD2,rD2,rW2
+	xor		rD3,rD3,rW3
+	xor		rD0,rD0,rI0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rI1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rI2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rI3
+	SAVE_DATA(rD3, 12)
+	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
+	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+	cmpwi		rLN,0
+	NEXT_BLOCK
+	bt		gt,ppc_encrypt_xts_loop
+	START_IV
+	SAVE_IV(rI0, 0)
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(8)
+	blr
+
+/*
+ * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds, u32 blocks, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to decrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_xts)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
+	LOAD_IV(rI0, 0)
+	addi		rT1,rT0,4096
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	cmpwi		rKT,0
+	LOAD_IV(rI3, 12)
+	bt		eq,ppc_decrypt_xts_notweak
+	subi		rT0,rT0,4096
+	mr		rKP,rKT
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block
+	xor		rI0,rD0,rW0
+	xor		rI1,rD1,rW1
+	xor		rI2,rD2,rW2
+	xor		rI3,rD3,rW3
+	addi		rT0,rT0,4096
+ppc_decrypt_xts_notweak:
+	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_decrypt_xts_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rI0
+	xor		rD1,rD1,rI1
+	xor		rD2,rD2,rI2
+	xor		rD3,rD3,rI3
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rD0,rD0,rW0
+	xor		rD1,rD1,rW1
+	xor		rD2,rD2,rW2
+	xor		rD3,rD3,rW3
+	xor		rD0,rD0,rI0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rI1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rI2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rI3
+	SAVE_DATA(rD3, 12)
+	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
+	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+	cmpwi		rLN,0
+	NEXT_BLOCK
+	bt		gt,ppc_decrypt_xts_loop
+	START_IV
+	SAVE_IV(rI0, 0)
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(8)
+	blr
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h
new file mode 100644
index 000000000000..30d217b399c3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-regs.h
@@ -0,0 +1,42 @@
+/*
+ * Common registers for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#define rKS r0	/* copy of en-/decryption key pointer			*/
+#define rDP r3	/* destination pointer					*/
+#define rSP r4	/* source pointer					*/
+#define rKP r5	/* pointer to en-/decryption key pointer		*/
+#define rRR r6	/* en-/decryption rounds				*/
+#define rLN r7	/* length of data to be processed			*/
+#define rIP r8	/* potiner to IV (CBC/CTR/XTS modes)			*/
+#define rKT r9	/* pointer to tweak key (XTS mode)			*/
+#define rT0 r11	/* pointers to en-/decrpytion tables			*/
+#define rT1 r10
+#define rD0 r9	/* data 						*/
+#define rD1 r14
+#define rD2 r12
+#define rD3 r15
+#define rW0 r16	/* working registers					*/
+#define rW1 r17
+#define rW2 r18
+#define rW3 r19
+#define rW4 r20
+#define rW5 r21
+#define rW6 r22
+#define rW7 r23
+#define rI0 r24	/* IV							*/
+#define rI1 r25
+#define rI2 r26
+#define rI3 r27
+#define rG0 r28	/* endian reversed tweak (XTS mode)			*/
+#define rG1 r29
+#define rG2 r30
+#define rG3 r31
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S
new file mode 100644
index 000000000000..701e60240dc3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-tab-4k.S
@@ -0,0 +1,331 @@
+/*
+ * 4K AES tables for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+/*
+ * These big endian AES encryption/decryption tables have been taken from
+ * crypto/aes_generic.c and are designed to be simply accessed by a combination
+ * of rlwimi/lwz instructions with a minimum of table registers (usually only
+ * one required). Thus they are aligned to 4K. The locality of rotated values
+ * is derived from the reduced offsets that are available in the SPE load
+ * instructions. E.g. evldw, evlwwsplat, ...
+ *
+ * For the safety-conscious it has to be noted that they might be vulnerable
+ * to cache timing attacks because of their size. Nevertheless in contrast to
+ * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
+ * This is a quite good tradeoff for low power devices (e.g. routers) without
+ * dedicated encryption hardware where we usually have no multiuser
+ * environment.
+ *
+ */
+
+#define R(a, b, c, d) \
+	0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
+
+.data
+.align 12
+.globl PPC_AES_4K_ENCTAB
+PPC_AES_4K_ENCTAB:
+/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
+	.long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
+	.long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
+	.long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
+	.long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
+	.long R(60, 30, 30, 50), R(02, 01, 01, 03)
+	.long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
+	.long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
+	.long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
+	.long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
+	.long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
+	.long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
+	.long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
+	.long R(41, ad, ad, ec), R(b3, d4, d4, 67)
+	.long R(5f, a2, a2, fd), R(45, af, af, ea)
+	.long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
+	.long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
+	.long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
+	.long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
+	.long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
+	.long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
+	.long R(68, 34, 34, 5c), R(51, a5, a5, f4)
+	.long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
+	.long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
+	.long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
+	.long R(08, 04, 04, 0c), R(95, c7, c7, 52)
+	.long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
+	.long R(30, 18, 18, 28), R(37, 96, 96, a1)
+	.long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
+	.long R(0e, 07, 07, 09), R(24, 12, 12, 36)
+	.long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
+	.long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
+	.long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
+	.long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
+	.long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
+	.long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
+	.long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
+	.long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
+	.long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
+	.long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
+	.long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
+	.long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
+	.long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
+	.long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
+	.long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
+	.long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
+	.long R(67, be, be, d9), R(72, 39, 39, 4b)
+	.long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
+	.long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
+	.long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
+	.long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
+	.long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
+	.long R(66, 33, 33, 55), R(11, 85, 85, 94)
+	.long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
+	.long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
+	.long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
+	.long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
+	.long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
+	.long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
+	.long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
+	.long R(70, 38, 38, 48), R(f1, f5, f5, 04)
+	.long R(63, bc, bc, df), R(77, b6, b6, c1)
+	.long R(af, da, da, 75), R(42, 21, 21, 63)
+	.long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
+	.long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
+	.long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
+	.long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
+	.long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
+	.long R(88, 44, 44, cc), R(2e, 17, 17, 39)
+	.long R(93, c4, c4, 57), R(55, a7, a7, f2)
+	.long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
+	.long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
+	.long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
+	.long R(c0, 60, 60, a0), R(19, 81, 81, 98)
+	.long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
+	.long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
+	.long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
+	.long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
+	.long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
+	.long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
+	.long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
+	.long R(db, e0, e0, 3b), R(64, 32, 32, 56)
+	.long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
+	.long R(92, 49, 49, db), R(0c, 06, 06, 0a)
+	.long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
+	.long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
+	.long R(43, ac, ac, ef), R(c4, 62, 62, a6)
+	.long R(39, 91, 91, a8), R(31, 95, 95, a4)
+	.long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
+	.long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
+	.long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
+	.long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
+	.long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
+	.long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
+	.long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
+	.long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
+	.long R(47, ae, ae, e9), R(10, 08, 08, 18)
+	.long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
+	.long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
+	.long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
+	.long R(73, b4, b4, c7), R(97, c6, c6, 51)
+	.long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
+	.long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
+	.long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
+	.long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
+	.long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
+	.long R(71, b5, b5, c4), R(cc, 66, 66, aa)
+	.long R(90, 48, 48, d8), R(06, 03, 03, 05)
+	.long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
+	.long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
+	.long R(ae, 57, 57, f9), R(69, b9, b9, d0)
+	.long R(17, 86, 86, 91), R(99, c1, c1, 58)
+	.long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
+	.long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
+	.long R(2b, 98, 98, b3), R(22, 11, 11, 33)
+	.long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
+	.long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
+	.long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
+	.long R(15, 87, 87, 92), R(c9, e9, e9, 20)
+	.long R(87, ce, ce, 49), R(aa, 55, 55, ff)
+	.long R(50, 28, 28, 78), R(a5, df, df, 7a)
+	.long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
+	.long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
+	.long R(65, bf, bf, da), R(d7, e6, e6, 31)
+	.long R(84, 42, 42, c6), R(d0, 68, 68, b8)
+	.long R(82, 41, 41, c3), R(29, 99, 99, b0)
+	.long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
+	.long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
+	.long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
+.globl PPC_AES_4K_DECTAB
+PPC_AES_4K_DECTAB:
+/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
+	.long R(51, f4, a7, 50), R(7e, 41, 65, 53)
+	.long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
+	.long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
+	.long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
+	.long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
+	.long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
+	.long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
+	.long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
+	.long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
+	.long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
+	.long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
+	.long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
+	.long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
+	.long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
+	.long R(d4, be, 83, 2d), R(58, 74, 21, d3)
+	.long R(49, e0, 69, 29), R(8e, c9, c8, 44)
+	.long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
+	.long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
+	.long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
+	.long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
+	.long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
+	.long R(97, 51, 33, 60), R(62, 53, 7f, 45)
+	.long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
+	.long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
+	.long R(70, 48, 68, 58), R(8f, 45, fd, 19)
+	.long R(94, de, 6c, 87), R(52, 7b, f8, b7)
+	.long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
+	.long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
+	.long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
+	.long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
+	.long R(30, 28, 87, f2), R(23, bf, a5, b2)
+	.long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
+	.long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
+	.long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
+	.long R(65, da, f4, cd), R(06, 05, be, d5)
+	.long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
+	.long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
+	.long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
+	.long R(0b, 83, ec, 39), R(40, 60, ef, aa)
+	.long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
+	.long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
+	.long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
+	.long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
+	.long R(04, 06, d4, 6f), R(60, 50, 15, ff)
+	.long R(19, 98, fb, 24), R(d6, bd, e9, 97)
+	.long R(89, 40, 43, cc), R(67, d9, 9e, 77)
+	.long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
+	.long R(e7, 19, 5b, 38), R(79, c8, ee, db)
+	.long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
+	.long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
+	.long R(09, 80, 86, 83), R(32, 2b, ed, 48)
+	.long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
+	.long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
+	.long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
+	.long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
+	.long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
+	.long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
+	.long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
+	.long R(80, c0, c5, 4f), R(61, dc, 20, a2)
+	.long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
+	.long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
+	.long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
+	.long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
+	.long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
+	.long R(57, f1, 19, 85), R(af, 75, 07, 4c)
+	.long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
+	.long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
+	.long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
+	.long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
+	.long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
+	.long R(d7, 31, dc, ca), R(42, 63, 85, 10)
+	.long R(13, 97, 22, 40), R(84, c6, 11, 20)
+	.long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
+	.long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
+	.long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
+	.long R(0d, 86, 52, ec), R(77, c1, e3, d0)
+	.long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
+	.long R(11, 94, 48, fa), R(47, e9, 64, 22)
+	.long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
+	.long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
+	.long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
+	.long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
+	.long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
+	.long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
+	.long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
+	.long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
+	.long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
+	.long R(2e, 39, f7, 5e), R(82, c3, af, f5)
+	.long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
+	.long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
+	.long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
+	.long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
+	.long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
+	.long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
+	.long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
+	.long R(21, bc, cf, 08), R(ef, 15, e8, e6)
+	.long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
+	.long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
+	.long R(31, a4, b2, af), R(2a, 3f, 23, 31)
+	.long R(c6, a5, 94, 30), R(35, a2, 66, c0)
+	.long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
+	.long R(e0, 90, d0, b0), R(33, a7, d8, 15)
+	.long R(f1, 04, 98, 4a), R(41, ec, da, f7)
+	.long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
+	.long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
+	.long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
+	.long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
+	.long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
+	.long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
+	.long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
+	.long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
+	.long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
+	.long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
+	.long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
+	.long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
+	.long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
+	.long R(9c, d2, df, 59), R(55, f2, 73, 3f)
+	.long R(18, 14, ce, 79), R(73, c7, 37, bf)
+	.long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
+	.long R(df, 3d, 6f, 14), R(78, 44, db, 86)
+	.long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
+	.long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
+	.long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
+	.long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
+	.long R(39, a8, 01, 71), R(08, 0c, b3, de)
+	.long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
+	.long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
+	.long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
+.globl PPC_AES_4K_DECTAB2
+PPC_AES_4K_DECTAB2:
+/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
+	.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
new file mode 100644
index 000000000000..10cdf5bceebb
--- /dev/null
+++ b/arch/powerpc/crypto/md5-asm.S
@@ -0,0 +1,243 @@
+/*
+ * Fast MD5 implementation for PPC
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP	r3
+#define rWP	r4
+
+#define rH0	r0
+#define rH1	r6
+#define rH2	r7
+#define rH3	r5
+
+#define rW00	r8
+#define rW01	r9
+#define rW02	r10
+#define rW03	r11
+#define rW04	r12
+#define rW05	r14
+#define rW06	r15
+#define rW07	r16
+#define rW08	r17
+#define rW09	r18
+#define rW10	r19
+#define rW11	r20
+#define rW12	r21
+#define rW13	r22
+#define rW14	r23
+#define rW15	r24
+
+#define rT0	r25
+#define rT1	r26
+
+#define INITIALIZE \
+	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
+	SAVE_8GPRS(14, r1);		/* push registers onto stack	*/ \
+	SAVE_4GPRS(22, r1);						   \
+	SAVE_GPR(26, r1)
+
+#define FINALIZE \
+	REST_8GPRS(14, r1);		/* pop registers from stack	*/ \
+	REST_4GPRS(22, r1);						   \
+	REST_GPR(26, r1);						   \
+	addi	r1,r1,INT_FRAME_SIZE;
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rWP;	/* load data			*/
+#define INC_PTR \
+	addi		rWP,rWP,4;	/* increment per word		*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#else
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rWP);	/* load data			*/
+#define INC_PTR				/* nothing to do		*/
+#define NEXT_BLOCK \
+	addi		rWP,rWP,64;	/* increment per block		*/
+#endif
+
+#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
+	LOAD_DATA(w0, off)		/*    W				*/ \
+	and		rT0,b,c;	/* 1: f = b and c		*/ \
+	INC_PTR				/*    ptr++			*/ \
+	andc		rT1,d,b;	/* 1: f' = ~b and d		*/ \
+	LOAD_DATA(w1, off+4)		/*    W				*/ \
+	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
+	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
+	add		a,a,rT0;	/* 1: a = a + f			*/ \
+	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
+	addis		w1,w1,k1h;	/* 2: wk = w + k		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addi		w1,w1,k1l;	/* 2: wk = w + k'		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	and		rT0,a,b;	/* 2: f = b and c		*/ \
+	andc		rT1,c,a;	/* 2: f' = ~b and d		*/ \
+	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
+	add		d,d,rT0;	/* 2: a = a + f			*/ \
+	INC_PTR				/*    ptr++			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+	andc		rT0,c,d;	/* 1: f = c and ~d		*/ \
+	and		rT1,b,d;	/* 1: f' = b and d		*/ \
+	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
+	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
+	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
+	add		a,a,rT0;	/* 1: a = a + f			*/ \
+	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
+	andc		rT0,b,c;	/* 2: f = c and ~d		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	and		rT1,a,c;	/* 2: f' = b and d		*/ \
+	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
+	add		d,d,rT0;	/* 2: a = a + f			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a +b			*/
+
+#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+	xor		rT0,b,c;	/* 1: f' = b xor c		*/ \
+	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
+	xor		rT1,rT0,d;	/* 1: f = f xor f'		*/ \
+	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
+	add		a,a,rT1;	/* 1: a = a + f			*/ \
+	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	xor		rT1,rT0,a;	/* 2: f = b xor f'		*/ \
+	add		d,d,rT1;	/* 2: a = a + f			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+	addi		w0,w0,k0l;	/* 1: w = w + k			*/ \
+	orc		rT0,b,d;	/* 1: f = b or ~d		*/ \
+	addis		w0,w0,k0h;	/* 1: w = w + k'		*/ \
+	xor		rT0,rT0,c;	/* 1: f = f xor c		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addi		w1,w1,k1l;	/* 2: w = w + k			*/ \
+	add		a,a,rT0;	/* 1: a = a + f			*/ \
+	addis		w1,w1,k1h;	/* 2: w = w + k'		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	orc		rT0,a,c;	/* 2: f = b or ~d		*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	xor		rT0,rT0,b;	/* 2: f = f xor c		*/ \
+	add		d,d,rT0;	/* 2: a = a + f			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+_GLOBAL(ppc_md5_transform)
+	INITIALIZE
+
+	mtctr		r5
+	lwz		rH0,0(rHP)
+	lwz		rH1,4(rHP)
+	lwz		rH2,8(rHP)
+	lwz		rH3,12(rHP)
+
+ppc_md5_main:
+	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
+		0xd76b, -23432, 0xe8c8, -18602)
+	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
+		0x2420, 0x70db, 0xc1be, -12562)
+	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
+		0xf57c, 0x0faf, 0x4788, -14806)
+	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
+		0xa830, 0x4613, 0xfd47, -27391)
+	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
+		0x6981, -26408, 0x8b45,  -2129)
+	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
+		0xffff, 0x5bb1, 0x895d, -10306)
+	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
+		0x6b90, 0x1122, 0xfd98, 0x7193)
+	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
+		0xa679, 0x438e, 0x49b4, 0x0821)
+
+	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
+		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
+	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
+		0x9d02, -32109, 0x124c, 0x2332)
+	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
+		0x8ea7, 0x4a33, 0x0245, -18270)
+	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
+		0x8eee,  -8608, 0xf258,  -5095)
+	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
+		0x969d, -10697, 0x1cbe, -15288)
+	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
+		0x3317, 0x3e99, 0xdbd9, 0x7c15)
+	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
+		0xac4b, 0x7772, 0xd8cf, 0x331d)
+	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
+		0x6a28, 0x6dd8, 0x219a, 0x3b68)
+
+	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
+		0x29cb, 0x28e5, 0x4218,  -7788)
+	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
+		0x473f, 0x06d1, 0x3aae, 0x3036)
+	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
+		0xaea1, -15134, 0x640b, -11295)
+	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
+		0x8f4c, 0x4887, 0xbc7c, -22499)
+	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
+		0x7eb8, -27199, 0x00ea, 0x6050)
+	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
+		0xe01a, 0x22fe, 0x4447, 0x69c5)
+	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
+		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
+	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
+		0x4701, -27017, 0xc7bd, -19859)
+
+	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
+		0x0988,  -1462, 0x4c70, -19401)
+	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
+		0xadaf,  -5221, 0xfc99, 0x66f7)
+	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
+		0x7e80, -16418, 0xba1e, -25587)
+	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
+		0x4130, 0x380d, 0xe0c5, 0x738d)
+	lwz		rW00,0(rHP)
+	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
+		0xe837, -30770, 0xde8a, 0x69e8)
+	lwz		rW14,4(rHP)
+	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
+		0x9e79, 0x260f, 0x256d, -27941)
+	lwz		rW12,8(rHP)
+	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
+		0xab75, -20775, 0x4f9e, -28397)
+	lwz		rW10,12(rHP)
+	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
+		0x662b, 0x7c56, 0x11b2, 0x0358)
+
+	add		rH0,rH0,rW00
+	stw		rH0,0(rHP)
+	add		rH1,rH1,rW14
+	stw		rH1,4(rHP)
+	add		rH2,rH2,rW12
+	stw		rH2,8(rHP)
+	add		rH3,rH3,rW10
+	stw		rH3,12(rHP)
+	NEXT_BLOCK
+
+	bdnz		ppc_md5_main
+
+	FINALIZE
+	blr
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
new file mode 100644
index 000000000000..452fb4dc575f
--- /dev/null
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -0,0 +1,165 @@
+/*
+ * Glue code for MD5 implementation for PPC assembler
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/md5.h>
+#include <asm/byteorder.h>
+
+extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
+
+static inline void ppc_md5_clear_context(struct md5_state *sctx)
+{
+	int count = sizeof(struct md5_state) >> 2;
+	u32 *ptr = (u32 *)sctx;
+
+	/* make sure we can clear the fast way */
+	BUILD_BUG_ON(sizeof(struct md5_state) % 4);
+	do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_md5_init(struct shash_desc *desc)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+
+	sctx->hash[0] = 0x67452301;
+	sctx->hash[1] = 0xefcdab89;
+	sctx->hash[2] = 0x98badcfe;
+	sctx->hash[3] =	0x10325476;
+	sctx->byte_count = 0;
+
+	return 0;
+}
+
+static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->byte_count & 0x3f;
+	unsigned int avail = 64 - offset;
+	const u8 *src = data;
+
+	sctx->byte_count += len;
+
+	if (avail > len) {
+		memcpy((char *)sctx->block + offset, src, len);
+		return 0;
+	}
+
+	if (offset) {
+		memcpy((char *)sctx->block + offset, src, avail);
+		ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
+		len -= avail;
+		src += avail;
+	}
+
+	if (len > 63) {
+		ppc_md5_transform(sctx->hash, src, len >> 6);
+		src += len & ~0x3f;
+		len &= 0x3f;
+	}
+
+	memcpy((char *)sctx->block, src, len);
+	return 0;
+}
+
+static int ppc_md5_final(struct shash_desc *desc, u8 *out)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->byte_count & 0x3f;
+	const u8 *src = (const u8 *)sctx->block;
+	u8 *p = (u8 *)src + offset;
+	int padlen = 55 - offset;
+	__le64 *pbits = (__le64 *)((char *)sctx->block + 56);
+	__le32 *dst = (__le32 *)out;
+
+	*p++ = 0x80;
+
+	if (padlen < 0) {
+		memset(p, 0x00, padlen + sizeof (u64));
+		ppc_md5_transform(sctx->hash, src, 1);
+		p = (char *)sctx->block;
+		padlen = 56;
+	}
+
+	memset(p, 0, padlen);
+	*pbits = cpu_to_le64(sctx->byte_count << 3);
+	ppc_md5_transform(sctx->hash, src, 1);
+
+	dst[0] = cpu_to_le32(sctx->hash[0]);
+	dst[1] = cpu_to_le32(sctx->hash[1]);
+	dst[2] = cpu_to_le32(sctx->hash[2]);
+	dst[3] = cpu_to_le32(sctx->hash[3]);
+
+	ppc_md5_clear_context(sctx);
+	return 0;
+}
+
+static int ppc_md5_export(struct shash_desc *desc, void *out)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+static int ppc_md5_import(struct shash_desc *desc, const void *in)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	MD5_DIGEST_SIZE,
+	.init		=	ppc_md5_init,
+	.update		=	ppc_md5_update,
+	.final		=	ppc_md5_final,
+	.export		=	ppc_md5_export,
+	.import		=	ppc_md5_import,
+	.descsize	=	sizeof(struct md5_state),
+	.statesize	=	sizeof(struct md5_state),
+	.base		=	{
+		.cra_name	=	"md5",
+		.cra_driver_name=	"md5-ppc",
+		.cra_priority	=	200,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	MD5_HMAC_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init ppc_md5_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+static void __exit ppc_md5_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(ppc_md5_mod_init);
+module_exit(ppc_md5_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
+
+MODULE_ALIAS_CRYPTO("md5");
+MODULE_ALIAS_CRYPTO("md5-ppc");
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S
new file mode 100644
index 000000000000..fcb6cf002889
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-asm.S
@@ -0,0 +1,299 @@
+/*
+ * Fast SHA-1 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP	r3	/* pointer to hash value			*/
+#define rWP	r4	/* pointer to input				*/
+#define rKP	r5	/* pointer to constants				*/
+
+#define rW0	r14	/* 64 bit round words				*/
+#define rW1	r15
+#define rW2	r16
+#define rW3	r17
+#define rW4	r18
+#define rW5	r19
+#define rW6	r20
+#define rW7	r21
+
+#define rH0	r6	/* 32 bit hash values 				*/
+#define rH1	r7
+#define rH2	r8
+#define rH3	r9
+#define rH4	r10
+
+#define rT0	r22	/* 64 bit temporary				*/
+#define rT1	r0	/* 32 bit temporaries				*/
+#define rT2	r11
+#define rT3	r12
+
+#define rK	r23	/* 64 bit constant in volatile register		*/
+
+#define LOAD_K01
+
+#define LOAD_K11 \
+	evlwwsplat	rK,0(rKP);
+
+#define LOAD_K21 \
+	evlwwsplat	rK,4(rKP);
+
+#define LOAD_K31 \
+	evlwwsplat	rK,8(rKP);
+
+#define LOAD_K41 \
+	evlwwsplat	rK,12(rKP);
+
+#define INITIALIZE \
+	stwu		r1,-128(r1);	/* create stack frame		*/ \
+	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
+	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
+	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
+	evstdw		r17,32(r1);					   \
+	evstdw		r18,40(r1);					   \
+	evstdw		r19,48(r1);					   \
+	evstdw		r20,56(r1);					   \
+	evstdw		r21,64(r1);					   \
+	evstdw		r22,72(r1);					   \
+	evstdw		r23,80(r1);
+
+
+#define FINALIZE \
+	evldw		r14,8(r1);	/* restore SPE registers	*/ \
+	evldw		r15,16(r1);					   \
+	evldw		r16,24(r1);					   \
+	evldw		r17,32(r1);					   \
+	evldw		r18,40(r1);					   \
+	evldw		r19,48(r1);					   \
+	evldw		r20,56(r1);					   \
+	evldw		r21,64(r1);					   \
+	evldw		r22,72(r1);					   \
+	evldw		r23,80(r1);					   \
+	xor		r0,r0,r0;					   \
+	stw		r0,8(r1);	/* Delete sensitive data	*/ \
+	stw		r0,16(r1);	/* that we might have pushed	*/ \
+	stw		r0,24(r1);	/* from other context that runs	*/ \
+	stw		r0,32(r1);	/* the same code. Assume that	*/ \
+	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
+	stw		r0,48(r1);	/* were already overwritten on	*/ \
+	stw		r0,56(r1);	/* the way down to here		*/ \
+	stw		r0,64(r1);					   \
+	stw		r0,72(r1);					   \
+	stw		r0,80(r1);					   \
+	addi		r1,r1,128;	/* cleanup stack frame		*/
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rWP);	/* load data			*/
+#define NEXT_BLOCK \
+	addi		rWP,rWP,64;	/* increment per block		*/
+#else
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rWP;	/* load data			*/ \
+	addi		rWP,rWP,4;	/* increment per word		*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#endif
+
+#define	R_00_15(a, b, c, d, e, w0, w1, k, off) \
+	LOAD_DATA(w0, off)		/* 1: W				*/ \
+	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
+	LOAD_K##k##1							   \
+	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
+	rotrwi		rT0,a,27;	/* 1: A' = A rotl 5		*/ \
+	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
+	add		e,e,rT0;	/* 1: E = E + A'		*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	add		e,e,w0;		/* 1: E = E + W			*/ \
+	LOAD_DATA(w1, off+4)		/* 2: W				*/ \
+	add		e,e,rT2;	/* 1: E = E + F			*/ \
+	and		rT1,a,b;	/* 2: F' = B and C 		*/ \
+	add		e,e,rK;		/* 1: E = E + K			*/ \
+	andc		rT2,c,a;	/* 2: F" = ~B and D 		*/ \
+	add		d,d,rK;		/* 2: E = E + K			*/ \
+	or		rT2,rT2,rT1;	/* 2: F = F' or F"		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	add		d,d,w1;		/* 2: E = E + W			*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	add		d,d,rT0;	/* 2: E = E + A'		*/ \
+	evmergelo	w1,w1,w0;	/*    mix W[0]/W[1]		*/ \
+	add		d,d,rT2		/* 2: E = E + F			*/
+
+#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
+	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
+	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
+	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
+	or		rT1,rT1,rT2;	/* 1: F = F' or F"		*/ \
+	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
+	add		e,e,rT1;	/* 1: E = E + F			*/ \
+	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
+	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
+	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
+	add		e,e,rT2;	/* 1: E = E + A'		*/ \
+	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	LOAD_K##k##1							   \
+	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
+	add		e,e,rT0;	/* 1: E = E + WK		*/ \
+	add		d,d,rT1;	/* 2: E = E + WK		*/ \
+	and		rT2,a,b;	/* 2: F' = B and C 		*/ \
+	andc		rT1,c,a;	/* 2: F" = ~B and D 		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	or		rT1,rT1,rT2;	/* 2: F = F' or F"		*/ \
+	add		d,d,rT0;	/* 2: E = E + A'		*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	add		d,d,rT1		/* 2: E = E + F			*/
+
+#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
+	xor		rT2,b,c;	/* 1: F' = B xor C		*/ \
+	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
+	xor		rT2,rT2,d;	/* 1: F = F' xor D		*/ \
+	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
+	add		e,e,rT2;	/* 1: E = E + F			*/ \
+	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
+	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
+	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
+	add		e,e,rT2;	/* 1: E = E + A'		*/ \
+	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	LOAD_K##k##1							   \
+	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
+	add		e,e,rT0;	/* 1: E = E + WK		*/ \
+	xor		rT2,a,b;	/* 2: F' = B xor C		*/ \
+	add		d,d,rT1;	/* 2: E = E + WK		*/ \
+	xor		rT2,rT2,c;	/* 2: F = F' xor D		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	add		d,d,rT2;	/* 2: E = E + F			*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	add		d,d,rT0		/* 2: E = E + A'		*/
+
+#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	and		rT2,b,c;	/* 1: F' = B and C		*/ \
+	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
+	or		rT1,b,c;	/* 1: F" = B or C		*/ \
+	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
+	and		rT1,d,rT1;	/* 1: F" = F" and D		*/ \
+	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
+	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
+	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
+	add		e,e,rT2;	/* 1: E = E + F			*/ \
+	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
+	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
+	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
+	add		e,e,rT2;	/* 1: E = E + A'		*/ \
+	LOAD_K##k##1							   \
+	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	add		e,e,rT0;	/* 1: E = E + WK		*/ \
+	and		rT2,a,b;	/* 2: F' = B and C		*/ \
+	or		rT0,a,b;	/* 2: F" = B or C		*/ \
+	add		d,d,rT1;	/* 2: E = E + WK		*/ \
+	and		rT0,c,rT0;	/* 2: F" = F" and D		*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	or		rT2,rT2,rT0;	/* 2: F = F' or F"		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	add		d,d,rT2;	/* 2: E = E + F			*/ \
+	add		d,d,rT0		/* 2: E = E + A'		*/
+
+#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
+
+_GLOBAL(ppc_spe_sha1_transform)
+	INITIALIZE
+
+	lwz		rH0,0(rHP)
+	lwz		rH1,4(rHP)
+	mtctr		r5
+	lwz		rH2,8(rHP)
+	lis		rKP,PPC_SPE_SHA1_K@h
+	lwz		rH3,12(rHP)
+	ori		rKP,rKP,PPC_SPE_SHA1_K@l
+	lwz		rH4,16(rHP)
+
+ppc_spe_sha1_main:
+	R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
+	R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
+	R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
+	R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
+	R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
+	R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
+	R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
+	R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
+
+	R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
+	R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
+
+	R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
+	R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
+	R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
+	R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
+	R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
+	R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
+	R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
+	R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
+	R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
+	R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
+
+	R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
+	R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
+	R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
+	R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
+	R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
+	R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
+	R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
+	R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
+	R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
+	R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
+
+	R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
+	R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
+	R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
+	R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
+	R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
+	R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
+	R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
+	lwz		rT3,0(rHP)
+	R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
+	lwz		rW1,4(rHP)
+	R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
+	lwz		rW2,8(rHP)
+	R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
+	lwz		rW3,12(rHP)
+	NEXT_BLOCK
+	lwz		rW4,16(rHP)
+
+	add		rH0,rH0,rT3
+	stw		rH0,0(rHP)
+	add		rH1,rH1,rW1
+	stw		rH1,4(rHP)
+	add		rH2,rH2,rW2
+	stw		rH2,8(rHP)
+	add		rH3,rH3,rW3
+	stw		rH3,12(rHP)
+	add		rH4,rH4,rW4
+	stw		rH4,16(rHP)
+
+	bdnz		ppc_spe_sha1_main
+
+	FINALIZE
+	blr
+
+.data
+.align 4
+PPC_SPE_SHA1_K:
+	.long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
new file mode 100644
index 000000000000..3e1d22212521
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -0,0 +1,210 @@
+/*
+ * Glue code for SHA-1 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equals to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 2048
+
+extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+	/* We just start SPE operations and will save SPE registers later. */
+	preempt_disable();
+	enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+	/* reenable preemption */
+	preempt_enable();
+}
+
+static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
+{
+	int count = sizeof(struct sha1_state) >> 2;
+	u32 *ptr = (u32 *)sctx;
+
+	/* make sure we can clear the fast way */
+	BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
+	do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_spe_sha1_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA1_H0;
+	sctx->state[1] = SHA1_H1;
+	sctx->state[2] = SHA1_H2;
+	sctx->state[3] = SHA1_H3;
+	sctx->state[4] = SHA1_H4;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->count & 0x3f;
+	const unsigned int avail = 64 - offset;
+	unsigned int bytes;
+	const u8 *src = data;
+
+	if (avail > len) {
+		sctx->count += len;
+		memcpy((char *)sctx->buffer + offset, src, len);
+		return 0;
+	}
+
+	sctx->count += len;
+
+	if (offset) {
+		memcpy((char *)sctx->buffer + offset, src, avail);
+
+		spe_begin();
+		ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
+		spe_end();
+
+		len -= avail;
+		src += avail;
+	}
+
+	while (len > 63) {
+		bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
+		bytes = bytes & ~0x3f;
+
+		spe_begin();
+		ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
+		spe_end();
+
+		src += bytes;
+		len -= bytes;
+	};
+
+	memcpy((char *)sctx->buffer, src, len);
+	return 0;
+}
+
+static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->count & 0x3f;
+	char *p = (char *)sctx->buffer + offset;
+	int padlen;
+	__be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
+	__be32 *dst = (__be32 *)out;
+
+	padlen = 55 - offset;
+	*p++ = 0x80;
+
+	spe_begin();
+
+	if (padlen < 0) {
+		memset(p, 0x00, padlen + sizeof (u64));
+		ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+		p = (char *)sctx->buffer;
+		padlen = 56;
+	}
+
+	memset(p, 0, padlen);
+	*pbits = cpu_to_be64(sctx->count << 3);
+	ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+
+	spe_end();
+
+	dst[0] = cpu_to_be32(sctx->state[0]);
+	dst[1] = cpu_to_be32(sctx->state[1]);
+	dst[2] = cpu_to_be32(sctx->state[2]);
+	dst[3] = cpu_to_be32(sctx->state[3]);
+	dst[4] = cpu_to_be32(sctx->state[4]);
+
+	ppc_sha1_clear_context(sctx);
+	return 0;
+}
+
+static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	ppc_spe_sha1_init,
+	.update		=	ppc_spe_sha1_update,
+	.final		=	ppc_spe_sha1_final,
+	.export		=	ppc_spe_sha1_export,
+	.import		=	ppc_spe_sha1_import,
+	.descsize	=	sizeof(struct sha1_state),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-ppc-spe",
+		.cra_priority	=	300,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init ppc_spe_sha1_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+static void __exit ppc_spe_sha1_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(ppc_spe_sha1_mod_init);
+module_exit(ppc_spe_sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-ppc-spe");
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S
new file mode 100644
index 000000000000..2d10e4c08f03
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-asm.S
@@ -0,0 +1,323 @@
+/*
+ * Fast SHA-256 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP	r3	/* pointer to hash values in memory		*/
+#define rKP	r24	/* pointer to round constants			*/
+#define rWP	r4	/* pointer to input data			*/
+
+#define rH0	r5	/* 8 32 bit hash values in 8 registers		*/
+#define rH1	r6
+#define rH2	r7
+#define rH3	r8
+#define rH4	r9
+#define rH5	r10
+#define rH6	r11
+#define rH7	r12
+
+#define rW0	r14	/* 64 bit registers. 16 words in 8 registers	*/
+#define rW1	r15
+#define rW2	r16
+#define rW3	r17
+#define rW4	r18
+#define rW5	r19
+#define rW6	r20
+#define rW7	r21
+
+#define rT0	r22	/* 64 bit temporaries 				*/
+#define rT1	r23
+#define rT2	r0	/* 32 bit temporaries				*/
+#define rT3	r25
+
+#define CMP_KN_LOOP
+#define CMP_KC_LOOP \
+	cmpwi		rT1,0;
+
+#define INITIALIZE \
+	stwu		r1,-128(r1);	/* create stack frame		*/ \
+	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
+	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
+	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
+	evstdw		r17,32(r1);					   \
+	evstdw		r18,40(r1);					   \
+	evstdw		r19,48(r1);					   \
+	evstdw		r20,56(r1);					   \
+	evstdw		r21,64(r1);					   \
+	evstdw		r22,72(r1);					   \
+	evstdw		r23,80(r1);					   \
+	stw		r24,88(r1);	/* save normal registers	*/ \
+	stw		r25,92(r1);
+
+
+#define FINALIZE \
+	evldw		r14,8(r1);	/* restore SPE registers	*/ \
+	evldw		r15,16(r1);					   \
+	evldw		r16,24(r1);					   \
+	evldw		r17,32(r1);					   \
+	evldw		r18,40(r1);					   \
+	evldw		r19,48(r1);					   \
+	evldw		r20,56(r1);					   \
+	evldw		r21,64(r1);					   \
+	evldw		r22,72(r1);					   \
+	evldw		r23,80(r1);					   \
+	lwz		r24,88(r1);	/* restore normal registers	*/ \
+	lwz		r25,92(r1);					   \
+	xor		r0,r0,r0;					   \
+	stw		r0,8(r1);	/* Delete sensitive data	*/ \
+	stw		r0,16(r1);	/* that we might have pushed	*/ \
+	stw		r0,24(r1);	/* from other context that runs	*/ \
+	stw		r0,32(r1);	/* the same code. Assume that	*/ \
+	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
+	stw		r0,48(r1);	/* was already overwritten on	*/ \
+	stw		r0,56(r1);	/* the way down to here		*/ \
+	stw		r0,64(r1);					   \
+	stw		r0,72(r1);					   \
+	stw		r0,80(r1);					   \
+	addi		r1,r1,128;	/* cleanup stack frame		*/
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rWP);	/* load data			*/
+#define NEXT_BLOCK \
+	addi		rWP,rWP,64;	/* increment per block		*/
+#else
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rWP; 	/* load data			*/ \
+	addi		rWP,rWP,4;	/* increment per word		*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#endif
+
+#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
+	LOAD_DATA(w, off)		/* 1: W				*/ \
+	rotrwi		rT0,e,6;	/* 1: S1 = e rotr 6		*/ \
+	rotrwi		rT1,e,11;	/* 1: S1' = e rotr 11		*/ \
+	rotrwi		rT2,e,25;	/* 1: S1" = e rotr 25		*/ \
+	xor		rT0,rT0,rT1;	/* 1: S1 = S1 xor S1'		*/ \
+	and		rT3,e,f;	/* 1: ch = e and f		*/ \
+	xor		rT0,rT0,rT2;	/* 1: S1 = S1 xor S1"		*/ \
+	andc		rT1,g,e;	/* 1: ch' = ~e and g		*/ \
+	lwz		rT2,off(rKP);	/* 1: K				*/ \
+	xor		rT3,rT3,rT1;	/* 1: ch = ch xor ch'		*/ \
+	add		h,h,rT0;	/* 1: temp1 = h + S1		*/ \
+	add		rT3,rT3,w;	/* 1: temp1' = ch + w		*/ \
+	rotrwi		rT0,a,2;	/* 1: S0 = a rotr 2		*/ \
+	add		h,h,rT3;	/* 1: temp1 = temp1 + temp1'	*/ \
+	rotrwi		rT1,a,13;	/* 1: S0' = a rotr 13		*/ \
+	add		h,h,rT2;	/* 1: temp1 = temp1 + K		*/ \
+	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
+	xor		rT0,rT0,rT1;	/* 1: S0 = S0 xor S0'		*/ \
+	add		d,d,h;		/* 1: d = d + temp1		*/ \
+	xor		rT3,rT0,rT3;	/* 1: S0 = S0 xor S0"		*/ \
+	evmergelo	w,w,w;		/*    shift W			*/ \
+	or		rT2,a,b;	/* 1: maj = a or b		*/ \
+	and		rT1,a,b;	/* 1: maj' = a and b		*/ \
+	and		rT2,rT2,c;	/* 1: maj = maj and c		*/ \
+	LOAD_DATA(w, off+4)		/* 2: W				*/ \
+	or		rT2,rT1,rT2;	/* 1: maj = maj or maj'		*/ \
+	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
+	add		rT3,rT3,rT2;	/* 1: temp2 = S0 + maj		*/ \
+	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
+	add		h,h,rT3;	/* 1: h = temp1 + temp2		*/ \
+	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
+	and		rT3,d,e;	/* 2: ch = e and f		*/ \
+	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
+	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
+	lwz		rT2,off+4(rKP);	/* 2: K				*/ \
+	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
+	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
+	add		rT3,rT3,w;	/* 2: temp1' = ch + w		*/ \
+	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
+	add		g,g,rT3;	/* 2: temp1 = temp1 + temp1'	*/ \
+	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
+	add		g,g,rT2;	/* 2: temp1 = temp1 + K		*/ \
+	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
+	or		rT2,h,a;	/* 2: maj = a or b		*/ \
+	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
+	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
+	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
+	add		c,c,g;		/* 2: d = d + temp1		*/ \
+	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
+	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
+	add		g,g,rT3		/* 2: h = temp1 + temp2		*/
+
+#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
+	rotrwi		rT2,e,6;	/* 1: S1 = e rotr 6		*/ \
+	evmergelohi	rT0,w0,w1;	/*    w[-15]			*/ \
+	rotrwi		rT3,e,11;	/* 1: S1' = e rotr 11		*/ \
+	evsrwiu		rT1,rT0,3;	/*    s0 = w[-15] >> 3		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
+	evrlwi		rT0,rT0,25;	/*    s0' = w[-15] rotr	7	*/ \
+	rotrwi		rT3,e,25;	/* 1: S1' = e rotr 25		*/ \
+	evxor		rT1,rT1,rT0;	/*    s0 = s0 xor s0'		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
+	evrlwi		rT0,rT0,21;	/*    s0' = w[-15] rotr 18	*/ \
+	add		h,h,rT2;	/* 1: temp1 = h + S1		*/ \
+	evxor		rT0,rT0,rT1;	/*    s0 = s0 xor s0'		*/ \
+	and		rT2,e,f;	/* 1: ch = e and f		*/ \
+	evaddw		w0,w0,rT0;	/*    w = w[-16] + s0		*/ \
+	andc		rT3,g,e;	/* 1: ch' = ~e and g		*/ \
+	evsrwiu		rT0,w7,10;	/*    s1 = w[-2] >> 10		*/ \
+	xor		rT2,rT2,rT3;	/* 1: ch = ch xor ch'		*/ \
+	evrlwi		rT1,w7,15;	/*    s1' = w[-2] rotr 17	*/ \
+	add		h,h,rT2;	/* 1: temp1 = temp1 + ch	*/ \
+	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
+	rotrwi		rT2,a,2;	/* 1: S0 = a rotr 2		*/ \
+	evrlwi		rT1,w7,13;	/*    s1' = w[-2] rotr 19	*/ \
+	rotrwi		rT3,a,13;	/* 1: S0' = a rotr 13		*/ \
+	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
+	evldw		rT1,off(rKP);	/*    k				*/ \
+	rotrwi		rT3,a,22;	/* 1: S0' = a rotr 22		*/ \
+	evaddw		w0,w0,rT0;	/*    w = w + s1		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
+	evmergelohi	rT0,w4,w5;	/*    w[-7]			*/ \
+	and		rT3,a,b;	/* 1: maj = a and b		*/ \
+	evaddw		w0,w0,rT0;	/*    w = w + w[-7]		*/ \
+	CMP_K##k##_LOOP							   \
+	add		rT2,rT2,rT3;	/* 1: temp2 = S0 + maj		*/ \
+	evaddw		rT1,rT1,w0;	/*    wk = w + k		*/ \
+	xor		rT3,a,b;	/* 1: maj = a xor b		*/ \
+	evmergehi	rT0,rT1,rT1;	/*    wk1/wk2			*/ \
+	and		rT3,rT3,c;	/* 1: maj = maj and c		*/ \
+	add		h,h,rT0;	/* 1: temp1 = temp1 + wk	*/ \
+	add		rT2,rT2,rT3;	/* 1: temp2 = temp2 + maj	*/ \
+	add		g,g,rT1;	/* 2: temp1 = temp1 + wk	*/ \
+	add		d,d,h;		/* 1: d = d + temp1		*/ \
+	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
+	add		h,h,rT2;	/* 1: h = temp1 + temp2		*/ \
+	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
+	rotrwi		rT2,d,25;	/* 2: S" = e rotr 25		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
+	and		rT3,d,e;	/* 2: ch = e and f		*/ \
+	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
+	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
+	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
+	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
+	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
+	add		g,g,rT3;	/* 2: temp1 = temp1 + ch	*/ \
+	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
+	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
+	or		rT2,h,a;	/* 2: maj = a or b		*/ \
+	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
+	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
+	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
+	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
+	add		c,c,g;		/* 2: d = d + temp1		*/ \
+	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
+	add		g,g,rT3		/* 2: h = temp1 + temp2		*/
+
+_GLOBAL(ppc_spe_sha256_transform)
+	INITIALIZE
+
+	mtctr		r5
+	lwz		rH0,0(rHP)
+	lwz		rH1,4(rHP)
+	lwz		rH2,8(rHP)
+	lwz		rH3,12(rHP)
+	lwz		rH4,16(rHP)
+	lwz		rH5,20(rHP)
+	lwz		rH6,24(rHP)
+	lwz		rH7,28(rHP)
+
+ppc_spe_sha256_main:
+	lis		rKP,PPC_SPE_SHA256_K@ha
+	addi		rKP,rKP,PPC_SPE_SHA256_K@l
+
+	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
+	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
+	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
+	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
+	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
+	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
+	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
+	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
+ppc_spe_sha256_16_rounds:
+	addi		rKP,rKP,64
+	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
+		 rW0, rW1, rW4, rW5, rW7, N, 0)
+	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
+		 rW1, rW2, rW5, rW6, rW0, N, 8)
+	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
+		 rW2, rW3, rW6, rW7, rW1, N, 16)
+	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
+		 rW3, rW4, rW7, rW0, rW2, N, 24)
+	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
+		 rW4, rW5, rW0, rW1, rW3, N, 32)
+	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
+		 rW5, rW6, rW1, rW2, rW4, N, 40)
+	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
+		 rW6, rW7, rW2, rW3, rW5, N, 48)
+	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
+		 rW7, rW0, rW3, rW4, rW6, C, 56)
+	bt		gt,ppc_spe_sha256_16_rounds
+
+	lwz		rW0,0(rHP)
+	NEXT_BLOCK
+	lwz		rW1,4(rHP)
+	lwz		rW2,8(rHP)
+	lwz		rW3,12(rHP)
+	lwz		rW4,16(rHP)
+	lwz		rW5,20(rHP)
+	lwz		rW6,24(rHP)
+	lwz		rW7,28(rHP)
+
+	add		rH0,rH0,rW0
+	stw		rH0,0(rHP)
+	add		rH1,rH1,rW1
+	stw		rH1,4(rHP)
+	add		rH2,rH2,rW2
+	stw		rH2,8(rHP)
+	add		rH3,rH3,rW3
+	stw		rH3,12(rHP)
+	add		rH4,rH4,rW4
+	stw		rH4,16(rHP)
+	add		rH5,rH5,rW5
+	stw		rH5,20(rHP)
+	add		rH6,rH6,rW6
+	stw		rH6,24(rHP)
+	add		rH7,rH7,rW7
+	stw		rH7,28(rHP)
+
+	bdnz		ppc_spe_sha256_main
+
+	FINALIZE
+	blr
+
+.data
+.align 5
+PPC_SPE_SHA256_K:
+	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
new file mode 100644
index 000000000000..f4a616fe1a82
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -0,0 +1,275 @@
+/*
+ * Glue code for SHA-256 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care 
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equals to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 1024
+
+extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+	/* We just start SPE operations and will save SPE registers later. */
+	preempt_disable();
+	enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+	/* reenable preemption */
+	preempt_enable();
+}
+
+static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
+{
+	int count = sizeof(struct sha256_state) >> 2;
+	u32 *ptr = (u32 *)sctx;
+
+	/* make sure we can clear the fast way */
+	BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
+	do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_spe_sha256_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int ppc_spe_sha224_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->count & 0x3f;
+	const unsigned int avail = 64 - offset;
+	unsigned int bytes;
+	const u8 *src = data;
+
+	if (avail > len) {
+		sctx->count += len;
+		memcpy((char *)sctx->buf + offset, src, len);
+		return 0;
+	}
+
+	sctx->count += len;
+
+	if (offset) {
+		memcpy((char *)sctx->buf + offset, src, avail);
+
+		spe_begin();
+		ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
+		spe_end();
+
+		len -= avail;
+		src += avail;
+	}
+
+	while (len > 63) {
+		/* cut input data into smaller blocks */
+		bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
+		bytes = bytes & ~0x3f;
+
+		spe_begin();
+		ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
+		spe_end();
+
+		src += bytes;
+		len -= bytes;
+	};
+
+	memcpy((char *)sctx->buf, src, len);
+	return 0;
+}
+
+static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->count & 0x3f;
+	char *p = (char *)sctx->buf + offset;
+	int padlen;
+	__be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
+	__be32 *dst = (__be32 *)out;
+
+	padlen = 55 - offset;
+	*p++ = 0x80;
+
+	spe_begin();
+
+	if (padlen < 0) {
+		memset(p, 0x00, padlen + sizeof (u64));
+		ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+		p = (char *)sctx->buf;
+		padlen = 56;
+	}
+
+	memset(p, 0, padlen);
+	*pbits = cpu_to_be64(sctx->count << 3);
+	ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+
+	spe_end();
+
+	dst[0] = cpu_to_be32(sctx->state[0]);
+	dst[1] = cpu_to_be32(sctx->state[1]);
+	dst[2] = cpu_to_be32(sctx->state[2]);
+	dst[3] = cpu_to_be32(sctx->state[3]);
+	dst[4] = cpu_to_be32(sctx->state[4]);
+	dst[5] = cpu_to_be32(sctx->state[5]);
+	dst[6] = cpu_to_be32(sctx->state[6]);
+	dst[7] = cpu_to_be32(sctx->state[7]);
+
+	ppc_sha256_clear_context(sctx);
+	return 0;
+}
+
+static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
+{
+	u32 D[SHA256_DIGEST_SIZE >> 2];
+	__be32 *dst = (__be32 *)out;
+
+	ppc_spe_sha256_final(desc, (u8 *)D);
+
+	/* avoid bytewise memcpy */
+	dst[0] = D[0];
+	dst[1] = D[1];
+	dst[2] = D[2];
+	dst[3] = D[3];
+	dst[4] = D[4];
+	dst[5] = D[5];
+	dst[6] = D[6];
+
+	/* clear sensitive data */
+	memzero_explicit(D, SHA256_DIGEST_SIZE);
+	return 0;
+}
+
+static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+static struct shash_alg algs[2] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	ppc_spe_sha256_init,
+	.update		=	ppc_spe_sha256_update,
+	.final		=	ppc_spe_sha256_final,
+	.export		=	ppc_spe_sha256_export,
+	.import		=	ppc_spe_sha256_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name=	"sha256-ppc-spe",
+		.cra_priority	=	300,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	ppc_spe_sha224_init,
+	.update		=	ppc_spe_sha256_update,
+	.final		=	ppc_spe_sha224_final,
+	.export		=	ppc_spe_sha256_export,
+	.import		=	ppc_spe_sha256_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name=	"sha224-ppc-spe",
+		.cra_priority	=	300,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int __init ppc_spe_sha256_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit ppc_spe_sha256_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(ppc_spe_sha256_mod_init);
+module_exit(ppc_spe_sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 382b28e364dc..4b87205c230c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,6 +1,8 @@
-
 generic-y += clkdev.h
+generic-y += div64.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index 21be8ae8f809..dc85dcb891cf 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -23,6 +23,8 @@
 #define PPC_STL		stringify_in_c(std)
 #define PPC_STLU	stringify_in_c(stdu)
 #define PPC_LCMPI	stringify_in_c(cmpdi)
+#define PPC_LCMPLI	stringify_in_c(cmpldi)
+#define PPC_LCMP	stringify_in_c(cmpd)
 #define PPC_LONG	stringify_in_c(.llong)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 8)
 #define PPC_TLNEI	stringify_in_c(tdnei)
@@ -52,6 +54,8 @@
 #define PPC_STL		stringify_in_c(stw)
 #define PPC_STLU	stringify_in_c(stwu)
 #define PPC_LCMPI	stringify_in_c(cmpwi)
+#define PPC_LCMPLI	stringify_in_c(cmplwi)
+#define PPC_LCMP	stringify_in_c(cmpw)
 #define PPC_LONG	stringify_in_c(.long)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 4)
 #define PPC_TLNEI	stringify_in_c(twnei)
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 34a05a1a990b..0dc42c5082b7 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -76,9 +76,6 @@ extern void _set_L3CR(unsigned long);
 #define _set_L3CR(val)	do { } while(0)
 #endif
 
-extern void cacheable_memzero(void *p, unsigned int nb);
-extern void *cacheable_memcpy(void *, const void *, unsigned int);
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 5cf5a6d10685..6367b8347dad 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -100,7 +100,7 @@ struct cpu_spec {
 	/*
 	 * Processor specific routine to flush tlbs.
 	 */
-	void		(*flush_tlb)(unsigned long inval_selector);
+	void		(*flush_tlb)(unsigned int action);
 
 };
 
@@ -114,6 +114,12 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
 
 extern const char *powerpc_base_platform;
 
+/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
+enum {
+	TLB_INVAL_SCOPE_GLOBAL = 0,	/* invalidate all TLBs */
+	TLB_INVAL_SCOPE_LPID = 1,	/* invalidate TLBs for current LPID */
+};
+
 #endif /* __ASSEMBLY__ */
 
 /* CPU kernel features */
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 2bf8e9307be9..5be6c4753667 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -25,7 +25,7 @@ extern cpumask_t threads_core_mask;
 #define threads_per_core	1
 #define threads_per_subcore	1
 #define threads_shift		0
-#define threads_core_mask	(CPU_MASK_CPU0)
+#define threads_core_mask	(*get_cpu_mask(0))
 #endif
 
 /* cpu_thread_mask_to_cores - Return a cpumask of one per cores
@@ -55,7 +55,7 @@ static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
 
 static inline int cpu_nr_cores(void)
 {
-	return NR_CPUS >> threads_shift;
+	return nr_cpu_ids >> threads_shift;
 }
 
 static inline cpumask_t cpu_online_cores_map(void)
diff --git a/arch/powerpc/include/asm/dbdma.h b/arch/powerpc/include/asm/dbdma.h
index e23f07e73cb3..6c69836b4ec2 100644
--- a/arch/powerpc/include/asm/dbdma.h
+++ b/arch/powerpc/include/asm/dbdma.h
@@ -42,12 +42,12 @@ struct dbdma_regs {
  * DBDMA command structure.  These fields are all little-endian!
  */
 struct dbdma_cmd {
-    unsigned short req_count;	/* requested byte transfer count */
-    unsigned short command;	/* command word (has bit-fields) */
-    unsigned int   phy_addr;	/* physical data address */
-    unsigned int   cmd_dep;	/* command-dependent field */
-    unsigned short res_count;	/* residual count after completion */
-    unsigned short xfer_status;	/* transfer status */
+	__le16 req_count;	/* requested byte transfer count */
+	__le16 command;		/* command word (has bit-fields) */
+	__le32 phy_addr;	/* physical data address */
+	__le32 cmd_dep;		/* command-dependent field */
+	__le16 res_count;	/* residual count after completion */
+	__le16 xfer_status;	/* transfer status */
 };
 
 /* DBDMA command values in command field */
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 7d2e6235726d..4efc11dacb98 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -31,7 +31,7 @@ typedef struct {
 
 static inline bool dcr_map_ok_native(dcr_host_native_t host)
 {
-	return 1;
+	return true;
 }
 
 #define dcr_map_native(dev, dcr_n, dcr_c) \
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 38faeded7d59..9f1371bab5fc 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -8,6 +8,9 @@
 
 struct dma_map_ops;
 struct device_node;
+#ifdef CONFIG_PPC64
+struct pci_dn;
+#endif
 
 /*
  * Arch extensions to struct device.
@@ -34,6 +37,9 @@ struct dev_archdata {
 #ifdef CONFIG_SWIOTLB
 	dma_addr_t		max_direct_dma_addr;
 #endif
+#ifdef CONFIG_PPC64
+	struct pci_dn		*pci_data;
+#endif
 #ifdef CONFIG_EEH
 	struct eeh_dev		*edev;
 #endif
diff --git a/arch/powerpc/include/asm/div64.h b/arch/powerpc/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/powerpc/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 894d538f3567..9103687b0436 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -191,11 +191,11 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	struct dev_archdata *sd = &dev->archdata;
 
 	if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
-		return 0;
+		return false;
 #endif
 
 	if (!dev->dma_mask)
-		return 0;
+		return false;
 
 	return addr + size - 1 <= *dev->dma_mask;
 }
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 55abfd09e47f..a52db28ecc1e 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -29,7 +29,7 @@
 
 struct pci_dev;
 struct pci_bus;
-struct device_node;
+struct pci_dn;
 
 #ifdef CONFIG_EEH
 
@@ -136,14 +136,14 @@ struct eeh_dev {
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
 	struct pci_controller *phb;	/* Associated PHB		*/
-	struct device_node *dn;		/* Associated device node	*/
+	struct pci_dn *pdn;		/* Associated PCI device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
 	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
 };
 
-static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
+static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
 {
-	return edev ? edev->dn : NULL;
+	return edev ? edev->pdn : NULL;
 }
 
 static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
@@ -200,8 +200,7 @@ struct eeh_ops {
 	char *name;
 	int (*init)(void);
 	int (*post_init)(void);
-	void* (*of_probe)(struct device_node *dn, void *flag);
-	int (*dev_probe)(struct pci_dev *dev, void *flag);
+	void* (*probe)(struct pci_dn *pdn, void *data);
 	int (*set_option)(struct eeh_pe *pe, int option);
 	int (*get_pe_addr)(struct eeh_pe *pe);
 	int (*get_state)(struct eeh_pe *pe, int *state);
@@ -211,10 +210,10 @@ struct eeh_ops {
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*err_inject)(struct eeh_pe *pe, int type, int func,
 			  unsigned long addr, unsigned long mask);
-	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
-	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
+	int (*read_config)(struct pci_dn *pdn, int where, int size, u32 *val);
+	int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val);
 	int (*next_error)(struct eeh_pe **pe);
-	int (*restore_config)(struct device_node *dn);
+	int (*restore_config)(struct pci_dn *pdn);
 };
 
 extern int eeh_subsystem_flags;
@@ -272,7 +271,7 @@ void eeh_pe_restore_bars(struct eeh_pe *pe);
 const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
-void *eeh_dev_init(struct device_node *dn, void *data);
+void *eeh_dev_init(struct pci_dn *pdn, void *data);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
@@ -280,8 +279,8 @@ int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
-void eeh_add_device_early(struct device_node *);
-void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_early(struct pci_dn *);
+void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
@@ -323,7 +322,7 @@ static inline int eeh_init(void)
 	return 0;
 }
 
-static inline void *eeh_dev_init(struct device_node *dn, void *data)
+static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
 {
 	return NULL;
 }
@@ -339,9 +338,9 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
 
 static inline void eeh_addr_cache_build(void) { }
 
-static inline void eeh_add_device_early(struct device_node *dn) { }
+static inline void eeh_add_device_early(struct pci_dn *pdn) { }
 
-static inline void eeh_add_device_tree_early(struct device_node *dn) { }
+static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { }
 
 static inline void eeh_add_device_late(struct pci_dev *dev) { }
 
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 57d289acb803..ee46ffef608e 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -128,10 +128,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 	(0x7ff >> (PAGE_SHIFT - 12)) : \
 	(0x3ffff >> (PAGE_SHIFT - 12)))
 
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
-
 #ifdef CONFIG_SPU_BASE
 /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */
 #define NT_SPU		1
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 681bc0314b6b..e05808a328db 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -42,7 +42,7 @@
 #define FW_FEATURE_SPLPAR	ASM_CONST(0x0000000000100000)
 #define FW_FEATURE_LPAR		ASM_CONST(0x0000000000400000)
 #define FW_FEATURE_PS3_LV1	ASM_CONST(0x0000000000800000)
-#define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
+/* Free				ASM_CONST(0x0000000001000000) */
 #define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
 #define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
@@ -75,8 +75,6 @@ enum {
 	FW_FEATURE_POWERNV_ALWAYS = 0,
 	FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
 	FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
-	FW_FEATURE_CELLEB_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_BEAT,
-	FW_FEATURE_CELLEB_ALWAYS = 0,
 	FW_FEATURE_NATIVE_POSSIBLE = 0,
 	FW_FEATURE_NATIVE_ALWAYS = 0,
 	FW_FEATURE_POSSIBLE =
@@ -89,9 +87,6 @@ enum {
 #ifdef CONFIG_PPC_PS3
 		FW_FEATURE_PS3_POSSIBLE |
 #endif
-#ifdef CONFIG_PPC_CELLEB
-		FW_FEATURE_CELLEB_POSSIBLE |
-#endif
 #ifdef CONFIG_PPC_NATIVE
 		FW_FEATURE_NATIVE_ALWAYS |
 #endif
@@ -106,9 +101,6 @@ enum {
 #ifdef CONFIG_PPC_PS3
 		FW_FEATURE_PS3_ALWAYS &
 #endif
-#ifdef CONFIG_PPC_CELLEB
-		FW_FEATURE_CELLEB_ALWAYS &
-#endif
 #ifdef CONFIG_PPC_NATIVE
 		FW_FEATURE_NATIVE_ALWAYS &
 #endif
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index f1ea5972f6ec..1e27d6338565 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -29,6 +29,7 @@
 #include <linux/bitops.h>
 #include <asm/machdep.h>
 #include <asm/types.h>
+#include <asm/pci-bridge.h>
 
 #define IOMMU_PAGE_SHIFT_4K      12
 #define IOMMU_PAGE_SIZE_4K       (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K)
@@ -78,6 +79,9 @@ struct iommu_table {
 	struct iommu_group *it_group;
 #endif
 	void (*set_bypass)(struct iommu_table *tbl, bool enable);
+#ifdef CONFIG_PPC_POWERNV
+	void           *data;
+#endif
 };
 
 /* Pure 2^n version of get_order */
@@ -169,7 +173,7 @@ extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
 			     struct dma_attrs *attrs);
 
 extern void iommu_init_early_pSeries(void);
-extern void iommu_init_early_dart(void);
+extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
 extern void iommu_init_early_pasemi(void);
 
 extern void alloc_dart_table(void);
diff --git a/arch/powerpc/include/asm/irq_regs.h b/arch/powerpc/include/asm/irq_regs.h
deleted file mode 100644
index ba94b51a0a70..000000000000
--- a/arch/powerpc/include/asm/irq_regs.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <asm-generic/irq_regs.h>
-
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 942c7b1678e3..993090422690 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -106,10 +106,6 @@ struct kvmppc_vcpu_book3s {
 	spinlock_t mmu_lock;
 };
 
-#define CONTEXT_HOST		0
-#define CONTEXT_GUEST		1
-#define CONTEXT_GUEST_END	2
-
 #define VSID_REAL	0x07ffffffffc00000ULL
 #define VSID_BAT	0x07ffffffffb00000ULL
 #define VSID_64K	0x0800000000000000ULL
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2d81e202bdcc..14619a59ec09 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -290,11 +290,11 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
 	pte_t old_pte, new_pte = __pte(0);
 
 	while (1) {
-		old_pte = pte_val(*ptep);
+		old_pte = *ptep;
 		/*
 		 * wait until _PAGE_BUSY is clear then set it atomically
 		 */
-		if (unlikely(old_pte & _PAGE_BUSY)) {
+		if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) {
 			cpu_relax();
 			continue;
 		}
@@ -305,16 +305,18 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
 			return __pte(0);
 #endif
 		/* If pte is not present return None */
-		if (unlikely(!(old_pte & _PAGE_PRESENT)))
+		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
 			return __pte(0);
 
 		new_pte = pte_mkyoung(old_pte);
 		if (writing && pte_write(old_pte))
 			new_pte = pte_mkdirty(new_pte);
 
-		if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte,
-					     new_pte))
+		if (pte_val(old_pte) == __cmpxchg_u64((unsigned long *)ptep,
+						      pte_val(old_pte),
+						      pte_val(new_pte))) {
 			break;
+		}
 	}
 	return new_pte;
 }
@@ -335,7 +337,7 @@ static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
 {
 	if (key)
 		return PP_RWRX <= pp && pp <= PP_RXRX;
-	return 1;
+	return true;
 }
 
 static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
@@ -373,7 +375,7 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
 	unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
 
 	if (pagesize <= PAGE_SIZE)
-		return 1;
+		return true;
 	return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
 }
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8ef05121d3cd..c610961720c7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -585,7 +585,7 @@ struct kvm_vcpu_arch {
 	pgd_t *pgdir;
 
 	u8 io_gpr; /* GPR used as IO source/target */
-	u8 mmio_is_bigendian;
+	u8 mmio_host_swabbed;
 	u8 mmio_sign_extend;
 	u8 osi_needed;
 	u8 osi_enabled;
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/powerpc/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c8175a3fe560..ef8899432ae7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -103,9 +103,6 @@ struct machdep_calls {
 #endif
 #endif /* CONFIG_PPC64 */
 
-	void		(*pci_dma_dev_setup)(struct pci_dev *dev);
-	void		(*pci_dma_bus_setup)(struct pci_bus *bus);
-
 	/* Platform set_dma_mask and dma_get_required_mask overrides */
 	int		(*dma_set_mask)(struct device *dev, u64 dma_mask);
 	u64		(*dma_get_required_mask)(struct device *dev);
@@ -125,9 +122,8 @@ struct machdep_calls {
 	unsigned int	(*get_irq)(void);
 
 	/* PCI stuff */
-	/* Called after scanning the bus, before allocating resources */
+	/* Called after allocating resources */
 	void		(*pcibios_fixup)(void);
-	int		(*pci_probe_mode)(struct pci_bus *);
 	void		(*pci_irq_fixup)(struct pci_dev *dev);
 	int		(*pcibios_root_bridge_prepare)(struct pci_host_bridge
 				*bridge);
@@ -237,18 +233,13 @@ struct machdep_calls {
 	/* Called for each PCI bus in the system when it's probed */
 	void (*pcibios_fixup_bus)(struct pci_bus *);
 
-	/* Called when pci_enable_device() is called. Returns 0 to
-	 * allow assignment/enabling of the device. */
-	int  (*pcibios_enable_device_hook)(struct pci_dev *);
-
 	/* Called after scan and before resource survey */
 	void (*pcibios_fixup_phb)(struct pci_controller *hose);
 
-	/* Called during PCI resource reassignment */
-	resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type);
-
-	/* Reset the secondary bus of bridge */
-	void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
+#ifdef CONFIG_PCI_IOV
+	void (*pcibios_fixup_sriov)(struct pci_dev *pdev);
+	resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno);
+#endif /* CONFIG_PCI_IOV */
 
 	/* Called to shutdown machine specific hardware not already controlled
 	 * by other drivers.
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 4f13c3ed7acf..1da6a81ce541 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -112,6 +112,7 @@
 #define TLBIEL_INVAL_SET_SHIFT	12
 
 #define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
+#define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h
index 3bef74a9914b..213f3a81593d 100644
--- a/arch/powerpc/include/asm/mpc85xx.h
+++ b/arch/powerpc/include/asm/mpc85xx.h
@@ -61,6 +61,7 @@
 #define SVR_T4240	0x824000
 #define SVR_T4120	0x824001
 #define SVR_T4160	0x824100
+#define SVR_T4080	0x824102
 #define SVR_C291	0x850000
 #define SVR_C292	0x850020
 #define SVR_C293	0x850030
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 754f93d208fa..98697611e7b3 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -34,10 +34,6 @@
 #define		MPIC_GREG_GCONF_BASE_MASK		0x000fffff
 #define		MPIC_GREG_GCONF_MCK			0x08000000
 #define MPIC_GREG_GLOBAL_CONF_1		0x00030
-#define		MPIC_GREG_GLOBAL_CONF_1_SIE		0x08000000
-#define		MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK	0x70000000
-#define		MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO(r)	\
-			(((r) << 28) & MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK)
 #define MPIC_GREG_VENDOR_0		0x00040
 #define MPIC_GREG_VENDOR_1		0x00050
 #define MPIC_GREG_VENDOR_2		0x00060
@@ -396,14 +392,7 @@ extern struct bus_type mpic_subsys;
 #define	MPIC_REGSET_TSI108		MPIC_REGSET(1)	/* Tsi108/109 PIC */
 
 /* Get the version of primary MPIC */
-#ifdef CONFIG_MPIC
 extern u32 fsl_mpic_primary_get_version(void);
-#else
-static inline u32 fsl_mpic_primary_get_version(void)
-{
-	return 0;
-}
-#endif
 
 /* Allocate the controller structure and setup the linux irq descs
  * for the range if interrupts passed in. No HW initialization is
@@ -496,11 +485,5 @@ extern unsigned int mpic_get_coreint_irq(void);
 /* Fetch Machine Check interrupt from primary mpic */
 extern unsigned int mpic_get_mcirq(void);
 
-/* Set the EPIC clock ratio */
-void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio);
-
-/* Enable/Disable EPIC serial interrupt mode */
-void mpic_set_serial_int(struct mpic *mpic, int enable);
-
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_MPIC_H */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
new file mode 100644
index 000000000000..ff1ccb375e60
--- /dev/null
+++ b/arch/powerpc/include/asm/nmi.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_NMI_H
+#define _ASM_NMI_H
+
+#endif /* _ASM_NMI_H */
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe4e626..09a518bb7c03 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -9,12 +9,43 @@
 #ifndef _ASM_POWERPC_NVRAM_H
 #define _ASM_POWERPC_NVRAM_H
 
-
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <uapi/asm/nvram.h>
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+	__be32 error_type;
+	__be32 seq_num;
+};
+
+struct nvram_os_partition {
+	const char *name;
+	int req_size;	/* desired size, in bytes */
+	int min_size;	/* minimum acceptable size (0 means req_size) */
+	long size;	/* size of data portion (excluding err_log_info) */
+	long index;	/* offset of data portion of partition */
+	bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+	__be16 version;
+	__be16 report_length;
+	__be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
 #ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
 extern int nvram_write_error_log(char * buff, int length,
 					 unsigned int err_type, unsigned int err_seq);
 extern int nvram_read_error_log(char * buff, int length,
@@ -50,6 +81,23 @@ extern void	pmac_xpram_write(int xpaddr, u8 data);
 /* Synchronize NVRAM */
 extern void	nvram_sync(void);
 
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+				int length, unsigned int *err_type,
+				unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+				    char *buff, int length,
+				    unsigned int err_type,
+				    unsigned int error_log_cnt);
+
 /* Determine NVRAM size */
 extern ssize_t nvram_get_size(void);
 
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
new file mode 100644
index 000000000000..0321a909e663
--- /dev/null
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -0,0 +1,735 @@
+/*
+ * OPAL API definitions.
+ *
+ * Copyright 2011-2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __OPAL_API_H
+#define __OPAL_API_H
+
+/****** OPAL APIs ******/
+
+/* Return codes */
+#define OPAL_SUCCESS		0
+#define OPAL_PARAMETER		-1
+#define OPAL_BUSY		-2
+#define OPAL_PARTIAL		-3
+#define OPAL_CONSTRAINED	-4
+#define OPAL_CLOSED		-5
+#define OPAL_HARDWARE		-6
+#define OPAL_UNSUPPORTED	-7
+#define OPAL_PERMISSION		-8
+#define OPAL_NO_MEM		-9
+#define OPAL_RESOURCE		-10
+#define OPAL_INTERNAL_ERROR	-11
+#define OPAL_BUSY_EVENT		-12
+#define OPAL_HARDWARE_FROZEN	-13
+#define OPAL_WRONG_STATE	-14
+#define OPAL_ASYNC_COMPLETION	-15
+#define OPAL_EMPTY		-16
+#define OPAL_I2C_TIMEOUT	-17
+#define OPAL_I2C_INVALID_CMD	-18
+#define OPAL_I2C_LBUS_PARITY	-19
+#define OPAL_I2C_BKEND_OVERRUN	-20
+#define OPAL_I2C_BKEND_ACCESS	-21
+#define OPAL_I2C_ARBT_LOST	-22
+#define OPAL_I2C_NACK_RCVD	-23
+#define OPAL_I2C_STOP_ERR	-24
+
+/* API Tokens (in r0) */
+#define OPAL_INVALID_CALL		       -1
+#define OPAL_TEST				0
+#define OPAL_CONSOLE_WRITE			1
+#define OPAL_CONSOLE_READ			2
+#define OPAL_RTC_READ				3
+#define OPAL_RTC_WRITE				4
+#define OPAL_CEC_POWER_DOWN			5
+#define OPAL_CEC_REBOOT				6
+#define OPAL_READ_NVRAM				7
+#define OPAL_WRITE_NVRAM			8
+#define OPAL_HANDLE_INTERRUPT			9
+#define OPAL_POLL_EVENTS			10
+#define OPAL_PCI_SET_HUB_TCE_MEMORY		11
+#define OPAL_PCI_SET_PHB_TCE_MEMORY		12
+#define OPAL_PCI_CONFIG_READ_BYTE		13
+#define OPAL_PCI_CONFIG_READ_HALF_WORD  	14
+#define OPAL_PCI_CONFIG_READ_WORD		15
+#define OPAL_PCI_CONFIG_WRITE_BYTE		16
+#define OPAL_PCI_CONFIG_WRITE_HALF_WORD		17
+#define OPAL_PCI_CONFIG_WRITE_WORD		18
+#define OPAL_SET_XIVE				19
+#define OPAL_GET_XIVE				20
+#define OPAL_GET_COMPLETION_TOKEN_STATUS	21 /* obsolete */
+#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER	22
+#define OPAL_PCI_EEH_FREEZE_STATUS		23
+#define OPAL_PCI_SHPC				24
+#define OPAL_CONSOLE_WRITE_BUFFER_SPACE		25
+#define OPAL_PCI_EEH_FREEZE_CLEAR		26
+#define OPAL_PCI_PHB_MMIO_ENABLE		27
+#define OPAL_PCI_SET_PHB_MEM_WINDOW		28
+#define OPAL_PCI_MAP_PE_MMIO_WINDOW		29
+#define OPAL_PCI_SET_PHB_TABLE_MEMORY		30
+#define OPAL_PCI_SET_PE				31
+#define OPAL_PCI_SET_PELTV			32
+#define OPAL_PCI_SET_MVE			33
+#define OPAL_PCI_SET_MVE_ENABLE			34
+#define OPAL_PCI_GET_XIVE_REISSUE		35
+#define OPAL_PCI_SET_XIVE_REISSUE		36
+#define OPAL_PCI_SET_XIVE_PE			37
+#define OPAL_GET_XIVE_SOURCE			38
+#define OPAL_GET_MSI_32				39
+#define OPAL_GET_MSI_64				40
+#define OPAL_START_CPU				41
+#define OPAL_QUERY_CPU_STATUS			42
+#define OPAL_WRITE_OPPANEL			43 /* unimplemented */
+#define OPAL_PCI_MAP_PE_DMA_WINDOW		44
+#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL		45
+#define OPAL_PCI_RESET				49
+#define OPAL_PCI_GET_HUB_DIAG_DATA		50
+#define OPAL_PCI_GET_PHB_DIAG_DATA		51
+#define OPAL_PCI_FENCE_PHB			52
+#define OPAL_PCI_REINIT				53
+#define OPAL_PCI_MASK_PE_ERROR			54
+#define OPAL_SET_SLOT_LED_STATUS		55
+#define OPAL_GET_EPOW_STATUS			56
+#define OPAL_SET_SYSTEM_ATTENTION_LED		57
+#define OPAL_RESERVED1				58
+#define OPAL_RESERVED2				59
+#define OPAL_PCI_NEXT_ERROR			60
+#define OPAL_PCI_EEH_FREEZE_STATUS2		61
+#define OPAL_PCI_POLL				62
+#define OPAL_PCI_MSI_EOI			63
+#define OPAL_PCI_GET_PHB_DIAG_DATA2		64
+#define OPAL_XSCOM_READ				65
+#define OPAL_XSCOM_WRITE			66
+#define OPAL_LPC_READ				67
+#define OPAL_LPC_WRITE				68
+#define OPAL_RETURN_CPU				69
+#define OPAL_REINIT_CPUS			70
+#define OPAL_ELOG_READ				71
+#define OPAL_ELOG_WRITE				72
+#define OPAL_ELOG_ACK				73
+#define OPAL_ELOG_RESEND			74
+#define OPAL_ELOG_SIZE				75
+#define OPAL_FLASH_VALIDATE			76
+#define OPAL_FLASH_MANAGE			77
+#define OPAL_FLASH_UPDATE			78
+#define OPAL_RESYNC_TIMEBASE			79
+#define OPAL_CHECK_TOKEN			80
+#define OPAL_DUMP_INIT				81
+#define OPAL_DUMP_INFO				82
+#define OPAL_DUMP_READ				83
+#define OPAL_DUMP_ACK				84
+#define OPAL_GET_MSG				85
+#define OPAL_CHECK_ASYNC_COMPLETION		86
+#define OPAL_SYNC_HOST_REBOOT			87
+#define OPAL_SENSOR_READ			88
+#define OPAL_GET_PARAM				89
+#define OPAL_SET_PARAM				90
+#define OPAL_DUMP_RESEND			91
+#define OPAL_ELOG_SEND				92	/* Deprecated */
+#define OPAL_PCI_SET_PHB_CAPI_MODE		93
+#define OPAL_DUMP_INFO2				94
+#define OPAL_WRITE_OPPANEL_ASYNC		95
+#define OPAL_PCI_ERR_INJECT			96
+#define OPAL_PCI_EEH_FREEZE_SET			97
+#define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
+#define OPAL_SLW_SET_REG			100
+#define OPAL_REGISTER_DUMP_REGION		101
+#define OPAL_UNREGISTER_DUMP_REGION		102
+#define OPAL_WRITE_TPO				103
+#define OPAL_READ_TPO				104
+#define OPAL_GET_DPO_STATUS			105
+#define OPAL_OLD_I2C_REQUEST			106	/* Deprecated */
+#define OPAL_IPMI_SEND				107
+#define OPAL_IPMI_RECV				108
+#define OPAL_I2C_REQUEST			109
+#define OPAL_FLASH_READ				110
+#define OPAL_FLASH_WRITE			111
+#define OPAL_FLASH_ERASE			112
+#define OPAL_LAST				112
+
+/* Device tree flags */
+
+/* Flags set in power-mgmt nodes in device tree if
+ * respective idle states are supported in the platform.
+ */
+#define OPAL_PM_NAP_ENABLED		0x00010000
+#define OPAL_PM_SLEEP_ENABLED		0x00020000
+#define OPAL_PM_WINKLE_ENABLED		0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000 /* with workaround */
+
+#ifndef __ASSEMBLY__
+
+/* Other enums */
+enum OpalFreezeState {
+	OPAL_EEH_STOPPED_NOT_FROZEN = 0,
+	OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
+	OPAL_EEH_STOPPED_DMA_FREEZE = 2,
+	OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3,
+	OPAL_EEH_STOPPED_RESET = 4,
+	OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
+	OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
+};
+
+enum OpalEehFreezeActionToken {
+	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
+	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
+	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3,
+
+	OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1,
+	OPAL_EEH_ACTION_SET_FREEZE_DMA  = 2,
+	OPAL_EEH_ACTION_SET_FREEZE_ALL  = 3
+};
+
+enum OpalPciStatusToken {
+	OPAL_EEH_NO_ERROR	= 0,
+	OPAL_EEH_IOC_ERROR	= 1,
+	OPAL_EEH_PHB_ERROR	= 2,
+	OPAL_EEH_PE_ERROR	= 3,
+	OPAL_EEH_PE_MMIO_ERROR	= 4,
+	OPAL_EEH_PE_DMA_ERROR	= 5
+};
+
+enum OpalPciErrorSeverity {
+	OPAL_EEH_SEV_NO_ERROR	= 0,
+	OPAL_EEH_SEV_IOC_DEAD	= 1,
+	OPAL_EEH_SEV_PHB_DEAD	= 2,
+	OPAL_EEH_SEV_PHB_FENCED	= 3,
+	OPAL_EEH_SEV_PE_ER	= 4,
+	OPAL_EEH_SEV_INF	= 5
+};
+
+enum OpalErrinjectType {
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR	= 0,
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64	= 1,
+};
+
+enum OpalErrinjectFunc {
+	/* IOA bus specific errors */
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR	= 0,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA	= 1,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR	= 2,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA	= 3,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR	= 4,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA	= 5,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR	= 6,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA	= 7,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR	= 8,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA	= 9,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR	= 10,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA	= 11,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR	= 12,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA	= 13,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER	= 14,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET	= 15,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR	= 16,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA	= 17,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER	= 18,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET	= 19,
+};
+
+enum OpalMmioWindowType {
+	OPAL_M32_WINDOW_TYPE = 1,
+	OPAL_M64_WINDOW_TYPE = 2,
+	OPAL_IO_WINDOW_TYPE  = 3
+};
+
+enum OpalExceptionHandler {
+	OPAL_MACHINE_CHECK_HANDLER	    = 1,
+	OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
+	OPAL_SOFTPATCH_HANDLER		    = 3
+};
+
+enum OpalPendingState {
+	OPAL_EVENT_OPAL_INTERNAL   = 0x1,
+	OPAL_EVENT_NVRAM	   = 0x2,
+	OPAL_EVENT_RTC		   = 0x4,
+	OPAL_EVENT_CONSOLE_OUTPUT  = 0x8,
+	OPAL_EVENT_CONSOLE_INPUT   = 0x10,
+	OPAL_EVENT_ERROR_LOG_AVAIL = 0x20,
+	OPAL_EVENT_ERROR_LOG	   = 0x40,
+	OPAL_EVENT_EPOW		   = 0x80,
+	OPAL_EVENT_LED_STATUS	   = 0x100,
+	OPAL_EVENT_PCI_ERROR	   = 0x200,
+	OPAL_EVENT_DUMP_AVAIL	   = 0x400,
+	OPAL_EVENT_MSG_PENDING	   = 0x800,
+};
+
+enum OpalThreadStatus {
+	OPAL_THREAD_INACTIVE = 0x0,
+	OPAL_THREAD_STARTED = 0x1,
+	OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
+};
+
+enum OpalPciBusCompare {
+	OpalPciBusAny	= 0,	/* Any bus number match */
+	OpalPciBus3Bits	= 2,	/* Match top 3 bits of bus number */
+	OpalPciBus4Bits	= 3,	/* Match top 4 bits of bus number */
+	OpalPciBus5Bits	= 4,	/* Match top 5 bits of bus number */
+	OpalPciBus6Bits	= 5,	/* Match top 6 bits of bus number */
+	OpalPciBus7Bits	= 6,	/* Match top 7 bits of bus number */
+	OpalPciBusAll	= 7,	/* Match bus number exactly */
+};
+
+enum OpalDeviceCompare {
+	OPAL_IGNORE_RID_DEVICE_NUMBER = 0,
+	OPAL_COMPARE_RID_DEVICE_NUMBER = 1
+};
+
+enum OpalFuncCompare {
+	OPAL_IGNORE_RID_FUNCTION_NUMBER = 0,
+	OPAL_COMPARE_RID_FUNCTION_NUMBER = 1
+};
+
+enum OpalPeAction {
+	OPAL_UNMAP_PE = 0,
+	OPAL_MAP_PE = 1
+};
+
+enum OpalPeltvAction {
+	OPAL_REMOVE_PE_FROM_DOMAIN = 0,
+	OPAL_ADD_PE_TO_DOMAIN = 1
+};
+
+enum OpalMveEnableAction {
+	OPAL_DISABLE_MVE = 0,
+	OPAL_ENABLE_MVE = 1
+};
+
+enum OpalM64Action {
+	OPAL_DISABLE_M64 = 0,
+	OPAL_ENABLE_M64_SPLIT = 1,
+	OPAL_ENABLE_M64_NON_SPLIT = 2
+};
+
+enum OpalPciResetScope {
+	OPAL_RESET_PHB_COMPLETE		= 1,
+	OPAL_RESET_PCI_LINK		= 2,
+	OPAL_RESET_PHB_ERROR		= 3,
+	OPAL_RESET_PCI_HOT		= 4,
+	OPAL_RESET_PCI_FUNDAMENTAL	= 5,
+	OPAL_RESET_PCI_IODA_TABLE	= 6
+};
+
+enum OpalPciReinitScope {
+	/*
+	 * Note: we chose values that do not overlap
+	 * OpalPciResetScope as OPAL v2 used the same
+	 * enum for both
+	 */
+	OPAL_REINIT_PCI_DEV = 1000
+};
+
+enum OpalPciResetState {
+	OPAL_DEASSERT_RESET = 0,
+	OPAL_ASSERT_RESET   = 1
+};
+
+/*
+ * Address cycle types for LPC accesses. These also correspond
+ * to the content of the first cell of the "reg" property for
+ * device nodes on the LPC bus
+ */
+enum OpalLPCAddressType {
+	OPAL_LPC_MEM	= 0,
+	OPAL_LPC_IO	= 1,
+	OPAL_LPC_FW	= 2,
+};
+
+enum opal_msg_type {
+	OPAL_MSG_ASYNC_COMP = 0,	/* params[0] = token, params[1] = rc,
+					 * additional params function-specific
+					 */
+	OPAL_MSG_MEM_ERR,
+	OPAL_MSG_EPOW,
+	OPAL_MSG_SHUTDOWN,		/* params[0] = 1 reboot, 0 shutdown */
+	OPAL_MSG_HMI_EVT,
+	OPAL_MSG_DPO,
+	OPAL_MSG_TYPE_MAX,
+};
+
+struct opal_msg {
+	__be32 msg_type;
+	__be32 reserved;
+	__be64 params[8];
+};
+
+/* System parameter permission */
+enum OpalSysparamPerm {
+	OPAL_SYSPARAM_READ  = 0x1,
+	OPAL_SYSPARAM_WRITE = 0x2,
+	OPAL_SYSPARAM_RW    = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE),
+};
+
+enum {
+	OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1,
+};
+
+struct opal_ipmi_msg {
+	uint8_t version;
+	uint8_t netfn;
+	uint8_t cmd;
+	uint8_t data[];
+};
+
+/* FSP memory errors handling */
+enum OpalMemErr_Version {
+	OpalMemErr_V1 = 1,
+};
+
+enum OpalMemErrType {
+	OPAL_MEM_ERR_TYPE_RESILIENCE	= 0,
+	OPAL_MEM_ERR_TYPE_DYN_DALLOC,
+};
+
+/* Memory Reilience error type */
+enum OpalMemErr_ResilErrType {
+	OPAL_MEM_RESILIENCE_CE		= 0,
+	OPAL_MEM_RESILIENCE_UE,
+	OPAL_MEM_RESILIENCE_UE_SCRUB,
+};
+
+/* Dynamic Memory Deallocation type */
+enum OpalMemErr_DynErrType {
+	OPAL_MEM_DYNAMIC_DEALLOC	= 0,
+};
+
+struct OpalMemoryErrorData {
+	enum OpalMemErr_Version	version:8;	/* 0x00 */
+	enum OpalMemErrType	type:8;		/* 0x01 */
+	__be16			flags;		/* 0x02 */
+	uint8_t			reserved_1[4];	/* 0x04 */
+
+	union {
+		/* Memory Resilience corrected/uncorrected error info */
+		struct {
+			enum OpalMemErr_ResilErrType	resil_err_type:8;
+			uint8_t				reserved_1[7];
+			__be64				physical_address_start;
+			__be64				physical_address_end;
+		} resilience;
+		/* Dynamic memory deallocation error info */
+		struct {
+			enum OpalMemErr_DynErrType	dyn_err_type:8;
+			uint8_t				reserved_1[7];
+			__be64				physical_address_start;
+			__be64				physical_address_end;
+		} dyn_dealloc;
+	} u;
+};
+
+/* HMI interrupt event */
+enum OpalHMI_Version {
+	OpalHMIEvt_V1 = 1,
+};
+
+enum OpalHMI_Severity {
+	OpalHMI_SEV_NO_ERROR = 0,
+	OpalHMI_SEV_WARNING = 1,
+	OpalHMI_SEV_ERROR_SYNC = 2,
+	OpalHMI_SEV_FATAL = 3,
+};
+
+enum OpalHMI_Disposition {
+	OpalHMI_DISPOSITION_RECOVERED = 0,
+	OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum OpalHMI_ErrType {
+	OpalHMI_ERROR_MALFUNC_ALERT	= 0,
+	OpalHMI_ERROR_PROC_RECOV_DONE,
+	OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
+	OpalHMI_ERROR_PROC_RECOV_MASKED,
+	OpalHMI_ERROR_TFAC,
+	OpalHMI_ERROR_TFMR_PARITY,
+	OpalHMI_ERROR_HA_OVERFLOW_WARN,
+	OpalHMI_ERROR_XSCOM_FAIL,
+	OpalHMI_ERROR_XSCOM_DONE,
+	OpalHMI_ERROR_SCOM_FIR,
+	OpalHMI_ERROR_DEBUG_TRIG_FIR,
+	OpalHMI_ERROR_HYP_RESOURCE,
+	OpalHMI_ERROR_CAPP_RECOVERY,
+};
+
+struct OpalHMIEvent {
+	uint8_t		version;	/* 0x00 */
+	uint8_t		severity;	/* 0x01 */
+	uint8_t		type;		/* 0x02 */
+	uint8_t		disposition;	/* 0x03 */
+	uint8_t		reserved_1[4];	/* 0x04 */
+
+	__be64		hmer;
+	/* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
+	__be64		tfmr;
+};
+
+enum {
+	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
+	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
+	OPAL_P7IOC_DIAG_TYPE_BI		= 2,
+	OPAL_P7IOC_DIAG_TYPE_CI		= 3,
+	OPAL_P7IOC_DIAG_TYPE_MISC	= 4,
+	OPAL_P7IOC_DIAG_TYPE_I2C	= 5,
+	OPAL_P7IOC_DIAG_TYPE_LAST	= 6
+};
+
+struct OpalIoP7IOCErrorData {
+	__be16 type;
+
+	/* GEM */
+	__be64 gemXfir;
+	__be64 gemRfir;
+	__be64 gemRirqfir;
+	__be64 gemMask;
+	__be64 gemRwof;
+
+	/* LEM */
+	__be64 lemFir;
+	__be64 lemErrMask;
+	__be64 lemAction0;
+	__be64 lemAction1;
+	__be64 lemWof;
+
+	union {
+		struct OpalIoP7IOCRgcErrorData {
+			__be64 rgcStatus;	/* 3E1C10 */
+			__be64 rgcLdcp;		/* 3E1C18 */
+		}rgc;
+		struct OpalIoP7IOCBiErrorData {
+			__be64 biLdcp0;		/* 3C0100, 3C0118 */
+			__be64 biLdcp1;		/* 3C0108, 3C0120 */
+			__be64 biLdcp2;		/* 3C0110, 3C0128 */
+			__be64 biFenceStatus;	/* 3C0130, 3C0130 */
+
+			uint8_t biDownbound;	/* BI Downbound or Upbound */
+		}bi;
+		struct OpalIoP7IOCCiErrorData {
+			__be64 ciPortStatus;	/* 3Dn008 */
+			__be64 ciPortLdcp;	/* 3Dn010 */
+
+			uint8_t ciPort;		/* Index of CI port: 0/1 */
+		}ci;
+	};
+};
+
+/**
+ * This structure defines the overlay which will be used to store PHB error
+ * data upon request.
+ */
+enum {
+	OPAL_PHB_ERROR_DATA_VERSION_1 = 1,
+};
+
+enum {
+	OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
+	OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
+};
+
+enum {
+	OPAL_P7IOC_NUM_PEST_REGS = 128,
+	OPAL_PHB3_NUM_PEST_REGS = 256
+};
+
+struct OpalIoPhbErrorCommon {
+	__be32 version;
+	__be32 ioType;
+	__be32 len;
+};
+
+struct OpalIoP7IOCPhbErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	__be32 brdgCtl;
+
+	// P7IOC utl regs
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
+
+	// P7IOC cfg regs
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
+
+	// cfg AER regs
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
+
+	__be32 rsv3;
+
+	// Record data about the call to allocate a buffer.
+	__be64 errorClass;
+	__be64 correlator;
+
+	//P7IOC MMIO Error Regs
+	__be64 p7iocPlssr;                // n120
+	__be64 p7iocCsr;                  // n110
+	__be64 lemFir;                    // nC00
+	__be64 lemErrorMask;              // nC18
+	__be64 lemWOF;                    // nC40
+	__be64 phbErrorStatus;            // nC80
+	__be64 phbFirstErrorStatus;       // nC88
+	__be64 phbErrorLog0;              // nCC0
+	__be64 phbErrorLog1;              // nCC8
+	__be64 mmioErrorStatus;           // nD00
+	__be64 mmioFirstErrorStatus;      // nD08
+	__be64 mmioErrorLog0;             // nD40
+	__be64 mmioErrorLog1;             // nD48
+	__be64 dma0ErrorStatus;           // nD80
+	__be64 dma0FirstErrorStatus;      // nD88
+	__be64 dma0ErrorLog0;             // nDC0
+	__be64 dma0ErrorLog1;             // nDC8
+	__be64 dma1ErrorStatus;           // nE00
+	__be64 dma1FirstErrorStatus;      // nE08
+	__be64 dma1ErrorLog0;             // nE40
+	__be64 dma1ErrorLog1;             // nE48
+	__be64 pestA[OPAL_P7IOC_NUM_PEST_REGS];
+	__be64 pestB[OPAL_P7IOC_NUM_PEST_REGS];
+};
+
+struct OpalIoPhb3ErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	__be32 brdgCtl;
+
+	/* PHB3 UTL regs */
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
+
+	/* PHB3 cfg regs */
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
+
+	/* cfg AER regs */
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
+
+	__be32 rsv3;
+
+	/* Record data about the call to allocate a buffer */
+	__be64 errorClass;
+	__be64 correlator;
+
+	/* PHB3 MMIO Error Regs */
+	__be64 nFir;			/* 000 */
+	__be64 nFirMask;		/* 003 */
+	__be64 nFirWOF;		/* 008 */
+	__be64 phbPlssr;		/* 120 */
+	__be64 phbCsr;		/* 110 */
+	__be64 lemFir;		/* C00 */
+	__be64 lemErrorMask;		/* C18 */
+	__be64 lemWOF;		/* C40 */
+	__be64 phbErrorStatus;	/* C80 */
+	__be64 phbFirstErrorStatus;	/* C88 */
+	__be64 phbErrorLog0;		/* CC0 */
+	__be64 phbErrorLog1;		/* CC8 */
+	__be64 mmioErrorStatus;	/* D00 */
+	__be64 mmioFirstErrorStatus;	/* D08 */
+	__be64 mmioErrorLog0;		/* D40 */
+	__be64 mmioErrorLog1;		/* D48 */
+	__be64 dma0ErrorStatus;	/* D80 */
+	__be64 dma0FirstErrorStatus;	/* D88 */
+	__be64 dma0ErrorLog0;		/* DC0 */
+	__be64 dma0ErrorLog1;		/* DC8 */
+	__be64 dma1ErrorStatus;	/* E00 */
+	__be64 dma1FirstErrorStatus;	/* E08 */
+	__be64 dma1ErrorLog0;		/* E40 */
+	__be64 dma1ErrorLog1;		/* E48 */
+	__be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
+	__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
+};
+
+enum {
+	OPAL_REINIT_CPUS_HILE_BE	= (1 << 0),
+	OPAL_REINIT_CPUS_HILE_LE	= (1 << 1),
+};
+
+typedef struct oppanel_line {
+	__be64 line;
+	__be64 line_len;
+} oppanel_line_t;
+
+/*
+ * SG entries
+ *
+ * WARNING: The current implementation requires each entry
+ * to represent a block that is 4k aligned *and* each block
+ * size except the last one in the list to be as well.
+ */
+struct opal_sg_entry {
+	__be64 data;
+	__be64 length;
+};
+
+/*
+ * Candiate image SG list.
+ *
+ * length = VER | length
+ */
+struct opal_sg_list {
+	__be64 length;
+	__be64 next;
+	struct opal_sg_entry entry[];
+};
+
+/*
+ * Dump region ID range usable by the OS
+ */
+#define OPAL_DUMP_REGION_HOST_START		0x80
+#define OPAL_DUMP_REGION_LOG_BUF		0x80
+#define OPAL_DUMP_REGION_HOST_END		0xFF
+
+/* CAPI modes for PHB */
+enum {
+	OPAL_PHB_CAPI_MODE_PCIE		= 0,
+	OPAL_PHB_CAPI_MODE_CAPI		= 1,
+	OPAL_PHB_CAPI_MODE_SNOOP_OFF    = 2,
+	OPAL_PHB_CAPI_MODE_SNOOP_ON	= 3,
+};
+
+/* OPAL I2C request */
+struct opal_i2c_request {
+	uint8_t	type;
+#define OPAL_I2C_RAW_READ	0
+#define OPAL_I2C_RAW_WRITE	1
+#define OPAL_I2C_SM_READ	2
+#define OPAL_I2C_SM_WRITE	3
+	uint8_t flags;
+#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
+	uint8_t	subaddr_sz;		/* Max 4 */
+	uint8_t reserved;
+	__be16 addr;			/* 7 or 10 bit address */
+	__be16 reserved2;
+	__be32 subaddr;		/* Sub-address if any */
+	__be32 size;			/* Data size */
+	__be64 buffer_ra;		/* Buffer real address */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9ee0a30a02ce..042af1abfc4d 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -9,755 +9,17 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef __OPAL_H
-#define __OPAL_H
+#ifndef _ASM_POWERPC_OPAL_H
+#define _ASM_POWERPC_OPAL_H
 
-#ifndef __ASSEMBLY__
-/*
- * SG entry
- *
- * WARNING: The current implementation requires each entry
- * to represent a block that is 4k aligned *and* each block
- * size except the last one in the list to be as well.
- */
-struct opal_sg_entry {
-	__be64 data;
-	__be64 length;
-};
-
-/* SG list */
-struct opal_sg_list {
-	__be64 length;
-	__be64 next;
-	struct opal_sg_entry entry[];
-};
-
-/* We calculate number of sg entries based on PAGE_SIZE */
-#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
-
-#endif /* __ASSEMBLY__ */
-
-/****** OPAL APIs ******/
-
-/* Return codes */
-#define OPAL_SUCCESS 		0
-#define OPAL_PARAMETER		-1
-#define OPAL_BUSY		-2
-#define OPAL_PARTIAL		-3
-#define OPAL_CONSTRAINED	-4
-#define OPAL_CLOSED		-5
-#define OPAL_HARDWARE		-6
-#define OPAL_UNSUPPORTED	-7
-#define OPAL_PERMISSION		-8
-#define OPAL_NO_MEM		-9
-#define OPAL_RESOURCE		-10
-#define OPAL_INTERNAL_ERROR	-11
-#define OPAL_BUSY_EVENT		-12
-#define OPAL_HARDWARE_FROZEN	-13
-#define OPAL_WRONG_STATE	-14
-#define OPAL_ASYNC_COMPLETION	-15
-#define OPAL_I2C_TIMEOUT	-17
-#define OPAL_I2C_INVALID_CMD	-18
-#define OPAL_I2C_LBUS_PARITY	-19
-#define OPAL_I2C_BKEND_OVERRUN	-20
-#define OPAL_I2C_BKEND_ACCESS	-21
-#define OPAL_I2C_ARBT_LOST	-22
-#define OPAL_I2C_NACK_RCVD	-23
-#define OPAL_I2C_STOP_ERR	-24
-
-/* API Tokens (in r0) */
-#define OPAL_INVALID_CALL			-1
-#define OPAL_CONSOLE_WRITE			1
-#define OPAL_CONSOLE_READ			2
-#define OPAL_RTC_READ				3
-#define OPAL_RTC_WRITE				4
-#define OPAL_CEC_POWER_DOWN			5
-#define OPAL_CEC_REBOOT				6
-#define OPAL_READ_NVRAM				7
-#define OPAL_WRITE_NVRAM			8
-#define OPAL_HANDLE_INTERRUPT			9
-#define OPAL_POLL_EVENTS			10
-#define OPAL_PCI_SET_HUB_TCE_MEMORY		11
-#define OPAL_PCI_SET_PHB_TCE_MEMORY		12
-#define OPAL_PCI_CONFIG_READ_BYTE		13
-#define OPAL_PCI_CONFIG_READ_HALF_WORD  	14
-#define OPAL_PCI_CONFIG_READ_WORD		15
-#define OPAL_PCI_CONFIG_WRITE_BYTE		16
-#define OPAL_PCI_CONFIG_WRITE_HALF_WORD		17
-#define OPAL_PCI_CONFIG_WRITE_WORD		18
-#define OPAL_SET_XIVE				19
-#define OPAL_GET_XIVE				20
-#define OPAL_GET_COMPLETION_TOKEN_STATUS	21 /* obsolete */
-#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER	22
-#define OPAL_PCI_EEH_FREEZE_STATUS		23
-#define OPAL_PCI_SHPC				24
-#define OPAL_CONSOLE_WRITE_BUFFER_SPACE		25
-#define OPAL_PCI_EEH_FREEZE_CLEAR		26
-#define OPAL_PCI_PHB_MMIO_ENABLE		27
-#define OPAL_PCI_SET_PHB_MEM_WINDOW		28
-#define OPAL_PCI_MAP_PE_MMIO_WINDOW		29
-#define OPAL_PCI_SET_PHB_TABLE_MEMORY		30
-#define OPAL_PCI_SET_PE				31
-#define OPAL_PCI_SET_PELTV			32
-#define OPAL_PCI_SET_MVE			33
-#define OPAL_PCI_SET_MVE_ENABLE			34
-#define OPAL_PCI_GET_XIVE_REISSUE		35
-#define OPAL_PCI_SET_XIVE_REISSUE		36
-#define OPAL_PCI_SET_XIVE_PE			37
-#define OPAL_GET_XIVE_SOURCE			38
-#define OPAL_GET_MSI_32				39
-#define OPAL_GET_MSI_64				40
-#define OPAL_START_CPU				41
-#define OPAL_QUERY_CPU_STATUS			42
-#define OPAL_WRITE_OPPANEL			43
-#define OPAL_PCI_MAP_PE_DMA_WINDOW		44
-#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL		45
-#define OPAL_PCI_RESET				49
-#define OPAL_PCI_GET_HUB_DIAG_DATA		50
-#define OPAL_PCI_GET_PHB_DIAG_DATA		51
-#define OPAL_PCI_FENCE_PHB			52
-#define OPAL_PCI_REINIT				53
-#define OPAL_PCI_MASK_PE_ERROR			54
-#define OPAL_SET_SLOT_LED_STATUS		55
-#define OPAL_GET_EPOW_STATUS			56
-#define OPAL_SET_SYSTEM_ATTENTION_LED		57
-#define OPAL_RESERVED1				58
-#define OPAL_RESERVED2				59
-#define OPAL_PCI_NEXT_ERROR			60
-#define OPAL_PCI_EEH_FREEZE_STATUS2		61
-#define OPAL_PCI_POLL				62
-#define OPAL_PCI_MSI_EOI			63
-#define OPAL_PCI_GET_PHB_DIAG_DATA2		64
-#define OPAL_XSCOM_READ				65
-#define OPAL_XSCOM_WRITE			66
-#define OPAL_LPC_READ				67
-#define OPAL_LPC_WRITE				68
-#define OPAL_RETURN_CPU				69
-#define OPAL_REINIT_CPUS			70
-#define OPAL_ELOG_READ				71
-#define OPAL_ELOG_WRITE				72
-#define OPAL_ELOG_ACK				73
-#define OPAL_ELOG_RESEND			74
-#define OPAL_ELOG_SIZE				75
-#define OPAL_FLASH_VALIDATE			76
-#define OPAL_FLASH_MANAGE			77
-#define OPAL_FLASH_UPDATE			78
-#define OPAL_RESYNC_TIMEBASE			79
-#define OPAL_CHECK_TOKEN			80
-#define OPAL_DUMP_INIT				81
-#define OPAL_DUMP_INFO				82
-#define OPAL_DUMP_READ				83
-#define OPAL_DUMP_ACK				84
-#define OPAL_GET_MSG				85
-#define OPAL_CHECK_ASYNC_COMPLETION		86
-#define OPAL_SYNC_HOST_REBOOT			87
-#define OPAL_SENSOR_READ			88
-#define OPAL_GET_PARAM				89
-#define OPAL_SET_PARAM				90
-#define OPAL_DUMP_RESEND			91
-#define OPAL_PCI_SET_PHB_CXL_MODE		93
-#define OPAL_DUMP_INFO2				94
-#define OPAL_PCI_ERR_INJECT			96
-#define OPAL_PCI_EEH_FREEZE_SET			97
-#define OPAL_HANDLE_HMI				98
-#define OPAL_CONFIG_CPU_IDLE_STATE		99
-#define OPAL_SLW_SET_REG			100
-#define OPAL_REGISTER_DUMP_REGION		101
-#define OPAL_UNREGISTER_DUMP_REGION		102
-#define OPAL_WRITE_TPO				103
-#define OPAL_READ_TPO				104
-#define OPAL_IPMI_SEND				107
-#define OPAL_IPMI_RECV				108
-#define OPAL_I2C_REQUEST			109
-
-/* Device tree flags */
-
-/* Flags set in power-mgmt nodes in device tree if
- * respective idle states are supported in the platform.
- */
-#define OPAL_PM_NAP_ENABLED	0x00010000
-#define OPAL_PM_SLEEP_ENABLED	0x00020000
-#define OPAL_PM_WINKLE_ENABLED	0x00040000
-#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000
+#include <asm/opal-api.h>
 
 #ifndef __ASSEMBLY__
 
 #include <linux/notifier.h>
 
-/* Other enums */
-enum OpalVendorApiTokens {
-	OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999
-};
-
-enum OpalFreezeState {
-	OPAL_EEH_STOPPED_NOT_FROZEN = 0,
-	OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
-	OPAL_EEH_STOPPED_DMA_FREEZE = 2,
-	OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3,
-	OPAL_EEH_STOPPED_RESET = 4,
-	OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
-	OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
-};
-
-enum OpalEehFreezeActionToken {
-	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
-	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
-	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3,
-
-	OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1,
-	OPAL_EEH_ACTION_SET_FREEZE_DMA  = 2,
-	OPAL_EEH_ACTION_SET_FREEZE_ALL  = 3
-};
-
-enum OpalPciStatusToken {
-	OPAL_EEH_NO_ERROR	= 0,
-	OPAL_EEH_IOC_ERROR	= 1,
-	OPAL_EEH_PHB_ERROR	= 2,
-	OPAL_EEH_PE_ERROR	= 3,
-	OPAL_EEH_PE_MMIO_ERROR	= 4,
-	OPAL_EEH_PE_DMA_ERROR	= 5
-};
-
-enum OpalPciErrorSeverity {
-	OPAL_EEH_SEV_NO_ERROR	= 0,
-	OPAL_EEH_SEV_IOC_DEAD	= 1,
-	OPAL_EEH_SEV_PHB_DEAD	= 2,
-	OPAL_EEH_SEV_PHB_FENCED	= 3,
-	OPAL_EEH_SEV_PE_ER	= 4,
-	OPAL_EEH_SEV_INF	= 5
-};
-
-enum OpalErrinjectType {
-	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR	= 0,
-	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64	= 1,
-};
-
-enum OpalErrinjectFunc {
-	/* IOA bus specific errors */
-	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR	= 0,
-	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA	= 1,
-	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR	= 2,
-	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA	= 3,
-	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR	= 4,
-	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA	= 5,
-	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR	= 6,
-	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA	= 7,
-	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR	= 8,
-	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA	= 9,
-	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR	= 10,
-	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA	= 11,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR	= 12,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA	= 13,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER	= 14,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET	= 15,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR	= 16,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA	= 17,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER	= 18,
-	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET	= 19,
-};
-
-enum OpalShpcAction {
-	OPAL_SHPC_GET_LINK_STATE = 0,
-	OPAL_SHPC_GET_SLOT_STATE = 1
-};
-
-enum OpalShpcLinkState {
-	OPAL_SHPC_LINK_DOWN = 0,
-	OPAL_SHPC_LINK_UP = 1
-};
-
-enum OpalMmioWindowType {
-	OPAL_M32_WINDOW_TYPE = 1,
-	OPAL_M64_WINDOW_TYPE = 2,
-	OPAL_IO_WINDOW_TYPE = 3
-};
-
-enum OpalShpcSlotState {
-	OPAL_SHPC_DEV_NOT_PRESENT = 0,
-	OPAL_SHPC_DEV_PRESENT = 1
-};
-
-enum OpalExceptionHandler {
-	OPAL_MACHINE_CHECK_HANDLER = 1,
-	OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
-	OPAL_SOFTPATCH_HANDLER = 3
-};
-
-enum OpalPendingState {
-	OPAL_EVENT_OPAL_INTERNAL	= 0x1,
-	OPAL_EVENT_NVRAM		= 0x2,
-	OPAL_EVENT_RTC			= 0x4,
-	OPAL_EVENT_CONSOLE_OUTPUT	= 0x8,
-	OPAL_EVENT_CONSOLE_INPUT	= 0x10,
-	OPAL_EVENT_ERROR_LOG_AVAIL	= 0x20,
-	OPAL_EVENT_ERROR_LOG		= 0x40,
-	OPAL_EVENT_EPOW			= 0x80,
-	OPAL_EVENT_LED_STATUS		= 0x100,
-	OPAL_EVENT_PCI_ERROR		= 0x200,
-	OPAL_EVENT_DUMP_AVAIL		= 0x400,
-	OPAL_EVENT_MSG_PENDING		= 0x800,
-};
-
-enum OpalMessageType {
-	OPAL_MSG_ASYNC_COMP = 0,	/* params[0] = token, params[1] = rc,
-					 * additional params function-specific
-					 */
-	OPAL_MSG_MEM_ERR,
-	OPAL_MSG_EPOW,
-	OPAL_MSG_SHUTDOWN,		/* params[0] = 1 reboot, 0 shutdown */
-	OPAL_MSG_HMI_EVT,
-	OPAL_MSG_TYPE_MAX,
-};
-
-enum OpalThreadStatus {
-	OPAL_THREAD_INACTIVE = 0x0,
-	OPAL_THREAD_STARTED = 0x1,
-	OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
-};
-
-enum OpalPciBusCompare {
-	OpalPciBusAny	= 0,	/* Any bus number match */
-	OpalPciBus3Bits	= 2,	/* Match top 3 bits of bus number */
-	OpalPciBus4Bits	= 3,	/* Match top 4 bits of bus number */
-	OpalPciBus5Bits	= 4,	/* Match top 5 bits of bus number */
-	OpalPciBus6Bits	= 5,	/* Match top 6 bits of bus number */
-	OpalPciBus7Bits	= 6,	/* Match top 7 bits of bus number */
-	OpalPciBusAll	= 7,	/* Match bus number exactly */
-};
-
-enum OpalDeviceCompare {
-	OPAL_IGNORE_RID_DEVICE_NUMBER = 0,
-	OPAL_COMPARE_RID_DEVICE_NUMBER = 1
-};
-
-enum OpalFuncCompare {
-	OPAL_IGNORE_RID_FUNCTION_NUMBER = 0,
-	OPAL_COMPARE_RID_FUNCTION_NUMBER = 1
-};
-
-enum OpalPeAction {
-	OPAL_UNMAP_PE = 0,
-	OPAL_MAP_PE = 1
-};
-
-enum OpalPeltvAction {
-	OPAL_REMOVE_PE_FROM_DOMAIN = 0,
-	OPAL_ADD_PE_TO_DOMAIN = 1
-};
-
-enum OpalMveEnableAction {
-	OPAL_DISABLE_MVE = 0,
-	OPAL_ENABLE_MVE = 1
-};
-
-enum OpalM64EnableAction {
-	OPAL_DISABLE_M64 = 0,
-	OPAL_ENABLE_M64_SPLIT = 1,
-	OPAL_ENABLE_M64_NON_SPLIT = 2
-};
-
-enum OpalPciResetScope {
-	OPAL_RESET_PHB_COMPLETE		= 1,
-	OPAL_RESET_PCI_LINK		= 2,
-	OPAL_RESET_PHB_ERROR		= 3,
-	OPAL_RESET_PCI_HOT		= 4,
-	OPAL_RESET_PCI_FUNDAMENTAL	= 5,
-	OPAL_RESET_PCI_IODA_TABLE	= 6
-};
-
-enum OpalPciReinitScope {
-	OPAL_REINIT_PCI_DEV = 1000
-};
-
-enum OpalPciResetState {
-	OPAL_DEASSERT_RESET = 0,
-	OPAL_ASSERT_RESET = 1
-};
-
-enum OpalPciMaskAction {
-	OPAL_UNMASK_ERROR_TYPE = 0,
-	OPAL_MASK_ERROR_TYPE = 1
-};
-
-enum OpalSlotLedType {
-	OPAL_SLOT_LED_ID_TYPE = 0,
-	OPAL_SLOT_LED_FAULT_TYPE = 1
-};
-
-enum OpalLedAction {
-	OPAL_TURN_OFF_LED = 0,
-	OPAL_TURN_ON_LED = 1,
-	OPAL_QUERY_LED_STATE_AFTER_BUSY = 2
-};
-
-enum OpalEpowStatus {
-	OPAL_EPOW_NONE = 0,
-	OPAL_EPOW_UPS = 1,
-	OPAL_EPOW_OVER_AMBIENT_TEMP = 2,
-	OPAL_EPOW_OVER_INTERNAL_TEMP = 3
-};
-
-/*
- * Address cycle types for LPC accesses. These also correspond
- * to the content of the first cell of the "reg" property for
- * device nodes on the LPC bus
- */
-enum OpalLPCAddressType {
-	OPAL_LPC_MEM	= 0,
-	OPAL_LPC_IO	= 1,
-	OPAL_LPC_FW	= 2,
-};
-
-/* System parameter permission */
-enum OpalSysparamPerm {
-	OPAL_SYSPARAM_READ      = 0x1,
-	OPAL_SYSPARAM_WRITE     = 0x2,
-	OPAL_SYSPARAM_RW        = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE),
-};
-
-struct opal_msg {
-	__be32 msg_type;
-	__be32 reserved;
-	__be64 params[8];
-};
-
-enum {
-	OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1,
-};
-
-struct opal_ipmi_msg {
-	uint8_t		version;
-	uint8_t		netfn;
-	uint8_t		cmd;
-	uint8_t		data[];
-};
-
-/* FSP memory errors handling */
-enum OpalMemErr_Version {
-	OpalMemErr_V1 = 1,
-};
-
-enum OpalMemErrType {
-	OPAL_MEM_ERR_TYPE_RESILIENCE	= 0,
-	OPAL_MEM_ERR_TYPE_DYN_DALLOC,
-	OPAL_MEM_ERR_TYPE_SCRUB,
-};
-
-/* Memory Reilience error type */
-enum OpalMemErr_ResilErrType {
-	OPAL_MEM_RESILIENCE_CE		= 0,
-	OPAL_MEM_RESILIENCE_UE,
-	OPAL_MEM_RESILIENCE_UE_SCRUB,
-};
-
-/* Dynamic Memory Deallocation type */
-enum OpalMemErr_DynErrType {
-	OPAL_MEM_DYNAMIC_DEALLOC	= 0,
-};
-
-/* OpalMemoryErrorData->flags */
-#define OPAL_MEM_CORRECTED_ERROR	0x0001
-#define OPAL_MEM_THRESHOLD_EXCEEDED	0x0002
-#define OPAL_MEM_ACK_REQUIRED		0x8000
-
-struct OpalMemoryErrorData {
-	enum OpalMemErr_Version	version:8;	/* 0x00 */
-	enum OpalMemErrType	type:8;		/* 0x01 */
-	__be16			flags;		/* 0x02 */
-	uint8_t			reserved_1[4];	/* 0x04 */
-
-	union {
-		/* Memory Resilience corrected/uncorrected error info */
-		struct {
-			enum OpalMemErr_ResilErrType resil_err_type:8;
-			uint8_t		reserved_1[7];
-			__be64		physical_address_start;
-			__be64		physical_address_end;
-		} resilience;
-		/* Dynamic memory deallocation error info */
-		struct {
-			enum OpalMemErr_DynErrType dyn_err_type:8;
-			uint8_t		reserved_1[7];
-			__be64		physical_address_start;
-			__be64		physical_address_end;
-		} dyn_dealloc;
-	} u;
-};
-
-/* HMI interrupt event */
-enum OpalHMI_Version {
-	OpalHMIEvt_V1 = 1,
-};
-
-enum OpalHMI_Severity {
-	OpalHMI_SEV_NO_ERROR = 0,
-	OpalHMI_SEV_WARNING = 1,
-	OpalHMI_SEV_ERROR_SYNC = 2,
-	OpalHMI_SEV_FATAL = 3,
-};
-
-enum OpalHMI_Disposition {
-	OpalHMI_DISPOSITION_RECOVERED = 0,
-	OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
-};
-
-enum OpalHMI_ErrType {
-	OpalHMI_ERROR_MALFUNC_ALERT	= 0,
-	OpalHMI_ERROR_PROC_RECOV_DONE,
-	OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
-	OpalHMI_ERROR_PROC_RECOV_MASKED,
-	OpalHMI_ERROR_TFAC,
-	OpalHMI_ERROR_TFMR_PARITY,
-	OpalHMI_ERROR_HA_OVERFLOW_WARN,
-	OpalHMI_ERROR_XSCOM_FAIL,
-	OpalHMI_ERROR_XSCOM_DONE,
-	OpalHMI_ERROR_SCOM_FIR,
-	OpalHMI_ERROR_DEBUG_TRIG_FIR,
-	OpalHMI_ERROR_HYP_RESOURCE,
-};
-
-struct OpalHMIEvent {
-	uint8_t		version;	/* 0x00 */
-	uint8_t		severity;	/* 0x01 */
-	uint8_t		type;		/* 0x02 */
-	uint8_t		disposition;	/* 0x03 */
-	uint8_t		reserved_1[4];	/* 0x04 */
-
-	__be64		hmer;
-	/* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
-	__be64		tfmr;
-};
-
-enum {
-	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
-	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
-	OPAL_P7IOC_DIAG_TYPE_BI		= 2,
-	OPAL_P7IOC_DIAG_TYPE_CI		= 3,
-	OPAL_P7IOC_DIAG_TYPE_MISC	= 4,
-	OPAL_P7IOC_DIAG_TYPE_I2C	= 5,
-	OPAL_P7IOC_DIAG_TYPE_LAST	= 6
-};
-
-struct OpalIoP7IOCErrorData {
-	__be16 type;
-
-	/* GEM */
-	__be64 gemXfir;
-	__be64 gemRfir;
-	__be64 gemRirqfir;
-	__be64 gemMask;
-	__be64 gemRwof;
-
-	/* LEM */
-	__be64 lemFir;
-	__be64 lemErrMask;
-	__be64 lemAction0;
-	__be64 lemAction1;
-	__be64 lemWof;
-
-	union {
-		struct OpalIoP7IOCRgcErrorData {
-			__be64 rgcStatus;	/* 3E1C10 */
-			__be64 rgcLdcp;		/* 3E1C18 */
-		}rgc;
-		struct OpalIoP7IOCBiErrorData {
-			__be64 biLdcp0;		/* 3C0100, 3C0118 */
-			__be64 biLdcp1;		/* 3C0108, 3C0120 */
-			__be64 biLdcp2;		/* 3C0110, 3C0128 */
-			__be64 biFenceStatus;	/* 3C0130, 3C0130 */
-
-			    u8 biDownbound;	/* BI Downbound or Upbound */
-		}bi;
-		struct OpalIoP7IOCCiErrorData {
-			__be64 ciPortStatus;	/* 3Dn008 */
-			__be64 ciPortLdcp;	/* 3Dn010 */
-
-			    u8 ciPort;		/* Index of CI port: 0/1 */
-		}ci;
-	};
-};
-
-/**
- * This structure defines the overlay which will be used to store PHB error
- * data upon request.
- */
-enum {
-	OPAL_PHB_ERROR_DATA_VERSION_1 = 1,
-};
-
-enum {
-	OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
-	OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
-};
-
-enum {
-	OPAL_P7IOC_NUM_PEST_REGS = 128,
-	OPAL_PHB3_NUM_PEST_REGS = 256
-};
-
-/* CAPI modes for PHB */
-enum {
-	OPAL_PHB_CAPI_MODE_PCIE         = 0,
-	OPAL_PHB_CAPI_MODE_CAPI         = 1,
-	OPAL_PHB_CAPI_MODE_SNOOP_OFF    = 2,
-	OPAL_PHB_CAPI_MODE_SNOOP_ON     = 3,
-};
-
-struct OpalIoPhbErrorCommon {
-	__be32 version;
-	__be32 ioType;
-	__be32 len;
-};
-
-struct OpalIoP7IOCPhbErrorData {
-	struct OpalIoPhbErrorCommon common;
-
-	__be32 brdgCtl;
-
-	// P7IOC utl regs
-	__be32 portStatusReg;
-	__be32 rootCmplxStatus;
-	__be32 busAgentStatus;
-
-	// P7IOC cfg regs
-	__be32 deviceStatus;
-	__be32 slotStatus;
-	__be32 linkStatus;
-	__be32 devCmdStatus;
-	__be32 devSecStatus;
-
-	// cfg AER regs
-	__be32 rootErrorStatus;
-	__be32 uncorrErrorStatus;
-	__be32 corrErrorStatus;
-	__be32 tlpHdr1;
-	__be32 tlpHdr2;
-	__be32 tlpHdr3;
-	__be32 tlpHdr4;
-	__be32 sourceId;
-
-	__be32 rsv3;
-
-	// Record data about the call to allocate a buffer.
-	__be64 errorClass;
-	__be64 correlator;
-
-	//P7IOC MMIO Error Regs
-	__be64 p7iocPlssr;                // n120
-	__be64 p7iocCsr;                  // n110
-	__be64 lemFir;                    // nC00
-	__be64 lemErrorMask;              // nC18
-	__be64 lemWOF;                    // nC40
-	__be64 phbErrorStatus;            // nC80
-	__be64 phbFirstErrorStatus;       // nC88
-	__be64 phbErrorLog0;              // nCC0
-	__be64 phbErrorLog1;              // nCC8
-	__be64 mmioErrorStatus;           // nD00
-	__be64 mmioFirstErrorStatus;      // nD08
-	__be64 mmioErrorLog0;             // nD40
-	__be64 mmioErrorLog1;             // nD48
-	__be64 dma0ErrorStatus;           // nD80
-	__be64 dma0FirstErrorStatus;      // nD88
-	__be64 dma0ErrorLog0;             // nDC0
-	__be64 dma0ErrorLog1;             // nDC8
-	__be64 dma1ErrorStatus;           // nE00
-	__be64 dma1FirstErrorStatus;      // nE08
-	__be64 dma1ErrorLog0;             // nE40
-	__be64 dma1ErrorLog1;             // nE48
-	__be64 pestA[OPAL_P7IOC_NUM_PEST_REGS];
-	__be64 pestB[OPAL_P7IOC_NUM_PEST_REGS];
-};
-
-struct OpalIoPhb3ErrorData {
-	struct OpalIoPhbErrorCommon common;
-
-	__be32 brdgCtl;
-
-	/* PHB3 UTL regs */
-	__be32 portStatusReg;
-	__be32 rootCmplxStatus;
-	__be32 busAgentStatus;
-
-	/* PHB3 cfg regs */
-	__be32 deviceStatus;
-	__be32 slotStatus;
-	__be32 linkStatus;
-	__be32 devCmdStatus;
-	__be32 devSecStatus;
-
-	/* cfg AER regs */
-	__be32 rootErrorStatus;
-	__be32 uncorrErrorStatus;
-	__be32 corrErrorStatus;
-	__be32 tlpHdr1;
-	__be32 tlpHdr2;
-	__be32 tlpHdr3;
-	__be32 tlpHdr4;
-	__be32 sourceId;
-
-	__be32 rsv3;
-
-	/* Record data about the call to allocate a buffer */
-	__be64 errorClass;
-	__be64 correlator;
-
-	__be64 nFir;			/* 000 */
-	__be64 nFirMask;		/* 003 */
-	__be64 nFirWOF;		/* 008 */
-
-	/* PHB3 MMIO Error Regs */
-	__be64 phbPlssr;		/* 120 */
-	__be64 phbCsr;		/* 110 */
-	__be64 lemFir;		/* C00 */
-	__be64 lemErrorMask;		/* C18 */
-	__be64 lemWOF;		/* C40 */
-	__be64 phbErrorStatus;	/* C80 */
-	__be64 phbFirstErrorStatus;	/* C88 */
-	__be64 phbErrorLog0;		/* CC0 */
-	__be64 phbErrorLog1;		/* CC8 */
-	__be64 mmioErrorStatus;	/* D00 */
-	__be64 mmioFirstErrorStatus;	/* D08 */
-	__be64 mmioErrorLog0;		/* D40 */
-	__be64 mmioErrorLog1;		/* D48 */
-	__be64 dma0ErrorStatus;	/* D80 */
-	__be64 dma0FirstErrorStatus;	/* D88 */
-	__be64 dma0ErrorLog0;		/* DC0 */
-	__be64 dma0ErrorLog1;		/* DC8 */
-	__be64 dma1ErrorStatus;	/* E00 */
-	__be64 dma1FirstErrorStatus;	/* E08 */
-	__be64 dma1ErrorLog0;		/* E40 */
-	__be64 dma1ErrorLog1;		/* E48 */
-	__be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
-	__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
-};
-
-enum {
-	OPAL_REINIT_CPUS_HILE_BE	= (1 << 0),
-	OPAL_REINIT_CPUS_HILE_LE	= (1 << 1),
-};
-
-typedef struct oppanel_line {
-	const char * 	line;
-	uint64_t 	line_len;
-} oppanel_line_t;
-
-/* OPAL I2C request */
-struct opal_i2c_request {
-	uint8_t	type;
-#define OPAL_I2C_RAW_READ	0
-#define OPAL_I2C_RAW_WRITE	1
-#define OPAL_I2C_SM_READ	2
-#define OPAL_I2C_SM_WRITE	3
-	uint8_t flags;
-#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
-	uint8_t	subaddr_sz;		/* Max 4 */
-	uint8_t reserved;
-	__be16 addr;			/* 7 or 10 bit address */
-	__be16 reserved2;
-	__be32 subaddr;		/* Sub-address if any */
-	__be32 size;			/* Data size */
-	__be64 buffer_ra;		/* Buffer real address */
-};
+/* We calculate number of sg entries based on PAGE_SIZE */
+#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
 
 /* /sys/firmware/opal */
 extern struct kobject *opal_kobj;
@@ -932,6 +194,13 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
 int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
 			 struct opal_i2c_request *oreq);
 
+int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
+		uint64_t size, uint64_t token);
+int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
+		uint64_t size, uint64_t token);
+int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
+		uint64_t token);
+
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
 				   int depth, void *data);
@@ -946,8 +215,10 @@ extern void hvc_opal_init_early(void);
 extern int opal_notifier_register(struct notifier_block *nb);
 extern int opal_notifier_unregister(struct notifier_block *nb);
 
-extern int opal_message_notifier_register(enum OpalMessageType msg_type,
+extern int opal_message_notifier_register(enum opal_msg_type msg_type,
 						struct notifier_block *nb);
+extern int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+					    struct notifier_block *nb);
 extern void opal_notifier_enable(void);
 extern void opal_notifier_disable(void);
 extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
@@ -962,7 +233,7 @@ extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
 struct rtc_time;
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
-extern void opal_flash_init(void);
+extern void opal_flash_update_init(void);
 extern void opal_flash_term_callback(void);
 extern int opal_elog_init(void);
 extern void opal_platform_dump_init(void);
@@ -983,13 +254,8 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
 					     unsigned long vmalloc_size);
 void opal_free_sg_list(struct opal_sg_list *sg);
 
-/*
- * Dump region ID range usable by the OS
- */
-#define OPAL_DUMP_REGION_HOST_START		0x80
-#define OPAL_DUMP_REGION_LOG_BUF		0x80
-#define OPAL_DUMP_REGION_HOST_END		0xFF
+extern int opal_error_code(int rc);
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* __OPAL_H */
+#endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e5f22c6c4bf9..70bd4381f8e6 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -106,9 +106,9 @@ struct paca_struct {
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_PPC_BOOK3E
-	u64 exgen[8] __attribute__((aligned(0x80)));
+	u64 exgen[8] __aligned(0x40);
 	/* Keep pgd in the same cacheline as the start of extlb */
-	pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */
+	pgd_t *pgd __aligned(0x40); /* Current PGD */
 	pgd_t *kernel_pgd;		/* Kernel PGD */
 
 	/* Shared by all threads of a core -- points to tcd of first thread */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 546d036fe925..1811c44bf34b 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -15,6 +15,24 @@
 struct device_node;
 
 /*
+ * PCI controller operations
+ */
+struct pci_controller_ops {
+	void		(*dma_dev_setup)(struct pci_dev *dev);
+	void		(*dma_bus_setup)(struct pci_bus *bus);
+
+	int		(*probe_mode)(struct pci_bus *);
+
+	/* Called when pci_enable_device() is called. Returns true to
+	 * allow assignment/enabling of the device. */
+	bool		(*enable_device_hook)(struct pci_dev *);
+
+	/* Called during PCI resource reassignment */
+	resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type);
+	void		(*reset_secondary_bus)(struct pci_dev *dev);
+};
+
+/*
  * Structure of a PCI controller (host bridge)
  */
 struct pci_controller {
@@ -46,6 +64,7 @@ struct pci_controller {
 	resource_size_t	isa_mem_phys;
 	resource_size_t	isa_mem_size;
 
+	struct pci_controller_ops controller_ops;
 	struct pci_ops *ops;
 	unsigned int __iomem *cfg_addr;
 	void __iomem *cfg_data;
@@ -89,6 +108,7 @@ struct pci_controller {
 
 #ifdef CONFIG_PPC64
 	unsigned long buid;
+	struct pci_dn *pci_data;
 #endif	/* CONFIG_PPC64 */
 
 	void *private_data;
@@ -154,31 +174,51 @@ static inline int isa_vaddr_is_ioport(void __iomem *address)
 struct iommu_table;
 
 struct pci_dn {
+	int     flags;
+#define PCI_DN_FLAG_IOV_VF	0x01
+
 	int	busno;			/* pci bus number */
 	int	devfn;			/* pci device and function number */
+	int	vendor_id;		/* Vendor ID */
+	int	device_id;		/* Device ID */
+	int	class_code;		/* Device class code */
 
+	struct  pci_dn *parent;
 	struct  pci_controller *phb;	/* for pci devices */
 	struct	iommu_table *iommu_table;	/* for phb's or bridges */
 	struct	device_node *node;	/* back-pointer to the device_node */
 
 	int	pci_ext_config_space;	/* for pci devices */
 
-	struct	pci_dev *pcidev;	/* back-pointer to the pci device */
 #ifdef CONFIG_EEH
 	struct eeh_dev *edev;		/* eeh device */
 #endif
 #define IODA_INVALID_PE		(-1)
 #ifdef CONFIG_PPC_POWERNV
 	int	pe_number;
+#ifdef CONFIG_PCI_IOV
+	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
+	u16     num_vfs;		/* number of VFs enabled*/
+	int     offset;			/* PE# for the first VF PE */
+#define M64_PER_IOV 4
+	int     m64_per_iov;
+#define IODA_INVALID_M64        (-1)
+	int     m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+#endif /* CONFIG_PCI_IOV */
 #endif
+	struct list_head child_list;
+	struct list_head list;
 };
 
 /* Get the pointer to a device_node's pci_dn */
 #define PCI_DN(dn)	((struct pci_dn *) (dn)->data)
 
+extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+					   int devfn);
 extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
-
-extern void * update_dn_pci_info(struct device_node *dn, void *data);
+extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev);
+extern void remove_dev_pci_data(struct pci_dev *pdev);
+extern void *update_dn_pci_info(struct device_node *dn, void *data);
 
 static inline int pci_device_from_OF_node(struct device_node *np,
 					  u8 *bus, u8 *devfn)
@@ -191,20 +231,12 @@ static inline int pci_device_from_OF_node(struct device_node *np,
 }
 
 #if defined(CONFIG_EEH)
-static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
+static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn *pdn)
 {
-	/*
-	 * For those OF nodes whose parent isn't PCI bridge, they
-	 * don't have PCI_DN actually. So we have to skip them for
-	 * any EEH operations.
-	 */
-	if (!dn || !PCI_DN(dn))
-		return NULL;
-
-	return PCI_DN(dn)->edev;
+	return pdn ? pdn->edev : NULL;
 }
 #else
-#define of_node_to_eeh_dev(x) (NULL)
+#define pdn_to_eeh_dev(x)	(NULL)
 #endif
 
 /** Find the bus corresponding to the indicated device node */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 1b0739bc14b5..4aef8d660999 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -22,7 +22,7 @@
 
 #include <asm-generic/pci-dma-compat.h>
 
-/* Return values for ppc_md.pci_probe_mode function */
+/* Return values for pci_controller_ops.probe_mode function */
 #define PCI_PROBE_NONE		-1	/* Don't look at this bus at all */
 #define PCI_PROBE_NORMAL	0	/* Do normal PCI probing */
 #define PCI_PROBE_DEVTREE	1	/* Instantiate from device tree */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 03cd858a401c..5c93f691b495 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -153,6 +153,7 @@
 #define PPC_INST_MFSPR_PVR_MASK		0xfc1fffff
 #define PPC_INST_MFTMR			0x7c0002dc
 #define PPC_INST_MSGSND			0x7c00019c
+#define PPC_INST_MSGCLR			0x7c0001dc
 #define PPC_INST_MSGSNDP		0x7c00011c
 #define PPC_INST_MTTMR			0x7c0003dc
 #define PPC_INST_NOP			0x60000000
@@ -212,6 +213,8 @@
 #define PPC_INST_LWZ			0x80000000
 #define PPC_INST_STD			0xf8000000
 #define PPC_INST_STDU			0xf8000001
+#define PPC_INST_STW			0x90000000
+#define PPC_INST_STWU			0x94000000
 #define PPC_INST_MFLR			0x7c0802a6
 #define PPC_INST_MTLR			0x7c0803a6
 #define PPC_INST_CMPWI			0x2c000000
@@ -309,6 +312,8 @@
 					___PPC_RB(b) | __PPC_EH(eh))
 #define PPC_MSGSND(b)		stringify_in_c(.long PPC_INST_MSGSND | \
 					___PPC_RB(b))
+#define PPC_MSGCLR(b)		stringify_in_c(.long PPC_INST_MSGCLR | \
+					___PPC_RB(b))
 #define PPC_MSGSNDP(b)		stringify_in_c(.long PPC_INST_MSGSNDP | \
 					___PPC_RB(b))
 #define PPC_POPCNTB(a, s)	stringify_in_c(.long PPC_INST_POPCNTB | \
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index db1e2b8eff3c..4122a86d6858 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -23,8 +23,6 @@ extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
 
 extern struct list_head hose_list;
 
-extern void find_and_init_phbs(void);
-
 extern struct pci_dev *isa_bridge_pcidev;	/* may be NULL if no ISA bus */
 
 /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
@@ -33,9 +31,14 @@ extern struct pci_dev *isa_bridge_pcidev;	/* may be NULL if no ISA bus */
 
 /* PCI device_node operations */
 struct device_node;
+struct pci_dn;
+
 typedef void *(*traverse_func)(struct device_node *me, void *data);
 void *traverse_pci_devices(struct device_node *start, traverse_func pre,
 		void *data);
+void *traverse_pci_dn(struct pci_dn *root,
+		      void *(*fn)(struct pci_dn *, void *),
+		      void *data);
 
 extern void pci_devs_phb_init(void);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
@@ -76,7 +79,6 @@ static inline const char *eeh_driver_name(struct pci_dev *pdev)
 #endif /* CONFIG_EEH */
 
 #else /* CONFIG_PCI */
-static inline void find_and_init_phbs(void) { }
 static inline void init_pci_config_tokens(void) { }
 #endif /* !CONFIG_PCI */
 
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 7e4612528546..dd0fc18d8103 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -637,105 +637,105 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
 
 /* AltiVec Registers (VPRs) */
 
-#define	vr0	0
-#define	vr1	1
-#define	vr2	2
-#define	vr3	3
-#define	vr4	4
-#define	vr5	5
-#define	vr6	6
-#define	vr7	7
-#define	vr8	8
-#define	vr9	9
-#define	vr10	10
-#define	vr11	11
-#define	vr12	12
-#define	vr13	13
-#define	vr14	14
-#define	vr15	15
-#define	vr16	16
-#define	vr17	17
-#define	vr18	18
-#define	vr19	19
-#define	vr20	20
-#define	vr21	21
-#define	vr22	22
-#define	vr23	23
-#define	vr24	24
-#define	vr25	25
-#define	vr26	26
-#define	vr27	27
-#define	vr28	28
-#define	vr29	29
-#define	vr30	30
-#define	vr31	31
+#define	v0	0
+#define	v1	1
+#define	v2	2
+#define	v3	3
+#define	v4	4
+#define	v5	5
+#define	v6	6
+#define	v7	7
+#define	v8	8
+#define	v9	9
+#define	v10	10
+#define	v11	11
+#define	v12	12
+#define	v13	13
+#define	v14	14
+#define	v15	15
+#define	v16	16
+#define	v17	17
+#define	v18	18
+#define	v19	19
+#define	v20	20
+#define	v21	21
+#define	v22	22
+#define	v23	23
+#define	v24	24
+#define	v25	25
+#define	v26	26
+#define	v27	27
+#define	v28	28
+#define	v29	29
+#define	v30	30
+#define	v31	31
 
 /* VSX Registers (VSRs) */
 
-#define	vsr0	0
-#define	vsr1	1
-#define	vsr2	2
-#define	vsr3	3
-#define	vsr4	4
-#define	vsr5	5
-#define	vsr6	6
-#define	vsr7	7
-#define	vsr8	8
-#define	vsr9	9
-#define	vsr10	10
-#define	vsr11	11
-#define	vsr12	12
-#define	vsr13	13
-#define	vsr14	14
-#define	vsr15	15
-#define	vsr16	16
-#define	vsr17	17
-#define	vsr18	18
-#define	vsr19	19
-#define	vsr20	20
-#define	vsr21	21
-#define	vsr22	22
-#define	vsr23	23
-#define	vsr24	24
-#define	vsr25	25
-#define	vsr26	26
-#define	vsr27	27
-#define	vsr28	28
-#define	vsr29	29
-#define	vsr30	30
-#define	vsr31	31
-#define	vsr32	32
-#define	vsr33	33
-#define	vsr34	34
-#define	vsr35	35
-#define	vsr36	36
-#define	vsr37	37
-#define	vsr38	38
-#define	vsr39	39
-#define	vsr40	40
-#define	vsr41	41
-#define	vsr42	42
-#define	vsr43	43
-#define	vsr44	44
-#define	vsr45	45
-#define	vsr46	46
-#define	vsr47	47
-#define	vsr48	48
-#define	vsr49	49
-#define	vsr50	50
-#define	vsr51	51
-#define	vsr52	52
-#define	vsr53	53
-#define	vsr54	54
-#define	vsr55	55
-#define	vsr56	56
-#define	vsr57	57
-#define	vsr58	58
-#define	vsr59	59
-#define	vsr60	60
-#define	vsr61	61
-#define	vsr62	62
-#define	vsr63	63
+#define	vs0	0
+#define	vs1	1
+#define	vs2	2
+#define	vs3	3
+#define	vs4	4
+#define	vs5	5
+#define	vs6	6
+#define	vs7	7
+#define	vs8	8
+#define	vs9	9
+#define	vs10	10
+#define	vs11	11
+#define	vs12	12
+#define	vs13	13
+#define	vs14	14
+#define	vs15	15
+#define	vs16	16
+#define	vs17	17
+#define	vs18	18
+#define	vs19	19
+#define	vs20	20
+#define	vs21	21
+#define	vs22	22
+#define	vs23	23
+#define	vs24	24
+#define	vs25	25
+#define	vs26	26
+#define	vs27	27
+#define	vs28	28
+#define	vs29	29
+#define	vs30	30
+#define	vs31	31
+#define	vs32	32
+#define	vs33	33
+#define	vs34	34
+#define	vs35	35
+#define	vs36	36
+#define	vs37	37
+#define	vs38	38
+#define	vs39	39
+#define	vs40	40
+#define	vs41	41
+#define	vs42	42
+#define	vs43	43
+#define	vs44	44
+#define	vs45	45
+#define	vs46	46
+#define	vs47	47
+#define	vs48	48
+#define	vs49	49
+#define	vs50	50
+#define	vs51	51
+#define	vs52	52
+#define	vs53	53
+#define	vs54	54
+#define	vs55	55
+#define	vs56	56
+#define	vs57	57
+#define	vs58	58
+#define	vs59	59
+#define	vs60	60
+#define	vs61	61
+#define	vs62	62
+#define	vs63	63
 
 /* SPE Registers (EVPRs) */
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c874fb533bb..af56b5c6c81a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -608,13 +608,16 @@
 #define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
+#define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
 #define	  SRR1_WAKEHMI		0x00280000 /* Hypervisor maintenance */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
+#define   SRR1_WAKEDBELL	0x00140000 /* Privileged doorbell on P8 */
 #define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
 #define	  SRR1_WAKERESET	0x00100000 /* System reset */
+#define   SRR1_WAKEHDBELL	0x000c0000 /* Hypervisor doorbell on P8 */
 #define	  SRR1_WAKESTATE	0x00030000 /* Powersave exit mask [46:47] */
 #define	  SRR1_WS_DEEPEST	0x00030000 /* Some resources not maintained,
 					  * may not be recoverable */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 2e23e92a4372..7a4ede16b283 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -4,6 +4,7 @@
 
 #include <linux/spinlock.h>
 #include <asm/page.h>
+#include <linux/time.h>
 
 /*
  * Definitions for talking to the RTAS on CHRP machines.
@@ -273,6 +274,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log)
 #define PSERIES_ELOG_SECT_ID_MANUFACT_INFO	(('M' << 8) | 'I')
 #define PSERIES_ELOG_SECT_ID_CALL_HOME		(('C' << 8) | 'H')
 #define PSERIES_ELOG_SECT_ID_USER_DEF		(('U' << 8) | 'D')
+#define PSERIES_ELOG_SECT_ID_HOTPLUG		(('H' << 8) | 'P')
 
 /* Vendor specific Platform Event Log Format, Version 6, section header */
 struct pseries_errorlog {
@@ -296,6 +298,31 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect)
 	return be16_to_cpu(sect->length);
 }
 
+/* RTAS pseries hotplug errorlog section */
+struct pseries_hp_errorlog {
+	u8	resource;
+	u8	action;
+	u8	id_type;
+	u8	reserved;
+	union {
+		__be32	drc_index;
+		__be32	drc_count;
+		char	drc_name[1];
+	} _drc_u;
+};
+
+#define PSERIES_HP_ELOG_RESOURCE_CPU	1
+#define PSERIES_HP_ELOG_RESOURCE_MEM	2
+#define PSERIES_HP_ELOG_RESOURCE_SLOT	3
+#define PSERIES_HP_ELOG_RESOURCE_PHB	4
+
+#define PSERIES_HP_ELOG_ACTION_ADD	1
+#define PSERIES_HP_ELOG_ACTION_REMOVE	2
+
+#define PSERIES_HP_ELOG_ID_DRC_NAME	1
+#define PSERIES_HP_ELOG_ID_DRC_INDEX	2
+#define PSERIES_HP_ELOG_ID_DRC_COUNT	3
+
 struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
 					      uint16_t section_id);
 
@@ -327,7 +354,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_online_cpus_mask(cpumask_var_t cpus);
 extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
-extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return);
+extern int rtas_ibm_suspend_me(u64 handle);
 
 struct rtc_time;
 extern unsigned long rtas_get_boot_time(void);
@@ -343,8 +370,12 @@ extern int early_init_dt_scan_rtas(unsigned long node,
 extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
+extern time64_t last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
+#else
+static inline int clobbering_unread_rtas_event(void) { return 0; }
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h
new file mode 100644
index 000000000000..c1818e35cf02
--- /dev/null
+++ b/arch/powerpc/include/asm/seccomp.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_POWERPC_SECCOMP_H
+#define _ASM_POWERPC_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#include <asm-generic/seccomp.h>
+
+#endif	/* _ASM_POWERPC_SECCOMP_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index fbdf18cf954c..e9d384cbd021 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -7,7 +7,6 @@
 extern void ppc_printk_progress(char *s, unsigned short hex);
 
 extern unsigned int rtas_data;
-extern int mem_init_done;	/* set on boot once kmalloc can be called */
 extern unsigned long long memory_limit;
 extern unsigned long klimit;
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index d607df5081a7..825663c30945 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -42,7 +42,7 @@ struct smp_ops_t {
 #ifdef CONFIG_PPC_SMP_MUXED_IPI
 	void  (*cause_ipi)(int cpu, unsigned long data);
 #endif
-	int   (*probe)(void);
+	void  (*probe)(void);
 	int   (*kick_cpu)(int nr);
 	void  (*setup_cpu)(int nr);
 	void  (*bringup_done)(void);
@@ -125,7 +125,6 @@ extern irqreturn_t smp_ipi_demux(void);
 
 void smp_init_pSeries(void);
 void smp_init_cell(void);
-void smp_init_celleb(void);
 void smp_setup_cpu_maps(void);
 
 extern int __cpu_disable(void);
@@ -175,7 +174,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
 
 extern int smt_enabled_at_boot;
 
-extern int smp_mpic_probe(void);
+extern void smp_mpic_probe(void);
 extern void smp_mpic_setup_cpu(int cpu);
 extern int smp_generic_kick_cpu(int nr);
 extern int smp_generic_cpu_bootable(unsigned int nr);
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 6e909f3e6a46..37d2da6feabf 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -478,7 +478,7 @@ extern unsigned long smu_cmdbuf_abs;
 
 
 /*
- * Kenrel asynchronous i2c interface
+ * Kernel asynchronous i2c interface
  */
 
 #define SMU_I2C_READ_MAX	0x1d
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
index 96f59de61855..487e09077a3e 100644
--- a/arch/powerpc/include/asm/swab.h
+++ b/arch/powerpc/include/asm/swab.h
@@ -9,30 +9,4 @@
 
 #include <uapi/asm/swab.h>
 
-static __inline__ __u16 ld_le16(const volatile __u16 *addr)
-{
-	__u16 val;
-
-	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-
-static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
-{
-	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static __inline__ __u32 ld_le32(const volatile __u32 *addr)
-{
-	__u32 val;
-
-	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-
-static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
-{
-	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
 #endif /* _ASM_POWERPC_SWAB_H */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 91062eef582f..f1863a138b4a 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -367,3 +367,4 @@ SYSCALL_SPU(getrandom)
 SYSCALL_SPU(memfd_create)
 SYSCALL_SPU(bpf)
 COMPAT_SYS(execveat)
+PPC64ONLY(switch_endian)
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 72489799cf02..7efee4a3240b 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -39,7 +39,6 @@
  */
 struct thread_info {
 	struct task_struct *task;		/* main task structure */
-	struct exec_domain *exec_domain;	/* execution domain */
 	int		cpu;			/* cpu we're on */
 	int		preempt_count;		/* 0 => preemptable,
 						   <0 => BUG */
@@ -55,7 +54,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task =		&tsk,			\
-	.exec_domain =	&default_exec_domain,	\
 	.cpu =		0,			\
 	.preempt_count = INIT_PREEMPT_COUNT,	\
 	.flags =	0,			\
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
index c44131e68e11..233ef5fe5fde 100644
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ b/arch/powerpc/include/asm/ucc_slow.h
@@ -251,19 +251,6 @@ void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode);
  */
 void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode);
 
-/* ucc_slow_poll_transmitter_now
- * Immediately forces a poll of the transmitter for data to be sent.
- * Typically, the hardware performs a periodic poll for data that the
- * transmit routine has set up to be transmitted. In cases where
- * this polling cycle is not soon enough, this optional routine can
- * be invoked to force a poll right away, instead. Proper use for
- * each transmission for which this functionality is desired is to
- * call the transmit routine and then this routine right after.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_poll_transmitter_now(struct ucc_slow_private * uccs);
-
 /* ucc_slow_graceful_stop_tx
  * Smoothly stops transmission on a specified slow UCC.
  *
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 36b79c31eedd..f4f8b667d75b 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define __NR_syscalls		363
+#define __NR_syscalls		364
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h
index e5f8dd366212..ab3acd2f2786 100644
--- a/arch/powerpc/include/asm/vga.h
+++ b/arch/powerpc/include/asm/vga.h
@@ -25,12 +25,12 @@
 
 static inline void scr_writew(u16 val, volatile u16 *addr)
 {
-    st_le16(addr, val);
+	*addr = cpu_to_le16(val);
 }
 
 static inline u16 scr_readw(volatile const u16 *addr)
 {
-    return ld_le16(addr);
+	return le16_to_cpu(*addr);
 }
 
 #define VT_BUF_HAVE_MEMCPYW
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 6997f4a271df..0e25bdb190bb 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -146,7 +146,7 @@ extern void xics_update_irq_servers(void);
 extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
 extern void xics_mask_unknown_vec(unsigned int vec);
 extern irqreturn_t xics_ipi_dispatch(int cpu);
-extern int xics_smp_probe(void);
+extern void xics_smp_probe(void);
 extern void xics_register_ics(struct ics *ics);
 extern void xics_teardown_cpu(void);
 extern void xics_kexec_teardown_cpu(int secondary);
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 7a3f795ac218..79c4068be278 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -25,7 +25,6 @@ header-y += posix_types.h
 header-y += ps3fb.h
 header-y += ptrace.h
 header-y += resource.h
-header-y += seccomp.h
 header-y += sembuf.h
 header-y += setup.h
 header-y += shmbuf.h
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h
index 77d2ed35b111..8036b385417d 100644
--- a/arch/powerpc/include/uapi/asm/ptrace.h
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -136,7 +136,7 @@ struct pt_regs {
 #endif /* __powerpc64__ */
 
 /*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * Get/set all the altivec registers v0..v31, vscr, vrsave, in one go.
  * The transfer totals 34 quadword.  Quadwords 0-31 contain the
  * corresponding vector registers.  Quadword 32 contains the vscr as the
  * last word (offset 12) within that quadword.  Quadword 33 contains the
diff --git a/arch/powerpc/include/uapi/asm/seccomp.h b/arch/powerpc/include/uapi/asm/seccomp.h
deleted file mode 100644
index 00c1d9133cfe..000000000000
--- a/arch/powerpc/include/uapi/asm/seccomp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_POWERPC_SECCOMP_H
-#define _ASM_POWERPC_SECCOMP_H
-
-#include <linux/unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#define __NR_seccomp_read_32 __NR_read
-#define __NR_seccomp_write_32 __NR_write
-#define __NR_seccomp_exit_32 __NR_exit
-#define __NR_seccomp_sigreturn_32 __NR_sigreturn
-
-#endif	/* _ASM_POWERPC_SECCOMP_H */
diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h
index 5d836b7c1176..5047659815a5 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -11,7 +11,7 @@
 #define TM_CAUSE_RESCHED	0xde
 #define TM_CAUSE_TLBI		0xdc
 #define TM_CAUSE_FAC_UNAV	0xda
-#define TM_CAUSE_SYSCALL	0xd8  /* future use */
+#define TM_CAUSE_SYSCALL	0xd8
 #define TM_CAUSE_MISC		0xd6  /* future use */
 #define TM_CAUSE_SIGNAL		0xd4
 #define TM_CAUSE_ALIGNMENT	0xd2
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index ef5b5b1f3123..e4aa173dae62 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -385,5 +385,6 @@
 #define __NR_memfd_create	360
 #define __NR_bpf		361
 #define __NR_execveat		362
+#define __NR_switch_endian	363
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 502cf69b6c89..c1ebbdaac28f 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -33,7 +33,8 @@ obj-y				:= cputable.o ptrace.o syscalls.o \
 				   signal.o sysfs.o cacheinfo.o time.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o dma.o \
-				   misc_$(CONFIG_WORD_SIZE).o vdso32/
+				   misc_$(CONFIG_WORD_SIZE).o vdso32/ \
+				   of_platform.o prom_parse.o
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
@@ -47,7 +48,6 @@ obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
 obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o
-obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o
 procfs-y			:= proc_powerpc.o
 obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
 rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)	:= rtas_pci.o
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index ae77b7e59889..c641983bbdd6 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -61,12 +61,22 @@ struct cache_type_info {
 };
 
 /* These are used to index the cache_type_info array. */
-#define CACHE_TYPE_UNIFIED     0
-#define CACHE_TYPE_INSTRUCTION 1
-#define CACHE_TYPE_DATA        2
+#define CACHE_TYPE_UNIFIED     0 /* cache-size, cache-block-size, etc. */
+#define CACHE_TYPE_UNIFIED_D   1 /* d-cache-size, d-cache-block-size, etc */
+#define CACHE_TYPE_INSTRUCTION 2
+#define CACHE_TYPE_DATA        3
 
 static const struct cache_type_info cache_type_info[] = {
 	{
+		/* Embedded systems that use cache-size, cache-block-size,
+		 * etc. for the Unified (typically L2) cache. */
+		.name            = "Unified",
+		.size_prop       = "cache-size",
+		.line_size_props = { "cache-line-size",
+				     "cache-block-size", },
+		.nr_sets_prop    = "cache-sets",
+	},
+	{
 		/* PowerPC Processor binding says the [di]-cache-*
 		 * must be equal on unified caches, so just use
 		 * d-cache properties. */
@@ -293,7 +303,8 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
 {
 	struct cache *iter;
 
-	if (cache->type == CACHE_TYPE_UNIFIED)
+	if (cache->type == CACHE_TYPE_UNIFIED ||
+	    cache->type == CACHE_TYPE_UNIFIED_D)
 		return cache;
 
 	list_for_each_entry(iter, &cache_list, list)
@@ -324,16 +335,29 @@ static bool cache_node_is_unified(const struct device_node *np)
 	return of_get_property(np, "cache-unified", NULL);
 }
 
-static struct cache *cache_do_one_devnode_unified(struct device_node *node,
-						  int level)
+/*
+ * Unified caches can have two different sets of tags.  Most embedded
+ * use cache-size, etc. for the unified cache size, but open firmware systems
+ * use d-cache-size, etc.   Check on initialization for which type we have, and
+ * return the appropriate structure type.  Assume it's embedded if it isn't
+ * open firmware.  If it's yet a 3rd type, then there will be missing entries
+ * in /sys/devices/system/cpu/cpu0/cache/index2/, and this code will need
+ * to be extended further.
+ */
+static int cache_is_unified_d(const struct device_node *np)
 {
-	struct cache *cache;
+	return of_get_property(np,
+		cache_type_info[CACHE_TYPE_UNIFIED_D].size_prop, NULL) ?
+		CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
+}
 
+/*
+ */
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+{
 	pr_debug("creating L%d ucache for %s\n", level, node->full_name);
 
-	cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
-
-	return cache;
+	return new_cache(cache_is_unified_d(node), level, node);
 }
 
 static struct cache *cache_do_one_devnode_split(struct device_node *node,
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 46733535cc0b..9c9b7411b28b 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -137,15 +137,11 @@ __init_HFSCR:
 /*
  * Clear the TLB using the specified IS form of tlbiel instruction
  * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
- *
- * r3 = IS field
  */
 __init_tlb_power7:
-	li	r3,0xc00	/* IS field = 0b11 */
-_GLOBAL(__flush_tlb_power7)
 	li	r6,128
 	mtctr	r6
-	mr	r7,r3		/* IS field */
+	li	r7,0xc00	/* IS field = 0b11 */
 	ptesync
 2:	tlbiel	r7
 	addi	r7,r7,0x1000
@@ -154,11 +150,9 @@ _GLOBAL(__flush_tlb_power7)
 1:	blr
 
 __init_tlb_power8:
-	li	r3,0xc00	/* IS field = 0b11 */
-_GLOBAL(__flush_tlb_power8)
 	li	r6,512
 	mtctr	r6
-	mr	r7,r3		/* IS field */
+	li	r7,0xc00	/* IS field = 0b11 */
 	ptesync
 2:	tlbiel	r7
 	addi	r7,r7,0x1000
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index f337666768a7..60262fdf35ba 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -71,8 +71,8 @@ extern void __restore_cpu_power7(void);
 extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power8(void);
 extern void __restore_cpu_a2(void);
-extern void __flush_tlb_power7(unsigned long inval_selector);
-extern void __flush_tlb_power8(unsigned long inval_selector);
+extern void __flush_tlb_power7(unsigned int action);
+extern void __flush_tlb_power8(unsigned int action);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
@@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check_early	= __machine_check_early_realmode_p8,
 		.platform		= "power8",
 	},
+	{	/* Power8NVL */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004c0000,
+		.cpu_name		= "POWER8NVL (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_type		= PPC_OPROFILE_INVALID,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.flush_tlb		= __flush_tlb_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
 	{	/* Power8 DD1: Does not support doorbell IPIs */
 		.pvr_mask		= 0xffffff00,
 		.pvr_value		= 0x004d0100,
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f4217819cc31..2128f3a96c32 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -17,6 +17,7 @@
 
 #include <asm/dbell.h>
 #include <asm/irq_regs.h>
+#include <asm/kvm_ppc.h>
 
 #ifdef CONFIG_SMP
 void doorbell_setup_this_cpu(void)
@@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs)
 
 	may_hard_irq_enable();
 
+	kvmppc_set_host_ipi(smp_processor_id(), 0);
 	__this_cpu_inc(irq_stat.doorbell_irqs);
 
 	smp_ipi_demux();
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 735979764cd4..6e8d764ce47b 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -116,16 +116,13 @@ void __init swiotlb_detect_4g(void)
 	}
 }
 
-static int __init swiotlb_late_init(void)
+static int __init check_swiotlb_enabled(void)
 {
-	if (ppc_swiotlb_enable) {
+	if (ppc_swiotlb_enable)
 		swiotlb_print_info();
-		set_pci_dma_ops(&swiotlb_dma_ops);
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
-	} else {
+	else
 		swiotlb_free();
-	}
 
 	return 0;
 }
-subsys_initcall(swiotlb_late_init);
+subsys_initcall(check_swiotlb_enabled);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 3b2252e7731b..a4c62eb0ee48 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -164,30 +164,34 @@ __setup("eeh=", eeh_setup);
  */
 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 {
-	struct device_node *dn = eeh_dev_to_of_node(edev);
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	u32 cfg;
 	int cap, i;
 	int n = 0, l = 0;
 	char buffer[128];
 
-	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
-	pr_warn("EEH: of node=%s\n", dn->full_name);
+	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+		       edev->phb->global_number, pdn->busno,
+		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+		edev->phb->global_number, pdn->busno,
+		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 
-	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
+	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
 	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
 
-	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
+	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
 	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
 
 	/* Gather bridge-specific registers */
 	if (edev->mode & EEH_DEV_BRIDGE) {
-		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
+		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
 		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
 
-		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
+		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
 		pr_warn("EEH: Bridge control: %04x\n", cfg);
 	}
@@ -195,11 +199,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 	/* Dump out the PCI-X command and status regs */
 	cap = edev->pcix_cap;
 	if (cap) {
-		eeh_ops->read_config(dn, cap, 4, &cfg);
+		eeh_ops->read_config(pdn, cap, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
 		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
 
-		eeh_ops->read_config(dn, cap+4, 4, &cfg);
+		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
 		pr_warn("EEH: PCI-X status: %08x\n", cfg);
 	}
@@ -211,7 +215,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 		pr_warn("EEH: PCI-E capabilities and status follow:\n");
 
 		for (i=0; i<=8; i++) {
-			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 
 			if ((i % 4) == 0) {
@@ -238,7 +242,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 		pr_warn("EEH: PCI-E AER capability register set follows:\n");
 
 		for (i=0; i<=13; i++) {
-			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 
 			if ((i % 4) == 0) {
@@ -414,11 +418,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	int ret;
 	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	unsigned long flags;
-	struct device_node *dn;
+	struct pci_dn *pdn;
 	struct pci_dev *dev;
 	struct eeh_pe *pe, *parent_pe, *phb_pe;
 	int rc = 0;
-	const char *location;
+	const char *location = NULL;
 
 	eeh_stats.total_mmio_ffs++;
 
@@ -429,15 +433,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 		eeh_stats.no_dn++;
 		return 0;
 	}
-	dn = eeh_dev_to_of_node(edev);
 	dev = eeh_dev_to_pci_dev(edev);
 	pe = eeh_dev_to_pe(edev);
 
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!pe) {
 		eeh_stats.ignored_check++;
-		pr_debug("EEH: Ignored check for %s %s\n",
-			eeh_pci_name(dev), dn->full_name);
+		pr_debug("EEH: Ignored check for %s\n",
+			eeh_pci_name(dev));
 		return 0;
 	}
 
@@ -473,10 +476,13 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	if (pe->state & EEH_PE_ISOLATED) {
 		pe->check_count++;
 		if (pe->check_count % EEH_MAX_FAILS == 0) {
-			location = of_get_property(dn, "ibm,loc-code", NULL);
+			pdn = eeh_dev_to_pdn(edev);
+			if (pdn->node)
+				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
 			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
 				"location=%s driver=%s pci addr=%s\n",
-				pe->check_count, location,
+				pe->check_count,
+				location ? location : "unknown",
 				eeh_driver_name(dev), eeh_pci_name(dev));
 			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
 				eeh_driver_name(dev));
@@ -667,6 +673,55 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
 	return rc;
 }
 
+static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
+	struct pci_dev *dev = userdata;
+
+	/*
+	 * The caller should have disabled and saved the
+	 * state for the specified device
+	 */
+	if (!pdev || pdev == dev)
+		return NULL;
+
+	/* Ensure we have D0 power state */
+	pci_set_power_state(pdev, PCI_D0);
+
+	/* Save device state */
+	pci_save_state(pdev);
+
+	/*
+	 * Disable device to avoid any DMA traffic and
+	 * interrupt from the device
+	 */
+	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+
+	return NULL;
+}
+
+static void *eeh_restore_dev_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
+	struct pci_dev *dev = userdata;
+
+	if (!pdev)
+		return NULL;
+
+	/* Apply customization from firmware */
+	if (pdn && eeh_ops->restore_config)
+		eeh_ops->restore_config(pdn);
+
+	/* The caller should restore state for the specified device */
+	if (pdev != dev)
+		pci_save_state(pdev);
+
+	return NULL;
+}
+
 /**
  * pcibios_set_pcie_slot_reset - Set PCI-E reset state
  * @dev: pci device struct
@@ -689,13 +744,19 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 	switch (state) {
 	case pcie_deassert_reset:
 		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+		eeh_unfreeze_pe(pe, false);
 		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
 		break;
 	case pcie_hot_reset:
+		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
 		eeh_ops->reset(pe, EEH_RESET_HOT);
 		break;
 	case pcie_warm_reset:
+		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
 		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 		break;
@@ -815,15 +876,15 @@ out:
  */
 void eeh_save_bars(struct eeh_dev *edev)
 {
+	struct pci_dn *pdn;
 	int i;
-	struct device_node *dn;
 
-	if (!edev)
+	pdn = eeh_dev_to_pdn(edev);
+	if (!pdn)
 		return;
-	dn = eeh_dev_to_of_node(edev);
 
 	for (i = 0; i < 16; i++)
-		eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);
 
 	/*
 	 * For PCI bridges including root port, we need enable bus
@@ -914,7 +975,7 @@ static struct notifier_block eeh_reboot_nb = {
 int eeh_init(void)
 {
 	struct pci_controller *hose, *tmp;
-	struct device_node *phb;
+	struct pci_dn *pdn;
 	static int cnt = 0;
 	int ret = 0;
 
@@ -949,20 +1010,9 @@ int eeh_init(void)
 		return ret;
 
 	/* Enable EEH for all adapters */
-	if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) {
-		list_for_each_entry_safe(hose, tmp,
-			&hose_list, list_node) {
-			phb = hose->dn;
-			traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
-		}
-	} else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) {
-		list_for_each_entry_safe(hose, tmp,
-			&hose_list, list_node)
-			pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
-	} else {
-		pr_warn("%s: Invalid probe mode %x",
-			__func__, eeh_subsystem_flags);
-		return -EINVAL;
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		pdn = hose->pci_data;
+		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
 	}
 
 	/*
@@ -987,8 +1037,8 @@ int eeh_init(void)
 core_initcall_sync(eeh_init);
 
 /**
- * eeh_add_device_early - Enable EEH for the indicated device_node
- * @dn: device node for which to set up EEH
+ * eeh_add_device_early - Enable EEH for the indicated device node
+ * @pdn: PCI device node for which to set up EEH
  *
  * This routine must be used to perform EEH initialization for PCI
  * devices that were added after system boot (e.g. hotplug, dlpar).
@@ -998,44 +1048,41 @@ core_initcall_sync(eeh_init);
  * on the CEC architecture, type of the device, on earlier boot
  * command-line arguments & etc.
  */
-void eeh_add_device_early(struct device_node *dn)
+void eeh_add_device_early(struct pci_dn *pdn)
 {
 	struct pci_controller *phb;
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
-	/*
-	 * If we're doing EEH probe based on PCI device, we
-	 * would delay the probe until late stage because
-	 * the PCI device isn't available this moment.
-	 */
-	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
+	if (!edev || !eeh_enabled())
 		return;
 
-	if (!of_node_to_eeh_dev(dn))
-		return;
-	phb = of_node_to_eeh_dev(dn)->phb;
-
 	/* USB Bus children of PCI devices will not have BUID's */
-	if (NULL == phb || 0 == phb->buid)
+	phb = edev->phb;
+	if (NULL == phb ||
+	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
 		return;
 
-	eeh_ops->of_probe(dn, NULL);
+	eeh_ops->probe(pdn, NULL);
 }
 
 /**
  * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @dn: device node
+ * @pdn: PCI device node
  *
  * This routine must be used to perform EEH initialization for the
  * indicated PCI device that was added after system boot (e.g.
  * hotplug, dlpar).
  */
-void eeh_add_device_tree_early(struct device_node *dn)
+void eeh_add_device_tree_early(struct pci_dn *pdn)
 {
-	struct device_node *sib;
+	struct pci_dn *n;
 
-	for_each_child_of_node(dn, sib)
-		eeh_add_device_tree_early(sib);
-	eeh_add_device_early(dn);
+	if (!pdn)
+		return;
+
+	list_for_each_entry(n, &pdn->child_list, list)
+		eeh_add_device_tree_early(n);
+	eeh_add_device_early(pdn);
 }
 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
 
@@ -1048,7 +1095,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
  */
 void eeh_add_device_late(struct pci_dev *dev)
 {
-	struct device_node *dn;
+	struct pci_dn *pdn;
 	struct eeh_dev *edev;
 
 	if (!dev || !eeh_enabled())
@@ -1056,8 +1103,8 @@ void eeh_add_device_late(struct pci_dev *dev)
 
 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
 
-	dn = pci_device_to_OF_node(dev);
-	edev = of_node_to_eeh_dev(dn);
+	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
+	edev = pdn_to_eeh_dev(pdn);
 	if (edev->pdev == dev) {
 		pr_debug("EEH: Already referenced !\n");
 		return;
@@ -1089,13 +1136,6 @@ void eeh_add_device_late(struct pci_dev *dev)
 	edev->pdev = dev;
 	dev->dev.archdata.edev = edev;
 
-	/*
-	 * We have to do the EEH probe here because the PCI device
-	 * hasn't been created yet in the early stage.
-	 */
-	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
-		eeh_ops->dev_probe(dev, NULL);
-
 	eeh_addr_cache_insert_dev(dev);
 }
 
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 07d8a2423a61..eeabeabea49c 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -171,30 +171,27 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 
 static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
 {
-	struct device_node *dn;
+	struct pci_dn *pdn;
 	struct eeh_dev *edev;
 	int i;
 
-	dn = pci_device_to_OF_node(dev);
-	if (!dn) {
+	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
+	if (!pdn) {
 		pr_warn("PCI: no pci dn found for dev=%s\n",
 			pci_name(dev));
 		return;
 	}
 
-	edev = of_node_to_eeh_dev(dn);
+	edev = pdn_to_eeh_dev(pdn);
 	if (!edev) {
-		pr_warn("PCI: no EEH dev found for dn=%s\n",
-			dn->full_name);
+		pr_warn("PCI: no EEH dev found for %s\n",
+			pci_name(dev));
 		return;
 	}
 
 	/* Skip any devices for which EEH is not enabled. */
 	if (!edev->pe) {
-#ifdef DEBUG
-		pr_info("PCI: skip building address cache for=%s - %s\n",
-			pci_name(dev), dn->full_name);
-#endif
+		dev_dbg(&dev->dev, "EEH: Skip building address cache\n");
 		return;
 	}
 
@@ -282,18 +279,18 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
  */
 void eeh_addr_cache_build(void)
 {
-	struct device_node *dn;
+	struct pci_dn *pdn;
 	struct eeh_dev *edev;
 	struct pci_dev *dev = NULL;
 
 	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
 
 	for_each_pci_dev(dev) {
-		dn = pci_device_to_OF_node(dev);
-		if (!dn)
+		pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
+		if (!pdn)
 			continue;
 
-		edev = of_node_to_eeh_dev(dn);
+		edev = pdn_to_eeh_dev(pdn);
 		if (!edev)
 			continue;
 
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index e5274ee9a75f..aabba94ff9cb 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -43,13 +43,13 @@
 
 /**
  * eeh_dev_init - Create EEH device according to OF node
- * @dn: device node
+ * @pdn: PCI device node
  * @data: PHB
  *
  * It will create EEH device according to the given OF node. The function
  * might be called by PCI emunation, DR, PHB hotplug.
  */
-void *eeh_dev_init(struct device_node *dn, void *data)
+void *eeh_dev_init(struct pci_dn *pdn, void *data)
 {
 	struct pci_controller *phb = data;
 	struct eeh_dev *edev;
@@ -63,8 +63,8 @@ void *eeh_dev_init(struct device_node *dn, void *data)
 	}
 
 	/* Associate EEH device with OF node */
-	PCI_DN(dn)->edev = edev;
-	edev->dn  = dn;
+	pdn->edev = edev;
+	edev->pdn = pdn;
 	edev->phb = phb;
 	INIT_LIST_HEAD(&edev->list);
 
@@ -80,16 +80,16 @@ void *eeh_dev_init(struct device_node *dn, void *data)
  */
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 {
-	struct device_node *dn = phb->dn;
+	struct pci_dn *root = phb->pci_data;
 
 	/* EEH PE for PHB */
 	eeh_phb_pe_create(phb);
 
 	/* EEH device for PHB */
-	eeh_dev_init(dn, phb);
+	eeh_dev_init(root, phb);
 
 	/* EEH devices for children OF nodes */
-	traverse_pci_devices(dn, eeh_dev_init, phb);
+	traverse_pci_dn(root, eeh_dev_init, phb);
 }
 
 /**
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index d099540c0f56..24768ff3cb73 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -83,28 +83,6 @@ static inline void eeh_pcid_put(struct pci_dev *pdev)
 	module_put(pdev->driver->driver.owner);
 }
 
-#if 0
-static void print_device_node_tree(struct pci_dn *pdn, int dent)
-{
-	int i;
-	struct device_node *pc;
-
-	if (!pdn)
-		return;
-	for (i = 0; i < dent; i++)
-		printk(" ");
-	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
-		pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
-		pdn->eeh_pe_config_addr, pdn->node->full_name);
-	dent += 3;
-	pc = pdn->node->child;
-	while (pc) {
-		print_device_node_tree(PCI_DN(pc), dent);
-		pc = pc->sibling;
-	}
-}
-#endif
-
 /**
  * eeh_disable_irq - Disable interrupt for the recovering device
  * @dev: PCI device
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 1e4946c36f9e..35f0b62259bb 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -291,27 +291,25 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
  */
 static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
 {
-	struct device_node *dn;
 	struct eeh_dev *parent;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	/*
 	 * It might have the case for the indirect parent
 	 * EEH device already having associated PE, but
 	 * the direct parent EEH device doesn't have yet.
 	 */
-	dn = edev->dn->parent;
-	while (dn) {
+	pdn = pdn ? pdn->parent : NULL;
+	while (pdn) {
 		/* We're poking out of PCI territory */
-		if (!PCI_DN(dn)) return NULL;
-
-		parent = of_node_to_eeh_dev(dn);
-		/* We're poking out of PCI territory */
-		if (!parent) return NULL;
+		parent = pdn_to_eeh_dev(pdn);
+		if (!parent)
+			return NULL;
 
 		if (parent->pe)
 			return parent->pe;
 
-		dn = dn->parent;
+		pdn = pdn->parent;
 	}
 
 	return NULL;
@@ -330,6 +328,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent;
 
+	/* Check if the PE number is valid */
+	if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
+		pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n",
+		       __func__, edev->config_addr, edev->phb->global_number);
+		return -EINVAL;
+	}
+
 	/*
 	 * Search the PE has been existing or not according
 	 * to the PE address. If that has been existing, the
@@ -338,21 +343,18 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 	 */
 	pe = eeh_pe_get(edev);
 	if (pe && !(pe->type & EEH_PE_INVALID)) {
-		if (!edev->pe_config_addr) {
-			pr_err("%s: PE with addr 0x%x already exists\n",
-				__func__, edev->config_addr);
-			return -EEXIST;
-		}
-
 		/* Mark the PE as type of PCI bus */
 		pe->type = EEH_PE_BUS;
 		edev->pe = pe;
 
 		/* Put the edev to PE */
 		list_add_tail(&edev->list, &pe->edevs);
-		pr_debug("EEH: Add %s to Bus PE#%x\n",
-			edev->dn->full_name, pe->addr);
-
+		pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
+			edev->phb->global_number,
+			edev->config_addr >> 8,
+			PCI_SLOT(edev->config_addr & 0xFF),
+			PCI_FUNC(edev->config_addr & 0xFF),
+			pe->addr);
 		return 0;
 	} else if (pe && (pe->type & EEH_PE_INVALID)) {
 		list_add_tail(&edev->list, &pe->edevs);
@@ -368,9 +370,14 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 			parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
 			parent = parent->parent;
 		}
-		pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
-			edev->dn->full_name, pe->addr, pe->parent->addr);
 
+		pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
+			 "PE#%x, Parent PE#%x\n",
+			edev->phb->global_number,
+			edev->config_addr >> 8,
+                        PCI_SLOT(edev->config_addr & 0xFF),
+                        PCI_FUNC(edev->config_addr & 0xFF),
+			pe->addr, pe->parent->addr);
 		return 0;
 	}
 
@@ -409,8 +416,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 	list_add_tail(&pe->child, &parent->child_list);
 	list_add_tail(&edev->list, &pe->edevs);
 	edev->pe = pe;
-	pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
-		edev->dn->full_name, pe->addr, pe->parent->addr);
+	pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
+		 "Device PE#%x, Parent PE#%x\n",
+		 edev->phb->global_number,
+		 edev->config_addr >> 8,
+		 PCI_SLOT(edev->config_addr & 0xFF),
+		 PCI_FUNC(edev->config_addr & 0xFF),
+		 pe->addr, pe->parent->addr);
 
 	return 0;
 }
@@ -430,8 +442,11 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 	int cnt;
 
 	if (!edev->pe) {
-		pr_debug("%s: No PE found for EEH device %s\n",
-			 __func__, edev->dn->full_name);
+		pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
+			 __func__,  edev->phb->global_number,
+			 edev->config_addr >> 8,
+			 PCI_SLOT(edev->config_addr & 0xFF),
+			 PCI_FUNC(edev->config_addr & 0xFF));
 		return -EEXIST;
 	}
 
@@ -653,9 +668,9 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
  * blocked on normal path during the stage. So we need utilize
  * eeh operations, which is always permitted.
  */
-static void eeh_bridge_check_link(struct eeh_dev *edev,
-				  struct device_node *dn)
+static void eeh_bridge_check_link(struct eeh_dev *edev)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	int cap;
 	uint32_t val;
 	int timeout = 0;
@@ -675,32 +690,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev,
 
 	/* Check slot status */
 	cap = edev->pcie_cap;
-	eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
+	eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
 	if (!(val & PCI_EXP_SLTSTA_PDS)) {
 		pr_debug("  No card in the slot (0x%04x) !\n", val);
 		return;
 	}
 
 	/* Check power status if we have the capability */
-	eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
+	eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val);
 	if (val & PCI_EXP_SLTCAP_PCP) {
-		eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
+		eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
 		if (val & PCI_EXP_SLTCTL_PCC) {
 			pr_debug("  In power-off state, power it on ...\n");
 			val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
 			val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
-			eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
+			eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
 			msleep(2 * 1000);
 		}
 	}
 
 	/* Enable link */
-	eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
+	eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val);
 	val &= ~PCI_EXP_LNKCTL_LD;
-	eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
+	eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val);
 
 	/* Check link */
-	eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
+	eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
 	if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
 		pr_debug("  No link reporting capability (0x%08x) \n", val);
 		msleep(1000);
@@ -713,7 +728,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev,
 		msleep(20);
 		timeout += 20;
 
-		eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
+		eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val);
 		if (val & PCI_EXP_LNKSTA_DLLLA)
 			break;
 	}
@@ -728,9 +743,9 @@ static void eeh_bridge_check_link(struct eeh_dev *edev,
 #define BYTE_SWAP(OFF)	(8*((OFF)/4)+3-(OFF))
 #define SAVED_BYTE(OFF)	(((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
 
-static void eeh_restore_bridge_bars(struct eeh_dev *edev,
-				    struct device_node *dn)
+static void eeh_restore_bridge_bars(struct eeh_dev *edev)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	int i;
 
 	/*
@@ -738,49 +753,49 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev,
 	 * Bus numbers and windows: 0x18 - 0x30
 	 */
 	for (i = 4; i < 13; i++)
-		eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+		eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
 	/* Rom: 0x38 */
-	eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
+	eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]);
 
 	/* Cache line & Latency timer: 0xC 0xD */
-	eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+	eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
                 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-        eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+        eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
                 SAVED_BYTE(PCI_LATENCY_TIMER));
 	/* Max latency, min grant, interrupt ping and line: 0x3C */
-	eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+	eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
 
 	/* PCI Command: 0x4 */
-	eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
+	eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1]);
 
 	/* Check the PCIe link is ready */
-	eeh_bridge_check_link(edev, dn);
+	eeh_bridge_check_link(edev);
 }
 
-static void eeh_restore_device_bars(struct eeh_dev *edev,
-				    struct device_node *dn)
+static void eeh_restore_device_bars(struct eeh_dev *edev)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	int i;
 	u32 cmd;
 
 	for (i = 4; i < 10; i++)
-		eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+		eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
 	/* 12 == Expansion ROM Address */
-	eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
+	eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]);
 
-	eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+	eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
 		SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-	eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+	eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
 		SAVED_BYTE(PCI_LATENCY_TIMER));
 
 	/* max latency, min grant, interrupt pin and line */
-	eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+	eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
 
 	/*
 	 * Restore PERR & SERR bits, some devices require it,
 	 * don't touch the other command bits
 	 */
-	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd);
 	if (edev->config_space[1] & PCI_COMMAND_PARITY)
 		cmd |= PCI_COMMAND_PARITY;
 	else
@@ -789,7 +804,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
 		cmd |= PCI_COMMAND_SERR;
 	else
 		cmd &= ~PCI_COMMAND_SERR;
-	eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
+	eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd);
 }
 
 /**
@@ -804,16 +819,16 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
 static void *eeh_restore_one_device_bars(void *data, void *flag)
 {
 	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct device_node *dn = eeh_dev_to_of_node(edev);
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	/* Do special restore for bridges */
 	if (edev->mode & EEH_DEV_BRIDGE)
-		eeh_restore_bridge_bars(edev, dn);
+		eeh_restore_bridge_bars(edev);
 	else
-		eeh_restore_device_bars(edev, dn);
+		eeh_restore_device_bars(edev);
 
-	if (eeh_ops->restore_config)
-		eeh_ops->restore_config(dn);
+	if (eeh_ops->restore_config && pdn)
+		eeh_ops->restore_config(pdn);
 
 	return NULL;
 }
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d180caf2d6de..8ca9434c40e6 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,7 @@
 #include <asm/ftrace.h>
 #include <asm/hw_irq.h>
 #include <asm/context_tracking.h>
+#include <asm/tm.h>
 
 /*
  * System calls.
@@ -145,6 +146,24 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 	andi.	r11,r10,_TIF_SYSCALL_DOTRACE
 	bne	syscall_dotrace
 .Lsyscall_dotrace_cont:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	b	1f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+	extrdi.	r11, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+	beq+	1f
+
+	/* Doom the transaction and don't perform the syscall: */
+	mfmsr	r11
+	li	r12, 1
+	rldimi	r11, r12, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r11, 0
+	li	r11, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+	TABORT(R11)
+
+	b	.Lsyscall_exit
+1:
+#endif
 	cmpldi	0,r0,NR_syscalls
 	bge-	syscall_enosys
 
@@ -356,6 +375,11 @@ _GLOBAL(ppc64_swapcontext)
 	bl	sys_swapcontext
 	b	.Lsyscall_exit
 
+_GLOBAL(ppc_switch_endian)
+	bl	save_nvgprs
+	bl	sys_switch_endian
+	b	.Lsyscall_exit
+
 _GLOBAL(ret_from_fork)
 	bl	schedule_tail
 	REST_NVGPRS(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c2df8150bd7a..9519e6bdc6d7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1408,7 +1408,7 @@ machine_check_handle_early:
 	bne	9f			/* continue in V mode if we are. */
 
 5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	/*
 	 * We are coming from kernel context. Check if we are coming from
 	 * guest. if yes, then we can continue. We will fall through
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 05adc8bbdef8..eeaa0d5f69d5 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -94,6 +94,7 @@ _GLOBAL(power7_powersave_common)
 	beq	1f
 	addi	r1,r1,INT_FRAME_SIZE
 	ld	r0,16(r1)
+	li	r3,0			/* Return 0 (no nap) */
 	mtlr	r0
 	blr
 
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index b6f123ab90ed..2c647b1e62e4 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -28,6 +28,55 @@
 #include <asm/mce.h>
 #include <asm/machdep.h>
 
+static void flush_tlb_206(unsigned int num_sets, unsigned int action)
+{
+	unsigned long rb;
+	unsigned int i;
+
+	switch (action) {
+	case TLB_INVAL_SCOPE_GLOBAL:
+		rb = TLBIEL_INVAL_SET;
+		break;
+	case TLB_INVAL_SCOPE_LPID:
+		rb = TLBIEL_INVAL_SET_LPID;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	asm volatile("ptesync" : : : "memory");
+	for (i = 0; i < num_sets; i++) {
+		asm volatile("tlbiel %0" : : "r" (rb));
+		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+	}
+	asm volatile("ptesync" : : : "memory");
+}
+
+/*
+ * Generic routine to flush TLB on power7. This routine is used as
+ * flush_tlb hook in cpu_spec for Power7 processor.
+ *
+ * action => TLB_INVAL_SCOPE_GLOBAL:  Invalidate all TLBs.
+ *	     TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
+ */
+void __flush_tlb_power7(unsigned int action)
+{
+	flush_tlb_206(POWER7_TLB_SETS, action);
+}
+
+/*
+ * Generic routine to flush TLB on power8. This routine is used as
+ * flush_tlb hook in cpu_spec for power8 processor.
+ *
+ * action => TLB_INVAL_SCOPE_GLOBAL:  Invalidate all TLBs.
+ *	     TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
+ */
+void __flush_tlb_power8(unsigned int action)
+{
+	flush_tlb_206(POWER8_TLB_SETS, action);
+}
+
 /* flush SLBs and reload */
 static void flush_and_reload_slb(void)
 {
@@ -79,7 +128,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 	}
 	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
-			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET);
+			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
 		/* reset error bits */
 		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
 	}
@@ -110,7 +159,7 @@ static long mce_handle_common_ierror(uint64_t srr1)
 		break;
 	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
-			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET);
+			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
 			handled = 1;
 		}
 		break;
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 34f7c9b7cd96..1e703f8ebad4 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -26,6 +26,9 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -54,6 +57,680 @@ struct nvram_partition {
 
 static LIST_HEAD(nvram_partitions);
 
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+	.name = "ibm,rtas-log",
+	.req_size = 2079,
+	.min_size = 1055,
+	.index = -1,
+	.os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+	.name = "lnx,oops-log",
+	.req_size = 4000,
+	.min_size = 2000,
+	.index = -1,
+	.os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+	"ibm,rtas-log",
+#endif
+	"lnx,oops-log",
+	NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+			  enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper nvram_kmsg_dumper = {
+	.dump = oops_to_nvram
+};
+
+/*
+ * For capturing and compressing an oops or panic report...
+
+ * big_oops_buf[] holds the uncompressed text we're capturing.
+ *
+ * oops_buf[] holds the compressed text, preceded by a oops header.
+ * oops header has u16 holding the version of oops header (to differentiate
+ * between old and new format header) followed by u16 holding the length of
+ * the compressed* text (*Or uncompressed, if compression fails.) and u64
+ * holding the timestamp. oops_buf[] gets written to NVRAM.
+ *
+ * oops_log_info points to the header. oops_data points to the compressed text.
+ *
+ * +- oops_buf
+ * |                                   +- oops_data
+ * v                                   v
+ * +-----------+-----------+-----------+------------------------+
+ * | version   | length    | timestamp | text                   |
+ * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes)   |
+ * +-----------+-----------+-----------+------------------------+
+ * ^
+ * +- oops_log_info
+ *
+ * We preallocate these buffers during init to avoid kmalloc during oops/panic.
+ */
+static size_t big_oops_buf_sz;
+static char *big_oops_buf, *oops_buf;
+static char *oops_data;
+static size_t oops_data_sz;
+
+/* Compression parameters */
+#define COMPR_LEVEL 6
+#define WINDOW_BITS 12
+#define MEM_LEVEL 4
+static struct z_stream_s stream;
+
+#ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+	.name = "ibm,skiboot",
+	.index = -1,
+	.os_partition = false
+};
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+static struct nvram_os_partition of_config_partition = {
+	.name = "of-config",
+	.index = -1,
+	.os_partition = false
+};
+#endif
+
+static struct nvram_os_partition common_partition = {
+	.name = "common",
+	.index = -1,
+	.os_partition = false
+};
+
+static enum pstore_type_id nvram_type_ids[] = {
+	PSTORE_TYPE_DMESG,
+	PSTORE_TYPE_PPC_COMMON,
+	-1,
+	-1,
+	-1
+};
+static int read_type;
+#endif
+
+/* nvram_write_os_partition
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode.  If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk.  For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state that is would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will looks like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name       | data             |
+ * |0          |1         |2      3|4         15|16        length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log                         |
+ * |0            3|4          7|8                  error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_os_partition(struct nvram_os_partition *part,
+			     char *buff, int length,
+			     unsigned int err_type,
+			     unsigned int error_log_cnt)
+{
+	int rc;
+	loff_t tmp_index;
+	struct err_log_info info;
+
+	if (part->index == -1)
+		return -ESPIPE;
+
+	if (length > part->size)
+		length = part->size;
+
+	info.error_type = cpu_to_be32(err_type);
+	info.seq_num = cpu_to_be32(error_log_cnt);
+
+	tmp_index = part->index;
+
+	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info),
+				&tmp_index);
+	if (rc <= 0) {
+		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
+		return rc;
+	}
+
+	rc = ppc_md.nvram_write(buff, length, &tmp_index);
+	if (rc <= 0) {
+		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+/* nvram_read_partition
+ *
+ * Reads nvram partition for at most 'length'
+ */
+int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+			 int length, unsigned int *err_type,
+			 unsigned int *error_log_cnt)
+{
+	int rc;
+	loff_t tmp_index;
+	struct err_log_info info;
+
+	if (part->index == -1)
+		return -1;
+
+	if (length > part->size)
+		length = part->size;
+
+	tmp_index = part->index;
+
+	if (part->os_partition) {
+		rc = ppc_md.nvram_read((char *)&info,
+					sizeof(struct err_log_info),
+					&tmp_index);
+		if (rc <= 0) {
+			pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
+			return rc;
+		}
+	}
+
+	rc = ppc_md.nvram_read(buff, length, &tmp_index);
+	if (rc <= 0) {
+		pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
+		return rc;
+	}
+
+	if (part->os_partition) {
+		*error_log_cnt = be32_to_cpu(info.seq_num);
+		*err_type = be32_to_cpu(info.error_type);
+	}
+
+	return 0;
+}
+
+/* nvram_init_os_partition
+ *
+ * This sets up a partition with an "OS" signature.
+ *
+ * The general strategy is the following:
+ * 1.) If a partition with the indicated name already exists...
+ *	- If it's large enough, use it.
+ *	- Otherwise, recycle it and keep going.
+ * 2.) Search for a free partition that is large enough.
+ * 3.) If there's not a free partition large enough, recycle any obsolete
+ * OS partitions and try again.
+ * 4.) Will first try getting a chunk that will satisfy the requested size.
+ * 5.) If a chunk of the requested size cannot be allocated, then try finding
+ * a chunk that will satisfy the minum needed.
+ *
+ * Returns 0 on success, else -1.
+ */
+int __init nvram_init_os_partition(struct nvram_os_partition *part)
+{
+	loff_t p;
+	int size;
+
+	/* Look for ours */
+	p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
+
+	/* Found one but too small, remove it */
+	if (p && size < part->min_size) {
+		pr_info("nvram: Found too small %s partition,"
+					" removing it...\n", part->name);
+		nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
+		p = 0;
+	}
+
+	/* Create one if we didn't find */
+	if (!p) {
+		p = nvram_create_partition(part->name, NVRAM_SIG_OS,
+					part->req_size, part->min_size);
+		if (p == -ENOSPC) {
+			pr_info("nvram: No room to create %s partition, "
+				"deleting any obsolete OS partitions...\n",
+				part->name);
+			nvram_remove_partition(NULL, NVRAM_SIG_OS,
+					nvram_os_partitions);
+			p = nvram_create_partition(part->name, NVRAM_SIG_OS,
+					part->req_size, part->min_size);
+		}
+	}
+
+	if (p <= 0) {
+		pr_err("nvram: Failed to find or create %s"
+		       " partition, err %d\n", part->name, (int)p);
+		return -1;
+	}
+
+	part->index = p;
+	part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
+
+	return 0;
+}
+
+/* Derived from logfs_compress() */
+static int nvram_compress(const void *in, void *out, size_t inlen,
+							size_t outlen)
+{
+	int err, ret;
+
+	ret = -EIO;
+	err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
+						MEM_LEVEL, Z_DEFAULT_STRATEGY);
+	if (err != Z_OK)
+		goto error;
+
+	stream.next_in = in;
+	stream.avail_in = inlen;
+	stream.total_in = 0;
+	stream.next_out = out;
+	stream.avail_out = outlen;
+	stream.total_out = 0;
+
+	err = zlib_deflate(&stream, Z_FINISH);
+	if (err != Z_STREAM_END)
+		goto error;
+
+	err = zlib_deflateEnd(&stream);
+	if (err != Z_OK)
+		goto error;
+
+	if (stream.total_out >= stream.total_in)
+		goto error;
+
+	ret = stream.total_out;
+error:
+	return ret;
+}
+
+/* Compress the text from big_oops_buf into oops_buf. */
+static int zip_oops(size_t text_len)
+{
+	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
+	int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
+								oops_data_sz);
+	if (zipped_len < 0) {
+		pr_err("nvram: compression failed; returned %d\n", zipped_len);
+		pr_err("nvram: logging uncompressed oops/panic report\n");
+		return -1;
+	}
+	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	oops_hdr->report_length = cpu_to_be16(zipped_len);
+	oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+	return 0;
+}
+
+#ifdef CONFIG_PSTORE
+static int nvram_pstore_open(struct pstore_info *psi)
+{
+	/* Reset the iterator to start reading partitions again */
+	read_type = -1;
+	return 0;
+}
+
+/**
+ * nvram_pstore_write - pstore write callback for nvram
+ * @type:               Type of message logged
+ * @reason:             reason behind dump (oops/panic)
+ * @id:                 identifier to indicate the write performed
+ * @part:               pstore writes data to registered buffer in parts,
+ *                      part number will indicate the same.
+ * @count:              Indicates oops count
+ * @compressed:         Flag to indicate the log is compressed
+ * @size:               number of bytes written to the registered buffer
+ * @psi:                registered pstore_info structure
+ *
+ * Called by pstore_dump() when an oops or panic report is logged in the
+ * printk buffer.
+ * Returns 0 on successful write.
+ */
+static int nvram_pstore_write(enum pstore_type_id type,
+				enum kmsg_dump_reason reason,
+				u64 *id, unsigned int part, int count,
+				bool compressed, size_t size,
+				struct pstore_info *psi)
+{
+	int rc;
+	unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
+	struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
+
+	/* part 1 has the recent messages from printk buffer */
+	if (part > 1 || (type != PSTORE_TYPE_DMESG))
+		return -1;
+
+	if (clobbering_unread_rtas_event())
+		return -1;
+
+	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	oops_hdr->report_length = cpu_to_be16(size);
+	oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+
+	if (compressed)
+		err_type = ERR_TYPE_KERNEL_PANIC_GZ;
+
+	rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
+		(int) (sizeof(*oops_hdr) + size), err_type, count);
+
+	if (rc != 0)
+		return rc;
+
+	*id = part;
+	return 0;
+}
+
+/*
+ * Reads the oops/panic report, rtas, of-config and common partition.
+ * Returns the length of the data we read from each partition.
+ * Returns 0 if we've been called before.
+ */
+static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
+				int *count, struct timespec *time, char **buf,
+				bool *compressed, struct pstore_info *psi)
+{
+	struct oops_log_info *oops_hdr;
+	unsigned int err_type, id_no, size = 0;
+	struct nvram_os_partition *part = NULL;
+	char *buff = NULL;
+	int sig = 0;
+	loff_t p;
+
+	read_type++;
+
+	switch (nvram_type_ids[read_type]) {
+	case PSTORE_TYPE_DMESG:
+		part = &oops_log_partition;
+		*type = PSTORE_TYPE_DMESG;
+		break;
+	case PSTORE_TYPE_PPC_COMMON:
+		sig = NVRAM_SIG_SYS;
+		part = &common_partition;
+		*type = PSTORE_TYPE_PPC_COMMON;
+		*id = PSTORE_TYPE_PPC_COMMON;
+		time->tv_sec = 0;
+		time->tv_nsec = 0;
+		break;
+#ifdef CONFIG_PPC_PSERIES
+	case PSTORE_TYPE_PPC_RTAS:
+		part = &rtas_log_partition;
+		*type = PSTORE_TYPE_PPC_RTAS;
+		time->tv_sec = last_rtas_event;
+		time->tv_nsec = 0;
+		break;
+	case PSTORE_TYPE_PPC_OF:
+		sig = NVRAM_SIG_OF;
+		part = &of_config_partition;
+		*type = PSTORE_TYPE_PPC_OF;
+		*id = PSTORE_TYPE_PPC_OF;
+		time->tv_sec = 0;
+		time->tv_nsec = 0;
+		break;
+#endif
+#ifdef CONFIG_PPC_POWERNV
+	case PSTORE_TYPE_PPC_OPAL:
+		sig = NVRAM_SIG_FW;
+		part = &skiboot_partition;
+		*type = PSTORE_TYPE_PPC_OPAL;
+		*id = PSTORE_TYPE_PPC_OPAL;
+		time->tv_sec = 0;
+		time->tv_nsec = 0;
+		break;
+#endif
+	default:
+		return 0;
+	}
+
+	if (!part->os_partition) {
+		p = nvram_find_partition(part->name, sig, &size);
+		if (p <= 0) {
+			pr_err("nvram: Failed to find partition %s, "
+				"err %d\n", part->name, (int)p);
+			return 0;
+		}
+		part->index = p;
+		part->size = size;
+	}
+
+	buff = kmalloc(part->size, GFP_KERNEL);
+
+	if (!buff)
+		return -ENOMEM;
+
+	if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
+		kfree(buff);
+		return 0;
+	}
+
+	*count = 0;
+
+	if (part->os_partition)
+		*id = id_no;
+
+	if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
+		size_t length, hdr_size;
+
+		oops_hdr = (struct oops_log_info *)buff;
+		if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) {
+			/* Old format oops header had 2-byte record size */
+			hdr_size = sizeof(u16);
+			length = be16_to_cpu(oops_hdr->version);
+			time->tv_sec = 0;
+			time->tv_nsec = 0;
+		} else {
+			hdr_size = sizeof(*oops_hdr);
+			length = be16_to_cpu(oops_hdr->report_length);
+			time->tv_sec = be64_to_cpu(oops_hdr->timestamp);
+			time->tv_nsec = 0;
+		}
+		*buf = kmalloc(length, GFP_KERNEL);
+		if (*buf == NULL)
+			return -ENOMEM;
+		memcpy(*buf, buff + hdr_size, length);
+		kfree(buff);
+
+		if (err_type == ERR_TYPE_KERNEL_PANIC_GZ)
+			*compressed = true;
+		else
+			*compressed = false;
+		return length;
+	}
+
+	*buf = buff;
+	return part->size;
+}
+
+static struct pstore_info nvram_pstore_info = {
+	.owner = THIS_MODULE,
+	.name = "nvram",
+	.open = nvram_pstore_open,
+	.read = nvram_pstore_read,
+	.write = nvram_pstore_write,
+};
+
+static int nvram_pstore_init(void)
+{
+	int rc = 0;
+
+	if (machine_is(pseries)) {
+		nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+		nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+	} else
+		nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
+
+	nvram_pstore_info.buf = oops_data;
+	nvram_pstore_info.bufsize = oops_data_sz;
+
+	spin_lock_init(&nvram_pstore_info.buf_lock);
+
+	rc = pstore_register(&nvram_pstore_info);
+	if (rc != 0)
+		pr_err("nvram: pstore_register() failed, defaults to "
+				"kmsg_dump; returned %d\n", rc);
+
+	return rc;
+}
+#else
+static int nvram_pstore_init(void)
+{
+	return -1;
+}
+#endif
+
+void __init nvram_init_oops_partition(int rtas_partition_exists)
+{
+	int rc;
+
+	rc = nvram_init_os_partition(&oops_log_partition);
+	if (rc != 0) {
+#ifdef CONFIG_PPC_PSERIES
+		if (!rtas_partition_exists) {
+			pr_err("nvram: Failed to initialize oops partition!");
+			return;
+		}
+		pr_notice("nvram: Using %s partition to log both"
+			" RTAS errors and oops/panic reports\n",
+			rtas_log_partition.name);
+		memcpy(&oops_log_partition, &rtas_log_partition,
+						sizeof(rtas_log_partition));
+#else
+		pr_err("nvram: Failed to initialize oops partition!");
+		return;
+#endif
+	}
+	oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
+	if (!oops_buf) {
+		pr_err("nvram: No memory for %s partition\n",
+						oops_log_partition.name);
+		return;
+	}
+	oops_data = oops_buf + sizeof(struct oops_log_info);
+	oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
+
+	rc = nvram_pstore_init();
+
+	if (!rc)
+		return;
+
+	/*
+	 * Figure compression (preceded by elimination of each line's <n>
+	 * severity prefix) will reduce the oops/panic report to at most
+	 * 45% of its original size.
+	 */
+	big_oops_buf_sz = (oops_data_sz * 100) / 45;
+	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
+	if (big_oops_buf) {
+		stream.workspace =  kmalloc(zlib_deflate_workspacesize(
+					WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
+		if (!stream.workspace) {
+			pr_err("nvram: No memory for compression workspace; "
+				"skipping compression of %s partition data\n",
+				oops_log_partition.name);
+			kfree(big_oops_buf);
+			big_oops_buf = NULL;
+		}
+	} else {
+		pr_err("No memory for uncompressed %s data; "
+			"skipping compression\n", oops_log_partition.name);
+		stream.workspace = NULL;
+	}
+
+	rc = kmsg_dump_register(&nvram_kmsg_dumper);
+	if (rc != 0) {
+		pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
+		kfree(oops_buf);
+		kfree(big_oops_buf);
+		kfree(stream.workspace);
+	}
+}
+
+/*
+ * This is our kmsg_dump callback, called after an oops or panic report
+ * has been written to the printk buffer.  We want to capture as much
+ * of the printk buffer as possible.  First, capture as much as we can
+ * that we think will compress sufficiently to fit in the lnx,oops-log
+ * partition.  If that's too much, go back and capture uncompressed text.
+ */
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+			  enum kmsg_dump_reason reason)
+{
+	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
+	static unsigned int oops_count = 0;
+	static bool panicking = false;
+	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
+	size_t text_len;
+	unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
+	int rc = -1;
+
+	switch (reason) {
+	case KMSG_DUMP_RESTART:
+	case KMSG_DUMP_HALT:
+	case KMSG_DUMP_POWEROFF:
+		/* These are almost always orderly shutdowns. */
+		return;
+	case KMSG_DUMP_OOPS:
+		break;
+	case KMSG_DUMP_PANIC:
+		panicking = true;
+		break;
+	case KMSG_DUMP_EMERG:
+		if (panicking)
+			/* Panic report already captured. */
+			return;
+		break;
+	default:
+		pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
+		       __func__, (int) reason);
+		return;
+	}
+
+	if (clobbering_unread_rtas_event())
+		return;
+
+	if (!spin_trylock_irqsave(&lock, flags))
+		return;
+
+	if (big_oops_buf) {
+		kmsg_dump_get_buffer(dumper, false,
+				     big_oops_buf, big_oops_buf_sz, &text_len);
+		rc = zip_oops(text_len);
+	}
+	if (rc != 0) {
+		kmsg_dump_rewind(dumper);
+		kmsg_dump_get_buffer(dumper, false,
+				     oops_data, oops_data_sz, &text_len);
+		err_type = ERR_TYPE_KERNEL_PANIC;
+		oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+		oops_hdr->report_length = cpu_to_be16(text_len);
+		oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+	}
+
+	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
+		(int) (sizeof(*oops_hdr) + text_len), err_type,
+		++oops_count);
+
+	spin_unlock_irqrestore(&lock, flags);
+}
+
 static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
 {
 	int size;
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 2f35a72642c6..b60a67d92ebd 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -72,7 +72,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
 
 	/* Register devices with EEH */
 	if (dev->dev.of_node->child)
-		eeh_add_device_tree_early(dev->dev.of_node);
+		eeh_add_device_tree_early(PCI_DN(dev->dev.of_node));
 
 	/* Scan the bus */
 	pcibios_scan_phb(phb);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 2a525c938158..0d054068a21d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -76,7 +76,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 	list_add_tail(&phb->list_node, &hose_list);
 	spin_unlock(&hose_spinlock);
 	phb->dn = dev;
-	phb->is_dynamic = mem_init_done;
+	phb->is_dynamic = slab_is_available();
 #ifdef CONFIG_PPC64
 	if (dev) {
 		int nid = of_node_to_nid(dev);
@@ -109,8 +109,10 @@ void pcibios_free_controller(struct pci_controller *phb)
 resource_size_t pcibios_window_alignment(struct pci_bus *bus,
 					 unsigned long type)
 {
-	if (ppc_md.pcibios_window_alignment)
-		return ppc_md.pcibios_window_alignment(bus, type);
+	struct pci_controller *phb = pci_bus_to_host(bus);
+
+	if (phb->controller_ops.window_alignment)
+		return phb->controller_ops.window_alignment(bus, type);
 
 	/*
 	 * PCI core will figure out the default
@@ -122,14 +124,26 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
 
 void pcibios_reset_secondary_bus(struct pci_dev *dev)
 {
-	if (ppc_md.pcibios_reset_secondary_bus) {
-		ppc_md.pcibios_reset_secondary_bus(dev);
+	struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+	if (phb->controller_ops.reset_secondary_bus) {
+		phb->controller_ops.reset_secondary_bus(dev);
 		return;
 	}
 
 	pci_reset_secondary_bus(dev);
 }
 
+#ifdef CONFIG_PCI_IOV
+resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
+{
+	if (ppc_md.pcibios_iov_resource_alignment)
+		return ppc_md.pcibios_iov_resource_alignment(pdev, resno);
+
+	return pci_iov_resource_size(pdev, resno);
+}
+#endif /* CONFIG_PCI_IOV */
+
 static resource_size_t pcibios_io_size(const struct pci_controller *hose)
 {
 #ifdef CONFIG_PPC64
@@ -788,6 +802,10 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
 		       pci_name(dev));
 		return;
 	}
+
+	if (dev->is_virtfn)
+		return;
+
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 		struct resource *res = dev->resource + i;
 		struct pci_bus_region reg;
@@ -942,6 +960,8 @@ static void pcibios_fixup_bridge(struct pci_bus *bus)
 
 void pcibios_setup_bus_self(struct pci_bus *bus)
 {
+	struct pci_controller *phb;
+
 	/* Fix up the bus resources for P2P bridges */
 	if (bus->self != NULL)
 		pcibios_fixup_bridge(bus);
@@ -953,12 +973,14 @@ void pcibios_setup_bus_self(struct pci_bus *bus)
 		ppc_md.pcibios_fixup_bus(bus);
 
 	/* Setup bus DMA mappings */
-	if (ppc_md.pci_dma_bus_setup)
-		ppc_md.pci_dma_bus_setup(bus);
+	phb = pci_bus_to_host(bus);
+	if (phb->controller_ops.dma_bus_setup)
+		phb->controller_ops.dma_bus_setup(bus);
 }
 
 static void pcibios_setup_device(struct pci_dev *dev)
 {
+	struct pci_controller *phb;
 	/* Fixup NUMA node as it may not be setup yet by the generic
 	 * code and is needed by the DMA init
 	 */
@@ -969,8 +991,9 @@ static void pcibios_setup_device(struct pci_dev *dev)
 	set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
 
 	/* Additional platform DMA/iommu setup */
-	if (ppc_md.pci_dma_dev_setup)
-		ppc_md.pci_dma_dev_setup(dev);
+	phb = pci_bus_to_host(dev->bus);
+	if (phb->controller_ops.dma_dev_setup)
+		phb->controller_ops.dma_dev_setup(dev);
 
 	/* Read default IRQs and fixup if necessary */
 	pci_read_irq_line(dev);
@@ -986,6 +1009,12 @@ int pcibios_add_device(struct pci_dev *dev)
 	 */
 	if (dev->bus->is_added)
 		pcibios_setup_device(dev);
+
+#ifdef CONFIG_PCI_IOV
+	if (ppc_md.pcibios_fixup_sriov)
+		ppc_md.pcibios_fixup_sriov(dev);
+#endif /* CONFIG_PCI_IOV */
+
 	return 0;
 }
 
@@ -1450,8 +1479,10 @@ EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
 
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	if (ppc_md.pcibios_enable_device_hook)
-		if (ppc_md.pcibios_enable_device_hook(dev))
+	struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+	if (phb->controller_ops.enable_device_hook)
+		if (!phb->controller_ops.enable_device_hook(dev))
 			return -EINVAL;
 
 	return pci_enable_resources(dev, mask);
@@ -1624,8 +1655,8 @@ void pcibios_scan_phb(struct pci_controller *hose)
 
 	/* Get probe mode and perform scan */
 	mode = PCI_PROBE_NORMAL;
-	if (node && ppc_md.pci_probe_mode)
-		mode = ppc_md.pci_probe_mode(bus);
+	if (node && hose->controller_ops.probe_mode)
+		mode = hose->controller_ops.probe_mode(bus);
 	pr_debug("    probe mode: %d\n", mode);
 	if (mode == PCI_PROBE_DEVTREE)
 		of_scan_bus(node, bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 5b789177aa29..7ed85a69a9c2 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -73,13 +73,16 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
 {
 	int slotno, mode, pass, max;
 	struct pci_dev *dev;
+	struct pci_controller *phb;
 	struct device_node *dn = pci_bus_to_OF_node(bus);
 
-	eeh_add_device_tree_early(dn);
+	eeh_add_device_tree_early(PCI_DN(dn));
+
+	phb = pci_bus_to_host(bus);
 
 	mode = PCI_PROBE_NORMAL;
-	if (ppc_md.pci_probe_mode)
-		mode = ppc_md.pci_probe_mode(bus);
+	if (phb->controller_ops.probe_mode)
+		mode = phb->controller_ops.probe_mode(bus);
 
 	if (mode == PCI_PROBE_DEVTREE) {
 		/* use ofdt-based probe */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 83df3075d3df..b3b4df91b792 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -32,12 +32,237 @@
 #include <asm/ppc-pci.h>
 #include <asm/firmware.h>
 
+/*
+ * The function is used to find the firmware data of one
+ * specific PCI device, which is attached to the indicated
+ * PCI bus. For VFs, their firmware data is linked to that
+ * one of PF's bridge. For other devices, their firmware
+ * data is linked to that of their bridge.
+ */
+static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
+{
+	struct pci_bus *pbus;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	/*
+	 * We probably have virtual bus which doesn't
+	 * have associated bridge.
+	 */
+	pbus = bus;
+	while (pbus) {
+		if (pci_is_root_bus(pbus) || pbus->self)
+			break;
+
+		pbus = pbus->parent;
+	}
+
+	/*
+	 * Except virtual bus, all PCI buses should
+	 * have device nodes.
+	 */
+	dn = pci_bus_to_OF_node(pbus);
+	pdn = dn ? PCI_DN(dn) : NULL;
+
+	return pdn;
+}
+
+struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+				    int devfn)
+{
+	struct device_node *dn = NULL;
+	struct pci_dn *parent, *pdn;
+	struct pci_dev *pdev = NULL;
+
+	/* Fast path: fetch from PCI device */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (pdev->devfn == devfn) {
+			if (pdev->dev.archdata.pci_data)
+				return pdev->dev.archdata.pci_data;
+
+			dn = pci_device_to_OF_node(pdev);
+			break;
+		}
+	}
+
+	/* Fast path: fetch from device node */
+	pdn = dn ? PCI_DN(dn) : NULL;
+	if (pdn)
+		return pdn;
+
+	/* Slow path: fetch from firmware data hierarchy */
+	parent = pci_bus_to_pdn(bus);
+	if (!parent)
+		return NULL;
+
+	list_for_each_entry(pdn, &parent->child_list, list) {
+		if (pdn->busno == bus->number &&
+                    pdn->devfn == devfn)
+                        return pdn;
+        }
+
+	return NULL;
+}
+
 struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
 {
-	struct device_node *dn = pci_device_to_OF_node(pdev);
-	if (!dn)
+	struct device_node *dn;
+	struct pci_dn *parent, *pdn;
+
+	/* Search device directly */
+	if (pdev->dev.archdata.pci_data)
+		return pdev->dev.archdata.pci_data;
+
+	/* Check device node */
+	dn = pci_device_to_OF_node(pdev);
+	pdn = dn ? PCI_DN(dn) : NULL;
+	if (pdn)
+		return pdn;
+
+	/*
+	 * VFs don't have device nodes. We hook their
+	 * firmware data to PF's bridge.
+	 */
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return NULL;
+
+	list_for_each_entry(pdn, &parent->child_list, list) {
+		if (pdn->busno == pdev->bus->number &&
+		    pdn->devfn == pdev->devfn)
+			return pdn;
+	}
+
+	return NULL;
+}
+
+#ifdef CONFIG_PCI_IOV
+static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
+					   struct pci_dev *pdev,
+					   int busno, int devfn)
+{
+	struct pci_dn *pdn;
+
+	/* Except PHB, we always have the parent */
+	if (!parent)
+		return NULL;
+
+	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
+	if (!pdn) {
+		dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__);
 		return NULL;
-	return PCI_DN(dn);
+	}
+
+	pdn->phb = parent->phb;
+	pdn->parent = parent;
+	pdn->busno = busno;
+	pdn->devfn = devfn;
+#ifdef CONFIG_PPC_POWERNV
+	pdn->pe_number = IODA_INVALID_PE;
+#endif
+	INIT_LIST_HEAD(&pdn->child_list);
+	INIT_LIST_HEAD(&pdn->list);
+	list_add_tail(&pdn->list, &parent->child_list);
+
+	/*
+	 * If we already have PCI device instance, lets
+	 * bind them.
+	 */
+	if (pdev)
+		pdev->dev.archdata.pci_data = pdn;
+
+	return pdn;
+}
+#endif
+
+struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PCI_IOV
+	struct pci_dn *parent, *pdn;
+	int i;
+
+	/* Only support IOV for now */
+	if (!pdev->is_physfn)
+		return pci_get_pdn(pdev);
+
+	/* Check if VFs have been populated */
+	pdn = pci_get_pdn(pdev);
+	if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
+		return NULL;
+
+	pdn->flags |= PCI_DN_FLAG_IOV_VF;
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return NULL;
+
+	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
+		pdn = add_one_dev_pci_data(parent, NULL,
+					   pci_iov_virtfn_bus(pdev, i),
+					   pci_iov_virtfn_devfn(pdev, i));
+		if (!pdn) {
+			dev_warn(&pdev->dev, "%s: Cannot create firmware data for VF#%d\n",
+				 __func__, i);
+			return NULL;
+		}
+	}
+#endif /* CONFIG_PCI_IOV */
+
+	return pci_get_pdn(pdev);
+}
+
+void remove_dev_pci_data(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PCI_IOV
+	struct pci_dn *parent;
+	struct pci_dn *pdn, *tmp;
+	int i;
+
+	/*
+	 * VF and VF PE are created/released dynamically, so we need to
+	 * bind/unbind them.  Otherwise the VF and VF PE would be mismatched
+	 * when re-enabling SR-IOV.
+	 */
+	if (pdev->is_virtfn) {
+		pdn = pci_get_pdn(pdev);
+#ifdef CONFIG_PPC_POWERNV
+		pdn->pe_number = IODA_INVALID_PE;
+#endif
+		return;
+	}
+
+	/* Only support IOV PF for now */
+	if (!pdev->is_physfn)
+		return;
+
+	/* Check if VFs have been populated */
+	pdn = pci_get_pdn(pdev);
+	if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
+		return;
+
+	pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return;
+
+	/*
+	 * We might introduce flag to pci_dn in future
+	 * so that we can release VF's firmware data in
+	 * a batch mode.
+	 */
+	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
+		list_for_each_entry_safe(pdn, tmp,
+			&parent->child_list, list) {
+			if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
+			    pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
+				continue;
+
+			if (!list_empty(&pdn->list))
+				list_del(&pdn->list);
+
+			kfree(pdn);
+		}
+	}
+#endif /* CONFIG_PCI_IOV */
 }
 
 /*
@@ -49,6 +274,7 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
 	struct pci_controller *phb = data;
 	const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
 	const __be32 *regs;
+	struct device_node *parent;
 	struct pci_dn *pdn;
 
 	pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
@@ -69,7 +295,25 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
 		pdn->devfn = (addr >> 8) & 0xff;
 	}
 
+	/* vendor/device IDs and class code */
+	regs = of_get_property(dn, "vendor-id", NULL);
+	pdn->vendor_id = regs ? of_read_number(regs, 1) : 0;
+	regs = of_get_property(dn, "device-id", NULL);
+	pdn->device_id = regs ? of_read_number(regs, 1) : 0;
+	regs = of_get_property(dn, "class-code", NULL);
+	pdn->class_code = regs ? of_read_number(regs, 1) : 0;
+
+	/* Extended config space */
 	pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1);
+
+	/* Attach to parent node */
+	INIT_LIST_HEAD(&pdn->child_list);
+	INIT_LIST_HEAD(&pdn->list);
+	parent = of_get_parent(dn);
+	pdn->parent = parent ? PCI_DN(parent) : NULL;
+	if (pdn->parent)
+		list_add_tail(&pdn->list, &pdn->parent->child_list);
+
 	return NULL;
 }
 
@@ -131,6 +375,46 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
 	return NULL;
 }
 
+static struct pci_dn *pci_dn_next_one(struct pci_dn *root,
+				      struct pci_dn *pdn)
+{
+	struct list_head *next = pdn->child_list.next;
+
+	if (next != &pdn->child_list)
+		return list_entry(next, struct pci_dn, list);
+
+	while (1) {
+		if (pdn == root)
+			return NULL;
+
+		next = pdn->list.next;
+		if (next != &pdn->parent->child_list)
+			break;
+
+		pdn = pdn->parent;
+	}
+
+	return list_entry(next, struct pci_dn, list);
+}
+
+void *traverse_pci_dn(struct pci_dn *root,
+		      void *(*fn)(struct pci_dn *, void *),
+		      void *data)
+{
+	struct pci_dn *pdn = root;
+	void *ret;
+
+	/* Only scan the child nodes */
+	for (pdn = pci_dn_next_one(root, pdn); pdn;
+	     pdn = pci_dn_next_one(root, pdn)) {
+		ret = fn(pdn, data);
+		if (ret)
+			return ret;
+	}
+
+	return NULL;
+}
+
 /** 
  * pci_devs_phb_init_dynamic - setup pci devices under this PHB
  * phb: pci-to-host bridge (top-level bridge connecting to cpu)
@@ -147,8 +431,12 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
 	/* PHB nodes themselves must not match */
 	update_dn_pci_info(dn, phb);
 	pdn = dn->data;
-	if (pdn)
+	if (pdn) {
 		pdn->devfn = pdn->busno = -1;
+		pdn->vendor_id = pdn->device_id = pdn->class_code = 0;
+		pdn->phb = phb;
+		phb->pci_data = pdn;
+	}
 
 	/* Update dn->phb ptrs for new phb and children devices */
 	traverse_pci_devices(dn, update_dn_pci_info, phb);
@@ -171,3 +459,16 @@ void __init pci_devs_phb_init(void)
 	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
 		pci_devs_phb_init_dynamic(phb);
 }
+
+static void pci_dev_pdn_setup(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn;
+
+	if (pdev->dev.archdata.pci_data)
+		return;
+
+	/* Setup the fast path */
+	pdn = pci_get_pdn(pdev);
+	pdev->dev.archdata.pci_data = pdn;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pci_dev_pdn_setup);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index e6245e9c7d8d..42e02a2d570b 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -207,6 +207,7 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 {
 	struct device_node *node = dev->dev.of_node;
 	struct pci_bus *bus;
+	struct pci_controller *phb;
 	const __be32 *busrange, *ranges;
 	int len, i, mode;
 	struct pci_bus_region region;
@@ -286,9 +287,11 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 		bus->number);
 	pr_debug("    bus name: %s\n", bus->name);
 
+	phb = pci_bus_to_host(bus);
+
 	mode = PCI_PROBE_NORMAL;
-	if (ppc_md.pci_probe_mode)
-		mode = ppc_md.pci_probe_mode(bus);
+	if (phb->controller_ops.probe_mode)
+		mode = phb->controller_ops.probe_mode(bus);
 	pr_debug("    probe mode: %d\n", mode);
 
 	if (mode == PCI_PROBE_DEVTREE)
@@ -305,7 +308,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
 	const __be32 *reg;
 	int reglen, devfn;
 #ifdef CONFIG_EEH
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
 #endif
 
 	pr_debug("  * %s\n", dn->full_name);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index b4cc7bef6b16..febb50dd5328 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1114,8 +1114,11 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
  */
 extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
 
+/*
+ * Copy architecture-specific thread state
+ */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg, struct task_struct *p)
+		unsigned long kthread_arg, struct task_struct *p)
 {
 	struct pt_regs *childregs, *kregs;
 	extern void ret_from_fork(void);
@@ -1127,6 +1130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	sp -= sizeof(struct pt_regs);
 	childregs = (struct pt_regs *) sp;
 	if (unlikely(p->flags & PF_KTHREAD)) {
+		/* kernel thread */
 		struct thread_info *ti = (void *)task_stack_page(p);
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gpr[1] = sp + sizeof(struct pt_regs);
@@ -1137,11 +1141,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		clear_tsk_thread_flag(p, TIF_32BIT);
 		childregs->softe = 1;
 #endif
-		childregs->gpr[15] = arg;
+		childregs->gpr[15] = kthread_arg;
 		p->thread.regs = NULL;	/* no user register state */
 		ti->flags |= _TIF_RESTOREALL;
 		f = ret_from_kernel_thread;
 	} else {
+		/* user thread */
 		struct pt_regs *regs = current_pt_regs();
 		CHECK_FULL_REGS(regs);
 		*childregs = *regs;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b8e15c678960..308c5e15676b 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -721,7 +721,7 @@ void __init early_init_devtree(void *params)
 	 */
 	of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
 	if (boot_cpuid < 0) {
-		printk("Failed to indentify boot CPU !\n");
+		printk("Failed to identify boot CPU !\n");
 		BUG();
 	}
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1a85d8f96739..fd1fe4c37599 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2898,7 +2898,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	 * Call OF "quiesce" method to shut down pending DMA's from
 	 * devices etc...
 	 */
-	prom_printf("Calling quiesce...\n");
+	prom_printf("Quiescing Open Firmware ...\n");
 	call_prom("quiesce", 0, 0);
 
 	/*
@@ -2910,7 +2910,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 
 	/* Don't print anything after quiesce under OPAL, it crashes OFW */
 	if (of_platform != PLATFORM_OPAL) {
-		prom_printf("returning from prom_init\n");
+		prom_printf("Booting Linux via __start() ...\n");
 		prom_debug("->dt_header_start=0x%x\n", hdr);
 	}
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 21c45a2d0706..7a488c108410 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -401,7 +401,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
 			buf = altbuf;
 		} else {
 			buf = rtas_err_buf;
-			if (mem_init_done)
+			if (slab_is_available())
 				buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
 		}
 		if (buf)
@@ -461,7 +461,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 
 	if (buff_copy) {
 		log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
-		if (mem_init_done)
+		if (slab_is_available())
 			kfree(buff_copy);
 	}
 	return ret;
@@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus)
 }
 EXPORT_SYMBOL(rtas_offline_cpus_mask);
 
-int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
+int rtas_ibm_suspend_me(u64 handle)
 {
 	long state;
 	long rc;
@@ -919,13 +919,11 @@ int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
 		printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc);
 		return rc;
 	} else if (state == H_VASI_ENABLED) {
-		*vasi_return = RTAS_NOT_SUSPENDABLE;
-		return 0;
+		return -EAGAIN;
 	} else if (state != H_VASI_SUSPENDING) {
 		printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n",
 		       state);
-		*vasi_return = -1;
-		return 0;
+		return -EIO;
 	}
 
 	if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
@@ -972,7 +970,7 @@ out:
 	return atomic_read(&data.error);
 }
 #else /* CONFIG_PPC_PSERIES */
-int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
+int rtas_ibm_suspend_me(u64 handle)
 {
 	return -ENOSYS;
 }
@@ -1022,7 +1020,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
 	unsigned long flags;
 	char *buff_copy, *errbuf = NULL;
 	int nargs, nret, token;
-	int rc;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1054,15 +1051,18 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
 	if (token == ibm_suspend_me_token) {
 
 		/*
-		 * rtas_ibm_suspend_me assumes args are in cpu endian, or at least the
-		 * hcall within it requires it.
+		 * rtas_ibm_suspend_me assumes the streamid handle is in cpu
+		 * endian, or at least the hcall within it requires it.
 		 */
-		int vasi_rc = 0;
+		int rc = 0;
 		u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
 		              | be32_to_cpu(args.args[1]);
-		rc = rtas_ibm_suspend_me(handle, &vasi_rc);
-		args.rets[0] = cpu_to_be32(vasi_rc);
-		if (rc)
+		rc = rtas_ibm_suspend_me(handle);
+		if (rc == -EAGAIN)
+			args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
+		else if (rc == -EIO)
+			args.rets[0] = cpu_to_be32(-1);
+		else if (rc)
 			return rc;
 		goto copy_return;
 	}
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index ce230da2c015..73f1934582c2 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -113,7 +113,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 
 	ret = rtas_read_config(pdn, where, size, val);
 	if (*val == EEH_IO_ERROR_VALUE(size) &&
-	    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+	    eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	return ret;
@@ -277,50 +277,3 @@ int rtas_setup_phb(struct pci_controller *phb)
 
 	return 0;
 }
-
-void __init find_and_init_phbs(void)
-{
-	struct device_node *node;
-	struct pci_controller *phb;
-	struct device_node *root = of_find_node_by_path("/");
-
-	for_each_child_of_node(root, node) {
-		if (node->type == NULL || (strcmp(node->type, "pci") != 0 &&
-					   strcmp(node->type, "pciex") != 0))
-			continue;
-
-		phb = pcibios_alloc_controller(node);
-		if (!phb)
-			continue;
-		rtas_setup_phb(phb);
-		pci_process_bridge_OF_ranges(phb, node, 0);
-		isa_bridge_find_early(phb);
-	}
-
-	of_node_put(root);
-	pci_devs_phb_init();
-
-	/*
-	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
-	 * in chosen.
-	 */
-	if (of_chosen) {
-		const int *prop;
-
-		prop = of_get_property(of_chosen,
-				"linux,pci-probe-only", NULL);
-		if (prop) {
-			if (*prop)
-				pci_add_flags(PCI_PROBE_ONLY);
-			else
-				pci_clear_flags(PCI_PROBE_ONLY);
-		}
-
-#ifdef CONFIG_PPC32 /* Will be made generic soon */
-		prop = of_get_property(of_chosen,
-				"linux,pci-assign-all-buses", NULL);
-		if (prop && *prop)
-			pci_add_flags(PCI_REASSIGN_ALL_BUS);
-#endif /* CONFIG_PPC32 */
-	}
-}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 49f553bbb360..c69671c03c3b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -37,6 +37,7 @@
 #include <linux/memblock.h>
 #include <linux/hugetlb.h>
 #include <linux/memory.h>
+#include <linux/nmi.h>
 
 #include <asm/io.h>
 #include <asm/kdump.h>
@@ -779,3 +780,22 @@ unsigned long memory_block_size_bytes(void)
 struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	return ppc_proc_freq * watchdog_thresh;
+}
+
+/*
+ * The hardlockup detector breaks PMU event based branches and is likely
+ * to get false positives in KVM guests, so disable it by default.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+	hardlockup_detector_disable();
+
+	return 0;
+}
+early_initcall(disable_hardlockup_detector);
+#endif
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index b2702e87db0d..5fa92706444b 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -121,3 +121,20 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
 	return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low,
 			     (u64)len_high << 32 | len_low, advice);
 }
+
+long sys_switch_endian(void)
+{
+	struct thread_info *ti;
+
+	current->thread.regs->msr ^= MSR_LE;
+
+	/*
+	 * Set TIF_RESTOREALL so that r3 isn't clobbered on return to
+	 * userspace. That also has the effect of restoring the non-volatile
+	 * GPRs, so we saved them on the way in here.
+	 */
+	ti = current_thread_info();
+	ti->flags |= _TIF_RESTOREALL;
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 7ab5d434e2ee..4d6b1d3a747f 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -22,6 +22,7 @@
 #define PPC_SYS(func)		.llong	DOTSYM(ppc_##func),DOTSYM(ppc_##func)
 #define OLDSYS(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
 #define SYS32ONLY(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define PPC64ONLY(func)		.llong	DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
 #define SYSX(f, f3264, f32)	.llong	DOTSYM(f),DOTSYM(f3264)
 #else
 #define SYSCALL(func)		.long	sys_##func
@@ -29,6 +30,7 @@
 #define PPC_SYS(func)		.long	ppc_##func
 #define OLDSYS(func)		.long	sys_##func
 #define SYS32ONLY(func)		.long	sys_##func
+#define PPC64ONLY(func)		.long	sys_ni_syscall
 #define SYSX(f, f3264, f32)	.long	f32
 #endif
 #define SYSCALL_SPU(func)	SYSCALL(func)
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
index 238aa63ced8f..2384129f5893 100644
--- a/arch/powerpc/kernel/systbl_chk.c
+++ b/arch/powerpc/kernel/systbl_chk.c
@@ -21,9 +21,11 @@
 #ifdef CONFIG_PPC64
 #define OLDSYS(func)		-1
 #define SYS32ONLY(func)		-1
+#define PPC64ONLY(func)		__NR_##func
 #else
 #define OLDSYS(func)		__NR_old##func
 #define SYS32ONLY(func)		__NR_##func
+#define PPC64ONLY(func)		-1
 #endif
 #define SYSX(f, f3264, f32)	-1
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2a324f4cb1b9..5754b226da7e 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -152,9 +152,9 @@ _GLOBAL(tm_reclaim)
 
 	addi	r7, r3, THREAD_TRANSACT_VRSTATE
 	SAVE_32VRS(0, r6, r7)	/* r6 scratch, r7 transact vr state */
-	mfvscr	vr0
+	mfvscr	v0
 	li	r6, VRSTATE_VSCR
-	stvx	vr0, r7, r6
+	stvx	v0, r7, r6
 dont_backup_vec:
 	mfspr	r0, SPRN_VRSAVE
 	std	r0, THREAD_TRANSACT_VRSAVE(r3)
@@ -359,8 +359,8 @@ _GLOBAL(__tm_recheckpoint)
 
 	addi	r8, r3, THREAD_VRSTATE
 	li	r5, VRSTATE_VSCR
-	lvx	vr0, r8, r5
-	mtvscr	vr0
+	lvx	v0, r8, r5
+	mtvscr	v0
 	REST_32VRS(0, r5, r8)			/* r5 scratch, r8 ptr */
 dont_restore_vec:
 	ld	r5, THREAD_VRSAVE(r3)
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index b7aa07279a63..7cc38b5b58bc 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -46,8 +46,6 @@ void __init udbg_early_init(void)
 #elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE)
 	/* Maple real mode debug */
 	udbg_init_maple_realmode();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_BEAT)
-	udbg_init_debug_beat();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
 	udbg_init_pas_realmode();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 74f8050518d6..f5c80d567d8d 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -24,8 +24,8 @@ _GLOBAL(do_load_up_transact_altivec)
 	stw	r4,THREAD_USED_VR(r3)
 
 	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
-	lvx	vr0,r10,r3
-	mtvscr	vr0
+	lvx	v0,r10,r3
+	mtvscr	v0
 	addi	r10,r3,THREAD_TRANSACT_VRSTATE
 	REST_32VRS(0,r4,r10)
 
@@ -52,8 +52,8 @@ _GLOBAL(vec_enable)
  */
 _GLOBAL(load_vr_state)
 	li	r4,VRSTATE_VSCR
-	lvx	vr0,r4,r3
-	mtvscr	vr0
+	lvx	v0,r4,r3
+	mtvscr	v0
 	REST_32VRS(0,r4,r3)
 	blr
 
@@ -63,9 +63,9 @@ _GLOBAL(load_vr_state)
  */
 _GLOBAL(store_vr_state)
 	SAVE_32VRS(0, r4, r3)
-	mfvscr	vr0
+	mfvscr	v0
 	li	r4, VRSTATE_VSCR
-	stvx	vr0, r4, r3
+	stvx	v0, r4, r3
 	blr
 
 /*
@@ -104,9 +104,9 @@ _GLOBAL(load_up_altivec)
 	addi	r4,r4,THREAD
 	addi	r6,r4,THREAD_VRSTATE
 	SAVE_32VRS(0,r5,r6)
-	mfvscr	vr0
+	mfvscr	v0
 	li	r10,VRSTATE_VSCR
-	stvx	vr0,r10,r6
+	stvx	v0,r10,r6
 	/* Disable VMX for last_task_used_altivec */
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
@@ -142,8 +142,8 @@ _GLOBAL(load_up_altivec)
 	li	r4,1
 	li	r10,VRSTATE_VSCR
 	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r6
-	mtvscr	vr0
+	lvx	v0,r10,r6
+	mtvscr	v0
 	REST_32VRS(0,r4,r6)
 #ifndef CONFIG_SMP
 	/* Update last_task_used_altivec to 'current' */
@@ -186,9 +186,9 @@ _GLOBAL(giveup_altivec)
 	addi	r7,r3,THREAD_VRSTATE
 2:	PPC_LCMPI	0,r5,0
 	SAVE_32VRS(0,r4,r7)
-	mfvscr	vr0
+	mfvscr	v0
 	li	r4,VRSTATE_VSCR
-	stvx	vr0,r4,r7
+	stvx	v0,r4,r7
 	beq	1f
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 #ifdef CONFIG_VSX
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de4018a1bc4b..de747563d29d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -636,7 +636,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
 	if (lppaca)
-		yield_count = lppaca->yield_count;
+		yield_count = be32_to_cpu(lppaca->yield_count);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	return yield_count;
 }
@@ -942,20 +942,20 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
 static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 		bool preserve_top32)
 {
+	struct kvm *kvm = vcpu->kvm;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	u64 mask;
 
+	mutex_lock(&kvm->lock);
 	spin_lock(&vc->lock);
 	/*
 	 * If ILE (interrupt little-endian) has changed, update the
 	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
 	 */
 	if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
-		struct kvm *kvm = vcpu->kvm;
 		struct kvm_vcpu *vcpu;
 		int i;
 
-		mutex_lock(&kvm->lock);
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->arch.vcore != vc)
 				continue;
@@ -964,7 +964,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 			else
 				vcpu->arch.intr_msr &= ~MSR_LE;
 		}
-		mutex_unlock(&kvm->lock);
 	}
 
 	/*
@@ -981,6 +980,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 		mask &= 0xFFFFFFFF;
 	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
 	spin_unlock(&vc->lock);
+	mutex_unlock(&kvm->lock);
 }
 
 static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 60081bd75847..93b5f5c9b445 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -84,7 +84,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 		}
 		if (dsisr & DSISR_MC_TLB_MULTI) {
 			if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
-				cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
+				cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
 			dsisr &= ~DSISR_MC_TLB_MULTI;
 		}
 		/* Any other errors we don't understand? */
@@ -102,7 +102,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 		break;
 	case SRR1_MC_IFETCH_TLBMULTI:
 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
-			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
+			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID);
 		break;
 	default:
 		handled = 0;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bb94e6f20c81..6cbf1630cb70 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1005,6 +1005,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Save HEIR (HV emulation assist reg) in emul_inst
 	   if this is an HEI (HV emulation interrupt, e40) */
 	li	r3,KVM_INST_FETCH_FAILED
+	stw	r3,VCPU_LAST_INST(r9)
 	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
 	bne	11f
 	mfspr	r3,SPRN_HEIR
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 39b3a8f816f2..6249cdc834d1 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -34,7 +34,7 @@
 #include <asm/kvm_para.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_ppc.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #define MAX_CPU     32
 #define MAX_SRC     256
@@ -289,11 +289,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
 	clear_bit(n_IRQ, q->queue);
 }
 
-static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
-{
-	return test_bit(n_IRQ, q->queue);
-}
-
 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
 {
 	int irq = -1;
@@ -1374,8 +1369,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
 	return -ENXIO;
 }
 
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
-			 int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+			 struct kvm_io_device *this,
+			 gpa_t addr, int len, void *ptr)
 {
 	struct openpic *opp = container_of(this, struct openpic, mmio);
 	int ret;
@@ -1415,8 +1411,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
 	return ret;
 }
 
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
-			  int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+			  struct kvm_io_device *this,
+			  gpa_t addr, int len, const void *ptr)
 {
 	struct openpic *opp = container_of(this, struct openpic, mmio);
 	int ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 27c0face86f4..91bbc845ac66 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -720,7 +720,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		return;
 	}
 
-	if (vcpu->arch.mmio_is_bigendian) {
+	if (!vcpu->arch.mmio_host_swabbed) {
 		switch (run->mmio.len) {
 		case 8: gpr = *(u64 *)run->mmio.data; break;
 		case 4: gpr = *(u32 *)run->mmio.data; break;
@@ -728,10 +728,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		case 1: gpr = *(u8 *)run->mmio.data; break;
 		}
 	} else {
-		/* Convert BE data from userland back to LE. */
 		switch (run->mmio.len) {
-		case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
-		case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
+		case 8: gpr = swab64(*(u64 *)run->mmio.data); break;
+		case 4: gpr = swab32(*(u32 *)run->mmio.data); break;
+		case 2: gpr = swab16(*(u16 *)run->mmio.data); break;
 		case 1: gpr = *(u8 *)run->mmio.data; break;
 		}
 	}
@@ -780,14 +780,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		       int is_default_endian)
 {
 	int idx, ret;
-	int is_bigendian;
+	bool host_swabbed;
 
+	/* Pity C doesn't have a logical XOR operator */
 	if (kvmppc_need_byteswap(vcpu)) {
-		/* Default endianness is "little endian". */
-		is_bigendian = !is_default_endian;
+		host_swabbed = is_default_endian;
 	} else {
-		/* Default endianness is "big endian". */
-		is_bigendian = is_default_endian;
+		host_swabbed = !is_default_endian;
 	}
 
 	if (bytes > sizeof(run->mmio.data)) {
@@ -800,14 +799,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	run->mmio.is_write = 0;
 
 	vcpu->arch.io_gpr = rt;
-	vcpu->arch.mmio_is_bigendian = is_bigendian;
+	vcpu->arch.mmio_host_swabbed = host_swabbed;
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_is_write = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+	ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
 			      bytes, &run->mmio.data);
 
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -840,14 +839,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 {
 	void *data = run->mmio.data;
 	int idx, ret;
-	int is_bigendian;
+	bool host_swabbed;
 
+	/* Pity C doesn't have a logical XOR operator */
 	if (kvmppc_need_byteswap(vcpu)) {
-		/* Default endianness is "little endian". */
-		is_bigendian = !is_default_endian;
+		host_swabbed = is_default_endian;
 	} else {
-		/* Default endianness is "big endian". */
-		is_bigendian = is_default_endian;
+		host_swabbed = !is_default_endian;
 	}
 
 	if (bytes > sizeof(run->mmio.data)) {
@@ -862,7 +860,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	vcpu->mmio_is_write = 1;
 
 	/* Store the value at the lowest bytes in 'data'. */
-	if (is_bigendian) {
+	if (!host_swabbed) {
 		switch (bytes) {
 		case 8: *(u64 *)data = val; break;
 		case 4: *(u32 *)data = val; break;
@@ -870,17 +868,17 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		case 1: *(u8  *)data = val; break;
 		}
 	} else {
-		/* Store LE value into 'data'. */
 		switch (bytes) {
-		case 4: st_le32(data, val); break;
-		case 2: st_le16(data, val); break;
-		case 1: *(u8 *)data = val; break;
+		case 8: *(u64 *)data = swab64(val); break;
+		case 4: *(u32 *)data = swab32(val); break;
+		case 2: *(u16 *)data = swab16(val); break;
+		case 1: *(u8  *)data = val; break;
 		}
 	}
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
 			       bytes, &run->mmio.data);
 
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index 4a6c2cf890d9..60b0b3fc8fc1 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -10,7 +10,7 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
 
-	if (mem_init_done)
+	if (slab_is_available())
 		p = kzalloc(size, mask);
 	else {
 		p = memblock_virt_alloc(size, 0);
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 55f19f9fd708..6813f80d1eec 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -69,54 +69,6 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
-/*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero.  This requires that the destination
- * area is cacheable.  -- paulus
- */
-_GLOBAL(cacheable_memzero)
-	mr	r5,r4
-	li	r4,0
-	addi	r6,r3,-4
-	cmplwi	0,r5,4
-	blt	7f
-	stwu	r4,4(r6)
-	beqlr
-	andi.	r0,r6,3
-	add	r5,r0,r5
-	subf	r6,r0,r6
-	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
-	add	r8,r7,r5
-	srwi	r9,r8,LG_CACHELINE_BYTES
-	addic.	r9,r9,-1	/* total number of complete cachelines */
-	ble	2f
-	xori	r0,r7,CACHELINE_MASK & ~3
-	srwi.	r0,r0,2
-	beq	3f
-	mtctr	r0
-4:	stwu	r4,4(r6)
-	bdnz	4b
-3:	mtctr	r9
-	li	r7,4
-10:	dcbz	r7,r6
-	addi	r6,r6,CACHELINE_BYTES
-	bdnz	10b
-	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
-	addi	r5,r5,4
-2:	srwi	r0,r5,2
-	mtctr	r0
-	bdz	6f
-1:	stwu	r4,4(r6)
-	bdnz	1b
-6:	andi.	r5,r5,3
-7:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-	addi	r6,r6,3
-8:	stbu	r4,1(r6)
-	bdnz	8b
-	blr
-
 _GLOBAL(memset)
 	rlwimi	r4,r4,8,16,23
 	rlwimi	r4,r4,16,0,15
@@ -142,85 +94,6 @@ _GLOBAL(memset)
 	bdnz	8b
 	blr
 
-/*
- * This version uses dcbz on the complete cache lines in the
- * destination area to reduce memory traffic.  This requires that
- * the destination area is cacheable.
- * We only use this version if the source and dest don't overlap.
- * -- paulus.
- */
-_GLOBAL(cacheable_memcpy)
-	add	r7,r3,r5		/* test if the src & dst overlap */
-	add	r8,r4,r5
-	cmplw	0,r4,r7
-	cmplw	1,r3,r8
-	crand	0,0,4			/* cr0.lt &= cr1.lt */
-	blt	memcpy			/* if regions overlap */
-
-	addi	r4,r4,-4
-	addi	r6,r3,-4
-	neg	r0,r3
-	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
-	beq	58f
-
-	cmplw	0,r5,r0			/* is this more than total to do? */
-	blt	63f			/* if not much to do */
-	andi.	r8,r0,3			/* get it word-aligned first */
-	subf	r5,r0,r5
-	mtctr	r8
-	beq+	61f
-70:	lbz	r9,4(r4)		/* do some bytes */
-	stb	r9,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	70b
-61:	srwi.	r0,r0,2
-	mtctr	r0
-	beq	58f
-72:	lwzu	r9,4(r4)		/* do some words */
-	stwu	r9,4(r6)
-	bdnz	72b
-
-58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
-	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
-	li	r11,4
-	mtctr	r0
-	beq	63f
-53:
-	dcbz	r11,r6
-	COPY_16_BYTES
-#if L1_CACHE_BYTES >= 32
-	COPY_16_BYTES
-#if L1_CACHE_BYTES >= 64
-	COPY_16_BYTES
-	COPY_16_BYTES
-#if L1_CACHE_BYTES >= 128
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-#endif
-#endif
-#endif
-	bdnz	53b
-
-63:	srwi.	r0,r5,2
-	mtctr	r0
-	beq	64f
-30:	lwzu	r0,4(r4)
-	stwu	r0,4(r6)
-	bdnz	30b
-
-64:	andi.	r0,r5,3
-	mtctr	r0
-	beq+	65f
-40:	lbz	r0,4(r4)
-	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	40b
-65:	blr
-
 _GLOBAL(memmove)
 	cmplw	0,r3,r4
 	bgt	backwards_memcpy
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index d7dafb3777ac..a84d333ecb09 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -83,23 +83,23 @@ _GLOBAL(copypage_power7)
 	li	r12,112
 
 	.align	5
-1:	lvx	vr7,r0,r4
-	lvx	vr6,r4,r6
-	lvx	vr5,r4,r7
-	lvx	vr4,r4,r8
-	lvx	vr3,r4,r9
-	lvx	vr2,r4,r10
-	lvx	vr1,r4,r11
-	lvx	vr0,r4,r12
+1:	lvx	v7,r0,r4
+	lvx	v6,r4,r6
+	lvx	v5,r4,r7
+	lvx	v4,r4,r8
+	lvx	v3,r4,r9
+	lvx	v2,r4,r10
+	lvx	v1,r4,r11
+	lvx	v0,r4,r12
 	addi	r4,r4,128
-	stvx	vr7,r0,r3
-	stvx	vr6,r3,r6
-	stvx	vr5,r3,r7
-	stvx	vr4,r3,r8
-	stvx	vr3,r3,r9
-	stvx	vr2,r3,r10
-	stvx	vr1,r3,r11
-	stvx	vr0,r3,r12
+	stvx	v7,r0,r3
+	stvx	v6,r3,r6
+	stvx	v5,r3,r7
+	stvx	v4,r3,r8
+	stvx	v3,r3,r9
+	stvx	v2,r3,r10
+	stvx	v1,r3,r11
+	stvx	v0,r3,r12
 	addi	r3,r3,128
 	bdnz	1b
 
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 92ee840529bc..da0c568d18c4 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -388,29 +388,29 @@ err3;	std	r0,0(r3)
 	li	r11,48
 
 	bf	cr7*4+3,5f
-err3;	lvx	vr1,r0,r4
+err3;	lvx	v1,r0,r4
 	addi	r4,r4,16
-err3;	stvx	vr1,r0,r3
+err3;	stvx	v1,r0,r3
 	addi	r3,r3,16
 
 5:	bf	cr7*4+2,6f
-err3;	lvx	vr1,r0,r4
-err3;	lvx	vr0,r4,r9
+err3;	lvx	v1,r0,r4
+err3;	lvx	v0,r4,r9
 	addi	r4,r4,32
-err3;	stvx	vr1,r0,r3
-err3;	stvx	vr0,r3,r9
+err3;	stvx	v1,r0,r3
+err3;	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-err3;	lvx	vr3,r0,r4
-err3;	lvx	vr2,r4,r9
-err3;	lvx	vr1,r4,r10
-err3;	lvx	vr0,r4,r11
+err3;	lvx	v3,r0,r4
+err3;	lvx	v2,r4,r9
+err3;	lvx	v1,r4,r10
+err3;	lvx	v0,r4,r11
 	addi	r4,r4,64
-err3;	stvx	vr3,r0,r3
-err3;	stvx	vr2,r3,r9
-err3;	stvx	vr1,r3,r10
-err3;	stvx	vr0,r3,r11
+err3;	stvx	v3,r0,r3
+err3;	stvx	v2,r3,r9
+err3;	stvx	v1,r3,r10
+err3;	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 7:	sub	r5,r5,r6
@@ -433,23 +433,23 @@ err3;	stvx	vr0,r3,r11
 	 */
 	.align	5
 8:
-err4;	lvx	vr7,r0,r4
-err4;	lvx	vr6,r4,r9
-err4;	lvx	vr5,r4,r10
-err4;	lvx	vr4,r4,r11
-err4;	lvx	vr3,r4,r12
-err4;	lvx	vr2,r4,r14
-err4;	lvx	vr1,r4,r15
-err4;	lvx	vr0,r4,r16
+err4;	lvx	v7,r0,r4
+err4;	lvx	v6,r4,r9
+err4;	lvx	v5,r4,r10
+err4;	lvx	v4,r4,r11
+err4;	lvx	v3,r4,r12
+err4;	lvx	v2,r4,r14
+err4;	lvx	v1,r4,r15
+err4;	lvx	v0,r4,r16
 	addi	r4,r4,128
-err4;	stvx	vr7,r0,r3
-err4;	stvx	vr6,r3,r9
-err4;	stvx	vr5,r3,r10
-err4;	stvx	vr4,r3,r11
-err4;	stvx	vr3,r3,r12
-err4;	stvx	vr2,r3,r14
-err4;	stvx	vr1,r3,r15
-err4;	stvx	vr0,r3,r16
+err4;	stvx	v7,r0,r3
+err4;	stvx	v6,r3,r9
+err4;	stvx	v5,r3,r10
+err4;	stvx	v4,r3,r11
+err4;	stvx	v3,r3,r12
+err4;	stvx	v2,r3,r14
+err4;	stvx	v1,r3,r15
+err4;	stvx	v0,r3,r16
 	addi	r3,r3,128
 	bdnz	8b
 
@@ -463,29 +463,29 @@ err4;	stvx	vr0,r3,r16
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-err3;	lvx	vr3,r0,r4
-err3;	lvx	vr2,r4,r9
-err3;	lvx	vr1,r4,r10
-err3;	lvx	vr0,r4,r11
+err3;	lvx	v3,r0,r4
+err3;	lvx	v2,r4,r9
+err3;	lvx	v1,r4,r10
+err3;	lvx	v0,r4,r11
 	addi	r4,r4,64
-err3;	stvx	vr3,r0,r3
-err3;	stvx	vr2,r3,r9
-err3;	stvx	vr1,r3,r10
-err3;	stvx	vr0,r3,r11
+err3;	stvx	v3,r0,r3
+err3;	stvx	v2,r3,r9
+err3;	stvx	v1,r3,r10
+err3;	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-err3;	lvx	vr1,r0,r4
-err3;	lvx	vr0,r4,r9
+err3;	lvx	v1,r0,r4
+err3;	lvx	v0,r4,r9
 	addi	r4,r4,32
-err3;	stvx	vr1,r0,r3
-err3;	stvx	vr0,r3,r9
+err3;	stvx	v1,r0,r3
+err3;	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-err3;	lvx	vr1,r0,r4
+err3;	lvx	v1,r0,r4
 	addi	r4,r4,16
-err3;	stvx	vr1,r0,r3
+err3;	stvx	v1,r0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
@@ -560,42 +560,42 @@ err3;	stw	r7,4(r3)
 	li	r10,32
 	li	r11,48
 
-	LVS(vr16,0,r4)		/* Setup permute control vector */
-err3;	lvx	vr0,0,r4
+	LVS(v16,0,r4)		/* Setup permute control vector */
+err3;	lvx	v0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
-err3;	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
+err3;	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-err3;	stvx	vr8,r0,r3
+err3;	stvx	v8,r0,r3
 	addi	r3,r3,16
-	vor	vr0,vr1,vr1
+	vor	v0,v1,v1
 
 5:	bf	cr7*4+2,6f
-err3;	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
-err3;	lvx	vr0,r4,r9
-	VPERM(vr9,vr1,vr0,vr16)
+err3;	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
+err3;	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-err3;	stvx	vr8,r0,r3
-err3;	stvx	vr9,r3,r9
+err3;	stvx	v8,r0,r3
+err3;	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-err3;	lvx	vr3,r0,r4
-	VPERM(vr8,vr0,vr3,vr16)
-err3;	lvx	vr2,r4,r9
-	VPERM(vr9,vr3,vr2,vr16)
-err3;	lvx	vr1,r4,r10
-	VPERM(vr10,vr2,vr1,vr16)
-err3;	lvx	vr0,r4,r11
-	VPERM(vr11,vr1,vr0,vr16)
+err3;	lvx	v3,r0,r4
+	VPERM(v8,v0,v3,v16)
+err3;	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+err3;	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+err3;	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-err3;	stvx	vr8,r0,r3
-err3;	stvx	vr9,r3,r9
-err3;	stvx	vr10,r3,r10
-err3;	stvx	vr11,r3,r11
+err3;	stvx	v8,r0,r3
+err3;	stvx	v9,r3,r9
+err3;	stvx	v10,r3,r10
+err3;	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 7:	sub	r5,r5,r6
@@ -618,31 +618,31 @@ err3;	stvx	vr11,r3,r11
 	 */
 	.align	5
 8:
-err4;	lvx	vr7,r0,r4
-	VPERM(vr8,vr0,vr7,vr16)
-err4;	lvx	vr6,r4,r9
-	VPERM(vr9,vr7,vr6,vr16)
-err4;	lvx	vr5,r4,r10
-	VPERM(vr10,vr6,vr5,vr16)
-err4;	lvx	vr4,r4,r11
-	VPERM(vr11,vr5,vr4,vr16)
-err4;	lvx	vr3,r4,r12
-	VPERM(vr12,vr4,vr3,vr16)
-err4;	lvx	vr2,r4,r14
-	VPERM(vr13,vr3,vr2,vr16)
-err4;	lvx	vr1,r4,r15
-	VPERM(vr14,vr2,vr1,vr16)
-err4;	lvx	vr0,r4,r16
-	VPERM(vr15,vr1,vr0,vr16)
+err4;	lvx	v7,r0,r4
+	VPERM(v8,v0,v7,v16)
+err4;	lvx	v6,r4,r9
+	VPERM(v9,v7,v6,v16)
+err4;	lvx	v5,r4,r10
+	VPERM(v10,v6,v5,v16)
+err4;	lvx	v4,r4,r11
+	VPERM(v11,v5,v4,v16)
+err4;	lvx	v3,r4,r12
+	VPERM(v12,v4,v3,v16)
+err4;	lvx	v2,r4,r14
+	VPERM(v13,v3,v2,v16)
+err4;	lvx	v1,r4,r15
+	VPERM(v14,v2,v1,v16)
+err4;	lvx	v0,r4,r16
+	VPERM(v15,v1,v0,v16)
 	addi	r4,r4,128
-err4;	stvx	vr8,r0,r3
-err4;	stvx	vr9,r3,r9
-err4;	stvx	vr10,r3,r10
-err4;	stvx	vr11,r3,r11
-err4;	stvx	vr12,r3,r12
-err4;	stvx	vr13,r3,r14
-err4;	stvx	vr14,r3,r15
-err4;	stvx	vr15,r3,r16
+err4;	stvx	v8,r0,r3
+err4;	stvx	v9,r3,r9
+err4;	stvx	v10,r3,r10
+err4;	stvx	v11,r3,r11
+err4;	stvx	v12,r3,r12
+err4;	stvx	v13,r3,r14
+err4;	stvx	v14,r3,r15
+err4;	stvx	v15,r3,r16
 	addi	r3,r3,128
 	bdnz	8b
 
@@ -656,36 +656,36 @@ err4;	stvx	vr15,r3,r16
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-err3;	lvx	vr3,r0,r4
-	VPERM(vr8,vr0,vr3,vr16)
-err3;	lvx	vr2,r4,r9
-	VPERM(vr9,vr3,vr2,vr16)
-err3;	lvx	vr1,r4,r10
-	VPERM(vr10,vr2,vr1,vr16)
-err3;	lvx	vr0,r4,r11
-	VPERM(vr11,vr1,vr0,vr16)
+err3;	lvx	v3,r0,r4
+	VPERM(v8,v0,v3,v16)
+err3;	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+err3;	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+err3;	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-err3;	stvx	vr8,r0,r3
-err3;	stvx	vr9,r3,r9
-err3;	stvx	vr10,r3,r10
-err3;	stvx	vr11,r3,r11
+err3;	stvx	v8,r0,r3
+err3;	stvx	v9,r3,r9
+err3;	stvx	v10,r3,r10
+err3;	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-err3;	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
-err3;	lvx	vr0,r4,r9
-	VPERM(vr9,vr1,vr0,vr16)
+err3;	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
+err3;	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-err3;	stvx	vr8,r0,r3
-err3;	stvx	vr9,r3,r9
+err3;	stvx	v8,r0,r3
+err3;	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-err3;	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
+err3;	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-err3;	stvx	vr8,r0,r3
+err3;	stvx	v8,r0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index a5b30c71a8d3..18af0b3d3eb2 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -236,78 +236,78 @@ _GLOBAL(_rest32gpr_31_x)
 
 _GLOBAL(_savevr_20)
 	li	r11,-192
-	stvx	vr20,r11,r0
+	stvx	v20,r11,r0
 _GLOBAL(_savevr_21)
 	li	r11,-176
-	stvx	vr21,r11,r0
+	stvx	v21,r11,r0
 _GLOBAL(_savevr_22)
 	li	r11,-160
-	stvx	vr22,r11,r0
+	stvx	v22,r11,r0
 _GLOBAL(_savevr_23)
 	li	r11,-144
-	stvx	vr23,r11,r0
+	stvx	v23,r11,r0
 _GLOBAL(_savevr_24)
 	li	r11,-128
-	stvx	vr24,r11,r0
+	stvx	v24,r11,r0
 _GLOBAL(_savevr_25)
 	li	r11,-112
-	stvx	vr25,r11,r0
+	stvx	v25,r11,r0
 _GLOBAL(_savevr_26)
 	li	r11,-96
-	stvx	vr26,r11,r0
+	stvx	v26,r11,r0
 _GLOBAL(_savevr_27)
 	li	r11,-80
-	stvx	vr27,r11,r0
+	stvx	v27,r11,r0
 _GLOBAL(_savevr_28)
 	li	r11,-64
-	stvx	vr28,r11,r0
+	stvx	v28,r11,r0
 _GLOBAL(_savevr_29)
 	li	r11,-48
-	stvx	vr29,r11,r0
+	stvx	v29,r11,r0
 _GLOBAL(_savevr_30)
 	li	r11,-32
-	stvx	vr30,r11,r0
+	stvx	v30,r11,r0
 _GLOBAL(_savevr_31)
 	li	r11,-16
-	stvx	vr31,r11,r0
+	stvx	v31,r11,r0
 	blr
 
 _GLOBAL(_restvr_20)
 	li	r11,-192
-	lvx	vr20,r11,r0
+	lvx	v20,r11,r0
 _GLOBAL(_restvr_21)
 	li	r11,-176
-	lvx	vr21,r11,r0
+	lvx	v21,r11,r0
 _GLOBAL(_restvr_22)
 	li	r11,-160
-	lvx	vr22,r11,r0
+	lvx	v22,r11,r0
 _GLOBAL(_restvr_23)
 	li	r11,-144
-	lvx	vr23,r11,r0
+	lvx	v23,r11,r0
 _GLOBAL(_restvr_24)
 	li	r11,-128
-	lvx	vr24,r11,r0
+	lvx	v24,r11,r0
 _GLOBAL(_restvr_25)
 	li	r11,-112
-	lvx	vr25,r11,r0
+	lvx	v25,r11,r0
 _GLOBAL(_restvr_26)
 	li	r11,-96
-	lvx	vr26,r11,r0
+	lvx	v26,r11,r0
 _GLOBAL(_restvr_27)
 	li	r11,-80
-	lvx	vr27,r11,r0
+	lvx	v27,r11,r0
 _GLOBAL(_restvr_28)
 	li	r11,-64
-	lvx	vr28,r11,r0
+	lvx	v28,r11,r0
 _GLOBAL(_restvr_29)
 	li	r11,-48
-	lvx	vr29,r11,r0
+	lvx	v29,r11,r0
 _GLOBAL(_restvr_30)
 	li	r11,-32
-	lvx	vr30,r11,r0
+	lvx	v30,r11,r0
 _GLOBAL(_restvr_31)
 	li	r11,-16
-	lvx	vr31,r11,r0
+	lvx	v31,r11,r0
 	blr
 
 #endif /* CONFIG_ALTIVEC */
@@ -443,101 +443,101 @@ _restgpr0_31:
 .globl	_savevr_20
 _savevr_20:
 	li	r12,-192
-	stvx	vr20,r12,r0
+	stvx	v20,r12,r0
 .globl	_savevr_21
 _savevr_21:
 	li	r12,-176
-	stvx	vr21,r12,r0
+	stvx	v21,r12,r0
 .globl	_savevr_22
 _savevr_22:
 	li	r12,-160
-	stvx	vr22,r12,r0
+	stvx	v22,r12,r0
 .globl	_savevr_23
 _savevr_23:
 	li	r12,-144
-	stvx	vr23,r12,r0
+	stvx	v23,r12,r0
 .globl	_savevr_24
 _savevr_24:
 	li	r12,-128
-	stvx	vr24,r12,r0
+	stvx	v24,r12,r0
 .globl	_savevr_25
 _savevr_25:
 	li	r12,-112
-	stvx	vr25,r12,r0
+	stvx	v25,r12,r0
 .globl	_savevr_26
 _savevr_26:
 	li	r12,-96
-	stvx	vr26,r12,r0
+	stvx	v26,r12,r0
 .globl	_savevr_27
 _savevr_27:
 	li	r12,-80
-	stvx	vr27,r12,r0
+	stvx	v27,r12,r0
 .globl	_savevr_28
 _savevr_28:
 	li	r12,-64
-	stvx	vr28,r12,r0
+	stvx	v28,r12,r0
 .globl	_savevr_29
 _savevr_29:
 	li	r12,-48
-	stvx	vr29,r12,r0
+	stvx	v29,r12,r0
 .globl	_savevr_30
 _savevr_30:
 	li	r12,-32
-	stvx	vr30,r12,r0
+	stvx	v30,r12,r0
 .globl	_savevr_31
 _savevr_31:
 	li	r12,-16
-	stvx	vr31,r12,r0
+	stvx	v31,r12,r0
 	blr
 
 .globl	_restvr_20
 _restvr_20:
 	li	r12,-192
-	lvx	vr20,r12,r0
+	lvx	v20,r12,r0
 .globl	_restvr_21
 _restvr_21:
 	li	r12,-176
-	lvx	vr21,r12,r0
+	lvx	v21,r12,r0
 .globl	_restvr_22
 _restvr_22:
 	li	r12,-160
-	lvx	vr22,r12,r0
+	lvx	v22,r12,r0
 .globl	_restvr_23
 _restvr_23:
 	li	r12,-144
-	lvx	vr23,r12,r0
+	lvx	v23,r12,r0
 .globl	_restvr_24
 _restvr_24:
 	li	r12,-128
-	lvx	vr24,r12,r0
+	lvx	v24,r12,r0
 .globl	_restvr_25
 _restvr_25:
 	li	r12,-112
-	lvx	vr25,r12,r0
+	lvx	v25,r12,r0
 .globl	_restvr_26
 _restvr_26:
 	li	r12,-96
-	lvx	vr26,r12,r0
+	lvx	v26,r12,r0
 .globl	_restvr_27
 _restvr_27:
 	li	r12,-80
-	lvx	vr27,r12,r0
+	lvx	v27,r12,r0
 .globl	_restvr_28
 _restvr_28:
 	li	r12,-64
-	lvx	vr28,r12,r0
+	lvx	v28,r12,r0
 .globl	_restvr_29
 _restvr_29:
 	li	r12,-48
-	lvx	vr29,r12,r0
+	lvx	v29,r12,r0
 .globl	_restvr_30
 _restvr_30:
 	li	r12,-32
-	lvx	vr30,r12,r0
+	lvx	v30,r12,r0
 .globl	_restvr_31
 _restvr_31:
 	li	r12,-16
-	lvx	vr31,r12,r0
+	lvx	v31,r12,r0
 	blr
 
 #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index 85aec08ab234..5d0cdbfbe3f2 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -184,16 +184,16 @@ _GLOBAL(do_stfd)
 	extab	2b,3b
 
 #ifdef CONFIG_ALTIVEC
-/* Get the contents of vrN into vr0; N is in r3. */
+/* Get the contents of vrN into v0; N is in r3. */
 _GLOBAL(get_vr)
 	mflr	r0
 	rlwinm	r3,r3,3,0xf8
 	bcl	20,31,1f
-	blr			/* vr0 is already in vr0 */
+	blr			/* v0 is already in v0 */
 	nop
 reg = 1
 	.rept	31
-	vor	vr0,reg,reg	/* assembler doesn't know vmr? */
+	vor	v0,reg,reg	/* assembler doesn't know vmr? */
 	blr
 reg = reg + 1
 	.endr
@@ -203,16 +203,16 @@ reg = reg + 1
 	mtlr	r0
 	bctr
 
-/* Put the contents of vr0 into vrN; N is in r3. */
+/* Put the contents of v0 into vrN; N is in r3. */
 _GLOBAL(put_vr)
 	mflr	r0
 	rlwinm	r3,r3,3,0xf8
 	bcl	20,31,1f
-	blr			/* vr0 is already in vr0 */
+	blr			/* v0 is already in v0 */
 	nop
 reg = 1
 	.rept	31
-	vor	reg,vr0,vr0
+	vor	reg,v0,v0
 	blr
 reg = reg + 1
 	.endr
@@ -234,13 +234,13 @@ _GLOBAL(do_lvx)
 	MTMSRD(r7)
 	isync
 	beq	cr7,1f
-	stvx	vr0,r1,r8
+	stvx	v0,r1,r8
 1:	li	r9,-EFAULT
-2:	lvx	vr0,0,r4
+2:	lvx	v0,0,r4
 	li	r9,0
 3:	beq	cr7,4f
 	bl	put_vr
-	lvx	vr0,r1,r8
+	lvx	v0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
 	MTMSRD(r6)
@@ -262,13 +262,13 @@ _GLOBAL(do_stvx)
 	MTMSRD(r7)
 	isync
 	beq	cr7,1f
-	stvx	vr0,r1,r8
+	stvx	v0,r1,r8
 	bl	get_vr
 1:	li	r9,-EFAULT
-2:	stvx	vr0,0,r4
+2:	stvx	v0,0,r4
 	li	r9,0
 3:	beq	cr7,4f
-	lvx	vr0,r1,r8
+	lvx	v0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
 	MTMSRD(r6)
@@ -280,12 +280,12 @@ _GLOBAL(do_stvx)
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-/* Get the contents of vsrN into vsr0; N is in r3. */
+/* Get the contents of vsN into vs0; N is in r3. */
 _GLOBAL(get_vsr)
 	mflr	r0
 	rlwinm	r3,r3,3,0x1f8
 	bcl	20,31,1f
-	blr			/* vsr0 is already in vsr0 */
+	blr			/* vs0 is already in vs0 */
 	nop
 reg = 1
 	.rept	63
@@ -299,12 +299,12 @@ reg = reg + 1
 	mtlr	r0
 	bctr
 
-/* Put the contents of vsr0 into vsrN; N is in r3. */
+/* Put the contents of vs0 into vsN; N is in r3. */
 _GLOBAL(put_vsr)
 	mflr	r0
 	rlwinm	r3,r3,3,0x1f8
 	bcl	20,31,1f
-	blr			/* vr0 is already in vr0 */
+	blr			/* v0 is already in v0 */
 	nop
 reg = 1
 	.rept	63
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 170a0346f756..f7deebdf3365 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -41,6 +41,7 @@ void __spin_yield(arch_spinlock_t *lock)
 	plpar_hcall_norets(H_CONFER,
 		get_hard_smp_processor_id(holder_cpu), yield_count);
 }
+EXPORT_SYMBOL_GPL(__spin_yield);
 
 /*
  * Waiting for a read lock or a write lock on a rwlock...
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 0830587df16e..786234fd4e91 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -321,29 +321,29 @@ _GLOBAL(memcpy_power7)
 	li	r11,48
 
 	bf	cr7*4+3,5f
-	lvx	vr1,r0,r4
+	lvx	v1,r0,r4
 	addi	r4,r4,16
-	stvx	vr1,r0,r3
+	stvx	v1,r0,r3
 	addi	r3,r3,16
 
 5:	bf	cr7*4+2,6f
-	lvx	vr1,r0,r4
-	lvx	vr0,r4,r9
+	lvx	v1,r0,r4
+	lvx	v0,r4,r9
 	addi	r4,r4,32
-	stvx	vr1,r0,r3
-	stvx	vr0,r3,r9
+	stvx	v1,r0,r3
+	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-	lvx	vr3,r0,r4
-	lvx	vr2,r4,r9
-	lvx	vr1,r4,r10
-	lvx	vr0,r4,r11
+	lvx	v3,r0,r4
+	lvx	v2,r4,r9
+	lvx	v1,r4,r10
+	lvx	v0,r4,r11
 	addi	r4,r4,64
-	stvx	vr3,r0,r3
-	stvx	vr2,r3,r9
-	stvx	vr1,r3,r10
-	stvx	vr0,r3,r11
+	stvx	v3,r0,r3
+	stvx	v2,r3,r9
+	stvx	v1,r3,r10
+	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 7:	sub	r5,r5,r6
@@ -366,23 +366,23 @@ _GLOBAL(memcpy_power7)
 	 */
 	.align	5
 8:
-	lvx	vr7,r0,r4
-	lvx	vr6,r4,r9
-	lvx	vr5,r4,r10
-	lvx	vr4,r4,r11
-	lvx	vr3,r4,r12
-	lvx	vr2,r4,r14
-	lvx	vr1,r4,r15
-	lvx	vr0,r4,r16
+	lvx	v7,r0,r4
+	lvx	v6,r4,r9
+	lvx	v5,r4,r10
+	lvx	v4,r4,r11
+	lvx	v3,r4,r12
+	lvx	v2,r4,r14
+	lvx	v1,r4,r15
+	lvx	v0,r4,r16
 	addi	r4,r4,128
-	stvx	vr7,r0,r3
-	stvx	vr6,r3,r9
-	stvx	vr5,r3,r10
-	stvx	vr4,r3,r11
-	stvx	vr3,r3,r12
-	stvx	vr2,r3,r14
-	stvx	vr1,r3,r15
-	stvx	vr0,r3,r16
+	stvx	v7,r0,r3
+	stvx	v6,r3,r9
+	stvx	v5,r3,r10
+	stvx	v4,r3,r11
+	stvx	v3,r3,r12
+	stvx	v2,r3,r14
+	stvx	v1,r3,r15
+	stvx	v0,r3,r16
 	addi	r3,r3,128
 	bdnz	8b
 
@@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7)
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-	lvx	vr3,r0,r4
-	lvx	vr2,r4,r9
-	lvx	vr1,r4,r10
-	lvx	vr0,r4,r11
+	lvx	v3,r0,r4
+	lvx	v2,r4,r9
+	lvx	v1,r4,r10
+	lvx	v0,r4,r11
 	addi	r4,r4,64
-	stvx	vr3,r0,r3
-	stvx	vr2,r3,r9
-	stvx	vr1,r3,r10
-	stvx	vr0,r3,r11
+	stvx	v3,r0,r3
+	stvx	v2,r3,r9
+	stvx	v1,r3,r10
+	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-	lvx	vr1,r0,r4
-	lvx	vr0,r4,r9
+	lvx	v1,r0,r4
+	lvx	v0,r4,r9
 	addi	r4,r4,32
-	stvx	vr1,r0,r3
-	stvx	vr0,r3,r9
+	stvx	v1,r0,r3
+	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-	lvx	vr1,r0,r4
+	lvx	v1,r0,r4
 	addi	r4,r4,16
-	stvx	vr1,r0,r3
+	stvx	v1,r0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
@@ -494,42 +494,42 @@ _GLOBAL(memcpy_power7)
 	li	r10,32
 	li	r11,48
 
-	LVS(vr16,0,r4)		/* Setup permute control vector */
-	lvx	vr0,0,r4
+	LVS(v16,0,r4)		/* Setup permute control vector */
+	lvx	v0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
-	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
+	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-	stvx	vr8,r0,r3
+	stvx	v8,r0,r3
 	addi	r3,r3,16
-	vor	vr0,vr1,vr1
+	vor	v0,v1,v1
 
 5:	bf	cr7*4+2,6f
-	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
-	lvx	vr0,r4,r9
-	VPERM(vr9,vr1,vr0,vr16)
+	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
+	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-	stvx	vr8,r0,r3
-	stvx	vr9,r3,r9
+	stvx	v8,r0,r3
+	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-	lvx	vr3,r0,r4
-	VPERM(vr8,vr0,vr3,vr16)
-	lvx	vr2,r4,r9
-	VPERM(vr9,vr3,vr2,vr16)
-	lvx	vr1,r4,r10
-	VPERM(vr10,vr2,vr1,vr16)
-	lvx	vr0,r4,r11
-	VPERM(vr11,vr1,vr0,vr16)
+	lvx	v3,r0,r4
+	VPERM(v8,v0,v3,v16)
+	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-	stvx	vr8,r0,r3
-	stvx	vr9,r3,r9
-	stvx	vr10,r3,r10
-	stvx	vr11,r3,r11
+	stvx	v8,r0,r3
+	stvx	v9,r3,r9
+	stvx	v10,r3,r10
+	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 7:	sub	r5,r5,r6
@@ -552,31 +552,31 @@ _GLOBAL(memcpy_power7)
 	 */
 	.align	5
 8:
-	lvx	vr7,r0,r4
-	VPERM(vr8,vr0,vr7,vr16)
-	lvx	vr6,r4,r9
-	VPERM(vr9,vr7,vr6,vr16)
-	lvx	vr5,r4,r10
-	VPERM(vr10,vr6,vr5,vr16)
-	lvx	vr4,r4,r11
-	VPERM(vr11,vr5,vr4,vr16)
-	lvx	vr3,r4,r12
-	VPERM(vr12,vr4,vr3,vr16)
-	lvx	vr2,r4,r14
-	VPERM(vr13,vr3,vr2,vr16)
-	lvx	vr1,r4,r15
-	VPERM(vr14,vr2,vr1,vr16)
-	lvx	vr0,r4,r16
-	VPERM(vr15,vr1,vr0,vr16)
+	lvx	v7,r0,r4
+	VPERM(v8,v0,v7,v16)
+	lvx	v6,r4,r9
+	VPERM(v9,v7,v6,v16)
+	lvx	v5,r4,r10
+	VPERM(v10,v6,v5,v16)
+	lvx	v4,r4,r11
+	VPERM(v11,v5,v4,v16)
+	lvx	v3,r4,r12
+	VPERM(v12,v4,v3,v16)
+	lvx	v2,r4,r14
+	VPERM(v13,v3,v2,v16)
+	lvx	v1,r4,r15
+	VPERM(v14,v2,v1,v16)
+	lvx	v0,r4,r16
+	VPERM(v15,v1,v0,v16)
 	addi	r4,r4,128
-	stvx	vr8,r0,r3
-	stvx	vr9,r3,r9
-	stvx	vr10,r3,r10
-	stvx	vr11,r3,r11
-	stvx	vr12,r3,r12
-	stvx	vr13,r3,r14
-	stvx	vr14,r3,r15
-	stvx	vr15,r3,r16
+	stvx	v8,r0,r3
+	stvx	v9,r3,r9
+	stvx	v10,r3,r10
+	stvx	v11,r3,r11
+	stvx	v12,r3,r12
+	stvx	v13,r3,r14
+	stvx	v14,r3,r15
+	stvx	v15,r3,r16
 	addi	r3,r3,128
 	bdnz	8b
 
@@ -590,36 +590,36 @@ _GLOBAL(memcpy_power7)
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-	lvx	vr3,r0,r4
-	VPERM(vr8,vr0,vr3,vr16)
-	lvx	vr2,r4,r9
-	VPERM(vr9,vr3,vr2,vr16)
-	lvx	vr1,r4,r10
-	VPERM(vr10,vr2,vr1,vr16)
-	lvx	vr0,r4,r11
-	VPERM(vr11,vr1,vr0,vr16)
+	lvx	v3,r0,r4
+	VPERM(v8,v0,v3,v16)
+	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-	stvx	vr8,r0,r3
-	stvx	vr9,r3,r9
-	stvx	vr10,r3,r10
-	stvx	vr11,r3,r11
+	stvx	v8,r0,r3
+	stvx	v9,r3,r9
+	stvx	v10,r3,r10
+	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
-	lvx	vr0,r4,r9
-	VPERM(vr9,vr1,vr0,vr16)
+	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
+	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-	stvx	vr8,r0,r3
-	stvx	vr9,r3,r9
+	stvx	v8,r0,r3
+	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-	lvx	vr1,r0,r4
-	VPERM(vr8,vr0,vr1,vr16)
+	lvx	v1,r0,r4
+	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-	stvx	vr8,r0,r3
+	stvx	v8,r0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c
index f993959647b5..c7f8e9586316 100644
--- a/arch/powerpc/lib/ppc_ksyms.c
+++ b/arch/powerpc/lib/ppc_ksyms.c
@@ -8,10 +8,6 @@ EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memchr);
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(cacheable_memcpy);
-EXPORT_SYMBOL(cacheable_memzero);
-#endif
 
 EXPORT_SYMBOL(strcpy);
 EXPORT_SYMBOL(strncpy);
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
index a1060a868e69..69abf844c2c3 100644
--- a/arch/powerpc/lib/rheap.c
+++ b/arch/powerpc/lib/rheap.c
@@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rh_create);
  */
 void rh_destroy(rh_info_t * info)
 {
-	if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
+	if ((info->flags & RHIF_STATIC_BLOCK) == 0)
 		kfree(info->block);
 
 	if ((info->flags & RHIF_STATIC_INFO) == 0)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 438dcd3fd0d1..9c8770b5f96f 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_40x)		+= 40x_mmu.o
 obj-$(CONFIG_44x)		+= 44x_mmu.o
 obj-$(CONFIG_PPC_FSL_BOOK3E)	+= fsl_booke_mmu.o
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_PPC_SPLPAR)	+= vphn.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-y				+= hugetlbpage.o
 ifeq ($(CONFIG_HUGETLB_PAGE),y)
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index d85e86aac7fb..169aba446a74 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -228,7 +228,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
 		do {
 			SetPageReserved(page);
 			map_page(vaddr, page_to_phys(page),
-				 pgprot_noncached(PAGE_KERNEL));
+				 pgprot_val(pgprot_noncached(PAGE_KERNEL)));
 			page++;
 			vaddr += PAGE_SIZE;
 		} while (size -= PAGE_SIZE);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index b46912fee7cd..9c90e66cffb6 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -181,7 +181,7 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
 		unsigned long cam_sz;
 
 		cam_sz = calc_cam_sz(ram, virt, phys);
-		settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0);
+		settlbcam(i, virt, phys, cam_sz, pgprot_val(PAGE_KERNEL_X), 0);
 
 		ram -= cam_sz;
 		amount_mapped += cam_sz;
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 86686514ae13..43dafb9d6a46 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -33,7 +33,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 	 * atomically mark the linux large page PMD busy and dirty
 	 */
 	do {
-		pmd_t pmd = ACCESS_ONCE(*pmdp);
+		pmd_t pmd = READ_ONCE(*pmdp);
 
 		old_pmd = pmd_val(pmd);
 		/* If PMD busy, retry the access */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7e408bfc7948..fa9d5c238d22 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -964,7 +964,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 		*shift = 0;
 
 	pgdp = pgdir + pgd_index(ea);
-	pgd  = ACCESS_ONCE(*pgdp);
+	pgd  = READ_ONCE(*pgdp);
 	/*
 	 * Always operate on the local stack value. This make sure the
 	 * value don't get updated by a parallel THP split/collapse,
@@ -1045,7 +1045,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 	if (pte_end < end)
 		end = pte_end;
 
-	pte = ACCESS_ONCE(*ptep);
+	pte = READ_ONCE(*ptep);
 	mask = _PAGE_PRESENT | _PAGE_USER;
 	if (write)
 		mask |= _PAGE_RW;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 10471f9bb63f..d747dd7bc90b 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -132,6 +132,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
 	align = max_t(unsigned long, align, minalign);
 	name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
 	new = kmem_cache_create(name, table_size, align, 0, ctor);
+	kfree(name);
 	pgtable_cache[shift - 1] = new;
 	pr_debug("Allocated pgtable cache for order %d\n", shift);
 }
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index b7285a5870f8..45fda71feb27 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,7 +61,6 @@
 #define CPU_FTR_NOEXECUTE	0
 #endif
 
-int mem_init_done;
 unsigned long long memory_limit;
 
 #ifdef CONFIG_HIGHMEM
@@ -377,8 +376,6 @@ void __init mem_init(void)
 	pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
 		VMALLOC_START, VMALLOC_END);
 #endif /* CONFIG_PPC32 */
-
-	mem_init_done = 1;
 }
 
 void free_initmem(void)
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index cb8bdbe4972f..0f0502e12f6c 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -53,21 +53,20 @@ static inline int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_rnd(void)
+unsigned long arch_mmap_rnd(void)
 {
-	unsigned long rnd = 0;
+	unsigned long rnd;
+
+	/* 8MB for 32bit, 1GB for 64bit */
+	if (is_32bit_task())
+		rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+	else
+		rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
 
-	if (current->flags & PF_RANDOMIZE) {
-		/* 8MB for 32bit, 1GB for 64bit */
-		if (is_32bit_task())
-			rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
-		else
-			rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
-	}
 	return rnd << PAGE_SHIFT;
 }
 
-static inline unsigned long mmap_base(void)
+static inline unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
 
@@ -76,7 +75,7 @@ static inline unsigned long mmap_base(void)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
 }
 
 /*
@@ -85,6 +84,11 @@ static inline unsigned long mmap_base(void)
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
 	/*
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
@@ -93,7 +97,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base();
+		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 78c45f392f5b..085b66b10891 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -96,7 +96,7 @@ extern void _tlbia(void);
 extern void mapin_ram(void);
 extern int map_page(unsigned long va, phys_addr_t pa, int flags);
 extern void setbat(int index, unsigned long virt, phys_addr_t phys,
-		   unsigned int size, int flags);
+		   unsigned int size, pgprot_t prot);
 
 extern int __map_without_bats;
 extern int __allow_ioremap_reserved;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0257a7d659ef..5e80621d9324 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -958,6 +958,13 @@ void __init initmem_init(void)
 
 	memblock_dump_all();
 
+	/*
+	 * Reduce the possible NUMA nodes to the online NUMA nodes,
+	 * since we do not support node hotplug. This ensures that  we
+	 * lower the maximum NUMA node ID to what is actually present.
+	 */
+	nodes_and(node_possible_map, node_possible_map, node_online_map);
+
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
 
@@ -1177,6 +1184,9 @@ u64 memory_hotplug_max(void)
 
 /* Virtual Processor Home Node (VPHN) support */
 #ifdef CONFIG_PPC_SPLPAR
+
+#include "vphn.h"
+
 struct topology_update_data {
 	struct topology_update_data *next;
 	unsigned int cpu;
@@ -1248,55 +1258,6 @@ static int update_cpu_associativity_changes_mask(void)
 }
 
 /*
- * 6 64-bit registers unpacked into 12 32-bit associativity values. To form
- * the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)
-
-/*
- * Convert the associativity domain numbers returned from the hypervisor
- * to the sequence they would appear in the ibm,associativity property.
- */
-static int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
-{
-	int i, nr_assoc_doms = 0;
-	const __be16 *field = (const __be16 *) packed;
-
-#define VPHN_FIELD_UNUSED	(0xffff)
-#define VPHN_FIELD_MSB		(0x8000)
-#define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)
-
-	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
-		if (be16_to_cpup(field) == VPHN_FIELD_UNUSED) {
-			/* All significant fields processed, and remaining
-			 * fields contain the reserved value of all 1's.
-			 * Just store them.
-			 */
-			unpacked[i] = *((__be32 *)field);
-			field += 2;
-		} else if (be16_to_cpup(field) & VPHN_FIELD_MSB) {
-			/* Data is in the lower 15 bits of this field */
-			unpacked[i] = cpu_to_be32(
-				be16_to_cpup(field) & VPHN_FIELD_MASK);
-			field++;
-			nr_assoc_doms++;
-		} else {
-			/* Data is in the lower 15 bits of this field
-			 * concatenated with the next 16 bit field
-			 */
-			unpacked[i] = *((__be32 *)field);
-			field += 2;
-			nr_assoc_doms++;
-		}
-	}
-
-	/* The first cell contains the length of the property */
-	unpacked[0] = cpu_to_be32(nr_assoc_doms);
-
-	return nr_assoc_doms;
-}
-
-/*
  * Retrieve the new associativity information for a virtual processor's
  * home node.
  */
@@ -1306,11 +1267,8 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity)
 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
 	u64 flags = 1;
 	int hwcpu = get_hard_smp_processor_id(cpu);
-	int i;
 
 	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
-	for (i = 0; i < 6; i++)
-		retbuf[i] = cpu_to_be64(retbuf[i]);
 	vphn_unpack_associativity(retbuf, associativity);
 
 	return rc;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 03b1a3b0fbd5..7692d1bb1bc6 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -54,9 +54,6 @@ extern char etext[], _stext[];
 #ifdef HAVE_BATS
 extern phys_addr_t v_mapped_by_bats(unsigned long va);
 extern unsigned long p_mapped_by_bats(phys_addr_t pa);
-void setbat(int index, unsigned long virt, phys_addr_t phys,
-	    unsigned int size, int flags);
-
 #else /* !HAVE_BATS */
 #define v_mapped_by_bats(x)	(0UL)
 #define p_mapped_by_bats(x)	(0UL)
@@ -110,9 +107,8 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	pte_t *pte;
-	extern int mem_init_done;
 
-	if (mem_init_done) {
+	if (slab_is_available()) {
 		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
 	} else {
 		pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
@@ -192,7 +188,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 
 	/* Make sure we have the base flags */
 	if ((flags & _PAGE_PRESENT) == 0)
-		flags |= PAGE_KERNEL;
+		flags |= pgprot_val(PAGE_KERNEL);
 
 	/* Non-cacheable page cannot be coherent */
 	if (flags & _PAGE_NO_CACHE)
@@ -219,9 +215,9 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	 * Don't allow anybody to remap normal RAM that we're using.
 	 * mem_init() sets high_memory so only do the check after that.
 	 */
-	if (mem_init_done && (p < virt_to_phys(high_memory)) &&
+	if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
 	    !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
-		printk("__ioremap(): phys addr 0x%llx is RAM lr %pf\n",
+		printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
 		       (unsigned long long)p, __builtin_return_address(0));
 		return NULL;
 	}
@@ -247,7 +243,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	if ((v = p_mapped_by_tlbcam(p)))
 		goto out;
 
-	if (mem_init_done) {
+	if (slab_is_available()) {
 		struct vm_struct *area;
 		area = get_vm_area_caller(size, VM_IOREMAP, caller);
 		if (area == 0)
@@ -266,7 +262,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	for (i = 0; i < size && err == 0; i += PAGE_SIZE)
 		err = map_page(v+i, p+i, flags);
 	if (err) {
-		if (mem_init_done)
+		if (slab_is_available())
 			vunmap((void *)v);
 		return NULL;
 	}
@@ -327,7 +323,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
 	p = memstart_addr + s;
 	for (; s < top; s += PAGE_SIZE) {
 		ktext = ((char *) v >= _stext && (char *) v < etext);
-		f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
+		f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL);
 		map_page(v, p, f);
 #ifdef CONFIG_PPC_STD_MMU_32
 		if (ktext)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6957cc1ca0a7..59daa5eeec25 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -231,7 +231,7 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
 	if ((size == 0) || (paligned == 0))
 		return NULL;
 
-	if (mem_init_done) {
+	if (slab_is_available()) {
 		struct vm_struct *area;
 
 		area = __get_vm_area_caller(size, VM_IOREMAP,
@@ -315,7 +315,7 @@ void __iounmap(volatile void __iomem *token)
 {
 	void *addr;
 
-	if (!mem_init_done)
+	if (!slab_is_available())
 		return;
 	
 	addr = (void *) ((unsigned long __force)
@@ -723,7 +723,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	assert_spin_locked(&mm->page_table_lock);
 	WARN_ON(!pmd_trans_huge(pmd));
 #endif
-	trace_hugepage_set_pmd(addr, pmd);
+	trace_hugepage_set_pmd(addr, pmd_val(pmd));
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 5029dc19b517..6b2f3e457171 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -113,11 +113,12 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
  * of 2 between 128k and 256M.
  */
 void __init setbat(int index, unsigned long virt, phys_addr_t phys,
-		   unsigned int size, int flags)
+		   unsigned int size, pgprot_t prot)
 {
 	unsigned int bl;
 	int wimgxpp;
 	struct ppc_bat *bat = BATS[index];
+	unsigned long flags = pgprot_val(prot);
 
 	if ((flags & _PAGE_NO_CACHE) ||
 	    (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
@@ -224,7 +225,7 @@ void __init MMU_init_hw(void)
 	 */
 	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
 	Hash = __va(memblock_alloc(Hash_size, Hash_size));
-	cacheable_memzero(Hash, Hash_size);
+	memset(Hash, 0, Hash_size);
 	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
 	Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index d2a94b85dbc2..c522969f012d 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -216,7 +216,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 			continue;
 		pte = pte_val(*ptep);
 		if (hugepage_shift)
-			trace_hugepage_invalidate(start, pte_val(pte));
+			trace_hugepage_invalidate(start, pte);
 		if (!(pte & _PAGE_HASHPTE))
 			continue;
 		if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/vphn.c
new file mode 100644
index 000000000000..5f8ef50e5c66
--- /dev/null
+++ b/arch/powerpc/mm/vphn.c
@@ -0,0 +1,70 @@
+#include <asm/byteorder.h>
+#include "vphn.h"
+
+/*
+ * The associativity domain numbers are returned from the hypervisor as a
+ * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the
+ * special value of "all ones" (aka. 0xffff) and its size may not exceed 48
+ * bytes.
+ *
+ *    --- 16-bit fields -->
+ *  _________________________
+ *  |  0  |  1  |  2  |  3  |   be_packed[0]
+ *  ------+-----+-----+------
+ *  _________________________
+ *  |  4  |  5  |  6  |  7  |   be_packed[1]
+ *  -------------------------
+ *            ...
+ *  _________________________
+ *  | 20  | 21  | 22  | 23  |   be_packed[5]
+ *  -------------------------
+ *
+ * Convert to the sequence they would appear in the ibm,associativity property.
+ */
+int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
+{
+	__be64 be_packed[VPHN_REGISTER_COUNT];
+	int i, nr_assoc_doms = 0;
+	const __be16 *field = (const __be16 *) be_packed;
+	u16 last = 0;
+	bool is_32bit = false;
+
+#define VPHN_FIELD_UNUSED	(0xffff)
+#define VPHN_FIELD_MSB		(0x8000)
+#define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)
+
+	/* Let's fix the values returned by plpar_hcall9() */
+	for (i = 0; i < VPHN_REGISTER_COUNT; i++)
+		be_packed[i] = cpu_to_be64(packed[i]);
+
+	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
+		u16 new = be16_to_cpup(field++);
+
+		if (is_32bit) {
+			/* Let's concatenate the 16 bits of this field to the
+			 * 15 lower bits of the previous field
+			 */
+			unpacked[++nr_assoc_doms] =
+				cpu_to_be32(last << 16 | new);
+			is_32bit = false;
+		} else if (new == VPHN_FIELD_UNUSED)
+			/* This is the list terminator */
+			break;
+		else if (new & VPHN_FIELD_MSB) {
+			/* Data is in the lower 15 bits of this field */
+			unpacked[++nr_assoc_doms] =
+				cpu_to_be32(new & VPHN_FIELD_MASK);
+		} else {
+			/* Data is in the lower 15 bits of this field
+			 * concatenated with the next 16 bit field
+			 */
+			last = new;
+			is_32bit = true;
+		}
+	}
+
+	/* The first cell contains the length of the property */
+	unpacked[0] = cpu_to_be32(nr_assoc_doms);
+
+	return nr_assoc_doms;
+}
diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/vphn.h
new file mode 100644
index 000000000000..fe8b7805b78f
--- /dev/null
+++ b/arch/powerpc/mm/vphn.h
@@ -0,0 +1,16 @@
+#ifndef _ARCH_POWERPC_MM_VPHN_H_
+#define _ARCH_POWERPC_MM_VPHN_H_
+
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers.
+ */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked);
+
+#endif
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 266b3950c3ac..1306a58ac541 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -1,4 +1,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_64.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index c406aa95b2bc..889fd199a821 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -10,12 +10,25 @@
 #ifndef _BPF_JIT_H
 #define _BPF_JIT_H
 
+#ifdef CONFIG_PPC64
+#define BPF_PPC_STACK_R3_OFF	48
 #define BPF_PPC_STACK_LOCALS	32
 #define BPF_PPC_STACK_BASIC	(48+64)
 #define BPF_PPC_STACK_SAVE	(18*8)
 #define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
 				 BPF_PPC_STACK_SAVE)
 #define BPF_PPC_SLOWPATH_FRAME	(48+64)
+#else
+#define BPF_PPC_STACK_R3_OFF	24
+#define BPF_PPC_STACK_LOCALS	16
+#define BPF_PPC_STACK_BASIC	(24+32)
+#define BPF_PPC_STACK_SAVE	(18*4)
+#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
+				 BPF_PPC_STACK_SAVE)
+#define BPF_PPC_SLOWPATH_FRAME	(24+32)
+#endif
+
+#define REG_SZ         (BITS_PER_LONG/8)
 
 /*
  * Generated code register usage:
@@ -57,7 +70,11 @@ DECLARE_LOAD_FUNC(sk_load_half);
 DECLARE_LOAD_FUNC(sk_load_byte);
 DECLARE_LOAD_FUNC(sk_load_byte_msh);
 
+#ifdef CONFIG_PPC64
 #define FUNCTION_DESCR_SIZE	24
+#else
+#define FUNCTION_DESCR_SIZE	0
+#endif
 
 /*
  * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
@@ -86,7 +103,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 #define PPC_LIS(r, i)		PPC_ADDIS(r, 0, i)
 #define PPC_STD(r, base, i)	EMIT(PPC_INST_STD | ___PPC_RS(r) |	      \
 				     ___PPC_RA(base) | ((i) & 0xfffc))
-
+#define PPC_STDU(r, base, i)	EMIT(PPC_INST_STDU | ___PPC_RS(r) |	      \
+				     ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_STW(r, base, i)	EMIT(PPC_INST_STW | ___PPC_RS(r) |	      \
+				     ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_STWU(r, base, i)	EMIT(PPC_INST_STWU | ___PPC_RS(r) |	      \
+				     ___PPC_RA(base) | ((i) & 0xfffc))
 
 #define PPC_LBZ(r, base, i)	EMIT(PPC_INST_LBZ | ___PPC_RT(r) |	      \
 				     ___PPC_RA(base) | IMM_L(i))
@@ -98,6 +120,17 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 				     ___PPC_RA(base) | IMM_L(i))
 #define PPC_LHBRX(r, base, b)	EMIT(PPC_INST_LHBRX | ___PPC_RT(r) |	      \
 				     ___PPC_RA(base) | ___PPC_RB(b))
+
+#ifdef CONFIG_PPC64
+#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
+#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
+#else
+#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
+#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
+#endif
+
 /* Convenience helpers for the above with 'far' offsets: */
 #define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i);   \
 		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
@@ -115,6 +148,29 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
 			PPC_LHZ(r, r, IMM_L(i)); } } while(0)
 
+#ifdef CONFIG_PPC64
+#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
+#else
+#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0)
+#endif
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC64
+#define PPC_BPF_LOAD_CPU(r)		\
+	do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2);	\
+		PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index));		\
+	} while (0)
+#else
+#define PPC_BPF_LOAD_CPU(r)     \
+	do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);			\
+		PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)),							\
+				offsetof(struct thread_info, cpu));							\
+	} while(0)
+#endif
+#else
+#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0)
+#endif
+
 #define PPC_CMPWI(a, i)		EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
 #define PPC_CMPDI(a, i)		EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
 #define PPC_CMPLWI(a, i)	EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i))
@@ -196,6 +252,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff);	      \
 		} } while (0);
 
+#ifdef CONFIG_PPC64
+#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0)
+#else
+#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0)
+#endif
+
 #define PPC_LHBRX_OFFS(r, base, i) \
 		do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
 #ifdef __LITTLE_ENDIAN__
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_asm.S
index 8f87d9217122..8ff5a3b5d1c3 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_asm.S
@@ -34,13 +34,13 @@
  */
 	.globl	sk_load_word
 sk_load_word:
-	cmpdi	r_addr, 0
+	PPC_LCMPI	r_addr, 0
 	blt	bpf_slow_path_word_neg
 	.globl	sk_load_word_positive_offset
 sk_load_word_positive_offset:
 	/* Are we accessing past headlen? */
 	subi	r_scratch1, r_HL, 4
-	cmpd	r_scratch1, r_addr
+	PPC_LCMP	r_scratch1, r_addr
 	blt	bpf_slow_path_word
 	/* Nope, just hitting the header.  cr0 here is eq or gt! */
 #ifdef __LITTLE_ENDIAN__
@@ -52,12 +52,12 @@ sk_load_word_positive_offset:
 
 	.globl	sk_load_half
 sk_load_half:
-	cmpdi	r_addr, 0
+	PPC_LCMPI	r_addr, 0
 	blt	bpf_slow_path_half_neg
 	.globl	sk_load_half_positive_offset
 sk_load_half_positive_offset:
 	subi	r_scratch1, r_HL, 2
-	cmpd	r_scratch1, r_addr
+	PPC_LCMP	r_scratch1, r_addr
 	blt	bpf_slow_path_half
 #ifdef __LITTLE_ENDIAN__
 	lhbrx	r_A, r_D, r_addr
@@ -68,11 +68,11 @@ sk_load_half_positive_offset:
 
 	.globl	sk_load_byte
 sk_load_byte:
-	cmpdi	r_addr, 0
+	PPC_LCMPI	r_addr, 0
 	blt	bpf_slow_path_byte_neg
 	.globl	sk_load_byte_positive_offset
 sk_load_byte_positive_offset:
-	cmpd	r_HL, r_addr
+	PPC_LCMP	r_HL, r_addr
 	ble	bpf_slow_path_byte
 	lbzx	r_A, r_D, r_addr
 	blr
@@ -83,11 +83,11 @@ sk_load_byte_positive_offset:
  */
 	.globl sk_load_byte_msh
 sk_load_byte_msh:
-	cmpdi	r_addr, 0
+	PPC_LCMPI	r_addr, 0
 	blt	bpf_slow_path_byte_msh_neg
 	.globl sk_load_byte_msh_positive_offset
 sk_load_byte_msh_positive_offset:
-	cmpd	r_HL, r_addr
+	PPC_LCMP	r_HL, r_addr
 	ble	bpf_slow_path_byte_msh
 	lbzx	r_X, r_D, r_addr
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
@@ -101,13 +101,13 @@ sk_load_byte_msh_positive_offset:
  */
 #define bpf_slow_path_common(SIZE)				\
 	mflr	r0;						\
-	std	r0, 16(r1);					\
+	PPC_STL	r0, PPC_LR_STKOFF(r1);					\
 	/* R3 goes in parameter space of caller's frame */	\
-	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
-	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
-	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
-	addi	r5, r1, BPF_PPC_STACK_BASIC+(2*8);		\
-	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	PPC_STL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
+	PPC_STL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
+	PPC_STL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
+	addi	r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ);		\
+	PPC_STLU	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
 	/* R3 = r_skb, as passed */				\
 	mr	r4, r_addr;					\
 	li	r6, SIZE;					\
@@ -115,19 +115,19 @@ sk_load_byte_msh_positive_offset:
 	nop;							\
 	/* R3 = 0 on success */					\
 	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
-	ld	r0, 16(r1);					\
-	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
-	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	PPC_LL	r0, PPC_LR_STKOFF(r1);					\
+	PPC_LL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
+	PPC_LL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
 	mtlr	r0;						\
-	cmpdi	r3, 0;						\
+	PPC_LCMPI	r3, 0;						\
 	blt	bpf_error;	/* cr0 = LT */			\
-	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	PPC_LL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
 	/* Great success! */
 
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	/* Data value is on stack, and cr0 != LT */
-	lwz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	lwz	r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1)
 	blr
 
 bpf_slow_path_half:
@@ -154,12 +154,12 @@ bpf_slow_path_byte_msh:
  */
 #define sk_negative_common(SIZE)				\
 	mflr	r0;						\
-	std	r0, 16(r1);					\
+	PPC_STL	r0, PPC_LR_STKOFF(r1);					\
 	/* R3 goes in parameter space of caller's frame */	\
-	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
-	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
-	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
-	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	PPC_STL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
+	PPC_STL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
+	PPC_STL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
+	PPC_STLU	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
 	/* R3 = r_skb, as passed */				\
 	mr	r4, r_addr;					\
 	li	r5, SIZE;					\
@@ -167,19 +167,19 @@ bpf_slow_path_byte_msh:
 	nop;							\
 	/* R3 != 0 on success */				\
 	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
-	ld	r0, 16(r1);					\
-	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
-	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	PPC_LL	r0, PPC_LR_STKOFF(r1);					\
+	PPC_LL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
+	PPC_LL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
 	mtlr	r0;						\
-	cmpldi	r3, 0;						\
+	PPC_LCMPLI	r3, 0;						\
 	beq	bpf_error_slow;	/* cr0 = EQ */			\
 	mr	r_addr, r3;					\
-	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	PPC_LL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
 	/* Great success! */
 
 bpf_slow_path_word_neg:
 	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
 	blt	bpf_error	/* cr0 = LT */
 	.globl	sk_load_word_negative_offset
 sk_load_word_negative_offset:
@@ -189,7 +189,7 @@ sk_load_word_negative_offset:
 
 bpf_slow_path_half_neg:
 	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
 	blt	bpf_error	/* cr0 = LT */
 	.globl	sk_load_half_negative_offset
 sk_load_half_negative_offset:
@@ -199,7 +199,7 @@ sk_load_half_negative_offset:
 
 bpf_slow_path_byte_neg:
 	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
 	blt	bpf_error	/* cr0 = LT */
 	.globl	sk_load_byte_negative_offset
 sk_load_byte_negative_offset:
@@ -209,7 +209,7 @@ sk_load_byte_negative_offset:
 
 bpf_slow_path_byte_msh_neg:
 	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
 	blt	bpf_error	/* cr0 = LT */
 	.globl	sk_load_byte_msh_negative_offset
 sk_load_byte_msh_negative_offset:
@@ -221,7 +221,7 @@ sk_load_byte_msh_negative_offset:
 bpf_error_slow:
 	/* fabricate a cr0 = lt */
 	li	r_scratch1, -1
-	cmpdi	r_scratch1, 0
+	PPC_LCMPI	r_scratch1, 0
 bpf_error:
 	/* Entered with cr0 = lt */
 	li	r3, 0
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d1916b577f2c..17cea18a09d3 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1,8 +1,9 @@
-/* bpf_jit_comp.c: BPF JIT compiler for PPC64
+/* bpf_jit_comp.c: BPF JIT compiler
  *
  * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
  *
  * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
+ * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -36,11 +37,11 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
 		if (ctx->seen & SEEN_DATAREF) {
 			/* If we call any helpers (for loads), save LR */
 			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
-			PPC_STD(0, 1, 16);
+			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
 
 			/* Back up non-volatile regs. */
-			PPC_STD(r_D, 1, -(8*(32-r_D)));
-			PPC_STD(r_HL, 1, -(8*(32-r_HL)));
+			PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D)));
+			PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL)));
 		}
 		if (ctx->seen & SEEN_MEM) {
 			/*
@@ -49,11 +50,10 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
 			 */
 			for (i = r_M; i < (r_M+16); i++) {
 				if (ctx->seen & (1 << (i-r_M)))
-					PPC_STD(i, 1, -(8*(32-i)));
+					PPC_BPF_STL(i, 1, -(REG_SZ*(32-i)));
 			}
 		}
-		EMIT(PPC_INST_STDU | __PPC_RS(R1) | __PPC_RA(R1) |
-		     (-BPF_PPC_STACKFRAME & 0xfffc));
+		PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
 	}
 
 	if (ctx->seen & SEEN_DATAREF) {
@@ -67,7 +67,7 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
 							 data_len));
 		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
 		PPC_SUB(r_HL, r_HL, r_scratch1);
-		PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
+		PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
 	}
 
 	if (ctx->seen & SEEN_XREG) {
@@ -99,16 +99,16 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
 		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
 		if (ctx->seen & SEEN_DATAREF) {
-			PPC_LD(0, 1, 16);
+			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
 			PPC_MTLR(0);
-			PPC_LD(r_D, 1, -(8*(32-r_D)));
-			PPC_LD(r_HL, 1, -(8*(32-r_HL)));
+			PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D)));
+			PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL)));
 		}
 		if (ctx->seen & SEEN_MEM) {
 			/* Restore any saved non-vol registers */
 			for (i = r_M; i < (r_M+16); i++) {
 				if (ctx->seen & (1 << (i-r_M)))
-					PPC_LD(i, 1, -(8*(32-i)));
+					PPC_BPF_LL(i, 1, -(REG_SZ*(32-i)));
 			}
 		}
 	}
@@ -355,7 +355,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 						ifindex) != 4);
 			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
 						type) != 2);
-			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
+			PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 								dev));
 			PPC_CMPDI(r_scratch1, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -411,20 +411,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			PPC_SRWI(r_A, r_A, 5);
 			break;
 		case BPF_ANC | SKF_AD_CPU:
-#ifdef CONFIG_SMP
-			/*
-			 * PACA ptr is r13:
-			 * raw_smp_processor_id() = local_paca->paca_index
-			 */
-			BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct,
-						  paca_index) != 2);
-			PPC_LHZ_OFFS(r_A, 13,
-				     offsetof(struct paca_struct, paca_index));
-#else
-			PPC_LI(r_A, 0);
-#endif
+			PPC_BPF_LOAD_CPU(r_A);
 			break;
-
 			/*** Absolute loads from packet header/data ***/
 		case BPF_LD | BPF_W | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
@@ -437,7 +425,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 		common_load:
 			/* Load from [K]. */
 			ctx->seen |= SEEN_DATAREF;
-			PPC_LI64(r_scratch1, func);
+			PPC_FUNC_ADDR(r_scratch1, func);
 			PPC_MTLR(r_scratch1);
 			PPC_LI32(r_addr, K);
 			PPC_BLRL();
@@ -463,7 +451,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
-			PPC_LI64(r_scratch1, func);
+			PPC_FUNC_ADDR(r_scratch1, func);
 			PPC_MTLR(r_scratch1);
 			PPC_ADDI(r_addr, r_X, IMM_L(K));
 			if (K >= 32768)
@@ -685,9 +673,11 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	if (image) {
 		bpf_flush_icache(code_base, code_base + (proglen/4));
+#ifdef CONFIG_PPC64
 		/* Function descriptor nastiness: Address + TOC */
 		((u64 *)image)[0] = (u64)code_base;
 		((u64 *)image)[1] = local_paca->kernel_toc;
+#endif
 		fp->bpf_func = (void *)image;
 		fp->jited = true;
 	}
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 1c27831df1ac..ed7b0977072a 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -22,6 +22,7 @@
 #include <linux/kref.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/numa.h>
@@ -322,18 +323,20 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 	unsigned long app_cookie = 0;
 	unsigned int my_offset = 0;
 	struct vm_area_struct *vma;
+	struct file *exe_file;
 	struct mm_struct *mm = spu->mm;
 
 	if (!mm)
 		goto out;
 
-	down_read(&mm->mmap_sem);
-
-	if (mm->exe_file) {
-		app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
-		pr_debug("got dcookie for %pD\n", mm->exe_file);
+	exe_file = get_mm_exe_file(mm);
+	if (exe_file) {
+		app_cookie = fast_get_dcookie(&exe_file->f_path);
+		pr_debug("got dcookie for %pD\n", exe_file);
+		fput(exe_file);
 	}
 
+	down_read(&mm->mmap_sem);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
 			continue;
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 2396dda282cd..ead55351b254 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -243,7 +243,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
 
-	for (;;) {
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		fp = (unsigned long __user *) sp;
 		if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
 			return;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 7c4f6690533a..12b638425bb9 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -124,7 +124,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_flush_branch_stack(void) {}
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
@@ -350,6 +350,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
 		cpuhw->bhrb_context = event->ctx;
 	}
 	cpuhw->bhrb_users++;
+	perf_sched_cb_inc(event->ctx->pmu);
 }
 
 static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -361,6 +362,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 
 	cpuhw->bhrb_users--;
 	WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
 		/* BHRB cannot be turned off when other
@@ -375,9 +377,12 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 /* Called from ctxsw to prevent one process's branch entries to
  * mingle with the other process's entries during context switch.
  */
-static void power_pmu_flush_branch_stack(void)
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (ppmu->bhrb_nr)
+	if (!ppmu->bhrb_nr)
+		return;
+
+	if (sched_in)
 		power_pmu_bhrb_reset();
 }
 /* Calculate the to address for a branch */
@@ -1832,8 +1837,10 @@ static int power_pmu_event_init(struct perf_event *event)
 		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
 
-		if(cpuhw->bhrb_filter == -1)
+		if (cpuhw->bhrb_filter == -1) {
+			put_cpu_var(cpu_hw_events);
 			return -EOPNOTSUPP;
+		}
 	}
 
 	put_cpu_var(cpu_hw_events);
@@ -1901,7 +1908,7 @@ static struct pmu power_pmu = {
 	.cancel_txn	= power_pmu_cancel_txn,
 	.commit_txn	= power_pmu_commit_txn,
 	.event_idx	= power_pmu_event_idx,
-	.flush_branch_stack = power_pmu_flush_branch_stack,
+	.sched_task	= power_pmu_sched_task,
 };
 
 /*
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9445a824819e..ec2eb20631d1 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = {
 
 static struct kmem_cache *hv_page_cache;
 
+/*
+ * request_buffer and result_buffer are not required to be 4k aligned,
+ * but are not allowed to cross any 4k boundary. Aligning them to 4k is
+ * the simplest way to ensure that.
+ */
+#define H24x7_DATA_BUFFER_SIZE	4096
+DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+
 static char *event_name(struct hv_24x7_event_data *ev, int *len)
 {
 	*len = be16_to_cpu(ev->event_name_len) - 2;
@@ -152,6 +161,7 @@ static char *event_desc(struct hv_24x7_event_data *ev, int *len)
 {
 	unsigned nl = be16_to_cpu(ev->event_name_len);
 	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
+
 	*len = be16_to_cpu(*desc_len) - 2;
 	return (char *)ev->remainder + nl;
 }
@@ -162,6 +172,7 @@ static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
 	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
 	unsigned desc_len = be16_to_cpu(*desc_len_);
 	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
+
 	*len = be16_to_cpu(*long_desc_len) - 2;
 	return (char *)ev->remainder + nl + desc_len;
 }
@@ -239,14 +250,12 @@ static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
 					      unsigned long index)
 {
 	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
-			phys_4096,
-			version,
-			index);
+			phys_4096, version, index);
+
 	WARN_ON(!IS_ALIGNED(phys_4096, 4096));
+
 	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
-			phys_4096,
-			version,
-			index);
+			phys_4096, version, index);
 }
 
 static unsigned long h_get_24x7_catalog_page(char page[],
@@ -300,6 +309,7 @@ static ssize_t device_show_string(struct device *dev,
 	struct dev_ext_attribute *d;
 
 	d = container_of(attr, struct dev_ext_attribute, attr);
+
 	return sprintf(buf, "%s\n", (char *)d->var);
 }
 
@@ -314,6 +324,7 @@ static struct attribute *device_str_attr_create_(char *name, char *str)
 	attr->attr.attr.name = name;
 	attr->attr.attr.mode = 0444;
 	attr->attr.show = device_show_string;
+
 	return &attr->attr.attr;
 }
 
@@ -387,7 +398,6 @@ static struct attribute *event_to_attr(unsigned ix,
 		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
 				(int)event_name_len, ev_name, ev_suffix, nonce);
 
-
 	if (!a_ev_name)
 		goto out_val;
 
@@ -637,7 +647,7 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
 
 #define MAX_4K (SIZE_MAX / 4096)
 
-static void create_events_from_catalog(struct attribute ***events_,
+static int create_events_from_catalog(struct attribute ***events_,
 		struct attribute ***event_descs_,
 		struct attribute ***event_long_descs_)
 {
@@ -655,19 +665,25 @@ static void create_events_from_catalog(struct attribute ***events_,
 	void *event_data, *end;
 	struct hv_24x7_event_data *event;
 	struct rb_root ev_uniq = RB_ROOT;
+	int ret = 0;
 
-	if (!page)
+	if (!page) {
+		ret = -ENOMEM;
 		goto e_out;
+	}
 
 	hret = h_get_24x7_catalog_page(page, 0, 0);
-	if (hret)
+	if (hret) {
+		ret = -EIO;
 		goto e_free;
+	}
 
 	catalog_version_num = be64_to_cpu(page_0->version);
 	catalog_page_len = be32_to_cpu(page_0->length);
 
 	if (MAX_4K < catalog_page_len) {
 		pr_err("invalid page count: %zu\n", catalog_page_len);
+		ret = -EIO;
 		goto e_free;
 	}
 
@@ -686,6 +702,7 @@ static void create_events_from_catalog(struct attribute ***events_,
 			|| (MAX_4K - event_data_offs < event_data_len)) {
 		pr_err("invalid event data offs %zu and/or len %zu\n",
 				event_data_offs, event_data_len);
+		ret = -EIO;
 		goto e_free;
 	}
 
@@ -694,12 +711,14 @@ static void create_events_from_catalog(struct attribute ***events_,
 				event_data_offs,
 				event_data_offs + event_data_len,
 				catalog_page_len);
+		ret = -EIO;
 		goto e_free;
 	}
 
 	if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
 		pr_err("event_entry_count %zu is invalid\n",
 				event_entry_count);
+		ret = -EIO;
 		goto e_free;
 	}
 
@@ -712,6 +731,7 @@ static void create_events_from_catalog(struct attribute ***events_,
 	event_data = vmalloc(event_data_bytes);
 	if (!event_data) {
 		pr_err("could not allocate event data\n");
+		ret = -ENOMEM;
 		goto e_free;
 	}
 
@@ -731,6 +751,7 @@ static void create_events_from_catalog(struct attribute ***events_,
 		if (hret) {
 			pr_err("failed to get event data in page %zu\n",
 					i + event_data_offs);
+			ret = -EIO;
 			goto e_event_data;
 		}
 	}
@@ -778,18 +799,24 @@ static void create_events_from_catalog(struct attribute ***events_,
 				event_idx_last, event_entry_count, junk_events);
 
 	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
-	if (!events)
+	if (!events) {
+		ret = -ENOMEM;
 		goto e_event_data;
+	}
 
 	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
 				GFP_KERNEL);
-	if (!event_descs)
+	if (!event_descs) {
+		ret = -ENOMEM;
 		goto e_event_attrs;
+	}
 
 	event_long_descs = kmalloc_array(event_idx + 1,
 			sizeof(*event_long_descs), GFP_KERNEL);
-	if (!event_long_descs)
+	if (!event_long_descs) {
+		ret = -ENOMEM;
 		goto e_event_descs;
+	}
 
 	/* Iterate over the catalog filling in the attribute vector */
 	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
@@ -843,7 +870,7 @@ static void create_events_from_catalog(struct attribute ***events_,
 	*events_ = events;
 	*event_descs_ = event_descs;
 	*event_long_descs_ = event_long_descs;
-	return;
+	return 0;
 
 e_event_descs:
 	kfree(event_descs);
@@ -857,6 +884,7 @@ e_out:
 	*events_ = NULL;
 	*event_descs_ = NULL;
 	*event_long_descs_ = NULL;
+	return ret;
 }
 
 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
@@ -872,6 +900,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
 	uint64_t catalog_version_num = 0;
 	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
 	struct hv_24x7_catalog_page_0 *page_0 = page;
+
 	if (!page)
 		return -ENOMEM;
 
@@ -976,31 +1005,104 @@ static const struct attribute_group *attr_groups[] = {
 	NULL,
 };
 
-DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
-DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
+			struct hv_24x7_data_result_buffer *result_buffer,
+			unsigned long ret)
+{
+	struct hv_24x7_request *req;
+
+	req = &request_buffer->requests[0];
+	pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
+			"ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+			req->performance_domain, req->data_offset,
+			req->starting_ix, req->starting_lpar_ix, ret, ret,
+			result_buffer->detailed_rc,
+			result_buffer->failing_request_ix);
+}
+
+/*
+ * Start the process for a new H_GET_24x7_DATA hcall.
+ */
+static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+			struct hv_24x7_data_result_buffer *result_buffer)
+{
+
+	memset(request_buffer, 0, 4096);
+	memset(result_buffer, 0, 4096);
+
+	request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
+	/* memset above set request_buffer->num_requests to 0 */
+}
 
-static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
-					 u16 lpar, u64 *res,
-					 bool success_expected)
+/*
+ * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected
+ * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
+ */
+static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+			struct hv_24x7_data_result_buffer *result_buffer)
 {
 	unsigned long ret;
 
 	/*
-	 * request_buffer and result_buffer are not required to be 4k aligned,
-	 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
-	 * the simplest way to ensure that.
+	 * NOTE: Due to variable number of array elements in request and
+	 *	 result buffer(s), sizeof() is not reliable. Use the actual
+	 *	 allocated buffer size, H24x7_DATA_BUFFER_SIZE.
 	 */
-	struct reqb {
-		struct hv_24x7_request_buffer buf;
-		struct hv_24x7_request req;
-	} __packed *request_buffer;
-
-	struct {
-		struct hv_24x7_data_result_buffer buf;
-		struct hv_24x7_result res;
-		struct hv_24x7_result_element elem;
-		__be64 result;
-	} __packed *result_buffer;
+	ret = plpar_hcall_norets(H_GET_24X7_DATA,
+			virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
+			virt_to_phys(result_buffer),  H24x7_DATA_BUFFER_SIZE);
+
+	if (ret)
+		log_24x7_hcall(request_buffer, result_buffer, ret);
+
+	return ret;
+}
+
+/*
+ * Add the given @event to the next slot in the 24x7 request_buffer.
+ *
+ * Note that H_GET_24X7_DATA hcall allows reading several counters'
+ * values in a single HCALL. We expect the caller to add events to the
+ * request buffer one by one, make the HCALL and process the results.
+ */
+static int add_event_to_24x7_request(struct perf_event *event,
+				struct hv_24x7_request_buffer *request_buffer)
+{
+	u16 idx;
+	int i;
+	struct hv_24x7_request *req;
+
+	if (request_buffer->num_requests > 254) {
+		pr_devel("Too many requests for 24x7 HCALL %d\n",
+				request_buffer->num_requests);
+		return -EINVAL;
+	}
+
+	if (is_physical_domain(event_get_domain(event)))
+		idx = event_get_core(event);
+	else
+		idx = event_get_vcpu(event);
+
+	i = request_buffer->num_requests++;
+	req = &request_buffer->requests[i];
+
+	req->performance_domain = event_get_domain(event);
+	req->data_size = cpu_to_be16(8);
+	req->data_offset = cpu_to_be32(event_get_offset(event));
+	req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)),
+	req->max_num_lpars = cpu_to_be16(1);
+	req->starting_ix = cpu_to_be16(idx);
+	req->max_ix = cpu_to_be16(1);
+
+	return 0;
+}
+
+static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
+{
+	unsigned long ret;
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+	struct hv_24x7_result *resb;
 
 	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
 	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
@@ -1008,63 +1110,28 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
 	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
 
-	memset(request_buffer, 0, 4096);
-	memset(result_buffer, 0, 4096);
-
-	*request_buffer = (struct reqb) {
-		.buf = {
-			.interface_version = HV_24X7_IF_VERSION_CURRENT,
-			.num_requests = 1,
-		},
-		.req = {
-			.performance_domain = domain,
-			.data_size = cpu_to_be16(8),
-			.data_offset = cpu_to_be32(offset),
-			.starting_lpar_ix = cpu_to_be16(lpar),
-			.max_num_lpars = cpu_to_be16(1),
-			.starting_ix = cpu_to_be16(ix),
-			.max_ix = cpu_to_be16(1),
-		}
-	};
+	init_24x7_request(request_buffer, result_buffer);
 
-	ret = plpar_hcall_norets(H_GET_24X7_DATA,
-			virt_to_phys(request_buffer), sizeof(*request_buffer),
-			virt_to_phys(result_buffer),  sizeof(*result_buffer));
+	ret = add_event_to_24x7_request(event, request_buffer);
+	if (ret)
+		goto out;
 
+	ret = make_24x7_request(request_buffer, result_buffer);
 	if (ret) {
-		if (success_expected)
-			pr_err_ratelimited("hcall failed: %d %#x %#x %d => "
-				"0x%lx (%ld) detail=0x%x failing ix=%x\n",
-				domain, offset, ix, lpar, ret, ret,
-				result_buffer->buf.detailed_rc,
-				result_buffer->buf.failing_request_ix);
+		log_24x7_hcall(request_buffer, result_buffer, ret);
 		goto out;
 	}
 
-	*res = be64_to_cpu(result_buffer->result);
+	/* process result from hcall */
+	resb = &result_buffer->results[0];
+	*count = be64_to_cpu(resb->elements[0].element_data[0]);
 
 out:
+	put_cpu_var(hv_24x7_reqb);
+	put_cpu_var(hv_24x7_resb);
 	return ret;
 }
 
-static unsigned long event_24x7_request(struct perf_event *event, u64 *res,
-		bool success_expected)
-{
-	u16 idx;
-	unsigned domain = event_get_domain(event);
-
-	if (is_physical_domain(domain))
-		idx = event_get_core(event);
-	else
-		idx = event_get_vcpu(event);
-
-	return single_24x7_request(event_get_domain(event),
-				event_get_offset(event),
-				idx,
-				event_get_lpar(event),
-				res,
-				success_expected);
-}
 
 static int h_24x7_event_init(struct perf_event *event)
 {
@@ -1126,14 +1193,14 @@ static int h_24x7_event_init(struct perf_event *event)
 	/* Physical domains & other lpars require extra capabilities */
 	if (!caps.collect_privileged && (is_physical_domain(domain) ||
 		(event_get_lpar(event) != event_get_lpar_max()))) {
-		pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n",
+		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
 				is_physical_domain(domain),
 				event_get_lpar(event));
 		return -EACCES;
 	}
 
 	/* see if the event complains */
-	if (event_24x7_request(event, &ct, false)) {
+	if (single_24x7_request(event, &ct)) {
 		pr_devel("test hcall failed\n");
 		return -EIO;
 	}
@@ -1145,7 +1212,7 @@ static u64 h_24x7_get_value(struct perf_event *event)
 {
 	unsigned long ret;
 	u64 ct;
-	ret = event_24x7_request(event, &ct, true);
+	ret = single_24x7_request(event, &ct);
 	if (ret)
 		/* We checked this in event init, shouldn't fail here... */
 		return 0;
@@ -1153,15 +1220,22 @@ static u64 h_24x7_get_value(struct perf_event *event)
 	return ct;
 }
 
-static void h_24x7_event_update(struct perf_event *event)
+static void update_event_count(struct perf_event *event, u64 now)
 {
 	s64 prev;
-	u64 now;
-	now = h_24x7_get_value(event);
+
 	prev = local64_xchg(&event->hw.prev_count, now);
 	local64_add(now - prev, &event->count);
 }
 
+static void h_24x7_event_read(struct perf_event *event)
+{
+	u64 now;
+
+	now = h_24x7_get_value(event);
+	update_event_count(event, now);
+}
+
 static void h_24x7_event_start(struct perf_event *event, int flags)
 {
 	if (flags & PERF_EF_RELOAD)
@@ -1170,7 +1244,7 @@ static void h_24x7_event_start(struct perf_event *event, int flags)
 
 static void h_24x7_event_stop(struct perf_event *event, int flags)
 {
-	h_24x7_event_update(event);
+	h_24x7_event_read(event);
 }
 
 static int h_24x7_event_add(struct perf_event *event, int flags)
@@ -1191,7 +1265,7 @@ static struct pmu h_24x7_pmu = {
 	.del         = h_24x7_event_stop,
 	.start       = h_24x7_event_start,
 	.stop        = h_24x7_event_stop,
-	.read        = h_24x7_event_update,
+	.read        = h_24x7_event_read,
 };
 
 static int hv_24x7_init(void)
@@ -1219,10 +1293,13 @@ static int hv_24x7_init(void)
 	/* sampling not supported */
 	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
-	create_events_from_catalog(&event_group.attrs,
+	r = create_events_from_catalog(&event_group.attrs,
 				   &event_desc_group.attrs,
 				   &event_long_desc_group.attrs);
 
+	if (r)
+		return r;
+
 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
 	if (r)
 		return r;
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 69cd4e690f58..0f9fa21a29f2 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -50,7 +50,7 @@ struct hv_24x7_request_buffer {
 	__u8 interface_version;
 	__u8 num_requests;
 	__u8 reserved[0xE];
-	struct hv_24x7_request requests[];
+	struct hv_24x7_request requests[1];
 } __packed;
 
 struct hv_24x7_result_element {
@@ -66,7 +66,7 @@ struct hv_24x7_result_element {
 	__be32 lpar_cfg_instance_id;
 
 	/* size = @result_element_data_size of cointaining result. */
-	__u8 element_data[];
+	__u64 element_data[1];
 } __packed;
 
 struct hv_24x7_result {
@@ -87,7 +87,7 @@ struct hv_24x7_result {
 	/* WARNING: only valid for first result element due to variable sizes
 	 *          of result elements */
 	/* struct hv_24x7_result_element[@num_elements_returned] */
-	struct hv_24x7_result_element elements[];
+	struct hv_24x7_result_element elements[1];
 } __packed;
 
 struct hv_24x7_data_result_buffer {
@@ -103,7 +103,7 @@ struct hv_24x7_data_result_buffer {
 	__u8 reserved2[0x8];
 	/* WARNING: only valid for the first result due to variable sizes of
 	 *	    results */
-	struct hv_24x7_result results[]; /* [@num_results] */
+	struct hv_24x7_result results[1]; /* [@num_results] */
 } __packed;
 
 #endif
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 4a9ad871a168..7bfb9b184dd4 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -40,6 +40,7 @@ static const struct of_device_id mpc85xx_common_ids[] __initconst = {
 	{ .compatible = "fsl,qoriq-pcie-v2.4", },
 	{ .compatible = "fsl,qoriq-pcie-v2.3", },
 	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{ .compatible = "fsl,fman", },
 	{},
 };
 
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 1f309ccb096e..9824d2cf79bd 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -88,6 +88,15 @@ static const struct of_device_id of_device_ids[] = {
 		.compatible	= "simple-bus"
 	},
 	{
+		.compatible	= "mdio-mux-gpio"
+	},
+	{
+		.compatible	= "fsl,fpga-ngpixis"
+	},
+	{
+		.compatible	= "fsl,fpga-qixis"
+	},
+	{
 		.compatible	= "fsl,srio",
 	},
 	{
@@ -108,6 +117,9 @@ static const struct of_device_id of_device_ids[] = {
 	{
 		.compatible	= "fsl,qe",
 	},
+	{
+		.compatible    = "fsl,fman",
+	},
 	/* The following two are for the Freescale hypervisor */
 	{
 		.name		= "hypervisor",
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index 7a180f0308d5..680232d6ba48 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -50,14 +50,14 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock)
 	/* Map the global utilities registers. */
 	guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
 	if (!guts_np) {
-		pr_err("p1022rdk: missing global utilties device node\n");
+		pr_err("p1022rdk: missing global utilities device node\n");
 		return;
 	}
 
 	guts = of_iomap(guts_np, 0);
 	of_node_put(guts_np);
 	if (!guts) {
-		pr_err("p1022rdk: could not map global utilties device\n");
+		pr_err("p1022rdk: could not map global utilities device\n");
 		return;
 	}
 
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index d7c1e69f3070..8631ac5f0e57 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -360,10 +360,10 @@ static void mpc85xx_smp_kexec_down(void *arg)
 static void map_and_flush(unsigned long paddr)
 {
 	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
-	unsigned long kaddr  = (unsigned long)kmap(page);
+	unsigned long kaddr  = (unsigned long)kmap_atomic(page);
 
 	flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
-	kunmap(page);
+	kunmap_atomic((void *)kaddr);
 }
 
 /**
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 391b3f6b54a3..b7f9c408bf24 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -72,11 +72,6 @@ config PPC_SMP_MUXED_IPI
 	  cpu.	This will enable the generic code to multiplex the 4
 	  messages on to one ipi.
 
-config PPC_UDBG_BEAT
-	bool "BEAT based debug console"
-	depends on PPC_CELLEB
-	default n
-
 config IPIC
 	bool
 	default n
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 76483e3acd60..7264e91190be 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -2,6 +2,7 @@ config PPC64
 	bool "64-bit kernel"
 	default n
 	select HAVE_VIRT_CPU_ACCOUNTING
+	select ZLIB_DEFLATE
 	help
 	  This option selects whether a 32-bit or a 64-bit kernel
 	  will be built.
@@ -15,7 +16,7 @@ choice
 	  The most common ones are the desktop and server CPUs (601, 603,
 	  604, 740, 750, 74xx) CPUs from Freescale and IBM, with their
 	  embedded 512x/52xx/82xx/83xx/86xx counterparts.
-	  The other embeeded parts, namely 4xx, 8xx, e200 (55xx) and e500
+	  The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500
 	  (85xx) each form a family of their own that is not compatible
 	  with the others.
 
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 870b6dbd4d18..2f23133ab3d1 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -33,17 +33,6 @@ config PPC_IBM_CELL_BLADE
 	select PPC_UDBG_16550
 	select UDBG_RTAS_CONSOLE
 
-config PPC_CELLEB
-	bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
-	depends on PPC64 && PPC_BOOK3S
-	select PPC_CELL_NATIVE
-	select PPC_OF_PLATFORM_PCI
-	select PCI
-	select HAS_TXX9_SERIAL
-	select PPC_UDBG_BEAT
-	select USB_OHCI_BIG_ENDIAN_MMIO
-	select USB_EHCI_BIG_ENDIAN_MMIO
-
 config PPC_CELL_QPACE
 	bool "IBM Cell - QPACE"
 	depends on PPC64 && PPC_BOOK3S
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 2d16884f67b9..34699bddfddd 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -29,18 +29,3 @@ obj-$(CONFIG_AXON_MSI)			+= axon_msi.o
 
 # qpace setup
 obj-$(CONFIG_PPC_CELL_QPACE)		+= qpace_setup.o
-
-# celleb stuff
-ifeq ($(CONFIG_PPC_CELLEB),y)
-obj-y					+= celleb_setup.o \
-					   celleb_pci.o celleb_scc_epci.o \
-					   celleb_scc_pciex.o \
-					   celleb_scc_uhc.o \
-					   spider-pci.o beat.o beat_htab.o \
-					   beat_hvCall.o beat_interrupt.o \
-					   beat_iommu.o
-
-obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
-obj-$(CONFIG_SERIAL_TXX9)		+= celleb_scc_sio.o
-obj-$(CONFIG_SPU_BASE)			+= beat_spu_priv1.o
-endif
diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c
deleted file mode 100644
index affcf566d460..000000000000
--- a/arch/powerpc/platforms/cell/beat.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Simple routines for Celleb/Beat
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/rtc.h>
-#include <linux/interrupt.h>
-#include <linux/irqreturn.h>
-#include <linux/reboot.h>
-
-#include <asm/hvconsole.h>
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/firmware.h>
-
-#include "beat_wrapper.h"
-#include "beat.h"
-#include "beat_interrupt.h"
-
-static int beat_pm_poweroff_flag;
-
-void beat_restart(char *cmd)
-{
-	beat_shutdown_logical_partition(!beat_pm_poweroff_flag);
-}
-
-void beat_power_off(void)
-{
-	beat_shutdown_logical_partition(0);
-}
-
-u64 beat_halt_code = 0x1000000000000000UL;
-EXPORT_SYMBOL(beat_halt_code);
-
-void beat_halt(void)
-{
-	beat_shutdown_logical_partition(beat_halt_code);
-}
-
-int beat_set_rtc_time(struct rtc_time *rtc_time)
-{
-	u64 tim;
-	tim = mktime(rtc_time->tm_year+1900,
-		     rtc_time->tm_mon+1, rtc_time->tm_mday,
-		     rtc_time->tm_hour, rtc_time->tm_min, rtc_time->tm_sec);
-	if (beat_rtc_write(tim))
-		return -1;
-	return 0;
-}
-
-void beat_get_rtc_time(struct rtc_time *rtc_time)
-{
-	u64 tim;
-
-	if (beat_rtc_read(&tim))
-		tim = 0;
-	to_tm(tim, rtc_time);
-	rtc_time->tm_year -= 1900;
-	rtc_time->tm_mon -= 1;
-}
-
-#define	BEAT_NVRAM_SIZE	4096
-
-ssize_t beat_nvram_read(char *buf, size_t count, loff_t *index)
-{
-	unsigned int i;
-	unsigned long len;
-	char *p = buf;
-
-	if (*index >= BEAT_NVRAM_SIZE)
-		return -ENODEV;
-	i = *index;
-	if (i + count > BEAT_NVRAM_SIZE)
-		count = BEAT_NVRAM_SIZE - i;
-
-	for (; count != 0; count -= len) {
-		len = count;
-		if (len > BEAT_NVRW_CNT)
-			len = BEAT_NVRW_CNT;
-		if (beat_eeprom_read(i, len, p))
-			return -EIO;
-
-		p += len;
-		i += len;
-	}
-	*index = i;
-	return p - buf;
-}
-
-ssize_t beat_nvram_write(char *buf, size_t count, loff_t *index)
-{
-	unsigned int i;
-	unsigned long len;
-	char *p = buf;
-
-	if (*index >= BEAT_NVRAM_SIZE)
-		return -ENODEV;
-	i = *index;
-	if (i + count > BEAT_NVRAM_SIZE)
-		count = BEAT_NVRAM_SIZE - i;
-
-	for (; count != 0; count -= len) {
-		len = count;
-		if (len > BEAT_NVRW_CNT)
-			len = BEAT_NVRW_CNT;
-		if (beat_eeprom_write(i, len, p))
-			return -EIO;
-
-		p += len;
-		i += len;
-	}
-	*index = i;
-	return p - buf;
-}
-
-ssize_t beat_nvram_get_size(void)
-{
-	return BEAT_NVRAM_SIZE;
-}
-
-int beat_set_xdabr(unsigned long dabr, unsigned long dabrx)
-{
-	if (beat_set_dabr(dabr, dabrx))
-		return -1;
-	return 0;
-}
-
-int64_t beat_get_term_char(u64 vterm, u64 *len, u64 *t1, u64 *t2)
-{
-	u64 db[2];
-	s64 ret;
-
-	ret = beat_get_characters_from_console(vterm, len, (u8 *)db);
-	if (ret == 0) {
-		*t1 = db[0];
-		*t2 = db[1];
-	}
-	return ret;
-}
-EXPORT_SYMBOL(beat_get_term_char);
-
-int64_t beat_put_term_char(u64 vterm, u64 len, u64 t1, u64 t2)
-{
-	u64 db[2];
-
-	db[0] = t1;
-	db[1] = t2;
-	return beat_put_characters_to_console(vterm, len, (u8 *)db);
-}
-EXPORT_SYMBOL(beat_put_term_char);
-
-void beat_power_save(void)
-{
-	beat_pause(0);
-}
-
-#ifdef CONFIG_KEXEC
-void beat_kexec_cpu_down(int crash, int secondary)
-{
-	beatic_deinit_IRQ();
-}
-#endif
-
-static irqreturn_t beat_power_event(int virq, void *arg)
-{
-	printk(KERN_DEBUG "Beat: power button pressed\n");
-	beat_pm_poweroff_flag = 1;
-	ctrl_alt_del();
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t beat_reset_event(int virq, void *arg)
-{
-	printk(KERN_DEBUG "Beat: reset button pressed\n");
-	beat_pm_poweroff_flag = 0;
-	ctrl_alt_del();
-	return IRQ_HANDLED;
-}
-
-static struct beat_event_list {
-	const char *typecode;
-	irq_handler_t handler;
-	unsigned int virq;
-} beat_event_list[] = {
-	{ "power", beat_power_event, 0 },
-	{ "reset", beat_reset_event, 0 },
-};
-
-static int __init beat_register_event(void)
-{
-	u64 path[4], data[2];
-	int rc, i;
-	unsigned int virq;
-
-	for (i = 0; i < ARRAY_SIZE(beat_event_list); i++) {
-		struct beat_event_list *ev = &beat_event_list[i];
-
-		if (beat_construct_event_receive_port(data) != 0) {
-			printk(KERN_ERR "Beat: "
-			       "cannot construct event receive port for %s\n",
-			       ev->typecode);
-			return -EINVAL;
-		}
-
-		virq = irq_create_mapping(NULL, data[0]);
-		if (virq == NO_IRQ) {
-			printk(KERN_ERR "Beat: failed to get virtual IRQ"
-			       " for event receive port for %s\n",
-			       ev->typecode);
-			beat_destruct_event_receive_port(data[0]);
-			return -EIO;
-		}
-		ev->virq = virq;
-
-		rc = request_irq(virq, ev->handler, 0,
-				      ev->typecode, NULL);
-		if (rc != 0) {
-			printk(KERN_ERR "Beat: failed to request virtual IRQ"
-			       " for event receive port for %s\n",
-			       ev->typecode);
-			beat_destruct_event_receive_port(data[0]);
-			return rc;
-		}
-
-		path[0] = 0x1000000065780000ul;	/* 1,ex */
-		path[1] = 0x627574746f6e0000ul;	/* button */
-		path[2] = 0;
-		strncpy((char *)&path[2], ev->typecode, 8);
-		path[3] = 0;
-		data[1] = 0;
-
-		beat_create_repository_node(path, data);
-	}
-	return 0;
-}
-
-static int __init beat_event_init(void)
-{
-	if (!firmware_has_feature(FW_FEATURE_BEAT))
-		return -EINVAL;
-
-	beat_pm_poweroff_flag = 0;
-	return beat_register_event();
-}
-
-device_initcall(beat_event_init);
diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h
deleted file mode 100644
index bfcb8e351ae5..000000000000
--- a/arch/powerpc/platforms/cell/beat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Guest OS Interfaces.
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _CELLEB_BEAT_H
-#define _CELLEB_BEAT_H
-
-int64_t beat_get_term_char(uint64_t, uint64_t *, uint64_t *, uint64_t *);
-int64_t beat_put_term_char(uint64_t, uint64_t, uint64_t, uint64_t);
-int64_t beat_repository_encode(int, const char *, uint64_t[4]);
-void beat_restart(char *);
-void beat_power_off(void);
-void beat_halt(void);
-int beat_set_rtc_time(struct rtc_time *);
-void beat_get_rtc_time(struct rtc_time *);
-ssize_t beat_nvram_get_size(void);
-ssize_t beat_nvram_read(char *, size_t, loff_t *);
-ssize_t beat_nvram_write(char *, size_t, loff_t *);
-int beat_set_xdabr(unsigned long, unsigned long);
-void beat_power_save(void);
-void beat_kexec_cpu_down(int, int);
-
-#endif /* _CELLEB_BEAT_H */
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
deleted file mode 100644
index bee9232fe619..000000000000
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * "Cell Reference Set" HTAB support.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/pseries/lpar.c:
- *  Copyright (C) 2001 Todd Inglett, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG_LOW
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-
-#include <asm/mmu.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "beat_wrapper.h"
-
-#ifdef DEBUG_LOW
-#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while (0)
-#else
-#define DBG_LOW(fmt...) do { } while (0)
-#endif
-
-static DEFINE_RAW_SPINLOCK(beat_htab_lock);
-
-static inline unsigned int beat_read_mask(unsigned hpte_group)
-{
-	unsigned long rmask = 0;
-	u64 hpte_v[5];
-
-	beat_read_htab_entries(0, hpte_group + 0, hpte_v);
-	if (!(hpte_v[0] & HPTE_V_BOLTED))
-		rmask |= 0x8000;
-	if (!(hpte_v[1] & HPTE_V_BOLTED))
-		rmask |= 0x4000;
-	if (!(hpte_v[2] & HPTE_V_BOLTED))
-		rmask |= 0x2000;
-	if (!(hpte_v[3] & HPTE_V_BOLTED))
-		rmask |= 0x1000;
-	beat_read_htab_entries(0, hpte_group + 4, hpte_v);
-	if (!(hpte_v[0] & HPTE_V_BOLTED))
-		rmask |= 0x0800;
-	if (!(hpte_v[1] & HPTE_V_BOLTED))
-		rmask |= 0x0400;
-	if (!(hpte_v[2] & HPTE_V_BOLTED))
-		rmask |= 0x0200;
-	if (!(hpte_v[3] & HPTE_V_BOLTED))
-		rmask |= 0x0100;
-	hpte_group = ~hpte_group & (htab_hash_mask * HPTES_PER_GROUP);
-	beat_read_htab_entries(0, hpte_group + 0, hpte_v);
-	if (!(hpte_v[0] & HPTE_V_BOLTED))
-		rmask |= 0x80;
-	if (!(hpte_v[1] & HPTE_V_BOLTED))
-		rmask |= 0x40;
-	if (!(hpte_v[2] & HPTE_V_BOLTED))
-		rmask |= 0x20;
-	if (!(hpte_v[3] & HPTE_V_BOLTED))
-		rmask |= 0x10;
-	beat_read_htab_entries(0, hpte_group + 4, hpte_v);
-	if (!(hpte_v[0] & HPTE_V_BOLTED))
-		rmask |= 0x08;
-	if (!(hpte_v[1] & HPTE_V_BOLTED))
-		rmask |= 0x04;
-	if (!(hpte_v[2] & HPTE_V_BOLTED))
-		rmask |= 0x02;
-	if (!(hpte_v[3] & HPTE_V_BOLTED))
-		rmask |= 0x01;
-	return rmask;
-}
-
-static long beat_lpar_hpte_insert(unsigned long hpte_group,
-				  unsigned long vpn, unsigned long pa,
-				  unsigned long rflags, unsigned long vflags,
-				  int psize, int apsize, int ssize)
-{
-	unsigned long lpar_rc;
-	u64 hpte_v, hpte_r, slot;
-
-	if (vflags & HPTE_V_SECONDARY)
-		return -1;
-
-	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
-			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, va, pa, rflags, vflags, psize);
-
-	hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) |
-		vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
-
-	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
-
-	if (rflags & _PAGE_NO_CACHE)
-		hpte_r &= ~HPTE_R_M;
-
-	raw_spin_lock(&beat_htab_lock);
-	lpar_rc = beat_read_mask(hpte_group);
-	if (lpar_rc == 0) {
-		if (!(vflags & HPTE_V_BOLTED))
-			DBG_LOW(" full\n");
-		raw_spin_unlock(&beat_htab_lock);
-		return -1;
-	}
-
-	lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48,
-		hpte_v, hpte_r, &slot);
-	raw_spin_unlock(&beat_htab_lock);
-
-	/*
-	 * Since we try and ioremap PHBs we don't own, the pte insert
-	 * will fail. However we must catch the failure in hash_page
-	 * or we will loop forever, so return -2 in this case.
-	 */
-	if (unlikely(lpar_rc != 0)) {
-		if (!(vflags & HPTE_V_BOLTED))
-			DBG_LOW(" lpar err %lx\n", lpar_rc);
-		return -2;
-	}
-	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW(" -> slot: %lx\n", slot);
-
-	/* We have to pass down the secondary bucket bit here as well */
-	return (slot ^ hpte_group) & 15;
-}
-
-static long beat_lpar_hpte_remove(unsigned long hpte_group)
-{
-	DBG_LOW("hpte_remove(group=%lx)\n", hpte_group);
-	return -1;
-}
-
-static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
-{
-	unsigned long dword0;
-	unsigned long lpar_rc;
-	u64 dword[5];
-
-	lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
-
-	dword0 = dword[slot&3];
-
-	BUG_ON(lpar_rc != 0);
-
-	return dword0;
-}
-
-static void beat_lpar_hptab_clear(void)
-{
-	unsigned long size_bytes = 1UL << ppc64_pft_size;
-	unsigned long hpte_count = size_bytes >> 4;
-	int i;
-	u64 dummy0, dummy1;
-
-	/* TODO: Use bulk call */
-	for (i = 0; i < hpte_count; i++)
-		beat_write_htab_entry(0, i, 0, 0, -1UL, -1UL, &dummy0, &dummy1);
-}
-
-/*
- * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
- * the low 3 bits of flags happen to line up.  So no transform is needed.
- * We can probably optimize here and assume the high bits of newpp are
- * already zero.  For now I am paranoid.
- */
-static long beat_lpar_hpte_updatepp(unsigned long slot,
-				    unsigned long newpp,
-				    unsigned long vpn,
-				    int psize, int apsize,
-				    int ssize, unsigned long flags)
-{
-	unsigned long lpar_rc;
-	u64 dummy0, dummy1;
-	unsigned long want_v;
-
-	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-
-	DBG_LOW("    update: "
-		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
-		want_v & HPTE_V_AVPN, slot, psize, newpp);
-
-	raw_spin_lock(&beat_htab_lock);
-	dummy0 = beat_lpar_hpte_getword0(slot);
-	if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) {
-		DBG_LOW("not found !\n");
-		raw_spin_unlock(&beat_htab_lock);
-		return -1;
-	}
-
-	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0,
-					&dummy1);
-	raw_spin_unlock(&beat_htab_lock);
-	if (lpar_rc != 0 || dummy0 == 0) {
-		DBG_LOW("not found !\n");
-		return -1;
-	}
-
-	DBG_LOW("ok %lx %lx\n", dummy0, dummy1);
-
-	BUG_ON(lpar_rc != 0);
-
-	return 0;
-}
-
-static long beat_lpar_hpte_find(unsigned long vpn, int psize)
-{
-	unsigned long hash;
-	unsigned long i, j;
-	long slot;
-	unsigned long want_v, hpte_v;
-
-	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
-	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-
-	for (j = 0; j < 2; j++) {
-		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		for (i = 0; i < HPTES_PER_GROUP; i++) {
-			hpte_v = beat_lpar_hpte_getword0(slot);
-
-			if (HPTE_V_COMPARE(hpte_v, want_v)
-			    && (hpte_v & HPTE_V_VALID)
-			    && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
-				/* HPTE matches */
-				if (j)
-					slot = -slot;
-				return slot;
-			}
-			++slot;
-		}
-		hash = ~hash;
-	}
-
-	return -1;
-}
-
-static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
-					  unsigned long ea,
-					  int psize, int ssize)
-{
-	unsigned long vpn;
-	unsigned long lpar_rc, slot, vsid;
-	u64 dummy0, dummy1;
-
-	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
-
-	raw_spin_lock(&beat_htab_lock);
-	slot = beat_lpar_hpte_find(vpn, psize);
-	BUG_ON(slot == -1);
-
-	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
-		&dummy0, &dummy1);
-	raw_spin_unlock(&beat_htab_lock);
-
-	BUG_ON(lpar_rc != 0);
-}
-
-static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
-				      int psize, int apsize,
-				      int ssize, int local)
-{
-	unsigned long want_v;
-	unsigned long lpar_rc;
-	u64 dummy1, dummy2;
-	unsigned long flags;
-
-	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		slot, va, psize, local);
-	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-
-	raw_spin_lock_irqsave(&beat_htab_lock, flags);
-	dummy1 = beat_lpar_hpte_getword0(slot);
-
-	if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) {
-		DBG_LOW("not found !\n");
-		raw_spin_unlock_irqrestore(&beat_htab_lock, flags);
-		return;
-	}
-
-	lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0,
-		&dummy1, &dummy2);
-	raw_spin_unlock_irqrestore(&beat_htab_lock, flags);
-
-	BUG_ON(lpar_rc != 0);
-}
-
-void __init hpte_init_beat(void)
-{
-	ppc_md.hpte_invalidate	= beat_lpar_hpte_invalidate;
-	ppc_md.hpte_updatepp	= beat_lpar_hpte_updatepp;
-	ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
-	ppc_md.hpte_insert	= beat_lpar_hpte_insert;
-	ppc_md.hpte_remove	= beat_lpar_hpte_remove;
-	ppc_md.hpte_clear_all	= beat_lpar_hptab_clear;
-}
-
-static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
-				  unsigned long vpn, unsigned long pa,
-				  unsigned long rflags, unsigned long vflags,
-				  int psize, int apsize, int ssize)
-{
-	unsigned long lpar_rc;
-	u64 hpte_v, hpte_r, slot;
-
-	if (vflags & HPTE_V_SECONDARY)
-		return -1;
-
-	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, "
-			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, vpn, pa, rflags, vflags, psize);
-
-	hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) |
-		vflags | HPTE_V_VALID;
-	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
-
-	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
-
-	if (rflags & _PAGE_NO_CACHE)
-		hpte_r &= ~HPTE_R_M;
-
-	/* insert into not-volted entry */
-	lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r,
-		HPTE_V_BOLTED, 0, &slot);
-	/*
-	 * Since we try and ioremap PHBs we don't own, the pte insert
-	 * will fail. However we must catch the failure in hash_page
-	 * or we will loop forever, so return -2 in this case.
-	 */
-	if (unlikely(lpar_rc != 0)) {
-		if (!(vflags & HPTE_V_BOLTED))
-			DBG_LOW(" lpar err %lx\n", lpar_rc);
-		return -2;
-	}
-	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW(" -> slot: %lx\n", slot);
-
-	/* We have to pass down the secondary bucket bit here as well */
-	return (slot ^ hpte_group) & 15;
-}
-
-/*
- * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
- * the low 3 bits of flags happen to line up.  So no transform is needed.
- * We can probably optimize here and assume the high bits of newpp are
- * already zero.  For now I am paranoid.
- */
-static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
-				       unsigned long newpp,
-				       unsigned long vpn,
-				       int psize, int apsize,
-				       int ssize, unsigned long flags)
-{
-	unsigned long lpar_rc;
-	unsigned long want_v;
-	unsigned long pss;
-
-	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize];
-
-	DBG_LOW("    update: "
-		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
-		want_v & HPTE_V_AVPN, slot, psize, newpp);
-
-	lpar_rc = beat_update_htab_permission3(0, slot, want_v, pss, 7, newpp);
-
-	if (lpar_rc == 0xfffffff7) {
-		DBG_LOW("not found !\n");
-		return -1;
-	}
-
-	DBG_LOW("ok\n");
-
-	BUG_ON(lpar_rc != 0);
-
-	return 0;
-}
-
-static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
-					 int psize, int apsize,
-					 int ssize, int local)
-{
-	unsigned long want_v;
-	unsigned long lpar_rc;
-	unsigned long pss;
-
-	DBG_LOW("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
-		slot, vpn, psize, local);
-	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize];
-
-	lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
-
-	/* E_busy can be valid output: page may be already replaced */
-	BUG_ON(lpar_rc != 0 && lpar_rc != 0xfffffff7);
-}
-
-static int64_t _beat_lpar_hptab_clear_v3(void)
-{
-	return beat_clear_htab3(0);
-}
-
-static void beat_lpar_hptab_clear_v3(void)
-{
-	_beat_lpar_hptab_clear_v3();
-}
-
-void __init hpte_init_beat_v3(void)
-{
-	if (_beat_lpar_hptab_clear_v3() == 0) {
-		ppc_md.hpte_invalidate	= beat_lpar_hpte_invalidate_v3;
-		ppc_md.hpte_updatepp	= beat_lpar_hpte_updatepp_v3;
-		ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
-		ppc_md.hpte_insert	= beat_lpar_hpte_insert_v3;
-		ppc_md.hpte_remove	= beat_lpar_hpte_remove;
-		ppc_md.hpte_clear_all	= beat_lpar_hptab_clear_v3;
-	} else {
-		ppc_md.hpte_invalidate	= beat_lpar_hpte_invalidate;
-		ppc_md.hpte_updatepp	= beat_lpar_hpte_updatepp;
-		ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
-		ppc_md.hpte_insert	= beat_lpar_hpte_insert;
-		ppc_md.hpte_remove	= beat_lpar_hpte_remove;
-		ppc_md.hpte_clear_all	= beat_lpar_hptab_clear;
-	}
-}
diff --git a/arch/powerpc/platforms/cell/beat_hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S
deleted file mode 100644
index 96c801907126..000000000000
--- a/arch/powerpc/platforms/cell/beat_hvCall.S
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Beat hypervisor call I/F
- *
- * (C) Copyright 2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/pseries/hvCall.S.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <asm/ppc_asm.h>
-
-/* Not implemented on Beat, now */
-#define	HCALL_INST_PRECALL
-#define	HCALL_INST_POSTCALL
-
-	.text
-
-#define	HVSC	.long	0x44000022
-
-/* Note: takes only 7 input parameters at maximum */
-_GLOBAL(beat_hcall_norets)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	mr	r11,r3
-	mr	r3,r4
-	mr	r4,r5
-	mr	r5,r6
-	mr	r6,r7
-	mr	r7,r8
-	mr	r8,r9
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes 8 input parameters at maximum */
-_GLOBAL(beat_hcall_norets8)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	mr	r11,r3
-	mr	r3,r4
-	mr	r4,r5
-	mr	r5,r6
-	mr	r6,r7
-	mr	r7,r8
-	mr	r8,r9
-	ld	r10,STK_PARAM(R10)(r1)
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes only 6 input parameters, 1 output parameters at maximum */
-_GLOBAL(beat_hcall1)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	std	r4,STK_PARAM(R4)(r1)	/* save ret buffer */
-
-	mr	r11,r3
-	mr	r3,r5
-	mr	r4,r6
-	mr	r5,r7
-	mr	r6,r8
-	mr	r7,r9
-	mr	r8,r10
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	ld	r12,STK_PARAM(R4)(r1)
-	std	r4,  0(r12)
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes only 6 input parameters, 2 output parameters at maximum */
-_GLOBAL(beat_hcall2)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	std	r4,STK_PARAM(R4)(r1)	/* save ret buffer */
-
-	mr	r11,r3
-	mr	r3,r5
-	mr	r4,r6
-	mr	r5,r7
-	mr	r6,r8
-	mr	r7,r9
-	mr	r8,r10
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	ld	r12,STK_PARAM(R4)(r1)
-	std	r4,  0(r12)
-	std	r5,  8(r12)
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes only 6 input parameters, 3 output parameters at maximum */
-_GLOBAL(beat_hcall3)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	std	r4,STK_PARAM(R4)(r1)	/* save ret buffer */
-
-	mr	r11,r3
-	mr	r3,r5
-	mr	r4,r6
-	mr	r5,r7
-	mr	r6,r8
-	mr	r7,r9
-	mr	r8,r10
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	ld	r12,STK_PARAM(R4)(r1)
-	std	r4,  0(r12)
-	std	r5,  8(r12)
-	std	r6, 16(r12)
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes only 6 input parameters, 4 output parameters at maximum */
-_GLOBAL(beat_hcall4)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	std	r4,STK_PARAM(R4)(r1)	/* save ret buffer */
-
-	mr	r11,r3
-	mr	r3,r5
-	mr	r4,r6
-	mr	r5,r7
-	mr	r6,r8
-	mr	r7,r9
-	mr	r8,r10
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	ld	r12,STK_PARAM(R4)(r1)
-	std	r4,  0(r12)
-	std	r5,  8(r12)
-	std	r6, 16(r12)
-	std	r7, 24(r12)
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes only 6 input parameters, 5 output parameters at maximum */
-_GLOBAL(beat_hcall5)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	std	r4,STK_PARAM(R4)(r1)	/* save ret buffer */
-
-	mr	r11,r3
-	mr	r3,r5
-	mr	r4,r6
-	mr	r5,r7
-	mr	r6,r8
-	mr	r7,r9
-	mr	r8,r10
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	ld	r12,STK_PARAM(R4)(r1)
-	std	r4,  0(r12)
-	std	r5,  8(r12)
-	std	r6, 16(r12)
-	std	r7, 24(r12)
-	std	r8, 32(r12)
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
-
-/* Note: takes only 6 input parameters, 6 output parameters at maximum */
-_GLOBAL(beat_hcall6)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL
-
-	std	r4,STK_PARAM(R4)(r1)	/* save ret buffer */
-
-	mr	r11,r3
-	mr	r3,r5
-	mr	r4,r6
-	mr	r5,r7
-	mr	r6,r8
-	mr	r7,r9
-	mr	r8,r10
-
-	HVSC				/* invoke the hypervisor */
-
-	HCALL_INST_POSTCALL
-
-	ld	r12,STK_PARAM(R4)(r1)
-	std	r4,  0(r12)
-	std	r5,  8(r12)
-	std	r6, 16(r12)
-	std	r7, 24(r12)
-	std	r8, 32(r12)
-	std	r9, 40(r12)
-
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-
-	blr				/* return r3 = status */
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
deleted file mode 100644
index 9e5dfbcc00af..000000000000
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Celleb/Beat Interrupt controller
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/percpu.h>
-#include <linux/types.h>
-
-#include <asm/machdep.h>
-
-#include "beat_interrupt.h"
-#include "beat_wrapper.h"
-
-#define	MAX_IRQS	NR_IRQS
-static DEFINE_RAW_SPINLOCK(beatic_irq_mask_lock);
-static uint64_t	beatic_irq_mask_enable[(MAX_IRQS+255)/64];
-static uint64_t	beatic_irq_mask_ack[(MAX_IRQS+255)/64];
-
-static struct irq_domain *beatic_host;
-
-/*
- * In this implementation, "virq" == "IRQ plug number",
- * "(irq_hw_number_t)hwirq" == "IRQ outlet number".
- */
-
-/* assumption: locked */
-static inline void beatic_update_irq_mask(unsigned int irq_plug)
-{
-	int off;
-	unsigned long masks[4];
-
-	off = (irq_plug / 256) * 4;
-	masks[0] = beatic_irq_mask_enable[off + 0]
-		& beatic_irq_mask_ack[off + 0];
-	masks[1] = beatic_irq_mask_enable[off + 1]
-		& beatic_irq_mask_ack[off + 1];
-	masks[2] = beatic_irq_mask_enable[off + 2]
-		& beatic_irq_mask_ack[off + 2];
-	masks[3] = beatic_irq_mask_enable[off + 3]
-		& beatic_irq_mask_ack[off + 3];
-	if (beat_set_interrupt_mask(irq_plug&~255UL,
-		masks[0], masks[1], masks[2], masks[3]) != 0)
-		panic("Failed to set mask IRQ!");
-}
-
-static void beatic_mask_irq(struct irq_data *d)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-	beatic_irq_mask_enable[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
-	beatic_update_irq_mask(d->irq);
-	raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_unmask_irq(struct irq_data *d)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-	beatic_irq_mask_enable[d->irq/64] |= 1UL << (63 - (d->irq%64));
-	beatic_update_irq_mask(d->irq);
-	raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_ack_irq(struct irq_data *d)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-	beatic_irq_mask_ack[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
-	beatic_update_irq_mask(d->irq);
-	raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_end_irq(struct irq_data *d)
-{
-	s64 err;
-	unsigned long flags;
-
-	err = beat_downcount_of_interrupt(d->irq);
-	if (err != 0) {
-		if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */
-			panic("Failed to downcount IRQ! Error = %16llx", err);
-
-		printk(KERN_ERR "IRQ over-downcounted, plug %d\n", d->irq);
-	}
-	raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-	beatic_irq_mask_ack[d->irq/64] |= 1UL << (63 - (d->irq%64));
-	beatic_update_irq_mask(d->irq);
-	raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static struct irq_chip beatic_pic = {
-	.name = "CELL-BEAT",
-	.irq_unmask = beatic_unmask_irq,
-	.irq_mask = beatic_mask_irq,
-	.irq_eoi = beatic_end_irq,
-};
-
-/*
- * Dispose binding hardware IRQ number (hw) and Virtuql IRQ number (virq),
- * update flags.
- *
- * Note that the number (virq) is already assigned at upper layer.
- */
-static void beatic_pic_host_unmap(struct irq_domain *h, unsigned int virq)
-{
-	beat_destruct_irq_plug(virq);
-}
-
-/*
- * Create or update binding hardware IRQ number (hw) and Virtuql
- * IRQ number (virq). This is called only once for a given mapping.
- *
- * Note that the number (virq) is already assigned at upper layer.
- */
-static int beatic_pic_host_map(struct irq_domain *h, unsigned int virq,
-			       irq_hw_number_t hw)
-{
-	int64_t	err;
-
-	err = beat_construct_and_connect_irq_plug(virq, hw);
-	if (err < 0)
-		return -EIO;
-
-	irq_set_status_flags(virq, IRQ_LEVEL);
-	irq_set_chip_and_handler(virq, &beatic_pic, handle_fasteoi_irq);
-	return 0;
-}
-
-/*
- * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
- * to pass away to irq_create_mapping().
- *
- * Called from irq_create_of_mapping() only.
- * Note: We have only 1 entry to translate.
- */
-static int beatic_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
-				 const u32 *intspec, unsigned int intsize,
-				 irq_hw_number_t *out_hwirq,
-				 unsigned int *out_flags)
-{
-	const u64 *intspec2 = (const u64 *)intspec;
-
-	*out_hwirq = *intspec2;
-	*out_flags |= IRQ_TYPE_LEVEL_LOW;
-	return 0;
-}
-
-static int beatic_pic_host_match(struct irq_domain *h, struct device_node *np)
-{
-	/* Match all */
-	return 1;
-}
-
-static const struct irq_domain_ops beatic_pic_host_ops = {
-	.map = beatic_pic_host_map,
-	.unmap = beatic_pic_host_unmap,
-	.xlate = beatic_pic_host_xlate,
-	.match = beatic_pic_host_match,
-};
-
-/*
- * Get an IRQ number
- * Note: returns VIRQ
- */
-static inline unsigned int beatic_get_irq_plug(void)
-{
-	int i;
-	uint64_t	pending[4], ub;
-
-	for (i = 0; i < MAX_IRQS; i += 256) {
-		beat_detect_pending_interrupts(i, pending);
-		__asm__ ("cntlzd %0,%1":"=r"(ub):
-			"r"(pending[0] & beatic_irq_mask_enable[i/64+0]
-				       & beatic_irq_mask_ack[i/64+0]));
-		if (ub != 64)
-			return i + ub + 0;
-		__asm__ ("cntlzd %0,%1":"=r"(ub):
-			"r"(pending[1] & beatic_irq_mask_enable[i/64+1]
-				       & beatic_irq_mask_ack[i/64+1]));
-		if (ub != 64)
-			return i + ub + 64;
-		__asm__ ("cntlzd %0,%1":"=r"(ub):
-			"r"(pending[2] & beatic_irq_mask_enable[i/64+2]
-				       & beatic_irq_mask_ack[i/64+2]));
-		if (ub != 64)
-			return i + ub + 128;
-		__asm__ ("cntlzd %0,%1":"=r"(ub):
-			"r"(pending[3] & beatic_irq_mask_enable[i/64+3]
-				       & beatic_irq_mask_ack[i/64+3]));
-		if (ub != 64)
-			return i + ub + 192;
-	}
-
-	return NO_IRQ;
-}
-unsigned int beatic_get_irq(void)
-{
-	unsigned int ret;
-
-	ret = beatic_get_irq_plug();
-	if (ret != NO_IRQ)
-		beatic_ack_irq(irq_get_irq_data(ret));
-	return ret;
-}
-
-/*
- */
-void __init beatic_init_IRQ(void)
-{
-	int	i;
-
-	memset(beatic_irq_mask_enable, 0, sizeof(beatic_irq_mask_enable));
-	memset(beatic_irq_mask_ack, 255, sizeof(beatic_irq_mask_ack));
-	for (i = 0; i < MAX_IRQS; i += 256)
-		beat_set_interrupt_mask(i, 0L, 0L, 0L, 0L);
-
-	/* Set out get_irq function */
-	ppc_md.get_irq = beatic_get_irq;
-
-	/* Allocate an irq host */
-	beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL);
-	BUG_ON(beatic_host == NULL);
-	irq_set_default_host(beatic_host);
-}
-
-void beatic_deinit_IRQ(void)
-{
-	int	i;
-
-	for (i = 1; i < nr_irqs; i++)
-		beat_destruct_irq_plug(i);
-}
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
deleted file mode 100644
index a7e52f91a078..000000000000
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Celleb/Beat Interrupt controller
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef ASM_BEAT_PIC_H
-#define ASM_BEAT_PIC_H
-#ifdef __KERNEL__
-
-extern void beatic_init_IRQ(void);
-extern unsigned int beatic_get_irq(void);
-extern void beatic_deinit_IRQ(void);
-
-#endif
-#endif /* ASM_BEAT_PIC_H */
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
deleted file mode 100644
index 3ce685568935..000000000000
--- a/arch/powerpc/platforms/cell/beat_iommu.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Support for IOMMU on Celleb platform.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/pci.h>
-#include <linux/of_platform.h>
-
-#include <asm/machdep.h>
-
-#include "beat_wrapper.h"
-
-#define DMA_FLAGS 0xf800000000000000UL	/* r/w permitted, coherency required,
-					   strongest order */
-
-static int __init find_dma_window(u64 *io_space_id, u64 *ioid,
-				  u64 *base, u64 *size, u64 *io_page_size)
-{
-	struct device_node *dn;
-	const unsigned long *dma_window;
-
-	for_each_node_by_type(dn, "ioif") {
-		dma_window = of_get_property(dn, "toshiba,dma-window", NULL);
-		if (dma_window) {
-			*io_space_id = (dma_window[0] >> 32) & 0xffffffffUL;
-			*ioid = dma_window[0] & 0x7ffUL;
-			*base = dma_window[1];
-			*size = dma_window[2];
-			*io_page_size = 1 << dma_window[3];
-			of_node_put(dn);
-			return 1;
-		}
-	}
-	return 0;
-}
-
-static unsigned long celleb_dma_direct_offset;
-
-static void __init celleb_init_direct_mapping(void)
-{
-	u64 lpar_addr, io_addr;
-	u64 io_space_id, ioid, dma_base, dma_size, io_page_size;
-
-	if (!find_dma_window(&io_space_id, &ioid, &dma_base, &dma_size,
-			     &io_page_size)) {
-		pr_info("No dma window found !\n");
-		return;
-	}
-
-	for (lpar_addr = 0; lpar_addr < dma_size; lpar_addr += io_page_size) {
-		io_addr = lpar_addr + dma_base;
-		(void)beat_put_iopte(io_space_id, io_addr, lpar_addr,
-				     ioid, DMA_FLAGS);
-	}
-
-	celleb_dma_direct_offset = dma_base;
-}
-
-static void celleb_dma_dev_setup(struct device *dev)
-{
-	set_dma_ops(dev, &dma_direct_ops);
-	set_dma_offset(dev, celleb_dma_direct_offset);
-}
-
-static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
-{
-	celleb_dma_dev_setup(&pdev->dev);
-}
-
-static int celleb_of_bus_notify(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct device *dev = data;
-
-	/* We are only intereted in device addition */
-	if (action != BUS_NOTIFY_ADD_DEVICE)
-		return 0;
-
-	celleb_dma_dev_setup(dev);
-
-	return 0;
-}
-
-static struct notifier_block celleb_of_bus_notifier = {
-	.notifier_call = celleb_of_bus_notify
-};
-
-static int __init celleb_init_iommu(void)
-{
-	celleb_init_direct_mapping();
-	ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
-	bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier);
-
-	return 0;
-}
-
-machine_arch_initcall(celleb_beat, celleb_init_iommu);
diff --git a/arch/powerpc/platforms/cell/beat_spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c
deleted file mode 100644
index 13f52589d3a9..000000000000
--- a/arch/powerpc/platforms/cell/beat_spu_priv1.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * spu hypervisor abstraction for Beat
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <asm/types.h>
-#include <asm/spu.h>
-#include <asm/spu_priv1.h>
-
-#include "beat_wrapper.h"
-
-static inline void _int_mask_set(struct spu *spu, int class, u64 mask)
-{
-	spu->shadow_int_mask_RW[class] = mask;
-	beat_set_irq_mask_for_spe(spu->spe_id, class, mask);
-}
-
-static inline u64 _int_mask_get(struct spu *spu, int class)
-{
-	return spu->shadow_int_mask_RW[class];
-}
-
-static void int_mask_set(struct spu *spu, int class, u64 mask)
-{
-	_int_mask_set(spu, class, mask);
-}
-
-static u64 int_mask_get(struct spu *spu, int class)
-{
-	return _int_mask_get(spu, class);
-}
-
-static void int_mask_and(struct spu *spu, int class, u64 mask)
-{
-	u64 old_mask;
-	old_mask = _int_mask_get(spu, class);
-	_int_mask_set(spu, class, old_mask & mask);
-}
-
-static void int_mask_or(struct spu *spu, int class, u64 mask)
-{
-	u64 old_mask;
-	old_mask = _int_mask_get(spu, class);
-	_int_mask_set(spu, class, old_mask | mask);
-}
-
-static void int_stat_clear(struct spu *spu, int class, u64 stat)
-{
-	beat_clear_interrupt_status_of_spe(spu->spe_id, class, stat);
-}
-
-static u64 int_stat_get(struct spu *spu, int class)
-{
-	u64 int_stat;
-	beat_get_interrupt_status_of_spe(spu->spe_id, class, &int_stat);
-	return int_stat;
-}
-
-static void cpu_affinity_set(struct spu *spu, int cpu)
-{
-	return;
-}
-
-static u64 mfc_dar_get(struct spu *spu)
-{
-	u64 dar;
-	beat_get_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_dar_RW), &dar);
-	return dar;
-}
-
-static u64 mfc_dsisr_get(struct spu *spu)
-{
-	u64 dsisr;
-	beat_get_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_dsisr_RW), &dsisr);
-	return dsisr;
-}
-
-static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
-{
-	beat_set_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_dsisr_RW), dsisr);
-}
-
-static void mfc_sdr_setup(struct spu *spu)
-{
-	return;
-}
-
-static void mfc_sr1_set(struct spu *spu, u64 sr1)
-{
-	beat_set_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_sr1_RW), sr1);
-}
-
-static u64 mfc_sr1_get(struct spu *spu)
-{
-	u64 sr1;
-	beat_get_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_sr1_RW), &sr1);
-	return sr1;
-}
-
-static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
-{
-	beat_set_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_tclass_id_RW), tclass_id);
-}
-
-static u64 mfc_tclass_id_get(struct spu *spu)
-{
-	u64 tclass_id;
-	beat_get_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, mfc_tclass_id_RW), &tclass_id);
-	return tclass_id;
-}
-
-static void tlb_invalidate(struct spu *spu)
-{
-	beat_set_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, tlb_invalidate_entry_W), 0ul);
-}
-
-static void resource_allocation_groupID_set(struct spu *spu, u64 id)
-{
-	beat_set_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, resource_allocation_groupID_RW),
-		id);
-}
-
-static u64 resource_allocation_groupID_get(struct spu *spu)
-{
-	u64 id;
-	beat_get_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, resource_allocation_groupID_RW),
-		&id);
-	return id;
-}
-
-static void resource_allocation_enable_set(struct spu *spu, u64 enable)
-{
-	beat_set_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, resource_allocation_enable_RW),
-		enable);
-}
-
-static u64 resource_allocation_enable_get(struct spu *spu)
-{
-	u64 enable;
-	beat_get_spe_privileged_state_1_registers(
-		spu->spe_id,
-		offsetof(struct spu_priv1, resource_allocation_enable_RW),
-		&enable);
-	return enable;
-}
-
-const struct spu_priv1_ops spu_priv1_beat_ops = {
-	.int_mask_and = int_mask_and,
-	.int_mask_or = int_mask_or,
-	.int_mask_set = int_mask_set,
-	.int_mask_get = int_mask_get,
-	.int_stat_clear = int_stat_clear,
-	.int_stat_get = int_stat_get,
-	.cpu_affinity_set = cpu_affinity_set,
-	.mfc_dar_get = mfc_dar_get,
-	.mfc_dsisr_get = mfc_dsisr_get,
-	.mfc_dsisr_set = mfc_dsisr_set,
-	.mfc_sdr_setup = mfc_sdr_setup,
-	.mfc_sr1_set = mfc_sr1_set,
-	.mfc_sr1_get = mfc_sr1_get,
-	.mfc_tclass_id_set = mfc_tclass_id_set,
-	.mfc_tclass_id_get = mfc_tclass_id_get,
-	.tlb_invalidate = tlb_invalidate,
-	.resource_allocation_groupID_set = resource_allocation_groupID_set,
-	.resource_allocation_groupID_get = resource_allocation_groupID_get,
-	.resource_allocation_enable_set = resource_allocation_enable_set,
-	.resource_allocation_enable_get = resource_allocation_enable_get,
-};
diff --git a/arch/powerpc/platforms/cell/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h
deleted file mode 100644
index 8580dc7e1798..000000000000
--- a/arch/powerpc/platforms/cell/beat_syscall.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Beat hypervisor call numbers
- *
- * (C) Copyright 2004-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef BEAT_BEAT_syscall_H
-#define BEAT_BEAT_syscall_H
-
-#ifdef	__ASSEMBLY__
-#define	__BEAT_ADD_VENDOR_ID(__x, __v)	((__v)<<60|(__x))
-#else
-#define	__BEAT_ADD_VENDOR_ID(__x, __v)	((u64)(__v)<<60|(__x))
-#endif
-#define HV_allocate_memory __BEAT_ADD_VENDOR_ID(0, 0)
-#define HV_construct_virtual_address_space __BEAT_ADD_VENDOR_ID(2, 0)
-#define HV_destruct_virtual_address_space __BEAT_ADD_VENDOR_ID(10, 0)
-#define HV_get_virtual_address_space_id_of_ppe __BEAT_ADD_VENDOR_ID(4, 0)
-#define HV_query_logical_partition_address_region_info 			\
-						__BEAT_ADD_VENDOR_ID(6, 0)
-#define HV_release_memory __BEAT_ADD_VENDOR_ID(13, 0)
-#define HV_select_virtual_address_space __BEAT_ADD_VENDOR_ID(7, 0)
-#define HV_load_range_registers __BEAT_ADD_VENDOR_ID(68, 0)
-#define HV_set_ppe_l2cache_rmt_entry __BEAT_ADD_VENDOR_ID(70, 0)
-#define HV_set_ppe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(71, 0)
-#define HV_set_spe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(72, 0)
-#define HV_get_io_address_translation_fault_info __BEAT_ADD_VENDOR_ID(14, 0)
-#define HV_get_iopte __BEAT_ADD_VENDOR_ID(16, 0)
-#define HV_preload_iopt_cache __BEAT_ADD_VENDOR_ID(17, 0)
-#define HV_put_iopte __BEAT_ADD_VENDOR_ID(15, 0)
-#define HV_connect_event_ports __BEAT_ADD_VENDOR_ID(21, 0)
-#define HV_construct_event_receive_port __BEAT_ADD_VENDOR_ID(18, 0)
-#define HV_destruct_event_receive_port __BEAT_ADD_VENDOR_ID(19, 0)
-#define HV_destruct_event_send_port __BEAT_ADD_VENDOR_ID(22, 0)
-#define HV_get_state_of_event_send_port __BEAT_ADD_VENDOR_ID(25, 0)
-#define HV_request_to_connect_event_ports __BEAT_ADD_VENDOR_ID(20, 0)
-#define HV_send_event_externally __BEAT_ADD_VENDOR_ID(23, 0)
-#define HV_send_event_locally __BEAT_ADD_VENDOR_ID(24, 0)
-#define HV_construct_and_connect_irq_plug __BEAT_ADD_VENDOR_ID(28, 0)
-#define HV_destruct_irq_plug __BEAT_ADD_VENDOR_ID(29, 0)
-#define HV_detect_pending_interrupts __BEAT_ADD_VENDOR_ID(26, 0)
-#define HV_end_of_interrupt __BEAT_ADD_VENDOR_ID(27, 0)
-#define HV_assign_control_signal_notification_port __BEAT_ADD_VENDOR_ID(45, 0)
-#define HV_end_of_control_signal_processing __BEAT_ADD_VENDOR_ID(48, 0)
-#define HV_get_control_signal __BEAT_ADD_VENDOR_ID(46, 0)
-#define HV_set_irq_mask_for_spe __BEAT_ADD_VENDOR_ID(61, 0)
-#define HV_shutdown_logical_partition __BEAT_ADD_VENDOR_ID(44, 0)
-#define HV_connect_message_ports __BEAT_ADD_VENDOR_ID(35, 0)
-#define HV_destruct_message_port __BEAT_ADD_VENDOR_ID(36, 0)
-#define HV_receive_message __BEAT_ADD_VENDOR_ID(37, 0)
-#define HV_get_message_port_info __BEAT_ADD_VENDOR_ID(34, 0)
-#define HV_request_to_connect_message_ports __BEAT_ADD_VENDOR_ID(33, 0)
-#define HV_send_message __BEAT_ADD_VENDOR_ID(32, 0)
-#define HV_get_logical_ppe_id __BEAT_ADD_VENDOR_ID(69, 0)
-#define HV_pause __BEAT_ADD_VENDOR_ID(9, 0)
-#define HV_destruct_shared_memory_handle __BEAT_ADD_VENDOR_ID(51, 0)
-#define HV_get_shared_memory_info __BEAT_ADD_VENDOR_ID(52, 0)
-#define HV_permit_sharing_memory __BEAT_ADD_VENDOR_ID(50, 0)
-#define HV_request_to_attach_shared_memory __BEAT_ADD_VENDOR_ID(49, 0)
-#define HV_enable_logical_spe_execution __BEAT_ADD_VENDOR_ID(55, 0)
-#define HV_construct_logical_spe __BEAT_ADD_VENDOR_ID(53, 0)
-#define HV_disable_logical_spe_execution __BEAT_ADD_VENDOR_ID(56, 0)
-#define HV_destruct_logical_spe __BEAT_ADD_VENDOR_ID(54, 0)
-#define HV_sense_spe_execution_status __BEAT_ADD_VENDOR_ID(58, 0)
-#define HV_insert_htab_entry __BEAT_ADD_VENDOR_ID(101, 0)
-#define HV_read_htab_entries __BEAT_ADD_VENDOR_ID(95, 0)
-#define HV_write_htab_entry __BEAT_ADD_VENDOR_ID(94, 0)
-#define HV_assign_io_address_translation_fault_port 			\
-						__BEAT_ADD_VENDOR_ID(100, 0)
-#define HV_set_interrupt_mask __BEAT_ADD_VENDOR_ID(73, 0)
-#define HV_get_logical_partition_id __BEAT_ADD_VENDOR_ID(74, 0)
-#define HV_create_repository_node2 __BEAT_ADD_VENDOR_ID(90, 0)
-#define HV_create_repository_node __BEAT_ADD_VENDOR_ID(90, 0) /* alias */
-#define HV_get_repository_node_value2 __BEAT_ADD_VENDOR_ID(91, 0)
-#define HV_get_repository_node_value __BEAT_ADD_VENDOR_ID(91, 0) /* alias */
-#define HV_modify_repository_node_value2 __BEAT_ADD_VENDOR_ID(92, 0)
-#define HV_modify_repository_node_value __BEAT_ADD_VENDOR_ID(92, 0) /* alias */
-#define HV_remove_repository_node2 __BEAT_ADD_VENDOR_ID(93, 0)
-#define HV_remove_repository_node __BEAT_ADD_VENDOR_ID(93, 0) /* alias */
-#define HV_cancel_shared_memory __BEAT_ADD_VENDOR_ID(104, 0)
-#define HV_clear_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(206, 0)
-#define HV_construct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(80, 0)
-#define HV_destruct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(81, 0)
-#define HV_disconnect_ipspc_service __BEAT_ADD_VENDOR_ID(88, 0)
-#define HV_execute_ipspc_command __BEAT_ADD_VENDOR_ID(86, 0)
-#define HV_get_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(205, 0)
-#define HV_get_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(208, 0)
-#define HV_permit_use_of_ipspc_service __BEAT_ADD_VENDOR_ID(85, 0)
-#define HV_reinitialize_logical_spe __BEAT_ADD_VENDOR_ID(82, 0)
-#define HV_request_ipspc_service __BEAT_ADD_VENDOR_ID(84, 0)
-#define HV_stop_ipspc_command __BEAT_ADD_VENDOR_ID(87, 0)
-#define HV_set_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(204, 0)
-#define HV_get_status_of_ipspc_service __BEAT_ADD_VENDOR_ID(203, 0)
-#define HV_put_characters_to_console __BEAT_ADD_VENDOR_ID(0x101, 1)
-#define HV_get_characters_from_console __BEAT_ADD_VENDOR_ID(0x102, 1)
-#define HV_get_base_clock __BEAT_ADD_VENDOR_ID(0x111, 1)
-#define HV_set_base_clock __BEAT_ADD_VENDOR_ID(0x112, 1)
-#define HV_get_frame_cycle __BEAT_ADD_VENDOR_ID(0x114, 1)
-#define HV_disable_console __BEAT_ADD_VENDOR_ID(0x115, 1)
-#define HV_disable_all_console __BEAT_ADD_VENDOR_ID(0x116, 1)
-#define HV_oneshot_timer __BEAT_ADD_VENDOR_ID(0x117, 1)
-#define HV_set_dabr __BEAT_ADD_VENDOR_ID(0x118, 1)
-#define HV_get_dabr __BEAT_ADD_VENDOR_ID(0x119, 1)
-#define HV_start_hv_stats __BEAT_ADD_VENDOR_ID(0x21c, 1)
-#define HV_stop_hv_stats __BEAT_ADD_VENDOR_ID(0x21d, 1)
-#define HV_get_hv_stats __BEAT_ADD_VENDOR_ID(0x21e, 1)
-#define HV_get_hv_error_stats __BEAT_ADD_VENDOR_ID(0x221, 1)
-#define HV_get_stats __BEAT_ADD_VENDOR_ID(0x224, 1)
-#define HV_get_heap_stats __BEAT_ADD_VENDOR_ID(0x225, 1)
-#define HV_get_memory_stats __BEAT_ADD_VENDOR_ID(0x227, 1)
-#define HV_get_memory_detail __BEAT_ADD_VENDOR_ID(0x228, 1)
-#define HV_set_priority_of_irq_outlet __BEAT_ADD_VENDOR_ID(0x122, 1)
-#define HV_get_physical_spe_by_reservation_id __BEAT_ADD_VENDOR_ID(0x128, 1)
-#define HV_get_spe_context __BEAT_ADD_VENDOR_ID(0x129, 1)
-#define HV_set_spe_context __BEAT_ADD_VENDOR_ID(0x12a, 1)
-#define HV_downcount_of_interrupt __BEAT_ADD_VENDOR_ID(0x12e, 1)
-#define HV_peek_spe_context __BEAT_ADD_VENDOR_ID(0x12f, 1)
-#define HV_read_bpa_register __BEAT_ADD_VENDOR_ID(0x131, 1)
-#define HV_write_bpa_register __BEAT_ADD_VENDOR_ID(0x132, 1)
-#define HV_map_context_table_of_spe __BEAT_ADD_VENDOR_ID(0x137, 1)
-#define HV_get_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x138, 1)
-#define HV_set_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x139, 1)
-#define HV_init_pm __BEAT_ADD_VENDOR_ID(0x150, 1)
-#define HV_set_pm_signal __BEAT_ADD_VENDOR_ID(0x151, 1)
-#define HV_get_pm_signal __BEAT_ADD_VENDOR_ID(0x152, 1)
-#define HV_set_pm_config __BEAT_ADD_VENDOR_ID(0x153, 1)
-#define HV_get_pm_config __BEAT_ADD_VENDOR_ID(0x154, 1)
-#define HV_get_inner_trace_data __BEAT_ADD_VENDOR_ID(0x155, 1)
-#define HV_set_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x156, 1)
-#define HV_get_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x157, 1)
-#define HV_set_pm_interrupt __BEAT_ADD_VENDOR_ID(0x158, 1)
-#define HV_get_pm_interrupt __BEAT_ADD_VENDOR_ID(0x159, 1)
-#define HV_kick_pm __BEAT_ADD_VENDOR_ID(0x160, 1)
-#define HV_construct_pm_context __BEAT_ADD_VENDOR_ID(0x164, 1)
-#define HV_destruct_pm_context __BEAT_ADD_VENDOR_ID(0x165, 1)
-#define HV_be_slow __BEAT_ADD_VENDOR_ID(0x170, 1)
-#define HV_assign_ipspc_server_connection_status_notification_port 	\
-						__BEAT_ADD_VENDOR_ID(0x173, 1)
-#define HV_get_raid_of_physical_spe __BEAT_ADD_VENDOR_ID(0x174, 1)
-#define HV_set_physical_spe_to_rag __BEAT_ADD_VENDOR_ID(0x175, 1)
-#define HV_release_physical_spe_from_rag __BEAT_ADD_VENDOR_ID(0x176, 1)
-#define HV_rtc_read __BEAT_ADD_VENDOR_ID(0x190, 1)
-#define HV_rtc_write __BEAT_ADD_VENDOR_ID(0x191, 1)
-#define HV_eeprom_read __BEAT_ADD_VENDOR_ID(0x192, 1)
-#define HV_eeprom_write __BEAT_ADD_VENDOR_ID(0x193, 1)
-#define HV_insert_htab_entry3 __BEAT_ADD_VENDOR_ID(0x104, 1)
-#define HV_invalidate_htab_entry3 __BEAT_ADD_VENDOR_ID(0x105, 1)
-#define HV_update_htab_permission3 __BEAT_ADD_VENDOR_ID(0x106, 1)
-#define HV_clear_htab3 __BEAT_ADD_VENDOR_ID(0x107, 1)
-#endif
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
deleted file mode 100644
index 350735bc8888..000000000000
--- a/arch/powerpc/platforms/cell/beat_udbg.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * udbg function for Beat
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/console.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-
-#include "beat.h"
-
-#define	celleb_vtermno	0
-
-static void udbg_putc_beat(char c)
-{
-	unsigned long rc;
-
-	if (c == '\n')
-		udbg_putc_beat('\r');
-
-	rc = beat_put_term_char(celleb_vtermno, 1, (uint64_t)c << 56, 0);
-}
-
-/* Buffered chars getc */
-static u64 inbuflen;
-static u64 inbuf[2];	/* must be 2 u64s */
-
-static int udbg_getc_poll_beat(void)
-{
-	/* The interface is tricky because it may return up to 16 chars.
-	 * We save them statically for future calls to udbg_getc().
-	 */
-	char ch, *buf = (char *)inbuf;
-	int i;
-	long rc;
-	if (inbuflen == 0) {
-		/* get some more chars. */
-		inbuflen = 0;
-		rc = beat_get_term_char(celleb_vtermno, &inbuflen,
-					inbuf+0, inbuf+1);
-		if (rc != 0)
-			inbuflen = 0;	/* otherwise inbuflen is garbage */
-	}
-	if (inbuflen <= 0 || inbuflen > 16) {
-		/* Catch error case as well as other oddities (corruption) */
-		inbuflen = 0;
-		return -1;
-	}
-	ch = buf[0];
-	for (i = 1; i < inbuflen; i++)	/* shuffle them down. */
-		buf[i-1] = buf[i];
-	inbuflen--;
-	return ch;
-}
-
-static int udbg_getc_beat(void)
-{
-	int ch;
-	for (;;) {
-		ch = udbg_getc_poll_beat();
-		if (ch == -1) {
-			/* This shouldn't be needed...but... */
-			volatile unsigned long delay;
-			for (delay = 0; delay < 2000000; delay++)
-				;
-		} else {
-			return ch;
-		}
-	}
-}
-
-/* call this from early_init() for a working debug console on
- * vterm capable LPAR machines
- */
-void __init udbg_init_debug_beat(void)
-{
-	udbg_putc = udbg_putc_beat;
-	udbg_getc = udbg_getc_beat;
-	udbg_getc_poll = udbg_getc_poll_beat;
-}
diff --git a/arch/powerpc/platforms/cell/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h
deleted file mode 100644
index c1109969f242..000000000000
--- a/arch/powerpc/platforms/cell/beat_wrapper.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Beat hypervisor call I/F
- *
- * (C) Copyright 2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/pseries/plpar_wrapper.h.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#ifndef BEAT_HCALL
-#include <linux/string.h>
-#include "beat_syscall.h"
-
-/* defined in hvCall.S */
-extern s64 beat_hcall_norets(u64 opcode, ...);
-extern s64 beat_hcall_norets8(u64 opcode, u64 arg1, u64 arg2, u64 arg3,
-	u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8);
-extern s64 beat_hcall1(u64 opcode, u64 retbuf[1], ...);
-extern s64 beat_hcall2(u64 opcode, u64 retbuf[2], ...);
-extern s64 beat_hcall3(u64 opcode, u64 retbuf[3], ...);
-extern s64 beat_hcall4(u64 opcode, u64 retbuf[4], ...);
-extern s64 beat_hcall5(u64 opcode, u64 retbuf[5], ...);
-extern s64 beat_hcall6(u64 opcode, u64 retbuf[6], ...);
-
-static inline s64 beat_downcount_of_interrupt(u64 plug_id)
-{
-	return beat_hcall_norets(HV_downcount_of_interrupt, plug_id);
-}
-
-static inline s64 beat_set_interrupt_mask(u64 index,
-	u64 val0, u64 val1, u64 val2, u64 val3)
-{
-	return beat_hcall_norets(HV_set_interrupt_mask, index,
-	       val0, val1, val2, val3);
-}
-
-static inline s64 beat_destruct_irq_plug(u64 plug_id)
-{
-	return beat_hcall_norets(HV_destruct_irq_plug, plug_id);
-}
-
-static inline s64 beat_construct_and_connect_irq_plug(u64 plug_id,
-	u64 outlet_id)
-{
-	return beat_hcall_norets(HV_construct_and_connect_irq_plug, plug_id,
-	       outlet_id);
-}
-
-static inline s64 beat_detect_pending_interrupts(u64 index, u64 *retbuf)
-{
-	return beat_hcall4(HV_detect_pending_interrupts, retbuf, index);
-}
-
-static inline s64 beat_pause(u64 style)
-{
-	return beat_hcall_norets(HV_pause, style);
-}
-
-static inline s64 beat_read_htab_entries(u64 htab_id, u64 index, u64 *retbuf)
-{
-	return beat_hcall5(HV_read_htab_entries, retbuf, htab_id, index);
-}
-
-static inline s64 beat_insert_htab_entry(u64 htab_id, u64 group,
-	u64 bitmask, u64 hpte_v, u64 hpte_r, u64 *slot)
-{
-	u64 dummy[3];
-	s64 ret;
-
-	ret = beat_hcall3(HV_insert_htab_entry, dummy, htab_id, group,
-		bitmask, hpte_v, hpte_r);
-	*slot = dummy[0];
-	return ret;
-}
-
-static inline s64 beat_write_htab_entry(u64 htab_id, u64 slot,
-	u64 hpte_v, u64 hpte_r, u64 mask_v, u64 mask_r,
-	u64 *ret_v, u64 *ret_r)
-{
-	u64 dummy[2];
-	s64 ret;
-
-	ret = beat_hcall2(HV_write_htab_entry, dummy, htab_id, slot,
-		hpte_v, hpte_r, mask_v, mask_r);
-	*ret_v = dummy[0];
-	*ret_r = dummy[1];
-	return ret;
-}
-
-static inline s64 beat_insert_htab_entry3(u64 htab_id, u64 group,
-	u64 hpte_v, u64 hpte_r, u64 mask_v, u64 value_v, u64 *slot)
-{
-	u64 dummy[1];
-	s64 ret;
-
-	ret = beat_hcall1(HV_insert_htab_entry3, dummy, htab_id, group,
-		hpte_v, hpte_r, mask_v, value_v);
-	*slot = dummy[0];
-	return ret;
-}
-
-static inline s64 beat_invalidate_htab_entry3(u64 htab_id, u64 group,
-	u64 va, u64 pss)
-{
-	return beat_hcall_norets(HV_invalidate_htab_entry3,
-		htab_id, group, va, pss);
-}
-
-static inline s64 beat_update_htab_permission3(u64 htab_id, u64 group,
-	u64 va, u64 pss, u64 ptel_mask, u64 ptel_value)
-{
-	return beat_hcall_norets(HV_update_htab_permission3,
-		htab_id, group, va, pss, ptel_mask, ptel_value);
-}
-
-static inline s64 beat_clear_htab3(u64 htab_id)
-{
-	return beat_hcall_norets(HV_clear_htab3, htab_id);
-}
-
-static inline void beat_shutdown_logical_partition(u64 code)
-{
-	(void)beat_hcall_norets(HV_shutdown_logical_partition, code);
-}
-
-static inline s64 beat_rtc_write(u64 time_from_epoch)
-{
-	return beat_hcall_norets(HV_rtc_write, time_from_epoch);
-}
-
-static inline s64 beat_rtc_read(u64 *time_from_epoch)
-{
-	u64 dummy[1];
-	s64 ret;
-
-	ret = beat_hcall1(HV_rtc_read, dummy);
-	*time_from_epoch = dummy[0];
-	return ret;
-}
-
-#define	BEAT_NVRW_CNT	(sizeof(u64) * 6)
-
-static inline s64 beat_eeprom_write(u64 index, u64 length, u8 *buffer)
-{
-	u64	b[6];
-
-	if (length > BEAT_NVRW_CNT)
-		return -1;
-	memcpy(b, buffer, sizeof(b));
-	return beat_hcall_norets8(HV_eeprom_write, index, length,
-		b[0], b[1], b[2], b[3], b[4], b[5]);
-}
-
-static inline s64 beat_eeprom_read(u64 index, u64 length, u8 *buffer)
-{
-	u64	b[6];
-	s64	ret;
-
-	if (length > BEAT_NVRW_CNT)
-		return -1;
-	ret = beat_hcall6(HV_eeprom_read, b, index, length);
-	memcpy(buffer, b, length);
-	return ret;
-}
-
-static inline s64 beat_set_dabr(u64 value, u64 style)
-{
-	return beat_hcall_norets(HV_set_dabr, value, style);
-}
-
-static inline s64 beat_get_characters_from_console(u64 termno, u64 *len,
-	u8 *buffer)
-{
-	u64 dummy[3];
-	s64 ret;
-
-	ret = beat_hcall3(HV_get_characters_from_console, dummy, termno, len);
-	*len = dummy[0];
-	memcpy(buffer, dummy + 1, *len);
-	return ret;
-}
-
-static inline s64 beat_put_characters_to_console(u64 termno, u64 len,
-	u8 *buffer)
-{
-	u64 b[2];
-
-	memcpy(b, buffer, len);
-	return beat_hcall_norets(HV_put_characters_to_console, termno, len,
-		b[0], b[1]);
-}
-
-static inline s64 beat_get_spe_privileged_state_1_registers(
-		u64 id, u64 offsetof, u64 *value)
-{
-	u64 dummy[1];
-	s64 ret;
-
-	ret = beat_hcall1(HV_get_spe_privileged_state_1_registers, dummy, id,
-		offsetof);
-	*value = dummy[0];
-	return ret;
-}
-
-static inline s64 beat_set_irq_mask_for_spe(u64 id, u64 class, u64 mask)
-{
-	return beat_hcall_norets(HV_set_irq_mask_for_spe, id, class, mask);
-}
-
-static inline s64 beat_clear_interrupt_status_of_spe(u64 id, u64 class,
-	u64 mask)
-{
-	return beat_hcall_norets(HV_clear_interrupt_status_of_spe,
-		id, class, mask);
-}
-
-static inline s64 beat_set_spe_privileged_state_1_registers(
-		u64 id, u64 offsetof, u64 value)
-{
-	return beat_hcall_norets(HV_set_spe_privileged_state_1_registers,
-		id, offsetof, value);
-}
-
-static inline s64 beat_get_interrupt_status_of_spe(u64 id, u64 class, u64 *val)
-{
-	u64 dummy[1];
-	s64 ret;
-
-	ret = beat_hcall1(HV_get_interrupt_status_of_spe, dummy, id, class);
-	*val = dummy[0];
-	return ret;
-}
-
-static inline s64 beat_put_iopte(u64 ioas_id, u64 io_addr, u64 real_addr,
-	u64 ioid, u64 flags)
-{
-	return beat_hcall_norets(HV_put_iopte, ioas_id, io_addr, real_addr,
-		ioid, flags);
-}
-
-static inline s64 beat_construct_event_receive_port(u64 *port)
-{
-	u64 dummy[1];
-	s64 ret;
-
-	ret = beat_hcall1(HV_construct_event_receive_port, dummy);
-	*port = dummy[0];
-	return ret;
-}
-
-static inline s64 beat_destruct_event_receive_port(u64 port)
-{
-	s64 ret;
-
-	ret = beat_hcall_norets(HV_destruct_event_receive_port, port);
-	return ret;
-}
-
-static inline s64 beat_create_repository_node(u64 path[4], u64 data[2])
-{
-	s64 ret;
-
-	ret = beat_hcall_norets(HV_create_repository_node2,
-		path[0], path[1], path[2], path[3], data[0], data[1]);
-	return ret;
-}
-
-static inline s64 beat_get_repository_node_value(u64 lpid, u64 path[4],
-	u64 data[2])
-{
-	s64 ret;
-
-	ret = beat_hcall2(HV_get_repository_node_value2, data,
-		lpid, path[0], path[1], path[2], path[3]);
-	return ret;
-}
-
-#endif
diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h
new file mode 100644
index 000000000000..ef143dfee068
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cell.h
@@ -0,0 +1,24 @@
+/*
+ * Cell Platform common data structures
+ *
+ * Copyright 2015, Daniel Axtens, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef CELL_H
+#define CELL_H
+
+#include <asm/pci-bridge.h>
+
+extern struct pci_controller_ops cell_pci_controller_ops;
+
+#endif
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
deleted file mode 100644
index 3ce70ded2d6a..000000000000
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Support for PCI on Celleb platform.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/kernel/rtas_pci.c:
- *  Copyright (C) 2001 Dave Engebretsen, IBM Corporation
- *  Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/memblock.h>
-#include <linux/pci_regs.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-#include "celleb_pci.h"
-
-#define MAX_PCI_DEVICES    32
-#define MAX_PCI_FUNCTIONS   8
-#define MAX_PCI_BASE_ADDRS  3 /* use 64 bit address */
-
-/* definition for fake pci configuration area for GbE, .... ,and etc. */
-
-struct celleb_pci_resource {
-	struct resource r[MAX_PCI_BASE_ADDRS];
-};
-
-struct celleb_pci_private {
-	unsigned char *fake_config[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
-	struct celleb_pci_resource *res[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
-};
-
-static inline u8 celleb_fake_config_readb(void *addr)
-{
-	u8 *p = addr;
-	return *p;
-}
-
-static inline u16 celleb_fake_config_readw(void *addr)
-{
-	__le16 *p = addr;
-	return le16_to_cpu(*p);
-}
-
-static inline u32 celleb_fake_config_readl(void *addr)
-{
-	__le32 *p = addr;
-	return le32_to_cpu(*p);
-}
-
-static inline void celleb_fake_config_writeb(u32 val, void *addr)
-{
-	u8 *p = addr;
-	*p = val;
-}
-
-static inline void celleb_fake_config_writew(u32 val, void *addr)
-{
-	__le16 val16;
-	__le16 *p = addr;
-	val16 = cpu_to_le16(val);
-	*p = val16;
-}
-
-static inline void celleb_fake_config_writel(u32 val, void *addr)
-{
-	__le32 val32;
-	__le32 *p = addr;
-	val32 = cpu_to_le32(val);
-	*p = val32;
-}
-
-static unsigned char *get_fake_config_start(struct pci_controller *hose,
-					    int devno, int fn)
-{
-	struct celleb_pci_private *private = hose->private_data;
-
-	if (private == NULL)
-		return NULL;
-
-	return private->fake_config[devno][fn];
-}
-
-static struct celleb_pci_resource *get_resource_start(
-				struct pci_controller *hose,
-				int devno, int fn)
-{
-	struct celleb_pci_private *private = hose->private_data;
-
-	if (private == NULL)
-		return NULL;
-
-	return private->res[devno][fn];
-}
-
-
-static void celleb_config_read_fake(unsigned char *config, int where,
-				    int size, u32 *val)
-{
-	char *p = config + where;
-
-	switch (size) {
-	case 1:
-		*val = celleb_fake_config_readb(p);
-		break;
-	case 2:
-		*val = celleb_fake_config_readw(p);
-		break;
-	case 4:
-		*val = celleb_fake_config_readl(p);
-		break;
-	}
-}
-
-static void celleb_config_write_fake(unsigned char *config, int where,
-				     int size, u32 val)
-{
-	char *p = config + where;
-
-	switch (size) {
-	case 1:
-		celleb_fake_config_writeb(val, p);
-		break;
-	case 2:
-		celleb_fake_config_writew(val, p);
-		break;
-	case 4:
-		celleb_fake_config_writel(val, p);
-		break;
-	}
-}
-
-static int celleb_fake_pci_read_config(struct pci_bus *bus,
-		unsigned int devfn, int where, int size, u32 *val)
-{
-	char *config;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	unsigned int devno = devfn >> 3;
-	unsigned int fn = devfn & 0x7;
-
-	/* allignment check */
-	BUG_ON(where % size);
-
-	pr_debug("    fake read: bus=0x%x, ", bus->number);
-	config = get_fake_config_start(hose, devno, fn);
-
-	pr_debug("devno=0x%x, where=0x%x, size=0x%x, ", devno, where, size);
-	if (!config) {
-		pr_debug("failed\n");
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	}
-
-	celleb_config_read_fake(config, where, size, val);
-	pr_debug("val=0x%x\n", *val);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-
-static int celleb_fake_pci_write_config(struct pci_bus *bus,
-		unsigned int devfn, int where, int size, u32 val)
-{
-	char *config;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct celleb_pci_resource *res;
-	unsigned int devno = devfn >> 3;
-	unsigned int fn = devfn & 0x7;
-
-	/* allignment check */
-	BUG_ON(where % size);
-
-	config = get_fake_config_start(hose, devno, fn);
-
-	if (!config)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	if (val == ~0) {
-		int i = (where - PCI_BASE_ADDRESS_0) >> 3;
-
-		switch (where) {
-		case PCI_BASE_ADDRESS_0:
-		case PCI_BASE_ADDRESS_2:
-			if (size != 4)
-				return PCIBIOS_DEVICE_NOT_FOUND;
-			res = get_resource_start(hose, devno, fn);
-			if (!res)
-				return PCIBIOS_DEVICE_NOT_FOUND;
-			celleb_config_write_fake(config, where, size,
-					(res->r[i].end - res->r[i].start));
-			return PCIBIOS_SUCCESSFUL;
-		case PCI_BASE_ADDRESS_1:
-		case PCI_BASE_ADDRESS_3:
-		case PCI_BASE_ADDRESS_4:
-		case PCI_BASE_ADDRESS_5:
-			break;
-		default:
-			break;
-		}
-	}
-
-	celleb_config_write_fake(config, where, size, val);
-	pr_debug("    fake write: where=%x, size=%d, val=%x\n",
-		 where, size, val);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops celleb_fake_pci_ops = {
-	.read = celleb_fake_pci_read_config,
-	.write = celleb_fake_pci_write_config,
-};
-
-static inline void celleb_setup_pci_base_addrs(struct pci_controller *hose,
-					unsigned int devno, unsigned int fn,
-					unsigned int num_base_addr)
-{
-	u32 val;
-	unsigned char *config;
-	struct celleb_pci_resource *res;
-
-	config = get_fake_config_start(hose, devno, fn);
-	res = get_resource_start(hose, devno, fn);
-
-	if (!config || !res)
-		return;
-
-	switch (num_base_addr) {
-	case 3:
-		val = (res->r[2].start & 0xfffffff0)
-		    | PCI_BASE_ADDRESS_MEM_TYPE_64;
-		celleb_config_write_fake(config, PCI_BASE_ADDRESS_4, 4, val);
-		val = res->r[2].start >> 32;
-		celleb_config_write_fake(config, PCI_BASE_ADDRESS_5, 4, val);
-		/* FALLTHROUGH */
-	case 2:
-		val = (res->r[1].start & 0xfffffff0)
-		    | PCI_BASE_ADDRESS_MEM_TYPE_64;
-		celleb_config_write_fake(config, PCI_BASE_ADDRESS_2, 4, val);
-		val = res->r[1].start >> 32;
-		celleb_config_write_fake(config, PCI_BASE_ADDRESS_3, 4, val);
-		/* FALLTHROUGH */
-	case 1:
-		val = (res->r[0].start & 0xfffffff0)
-		    | PCI_BASE_ADDRESS_MEM_TYPE_64;
-		celleb_config_write_fake(config, PCI_BASE_ADDRESS_0, 4, val);
-		val = res->r[0].start >> 32;
-		celleb_config_write_fake(config, PCI_BASE_ADDRESS_1, 4, val);
-		break;
-	}
-
-	val = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
-	celleb_config_write_fake(config, PCI_COMMAND, 2, val);
-}
-
-static int __init celleb_setup_fake_pci_device(struct device_node *node,
-					       struct pci_controller *hose)
-{
-	unsigned int rlen;
-	int num_base_addr = 0;
-	u32 val;
-	const u32 *wi0, *wi1, *wi2, *wi3, *wi4;
-	unsigned int devno, fn;
-	struct celleb_pci_private *private = hose->private_data;
-	unsigned char **config = NULL;
-	struct celleb_pci_resource **res = NULL;
-	const char *name;
-	const unsigned long *li;
-	int size, result;
-
-	if (private == NULL) {
-		printk(KERN_ERR "PCI: "
-		       "memory space for pci controller is not assigned\n");
-		goto error;
-	}
-
-	name = of_get_property(node, "model", &rlen);
-	if (!name) {
-		printk(KERN_ERR "PCI: model property not found.\n");
-		goto error;
-	}
-
-	wi4 = of_get_property(node, "reg", &rlen);
-	if (wi4 == NULL)
-		goto error;
-
-	devno = ((wi4[0] >> 8) & 0xff) >> 3;
-	fn = (wi4[0] >> 8) & 0x7;
-
-	pr_debug("PCI: celleb_setup_fake_pci() %s devno=%x fn=%x\n", name,
-		 devno, fn);
-
-	size = 256;
-	config = &private->fake_config[devno][fn];
-	*config = zalloc_maybe_bootmem(size, GFP_KERNEL);
-	if (*config == NULL) {
-		printk(KERN_ERR "PCI: "
-		       "not enough memory for fake configuration space\n");
-		goto error;
-	}
-	pr_debug("PCI: fake config area assigned 0x%016lx\n",
-		 (unsigned long)*config);
-
-	size = sizeof(struct celleb_pci_resource);
-	res = &private->res[devno][fn];
-	*res = zalloc_maybe_bootmem(size, GFP_KERNEL);
-	if (*res == NULL) {
-		printk(KERN_ERR
-		       "PCI: not enough memory for resource data space\n");
-		goto error;
-	}
-	pr_debug("PCI: res assigned 0x%016lx\n", (unsigned long)*res);
-
-	wi0 = of_get_property(node, "device-id", NULL);
-	wi1 = of_get_property(node, "vendor-id", NULL);
-	wi2 = of_get_property(node, "class-code", NULL);
-	wi3 = of_get_property(node, "revision-id", NULL);
-	if (!wi0 || !wi1 || !wi2 || !wi3) {
-		printk(KERN_ERR "PCI: Missing device tree properties.\n");
-		goto error;
-	}
-
-	celleb_config_write_fake(*config, PCI_DEVICE_ID, 2, wi0[0] & 0xffff);
-	celleb_config_write_fake(*config, PCI_VENDOR_ID, 2, wi1[0] & 0xffff);
-	pr_debug("class-code = 0x%08x\n", wi2[0]);
-
-	celleb_config_write_fake(*config, PCI_CLASS_PROG, 1, wi2[0] & 0xff);
-	celleb_config_write_fake(*config, PCI_CLASS_DEVICE, 2,
-				 (wi2[0] >> 8) & 0xffff);
-	celleb_config_write_fake(*config, PCI_REVISION_ID, 1, wi3[0]);
-
-	while (num_base_addr < MAX_PCI_BASE_ADDRS) {
-		result = of_address_to_resource(node,
-				num_base_addr, &(*res)->r[num_base_addr]);
-		if (result)
-			break;
-		num_base_addr++;
-	}
-
-	celleb_setup_pci_base_addrs(hose, devno, fn, num_base_addr);
-
-	li = of_get_property(node, "interrupts", &rlen);
-	if (!li) {
-		printk(KERN_ERR "PCI: interrupts not found.\n");
-		goto error;
-	}
-	val = li[0];
-	celleb_config_write_fake(*config, PCI_INTERRUPT_PIN, 1, 1);
-	celleb_config_write_fake(*config, PCI_INTERRUPT_LINE, 1, val);
-
-#ifdef DEBUG
-	pr_debug("PCI: %s irq=%ld\n", name, li[0]);
-	for (i = 0; i < 6; i++) {
-		celleb_config_read_fake(*config,
-					PCI_BASE_ADDRESS_0 + 0x4 * i, 4,
-					&val);
-		pr_debug("PCI: %s fn=%d base_address_%d=0x%x\n",
-			 name, fn, i, val);
-	}
-#endif
-
-	celleb_config_write_fake(*config, PCI_HEADER_TYPE, 1,
-				 PCI_HEADER_TYPE_NORMAL);
-
-	return 0;
-
-error:
-	if (mem_init_done) {
-		if (config && *config)
-			kfree(*config);
-		if (res && *res)
-			kfree(*res);
-
-	} else {
-		if (config && *config) {
-			size = 256;
-			memblock_free(__pa(*config), size);
-		}
-		if (res && *res) {
-			size = sizeof(struct celleb_pci_resource);
-			memblock_free(__pa(*res), size);
-		}
-	}
-
-	return 1;
-}
-
-static int __init phb_set_bus_ranges(struct device_node *dev,
-				     struct pci_controller *phb)
-{
-	const int *bus_range;
-	unsigned int len;
-
-	bus_range = of_get_property(dev, "bus-range", &len);
-	if (bus_range == NULL || len < 2 * sizeof(int))
-		return 1;
-
-	phb->first_busno = bus_range[0];
-	phb->last_busno = bus_range[1];
-
-	return 0;
-}
-
-static void __init celleb_alloc_private_mem(struct pci_controller *hose)
-{
-	hose->private_data =
-		zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
-			GFP_KERNEL);
-}
-
-static int __init celleb_setup_fake_pci(struct device_node *dev,
-					struct pci_controller *phb)
-{
-	struct device_node *node;
-
-	phb->ops = &celleb_fake_pci_ops;
-	celleb_alloc_private_mem(phb);
-
-	for (node = of_get_next_child(dev, NULL);
-	     node != NULL; node = of_get_next_child(dev, node))
-		celleb_setup_fake_pci_device(node, phb);
-
-	return 0;
-}
-
-static struct celleb_phb_spec celleb_fake_pci_spec __initdata = {
-	.setup = celleb_setup_fake_pci,
-};
-
-static const struct of_device_id celleb_phb_match[] __initconst = {
-	{
-		.name = "pci-pseudo",
-		.data = &celleb_fake_pci_spec,
-	}, {
-		.name = "epci",
-		.data = &celleb_epci_spec,
-	}, {
-		.name = "pcie",
-		.data = &celleb_pciex_spec,
-	}, {
-	},
-};
-
-int __init celleb_setup_phb(struct pci_controller *phb)
-{
-	struct device_node *dev = phb->dn;
-	const struct of_device_id *match;
-	const struct celleb_phb_spec *phb_spec;
-	int rc;
-
-	match = of_match_node(celleb_phb_match, dev);
-	if (!match)
-		return 1;
-
-	phb_set_bus_ranges(dev, phb);
-	phb->buid = 1;
-
-	phb_spec = match->data;
-	rc = (*phb_spec->setup)(dev, phb);
-	if (rc)
-		return 1;
-
-	if (phb_spec->ops)
-		iowa_register_bus(phb, phb_spec->ops,
-				  phb_spec->iowa_init,
-				  phb_spec->iowa_data);
-	return 0;
-}
-
-int celleb_pci_probe_mode(struct pci_bus *bus)
-{
-	return PCI_PROBE_DEVTREE;
-}
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
deleted file mode 100644
index a801fcc5f389..000000000000
--- a/arch/powerpc/platforms/cell/celleb_pci.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * pci prototypes for Celleb platform
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _CELLEB_PCI_H
-#define _CELLEB_PCI_H
-
-#include <linux/pci.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <asm/ppc-pci.h>
-#include <asm/io-workarounds.h>
-
-struct iowa_bus;
-
-struct celleb_phb_spec {
-	int (*setup)(struct device_node *, struct pci_controller *);
-	struct ppc_pci_io *ops;
-	int (*iowa_init)(struct iowa_bus *, void *);
-	void *iowa_data;
-};
-
-extern int celleb_setup_phb(struct pci_controller *);
-extern int celleb_pci_probe_mode(struct pci_bus *);
-
-extern struct celleb_phb_spec celleb_epci_spec;
-extern struct celleb_phb_spec celleb_pciex_spec;
-
-#endif /* _CELLEB_PCI_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc.h b/arch/powerpc/platforms/cell/celleb_scc.h
deleted file mode 100644
index b596a711c348..000000000000
--- a/arch/powerpc/platforms/cell/celleb_scc.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * SCC (Super Companion Chip) definitions
- *
- * (C) Copyright 2004-2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _CELLEB_SCC_H
-#define _CELLEB_SCC_H
-
-#define PCI_VENDOR_ID_TOSHIBA_2                 0x102f
-#define PCI_DEVICE_ID_TOSHIBA_SCC_PCIEXC_BRIDGE 0x01b0
-#define PCI_DEVICE_ID_TOSHIBA_SCC_EPCI_BRIDGE   0x01b1
-#define PCI_DEVICE_ID_TOSHIBA_SCC_BRIDGE        0x01b2
-#define PCI_DEVICE_ID_TOSHIBA_SCC_GBE           0x01b3
-#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA           0x01b4
-#define PCI_DEVICE_ID_TOSHIBA_SCC_USB2          0x01b5
-#define PCI_DEVICE_ID_TOSHIBA_SCC_USB           0x01b6
-#define PCI_DEVICE_ID_TOSHIBA_SCC_ENCDEC        0x01b7
-
-#define SCC_EPCI_REG            0x0000d000
-
-/* EPCI registers */
-#define SCC_EPCI_CNF10_REG      0x010
-#define SCC_EPCI_CNF14_REG      0x014
-#define SCC_EPCI_CNF18_REG      0x018
-#define SCC_EPCI_PVBAT          0x100
-#define SCC_EPCI_VPMBAT         0x104
-#define SCC_EPCI_VPIBAT         0x108
-#define SCC_EPCI_VCSR           0x110
-#define SCC_EPCI_VIENAB         0x114
-#define SCC_EPCI_VISTAT         0x118
-#define SCC_EPCI_VRDCOUNT       0x124
-#define SCC_EPCI_BAM0           0x12c
-#define SCC_EPCI_BAM1           0x134
-#define SCC_EPCI_BAM2           0x13c
-#define SCC_EPCI_IADR           0x164
-#define SCC_EPCI_CLKRST         0x800
-#define SCC_EPCI_INTSET         0x804
-#define SCC_EPCI_STATUS         0x808
-#define SCC_EPCI_ABTSET         0x80c
-#define SCC_EPCI_WATRP          0x810
-#define SCC_EPCI_DUMYRADR       0x814
-#define SCC_EPCI_SWRESP         0x818
-#define SCC_EPCI_CNTOPT         0x81c
-#define SCC_EPCI_ECMODE         0xf00
-#define SCC_EPCI_IOM_AC_NUM     5
-#define SCC_EPCI_IOM_ACTE(n)    (0xf10 + (n) * 4)
-#define SCC_EPCI_IOT_AC_NUM     4
-#define SCC_EPCI_IOT_ACTE(n)    (0xf30 + (n) * 4)
-#define SCC_EPCI_MAEA           0xf50
-#define SCC_EPCI_MAEC           0xf54
-#define SCC_EPCI_CKCTRL         0xff0
-
-/* bits for SCC_EPCI_VCSR */
-#define SCC_EPCI_VCSR_FRE       0x00020000
-#define SCC_EPCI_VCSR_FWE       0x00010000
-#define SCC_EPCI_VCSR_DR        0x00000400
-#define SCC_EPCI_VCSR_SR        0x00000008
-#define SCC_EPCI_VCSR_AT        0x00000004
-
-/* bits for SCC_EPCI_VIENAB/SCC_EPCI_VISTAT */
-#define SCC_EPCI_VISTAT_PMPE    0x00000008
-#define SCC_EPCI_VISTAT_PMFE    0x00000004
-#define SCC_EPCI_VISTAT_PRA     0x00000002
-#define SCC_EPCI_VISTAT_PRD     0x00000001
-#define SCC_EPCI_VISTAT_ALL     0x0000000f
-
-#define SCC_EPCI_VIENAB_PMPEE   0x00000008
-#define SCC_EPCI_VIENAB_PMFEE   0x00000004
-#define SCC_EPCI_VIENAB_PRA     0x00000002
-#define SCC_EPCI_VIENAB_PRD     0x00000001
-#define SCC_EPCI_VIENAB_ALL     0x0000000f
-
-/* bits for SCC_EPCI_CLKRST */
-#define SCC_EPCI_CLKRST_CKS_MASK 0x00030000
-#define SCC_EPCI_CLKRST_CKS_2   0x00000000
-#define SCC_EPCI_CLKRST_CKS_4   0x00010000
-#define SCC_EPCI_CLKRST_CKS_8   0x00020000
-#define SCC_EPCI_CLKRST_PCICRST 0x00000400
-#define SCC_EPCI_CLKRST_BC      0x00000200
-#define SCC_EPCI_CLKRST_PCIRST  0x00000100
-#define SCC_EPCI_CLKRST_PCKEN   0x00000001
-
-/* bits for SCC_EPCI_INTSET/SCC_EPCI_STATUS */
-#define SCC_EPCI_INT_2M         0x01000000
-#define SCC_EPCI_INT_RERR       0x00200000
-#define SCC_EPCI_INT_SERR       0x00100000
-#define SCC_EPCI_INT_PRTER      0x00080000
-#define SCC_EPCI_INT_SER        0x00040000
-#define SCC_EPCI_INT_PER        0x00020000
-#define SCC_EPCI_INT_PAI        0x00010000
-#define SCC_EPCI_INT_1M         0x00000100
-#define SCC_EPCI_INT_PME        0x00000010
-#define SCC_EPCI_INT_INTD       0x00000008
-#define SCC_EPCI_INT_INTC       0x00000004
-#define SCC_EPCI_INT_INTB       0x00000002
-#define SCC_EPCI_INT_INTA       0x00000001
-#define SCC_EPCI_INT_DEVINT     0x0000000f
-#define SCC_EPCI_INT_ALL        0x003f001f
-#define SCC_EPCI_INT_ALLERR     0x003f0000
-
-/* bits for SCC_EPCI_CKCTRL */
-#define SCC_EPCI_CKCTRL_CRST0   0x00010000
-#define SCC_EPCI_CKCTRL_CRST1   0x00020000
-#define SCC_EPCI_CKCTRL_OCLKEN  0x00000100
-#define SCC_EPCI_CKCTRL_LCLKEN  0x00000001
-
-#define SCC_EPCI_IDSEL_AD_TO_SLOT(ad)       ((ad) - 10)
-#define SCC_EPCI_MAX_DEVNU      SCC_EPCI_IDSEL_AD_TO_SLOT(32)
-
-/* bits for SCC_EPCI_CNTOPT */
-#define SCC_EPCI_CNTOPT_O2PMB   0x00000002
-
-/* SCC PCIEXC SMMIO registers */
-#define PEXCADRS		0x000
-#define PEXCWDATA		0x004
-#define PEXCRDATA		0x008
-#define PEXDADRS		0x010
-#define PEXDCMND		0x014
-#define PEXDWDATA		0x018
-#define PEXDRDATA		0x01c
-#define PEXREQID		0x020
-#define PEXTIDMAP		0x024
-#define PEXINTMASK		0x028
-#define PEXINTSTS		0x02c
-#define PEXAERRMASK		0x030
-#define PEXAERRSTS		0x034
-#define PEXPRERRMASK		0x040
-#define PEXPRERRSTS		0x044
-#define PEXPRERRID01		0x048
-#define PEXPRERRID23		0x04c
-#define PEXVDMASK		0x050
-#define PEXVDSTS		0x054
-#define PEXRCVCPLIDA		0x060
-#define PEXLENERRIDA		0x068
-#define PEXPHYPLLST		0x070
-#define PEXDMRDEN0		0x100
-#define PEXDMRDADR0		0x104
-#define PEXDMRDENX		0x110
-#define PEXDMRDADRX		0x114
-#define PEXECMODE		0xf00
-#define PEXMAEA(n)		(0xf50 + (8 * n))
-#define PEXMAEC(n)		(0xf54 + (8 * n))
-#define PEXCCRCTRL		0xff0
-
-/* SCC PCIEXC bits and shifts for PEXCADRS */
-#define PEXCADRS_BYTE_EN_SHIFT		20
-#define PEXCADRS_CMD_SHIFT		16
-#define PEXCADRS_CMD_READ		(0xa << PEXCADRS_CMD_SHIFT)
-#define PEXCADRS_CMD_WRITE		(0xb << PEXCADRS_CMD_SHIFT)
-
-/* SCC PCIEXC shifts for PEXDADRS */
-#define PEXDADRS_BUSNO_SHIFT		20
-#define PEXDADRS_DEVNO_SHIFT		15
-#define PEXDADRS_FUNCNO_SHIFT		12
-
-/* SCC PCIEXC bits and shifts for PEXDCMND */
-#define PEXDCMND_BYTE_EN_SHIFT		4
-#define PEXDCMND_IO_READ		0x2
-#define PEXDCMND_IO_WRITE		0x3
-#define PEXDCMND_CONFIG_READ		0xa
-#define PEXDCMND_CONFIG_WRITE		0xb
-
-/* SCC PCIEXC bits for PEXPHYPLLST */
-#define PEXPHYPLLST_PEXPHYAPLLST	0x00000001
-
-/* SCC PCIEXC bits for PEXECMODE */
-#define PEXECMODE_ALL_THROUGH		0x00000000
-#define PEXECMODE_ALL_8BIT		0x00550155
-#define PEXECMODE_ALL_16BIT		0x00aa02aa
-
-/* SCC PCIEXC bits for PEXCCRCTRL */
-#define PEXCCRCTRL_PEXIPCOREEN		0x00040000
-#define PEXCCRCTRL_PEXIPCONTEN		0x00020000
-#define PEXCCRCTRL_PEXPHYPLLEN		0x00010000
-#define PEXCCRCTRL_PCIEXCAOCKEN		0x00000100
-
-/* SCC PCIEXC port configuration registers */
-#define PEXTCERRCHK		0x21c
-#define PEXTAMAPB0		0x220
-#define PEXTAMAPL0		0x224
-#define PEXTAMAPB(n)		(PEXTAMAPB0 + 8 * (n))
-#define PEXTAMAPL(n)		(PEXTAMAPL0 + 8 * (n))
-#define PEXCHVC0P		0x500
-#define PEXCHVC0NP		0x504
-#define PEXCHVC0C		0x508
-#define PEXCDVC0P		0x50c
-#define PEXCDVC0NP		0x510
-#define PEXCDVC0C		0x514
-#define PEXCHVCXP		0x518
-#define PEXCHVCXNP		0x51c
-#define PEXCHVCXC		0x520
-#define PEXCDVCXP		0x524
-#define PEXCDVCXNP		0x528
-#define PEXCDVCXC		0x52c
-#define PEXCTTRG		0x530
-#define PEXTSCTRL		0x700
-#define PEXTSSTS		0x704
-#define PEXSKPCTRL		0x708
-
-/* UHC registers */
-#define SCC_UHC_CKRCTRL         0xff0
-#define SCC_UHC_ECMODE          0xf00
-
-/* bits for SCC_UHC_CKRCTRL */
-#define SCC_UHC_F48MCKLEN       0x00000001
-#define SCC_UHC_P_SUSPEND       0x00000002
-#define SCC_UHC_PHY_SUSPEND_SEL 0x00000004
-#define SCC_UHC_HCLKEN          0x00000100
-#define SCC_UHC_USBEN           0x00010000
-#define SCC_UHC_USBCEN          0x00020000
-#define SCC_UHC_PHYEN           0x00040000
-
-/* bits for SCC_UHC_ECMODE */
-#define SCC_UHC_ECMODE_BY_BYTE  0x00000555
-#define SCC_UHC_ECMODE_BY_WORD  0x00000aaa
-
-#endif /* _CELLEB_SCC_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c
deleted file mode 100644
index 9438bbed402f..000000000000
--- a/arch/powerpc/platforms/cell/celleb_scc_epci.c
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * Support for SCC external PCI
- *
- * (C) Copyright 2004-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/pci_regs.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-#include "celleb_scc.h"
-#include "celleb_pci.h"
-
-#define MAX_PCI_DEVICES   32
-#define MAX_PCI_FUNCTIONS  8
-
-#define iob()  __asm__ __volatile__("eieio; sync":::"memory")
-
-static inline PCI_IO_ADDR celleb_epci_get_epci_base(
-					struct pci_controller *hose)
-{
-	/*
-	 * Note:
-	 * Celleb epci uses cfg_addr as a base address for
-	 * epci control registers.
-	 */
-
-	return hose->cfg_addr;
-}
-
-static inline PCI_IO_ADDR celleb_epci_get_epci_cfg(
-					struct pci_controller *hose)
-{
-	/*
-	 * Note:
-	 * Celleb epci uses cfg_data as a base address for
-	 * configuration area for epci devices.
-	 */
-
-	return hose->cfg_data;
-}
-
-static inline void clear_and_disable_master_abort_interrupt(
-					struct pci_controller *hose)
-{
-	PCI_IO_ADDR epci_base;
-	PCI_IO_ADDR reg;
-	epci_base = celleb_epci_get_epci_base(hose);
-	reg = epci_base + PCI_COMMAND;
-	out_be32(reg, in_be32(reg) | (PCI_STATUS_REC_MASTER_ABORT << 16));
-}
-
-static int celleb_epci_check_abort(struct pci_controller *hose,
-				   PCI_IO_ADDR addr)
-{
-	PCI_IO_ADDR reg;
-	PCI_IO_ADDR epci_base;
-	u32 val;
-
-	iob();
-	epci_base = celleb_epci_get_epci_base(hose);
-
-	reg = epci_base + PCI_COMMAND;
-	val = in_be32(reg);
-
-	if (val & (PCI_STATUS_REC_MASTER_ABORT << 16)) {
-		out_be32(reg,
-			 (val & 0xffff) | (PCI_STATUS_REC_MASTER_ABORT << 16));
-
-		/* clear PCI Controller error, FRE, PMFE */
-		reg = epci_base + SCC_EPCI_STATUS;
-		out_be32(reg, SCC_EPCI_INT_PAI);
-
-		reg = epci_base + SCC_EPCI_VCSR;
-		val = in_be32(reg) & 0xffff;
-		val |= SCC_EPCI_VCSR_FRE;
-		out_be32(reg, val);
-
-		reg = epci_base + SCC_EPCI_VISTAT;
-		out_be32(reg, SCC_EPCI_VISTAT_PMFE);
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	}
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus,
-		struct pci_controller *hose, unsigned int devfn, int where)
-{
-	PCI_IO_ADDR addr;
-
-	if (bus != hose->bus)
-		addr = celleb_epci_get_epci_cfg(hose) +
-		       (((bus->number & 0xff) << 16)
-			| ((devfn & 0xff) << 8)
-			| (where & 0xff)
-			| 0x01000000);
-	else
-		addr = celleb_epci_get_epci_cfg(hose) +
-		       (((devfn & 0xff) << 8) | (where & 0xff));
-
-	pr_debug("EPCI: config_addr = 0x%p\n", addr);
-
-	return addr;
-}
-
-static int celleb_epci_read_config(struct pci_bus *bus,
-			unsigned int devfn, int where, int size, u32 *val)
-{
-	PCI_IO_ADDR epci_base;
-	PCI_IO_ADDR addr;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-
-	/* allignment check */
-	BUG_ON(where % size);
-
-	if (!celleb_epci_get_epci_cfg(hose))
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	if (bus->number == hose->first_busno && devfn == 0) {
-		/* EPCI controller self */
-
-		epci_base = celleb_epci_get_epci_base(hose);
-		addr = epci_base + where;
-
-		switch (size) {
-		case 1:
-			*val = in_8(addr);
-			break;
-		case 2:
-			*val = in_be16(addr);
-			break;
-		case 4:
-			*val = in_be32(addr);
-			break;
-		default:
-			return PCIBIOS_DEVICE_NOT_FOUND;
-		}
-
-	} else {
-
-		clear_and_disable_master_abort_interrupt(hose);
-		addr = celleb_epci_make_config_addr(bus, hose, devfn, where);
-
-		switch (size) {
-		case 1:
-			*val = in_8(addr);
-			break;
-		case 2:
-			*val = in_le16(addr);
-			break;
-		case 4:
-			*val = in_le32(addr);
-			break;
-		default:
-			return PCIBIOS_DEVICE_NOT_FOUND;
-		}
-	}
-
-	pr_debug("EPCI: "
-		 "addr=0x%p, devfn=0x%x, where=0x%x, size=0x%x, val=0x%x\n",
-		 addr, devfn, where, size, *val);
-
-	return celleb_epci_check_abort(hose, NULL);
-}
-
-static int celleb_epci_write_config(struct pci_bus *bus,
-			unsigned int devfn, int where, int size, u32 val)
-{
-	PCI_IO_ADDR epci_base;
-	PCI_IO_ADDR addr;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-
-	/* allignment check */
-	BUG_ON(where % size);
-
-	if (!celleb_epci_get_epci_cfg(hose))
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	if (bus->number == hose->first_busno && devfn == 0) {
-		/* EPCI controller self */
-
-		epci_base = celleb_epci_get_epci_base(hose);
-		addr = epci_base + where;
-
-		switch (size) {
-		case 1:
-			out_8(addr, val);
-			break;
-		case 2:
-			out_be16(addr, val);
-			break;
-		case 4:
-			out_be32(addr, val);
-			break;
-		default:
-			return PCIBIOS_DEVICE_NOT_FOUND;
-		}
-
-	} else {
-
-		clear_and_disable_master_abort_interrupt(hose);
-		addr = celleb_epci_make_config_addr(bus, hose, devfn, where);
-
-		switch (size) {
-		case 1:
-			out_8(addr, val);
-			break;
-		case 2:
-			out_le16(addr, val);
-			break;
-		case 4:
-			out_le32(addr, val);
-			break;
-		default:
-			return PCIBIOS_DEVICE_NOT_FOUND;
-		}
-	}
-
-	return celleb_epci_check_abort(hose, addr);
-}
-
-struct pci_ops celleb_epci_ops = {
-	.read = celleb_epci_read_config,
-	.write = celleb_epci_write_config,
-};
-
-/* to be moved in FW */
-static int __init celleb_epci_init(struct pci_controller *hose)
-{
-	u32 val;
-	PCI_IO_ADDR reg;
-	PCI_IO_ADDR epci_base;
-	int hwres = 0;
-
-	epci_base = celleb_epci_get_epci_base(hose);
-
-	/* PCI core reset(Internal bus and PCI clock) */
-	reg = epci_base + SCC_EPCI_CKCTRL;
-	val = in_be32(reg);
-	if (val == 0x00030101)
-		hwres = 1;
-	else {
-		val &= ~(SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1);
-		out_be32(reg, val);
-
-		/* set PCI core clock */
-		val = in_be32(reg);
-		val |= (SCC_EPCI_CKCTRL_OCLKEN | SCC_EPCI_CKCTRL_LCLKEN);
-		out_be32(reg, val);
-
-		/* release PCI core reset (internal bus) */
-		val = in_be32(reg);
-		val |= SCC_EPCI_CKCTRL_CRST0;
-		out_be32(reg, val);
-
-		/* set PCI clock select */
-		reg = epci_base + SCC_EPCI_CLKRST;
-		val = in_be32(reg);
-		val &= ~SCC_EPCI_CLKRST_CKS_MASK;
-		val |= SCC_EPCI_CLKRST_CKS_2;
-		out_be32(reg, val);
-
-		/* set arbiter */
-		reg = epci_base + SCC_EPCI_ABTSET;
-		out_be32(reg, 0x0f1f001f);	/* temporary value */
-
-		/* buffer on */
-		reg = epci_base + SCC_EPCI_CLKRST;
-		val = in_be32(reg);
-		val |= SCC_EPCI_CLKRST_BC;
-		out_be32(reg, val);
-
-		/* PCI clock enable */
-		val = in_be32(reg);
-		val |= SCC_EPCI_CLKRST_PCKEN;
-		out_be32(reg, val);
-
-		/* release PCI core reset (all) */
-		reg = epci_base + SCC_EPCI_CKCTRL;
-		val = in_be32(reg);
-		val |= (SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1);
-		out_be32(reg, val);
-
-		/* set base translation registers. (already set by Beat) */
-
-		/* set base address masks. (already set by Beat) */
-	}
-
-	/* release interrupt masks and clear all interrupts */
-	reg = epci_base + SCC_EPCI_INTSET;
-	out_be32(reg, 0x013f011f);	/* all interrupts enable */
-	reg = epci_base + SCC_EPCI_VIENAB;
-	val = SCC_EPCI_VIENAB_PMPEE | SCC_EPCI_VIENAB_PMFEE;
-	out_be32(reg, val);
-	reg = epci_base + SCC_EPCI_STATUS;
-	out_be32(reg, 0xffffffff);
-	reg = epci_base + SCC_EPCI_VISTAT;
-	out_be32(reg, 0xffffffff);
-
-	/* disable PCI->IB address translation */
-	reg = epci_base + SCC_EPCI_VCSR;
-	val = in_be32(reg);
-	val &= ~(SCC_EPCI_VCSR_DR | SCC_EPCI_VCSR_AT);
-	out_be32(reg, val);
-
-	/* set base addresses. (no need to set?) */
-
-	/* memory space, bus master enable */
-	reg = epci_base + PCI_COMMAND;
-	val = PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
-	out_be32(reg, val);
-
-	/* endian mode setup */
-	reg = epci_base + SCC_EPCI_ECMODE;
-	val = 0x00550155;
-	out_be32(reg, val);
-
-	/* set control option */
-	reg = epci_base + SCC_EPCI_CNTOPT;
-	val = in_be32(reg);
-	val |= SCC_EPCI_CNTOPT_O2PMB;
-	out_be32(reg, val);
-
-	/* XXX: temporay: set registers for address conversion setup */
-	reg = epci_base + SCC_EPCI_CNF10_REG;
-	out_be32(reg, 0x80000008);
-	reg = epci_base + SCC_EPCI_CNF14_REG;
-	out_be32(reg, 0x40000008);
-
-	reg = epci_base + SCC_EPCI_BAM0;
-	out_be32(reg, 0x80000000);
-	reg = epci_base + SCC_EPCI_BAM1;
-	out_be32(reg, 0xe0000000);
-
-	reg = epci_base + SCC_EPCI_PVBAT;
-	out_be32(reg, 0x80000000);
-
-	if (!hwres) {
-		/* release external PCI reset */
-		reg = epci_base + SCC_EPCI_CLKRST;
-		val = in_be32(reg);
-		val |= SCC_EPCI_CLKRST_PCIRST;
-		out_be32(reg, val);
-	}
-
-	return 0;
-}
-
-static int __init celleb_setup_epci(struct device_node *node,
-				    struct pci_controller *hose)
-{
-	struct resource r;
-
-	pr_debug("PCI: celleb_setup_epci()\n");
-
-	/*
-	 * Note:
-	 * Celleb epci uses cfg_addr and cfg_data member of
-	 * pci_controller structure in irregular way.
-	 *
-	 * cfg_addr is used to map for control registers of
-	 * celleb epci.
-	 *
-	 * cfg_data is used for configuration area of devices
-	 * on Celleb epci buses.
-	 */
-
-	if (of_address_to_resource(node, 0, &r))
-		goto error;
-	hose->cfg_addr = ioremap(r.start, resource_size(&r));
-	if (!hose->cfg_addr)
-		goto error;
-	pr_debug("EPCI: cfg_addr map 0x%016llx->0x%016lx + 0x%016llx\n",
-		 r.start, (unsigned long)hose->cfg_addr, resource_size(&r));
-
-	if (of_address_to_resource(node, 2, &r))
-		goto error;
-	hose->cfg_data = ioremap(r.start, resource_size(&r));
-	if (!hose->cfg_data)
-		goto error;
-	pr_debug("EPCI: cfg_data map 0x%016llx->0x%016lx + 0x%016llx\n",
-		 r.start, (unsigned long)hose->cfg_data, resource_size(&r));
-
-	hose->ops = &celleb_epci_ops;
-	celleb_epci_init(hose);
-
-	return 0;
-
-error:
-	if (hose->cfg_addr)
-		iounmap(hose->cfg_addr);
-
-	if (hose->cfg_data)
-		iounmap(hose->cfg_data);
-	return 1;
-}
-
-struct celleb_phb_spec celleb_epci_spec __initdata = {
-	.setup = celleb_setup_epci,
-	.ops = &spiderpci_ops,
-	.iowa_init = &spiderpci_iowa_init,
-	.iowa_data = (void *)0,
-};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
deleted file mode 100644
index 94170e4f2ce7..000000000000
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ /dev/null
@@ -1,538 +0,0 @@
-/*
- * Support for Celleb PCI-Express.
- *
- * (C) Copyright 2007-2008 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/iommu.h>
-#include <asm/byteorder.h>
-
-#include "celleb_scc.h"
-#include "celleb_pci.h"
-
-#define PEX_IN(base, off)	in_be32((void __iomem *)(base) + (off))
-#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data))
-
-static void scc_pciex_io_flush(struct iowa_bus *bus)
-{
-	(void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0);
-}
-
-/*
- * Memory space access to device on PCIEX
- */
-#define PCIEX_MMIO_READ(name, ret)					\
-static ret scc_pciex_##name(const PCI_IO_ADDR addr)			\
-{									\
-	ret val = __do_##name(addr);					\
-	scc_pciex_io_flush(iowa_mem_find_bus(addr));			\
-	return val;							\
-}
-
-#define PCIEX_MMIO_READ_STR(name)					\
-static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf,		\
-			     unsigned long count)			\
-{									\
-	__do_##name(addr, buf, count);					\
-	scc_pciex_io_flush(iowa_mem_find_bus(addr));			\
-}
-
-PCIEX_MMIO_READ(readb, u8)
-PCIEX_MMIO_READ(readw, u16)
-PCIEX_MMIO_READ(readl, u32)
-PCIEX_MMIO_READ(readq, u64)
-PCIEX_MMIO_READ(readw_be, u16)
-PCIEX_MMIO_READ(readl_be, u32)
-PCIEX_MMIO_READ(readq_be, u64)
-PCIEX_MMIO_READ_STR(readsb)
-PCIEX_MMIO_READ_STR(readsw)
-PCIEX_MMIO_READ_STR(readsl)
-
-static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
-				    unsigned long n)
-{
-	__do_memcpy_fromio(dest, src, n);
-	scc_pciex_io_flush(iowa_mem_find_bus(src));
-}
-
-/*
- * I/O port access to devices on PCIEX.
- */
-
-static inline unsigned long get_bus_address(struct pci_controller *phb,
-					    unsigned long port)
-{
-	return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE);
-}
-
-static u32 scc_pciex_read_port(struct pci_controller *phb,
-			       unsigned long port, int size)
-{
-	unsigned int byte_enable;
-	unsigned int cmd, shift;
-	unsigned long addr;
-	u32 data, ret;
-
-	BUG_ON(((port & 0x3ul) + size) > 4);
-
-	addr = get_bus_address(phb, port);
-	shift = addr & 0x3ul;
-	byte_enable = ((1 << size) - 1) << shift;
-	cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
-	PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
-	PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
-	data = PEX_IN(phb->cfg_addr, PEXDRDATA);
-	ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8));
-
-	pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x,"
-		 " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable,
-		 cmd, data, ret);
-
-	return ret;
-}
-
-static void scc_pciex_write_port(struct pci_controller *phb,
-				 unsigned long port, int size, u32 val)
-{
-	unsigned int byte_enable;
-	unsigned int cmd, shift;
-	unsigned long addr;
-	u32 data;
-
-	BUG_ON(((port & 0x3ul) + size) > 4);
-
-	addr = get_bus_address(phb, port);
-	shift = addr & 0x3ul;
-	byte_enable = ((1 << size) - 1) << shift;
-	cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
-	data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8);
-	PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
-	PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
-	PEX_OUT(phb->cfg_addr, PEXDWDATA, data);
-
-	pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x,"
-		 " be=%x, cmd=%x, data=%x\n", port, addr, size, val,
-		 byte_enable, cmd, data);
-}
-
-static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port)
-{
-	return (u8)scc_pciex_read_port(phb, port, 1);
-}
-
-static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port)
-{
-	u32 data;
-	if ((port & 0x3ul) < 3)
-		data = scc_pciex_read_port(phb, port, 2);
-	else {
-		u32 d1 = scc_pciex_read_port(phb, port, 1);
-		u32 d2 = scc_pciex_read_port(phb, port + 1, 1);
-		data = d1 | (d2 << 8);
-	}
-	return (u16)data;
-}
-
-static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port)
-{
-	unsigned int mod = port & 0x3ul;
-	u32 data;
-	if (mod == 0)
-		data = scc_pciex_read_port(phb, port, 4);
-	else {
-		u32 d1 = scc_pciex_read_port(phb, port, 4 - mod);
-		u32 d2 = scc_pciex_read_port(phb, port + 1, mod);
-		data = d1 | (d2 << (mod * 8));
-	}
-	return data;
-}
-
-static void __scc_pciex_outb(struct pci_controller *phb,
-			     u8 val, unsigned long port)
-{
-	scc_pciex_write_port(phb, port, 1, (u32)val);
-}
-
-static void __scc_pciex_outw(struct pci_controller *phb,
-			     u16 val, unsigned long port)
-{
-	if ((port & 0x3ul) < 3)
-		scc_pciex_write_port(phb, port, 2, (u32)val);
-	else {
-		u32 d1 = val & 0x000000FF;
-		u32 d2 = (val & 0x0000FF00) >> 8;
-		scc_pciex_write_port(phb, port, 1, d1);
-		scc_pciex_write_port(phb, port + 1, 1, d2);
-	}
-}
-
-static void __scc_pciex_outl(struct pci_controller *phb,
-			     u32 val, unsigned long port)
-{
-	unsigned int mod = port & 0x3ul;
-	if (mod == 0)
-		scc_pciex_write_port(phb, port, 4, val);
-	else {
-		u32 d1 = val & (0xFFFFFFFFul >> (mod * 8));
-		u32 d2 = val >> ((4 - mod) * 8);
-		scc_pciex_write_port(phb, port, 4 - mod, d1);
-		scc_pciex_write_port(phb, port + 1, mod, d2);
-	}
-}
-
-#define PCIEX_PIO_FUNC(size, name)					\
-static u##size scc_pciex_in##name(unsigned long port)			\
-{									\
-	struct iowa_bus *bus = iowa_pio_find_bus(port);			\
-	u##size data = __scc_pciex_in##name(bus->phb, port);		\
-	scc_pciex_io_flush(bus);					\
-	return data;							\
-}									\
-static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \
-{									\
-	struct iowa_bus *bus = iowa_pio_find_bus(p);			\
-	__le##size *dst = b;						\
-	for (; c != 0; c--, dst++)					\
-		*dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \
-	scc_pciex_io_flush(bus);					\
-}									\
-static void scc_pciex_out##name(u##size val, unsigned long port)	\
-{									\
-	struct iowa_bus *bus = iowa_pio_find_bus(port);			\
-	__scc_pciex_out##name(bus->phb, val, port);			\
-}									\
-static void scc_pciex_outs##name(unsigned long p, const void *b,	\
-				 unsigned long c)			\
-{									\
-	struct iowa_bus *bus = iowa_pio_find_bus(p);			\
-	const __le##size *src = b;					\
-	for (; c != 0; c--, src++)					\
-		__scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \
-}
-#define __le8 u8
-#define cpu_to_le8(x) (x)
-#define le8_to_cpu(x) (x)
-PCIEX_PIO_FUNC(8, b)
-PCIEX_PIO_FUNC(16, w)
-PCIEX_PIO_FUNC(32, l)
-
-static struct ppc_pci_io scc_pciex_ops = {
-	.readb = scc_pciex_readb,
-	.readw = scc_pciex_readw,
-	.readl = scc_pciex_readl,
-	.readq = scc_pciex_readq,
-	.readw_be = scc_pciex_readw_be,
-	.readl_be = scc_pciex_readl_be,
-	.readq_be = scc_pciex_readq_be,
-	.readsb = scc_pciex_readsb,
-	.readsw = scc_pciex_readsw,
-	.readsl = scc_pciex_readsl,
-	.memcpy_fromio = scc_pciex_memcpy_fromio,
-	.inb = scc_pciex_inb,
-	.inw = scc_pciex_inw,
-	.inl = scc_pciex_inl,
-	.outb = scc_pciex_outb,
-	.outw = scc_pciex_outw,
-	.outl = scc_pciex_outl,
-	.insb = scc_pciex_insb,
-	.insw = scc_pciex_insw,
-	.insl = scc_pciex_insl,
-	.outsb = scc_pciex_outsb,
-	.outsw = scc_pciex_outsw,
-	.outsl = scc_pciex_outsl,
-};
-
-static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
-{
-	dma_addr_t dummy_page_da;
-	void *dummy_page_va;
-
-	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!dummy_page_va) {
-		pr_err("PCIEX:Alloc dummy_page_va failed\n");
-		return -1;
-	}
-
-	dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va,
-				       PAGE_SIZE, DMA_FROM_DEVICE);
-	if (dma_mapping_error(bus->phb->parent, dummy_page_da)) {
-		pr_err("PCIEX:Map dummy page failed.\n");
-		kfree(dummy_page_va);
-		return -1;
-	}
-
-	PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da);
-
-	return 0;
-}
-
-/*
- * config space access
- */
-#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \
-	((uint32_t)(((addr) & ~0x3UL) | \
-	((bus_no) << PEXDADRS_BUSNO_SHIFT) | \
-	((dev_no)  << PEXDADRS_DEVNO_SHIFT) | \
-	((func_no) << PEXDADRS_FUNCNO_SHIFT)))
-
-#define MK_PEXDCMND_BYTE_EN(addr, size) \
-	((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT)
-#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size))
-
-static uint32_t config_read_pciex_dev(unsigned int __iomem *base,
-		uint64_t bus_no, uint64_t dev_no, uint64_t func_no,
-		uint64_t off, uint64_t size)
-{
-	uint32_t ret;
-	uint32_t addr, cmd;
-
-	addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
-	cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size);
-	PEX_OUT(base, PEXDADRS, addr);
-	PEX_OUT(base, PEXDCMND, cmd);
-	ret = (PEX_IN(base, PEXDRDATA)
-		>> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1);
-	return ret;
-}
-
-static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no,
-	uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size,
-	uint32_t data)
-{
-	uint32_t addr, cmd;
-
-	addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
-	cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size);
-	PEX_OUT(base, PEXDADRS, addr);
-	PEX_OUT(base, PEXDCMND, cmd);
-	PEX_OUT(base, PEXDWDATA,
-		(data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8));
-}
-
-#define MK_PEXCADRS_BYTE_EN(off, len) \
-	((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT)
-#define MK_PEXCADRS(cmd, addr, size) \
-	((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3))
-static uint32_t config_read_pciex_rc(unsigned int __iomem *base,
-				     uint32_t where, uint32_t size)
-{
-	PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size));
-	return (PEX_IN(base, PEXCRDATA)
-		>> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1);
-}
-
-static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where,
-				  uint32_t size, uint32_t val)
-{
-	uint32_t data;
-
-	data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8);
-	PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size));
-	PEX_OUT(base, PEXCWDATA, data);
-}
-
-/* Interfaces */
-/* Note: Work-around
- *  On SCC PCIEXC, one device is seen on all 32 dev_no.
- *  As SCC PCIEXC can have only one device on the bus, we look only one dev_no.
- * (dev_no = 1)
- */
-static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
-				 int where, int size, unsigned int *val)
-{
-	struct pci_controller *phb = pci_bus_to_host(bus);
-
-	if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) {
-		*val = ~0;
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	}
-
-	if (bus->number == 0 && PCI_SLOT(devfn) == 0)
-		*val = config_read_pciex_rc(phb->cfg_addr, where, size);
-	else
-		*val = config_read_pciex_dev(phb->cfg_addr, bus->number,
-				PCI_SLOT(devfn), PCI_FUNC(devfn), where, size);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
-				  int where, int size, unsigned int val)
-{
-	struct pci_controller *phb = pci_bus_to_host(bus);
-
-	if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	if (bus->number == 0 && PCI_SLOT(devfn) == 0)
-		config_write_pciex_rc(phb->cfg_addr, where, size, val);
-	else
-		config_write_pciex_dev(phb->cfg_addr, bus->number,
-			PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops scc_pciex_pci_ops = {
-	.read = scc_pciex_read_config,
-	.write = scc_pciex_write_config,
-};
-
-static void pciex_clear_intr_all(unsigned int __iomem *base)
-{
-	PEX_OUT(base, PEXAERRSTS, 0xffffffff);
-	PEX_OUT(base, PEXPRERRSTS, 0xffffffff);
-	PEX_OUT(base, PEXINTSTS, 0xffffffff);
-}
-
-#if 0
-static void pciex_disable_intr_all(unsigned int *base)
-{
-	PEX_OUT(base, PEXINTMASK,   0x0);
-	PEX_OUT(base, PEXAERRMASK,  0x0);
-	PEX_OUT(base, PEXPRERRMASK, 0x0);
-	PEX_OUT(base, PEXVDMASK,    0x0);
-}
-#endif
-
-static void pciex_enable_intr_all(unsigned int __iomem *base)
-{
-	PEX_OUT(base, PEXINTMASK, 0x0000e7f1);
-	PEX_OUT(base, PEXAERRMASK, 0x03ff01ff);
-	PEX_OUT(base, PEXPRERRMASK, 0x0001010f);
-	PEX_OUT(base, PEXVDMASK, 0x00000001);
-}
-
-static void pciex_check_status(unsigned int __iomem *base)
-{
-	uint32_t err = 0;
-	uint32_t intsts, aerr, prerr, rcvcp, lenerr;
-	uint32_t maea, maec;
-
-	intsts = PEX_IN(base, PEXINTSTS);
-	aerr = PEX_IN(base, PEXAERRSTS);
-	prerr = PEX_IN(base, PEXPRERRSTS);
-	rcvcp = PEX_IN(base, PEXRCVCPLIDA);
-	lenerr = PEX_IN(base, PEXLENERRIDA);
-
-	if (intsts || aerr || prerr || rcvcp || lenerr)
-		err = 1;
-
-	pr_info("PCEXC interrupt!!\n");
-	pr_info("PEXINTSTS    :0x%08x\n", intsts);
-	pr_info("PEXAERRSTS   :0x%08x\n", aerr);
-	pr_info("PEXPRERRSTS  :0x%08x\n", prerr);
-	pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp);
-	pr_info("PEXLENERRIDA :0x%08x\n", lenerr);
-
-	/* print detail of Protection Error */
-	if (intsts & 0x00004000) {
-		uint32_t i, n;
-		for (i = 0; i < 4; i++) {
-			n = 1 << i;
-			if (prerr & n) {
-				maea = PEX_IN(base, PEXMAEA(i));
-				maec = PEX_IN(base, PEXMAEC(i));
-				pr_info("PEXMAEC%d     :0x%08x\n", i, maec);
-				pr_info("PEXMAEA%d     :0x%08x\n", i, maea);
-			}
-		}
-	}
-
-	if (err)
-		pciex_clear_intr_all(base);
-}
-
-static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id)
-{
-	struct pci_controller *phb = dev_id;
-
-	pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq);
-
-	BUG_ON(phb->cfg_addr == NULL);
-
-	pciex_check_status(phb->cfg_addr);
-
-	return IRQ_HANDLED;
-}
-
-static __init int celleb_setup_pciex(struct device_node *node,
-				     struct pci_controller *phb)
-{
-	struct resource	r;
-	int virq;
-
-	/* SMMIO registers; used inside this file */
-	if (of_address_to_resource(node, 0, &r)) {
-		pr_err("PCIEXC:Failed to get config resource.\n");
-		return 1;
-	}
-	phb->cfg_addr = ioremap(r.start, resource_size(&r));
-	if (!phb->cfg_addr) {
-		pr_err("PCIEXC:Failed to remap SMMIO region.\n");
-		return 1;
-	}
-
-	/* Not use cfg_data,  cmd and data regs are near address reg */
-	phb->cfg_data = NULL;
-
-	/* set pci_ops */
-	phb->ops = &scc_pciex_pci_ops;
-
-	/* internal interrupt handler */
-	virq = irq_of_parse_and_map(node, 1);
-	if (!virq) {
-		pr_err("PCIEXC:Failed to map irq\n");
-		goto error;
-	}
-	if (request_irq(virq, pciex_handle_internal_irq,
-			0, "pciex", (void *)phb)) {
-		pr_err("PCIEXC:Failed to request irq\n");
-		goto error;
-	}
-
-	/* enable all interrupts */
-	pciex_clear_intr_all(phb->cfg_addr);
-	pciex_enable_intr_all(phb->cfg_addr);
-	/* MSI: TBD */
-
-	return 0;
-
-error:
-	phb->cfg_data = NULL;
-	if (phb->cfg_addr)
-		iounmap(phb->cfg_addr);
-	phb->cfg_addr = NULL;
-	return 1;
-}
-
-struct celleb_phb_spec celleb_pciex_spec __initdata = {
-	.setup = celleb_setup_pciex,
-	.ops = &scc_pciex_ops,
-	.iowa_init = &scc_pciex_iowa_init,
-};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
deleted file mode 100644
index c8eb57193826..000000000000
--- a/arch/powerpc/platforms/cell/celleb_scc_sio.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * setup serial port in SCC
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/tty.h>
-#include <linux/serial.h>
-#include <linux/serial_core.h>
-#include <linux/console.h>
-
-#include <asm/io.h>
-#include <asm/prom.h>
-
-/* sio irq0=0xb00010022 irq0=0xb00010023 irq2=0xb00010024
-    mmio=0xfff000-0x1000,0xff2000-0x1000 */
-static int txx9_serial_bitmap __initdata;
-
-static struct {
-	uint32_t offset;
-	uint32_t index;
-} txx9_scc_tab[3] __initdata = {
-	{ 0x300, 0 },	/* 0xFFF300 */
-	{ 0x400, 0 },	/* 0xFFF400 */
-	{ 0x800, 1 }	/* 0xFF2800 */
-};
-
-static int __init txx9_serial_init(void)
-{
-	extern int early_serial_txx9_setup(struct uart_port *port);
-	struct device_node *node;
-	int i;
-	struct uart_port req;
-	struct of_phandle_args irq;
-	struct resource res;
-
-	for_each_compatible_node(node, "serial", "toshiba,sio-scc") {
-		for (i = 0; i < ARRAY_SIZE(txx9_scc_tab); i++) {
-			if (!(txx9_serial_bitmap & (1<<i)))
-				continue;
-
-			if (of_irq_parse_one(node, i, &irq))
-				continue;
-			if (of_address_to_resource(node,
-				txx9_scc_tab[i].index, &res))
-				continue;
-
-			memset(&req, 0, sizeof(req));
-			req.line = i;
-			req.iotype = UPIO_MEM;
-			req.mapbase = res.start + txx9_scc_tab[i].offset;
-#ifdef CONFIG_SERIAL_TXX9_CONSOLE
-			req.membase = ioremap(req.mapbase, 0x24);
-#endif
-			req.irq = irq_create_of_mapping(&irq);
-			req.flags |= UPF_IOREMAP | UPF_BUGGY_UART
-				/*HAVE_CTS_LINE*/;
-			req.uartclk = 83300000;
-			early_serial_txx9_setup(&req);
-		}
-	}
-
-	return 0;
-}
-
-static int __init txx9_serial_config(char *ptr)
-{
-	int	i;
-
-	for (;;) {
-		switch (get_option(&ptr, &i)) {
-		default:
-			return 0;
-		case 2:
-			txx9_serial_bitmap |= 1 << i;
-			break;
-		case 1:
-			txx9_serial_bitmap |= 1 << i;
-			return 0;
-		}
-	}
-}
-__setup("txx9_serial=", txx9_serial_config);
-
-console_initcall(txx9_serial_init);
diff --git a/arch/powerpc/platforms/cell/celleb_scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
deleted file mode 100644
index d63b720bfe3a..000000000000
--- a/arch/powerpc/platforms/cell/celleb_scc_uhc.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * SCC (Super Companion Chip) UHC setup
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-
-#include <asm/delay.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-
-#include "celleb_scc.h"
-
-#define UHC_RESET_WAIT_MAX 10000
-
-static inline int uhc_clkctrl_ready(u32 val)
-{
-	const u32 mask = SCC_UHC_USBCEN | SCC_UHC_USBCEN;
-	return((val & mask) == mask);
-}
-
-/*
- * UHC(usb host controller) enable function.
- * affect to both of OHCI and EHCI core module.
- */
-static void enable_scc_uhc(struct pci_dev *dev)
-{
-	void __iomem *uhc_base;
-	u32 __iomem *uhc_clkctrl;
-	u32 __iomem *uhc_ecmode;
-	u32 val = 0;
-	int i;
-
-	if (!machine_is(celleb_beat) &&
-	    !machine_is(celleb_native))
-		return;
-
-	uhc_base = ioremap(pci_resource_start(dev, 0),
-			   pci_resource_len(dev, 0));
-	if (!uhc_base) {
-		printk(KERN_ERR "failed to map UHC register base.\n");
-		return;
-	}
-	uhc_clkctrl = uhc_base + SCC_UHC_CKRCTRL;
-	uhc_ecmode  = uhc_base + SCC_UHC_ECMODE;
-
-	/* setup for normal mode */
-	val |= SCC_UHC_F48MCKLEN;
-	out_be32(uhc_clkctrl, val);
-	val |= SCC_UHC_PHY_SUSPEND_SEL;
-	out_be32(uhc_clkctrl, val);
-	udelay(10);
-	val |= SCC_UHC_PHYEN;
-	out_be32(uhc_clkctrl, val);
-	udelay(50);
-
-	/* disable reset */
-	val |= SCC_UHC_HCLKEN;
-	out_be32(uhc_clkctrl, val);
-	val |= (SCC_UHC_USBCEN | SCC_UHC_USBEN);
-	out_be32(uhc_clkctrl, val);
-	i = 0;
-	while (!uhc_clkctrl_ready(in_be32(uhc_clkctrl))) {
-		udelay(10);
-		if (i++ > UHC_RESET_WAIT_MAX) {
-			printk(KERN_ERR "Failed to disable UHC reset %x\n",
-			       in_be32(uhc_clkctrl));
-			break;
-		}
-	}
-
-	/* Endian Conversion Mode for Master ALL area */
-	out_be32(uhc_ecmode, SCC_UHC_ECMODE_BY_BYTE);
-
-	iounmap(uhc_base);
-}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2,
-		 PCI_DEVICE_ID_TOSHIBA_SCC_USB, enable_scc_uhc);
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
deleted file mode 100644
index 90be8ec51686..000000000000
--- a/arch/powerpc/platforms/cell/celleb_setup.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Celleb setup code
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/cell/setup.c:
- *  Copyright (C) 1995  Linus Torvalds
- *  Adapted from 'alpha' version by Gary Thomas
- *  Modified by Cort Dougan (cort@cs.nmt.edu)
- *  Modified by PPC64 Team, IBM Corp
- *  Modified by Cell Team, IBM Deutschland Entwicklung GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/seq_file.h>
-#include <linux/root_dev.h>
-#include <linux/console.h>
-#include <linux/of_platform.h>
-
-#include <asm/mmu.h>
-#include <asm/processor.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/cputable.h>
-#include <asm/irq.h>
-#include <asm/time.h>
-#include <asm/spu_priv1.h>
-#include <asm/firmware.h>
-#include <asm/rtas.h>
-#include <asm/cell-regs.h>
-
-#include "beat_interrupt.h"
-#include "beat_wrapper.h"
-#include "beat.h"
-#include "celleb_pci.h"
-#include "interrupt.h"
-#include "pervasive.h"
-#include "ras.h"
-
-static char celleb_machine_type[128] = "Celleb";
-
-static void celleb_show_cpuinfo(struct seq_file *m)
-{
-	struct device_node *root;
-	const char *model = "";
-
-	root = of_find_node_by_path("/");
-	if (root)
-		model = of_get_property(root, "model", NULL);
-	/* using "CHRP" is to trick anaconda into installing FCx into Celleb */
-	seq_printf(m, "machine\t\t: %s %s\n", celleb_machine_type, model);
-	of_node_put(root);
-}
-
-static int __init celleb_machine_type_hack(char *ptr)
-{
-	strlcpy(celleb_machine_type, ptr, sizeof(celleb_machine_type));
-	return 0;
-}
-
-__setup("celleb_machine_type_hack=", celleb_machine_type_hack);
-
-static void celleb_progress(char *s, unsigned short hex)
-{
-	printk("*** %04x : %s\n", hex, s ? s : "");
-}
-
-static void __init celleb_setup_arch_common(void)
-{
-	/* init to some ~sane value until calibrate_delay() runs */
-	loops_per_jiffy = 50000000;
-
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-}
-
-static const struct of_device_id celleb_bus_ids[] __initconst = {
-	{ .type = "scc", },
-	{ .type = "ioif", },	/* old style */
-	{},
-};
-
-static int __init celleb_publish_devices(void)
-{
-	/* Publish OF platform devices for southbridge IOs */
-	of_platform_bus_probe(NULL, celleb_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(celleb_beat, celleb_publish_devices);
-machine_device_initcall(celleb_native, celleb_publish_devices);
-
-
-/*
- * functions for Celleb-Beat
- */
-static void __init celleb_setup_arch_beat(void)
-{
-#ifdef CONFIG_SPU_BASE
-	spu_priv1_ops		= &spu_priv1_beat_ops;
-	spu_management_ops	= &spu_management_of_ops;
-#endif
-
-	celleb_setup_arch_common();
-}
-
-static int __init celleb_probe_beat(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (!of_flat_dt_is_compatible(root, "Beat"))
-		return 0;
-
-	powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS
-		| FW_FEATURE_BEAT | FW_FEATURE_LPAR;
-	hpte_init_beat_v3();
-	pm_power_off = beat_power_off;
-
-	return 1;
-}
-
-
-/*
- * functions for Celleb-native
- */
-static void __init celleb_init_IRQ_native(void)
-{
-	iic_init_IRQ();
-	spider_init_IRQ();
-}
-
-static void __init celleb_setup_arch_native(void)
-{
-#ifdef CONFIG_SPU_BASE
-	spu_priv1_ops		= &spu_priv1_mmio_ops;
-	spu_management_ops	= &spu_management_of_ops;
-#endif
-
-	cbe_regs_init();
-
-#ifdef CONFIG_CBE_RAS
-	cbe_ras_init();
-#endif
-
-#ifdef CONFIG_SMP
-	smp_init_cell();
-#endif
-
-	cbe_pervasive_init();
-
-	/* XXX: nvram initialization should be added */
-
-	celleb_setup_arch_common();
-}
-
-static int __init celleb_probe_native(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (of_flat_dt_is_compatible(root, "Beat") ||
-	    !of_flat_dt_is_compatible(root, "TOSHIBA,Celleb"))
-		return 0;
-
-	powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS;
-	hpte_init_native();
-	pm_power_off = rtas_power_off;
-
-	return 1;
-}
-
-
-/*
- * machine definitions
- */
-define_machine(celleb_beat) {
-	.name			= "Cell Reference Set (Beat)",
-	.probe			= celleb_probe_beat,
-	.setup_arch		= celleb_setup_arch_beat,
-	.show_cpuinfo		= celleb_show_cpuinfo,
-	.restart		= beat_restart,
-	.halt			= beat_halt,
-	.get_rtc_time		= beat_get_rtc_time,
-	.set_rtc_time		= beat_set_rtc_time,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= celleb_progress,
-	.power_save		= beat_power_save,
-	.nvram_size		= beat_nvram_get_size,
-	.nvram_read		= beat_nvram_read,
-	.nvram_write		= beat_nvram_write,
-	.set_dabr		= beat_set_xdabr,
-	.init_IRQ		= beatic_init_IRQ,
-	.get_irq		= beatic_get_irq,
-	.pci_probe_mode 	= celleb_pci_probe_mode,
-	.pci_setup_phb		= celleb_setup_phb,
-#ifdef CONFIG_KEXEC
-	.kexec_cpu_down		= beat_kexec_cpu_down,
-#endif
-};
-
-define_machine(celleb_native) {
-	.name			= "Cell Reference Set (native)",
-	.probe			= celleb_probe_native,
-	.setup_arch		= celleb_setup_arch_native,
-	.show_cpuinfo		= celleb_show_cpuinfo,
-	.restart		= rtas_restart,
-	.halt			= rtas_halt,
-	.get_boot_time		= rtas_get_boot_time,
-	.get_rtc_time		= rtas_get_rtc_time,
-	.set_rtc_time		= rtas_set_rtc_time,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= celleb_progress,
-	.pci_probe_mode 	= celleb_pci_probe_mode,
-	.pci_setup_phb		= celleb_setup_phb,
-	.init_IRQ		= celleb_init_IRQ_native,
-};
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 4c11421847be..3af8324c122e 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void)
 
 void iic_setup_cpu(void)
 {
-	out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff);
+	out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff);
 }
 
 u8 iic_get_target_id(int cpu)
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index c7c8720aa39f..21b502398bf3 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -39,6 +39,7 @@
 #include <asm/firmware.h>
 #include <asm/cell-regs.h>
 
+#include "cell.h"
 #include "interrupt.h"
 
 /* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
@@ -197,7 +198,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
 
 	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
 
-	for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift)
+	for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift))
 		io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
 
 	mb();
@@ -857,7 +858,7 @@ static int __init cell_iommu_init_disabled(void)
 	cell_dma_direct_offset += base;
 
 	if (cell_dma_direct_offset != 0)
-		ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
+		cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
 
 	printk("iommu: disabled, direct DMA offset is 0x%lx\n",
 	       cell_dma_direct_offset);
@@ -1197,8 +1198,8 @@ static int __init cell_iommu_init(void)
 		if (cell_iommu_init_disabled() == 0)
 			goto bail;
 
-	/* Setup various ppc_md. callbacks */
-	ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
+	/* Setup various callbacks */
+	cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
 	ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
 	ppc_md.tce_build = tce_build_cell;
 	ppc_md.tce_free = tce_free_cell;
@@ -1234,5 +1235,3 @@ static int __init cell_iommu_init(void)
 	return 0;
 }
 machine_arch_initcall(cell, cell_iommu_init);
-machine_arch_initcall(celleb_native, cell_iommu_init);
-
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index d62aa982d530..36cff28d0293 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -54,6 +54,7 @@
 #include <asm/cell-regs.h>
 #include <asm/io-workarounds.h>
 
+#include "cell.h"
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
@@ -126,6 +127,8 @@ static int cell_setup_phb(struct pci_controller *phb)
 	if (rc)
 		return rc;
 
+	phb->controller_ops = cell_pci_controller_ops;
+
 	np = phb->dn;
 	model = of_get_property(np, "model", NULL);
 	if (model == NULL || strcmp(np->name, "pci"))
@@ -279,3 +282,5 @@ define_machine(cell) {
 	.init_IRQ       	= cell_init_irq,
 	.pci_setup_phb		= cell_setup_phb,
 };
+
+struct pci_controller_ops cell_pci_controller_ops;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index b64e7ead752f..895560f4be69 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -102,13 +102,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 	return 1;
 }
 
-static int __init smp_iic_probe(void)
-{
-	iic_request_IPIs();
-
-	return num_possible_cpus();
-}
-
 static void smp_cell_setup_cpu(int cpu)
 {
 	if (cpu != boot_cpuid)
@@ -139,7 +132,7 @@ static int smp_cell_kick_cpu(int nr)
 
 static struct smp_ops_t bpa_iic_smp_ops = {
 	.message_pass	= iic_message_pass,
-	.probe		= smp_iic_probe,
+	.probe		= iic_request_IPIs,
 	.kick_cpu	= smp_cell_kick_cpu,
 	.setup_cpu	= smp_cell_setup_cpu,
 	.cpu_bootable	= smp_generic_cpu_bootable,
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index b0ec78e8ad68..a494028b2cdf 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -39,6 +39,7 @@ static void *spu_syscall_table[] = {
 #define PPC_SYS(func)		sys_ni_syscall,
 #define OLDSYS(func)		sys_ni_syscall,
 #define SYS32ONLY(func)		sys_ni_syscall,
+#define PPC64ONLY(func)		sys_ni_syscall,
 #define SYSX(f, f3264, f32)	sys_ni_syscall,
 
 #define SYSCALL_SPU(func)	sys_##func,
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 860a59eb8ea2..15ebc4e8a151 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -253,7 +253,7 @@ static void briq_restart(char *cmd)
  * But unfortunately, the firmware does not connect /chosen/{stdin,stdout}
  * the the built-in serial node. Instead, a /failsafe node is created.
  */
-static void chrp_init_early(void)
+static __init void chrp_init_early(void)
 {
 	struct device_node *node;
 	const char *property;
diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h
index c6911ddc479f..eecfa182b06e 100644
--- a/arch/powerpc/platforms/maple/maple.h
+++ b/arch/powerpc/platforms/maple/maple.h
@@ -10,3 +10,5 @@ extern void maple_calibrate_decr(void);
 extern void maple_pci_init(void);
 extern void maple_pci_irq_fixup(struct pci_dev *dev);
 extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel);
+
+extern struct pci_controller_ops maple_pci_controller_ops;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index d3a13067ec42..a923230e575b 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -510,6 +510,7 @@ static int __init maple_add_bridge(struct device_node *dev)
 		return -ENOMEM;
 	hose->first_busno = bus_range ? bus_range[0] : 0;
 	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = maple_pci_controller_ops;
 
 	disp_name = NULL;
 	if (of_device_is_compatible(dev, "u3-agp")) {
@@ -660,3 +661,6 @@ static void quirk_ipr_msi(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN,
 			quirk_ipr_msi);
+
+struct pci_controller_ops maple_pci_controller_ops = {
+};
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 56b85cd61aaf..a837188544c8 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -203,7 +203,7 @@ static void __init maple_init_early(void)
 {
 	DBG(" -> maple_init_early\n");
 
-	iommu_init_early_dart();
+	iommu_init_early_dart(&maple_pci_controller_ops);
 
 	DBG(" <- maple_init_early\n");
 }
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 2e576f2ae442..b8f567b2ea19 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -27,6 +27,8 @@
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 
+#include "pasemi.h"
+
 #define IOBMAP_PAGE_SHIFT	12
 #define IOBMAP_PAGE_SIZE	(1 << IOBMAP_PAGE_SHIFT)
 #define IOBMAP_PAGE_MASK	(IOBMAP_PAGE_SIZE - 1)
@@ -248,8 +250,8 @@ void __init iommu_init_early_pasemi(void)
 
 	iob_init(NULL);
 
-	ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pasemi;
-	ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pasemi;
+	pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
+	pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
 	ppc_md.tce_build = iobmap_build;
 	ppc_md.tce_free  = iobmap_free;
 	set_pci_dma_ops(&dma_iommu_ops);
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
index ea65bf0eb897..11f230a48227 100644
--- a/arch/powerpc/platforms/pasemi/pasemi.h
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -30,5 +30,6 @@ static inline void restore_astate(int cpu)
 }
 #endif
 
+extern struct pci_controller_ops pasemi_pci_controller_ops;
 
 #endif /* _PASEMI_PASEMI_H */
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index aa862713258c..f3a68a0fef23 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -31,6 +31,8 @@
 
 #include <asm/ppc-pci.h>
 
+#include "pasemi.h"
+
 #define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off))
 
 static inline int pa_pxp_offset_valid(u8 bus, u8 devfn, int offset)
@@ -199,6 +201,7 @@ static int __init pas_add_bridge(struct device_node *dev)
 
 	hose->first_busno = 0;
 	hose->last_busno = 0xff;
+	hose->controller_ops = pasemi_pci_controller_ops;
 
 	setup_pa_pxp(hose);
 
@@ -239,3 +242,5 @@ void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
 
 	return (void __iomem *)pa_pxp_cfg_addr(hose, dev->bus->number, dev->devfn, offset);
 }
+
+struct pci_controller_ops pasemi_pci_controller_ops;
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 3e91ef538114..76f5013c35e5 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -246,7 +246,7 @@ static void __init bootx_scan_dt_build_strings(unsigned long base,
 		DBG(" detected display ! adding properties names !\n");
 		bootx_dt_add_string("linux,boot-display", mem_end);
 		bootx_dt_add_string("linux,opened", mem_end);
-		strncpy(bootx_disp_path, namep, 255);
+		strlcpy(bootx_disp_path, namep, sizeof(bootx_disp_path));
 	}
 
 	/* get and store all property names */
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index f4071a67ad00..59ab16fa600f 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -27,6 +27,8 @@
 #include <asm/grackle.h>
 #include <asm/ppc-pci.h>
 
+#include "pmac.h"
+
 #undef DEBUG
 
 #ifdef DEBUG
@@ -798,6 +800,7 @@ static int __init pmac_add_bridge(struct device_node *dev)
 		return -ENOMEM;
 	hose->first_busno = bus_range ? bus_range[0] : 0;
 	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = pmac_pci_controller_ops;
 
 	disp_name = NULL;
 
@@ -942,7 +945,7 @@ void __init pmac_pci_init(void)
 }
 
 #ifdef CONFIG_PPC32
-int pmac_pci_enable_device_hook(struct pci_dev *dev)
+static bool pmac_pci_enable_device_hook(struct pci_dev *dev)
 {
 	struct device_node* node;
 	int updatecfg = 0;
@@ -958,11 +961,11 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev)
 	    && !node) {
 		printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n",
 		       pci_name(dev));
-		return -EINVAL;
+		return false;
 	}
 
 	if (!node)
-		return 0;
+		return true;
 
 	uninorth_child = node->parent &&
 		of_device_is_compatible(node->parent, "uni-north");
@@ -1003,7 +1006,7 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev)
 				      L1_CACHE_BYTES >> 2);
 	}
 
-	return 0;
+	return true;
 }
 
 void pmac_pci_fixup_ohci(struct pci_dev *dev)
@@ -1223,3 +1226,30 @@ static void fixup_u4_pcie(struct pci_dev* dev)
 	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0);
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_U4_PCIE, fixup_u4_pcie);
+
+#ifdef CONFIG_PPC64
+static int pmac_pci_probe_mode(struct pci_bus *bus)
+{
+	struct device_node *node = pci_bus_to_OF_node(bus);
+
+	/* We need to use normal PCI probing for the AGP bus,
+	 * since the device for the AGP bridge isn't in the tree.
+	 * Same for the PCIe host on U4 and the HT host bridge.
+	 */
+	if (bus->self == NULL && (of_device_is_compatible(node, "u3-agp") ||
+				  of_device_is_compatible(node, "u4-pcie") ||
+				  of_device_is_compatible(node, "u3-ht")))
+		return PCI_PROBE_NORMAL;
+	return PCI_PROBE_DEVTREE;
+}
+#endif /* CONFIG_PPC64 */
+
+struct pci_controller_ops pmac_pci_controller_ops = {
+#ifdef CONFIG_PPC64
+	.probe_mode		= pmac_pci_probe_mode,
+#endif
+#ifdef CONFIG_PPC32
+	.enable_device_hook	= pmac_pci_enable_device_hook,
+#endif
+};
+
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 4c24bf60d39d..59cfc9d63c2d 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -321,6 +321,9 @@ static void __init pmac_pic_probe_oldstyle(void)
 		max_irqs = max_real_irqs = 64;
 
 		/* We might have a second cascaded heathrow */
+
+		/* Compensate for of_node_put() in of_find_node_by_name() */
+		of_node_get(master);
 		slave = of_find_node_by_name(master, "mac-io");
 
 		/* Check ordering of master & slave */
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 8327cce2bdb0..e7f8163d6769 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -25,7 +25,6 @@ extern void pmac_pci_init(void);
 extern void pmac_nvram_update(void);
 extern unsigned char pmac_nvram_read_byte(int addr);
 extern void pmac_nvram_write_byte(int addr, unsigned char val);
-extern int pmac_pci_enable_device_hook(struct pci_dev *dev);
 extern void pmac_pcibios_after_init(void);
 extern int of_show_percpuinfo(struct seq_file *m, int i);
 
@@ -39,4 +38,6 @@ extern void low_cpu_die(void) __attribute__((noreturn));
 extern int pmac_nvram_init(void);
 extern void pmac_pic_init(void);
 
+extern struct pci_controller_ops pmac_pci_controller_ops;
+
 #endif /* __PMAC_H__ */
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 713d36d45d1d..8dd78f4e1af4 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -473,7 +473,7 @@ static void __init pmac_init_early(void)
 	udbg_adb_init(!!strstr(boot_command_line, "btextdbg"));
 
 #ifdef CONFIG_PPC64
-	iommu_init_early_dart();
+	iommu_init_early_dart(&pmac_pci_controller_ops);
 #endif
 
 	/* SMP Init has to be done early as we need to patch up
@@ -637,24 +637,6 @@ static int __init pmac_probe(void)
 	return 1;
 }
 
-#ifdef CONFIG_PPC64
-/* Move that to pci.c */
-static int pmac_pci_probe_mode(struct pci_bus *bus)
-{
-	struct device_node *node = pci_bus_to_OF_node(bus);
-
-	/* We need to use normal PCI probing for the AGP bus,
-	 * since the device for the AGP bridge isn't in the tree.
-	 * Same for the PCIe host on U4 and the HT host bridge.
-	 */
-	if (bus->self == NULL && (of_device_is_compatible(node, "u3-agp") ||
-				  of_device_is_compatible(node, "u4-pcie") ||
-				  of_device_is_compatible(node, "u3-ht")))
-		return PCI_PROBE_NORMAL;
-	return PCI_PROBE_DEVTREE;
-}
-#endif /* CONFIG_PPC64 */
-
 define_machine(powermac) {
 	.name			= "PowerMac",
 	.probe			= pmac_probe,
@@ -674,12 +656,10 @@ define_machine(powermac) {
 	.feature_call		= pmac_do_feature_call,
 	.progress		= udbg_progress,
 #ifdef CONFIG_PPC64
-	.pci_probe_mode		= pmac_pci_probe_mode,
 	.power_save		= power4_idle,
 	.enable_pmcs		= power4_enable_pmcs,
 #endif /* CONFIG_PPC64 */
 #ifdef CONFIG_PPC32
-	.pcibios_enable_device_hook = pmac_pci_enable_device_hook,
 	.pcibios_after_init	= pmac_pcibios_after_init,
 	.phys_mem_access_prot	= pci_phys_mem_access_prot,
 #endif
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index af094ae03dbb..28a147ca32ba 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -268,14 +268,14 @@ static void __init psurge_quad_init(void)
 	mdelay(33);
 }
 
-static int __init smp_psurge_probe(void)
+static void __init smp_psurge_probe(void)
 {
 	int i, ncpus;
 	struct device_node *dn;
 
 	/* We don't do SMP on the PPC601 -- paulus */
 	if (PVR_VER(mfspr(SPRN_PVR)) == 1)
-		return 1;
+		return;
 
 	/*
 	 * The powersurge cpu board can be used in the generation
@@ -289,7 +289,7 @@ static int __init smp_psurge_probe(void)
 	 */
 	dn = of_find_node_by_name(NULL, "hammerhead");
 	if (dn == NULL)
-		return 1;
+		return;
 	of_node_put(dn);
 
 	hhead_base = ioremap(HAMMERHEAD_BASE, 0x800);
@@ -310,13 +310,13 @@ static int __init smp_psurge_probe(void)
 			/* not a dual-cpu card */
 			iounmap(hhead_base);
 			psurge_type = PSURGE_NONE;
-			return 1;
+			return;
 		}
 		ncpus = 2;
 	}
 
 	if (psurge_secondary_ipi_init())
-		return 1;
+		return;
 
 	psurge_start = ioremap(PSURGE_START, 4);
 	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
@@ -332,8 +332,6 @@ static int __init smp_psurge_probe(void)
 		set_cpu_present(i, true);
 
 	if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
-
-	return ncpus;
 }
 
 static int __init smp_psurge_kick_cpu(int nr)
@@ -766,7 +764,7 @@ static void __init smp_core99_setup(int ncpus)
 		powersave_nap = 0;
 }
 
-static int __init smp_core99_probe(void)
+static void __init smp_core99_probe(void)
 {
 	struct device_node *cpus;
 	int ncpus = 0;
@@ -781,7 +779,7 @@ static int __init smp_core99_probe(void)
 
 	/* Nothing more to do if less than 2 of them */
 	if (ncpus <= 1)
-		return 1;
+		return;
 
 	/* We need to perform some early initialisations before we can start
 	 * setting up SMP as we are running before initcalls
@@ -797,8 +795,6 @@ static int __init smp_core99_probe(void)
 
 	/* Collect l2cr and l3cr values from CPU 0 */
 	core99_init_caches(0);
-
-	return ncpus;
 }
 
 static int smp_core99_kick_cpu(int nr)
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 45a8ed0585cd..4b044d8cb49a 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -19,10 +19,3 @@ config PPC_POWERNV
 	select CPU_FREQ_GOV_CONSERVATIVE
 	select PPC_DOORBELL
 	default y
-
-config PPC_POWERNV_RTAS
-	depends on PPC_POWERNV
-	bool "Support for RTAS based PowerNV platforms such as BML"
-	default y
-	select PPC_ICS_RTAS
-	select PPC_RTAS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 6f3c5d33c3af..33e44f37212f 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -5,7 +5,7 @@ obj-y			+= opal-msglog.o opal-hmi.o opal-power.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
-obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_EEH)	+= eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)	+= opal-tracepoints.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
deleted file mode 100644
index 2809c9895288..000000000000
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ /dev/null
@@ -1,1149 +0,0 @@
-/*
- * The file intends to implement the functions needed by EEH, which is
- * built on IODA compliant chip. Actually, lots of functions related
- * to EEH would be built based on the OPAL APIs.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/msi.h>
-#include <linux/notifier.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/eeh.h>
-#include <asm/eeh_event.h>
-#include <asm/io.h>
-#include <asm/iommu.h>
-#include <asm/msi_bitmap.h>
-#include <asm/opal.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-#include <asm/tce.h>
-
-#include "powernv.h"
-#include "pci.h"
-
-static int ioda_eeh_nb_init = 0;
-
-static int ioda_eeh_event(struct notifier_block *nb,
-			  unsigned long events, void *change)
-{
-	uint64_t changed_evts = (uint64_t)change;
-
-	/*
-	 * We simply send special EEH event if EEH has
-	 * been enabled, or clear pending events in
-	 * case that we enable EEH soon
-	 */
-	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
-	    !(events & OPAL_EVENT_PCI_ERROR))
-		return 0;
-
-	if (eeh_enabled())
-		eeh_send_failure_event(NULL);
-	else
-		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
-
-	return 0;
-}
-
-static struct notifier_block ioda_eeh_nb = {
-	.notifier_call	= ioda_eeh_event,
-	.next		= NULL,
-	.priority	= 0
-};
-
-#ifdef CONFIG_DEBUG_FS
-static ssize_t ioda_eeh_ei_write(struct file *filp,
-				 const char __user *user_buf,
-				 size_t count, loff_t *ppos)
-{
-	struct pci_controller *hose = filp->private_data;
-	struct pnv_phb *phb = hose->private_data;
-	struct eeh_dev *edev;
-	struct eeh_pe *pe;
-	int pe_no, type, func;
-	unsigned long addr, mask;
-	char buf[50];
-	int ret;
-
-	if (!phb->eeh_ops || !phb->eeh_ops->err_inject)
-		return -ENXIO;
-
-	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
-	if (!ret)
-		return -EFAULT;
-
-	/* Retrieve parameters */
-	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
-		     &pe_no, &type, &func, &addr, &mask);
-	if (ret != 5)
-		return -EINVAL;
-
-	/* Retrieve PE */
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev)
-		return -ENOMEM;
-	edev->phb = hose;
-	edev->pe_config_addr = pe_no;
-	pe = eeh_pe_get(edev);
-	kfree(edev);
-	if (!pe)
-		return -ENODEV;
-
-	/* Do error injection */
-	ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
-	return ret < 0 ? ret : count;
-}
-
-static const struct file_operations ioda_eeh_ei_fops = {
-	.open   = simple_open,
-	.llseek = no_llseek,
-	.write  = ioda_eeh_ei_write,
-};
-
-static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
-{
-	struct pci_controller *hose = data;
-	struct pnv_phb *phb = hose->private_data;
-
-	out_be64(phb->regs + offset, val);
-	return 0;
-}
-
-static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val)
-{
-	struct pci_controller *hose = data;
-	struct pnv_phb *phb = hose->private_data;
-
-	*val = in_be64(phb->regs + offset);
-	return 0;
-}
-
-static int ioda_eeh_outb_dbgfs_set(void *data, u64 val)
-{
-	return ioda_eeh_dbgfs_set(data, 0xD10, val);
-}
-
-static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val)
-{
-	return ioda_eeh_dbgfs_get(data, 0xD10, val);
-}
-
-static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val)
-{
-	return ioda_eeh_dbgfs_set(data, 0xD90, val);
-}
-
-static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val)
-{
-	return ioda_eeh_dbgfs_get(data, 0xD90, val);
-}
-
-static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val)
-{
-	return ioda_eeh_dbgfs_set(data, 0xE10, val);
-}
-
-static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val)
-{
-	return ioda_eeh_dbgfs_get(data, 0xE10, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get,
-			ioda_eeh_outb_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get,
-			ioda_eeh_inbA_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
-			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
-#endif /* CONFIG_DEBUG_FS */
-
-
-/**
- * ioda_eeh_post_init - Chip dependent post initialization
- * @hose: PCI controller
- *
- * The function will be called after eeh PEs and devices
- * have been built. That means the EEH is ready to supply
- * service with I/O cache.
- */
-static int ioda_eeh_post_init(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	int ret;
-
-	/* Register OPAL event notifier */
-	if (!ioda_eeh_nb_init) {
-		ret = opal_notifier_register(&ioda_eeh_nb);
-		if (ret) {
-			pr_err("%s: Can't register OPAL event notifier (%d)\n",
-			       __func__, ret);
-			return ret;
-		}
-
-		ioda_eeh_nb_init = 1;
-	}
-
-#ifdef CONFIG_DEBUG_FS
-	if (!phb->has_dbgfs && phb->dbgfs) {
-		phb->has_dbgfs = 1;
-
-		debugfs_create_file("err_injct", 0200,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_ei_fops);
-
-		debugfs_create_file("err_injct_outbound", 0600,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_outb_dbgfs_ops);
-		debugfs_create_file("err_injct_inboundA", 0600,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_inbA_dbgfs_ops);
-		debugfs_create_file("err_injct_inboundB", 0600,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_inbB_dbgfs_ops);
-	}
-#endif
-
-	/* If EEH is enabled, we're going to rely on that.
-	 * Otherwise, we restore to conventional mechanism
-	 * to clear frozen PE during PCI config access.
-	 */
-	if (eeh_enabled())
-		phb->flags |= PNV_PHB_FLAG_EEH;
-	else
-		phb->flags &= ~PNV_PHB_FLAG_EEH;
-
-	return 0;
-}
-
-/**
- * ioda_eeh_set_option - Set EEH operation or I/O setting
- * @pe: EEH PE
- * @option: options
- *
- * Enable or disable EEH option for the indicated PE. The
- * function also can be used to enable I/O or DMA for the
- * PE.
- */
-static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
-{
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	bool freeze_pe = false;
-	int enable, ret = 0;
-	s64 rc;
-
-	/* Check on PE number */
-	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
-		pr_err("%s: PE address %x out of range [0, %x] "
-		       "on PHB#%x\n",
-			__func__, pe->addr, phb->ioda.total_pe,
-			hose->global_number);
-		return -EINVAL;
-	}
-
-	switch (option) {
-	case EEH_OPT_DISABLE:
-		return -EPERM;
-	case EEH_OPT_ENABLE:
-		return 0;
-	case EEH_OPT_THAW_MMIO:
-		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
-		break;
-	case EEH_OPT_THAW_DMA:
-		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
-		break;
-	case EEH_OPT_FREEZE_PE:
-		freeze_pe = true;
-		enable = OPAL_EEH_ACTION_SET_FREEZE_ALL;
-		break;
-	default:
-		pr_warn("%s: Invalid option %d\n",
-			__func__, option);
-		return -EINVAL;
-	}
-
-	/* If PHB supports compound PE, to handle it */
-	if (freeze_pe) {
-		if (phb->freeze_pe) {
-			phb->freeze_pe(phb, pe->addr);
-		} else {
-			rc = opal_pci_eeh_freeze_set(phb->opal_id,
-						     pe->addr,
-						     enable);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld freezing "
-					"PHB#%x-PE#%x\n",
-					__func__, rc,
-					phb->hose->global_number, pe->addr);
-				ret = -EIO;
-			}
-		}
-	} else {
-		if (phb->unfreeze_pe) {
-			ret = phb->unfreeze_pe(phb, pe->addr, enable);
-		} else {
-			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
-						       pe->addr,
-						       enable);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld enable %d "
-					"for PHB#%x-PE#%x\n",
-					__func__, rc, option,
-					phb->hose->global_number, pe->addr);
-				ret = -EIO;
-			}
-		}
-	}
-
-	return ret;
-}
-
-static void ioda_eeh_phb_diag(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-	long rc;
-
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
-					 PNV_PCI_DIAG_BUF_SIZE);
-	if (rc != OPAL_SUCCESS)
-		pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			__func__, pe->phb->global_number, rc);
-}
-
-static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-	u8 fstate;
-	__be16 pcierr;
-	s64 rc;
-	int result = 0;
-
-	rc = opal_pci_eeh_freeze_status(phb->opal_id,
-					pe->addr,
-					&fstate,
-					&pcierr,
-					NULL);
-	if (rc != OPAL_SUCCESS) {
-		pr_warn("%s: Failure %lld getting PHB#%x state\n",
-			__func__, rc, phb->hose->global_number);
-		return EEH_STATE_NOT_SUPPORT;
-	}
-
-	/*
-	 * Check PHB state. If the PHB is frozen for the
-	 * first time, to dump the PHB diag-data.
-	 */
-	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
-		result = (EEH_STATE_MMIO_ACTIVE  |
-			  EEH_STATE_DMA_ACTIVE   |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-	} else if (!(pe->state & EEH_PE_ISOLATED)) {
-		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-		ioda_eeh_phb_diag(pe);
-
-		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
-			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
-	}
-
-	return result;
-}
-
-static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-	u8 fstate;
-	__be16 pcierr;
-	s64 rc;
-	int result;
-
-	/*
-	 * We don't clobber hardware frozen state until PE
-	 * reset is completed. In order to keep EEH core
-	 * moving forward, we have to return operational
-	 * state during PE reset.
-	 */
-	if (pe->state & EEH_PE_RESET) {
-		result = (EEH_STATE_MMIO_ACTIVE  |
-			  EEH_STATE_DMA_ACTIVE   |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-		return result;
-	}
-
-	/*
-	 * Fetch PE state from hardware. If the PHB
-	 * supports compound PE, let it handle that.
-	 */
-	if (phb->get_pe_state) {
-		fstate = phb->get_pe_state(phb, pe->addr);
-	} else {
-		rc = opal_pci_eeh_freeze_status(phb->opal_id,
-						pe->addr,
-						&fstate,
-						&pcierr,
-						NULL);
-		if (rc != OPAL_SUCCESS) {
-			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
-				__func__, rc, phb->hose->global_number, pe->addr);
-			return EEH_STATE_NOT_SUPPORT;
-		}
-	}
-
-	/* Figure out state */
-	switch (fstate) {
-	case OPAL_EEH_STOPPED_NOT_FROZEN:
-		result = (EEH_STATE_MMIO_ACTIVE  |
-			  EEH_STATE_DMA_ACTIVE   |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-		break;
-	case OPAL_EEH_STOPPED_MMIO_FREEZE:
-		result = (EEH_STATE_DMA_ACTIVE |
-			  EEH_STATE_DMA_ENABLED);
-		break;
-	case OPAL_EEH_STOPPED_DMA_FREEZE:
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_MMIO_ENABLED);
-		break;
-	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
-		result = 0;
-		break;
-	case OPAL_EEH_STOPPED_RESET:
-		result = EEH_STATE_RESET_ACTIVE;
-		break;
-	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
-		result = EEH_STATE_UNAVAILABLE;
-		break;
-	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
-		result = EEH_STATE_NOT_SUPPORT;
-		break;
-	default:
-		result = EEH_STATE_NOT_SUPPORT;
-		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
-			__func__, phb->hose->global_number,
-			pe->addr, fstate);
-	}
-
-	/*
-	 * If PHB supports compound PE, to freeze all
-	 * slave PEs for consistency.
-	 *
-	 * If the PE is switching to frozen state for the
-	 * first time, to dump the PHB diag-data.
-	 */
-	if (!(result & EEH_STATE_NOT_SUPPORT) &&
-	    !(result & EEH_STATE_UNAVAILABLE) &&
-	    !(result & EEH_STATE_MMIO_ACTIVE) &&
-	    !(result & EEH_STATE_DMA_ACTIVE)  &&
-	    !(pe->state & EEH_PE_ISOLATED)) {
-		if (phb->freeze_pe)
-			phb->freeze_pe(phb, pe->addr);
-
-		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-		ioda_eeh_phb_diag(pe);
-
-		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
-			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
-	}
-
-	return result;
-}
-
-/**
- * ioda_eeh_get_state - Retrieve the state of PE
- * @pe: EEH PE
- *
- * The PE's state should be retrieved from the PEEV, PEST
- * IODA tables. Since the OPAL has exported the function
- * to do it, it'd better to use that.
- */
-static int ioda_eeh_get_state(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-
-	/* Sanity check on PE number. PHB PE should have 0 */
-	if (pe->addr < 0 ||
-	    pe->addr >= phb->ioda.total_pe) {
-		pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n",
-			__func__, phb->hose->global_number,
-			pe->addr, phb->ioda.total_pe);
-		return EEH_STATE_NOT_SUPPORT;
-	}
-
-	if (pe->type & EEH_PE_PHB)
-		return ioda_eeh_get_phb_state(pe);
-
-	return ioda_eeh_get_pe_state(pe);
-}
-
-static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
-{
-	s64 rc = OPAL_HARDWARE;
-
-	while (1) {
-		rc = opal_pci_poll(phb->opal_id);
-		if (rc <= 0)
-			break;
-
-		if (system_state < SYSTEM_RUNNING)
-			udelay(1000 * rc);
-		else
-			msleep(rc);
-	}
-
-	return rc;
-}
-
-int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
-{
-	struct pnv_phb *phb = hose->private_data;
-	s64 rc = OPAL_HARDWARE;
-
-	pr_debug("%s: Reset PHB#%x, option=%d\n",
-		 __func__, hose->global_number, option);
-
-	/* Issue PHB complete reset request */
-	if (option == EEH_RESET_FUNDAMENTAL ||
-	    option == EEH_RESET_HOT)
-		rc = opal_pci_reset(phb->opal_id,
-				OPAL_RESET_PHB_COMPLETE,
-				OPAL_ASSERT_RESET);
-	else if (option == EEH_RESET_DEACTIVATE)
-		rc = opal_pci_reset(phb->opal_id,
-				OPAL_RESET_PHB_COMPLETE,
-				OPAL_DEASSERT_RESET);
-	if (rc < 0)
-		goto out;
-
-	/*
-	 * Poll state of the PHB until the request is done
-	 * successfully. The PHB reset is usually PHB complete
-	 * reset followed by hot reset on root bus. So we also
-	 * need the PCI bus settlement delay.
-	 */
-	rc = ioda_eeh_phb_poll(phb);
-	if (option == EEH_RESET_DEACTIVATE) {
-		if (system_state < SYSTEM_RUNNING)
-			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
-		else
-			msleep(EEH_PE_RST_SETTLE_TIME);
-	}
-out:
-	if (rc != OPAL_SUCCESS)
-		return -EIO;
-
-	return 0;
-}
-
-static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
-{
-	struct pnv_phb *phb = hose->private_data;
-	s64 rc = OPAL_SUCCESS;
-
-	pr_debug("%s: Reset PHB#%x, option=%d\n",
-		 __func__, hose->global_number, option);
-
-	/*
-	 * During the reset deassert time, we needn't care
-	 * the reset scope because the firmware does nothing
-	 * for fundamental or hot reset during deassert phase.
-	 */
-	if (option == EEH_RESET_FUNDAMENTAL)
-		rc = opal_pci_reset(phb->opal_id,
-				OPAL_RESET_PCI_FUNDAMENTAL,
-				OPAL_ASSERT_RESET);
-	else if (option == EEH_RESET_HOT)
-		rc = opal_pci_reset(phb->opal_id,
-				OPAL_RESET_PCI_HOT,
-				OPAL_ASSERT_RESET);
-	else if (option == EEH_RESET_DEACTIVATE)
-		rc = opal_pci_reset(phb->opal_id,
-				OPAL_RESET_PCI_HOT,
-				OPAL_DEASSERT_RESET);
-	if (rc < 0)
-		goto out;
-
-	/* Poll state of the PHB until the request is done */
-	rc = ioda_eeh_phb_poll(phb);
-	if (option == EEH_RESET_DEACTIVATE)
-		msleep(EEH_PE_RST_SETTLE_TIME);
-out:
-	if (rc != OPAL_SUCCESS)
-		return -EIO;
-
-	return 0;
-}
-
-static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
-
-{
-	struct device_node *dn = pci_device_to_OF_node(dev);
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
-	int aer = edev ? edev->aer_cap : 0;
-	u32 ctrl;
-
-	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
-		 __func__, pci_domain_nr(dev->bus),
-		 dev->bus->number, option);
-
-	switch (option) {
-	case EEH_RESET_FUNDAMENTAL:
-	case EEH_RESET_HOT:
-		/* Don't report linkDown event */
-		if (aer) {
-			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					     4, &ctrl);
-			ctrl |= PCI_ERR_UNC_SURPDN;
-                        eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					      4, ctrl);
-                }
-
-		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
-		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
-		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
-		msleep(EEH_PE_RST_HOLD_TIME);
-
-		break;
-	case EEH_RESET_DEACTIVATE:
-		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
-		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
-		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
-		msleep(EEH_PE_RST_SETTLE_TIME);
-
-		/* Continue reporting linkDown event */
-		if (aer) {
-			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					     4, &ctrl);
-			ctrl &= ~PCI_ERR_UNC_SURPDN;
-			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					      4, ctrl);
-		}
-
-		break;
-	}
-
-	return 0;
-}
-
-void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
-{
-	struct pci_controller *hose;
-
-	if (pci_is_root_bus(dev->bus)) {
-		hose = pci_bus_to_host(dev->bus);
-		ioda_eeh_root_reset(hose, EEH_RESET_HOT);
-		ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
-	} else {
-		ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
-		ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
-	}
-}
-
-/**
- * ioda_eeh_reset - Reset the indicated PE
- * @pe: EEH PE
- * @option: reset option
- *
- * Do reset on the indicated PE. For PCI bus sensitive PE,
- * we need to reset the parent p2p bridge. The PHB has to
- * be reinitialized if the p2p bridge is root bridge. For
- * PCI device sensitive PE, we will try to reset the device
- * through FLR. For now, we don't have OPAL APIs to do HARD
- * reset yet, so all reset would be SOFT (HOT) reset.
- */
-static int ioda_eeh_reset(struct eeh_pe *pe, int option)
-{
-	struct pci_controller *hose = pe->phb;
-	struct pci_bus *bus;
-	int ret;
-
-	/*
-	 * For PHB reset, we always have complete reset. For those PEs whose
-	 * primary bus derived from root complex (root bus) or root port
-	 * (usually bus#1), we apply hot or fundamental reset on the root port.
-	 * For other PEs, we always have hot reset on the PE primary bus.
-	 *
-	 * Here, we have different design to pHyp, which always clear the
-	 * frozen state during PE reset. However, the good idea here from
-	 * benh is to keep frozen state before we get PE reset done completely
-	 * (until BAR restore). With the frozen state, HW drops illegal IO
-	 * or MMIO access, which can incur recrusive frozen PE during PE
-	 * reset. The side effect is that EEH core has to clear the frozen
-	 * state explicitly after BAR restore.
-	 */
-	if (pe->type & EEH_PE_PHB) {
-		ret = ioda_eeh_phb_reset(hose, option);
-	} else {
-		struct pnv_phb *phb;
-		s64 rc;
-
-		/*
-		 * The frozen PE might be caused by PAPR error injection
-		 * registers, which are expected to be cleared after hitting
-		 * frozen PE as stated in the hardware spec. Unfortunately,
-		 * that's not true on P7IOC. So we have to clear it manually
-		 * to avoid recursive EEH errors during recovery.
-		 */
-		phb = hose->private_data;
-		if (phb->model == PNV_PHB_MODEL_P7IOC &&
-		    (option == EEH_RESET_HOT ||
-		    option == EEH_RESET_FUNDAMENTAL)) {
-			rc = opal_pci_reset(phb->opal_id,
-					    OPAL_RESET_PHB_ERROR,
-					    OPAL_ASSERT_RESET);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld clearing "
-					"error injection registers\n",
-					__func__, rc);
-				return -EIO;
-			}
-		}
-
-		bus = eeh_pe_bus_get(pe);
-		if (pci_is_root_bus(bus) ||
-		    pci_is_root_bus(bus->parent))
-			ret = ioda_eeh_root_reset(hose, option);
-		else
-			ret = ioda_eeh_bridge_reset(bus->self, option);
-	}
-
-	return ret;
-}
-
-/**
- * ioda_eeh_get_log - Retrieve error log
- * @pe: frozen PE
- * @severity: permanent or temporary error
- * @drv_log: device driver log
- * @len: length of device driver log
- *
- * Retrieve error log, which contains log from device driver
- * and firmware.
- */
-static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
-			    char *drv_log, unsigned long len)
-{
-	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
-		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
-
-	return 0;
-}
-
-/**
- * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
- * @pe: EEH PE
- *
- * For particular PE, it might have included PCI bridges. In order
- * to make the PE work properly, those PCI bridges should be configured
- * correctly. However, we need do nothing on P7IOC since the reset
- * function will do everything that should be covered by the function.
- */
-static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
-{
-	return 0;
-}
-
-static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func,
-			       unsigned long addr, unsigned long mask)
-{
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	s64 ret;
-
-	/* Sanity check on error type */
-	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
-	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
-		pr_warn("%s: Invalid error type %d\n",
-			__func__, type);
-		return -ERANGE;
-	}
-
-	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
-	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
-		pr_warn("%s: Invalid error function %d\n",
-			__func__, func);
-		return -ERANGE;
-	}
-
-	/* Firmware supports error injection ? */
-	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
-		pr_warn("%s: Firmware doesn't support error injection\n",
-			__func__);
-		return -ENXIO;
-	}
-
-	/* Do error injection */
-	ret = opal_pci_err_inject(phb->opal_id, pe->addr,
-				  type, func, addr, mask);
-	if (ret != OPAL_SUCCESS) {
-		pr_warn("%s: Failure %lld injecting error "
-			"%d-%d to PHB#%x-PE#%x\n",
-			__func__, ret, type, func,
-			hose->global_number, pe->addr);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
-{
-	/* GEM */
-	if (data->gemXfir || data->gemRfir ||
-	    data->gemRirqfir || data->gemMask || data->gemRwof)
-		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
-			be64_to_cpu(data->gemXfir),
-			be64_to_cpu(data->gemRfir),
-			be64_to_cpu(data->gemRirqfir),
-			be64_to_cpu(data->gemMask),
-			be64_to_cpu(data->gemRwof));
-
-	/* LEM */
-	if (data->lemFir || data->lemErrMask ||
-	    data->lemAction0 || data->lemAction1 || data->lemWof)
-		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
-			be64_to_cpu(data->lemFir),
-			be64_to_cpu(data->lemErrMask),
-			be64_to_cpu(data->lemAction0),
-			be64_to_cpu(data->lemAction1),
-			be64_to_cpu(data->lemWof));
-}
-
-static void ioda_eeh_hub_diag(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
-	long rc;
-
-	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
-	if (rc != OPAL_SUCCESS) {
-		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
-			__func__, phb->hub_id, rc);
-		return;
-	}
-
-	switch (data->type) {
-	case OPAL_P7IOC_DIAG_TYPE_RGC:
-		pr_info("P7IOC diag-data for RGC\n\n");
-		ioda_eeh_hub_diag_common(data);
-		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
-			pr_info("  RGC: %016llx %016llx\n",
-				be64_to_cpu(data->rgc.rgcStatus),
-				be64_to_cpu(data->rgc.rgcLdcp));
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_BI:
-		pr_info("P7IOC diag-data for BI %s\n\n",
-			data->bi.biDownbound ? "Downbound" : "Upbound");
-		ioda_eeh_hub_diag_common(data);
-		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
-		    data->bi.biLdcp2 || data->bi.biFenceStatus)
-			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
-				be64_to_cpu(data->bi.biLdcp0),
-				be64_to_cpu(data->bi.biLdcp1),
-				be64_to_cpu(data->bi.biLdcp2),
-				be64_to_cpu(data->bi.biFenceStatus));
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_CI:
-		pr_info("P7IOC diag-data for CI Port %d\n\n",
-			data->ci.ciPort);
-		ioda_eeh_hub_diag_common(data);
-		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
-			pr_info("  CI:  %016llx %016llx\n",
-				be64_to_cpu(data->ci.ciPortStatus),
-				be64_to_cpu(data->ci.ciPortLdcp));
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_MISC:
-		pr_info("P7IOC diag-data for MISC\n\n");
-		ioda_eeh_hub_diag_common(data);
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_I2C:
-		pr_info("P7IOC diag-data for I2C\n\n");
-		ioda_eeh_hub_diag_common(data);
-		break;
-	default:
-		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
-			__func__, phb->hub_id, data->type);
-	}
-}
-
-static int ioda_eeh_get_pe(struct pci_controller *hose,
-			   u16 pe_no, struct eeh_pe **pe)
-{
-	struct pnv_phb *phb = hose->private_data;
-	struct pnv_ioda_pe *pnv_pe;
-	struct eeh_pe *dev_pe;
-	struct eeh_dev edev;
-
-	/*
-	 * If PHB supports compound PE, to fetch
-	 * the master PE because slave PE is invisible
-	 * to EEH core.
-	 */
-	pnv_pe = &phb->ioda.pe_array[pe_no];
-	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
-		pnv_pe = pnv_pe->master;
-		WARN_ON(!pnv_pe ||
-			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
-		pe_no = pnv_pe->pe_number;
-	}
-
-	/* Find the PE according to PE# */
-	memset(&edev, 0, sizeof(struct eeh_dev));
-	edev.phb = hose;
-	edev.pe_config_addr = pe_no;
-	dev_pe = eeh_pe_get(&edev);
-	if (!dev_pe)
-		return -EEXIST;
-
-	/* Freeze the (compound) PE */
-	*pe = dev_pe;
-	if (!(dev_pe->state & EEH_PE_ISOLATED))
-		phb->freeze_pe(phb, pe_no);
-
-	/*
-	 * At this point, we're sure the (compound) PE should
-	 * have been frozen. However, we still need poke until
-	 * hitting the frozen PE on top level.
-	 */
-	dev_pe = dev_pe->parent;
-	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
-		int ret;
-		int active_flags = (EEH_STATE_MMIO_ACTIVE |
-				    EEH_STATE_DMA_ACTIVE);
-
-		ret = eeh_ops->get_state(dev_pe, NULL);
-		if (ret <= 0 || (ret & active_flags) == active_flags) {
-			dev_pe = dev_pe->parent;
-			continue;
-		}
-
-		/* Frozen parent PE */
-		*pe = dev_pe;
-		if (!(dev_pe->state & EEH_PE_ISOLATED))
-			phb->freeze_pe(phb, dev_pe->addr);
-
-		/* Next one */
-		dev_pe = dev_pe->parent;
-	}
-
-	return 0;
-}
-
-/**
- * ioda_eeh_next_error - Retrieve next error for EEH core to handle
- * @pe: The affected PE
- *
- * The function is expected to be called by EEH core while it gets
- * special EEH event (without binding PE). The function calls to
- * OPAL APIs for next error to handle. The informational error is
- * handled internally by platform. However, the dead IOC, dead PHB,
- * fenced PHB and frozen PE should be handled by EEH core eventually.
- */
-static int ioda_eeh_next_error(struct eeh_pe **pe)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct eeh_pe *phb_pe, *parent_pe;
-	__be64 frozen_pe_no;
-	__be16 err_type, severity;
-	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
-	long rc;
-	int state, ret = EEH_NEXT_ERR_NONE;
-
-	/*
-	 * While running here, it's safe to purge the event queue.
-	 * And we should keep the cached OPAL notifier event sychronized
-	 * between the kernel and firmware.
-	 */
-	eeh_remove_event(NULL, false);
-	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
-
-	list_for_each_entry(hose, &hose_list, list_node) {
-		/*
-		 * If the subordinate PCI buses of the PHB has been
-		 * removed or is exactly under error recovery, we
-		 * needn't take care of it any more.
-		 */
-		phb = hose->private_data;
-		phb_pe = eeh_phb_pe_get(hose);
-		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
-			continue;
-
-		rc = opal_pci_next_error(phb->opal_id,
-				&frozen_pe_no, &err_type, &severity);
-
-		/* If OPAL API returns error, we needn't proceed */
-		if (rc != OPAL_SUCCESS) {
-			pr_devel("%s: Invalid return value on "
-				 "PHB#%x (0x%lx) from opal_pci_next_error",
-				 __func__, hose->global_number, rc);
-			continue;
-		}
-
-		/* If the PHB doesn't have error, stop processing */
-		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
-		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
-			pr_devel("%s: No error found on PHB#%x\n",
-				 __func__, hose->global_number);
-			continue;
-		}
-
-		/*
-		 * Processing the error. We're expecting the error with
-		 * highest priority reported upon multiple errors on the
-		 * specific PHB.
-		 */
-		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
-			 __func__, be16_to_cpu(err_type), be16_to_cpu(severity),
-			 be64_to_cpu(frozen_pe_no), hose->global_number);
-		switch (be16_to_cpu(err_type)) {
-		case OPAL_EEH_IOC_ERROR:
-			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
-				pr_err("EEH: dead IOC detected\n");
-				ret = EEH_NEXT_ERR_DEAD_IOC;
-			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
-				pr_info("EEH: IOC informative error "
-					"detected\n");
-				ioda_eeh_hub_diag(hose);
-				ret = EEH_NEXT_ERR_NONE;
-			}
-
-			break;
-		case OPAL_EEH_PHB_ERROR:
-			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
-				*pe = phb_pe;
-				pr_err("EEH: dead PHB#%x detected, "
-				       "location: %s\n",
-				       hose->global_number,
-				       eeh_pe_loc_get(phb_pe));
-				ret = EEH_NEXT_ERR_DEAD_PHB;
-			} else if (be16_to_cpu(severity) ==
-						OPAL_EEH_SEV_PHB_FENCED) {
-				*pe = phb_pe;
-				pr_err("EEH: Fenced PHB#%x detected, "
-				       "location: %s\n",
-				       hose->global_number,
-				       eeh_pe_loc_get(phb_pe));
-				ret = EEH_NEXT_ERR_FENCED_PHB;
-			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
-				pr_info("EEH: PHB#%x informative error "
-					"detected, location: %s\n",
-					hose->global_number,
-					eeh_pe_loc_get(phb_pe));
-				ioda_eeh_phb_diag(phb_pe);
-				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
-				ret = EEH_NEXT_ERR_NONE;
-			}
-
-			break;
-		case OPAL_EEH_PE_ERROR:
-			/*
-			 * If we can't find the corresponding PE, we
-			 * just try to unfreeze.
-			 */
-			if (ioda_eeh_get_pe(hose,
-					    be64_to_cpu(frozen_pe_no), pe)) {
-				/* Try best to clear it */
-				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
-					hose->global_number, frozen_pe_no);
-				pr_info("EEH: PHB location: %s\n",
-					eeh_pe_loc_get(phb_pe));
-				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
-					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
-				ret = EEH_NEXT_ERR_NONE;
-			} else if ((*pe)->state & EEH_PE_ISOLATED ||
-				   eeh_pe_passed(*pe)) {
-				ret = EEH_NEXT_ERR_NONE;
-			} else {
-				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
-					(*pe)->addr, (*pe)->phb->global_number);
-				pr_err("EEH: PE location: %s, PHB location: %s\n",
-					eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
-				ret = EEH_NEXT_ERR_FROZEN_PE;
-			}
-
-			break;
-		default:
-			pr_warn("%s: Unexpected error type %d\n",
-				__func__, be16_to_cpu(err_type));
-		}
-
-		/*
-		 * EEH core will try recover from fenced PHB or
-		 * frozen PE. In the time for frozen PE, EEH core
-		 * enable IO path for that before collecting logs,
-		 * but it ruins the site. So we have to dump the
-		 * log in advance here.
-		 */
-		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
-		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
-		    !((*pe)->state & EEH_PE_ISOLATED)) {
-			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
-			ioda_eeh_phb_diag(*pe);
-
-			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
-				pnv_pci_dump_phb_diag_data((*pe)->phb,
-							   (*pe)->data);
-		}
-
-		/*
-		 * We probably have the frozen parent PE out there and
-		 * we need have to handle frozen parent PE firstly.
-		 */
-		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
-			parent_pe = (*pe)->parent;
-			while (parent_pe) {
-				/* Hit the ceiling ? */
-				if (parent_pe->type & EEH_PE_PHB)
-					break;
-
-				/* Frozen parent PE ? */
-				state = ioda_eeh_get_state(parent_pe);
-				if (state > 0 &&
-				    (state & active_flags) != active_flags)
-					*pe = parent_pe;
-
-				/* Next parent level */
-				parent_pe = parent_pe->parent;
-			}
-
-			/* We possibly migrate to another PE */
-			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
-		}
-
-		/*
-		 * If we have no errors on the specific PHB or only
-		 * informative error there, we continue poking it.
-		 * Otherwise, we need actions to be taken by upper
-		 * layer.
-		 */
-		if (ret > EEH_NEXT_ERR_INF)
-			break;
-	}
-
-	return ret;
-}
-
-struct pnv_eeh_ops ioda_eeh_ops = {
-	.post_init		= ioda_eeh_post_init,
-	.set_option		= ioda_eeh_set_option,
-	.get_state		= ioda_eeh_get_state,
-	.reset			= ioda_eeh_reset,
-	.get_log		= ioda_eeh_get_log,
-	.configure_bridge	= ioda_eeh_configure_bridge,
-	.err_inject		= ioda_eeh_err_inject,
-	.next_error		= ioda_eeh_next_error
-};
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index e261869adc86..ce738ab3d5a9 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/atomic.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/init.h>
@@ -38,12 +39,14 @@
 #include "powernv.h"
 #include "pci.h"
 
+static bool pnv_eeh_nb_init = false;
+
 /**
- * powernv_eeh_init - EEH platform dependent initialization
+ * pnv_eeh_init - EEH platform dependent initialization
  *
  * EEH platform dependent initialization on powernv
  */
-static int powernv_eeh_init(void)
+static int pnv_eeh_init(void)
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
@@ -85,37 +88,280 @@ static int powernv_eeh_init(void)
 	return 0;
 }
 
+static int pnv_eeh_event(struct notifier_block *nb,
+			 unsigned long events, void *change)
+{
+	uint64_t changed_evts = (uint64_t)change;
+
+	/*
+	 * We simply send special EEH event if EEH has
+	 * been enabled, or clear pending events in
+	 * case that we enable EEH soon
+	 */
+	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
+	    !(events & OPAL_EVENT_PCI_ERROR))
+		return 0;
+
+	if (eeh_enabled())
+		eeh_send_failure_event(NULL);
+	else
+		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
+
+	return 0;
+}
+
+static struct notifier_block pnv_eeh_nb = {
+	.notifier_call	= pnv_eeh_event,
+	.next		= NULL,
+	.priority	= 0
+};
+
+#ifdef CONFIG_DEBUG_FS
+static ssize_t pnv_eeh_ei_write(struct file *filp,
+				const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct pci_controller *hose = filp->private_data;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	int pe_no, type, func;
+	unsigned long addr, mask;
+	char buf[50];
+	int ret;
+
+	if (!eeh_ops || !eeh_ops->err_inject)
+		return -ENXIO;
+
+	/* Copy over argument buffer */
+	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
+	if (!ret)
+		return -EFAULT;
+
+	/* Retrieve parameters */
+	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
+		     &pe_no, &type, &func, &addr, &mask);
+	if (ret != 5)
+		return -EINVAL;
+
+	/* Retrieve PE */
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return -ENOMEM;
+	edev->phb = hose;
+	edev->pe_config_addr = pe_no;
+	pe = eeh_pe_get(edev);
+	kfree(edev);
+	if (!pe)
+		return -ENODEV;
+
+	/* Do error injection */
+	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
+	return ret < 0 ? ret : count;
+}
+
+static const struct file_operations pnv_eeh_ei_fops = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+	.write	= pnv_eeh_ei_write,
+};
+
+static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
+{
+	struct pci_controller *hose = data;
+	struct pnv_phb *phb = hose->private_data;
+
+	out_be64(phb->regs + offset, val);
+	return 0;
+}
+
+static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
+{
+	struct pci_controller *hose = data;
+	struct pnv_phb *phb = hose->private_data;
+
+	*val = in_be64(phb->regs + offset);
+	return 0;
+}
+
+static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
+{
+	return pnv_eeh_dbgfs_set(data, 0xD10, val);
+}
+
+static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
+{
+	return pnv_eeh_dbgfs_get(data, 0xD10, val);
+}
+
+static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
+{
+	return pnv_eeh_dbgfs_set(data, 0xD90, val);
+}
+
+static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
+{
+	return pnv_eeh_dbgfs_get(data, 0xD90, val);
+}
+
+static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
+{
+	return pnv_eeh_dbgfs_set(data, 0xE10, val);
+}
+
+static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
+{
+	return pnv_eeh_dbgfs_get(data, 0xE10, val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
+			pnv_eeh_outb_dbgfs_set, "0x%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
+			pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
+			pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
+#endif /* CONFIG_DEBUG_FS */
+
 /**
- * powernv_eeh_post_init - EEH platform dependent post initialization
+ * pnv_eeh_post_init - EEH platform dependent post initialization
  *
  * EEH platform dependent post initialization on powernv. When
  * the function is called, the EEH PEs and devices should have
  * been built. If the I/O cache staff has been built, EEH is
  * ready to supply service.
  */
-static int powernv_eeh_post_init(void)
+static int pnv_eeh_post_init(void)
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
 	int ret = 0;
 
+	/* Register OPAL event notifier */
+	if (!pnv_eeh_nb_init) {
+		ret = opal_notifier_register(&pnv_eeh_nb);
+		if (ret) {
+			pr_warn("%s: Can't register OPAL event notifier (%d)\n",
+				__func__, ret);
+			return ret;
+		}
+
+		pnv_eeh_nb_init = true;
+	}
+
 	list_for_each_entry(hose, &hose_list, list_node) {
 		phb = hose->private_data;
 
-		if (phb->eeh_ops && phb->eeh_ops->post_init) {
-			ret = phb->eeh_ops->post_init(hose);
-			if (ret)
-				break;
-		}
+		/*
+		 * If EEH is enabled, we're going to rely on that.
+		 * Otherwise, we restore to conventional mechanism
+		 * to clear frozen PE during PCI config access.
+		 */
+		if (eeh_enabled())
+			phb->flags |= PNV_PHB_FLAG_EEH;
+		else
+			phb->flags &= ~PNV_PHB_FLAG_EEH;
+
+		/* Create debugfs entries */
+#ifdef CONFIG_DEBUG_FS
+		if (phb->has_dbgfs || !phb->dbgfs)
+			continue;
+
+		phb->has_dbgfs = 1;
+		debugfs_create_file("err_injct", 0200,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_ei_fops);
+
+		debugfs_create_file("err_injct_outbound", 0600,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_outb_dbgfs_ops);
+		debugfs_create_file("err_injct_inboundA", 0600,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_inbA_dbgfs_ops);
+		debugfs_create_file("err_injct_inboundB", 0600,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_inbB_dbgfs_ops);
+#endif /* CONFIG_DEBUG_FS */
 	}
 
+
 	return ret;
 }
 
+static int pnv_eeh_cap_start(struct pci_dn *pdn)
+{
+	u32 status;
+
+	if (!pdn)
+		return 0;
+
+	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	return PCI_CAPABILITY_LIST;
+}
+
+static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+	int pos = pnv_eeh_cap_start(pdn);
+	int cnt = 48;   /* Maximal number of capabilities */
+	u32 id;
+
+	if (!pos)
+		return 0;
+
+	while (cnt--) {
+		pnv_pci_cfg_read(pdn, pos, 1, &pos);
+		if (pos < 0x40)
+			break;
+
+		pos &= ~3;
+		pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (id == 0xff)
+			break;
+
+		/* Found */
+		if (id == cap)
+			return pos;
+
+		/* Next one */
+		pos += PCI_CAP_LIST_NEXT;
+	}
+
+	return 0;
+}
+
+static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	u32 header;
+	int pos = 256, ttl = (4096 - 256) / 8;
+
+	if (!edev || !edev->pcie_cap)
+		return 0;
+	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+		return 0;
+	else if (!header)
+		return 0;
+
+	while (ttl-- > 0) {
+		if (PCI_EXT_CAP_ID(header) == cap && pos)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < 256)
+			break;
+
+		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;
+}
+
 /**
- * powernv_eeh_dev_probe - Do probe on PCI device
- * @dev: PCI device
- * @flag: unused
+ * pnv_eeh_probe - Do probe on PCI device
+ * @pdn: PCI device node
+ * @data: unused
  *
  * When EEH module is installed during system boot, all PCI devices
  * are checked one by one to see if it supports EEH. The function
@@ -129,12 +375,12 @@ static int powernv_eeh_post_init(void)
  * was possiblly triggered by EEH core, the binding between EEH device
  * and the PCI device isn't built yet.
  */
-static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
+static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pci_controller *hose = pdn->phb;
 	struct pnv_phb *phb = hose->private_data;
-	struct device_node *dn = pci_device_to_OF_node(dev);
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	uint32_t pcie_flags;
 	int ret;
 
 	/*
@@ -143,40 +389,42 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	 * the root bridge. So it's not reasonable to continue
 	 * the probing.
 	 */
-	if (!dn || !edev || edev->pe)
-		return 0;
+	if (!edev || edev->pe)
+		return NULL;
 
 	/* Skip for PCI-ISA bridge */
-	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
-		return 0;
+	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return NULL;
 
 	/* Initialize eeh device */
-	edev->class_code = dev->class;
+	edev->class_code = pdn->class_code;
 	edev->mode	&= 0xFFFFFF00;
-	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
-	edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
-	if (pci_is_pcie(dev)) {
-		edev->pcie_cap = pci_pcie_cap(dev);
-
-		if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
-			edev->mode |= EEH_DEV_ROOT_PORT;
-		else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
-			edev->mode |= EEH_DEV_DS_PORT;
-
-		edev->aer_cap = pci_find_ext_capability(dev,
-							PCI_EXT_CAP_ID_ERR);
+		if (edev->pcie_cap) {
+			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
 	}
 
-	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
-	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
+	edev->config_addr    = (pdn->busno << 8) | (pdn->devfn);
+	edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];
 
 	/* Create PE */
 	ret = eeh_add_to_parent_pe(edev);
 	if (ret) {
-		pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n",
-			__func__, pci_name(dev), ret);
-		return ret;
+		pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n",
+			__func__, hose->global_number, pdn->busno,
+			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
+		return NULL;
 	}
 
 	/*
@@ -195,8 +443,10 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	 * Broadcom Austin 4-ports NICs (14e4:1657)
 	 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
 	 */
-	if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) ||
-	    (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e))
+	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+	     pdn->device_id == 0x1657) ||
+	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+	     pdn->device_id == 0x168e))
 		edev->pe->state |= EEH_PE_CFG_RESTRICTED;
 
 	/*
@@ -206,7 +456,8 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	 * to PE reset.
 	 */
 	if (!edev->pe->bus)
-		edev->pe->bus = dev->bus;
+		edev->pe->bus = pci_find_bus(hose->global_number,
+					     pdn->busno);
 
 	/*
 	 * Enable EEH explicitly so that we will do EEH check
@@ -217,11 +468,11 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	/* Save memory bars */
 	eeh_save_bars(edev);
 
-	return 0;
+	return NULL;
 }
 
 /**
- * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
  * @pe: EEH PE
  * @option: operation to be issued
  *
@@ -229,36 +480,236 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
  * Currently, following options are support according to PAPR:
  * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
  */
-static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
+static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
 {
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
-	int ret = -EEXIST;
+	bool freeze_pe = false;
+	int opt, ret = 0;
+	s64 rc;
+
+	/* Sanity check on option */
+	switch (option) {
+	case EEH_OPT_DISABLE:
+		return -EPERM;
+	case EEH_OPT_ENABLE:
+		return 0;
+	case EEH_OPT_THAW_MMIO:
+		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
+		break;
+	case EEH_OPT_THAW_DMA:
+		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
+		break;
+	case EEH_OPT_FREEZE_PE:
+		freeze_pe = true;
+		opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
+		break;
+	default:
+		pr_warn("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
 
-	/*
-	 * What we need do is pass it down for hardware
-	 * implementation to handle it.
-	 */
-	if (phb->eeh_ops && phb->eeh_ops->set_option)
-		ret = phb->eeh_ops->set_option(pe, option);
+	/* If PHB supports compound PE, to handle it */
+	if (freeze_pe) {
+		if (phb->freeze_pe) {
+			phb->freeze_pe(phb, pe->addr);
+		} else {
+			rc = opal_pci_eeh_freeze_set(phb->opal_id,
+						     pe->addr, opt);
+			if (rc != OPAL_SUCCESS) {
+				pr_warn("%s: Failure %lld freezing "
+					"PHB#%x-PE#%x\n",
+					__func__, rc,
+					phb->hose->global_number, pe->addr);
+				ret = -EIO;
+			}
+		}
+	} else {
+		if (phb->unfreeze_pe) {
+			ret = phb->unfreeze_pe(phb, pe->addr, opt);
+		} else {
+			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+						       pe->addr, opt);
+			if (rc != OPAL_SUCCESS) {
+				pr_warn("%s: Failure %lld enable %d "
+					"for PHB#%x-PE#%x\n",
+					__func__, rc, option,
+					phb->hose->global_number, pe->addr);
+				ret = -EIO;
+			}
+		}
+	}
 
 	return ret;
 }
 
 /**
- * powernv_eeh_get_pe_addr - Retrieve PE address
+ * pnv_eeh_get_pe_addr - Retrieve PE address
  * @pe: EEH PE
  *
  * Retrieve the PE address according to the given tranditional
  * PCI BDF (Bus/Device/Function) address.
  */
-static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
+static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
 {
 	return pe->addr;
 }
 
+static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	s64 rc;
+
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
+					 PNV_PCI_DIAG_BUF_SIZE);
+	if (rc != OPAL_SUCCESS)
+		pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
+			__func__, rc, pe->phb->global_number);
+}
+
+static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	u8 fstate;
+	__be16 pcierr;
+	s64 rc;
+	int result = 0;
+
+	rc = opal_pci_eeh_freeze_status(phb->opal_id,
+					pe->addr,
+					&fstate,
+					&pcierr,
+					NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting PHB#%x state\n",
+			__func__, rc, phb->hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/*
+	 * Check PHB state. If the PHB is frozen for the
+	 * first time, to dump the PHB diag-data.
+	 */
+	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+	} else if (!(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		pnv_eeh_get_phb_diag(pe);
+
+		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+	}
+
+	return result;
+}
+
+static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	u8 fstate;
+	__be16 pcierr;
+	s64 rc;
+	int result;
+
+	/*
+	 * We don't clobber hardware frozen state until PE
+	 * reset is completed. In order to keep EEH core
+	 * moving forward, we have to return operational
+	 * state during PE reset.
+	 */
+	if (pe->state & EEH_PE_RESET) {
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+		return result;
+	}
+
+	/*
+	 * Fetch PE state from hardware. If the PHB
+	 * supports compound PE, let it handle that.
+	 */
+	if (phb->get_pe_state) {
+		fstate = phb->get_pe_state(phb, pe->addr);
+	} else {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						pe->addr,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
+				__func__, rc, phb->hose->global_number,
+				pe->addr);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+	}
+
+	/* Figure out state */
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_NOT_FROZEN:
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+		break;
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+		result = (EEH_STATE_DMA_ACTIVE |
+			  EEH_STATE_DMA_ENABLED);
+		break;
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+		result = (EEH_STATE_MMIO_ACTIVE |
+			  EEH_STATE_MMIO_ENABLED);
+		break;
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		result = 0;
+		break;
+	case OPAL_EEH_STOPPED_RESET:
+		result = EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+		result = EEH_STATE_UNAVAILABLE;
+		break;
+	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+		result = EEH_STATE_NOT_SUPPORT;
+		break;
+	default:
+		result = EEH_STATE_NOT_SUPPORT;
+		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
+			__func__, phb->hose->global_number,
+			pe->addr, fstate);
+	}
+
+	/*
+	 * If PHB supports compound PE, to freeze all
+	 * slave PEs for consistency.
+	 *
+	 * If the PE is switching to frozen state for the
+	 * first time, to dump the PHB diag-data.
+	 */
+	if (!(result & EEH_STATE_NOT_SUPPORT) &&
+	    !(result & EEH_STATE_UNAVAILABLE) &&
+	    !(result & EEH_STATE_MMIO_ACTIVE) &&
+	    !(result & EEH_STATE_DMA_ACTIVE)  &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		if (phb->freeze_pe)
+			phb->freeze_pe(phb, pe->addr);
+
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		pnv_eeh_get_phb_diag(pe);
+
+		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+	}
+
+	return result;
+}
+
 /**
- * powernv_eeh_get_state - Retrieve PE state
+ * pnv_eeh_get_state - Retrieve PE state
  * @pe: EEH PE
  * @delay: delay while PE state is temporarily unavailable
  *
@@ -267,64 +718,279 @@ static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
  * we prefer passing down to hardware implementation to handle
  * it.
  */
-static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
+static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	int ret;
+
+	if (pe->type & EEH_PE_PHB)
+		ret = pnv_eeh_get_phb_state(pe);
+	else
+		ret = pnv_eeh_get_pe_state(pe);
+
+	if (!delay)
+		return ret;
+
+	/*
+	 * If the PE state is temporarily unavailable,
+	 * to inform the EEH core delay for default
+	 * period (1 second)
+	 */
+	*delay = 0;
+	if (ret & EEH_STATE_UNAVAILABLE)
+		*delay = 1000;
+
+	return ret;
+}
+
+static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
+{
+	s64 rc = OPAL_HARDWARE;
+
+	while (1) {
+		rc = opal_pci_poll(phb->opal_id);
+		if (rc <= 0)
+			break;
+
+		if (system_state < SYSTEM_RUNNING)
+			udelay(1000 * rc);
+		else
+			msleep(rc);
+	}
+
+	return rc;
+}
+
+int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
 {
-	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
-	int ret = EEH_STATE_NOT_SUPPORT;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/* Issue PHB complete reset request */
+	if (option == EEH_RESET_FUNDAMENTAL ||
+	    option == EEH_RESET_HOT)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_COMPLETE,
+				    OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_DEACTIVATE)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_COMPLETE,
+				    OPAL_DEASSERT_RESET);
+	if (rc < 0)
+		goto out;
 
-	if (phb->eeh_ops && phb->eeh_ops->get_state) {
-		ret = phb->eeh_ops->get_state(pe);
+	/*
+	 * Poll state of the PHB until the request is done
+	 * successfully. The PHB reset is usually PHB complete
+	 * reset followed by hot reset on root bus. So we also
+	 * need the PCI bus settlement delay.
+	 */
+	rc = pnv_eeh_phb_poll(phb);
+	if (option == EEH_RESET_DEACTIVATE) {
+		if (system_state < SYSTEM_RUNNING)
+			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
+		else
+			msleep(EEH_PE_RST_SETTLE_TIME);
+	}
+out:
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
 
-		/*
-		 * If the PE state is temporarily unavailable,
-		 * to inform the EEH core delay for default
-		 * period (1 second)
-		 */
-		if (delay) {
-			*delay = 0;
-			if (ret & EEH_STATE_UNAVAILABLE)
-				*delay = 1000;
+	return 0;
+}
+
+static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/*
+	 * During the reset deassert time, we needn't care
+	 * the reset scope because the firmware does nothing
+	 * for fundamental or hot reset during deassert phase.
+	 */
+	if (option == EEH_RESET_FUNDAMENTAL)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PCI_FUNDAMENTAL,
+				    OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_HOT)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PCI_HOT,
+				    OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_DEACTIVATE)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PCI_HOT,
+				    OPAL_DEASSERT_RESET);
+	if (rc < 0)
+		goto out;
+
+	/* Poll state of the PHB until the request is done */
+	rc = pnv_eeh_phb_poll(phb);
+	if (option == EEH_RESET_DEACTIVATE)
+		msleep(EEH_PE_RST_SETTLE_TIME);
+out:
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+
+static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
+{
+	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int aer = edev ? edev->aer_cap : 0;
+	u32 ctrl;
+
+	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(dev->bus),
+		 dev->bus->number, option);
+
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+	case EEH_RESET_HOT:
+		/* Don't report linkDown event */
+		if (aer) {
+			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+					     4, &ctrl);
+			ctrl |= PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+					      4, ctrl);
 		}
+
+		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
+
+		msleep(EEH_PE_RST_HOLD_TIME);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
+
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		/* Continue reporting linkDown event */
+		if (aer) {
+			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+					     4, &ctrl);
+			ctrl &= ~PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+					      4, ctrl);
+		}
+
+		break;
 	}
 
-	return ret;
+	return 0;
+}
+
+void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+
+	if (pci_is_root_bus(dev->bus)) {
+		hose = pci_bus_to_host(dev->bus);
+		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
+		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
+	} else {
+		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
+		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
+	}
 }
 
 /**
- * powernv_eeh_reset - Reset the specified PE
+ * pnv_eeh_reset - Reset the specified PE
  * @pe: EEH PE
  * @option: reset option
  *
- * Reset the specified PE
+ * Do reset on the indicated PE. For PCI bus sensitive PE,
+ * we need to reset the parent p2p bridge. The PHB has to
+ * be reinitialized if the p2p bridge is root bridge. For
+ * PCI device sensitive PE, we will try to reset the device
+ * through FLR. For now, we don't have OPAL APIs to do HARD
+ * reset yet, so all reset would be SOFT (HOT) reset.
  */
-static int powernv_eeh_reset(struct eeh_pe *pe, int option)
+static int pnv_eeh_reset(struct eeh_pe *pe, int option)
 {
 	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	int ret = -EEXIST;
+	struct pci_bus *bus;
+	int ret;
+
+	/*
+	 * For PHB reset, we always have complete reset. For those PEs whose
+	 * primary bus derived from root complex (root bus) or root port
+	 * (usually bus#1), we apply hot or fundamental reset on the root port.
+	 * For other PEs, we always have hot reset on the PE primary bus.
+	 *
+	 * Here, we have different design to pHyp, which always clear the
+	 * frozen state during PE reset. However, the good idea here from
+	 * benh is to keep frozen state before we get PE reset done completely
+	 * (until BAR restore). With the frozen state, HW drops illegal IO
+	 * or MMIO access, which can incur recrusive frozen PE during PE
+	 * reset. The side effect is that EEH core has to clear the frozen
+	 * state explicitly after BAR restore.
+	 */
+	if (pe->type & EEH_PE_PHB) {
+		ret = pnv_eeh_phb_reset(hose, option);
+	} else {
+		struct pnv_phb *phb;
+		s64 rc;
 
-	if (phb->eeh_ops && phb->eeh_ops->reset)
-		ret = phb->eeh_ops->reset(pe, option);
+		/*
+		 * The frozen PE might be caused by PAPR error injection
+		 * registers, which are expected to be cleared after hitting
+		 * frozen PE as stated in the hardware spec. Unfortunately,
+		 * that's not true on P7IOC. So we have to clear it manually
+		 * to avoid recursive EEH errors during recovery.
+		 */
+		phb = hose->private_data;
+		if (phb->model == PNV_PHB_MODEL_P7IOC &&
+		    (option == EEH_RESET_HOT ||
+		    option == EEH_RESET_FUNDAMENTAL)) {
+			rc = opal_pci_reset(phb->opal_id,
+					    OPAL_RESET_PHB_ERROR,
+					    OPAL_ASSERT_RESET);
+			if (rc != OPAL_SUCCESS) {
+				pr_warn("%s: Failure %lld clearing "
+					"error injection registers\n",
+					__func__, rc);
+				return -EIO;
+			}
+		}
+
+		bus = eeh_pe_bus_get(pe);
+		if (pci_is_root_bus(bus) ||
+			pci_is_root_bus(bus->parent))
+			ret = pnv_eeh_root_reset(hose, option);
+		else
+			ret = pnv_eeh_bridge_reset(bus->self, option);
+	}
 
 	return ret;
 }
 
 /**
- * powernv_eeh_wait_state - Wait for PE state
+ * pnv_eeh_wait_state - Wait for PE state
  * @pe: EEH PE
  * @max_wait: maximal period in microsecond
  *
  * Wait for the state of associated PE. It might take some time
  * to retrieve the PE's state.
  */
-static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
+static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
 {
 	int ret;
 	int mwait;
 
 	while (1) {
-		ret = powernv_eeh_get_state(pe, &mwait);
+		ret = pnv_eeh_get_state(pe, &mwait);
 
 		/*
 		 * If the PE's state is temporarily unavailable,
@@ -348,7 +1014,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
 }
 
 /**
- * powernv_eeh_get_log - Retrieve error log
+ * pnv_eeh_get_log - Retrieve error log
  * @pe: EEH PE
  * @severity: temporary or permanent error log
  * @drv_log: driver log to be combined with retrieved error log
@@ -356,41 +1022,30 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
  *
  * Retrieve the temporary or permanent error from the PE.
  */
-static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
-			       char *drv_log, unsigned long len)
+static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
+			   char *drv_log, unsigned long len)
 {
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	int ret = -EEXIST;
+	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
+		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
 
-	if (phb->eeh_ops && phb->eeh_ops->get_log)
-		ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
-
-	return ret;
+	return 0;
 }
 
 /**
- * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
  * @pe: EEH PE
  *
  * The function will be called to reconfigure the bridges included
  * in the specified PE so that the mulfunctional PE would be recovered
  * again.
  */
-static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
+static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
 {
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	int ret = 0;
-
-	if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
-		ret = phb->eeh_ops->configure_bridge(pe);
-
-	return ret;
+	return 0;
 }
 
 /**
- * powernv_pe_err_inject - Inject specified error to the indicated PE
+ * pnv_pe_err_inject - Inject specified error to the indicated PE
  * @pe: the indicated PE
  * @type: error type
  * @func: specific error type
@@ -401,22 +1056,52 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
  * determined by @type and @func, to the indicated PE for
  * testing purpose.
  */
-static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
-				  unsigned long addr, unsigned long mask)
+static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
+			      unsigned long addr, unsigned long mask)
 {
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
-	int ret = -EEXIST;
+	s64 rc;
+
+	/* Sanity check on error type */
+	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
+	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
+		pr_warn("%s: Invalid error type %d\n",
+			__func__, type);
+		return -ERANGE;
+	}
 
-	if (phb->eeh_ops && phb->eeh_ops->err_inject)
-		ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
+	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
+	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
+		pr_warn("%s: Invalid error function %d\n",
+			__func__, func);
+		return -ERANGE;
+	}
 
-	return ret;
+	/* Firmware supports error injection ? */
+	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
+		pr_warn("%s: Firmware doesn't support error injection\n",
+			__func__);
+		return -ENXIO;
+	}
+
+	/* Do error injection */
+	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
+				 type, func, addr, mask);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld injecting error "
+			"%d-%d to PHB#%x-PE#%x\n",
+			__func__, rc, type, func,
+			hose->global_number, pe->addr);
+		return -EIO;
+	}
+
+	return 0;
 }
 
-static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
+static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
 {
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
 	if (!edev || !edev->pe)
 		return false;
@@ -427,51 +1112,377 @@ static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
 	return false;
 }
 
-static int powernv_eeh_read_config(struct device_node *dn,
-				   int where, int size, u32 *val)
+static int pnv_eeh_read_config(struct pci_dn *pdn,
+			       int where, int size, u32 *val)
 {
-	if (powernv_eeh_cfg_blocked(dn)) {
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (pnv_eeh_cfg_blocked(pdn)) {
 		*val = 0xFFFFFFFF;
 		return PCIBIOS_SET_FAILED;
 	}
 
-	return pnv_pci_cfg_read(dn, where, size, val);
+	return pnv_pci_cfg_read(pdn, where, size, val);
 }
 
-static int powernv_eeh_write_config(struct device_node *dn,
-				    int where, int size, u32 val)
+static int pnv_eeh_write_config(struct pci_dn *pdn,
+				int where, int size, u32 val)
 {
-	if (powernv_eeh_cfg_blocked(dn))
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (pnv_eeh_cfg_blocked(pdn))
 		return PCIBIOS_SET_FAILED;
 
-	return pnv_pci_cfg_write(dn, where, size, val);
+	return pnv_pci_cfg_write(pdn, where, size, val);
+}
+
+static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+	/* GEM */
+	if (data->gemXfir || data->gemRfir ||
+	    data->gemRirqfir || data->gemMask || data->gemRwof)
+		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->gemXfir),
+			be64_to_cpu(data->gemRfir),
+			be64_to_cpu(data->gemRirqfir),
+			be64_to_cpu(data->gemMask),
+			be64_to_cpu(data->gemRwof));
+
+	/* LEM */
+	if (data->lemFir || data->lemErrMask ||
+	    data->lemAction0 || data->lemAction1 || data->lemWof)
+		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrMask),
+			be64_to_cpu(data->lemAction0),
+			be64_to_cpu(data->lemAction1),
+			be64_to_cpu(data->lemWof));
+}
+
+static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
+	long rc;
+
+	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+			__func__, phb->hub_id, rc);
+		return;
+	}
+
+	switch (data->type) {
+	case OPAL_P7IOC_DIAG_TYPE_RGC:
+		pr_info("P7IOC diag-data for RGC\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
+			pr_info("  RGC: %016llx %016llx\n",
+				be64_to_cpu(data->rgc.rgcStatus),
+				be64_to_cpu(data->rgc.rgcLdcp));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_BI:
+		pr_info("P7IOC diag-data for BI %s\n\n",
+			data->bi.biDownbound ? "Downbound" : "Upbound");
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
+		    data->bi.biLdcp2 || data->bi.biFenceStatus)
+			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
+				be64_to_cpu(data->bi.biLdcp0),
+				be64_to_cpu(data->bi.biLdcp1),
+				be64_to_cpu(data->bi.biLdcp2),
+				be64_to_cpu(data->bi.biFenceStatus));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_CI:
+		pr_info("P7IOC diag-data for CI Port %d\n\n",
+			data->ci.ciPort);
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
+			pr_info("  CI:  %016llx %016llx\n",
+				be64_to_cpu(data->ci.ciPortStatus),
+				be64_to_cpu(data->ci.ciPortLdcp));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_MISC:
+		pr_info("P7IOC diag-data for MISC\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_I2C:
+		pr_info("P7IOC diag-data for I2C\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		break;
+	default:
+		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			__func__, phb->hub_id, data->type);
+	}
+}
+
+static int pnv_eeh_get_pe(struct pci_controller *hose,
+			  u16 pe_no, struct eeh_pe **pe)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pnv_pe;
+	struct eeh_pe *dev_pe;
+	struct eeh_dev edev;
+
+	/*
+	 * If PHB supports compound PE, to fetch
+	 * the master PE because slave PE is invisible
+	 * to EEH core.
+	 */
+	pnv_pe = &phb->ioda.pe_array[pe_no];
+	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
+		pnv_pe = pnv_pe->master;
+		WARN_ON(!pnv_pe ||
+			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pnv_pe->pe_number;
+	}
+
+	/* Find the PE according to PE# */
+	memset(&edev, 0, sizeof(struct eeh_dev));
+	edev.phb = hose;
+	edev.pe_config_addr = pe_no;
+	dev_pe = eeh_pe_get(&edev);
+	if (!dev_pe)
+		return -EEXIST;
+
+	/* Freeze the (compound) PE */
+	*pe = dev_pe;
+	if (!(dev_pe->state & EEH_PE_ISOLATED))
+		phb->freeze_pe(phb, pe_no);
+
+	/*
+	 * At this point, we're sure the (compound) PE should
+	 * have been frozen. However, we still need poke until
+	 * hitting the frozen PE on top level.
+	 */
+	dev_pe = dev_pe->parent;
+	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
+		int ret;
+		int active_flags = (EEH_STATE_MMIO_ACTIVE |
+				    EEH_STATE_DMA_ACTIVE);
+
+		ret = eeh_ops->get_state(dev_pe, NULL);
+		if (ret <= 0 || (ret & active_flags) == active_flags) {
+			dev_pe = dev_pe->parent;
+			continue;
+		}
+
+		/* Frozen parent PE */
+		*pe = dev_pe;
+		if (!(dev_pe->state & EEH_PE_ISOLATED))
+			phb->freeze_pe(phb, dev_pe->addr);
+
+		/* Next one */
+		dev_pe = dev_pe->parent;
+	}
+
+	return 0;
 }
 
 /**
- * powernv_eeh_next_error - Retrieve next EEH error to handle
+ * pnv_eeh_next_error - Retrieve next EEH error to handle
  * @pe: Affected PE
  *
- * Using OPAL API, to retrieve next EEH error for EEH core to handle
+ * The function is expected to be called by EEH core while it gets
+ * special EEH event (without binding PE). The function calls to
+ * OPAL APIs for next error to handle. The informational error is
+ * handled internally by platform. However, the dead IOC, dead PHB,
+ * fenced PHB and frozen PE should be handled by EEH core eventually.
  */
-static int powernv_eeh_next_error(struct eeh_pe **pe)
+static int pnv_eeh_next_error(struct eeh_pe **pe)
 {
 	struct pci_controller *hose;
-	struct pnv_phb *phb = NULL;
+	struct pnv_phb *phb;
+	struct eeh_pe *phb_pe, *parent_pe;
+	__be64 frozen_pe_no;
+	__be16 err_type, severity;
+	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	long rc;
+	int state, ret = EEH_NEXT_ERR_NONE;
+
+	/*
+	 * While running here, it's safe to purge the event queue.
+	 * And we should keep the cached OPAL notifier event sychronized
+	 * between the kernel and firmware.
+	 */
+	eeh_remove_event(NULL, false);
+	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
 	list_for_each_entry(hose, &hose_list, list_node) {
+		/*
+		 * If the subordinate PCI buses of the PHB has been
+		 * removed or is exactly under error recovery, we
+		 * needn't take care of it any more.
+		 */
 		phb = hose->private_data;
-		break;
-	}
+		phb_pe = eeh_phb_pe_get(hose);
+		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
+			continue;
+
+		rc = opal_pci_next_error(phb->opal_id,
+					 &frozen_pe_no, &err_type, &severity);
+		if (rc != OPAL_SUCCESS) {
+			pr_devel("%s: Invalid return value on "
+				 "PHB#%x (0x%lx) from opal_pci_next_error",
+				 __func__, hose->global_number, rc);
+			continue;
+		}
+
+		/* If the PHB doesn't have error, stop processing */
+		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
+		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
+			pr_devel("%s: No error found on PHB#%x\n",
+				 __func__, hose->global_number);
+			continue;
+		}
+
+		/*
+		 * Processing the error. We're expecting the error with
+		 * highest priority reported upon multiple errors on the
+		 * specific PHB.
+		 */
+		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
+			__func__, be16_to_cpu(err_type),
+			be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
+			hose->global_number);
+		switch (be16_to_cpu(err_type)) {
+		case OPAL_EEH_IOC_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
+				pr_err("EEH: dead IOC detected\n");
+				ret = EEH_NEXT_ERR_DEAD_IOC;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: IOC informative error "
+					"detected\n");
+				pnv_eeh_get_and_dump_hub_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PHB_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
+				*pe = phb_pe;
+				pr_err("EEH: dead PHB#%x detected, "
+				       "location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_DEAD_PHB;
+			} else if (be16_to_cpu(severity) ==
+				   OPAL_EEH_SEV_PHB_FENCED) {
+				*pe = phb_pe;
+				pr_err("EEH: Fenced PHB#%x detected, "
+				       "location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FENCED_PHB;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: PHB#%x informative error "
+					"detected, location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				pnv_eeh_get_phb_diag(phb_pe);
+				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PE_ERROR:
+			/*
+			 * If we can't find the corresponding PE, we
+			 * just try to unfreeze.
+			 */
+			if (pnv_eeh_get_pe(hose,
+				be64_to_cpu(frozen_pe_no), pe)) {
+				/* Try best to clear it */
+				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+					hose->global_number, frozen_pe_no);
+				pr_info("EEH: PHB location: %s\n",
+					eeh_pe_loc_get(phb_pe));
+				opal_pci_eeh_freeze_clear(phb->opal_id,
+					frozen_pe_no,
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+				ret = EEH_NEXT_ERR_NONE;
+			} else if ((*pe)->state & EEH_PE_ISOLATED ||
+				   eeh_pe_passed(*pe)) {
+				ret = EEH_NEXT_ERR_NONE;
+			} else {
+				pr_err("EEH: Frozen PE#%x "
+				       "on PHB#%x detected\n",
+				       (*pe)->addr,
+					(*pe)->phb->global_number);
+				pr_err("EEH: PE location: %s, "
+				       "PHB location: %s\n",
+				       eeh_pe_loc_get(*pe),
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FROZEN_PE;
+			}
+
+			break;
+		default:
+			pr_warn("%s: Unexpected error type %d\n",
+				__func__, be16_to_cpu(err_type));
+		}
 
-	if (phb && phb->eeh_ops->next_error)
-		return phb->eeh_ops->next_error(pe);
+		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+			pnv_eeh_get_phb_diag(*pe);
+
+			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+				pnv_pci_dump_phb_diag_data((*pe)->phb,
+							   (*pe)->data);
+		}
 
-	return -EEXIST;
+		/*
+		 * We probably have the frozen parent PE out there and
+		 * we need have to handle frozen parent PE firstly.
+		 */
+		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+			parent_pe = (*pe)->parent;
+			while (parent_pe) {
+				/* Hit the ceiling ? */
+				if (parent_pe->type & EEH_PE_PHB)
+					break;
+
+				/* Frozen parent PE ? */
+				state = eeh_ops->get_state(parent_pe, NULL);
+				if (state > 0 &&
+				    (state & active_flags) != active_flags)
+					*pe = parent_pe;
+
+				/* Next parent level */
+				parent_pe = parent_pe->parent;
+			}
+
+			/* We possibly migrate to another PE */
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+		}
+
+		/*
+		 * If we have no errors on the specific PHB or only
+		 * informative error there, we continue poking it.
+		 * Otherwise, we need actions to be taken by upper
+		 * layer.
+		 */
+		if (ret > EEH_NEXT_ERR_INF)
+			break;
+	}
+
+	return ret;
 }
 
-static int powernv_eeh_restore_config(struct device_node *dn)
+static int pnv_eeh_restore_config(struct pci_dn *pdn)
 {
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 	struct pnv_phb *phb;
 	s64 ret;
 
@@ -490,24 +1501,23 @@ static int powernv_eeh_restore_config(struct device_node *dn)
 	return 0;
 }
 
-static struct eeh_ops powernv_eeh_ops = {
+static struct eeh_ops pnv_eeh_ops = {
 	.name                   = "powernv",
-	.init                   = powernv_eeh_init,
-	.post_init              = powernv_eeh_post_init,
-	.of_probe               = NULL,
-	.dev_probe              = powernv_eeh_dev_probe,
-	.set_option             = powernv_eeh_set_option,
-	.get_pe_addr            = powernv_eeh_get_pe_addr,
-	.get_state              = powernv_eeh_get_state,
-	.reset                  = powernv_eeh_reset,
-	.wait_state             = powernv_eeh_wait_state,
-	.get_log                = powernv_eeh_get_log,
-	.configure_bridge       = powernv_eeh_configure_bridge,
-	.err_inject		= powernv_eeh_err_inject,
-	.read_config            = powernv_eeh_read_config,
-	.write_config           = powernv_eeh_write_config,
-	.next_error		= powernv_eeh_next_error,
-	.restore_config		= powernv_eeh_restore_config
+	.init                   = pnv_eeh_init,
+	.post_init              = pnv_eeh_post_init,
+	.probe			= pnv_eeh_probe,
+	.set_option             = pnv_eeh_set_option,
+	.get_pe_addr            = pnv_eeh_get_pe_addr,
+	.get_state              = pnv_eeh_get_state,
+	.reset                  = pnv_eeh_reset,
+	.wait_state             = pnv_eeh_wait_state,
+	.get_log                = pnv_eeh_get_log,
+	.configure_bridge       = pnv_eeh_configure_bridge,
+	.err_inject		= pnv_eeh_err_inject,
+	.read_config            = pnv_eeh_read_config,
+	.write_config           = pnv_eeh_write_config,
+	.next_error		= pnv_eeh_next_error,
+	.restore_config		= pnv_eeh_restore_config
 };
 
 /**
@@ -521,7 +1531,7 @@ static int __init eeh_powernv_init(void)
 	int ret = -EINVAL;
 
 	eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
-	ret = eeh_ops_register(&powernv_eeh_ops);
+	ret = eeh_ops_register(&pnv_eeh_ops);
 	if (!ret)
 		pr_info("EEH: PowerNV platform initialized\n");
 	else
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 23260f7dfa7a..5aa9c1ce4de3 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -452,5 +452,6 @@ void __init opal_platform_dump_init(void)
 		return;
 	}
 
-	opal_dump_resend_notification();
+	if (opal_check_token(OPAL_DUMP_RESEND))
+		opal_dump_resend_notification();
 }
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 518fe95dbf24..38ce757e5e2a 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -313,7 +313,8 @@ int __init opal_elog_init(void)
 	}
 
 	/* We are now ready to pull error logs from opal. */
-	opal_resend_pending_logs();
+	if (opal_check_token(OPAL_ELOG_RESEND))
+		opal_resend_pending_logs();
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 5c21d9c07f45..4ec6219287fc 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -120,7 +120,11 @@ static struct image_header_t	image_header;
 static struct image_data_t	image_data;
 static struct validate_flash_t	validate_flash_data;
 static struct manage_flash_t	manage_flash_data;
-static struct update_flash_t	update_flash_data;
+
+/* Initialize update_flash_data status to No Operation */
+static struct update_flash_t	update_flash_data = {
+	.status = FLASH_NO_OP,
+};
 
 static DEFINE_MUTEX(image_data_mutex);
 
@@ -542,7 +546,7 @@ static struct attribute_group image_op_attr_group = {
 	.attrs = image_op_attrs,
 };
 
-void __init opal_flash_init(void)
+void __init opal_flash_update_init(void)
 {
 	int ret;
 
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index f9896fd5d04a..9db4398ded5d 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 
 #include <asm/opal.h>
+#include <asm/nvram.h>
 #include <asm/machdep.h>
 
 static unsigned int nvram_size;
@@ -62,6 +63,15 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
 	return count;
 }
 
+static int __init opal_nvram_init_log_partitions(void)
+{
+	/* Scan nvram for partitions */
+	nvram_scan_partitions();
+	nvram_init_oops_partition(0);
+	return 0;
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
 void __init opal_nvram_init(void)
 {
 	struct device_node *np;
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index 48bf5b080bcf..ac46c2c24f99 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -29,8 +29,9 @@ static int opal_power_control_event(struct notifier_block *nb,
 
 	switch (type) {
 	case SOFT_REBOOT:
-		/* Fall through. The service processor is responsible for
-		 * bringing the machine back up */
+		pr_info("OPAL: reboot requested\n");
+		orderly_reboot();
+		break;
 	case SOFT_OFF:
 		pr_info("OPAL: poweroff requested\n");
 		orderly_poweroff(true);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index 4ab67ef7abc9..655250499d18 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -46,18 +46,28 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
 
 	mutex_lock(&opal_sensor_mutex);
 	ret = opal_sensor_read(sensor_hndl, token, &data);
-	if (ret != OPAL_ASYNC_COMPLETION)
-		goto out_token;
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, ret);
+			goto out_token;
+		}
 
-	ret = opal_async_wait_response(token, &msg);
-	if (ret) {
-		pr_err("%s: Failed to wait for the async response, %d\n",
-				__func__, ret);
-		goto out_token;
-	}
+		ret = opal_error_code(be64_to_cpu(msg.params[1]));
+		*sensor_data = be32_to_cpu(data);
+		break;
+
+	case OPAL_SUCCESS:
+		ret = 0;
+		*sensor_data = be32_to_cpu(data);
+		break;
 
-	*sensor_data = be32_to_cpu(data);
-	ret = be64_to_cpu(msg.params[1]);
+	default:
+		ret = opal_error_code(ret);
+		break;
+	}
 
 out_token:
 	mutex_unlock(&opal_sensor_mutex);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0509bca5e830..a7ade94cdf87 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -9,11 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/jump_label.h>
 #include <asm/ppc_asm.h>
 #include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/opal.h>
-#include <asm/jump_label.h>
 
 	.section	".text"
 
@@ -286,9 +286,12 @@ OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
 OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
 OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
-OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CXL_MODE);
+OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CAPI_MODE);
 OPAL_CALL(opal_tpo_write,			OPAL_WRITE_TPO);
 OPAL_CALL(opal_tpo_read,			OPAL_READ_TPO);
 OPAL_CALL(opal_ipmi_send,			OPAL_IPMI_SEND);
 OPAL_CALL(opal_ipmi_recv,			OPAL_IPMI_RECV);
 OPAL_CALL(opal_i2c_request,			OPAL_I2C_REQUEST);
+OPAL_CALL(opal_flash_read,			OPAL_FLASH_READ);
+OPAL_CALL(opal_flash_write,			OPAL_FLASH_WRITE);
+OPAL_CALL(opal_flash_erase,			OPAL_FLASH_ERASE);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 18fd4e71c9c1..2241565b0739 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -23,6 +23,8 @@
 #include <linux/kobject.h>
 #include <linux/delay.h>
 #include <linux/memblock.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <asm/machdep.h>
 #include <asm/opal.h>
@@ -58,6 +60,7 @@ static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
 static DEFINE_SPINLOCK(opal_notifier_lock);
 static uint64_t last_notified_mask = 0x0ul;
 static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
+static uint32_t opal_heartbeat;
 
 static void opal_reinit_cores(void)
 {
@@ -302,23 +305,26 @@ void opal_notifier_disable(void)
  * Opal message notifier based on message type. Allow subscribers to get
  * notified for specific messgae type.
  */
-int opal_message_notifier_register(enum OpalMessageType msg_type,
+int opal_message_notifier_register(enum opal_msg_type msg_type,
 					struct notifier_block *nb)
 {
-	if (!nb) {
-		pr_warning("%s: Invalid argument (%p)\n",
-			   __func__, nb);
-		return -EINVAL;
-	}
-	if (msg_type > OPAL_MSG_TYPE_MAX) {
-		pr_warning("%s: Invalid message type argument (%d)\n",
+	if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
+		pr_warning("%s: Invalid arguments, msg_type:%d\n",
 			   __func__, msg_type);
 		return -EINVAL;
 	}
+
 	return atomic_notifier_chain_register(
 				&opal_msg_notifier_head[msg_type], nb);
 }
 
+int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+				     struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(
+			&opal_msg_notifier_head[msg_type], nb);
+}
+
 static void opal_message_do_notify(uint32_t msg_type, void *msg)
 {
 	/* notify subscribers */
@@ -351,7 +357,7 @@ static void opal_handle_message(void)
 	type = be32_to_cpu(msg.msg_type);
 
 	/* Sanity check */
-	if (type > OPAL_MSG_TYPE_MAX) {
+	if (type >= OPAL_MSG_TYPE_MAX) {
 		pr_warning("%s: Unknown message type: %u\n", __func__, type);
 		return;
 	}
@@ -665,6 +671,9 @@ static void __init opal_dump_region_init(void)
 	uint64_t size;
 	int rc;
 
+	if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
+		return;
+
 	/* Register kernel log buffer */
 	addr = log_buf_addr_get();
 	if (addr == NULL)
@@ -684,6 +693,15 @@ static void __init opal_dump_region_init(void)
 			"rc = %d\n", rc);
 }
 
+static void opal_flash_init(struct device_node *opal_node)
+{
+	struct device_node *np;
+
+	for_each_child_of_node(opal_node, np)
+		if (of_device_is_compatible(np, "ibm,opal-flash"))
+			of_platform_device_create(np, NULL, NULL);
+}
+
 static void opal_ipmi_init(struct device_node *opal_node)
 {
 	struct device_node *np;
@@ -741,6 +759,29 @@ static void __init opal_irq_init(struct device_node *dn)
 	}
 }
 
+static int kopald(void *unused)
+{
+	set_freezable();
+	do {
+		try_to_freeze();
+		opal_poll_events(NULL);
+		msleep_interruptible(opal_heartbeat);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+static void opal_init_heartbeat(void)
+{
+	/* Old firwmware, we assume the HVC heartbeat is sufficient */
+	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
+				 &opal_heartbeat) != 0)
+		opal_heartbeat = 0;
+
+	if (opal_heartbeat)
+		kthread_run(kopald, NULL, "kopald");
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -769,6 +810,9 @@ static int __init opal_init(void)
 	/* Create i2c platform devices */
 	opal_i2c_create_devs();
 
+	/* Setup a heatbeat thread if requested by OPAL */
+	opal_init_heartbeat();
+
 	/* Find all OPAL interrupts and request them */
 	opal_irq_init(opal_node);
 
@@ -782,7 +826,7 @@ static int __init opal_init(void)
 		/* Setup error log interface */
 		rc = opal_elog_init();
 		/* Setup code update interface */
-		opal_flash_init();
+		opal_flash_update_init();
 		/* Setup platform dump extract interface */
 		opal_platform_dump_init();
 		/* Setup system parameters interface */
@@ -791,8 +835,11 @@ static int __init opal_init(void)
 		opal_msglog_init();
 	}
 
+	/* Initialize OPAL IPMI backend */
 	opal_ipmi_init(opal_node);
 
+	opal_flash_init(opal_node);
+
 	return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
@@ -823,13 +870,17 @@ void opal_shutdown(void)
 	}
 
 	/* Unregister memory dump region */
-	opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
+	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
+		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
 }
 
 /* Export this so that test modules can use it */
 EXPORT_SYMBOL_GPL(opal_invalid_call);
 EXPORT_SYMBOL_GPL(opal_ipmi_send);
 EXPORT_SYMBOL_GPL(opal_ipmi_recv);
+EXPORT_SYMBOL_GPL(opal_flash_read);
+EXPORT_SYMBOL_GPL(opal_flash_write);
+EXPORT_SYMBOL_GPL(opal_flash_erase);
 
 /* Convert a region of vmalloc memory to an opal sg list */
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
@@ -894,6 +945,25 @@ void opal_free_sg_list(struct opal_sg_list *sg)
 	}
 }
 
+int opal_error_code(int rc)
+{
+	switch (rc) {
+	case OPAL_SUCCESS:		return 0;
+
+	case OPAL_PARAMETER:		return -EINVAL;
+	case OPAL_ASYNC_COMPLETION:	return -EINPROGRESS;
+	case OPAL_BUSY_EVENT:		return -EBUSY;
+	case OPAL_NO_MEM:		return -ENOMEM;
+
+	case OPAL_UNSUPPORTED:		return -EIO;
+	case OPAL_HARDWARE:		return -EIO;
+	case OPAL_INTERNAL_ERROR:	return -EIO;
+	default:
+		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
+		return -EIO;
+	}
+}
+
 EXPORT_SYMBOL_GPL(opal_poll_events);
 EXPORT_SYMBOL_GPL(opal_rtc_read);
 EXPORT_SYMBOL_GPL(opal_rtc_write);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6c9ff2b95119..920c252d1f49 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -44,6 +44,9 @@
 #include "powernv.h"
 #include "pci.h"
 
+/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
+#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)
+
 static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 			    const char *fmt, ...)
 {
@@ -56,11 +59,18 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	if (pe->pdev)
+	if (pe->flags & PNV_IODA_PE_DEV)
 		strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
-	else
+	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
 		sprintf(pfix, "%04x:%02x     ",
 			pci_domain_nr(pe->pbus), pe->pbus->number);
+#ifdef CONFIG_PCI_IOV
+	else if (pe->flags & PNV_IODA_PE_VF)
+		sprintf(pfix, "%04x:%02x:%2x.%d",
+			pci_domain_nr(pe->parent_dev->bus),
+			(pe->rid & 0xff00) >> 8,
+			PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
+#endif /* CONFIG_PCI_IOV*/
 
 	printk("%spci %s: [PE# %.3d] %pV",
 	       level, pfix, pe->pe_number, &vaf);
@@ -591,7 +601,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
 			      bool is_add)
 {
 	struct pnv_ioda_pe *slave;
-	struct pci_dev *pdev;
+	struct pci_dev *pdev = NULL;
 	int ret;
 
 	/*
@@ -630,8 +640,12 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
 
 	if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
 		pdev = pe->pbus->self;
-	else
+	else if (pe->flags & PNV_IODA_PE_DEV)
 		pdev = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+	else if (pe->flags & PNV_IODA_PE_VF)
+		pdev = pe->parent_dev->bus->self;
+#endif /* CONFIG_PCI_IOV */
 	while (pdev) {
 		struct pci_dn *pdn = pci_get_pdn(pdev);
 		struct pnv_ioda_pe *parent;
@@ -649,6 +663,87 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
 	return 0;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *parent;
+	uint8_t bcomp, dcomp, fcomp;
+	int64_t rc;
+	long rid_end, rid;
+
+	/* Currently, we just deconfigure VF PE. Bus PE will always there.*/
+	if (pe->pbus) {
+		int count;
+
+		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+		parent = pe->pbus->self;
+		if (pe->flags & PNV_IODA_PE_BUS_ALL)
+			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+		else
+			count = 1;
+
+		switch(count) {
+		case  1: bcomp = OpalPciBusAll;         break;
+		case  2: bcomp = OpalPciBus7Bits;       break;
+		case  4: bcomp = OpalPciBus6Bits;       break;
+		case  8: bcomp = OpalPciBus5Bits;       break;
+		case 16: bcomp = OpalPciBus4Bits;       break;
+		case 32: bcomp = OpalPciBus3Bits;       break;
+		default:
+			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+			        count);
+			/* Do an exact match only */
+			bcomp = OpalPciBusAll;
+		}
+		rid_end = pe->rid + (count << 8);
+	} else {
+		if (pe->flags & PNV_IODA_PE_VF)
+			parent = pe->parent_dev;
+		else
+			parent = pe->pdev->bus->self;
+		bcomp = OpalPciBusAll;
+		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+		rid_end = pe->rid + 1;
+	}
+
+	/* Clear the reverse map */
+	for (rid = pe->rid; rid < rid_end; rid++)
+		phb->ioda.pe_rmap[rid] = 0;
+
+	/* Release from all parents PELT-V */
+	while (parent) {
+		struct pci_dn *pdn = pci_get_pdn(parent);
+		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+						pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+			/* XXX What to do in case of error ? */
+		}
+		parent = parent->bus->self;
+	}
+
+	opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
+				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+	/* Disassociate PE in PELT */
+	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+				pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+	if (rc)
+		pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
+	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+			     bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
+	if (rc)
+		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+
+	pe->pbus = NULL;
+	pe->pdev = NULL;
+	pe->parent_dev = NULL;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
 	struct pci_dev *parent;
@@ -675,15 +770,19 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 		case 16: bcomp = OpalPciBus4Bits;	break;
 		case 32: bcomp = OpalPciBus3Bits;	break;
 		default:
-			pr_err("%s: Number of subordinate busses %d"
-			       " unsupported\n",
-			       pci_name(pe->pbus->self), count);
+			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+			        count);
 			/* Do an exact match only */
 			bcomp = OpalPciBusAll;
 		}
 		rid_end = pe->rid + (count << 8);
 	} else {
-		parent = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+		if (pe->flags & PNV_IODA_PE_VF)
+			parent = pe->parent_dev;
+		else
+#endif /* CONFIG_PCI_IOV */
+			parent = pe->pdev->bus->self;
 		bcomp = OpalPciBusAll;
 		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
 		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
@@ -774,6 +873,78 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
 	return 10;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+	struct pci_dn *pdn = pci_get_pdn(dev);
+	int i;
+	struct resource *res, res2;
+	resource_size_t size;
+	u16 num_vfs;
+
+	if (!dev->is_physfn)
+		return -EINVAL;
+
+	/*
+	 * "offset" is in VFs.  The M64 windows are sized so that when they
+	 * are segmented, each segment is the same size as the IOV BAR.
+	 * Each segment is in a separate PE, and the high order bits of the
+	 * address are the PE number.  Therefore, each VF's BAR is in a
+	 * separate PE, and changing the IOV BAR start address changes the
+	 * range of PEs the VFs are in.
+	 */
+	num_vfs = pdn->num_vfs;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		if (!pnv_pci_is_mem_pref_64(res->flags))
+			continue;
+
+		/*
+		 * The actual IOV BAR range is determined by the start address
+		 * and the actual size for num_vfs VFs BAR.  This check is to
+		 * make sure that after shifting, the range will not overlap
+		 * with another device.
+		 */
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2.flags = res->flags;
+		res2.start = res->start + (size * offset);
+		res2.end = res2.start + (size * num_vfs) - 1;
+
+		if (res2.end > res->end) {
+			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+				i, &res2, res, num_vfs, offset);
+			return -EBUSY;
+		}
+	}
+
+	/*
+	 * After doing so, there would be a "hole" in the /proc/iomem when
+	 * offset is a positive value. It looks like the device return some
+	 * mmio back to the system, which actually no one could use it.
+	 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		if (!pnv_pci_is_mem_pref_64(res->flags))
+			continue;
+
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2 = *res;
+		res->start += size * offset;
+
+		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n",
+			 i, &res2, res, num_vfs, offset);
+		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+	}
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 #if 0
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
@@ -857,7 +1028,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 				pci_name(dev));
 			continue;
 		}
-		pdn->pcidev = dev;
 		pdn->pe_number = pe->pe_number;
 		pe->dma_weight += pnv_ioda_dma_weight(dev);
 		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
@@ -916,6 +1086,10 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 		return;
 	}
 
+	pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
+			GFP_KERNEL, hose->node);
+	pe->tce32_table->data = pe;
+
 	/* Associate it with all child devices */
 	pnv_ioda_setup_same_PE(bus, pe);
 
@@ -974,6 +1148,441 @@ static void pnv_pci_ioda_setup_PEs(void)
 	}
 }
 
+#ifdef CONFIG_PCI_IOV
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct pci_dn         *pdn;
+	int                    i, j;
+
+	bus = pdev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+		for (j = 0; j < M64_PER_IOV; j++) {
+			if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+				continue;
+			opal_pci_phb_mmio_enable(phb->opal_id,
+				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
+			clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc);
+			pdn->m64_wins[i][j] = IODA_INVALID_M64;
+		}
+
+	return 0;
+}
+
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct pci_dn         *pdn;
+	unsigned int           win;
+	struct resource       *res;
+	int                    i, j;
+	int64_t                rc;
+	int                    total_vfs;
+	resource_size_t        size, start;
+	int                    pe_num;
+	int                    vf_groups;
+	int                    vf_per_group;
+
+	bus = pdev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
+	total_vfs = pci_sriov_get_totalvfs(pdev);
+
+	/* Initialize the m64_wins to IODA_INVALID_M64 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+		for (j = 0; j < M64_PER_IOV; j++)
+			pdn->m64_wins[i][j] = IODA_INVALID_M64;
+
+	if (pdn->m64_per_iov == M64_PER_IOV) {
+		vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV;
+		vf_per_group = (num_vfs <= M64_PER_IOV)? 1:
+			roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
+	} else {
+		vf_groups = 1;
+		vf_per_group = 1;
+	}
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		if (!pnv_pci_is_mem_pref_64(res->flags))
+			continue;
+
+		for (j = 0; j < vf_groups; j++) {
+			do {
+				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+						phb->ioda.m64_bar_idx + 1, 0);
+
+				if (win >= phb->ioda.m64_bar_idx + 1)
+					goto m64_failed;
+			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+			pdn->m64_wins[i][j] = win;
+
+			if (pdn->m64_per_iov == M64_PER_IOV) {
+				size = pci_iov_resource_size(pdev,
+							PCI_IOV_RESOURCES + i);
+				size = size * vf_per_group;
+				start = res->start + size * j;
+			} else {
+				size = resource_size(res);
+				start = res->start;
+			}
+
+			/* Map the M64 here */
+			if (pdn->m64_per_iov == M64_PER_IOV) {
+				pe_num = pdn->offset + j;
+				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+						pe_num, OPAL_M64_WINDOW_TYPE,
+						pdn->m64_wins[i][j], 0);
+			}
+
+			rc = opal_pci_set_phb_mem_window(phb->opal_id,
+						 OPAL_M64_WINDOW_TYPE,
+						 pdn->m64_wins[i][j],
+						 start,
+						 0, /* unused */
+						 size);
+
+
+			if (rc != OPAL_SUCCESS) {
+				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
+					win, rc);
+				goto m64_failed;
+			}
+
+			if (pdn->m64_per_iov == M64_PER_IOV)
+				rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				     OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2);
+			else
+				rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				     OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1);
+
+			if (rc != OPAL_SUCCESS) {
+				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
+					win, rc);
+				goto m64_failed;
+			}
+		}
+	}
+	return 0;
+
+m64_failed:
+	pnv_pci_vf_release_m64(pdev);
+	return -EBUSY;
+}
+
+static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct iommu_table    *tbl;
+	unsigned long         addr;
+	int64_t               rc;
+
+	bus = dev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	tbl = pe->tce32_table;
+	addr = tbl->it_base;
+
+	opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+				   pe->pe_number << 1, 1, __pa(addr),
+				   0, 0x1000);
+
+	rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+				        pe->pe_number,
+				        (pe->pe_number << 1) + 1,
+				        pe->tce_bypass_base,
+				        0);
+	if (rc)
+		pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+
+	iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
+	free_pages(addr, get_order(TCE32_TABLE_SIZE));
+	pe->tce32_table = NULL;
+}
+
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe, *pe_n;
+	struct pci_dn         *pdn;
+	u16                    vf_index;
+	int64_t                rc;
+
+	bus = pdev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
+
+	if (!pdev->is_physfn)
+		return;
+
+	if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
+		int   vf_group;
+		int   vf_per_group;
+		int   vf_index1;
+
+		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
+
+		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
+			for (vf_index = vf_group * vf_per_group;
+				vf_index < (vf_group + 1) * vf_per_group &&
+				vf_index < num_vfs;
+				vf_index++)
+				for (vf_index1 = vf_group * vf_per_group;
+					vf_index1 < (vf_group + 1) * vf_per_group &&
+					vf_index1 < num_vfs;
+					vf_index1++){
+
+					rc = opal_pci_set_peltv(phb->opal_id,
+						pdn->offset + vf_index,
+						pdn->offset + vf_index1,
+						OPAL_REMOVE_PE_FROM_DOMAIN);
+
+					if (rc)
+					    dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n",
+						__func__,
+						pdn->offset + vf_index1, rc);
+				}
+	}
+
+	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+		if (pe->parent_dev != pdev)
+			continue;
+
+		pnv_pci_ioda2_release_dma_pe(pdev, pe);
+
+		/* Remove from list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_del(&pe->list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		pnv_ioda_deconfigure_pe(phb, pe);
+
+		pnv_ioda_free_pe(phb, pe->pe_number);
+	}
+}
+
+void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct pci_dn         *pdn;
+	struct pci_sriov      *iov;
+	u16 num_vfs;
+
+	bus = pdev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
+	iov = pdev->sriov;
+	num_vfs = pdn->num_vfs;
+
+	/* Release VF PEs */
+	pnv_ioda_release_vf_PE(pdev, num_vfs);
+
+	if (phb->type == PNV_PHB_IODA2) {
+		if (pdn->m64_per_iov == 1)
+			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+
+		/* Release M64 windows */
+		pnv_pci_vf_release_m64(pdev);
+
+		/* Release PE numbers */
+		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+		pdn->offset = 0;
+	}
+}
+
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+				       struct pnv_ioda_pe *pe);
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe;
+	int                    pe_num;
+	u16                    vf_index;
+	struct pci_dn         *pdn;
+	int64_t                rc;
+
+	bus = pdev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
+
+	if (!pdev->is_physfn)
+		return;
+
+	/* Reserve PE for each VF */
+	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+		pe_num = pdn->offset + vf_index;
+
+		pe = &phb->ioda.pe_array[pe_num];
+		pe->pe_number = pe_num;
+		pe->phb = phb;
+		pe->flags = PNV_IODA_PE_VF;
+		pe->pbus = NULL;
+		pe->parent_dev = pdev;
+		pe->tce32_seg = -1;
+		pe->mve_number = -1;
+		pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
+			   pci_iov_virtfn_devfn(pdev, vf_index);
+
+		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
+			hose->global_number, pdev->bus->number,
+			PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+			PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
+
+		if (pnv_ioda_configure_pe(phb, pe)) {
+			/* XXX What do we do here ? */
+			if (pe_num)
+				pnv_ioda_free_pe(phb, pe_num);
+			pe->pdev = NULL;
+			continue;
+		}
+
+		pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
+				GFP_KERNEL, hose->node);
+		pe->tce32_table->data = pe;
+
+		/* Put PE to the list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_add_tail(&pe->list, &phb->ioda.pe_list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		pnv_pci_ioda2_setup_dma_pe(phb, pe);
+	}
+
+	if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
+		int   vf_group;
+		int   vf_per_group;
+		int   vf_index1;
+
+		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
+
+		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
+			for (vf_index = vf_group * vf_per_group;
+			     vf_index < (vf_group + 1) * vf_per_group &&
+			     vf_index < num_vfs;
+			     vf_index++) {
+				for (vf_index1 = vf_group * vf_per_group;
+				     vf_index1 < (vf_group + 1) * vf_per_group &&
+				     vf_index1 < num_vfs;
+				     vf_index1++) {
+
+					rc = opal_pci_set_peltv(phb->opal_id,
+						pdn->offset + vf_index,
+						pdn->offset + vf_index1,
+						OPAL_ADD_PE_TO_DOMAIN);
+
+					if (rc)
+					    dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n",
+						__func__,
+						pdn->offset + vf_index1, rc);
+				}
+			}
+		}
+	}
+}
+
+int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pci_bus        *bus;
+	struct pci_controller *hose;
+	struct pnv_phb        *phb;
+	struct pci_dn         *pdn;
+	int                    ret;
+
+	bus = pdev->bus;
+	hose = pci_bus_to_host(bus);
+	phb = hose->private_data;
+	pdn = pci_get_pdn(pdev);
+
+	if (phb->type == PNV_PHB_IODA2) {
+		/* Calculate available PE for required VFs */
+		mutex_lock(&phb->ioda.pe_alloc_mutex);
+		pdn->offset = bitmap_find_next_zero_area(
+			phb->ioda.pe_alloc, phb->ioda.total_pe,
+			0, num_vfs, 0);
+		if (pdn->offset >= phb->ioda.total_pe) {
+			mutex_unlock(&phb->ioda.pe_alloc_mutex);
+			dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+			pdn->offset = 0;
+			return -EBUSY;
+		}
+		bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+		pdn->num_vfs = num_vfs;
+		mutex_unlock(&phb->ioda.pe_alloc_mutex);
+
+		/* Assign M64 window accordingly */
+		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
+		if (ret) {
+			dev_info(&pdev->dev, "Not enough M64 window resources\n");
+			goto m64_failed;
+		}
+
+		/*
+		 * When using one M64 BAR to map one IOV BAR, we need to shift
+		 * the IOV BAR according to the PE# allocated to the VFs.
+		 * Otherwise, the PE# for the VF will conflict with others.
+		 */
+		if (pdn->m64_per_iov == 1) {
+			ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+			if (ret)
+				goto m64_failed;
+		}
+	}
+
+	/* Setup VF PEs */
+	pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+	return 0;
+
+m64_failed:
+	bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+	pdn->offset = 0;
+
+	return ret;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	pnv_pci_sriov_disable(pdev);
+
+	/* Release PCI data */
+	remove_dev_pci_data(pdev);
+	return 0;
+}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	/* Allocate PCI data */
+	add_dev_pci_data(pdev);
+
+	pnv_pci_sriov_enable(pdev, num_vfs);
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
 {
 	struct pci_dn *pdn = pci_get_pdn(pdev);
@@ -989,7 +1598,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
-	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
+	set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
 }
 
 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
@@ -1016,7 +1625,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
 	} else {
 		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
 		set_dma_ops(&pdev->dev, &dma_iommu_ops);
-		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+		set_iommu_table_base(&pdev->dev, pe->tce32_table);
 	}
 	*pdev->dev.dma_mask = dma_mask;
 	return 0;
@@ -1053,9 +1662,9 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		if (add_to_iommu_group)
 			set_iommu_table_base_and_group(&dev->dev,
-						       &pe->tce32_table);
+						       pe->tce32_table);
 		else
-			set_iommu_table_base(&dev->dev, &pe->tce32_table);
+			set_iommu_table_base(&dev->dev, pe->tce32_table);
 
 		if (dev->subordinate)
 			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
@@ -1145,8 +1754,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
 				 __be64 *startp, __be64 *endp, bool rm)
 {
-	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
-					      tce32_table);
+	struct pnv_ioda_pe *pe = tbl->data;
 	struct pnv_phb *phb = pe->phb;
 
 	if (phb->type == PNV_PHB_IODA1)
@@ -1167,9 +1775,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	int64_t rc;
 	void *addr;
 
-	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)
-
 	/* XXX FIXME: Handle 64-bit only DMA devices */
 	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
 	/* XXX FIXME: Allocate multi-level tables on PHB3 */
@@ -1212,7 +1817,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	}
 
 	/* Setup linux iommu table */
-	tbl = &pe->tce32_table;
+	tbl = pe->tce32_table;
 	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
 				  base << 28, IOMMU_PAGE_SHIFT_4K);
 
@@ -1232,12 +1837,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 				 TCE_PCI_SWINV_PAIR);
 	}
 	iommu_init_table(tbl, phb->hose->node);
-	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
-	if (pe->pdev)
+	if (pe->flags & PNV_IODA_PE_DEV) {
+		iommu_register_group(tbl, phb->hose->global_number,
+				     pe->pe_number);
 		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-	else
+	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
+		iommu_register_group(tbl, phb->hose->global_number,
+				     pe->pe_number);
 		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+	} else if (pe->flags & PNV_IODA_PE_VF) {
+		iommu_register_group(tbl, phb->hose->global_number,
+				     pe->pe_number);
+	}
 
 	return;
  fail:
@@ -1250,8 +1862,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 
 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
 {
-	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
-					      tce32_table);
+	struct pnv_ioda_pe *pe = tbl->data;
 	uint16_t window_id = (pe->pe_number << 1 ) + 1;
 	int64_t rc;
 
@@ -1296,10 +1907,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
 	pe->tce_bypass_base = 1ull << 59;
 
 	/* Install set_bypass callback for VFIO */
-	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
+	pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
 
 	/* Enable bypass by default */
-	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
+	pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
 }
 
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
@@ -1347,7 +1958,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	}
 
 	/* Setup linux iommu table */
-	tbl = &pe->tce32_table;
+	tbl = pe->tce32_table;
 	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
 			IOMMU_PAGE_SHIFT_4K);
 
@@ -1365,12 +1976,19 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
 	}
 	iommu_init_table(tbl, phb->hose->node);
-	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
-	if (pe->pdev)
+	if (pe->flags & PNV_IODA_PE_DEV) {
+		iommu_register_group(tbl, phb->hose->global_number,
+				     pe->pe_number);
 		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-	else
+	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
+		iommu_register_group(tbl, phb->hose->global_number,
+				     pe->pe_number);
 		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+	} else if (pe->flags & PNV_IODA_PE_VF) {
+		iommu_register_group(tbl, phb->hose->global_number,
+				     pe->pe_number);
+	}
 
 	/* Also create a bypass window */
 	if (!pnv_iommu_bypass_disabled)
@@ -1731,6 +2349,73 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
 #endif /* CONFIG_PCI_MSI */
 
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct resource *res;
+	int i;
+	resource_size_t size;
+	struct pci_dn *pdn;
+	int mul, total_vfs;
+
+	if (!pdev->is_physfn || pdev->is_added)
+		return;
+
+	hose = pci_bus_to_host(pdev->bus);
+	phb = hose->private_data;
+
+	pdn = pci_get_pdn(pdev);
+	pdn->vfs_expanded = 0;
+
+	total_vfs = pci_sriov_get_totalvfs(pdev);
+	pdn->m64_per_iov = 1;
+	mul = phb->ioda.total_pe;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_mem_pref_64(res->flags)) {
+			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+				 i, res);
+			continue;
+		}
+
+		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+
+		/* bigger than 64M */
+		if (size > (1 << 26)) {
+			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
+				 i, res);
+			pdn->m64_per_iov = M64_PER_IOV;
+			mul = roundup_pow_of_two(total_vfs);
+			break;
+		}
+	}
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_mem_pref_64(res->flags)) {
+			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
+				 i, res);
+			continue;
+		}
+
+		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
+		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+		res->end = res->start + size * mul - 1;
+		dev_dbg(&pdev->dev, "                       %pR\n", res);
+		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
+			 i, res, mul);
+	}
+	pdn->vfs_expanded = mul;
+}
+#endif /* CONFIG_PCI_IOV */
+
 /*
  * This function is supposed to be called on basis of PE from top
  * to bottom style. So the the I/O or MMIO segment assigned to
@@ -1777,7 +2462,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
 				region.start += phb->ioda.io_segsize;
 				index++;
 			}
-		} else if (res->flags & IORESOURCE_MEM) {
+		} else if ((res->flags & IORESOURCE_MEM) &&
+			   !pnv_pci_is_mem_pref_64(res->flags)) {
 			region.start = res->start -
 				       hose->mem_offset[0] -
 				       phb->ioda.m32_pci_base;
@@ -1907,10 +2593,29 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
 	return phb->ioda.io_segsize;
 }
 
+#ifdef CONFIG_PCI_IOV
+static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
+						      int resno)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	resource_size_t align, iov_align;
+
+	iov_align = resource_size(&pdev->resource[resno]);
+	if (iov_align)
+		return iov_align;
+
+	align = pci_iov_resource_size(pdev, resno);
+	if (pdn->vfs_expanded)
+		return pdn->vfs_expanded * align;
+
+	return align;
+}
+#endif /* CONFIG_PCI_IOV */
+
 /* Prevent enabling devices for which we couldn't properly
  * assign a PE
  */
-static int pnv_pci_enable_device_hook(struct pci_dev *dev)
+static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
 	struct pnv_phb *phb = hose->private_data;
@@ -1922,13 +2627,13 @@ static int pnv_pci_enable_device_hook(struct pci_dev *dev)
 	 * PEs isn't ready.
 	 */
 	if (!phb->initialized)
-		return 0;
+		return true;
 
 	pdn = pci_get_pdn(dev);
 	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
-		return -EINVAL;
+		return false;
 
-	return 0;
+	return true;
 }
 
 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
@@ -1988,9 +2693,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		hose->last_busno = 0xff;
 	}
 	hose->private_data = phb;
+	hose->controller_ops = pnv_pci_controller_ops;
 	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = ioda_type;
+	mutex_init(&phb->ioda.pe_alloc_mutex);
 
 	/* Detect specific models for error handling */
 	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
@@ -2050,6 +2757,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
 	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
 	INIT_LIST_HEAD(&phb->ioda.pe_list);
+	mutex_init(&phb->ioda.pe_list_mutex);
 
 	/* Calculate how many 32-bit TCE segments we have */
 	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
@@ -2078,9 +2786,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	phb->get_pe_state = pnv_ioda_get_pe_state;
 	phb->freeze_pe = pnv_ioda_freeze_pe;
 	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
-#ifdef CONFIG_EEH
-	phb->eeh_ops = &ioda_eeh_ops;
-#endif
 
 	/* Setup RID -> PE mapping function */
 	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -2104,9 +2809,15 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	 * the child P2P bridges) can form individual PE.
 	 */
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
-	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
-	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
-	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
+	pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook;
+	pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment;
+	pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus;
+
+#ifdef CONFIG_PCI_IOV
+	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
+	ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+#endif
+
 	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
 	/* Reset IODA tables to a clean state */
@@ -2121,8 +2832,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	 */
 	if (is_kdump_kernel()) {
 		pr_info("  Issue PHB reset ...\n");
-		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
-		ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
+		pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+		pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
 	}
 
 	/* Remove M64 resource if we can't configure it successfully */
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 6ef6d4d8e7e2..4729ca793813 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -133,6 +133,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
 	phb->hose->first_busno = 0;
 	phb->hose->last_busno = 0xff;
 	phb->hose->private_data = phb;
+	phb->hose->controller_ops = pnv_pci_controller_ops;
 	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = PNV_PHB_P5IOC2;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 54323d6b5166..bca2aeb6e4b6 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -366,9 +366,9 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 	spin_unlock_irqrestore(&phb->lock, flags);
 }
 
-static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
-				     struct device_node *dn)
+static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
 {
+	struct pnv_phb *phb = pdn->phb->private_data;
 	u8	fstate;
 	__be16	pcierr;
 	int	pe_no;
@@ -379,7 +379,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 	 * setup that yet. So all ER errors should be mapped to
 	 * reserved PE.
 	 */
-	pe_no = PCI_DN(dn)->pe_number;
+	pe_no = pdn->pe_number;
 	if (pe_no == IODA_INVALID_PE) {
 		if (phb->type == PNV_PHB_P5IOC2)
 			pe_no = 0;
@@ -407,8 +407,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 	}
 
 	cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
-		(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
-		pe_no, fstate);
+		(pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
 
 	/* Clear the frozen state if applicable */
 	if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
@@ -425,10 +424,9 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 	}
 }
 
-int pnv_pci_cfg_read(struct device_node *dn,
+int pnv_pci_cfg_read(struct pci_dn *pdn,
 		     int where, int size, u32 *val)
 {
-	struct pci_dn *pdn = PCI_DN(dn);
 	struct pnv_phb *phb = pdn->phb->private_data;
 	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
 	s64 rc;
@@ -462,10 +460,9 @@ int pnv_pci_cfg_read(struct device_node *dn,
 	return PCIBIOS_SUCCESSFUL;
 }
 
-int pnv_pci_cfg_write(struct device_node *dn,
+int pnv_pci_cfg_write(struct pci_dn *pdn,
 		      int where, int size, u32 val)
 {
-	struct pci_dn *pdn = PCI_DN(dn);
 	struct pnv_phb *phb = pdn->phb->private_data;
 	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
 
@@ -489,18 +486,17 @@ int pnv_pci_cfg_write(struct device_node *dn,
 }
 
 #if CONFIG_EEH
-static bool pnv_pci_cfg_check(struct pci_controller *hose,
-			      struct device_node *dn)
+static bool pnv_pci_cfg_check(struct pci_dn *pdn)
 {
 	struct eeh_dev *edev = NULL;
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pdn->phb->private_data;
 
 	/* EEH not enabled ? */
 	if (!(phb->flags & PNV_PHB_FLAG_EEH))
 		return true;
 
 	/* PE reset or device removed ? */
-	edev = of_node_to_eeh_dev(dn);
+	edev = pdn->edev;
 	if (edev) {
 		if (edev->pe &&
 		    (edev->pe->state & EEH_PE_CFG_BLOCKED))
@@ -513,8 +509,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose,
 	return true;
 }
 #else
-static inline pnv_pci_cfg_check(struct pci_controller *hose,
-				struct device_node *dn)
+static inline pnv_pci_cfg_check(struct pci_dn *pdn)
 {
 	return true;
 }
@@ -524,32 +519,26 @@ static int pnv_pci_read_config(struct pci_bus *bus,
 			       unsigned int devfn,
 			       int where, int size, u32 *val)
 {
-	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
 	struct pci_dn *pdn;
 	struct pnv_phb *phb;
-	bool found = false;
 	int ret;
 
 	*val = 0xFFFFFFFF;
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn) {
-			phb = pdn->phb->private_data;
-			found = true;
-			break;
-		}
-	}
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+	if (!pnv_pci_cfg_check(pdn))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	ret = pnv_pci_cfg_read(dn, where, size, val);
-	if (phb->flags & PNV_PHB_FLAG_EEH) {
+	ret = pnv_pci_cfg_read(pdn, where, size, val);
+	phb = pdn->phb->private_data;
+	if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
 		if (*val == EEH_IO_ERROR_VALUE(size) &&
-		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+		    eeh_dev_check_failure(pdn->edev))
                         return PCIBIOS_DEVICE_NOT_FOUND;
 	} else {
-		pnv_pci_config_check_eeh(phb, dn);
+		pnv_pci_config_check_eeh(pdn);
 	}
 
 	return ret;
@@ -559,27 +548,21 @@ static int pnv_pci_write_config(struct pci_bus *bus,
 				unsigned int devfn,
 				int where, int size, u32 val)
 {
-	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
 	struct pci_dn *pdn;
 	struct pnv_phb *phb;
-	bool found = false;
 	int ret;
 
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn) {
-			phb = pdn->phb->private_data;
-			found = true;
-			break;
-		}
-	}
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+	if (!pnv_pci_cfg_check(pdn))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	ret = pnv_pci_cfg_write(dn, where, size, val);
+	ret = pnv_pci_cfg_write(pdn, where, size, val);
+	phb = pdn->phb->private_data;
 	if (!(phb->flags & PNV_PHB_FLAG_EEH))
-		pnv_pci_config_check_eeh(phb, dn);
+		pnv_pci_config_check_eeh(pdn);
 
 	return ret;
 }
@@ -679,66 +662,31 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 	tbl->it_type = TCE_PCI;
 }
 
-static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
-{
-	struct iommu_table *tbl;
-	const __be64 *basep, *swinvp;
-	const __be32 *sizep;
-
-	basep = of_get_property(hose->dn, "linux,tce-base", NULL);
-	sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
-	if (basep == NULL || sizep == NULL) {
-		pr_err("PCI: %s has missing tce entries !\n",
-		       hose->dn->full_name);
-		return NULL;
-	}
-	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
-	if (WARN_ON(!tbl))
-		return NULL;
-	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
-				  be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K);
-	iommu_init_table(tbl, hose->node);
-	iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
-
-	/* Deal with SW invalidated TCEs when needed (BML way) */
-	swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
-				 NULL);
-	if (swinvp) {
-		tbl->it_busno = be64_to_cpu(swinvp[1]);
-		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
-	}
-	return tbl;
-}
-
-static void pnv_pci_dma_fallback_setup(struct pci_controller *hose,
-				       struct pci_dev *pdev)
-{
-	struct device_node *np = pci_bus_to_OF_node(hose->bus);
-	struct pci_dn *pdn;
-
-	if (np == NULL)
-		return;
-	pdn = PCI_DN(np);
-	if (!pdn->iommu_table)
-		pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
-	if (!pdn->iommu_table)
-		return;
-	set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table);
-}
-
 static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 {
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 	struct pnv_phb *phb = hose->private_data;
+#ifdef CONFIG_PCI_IOV
+	struct pnv_ioda_pe *pe;
+	struct pci_dn *pdn;
+
+	/* Fix the VF pdn PE number */
+	if (pdev->is_virtfn) {
+		pdn = pci_get_pdn(pdev);
+		WARN_ON(pdn->pe_number != IODA_INVALID_PE);
+		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+			if (pe->rid == ((pdev->bus->number << 8) |
+			    (pdev->devfn & 0xff))) {
+				pdn->pe_number = pe->pe_number;
+				pe->pdev = pdev;
+				break;
+			}
+		}
+	}
+#endif /* CONFIG_PCI_IOV */
 
-	/* If we have no phb structure, try to setup a fallback based on
-	 * the device-tree (RTAS PCI for example)
-	 */
 	if (phb && phb->dma_dev_setup)
 		phb->dma_dev_setup(phb, pdev);
-	else
-		pnv_pci_dma_fallback_setup(hose, pdev);
 }
 
 int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
@@ -784,44 +732,36 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
 void __init pnv_pci_init(void)
 {
 	struct device_node *np;
+	bool found_ioda = false;
 
 	pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
 
-	/* OPAL absent, try POPAL first then RTAS detection of PHBs */
-	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
-#ifdef CONFIG_PPC_POWERNV_RTAS
-		init_pci_config_tokens();
-		find_and_init_phbs();
-#endif /* CONFIG_PPC_POWERNV_RTAS */
-	}
-	/* OPAL is here, do our normal stuff */
-	else {
-		int found_ioda = 0;
+	/* If we don't have OPAL, eg. in sim, just skip PCI probe */
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
 
-		/* Look for IODA IO-Hubs. We don't support mixing IODA
-		 * and p5ioc2 due to the need to change some global
-		 * probing flags
-		 */
-		for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
-			pnv_pci_init_ioda_hub(np);
-			found_ioda = 1;
-		}
+	/* Look for IODA IO-Hubs. We don't support mixing IODA
+	 * and p5ioc2 due to the need to change some global
+	 * probing flags
+	 */
+	for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
+		pnv_pci_init_ioda_hub(np);
+		found_ioda = true;
+	}
 
-		/* Look for p5ioc2 IO-Hubs */
-		if (!found_ioda)
-			for_each_compatible_node(np, NULL, "ibm,p5ioc2")
-				pnv_pci_init_p5ioc2_hub(np);
+	/* Look for p5ioc2 IO-Hubs */
+	if (!found_ioda)
+		for_each_compatible_node(np, NULL, "ibm,p5ioc2")
+			pnv_pci_init_p5ioc2_hub(np);
 
-		/* Look for ioda2 built-in PHB3's */
-		for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
-			pnv_pci_init_ioda2_phb(np);
-	}
+	/* Look for ioda2 built-in PHB3's */
+	for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
+		pnv_pci_init_ioda2_phb(np);
 
 	/* Setup the linkage between OF nodes and PHBs */
 	pci_devs_phb_init();
 
 	/* Configure IOMMU DMA hooks */
-	ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
 	ppc_md.tce_build = pnv_tce_build_vm;
 	ppc_md.tce_free = pnv_tce_free_vm;
 	ppc_md.tce_build_rm = pnv_tce_build_rm;
@@ -837,3 +777,7 @@ void __init pnv_pci_init(void)
 }
 
 machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
+
+struct pci_controller_ops pnv_pci_controller_ops = {
+	.dma_dev_setup = pnv_pci_dma_dev_setup,
+};
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 6c02ff8dd69f..070ee888fc95 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -23,6 +23,7 @@ enum pnv_phb_model {
 #define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
 #define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
 #define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
+#define PNV_IODA_PE_VF		(1 << 5)	/* PE for one VF 		*/
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_phb;
@@ -34,6 +35,9 @@ struct pnv_ioda_pe {
 	 * entire bus (& children). In the former case, pdev
 	 * is populated, in the later case, pbus is.
 	 */
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev          *parent_dev;
+#endif
 	struct pci_dev		*pdev;
 	struct pci_bus		*pbus;
 
@@ -53,7 +57,7 @@ struct pnv_ioda_pe {
 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
 	int			tce32_seg;
 	int			tce32_segcount;
-	struct iommu_table	tce32_table;
+	struct iommu_table	*tce32_table;
 	phys_addr_t		tce_inval_reg_phys;
 
 	/* 64-bit TCE bypass region */
@@ -75,22 +79,6 @@ struct pnv_ioda_pe {
 	struct list_head	list;
 };
 
-/* IOC dependent EEH operations */
-#ifdef CONFIG_EEH
-struct pnv_eeh_ops {
-	int (*post_init)(struct pci_controller *hose);
-	int (*set_option)(struct eeh_pe *pe, int option);
-	int (*get_state)(struct eeh_pe *pe);
-	int (*reset)(struct eeh_pe *pe, int option);
-	int (*get_log)(struct eeh_pe *pe, int severity,
-		       char *drv_log, unsigned long len);
-	int (*configure_bridge)(struct eeh_pe *pe);
-	int (*err_inject)(struct eeh_pe *pe, int type, int func,
-			  unsigned long addr, unsigned long mask);
-	int (*next_error)(struct eeh_pe **pe);
-};
-#endif /* CONFIG_EEH */
-
 #define PNV_PHB_FLAG_EEH	(1 << 0)
 
 struct pnv_phb {
@@ -104,10 +92,6 @@ struct pnv_phb {
 	int			initialized;
 	spinlock_t		lock;
 
-#ifdef CONFIG_EEH
-	struct pnv_eeh_ops	*eeh_ops;
-#endif
-
 #ifdef CONFIG_DEBUG_FS
 	int			has_dbgfs;
 	struct dentry		*dbgfs;
@@ -165,6 +149,8 @@ struct pnv_phb {
 
 			/* PE allocation bitmap */
 			unsigned long		*pe_alloc;
+			/* PE allocation mutex */
+			struct mutex		pe_alloc_mutex;
 
 			/* M32 & IO segment maps */
 			unsigned int		*m32_segmap;
@@ -179,6 +165,7 @@ struct pnv_phb {
 			 * on the sequence of creation
 			 */
 			struct list_head	pe_list;
+			struct mutex            pe_list_mutex;
 
 			/* Reverse map of PEs, will have to extend if
 			 * we are to support more than 256 PEs, indexed
@@ -213,15 +200,12 @@ struct pnv_phb {
 };
 
 extern struct pci_ops pnv_pci_ops;
-#ifdef CONFIG_EEH
-extern struct pnv_eeh_ops ioda_eeh_ops;
-#endif
 
 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
 				unsigned char *log_buff);
-int pnv_pci_cfg_read(struct device_node *dn,
+int pnv_pci_cfg_read(struct pci_dn *pdn,
 		     int where, int size, u32 *val);
-int pnv_pci_cfg_write(struct device_node *dn,
+int pnv_pci_cfg_write(struct pci_dn *pdn,
 		      int where, int size, u32 val);
 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 				      void *tce_mem, u64 tce_size,
@@ -232,6 +216,6 @@ extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
 					__be64 *startp, __be64 *endp, bool rm);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
-extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option);
+extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
 #endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 604c48e7879a..826d2c9bea56 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
 }
 #endif
 
+extern struct pci_controller_ops pnv_pci_controller_ops;
+
 extern u32 pnv_get_supported_cpuidle_states(void);
 
 extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d2de7d5d7574..16fdcb23f4c3 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -32,7 +32,6 @@
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/xics.h>
-#include <asm/rtas.h>
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
@@ -278,20 +277,6 @@ static void __init pnv_setup_machdep_opal(void)
 	ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
 }
 
-#ifdef CONFIG_PPC_POWERNV_RTAS
-static void __init pnv_setup_machdep_rtas(void)
-{
-	if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) {
-		ppc_md.get_boot_time = rtas_get_boot_time;
-		ppc_md.get_rtc_time = rtas_get_rtc_time;
-		ppc_md.set_rtc_time = rtas_set_rtc_time;
-	}
-	ppc_md.restart = rtas_restart;
-	pm_power_off = rtas_power_off;
-	ppc_md.halt = rtas_halt;
-}
-#endif /* CONFIG_PPC_POWERNV_RTAS */
-
 static u32 supported_cpuidle_states;
 
 int pnv_save_sprs_for_winkle(void)
@@ -409,37 +394,39 @@ static int __init pnv_init_idle_states(void)
 {
 	struct device_node *power_mgt;
 	int dt_idle_states;
-	const __be32 *idle_state_flags;
-	u32 len_flags, flags;
+	u32 *flags;
 	int i;
 
 	supported_cpuidle_states = 0;
 
 	if (cpuidle_disable != IDLE_NO_OVERRIDE)
-		return 0;
+		goto out;
 
 	if (!firmware_has_feature(FW_FEATURE_OPALv3))
-		return 0;
+		goto out;
 
 	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
 	if (!power_mgt) {
 		pr_warn("opal: PowerMgmt Node not found\n");
-		return 0;
+		goto out;
+	}
+	dt_idle_states = of_property_count_u32_elems(power_mgt,
+			"ibm,cpu-idle-state-flags");
+	if (dt_idle_states < 0) {
+		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+		goto out;
 	}
 
-	idle_state_flags = of_get_property(power_mgt,
-			"ibm,cpu-idle-state-flags", &len_flags);
-	if (!idle_state_flags) {
-		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
-		return 0;
+	flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL);
+	if (of_property_read_u32_array(power_mgt,
+			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+		goto out_free;
 	}
 
-	dt_idle_states = len_flags / sizeof(u32);
+	for (i = 0; i < dt_idle_states; i++)
+		supported_cpuidle_states |= flags[i];
 
-	for (i = 0; i < dt_idle_states; i++) {
-		flags = be32_to_cpu(idle_state_flags[i]);
-		supported_cpuidle_states |= flags;
-	}
 	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
 		patch_instruction(
 			(unsigned int *)pnv_fastsleep_workaround_at_entry,
@@ -449,6 +436,9 @@ static int __init pnv_init_idle_states(void)
 			PPC_INST_NOP);
 	}
 	pnv_alloc_idle_core_states();
+out_free:
+	kfree(flags);
+out:
 	return 0;
 }
 
@@ -465,10 +455,6 @@ static int __init pnv_probe(void)
 
 	if (firmware_has_feature(FW_FEATURE_OPAL))
 		pnv_setup_machdep_opal();
-#ifdef CONFIG_PPC_POWERNV_RTAS
-	else if (rtas.base)
-		pnv_setup_machdep_rtas();
-#endif /* CONFIG_PPC_POWERNV_RTAS */
 
 	pr_debug("PowerNV detected !\n");
 
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index fc34025ef822..8f70ba681a78 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -25,7 +25,6 @@
 #include <asm/machdep.h>
 #include <asm/cputable.h>
 #include <asm/firmware.h>
-#include <asm/rtas.h>
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
 #include <asm/xics.h>
@@ -33,6 +32,8 @@
 #include <asm/runlatch.h>
 #include <asm/code-patching.h>
 #include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
 
 #include "powernv.h"
 
@@ -149,7 +150,7 @@ static int pnv_smp_cpu_disable(void)
 static void pnv_smp_cpu_kill_self(void)
 {
 	unsigned int cpu;
-	unsigned long srr1;
+	unsigned long srr1, wmask;
 	u32 idle_states;
 
 	/* Standard hot unplug procedure */
@@ -161,6 +162,10 @@ static void pnv_smp_cpu_kill_self(void)
 	generic_set_cpu_dead(cpu);
 	smp_wmb();
 
+	wmask = SRR1_WAKEMASK;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		wmask = SRR1_WAKEMASK_P8;
+
 	idle_states = pnv_get_supported_cpuidle_states();
 	/* We don't want to take decrementer interrupts while we are offline,
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
@@ -191,10 +196,14 @@ static void pnv_smp_cpu_kill_self(void)
 		 * having finished executing in a KVM guest, then srr1
 		 * contains 0.
 		 */
-		if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
+		if ((srr1 & wmask) == SRR1_WAKEEE) {
 			icp_native_flush_interrupt();
 			local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
 			smp_mb();
+		} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
+			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+			asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+			kvmppc_set_host_ipi(cpu, 0);
 		}
 
 		if (cpu_core_split_required())
@@ -241,18 +250,6 @@ void __init pnv_smp_init(void)
 {
 	smp_ops = &pnv_smp_ops;
 
-	/* XXX We don't yet have a proper entry point from HAL, for
-	 * now we rely on kexec-style entry from BML
-	 */
-
-#ifdef CONFIG_PPC_RTAS
-	/* Non-lpar has additional take/give timebase */
-	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
-		smp_ops->give_timebase = rtas_give_timebase;
-		smp_ops->take_timebase = rtas_take_timebase;
-	}
-#endif /* CONFIG_PPC_RTAS */
-
 #ifdef CONFIG_HOTPLUG_CPU
 	ppc_md.cpu_die	= pnv_smp_cpu_kill_self;
 #endif
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index b358bec6c8cb..3c7707af3384 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -57,7 +57,7 @@ static void ps3_smp_message_pass(int cpu, int msg)
 			" (%d)\n", __func__, __LINE__, cpu, msg, result);
 }
 
-static int __init ps3_smp_probe(void)
+static void __init ps3_smp_probe(void)
 {
 	int cpu;
 
@@ -100,8 +100,6 @@ static int __init ps3_smp_probe(void)
 
 		DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu);
 	}
-
-	return 2;
 }
 
 void ps3_smp_cleanup_cpu(int cpu)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index a758a9c3bbba..54c87d5d349d 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -16,7 +16,6 @@ config PPC_PSERIES
 	select PPC_UDBG_16550
 	select PPC_NATIVE
 	select PPC_PCI_CHOICE if EXPERT
-	select ZLIB_DEFLATE
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
 	select HOTPLUG_CPU if SMP
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index c22bb1b4beb8..b4b11096ea8b 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -10,6 +10,8 @@
  * 2 as published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt)	"dlpar: " fmt
+
 #include <linux/kernel.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
@@ -535,13 +537,125 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 	return count;
 }
 
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+{
+	int rc;
+
+	/* pseries error logs are in BE format, convert to cpu type */
+	switch (hp_elog->id_type) {
+	case PSERIES_HP_ELOG_ID_DRC_COUNT:
+		hp_elog->_drc_u.drc_count =
+					be32_to_cpu(hp_elog->_drc_u.drc_count);
+		break;
+	case PSERIES_HP_ELOG_ID_DRC_INDEX:
+		hp_elog->_drc_u.drc_index =
+					be32_to_cpu(hp_elog->_drc_u.drc_index);
+	}
+
+	switch (hp_elog->resource) {
+	case PSERIES_HP_ELOG_RESOURCE_MEM:
+		rc = dlpar_memory(hp_elog);
+		break;
+	default:
+		pr_warn_ratelimited("Invalid resource (%d) specified\n",
+				    hp_elog->resource);
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct pseries_hp_errorlog *hp_elog;
+	const char *arg;
+	int rc;
+
+	hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
+	if (!hp_elog) {
+		rc = -ENOMEM;
+		goto dlpar_store_out;
+	}
+
+	/* Parse out the request from the user, this will be in the form
+	 * <resource> <action> <id_type> <id>
+	 */
+	arg = buf;
+	if (!strncmp(arg, "memory", 6)) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+		arg += strlen("memory ");
+	} else {
+		pr_err("Invalid resource specified: \"%s\"\n", buf);
+		rc = -EINVAL;
+		goto dlpar_store_out;
+	}
+
+	if (!strncmp(arg, "add", 3)) {
+		hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD;
+		arg += strlen("add ");
+	} else if (!strncmp(arg, "remove", 6)) {
+		hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE;
+		arg += strlen("remove ");
+	} else {
+		pr_err("Invalid action specified: \"%s\"\n", buf);
+		rc = -EINVAL;
+		goto dlpar_store_out;
+	}
+
+	if (!strncmp(arg, "index", 5)) {
+		u32 index;
+
+		hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+		arg += strlen("index ");
+		if (kstrtou32(arg, 0, &index)) {
+			rc = -EINVAL;
+			pr_err("Invalid drc_index specified: \"%s\"\n", buf);
+			goto dlpar_store_out;
+		}
+
+		hp_elog->_drc_u.drc_index = cpu_to_be32(index);
+	} else if (!strncmp(arg, "count", 5)) {
+		u32 count;
+
+		hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT;
+		arg += strlen("count ");
+		if (kstrtou32(arg, 0, &count)) {
+			rc = -EINVAL;
+			pr_err("Invalid count specified: \"%s\"\n", buf);
+			goto dlpar_store_out;
+		}
+
+		hp_elog->_drc_u.drc_count = cpu_to_be32(count);
+	} else {
+		pr_err("Invalid id_type specified: \"%s\"\n", buf);
+		rc = -EINVAL;
+		goto dlpar_store_out;
+	}
+
+	rc = handle_dlpar_errorlog(hp_elog);
+
+dlpar_store_out:
+	kfree(hp_elog);
+	return rc ? rc : count;
+}
+
+static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store);
+
 static int __init pseries_dlpar_init(void)
 {
+	int rc;
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 	ppc_md.cpu_probe = dlpar_cpu_probe;
 	ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 
-	return 0;
+	rc = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
+
+	return rc;
 }
 machine_device_initcall(pseries, pseries_dlpar_init);
 
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index a6c7e19f5eb3..2039397cc75d 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -118,9 +118,8 @@ static int pseries_eeh_init(void)
 	return 0;
 }
 
-static int pseries_eeh_cap_start(struct device_node *dn)
+static int pseries_eeh_cap_start(struct pci_dn *pdn)
 {
-	struct pci_dn *pdn = PCI_DN(dn);
 	u32 status;
 
 	if (!pdn)
@@ -134,10 +133,9 @@ static int pseries_eeh_cap_start(struct device_node *dn)
 }
 
 
-static int pseries_eeh_find_cap(struct device_node *dn, int cap)
+static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap)
 {
-	struct pci_dn *pdn = PCI_DN(dn);
-	int pos = pseries_eeh_cap_start(dn);
+	int pos = pseries_eeh_cap_start(pdn);
 	int cnt = 48;	/* Maximal number of capabilities */
 	u32 id;
 
@@ -160,10 +158,9 @@ static int pseries_eeh_find_cap(struct device_node *dn, int cap)
 	return 0;
 }
 
-static int pseries_eeh_find_ecap(struct device_node *dn, int cap)
+static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
 {
-	struct pci_dn *pdn = PCI_DN(dn);
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 	u32 header;
 	int pos = 256;
 	int ttl = (4096 - 256) / 8;
@@ -191,53 +188,44 @@ static int pseries_eeh_find_ecap(struct device_node *dn, int cap)
 }
 
 /**
- * pseries_eeh_of_probe - EEH probe on the given device
- * @dn: OF node
- * @flag: Unused
+ * pseries_eeh_probe - EEH probe on the given device
+ * @pdn: PCI device node
+ * @data: Unused
  *
  * When EEH module is installed during system boot, all PCI devices
  * are checked one by one to see if it supports EEH. The function
  * is introduced for the purpose.
  */
-static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
+static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
 {
 	struct eeh_dev *edev;
 	struct eeh_pe pe;
-	struct pci_dn *pdn = PCI_DN(dn);
-	const __be32 *classp, *vendorp, *devicep;
-	u32 class_code;
-	const __be32 *regs;
 	u32 pcie_flags;
 	int enable = 0;
 	int ret;
 
 	/* Retrieve OF node and eeh device */
-	edev = of_node_to_eeh_dev(dn);
-	if (edev->pe || !of_device_is_available(dn))
+	edev = pdn_to_eeh_dev(pdn);
+	if (!edev || edev->pe)
 		return NULL;
 
-	/* Retrieve class/vendor/device IDs */
-	classp = of_get_property(dn, "class-code", NULL);
-	vendorp = of_get_property(dn, "vendor-id", NULL);
-	devicep = of_get_property(dn, "device-id", NULL);
-
-	/* Skip for bad OF node or PCI-ISA bridge */
-	if (!classp || !vendorp || !devicep)
-		return NULL;
-	if (dn->type && !strcmp(dn->type, "isa"))
+	/* Check class/vendor/device IDs */
+	if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code)
 		return NULL;
 
-	class_code = of_read_number(classp, 1);
+	/* Skip for PCI-ISA bridge */
+        if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return NULL;
 
 	/*
 	 * Update class code and mode of eeh device. We need
 	 * correctly reflects that current device is root port
 	 * or PCIe switch downstream port.
 	 */
-	edev->class_code = class_code;
-	edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX);
-	edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
-	edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR);
+	edev->class_code = pdn->class_code;
+	edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+	edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+	edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
 	edev->mode &= 0xFFFFFF00;
 	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
@@ -252,24 +240,16 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 		}
 	}
 
-	/* Retrieve the device address */
-	regs = of_get_property(dn, "reg", NULL);
-	if (!regs) {
-		pr_warn("%s: OF node property %s::reg not found\n",
-			__func__, dn->full_name);
-		return NULL;
-	}
-
 	/* Initialize the fake PE */
 	memset(&pe, 0, sizeof(struct eeh_pe));
 	pe.phb = edev->phb;
-	pe.config_addr = of_read_number(regs, 1);
+	pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
 
 	/* Enable EEH on the device */
 	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
 	if (!ret) {
-		edev->config_addr = of_read_number(regs, 1);
 		/* Retrieve PE address */
+		edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
 		edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
 		pe.addr = edev->pe_config_addr;
 
@@ -285,16 +265,17 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 			eeh_add_flag(EEH_ENABLED);
 			eeh_add_to_parent_pe(edev);
 
-			pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
-				__func__, dn->full_name, pe.phb->global_number,
-				pe.addr, pe.config_addr);
-		} else if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
-			   (of_node_to_eeh_dev(dn->parent))->pe) {
+			pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n",
+				__func__, pdn->busno, PCI_SLOT(pdn->devfn),
+				PCI_FUNC(pdn->devfn), pe.phb->global_number,
+				pe.addr);
+		} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
+			   (pdn_to_eeh_dev(pdn->parent))->pe) {
 			/* This device doesn't support EEH, but it may have an
 			 * EEH parent, in which case we mark it as supported.
 			 */
-			edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
-			edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr;
+			edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr;
+			edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
 			eeh_add_to_parent_pe(edev);
 		}
 	}
@@ -670,45 +651,36 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
 /**
  * pseries_eeh_read_config - Read PCI config space
- * @dn: device node
+ * @pdn: PCI device node
  * @where: PCI address
  * @size: size to read
  * @val: return value
  *
  * Read config space from the speicifed device
  */
-static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val)
+static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 {
-	struct pci_dn *pdn;
-
-	pdn = PCI_DN(dn);
-
 	return rtas_read_config(pdn, where, size, val);
 }
 
 /**
  * pseries_eeh_write_config - Write PCI config space
- * @dn: device node
+ * @pdn: PCI device node
  * @where: PCI address
  * @size: size to write
  * @val: value to be written
  *
  * Write config space to the specified device
  */
-static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val)
+static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val)
 {
-	struct pci_dn *pdn;
-
-	pdn = PCI_DN(dn);
-
 	return rtas_write_config(pdn, where, size, val);
 }
 
 static struct eeh_ops pseries_eeh_ops = {
 	.name			= "pseries",
 	.init			= pseries_eeh_init,
-	.of_probe		= pseries_eeh_of_probe,
-	.dev_probe		= NULL,
+	.probe			= pseries_eeh_probe,
 	.set_option		= pseries_eeh_set_option,
 	.get_pe_addr		= pseries_eeh_get_pe_addr,
 	.get_state		= pseries_eeh_get_state,
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index fa41f0da5b6f..0ced387e1463 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -9,11 +9,14 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt)	"pseries-hotplug-mem: " fmt
+
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
+#include <linux/slab.h>
 
 #include <asm/firmware.h>
 #include <asm/machdep.h>
@@ -21,6 +24,8 @@
 #include <asm/sparsemem.h>
 #include "pseries.h"
 
+static bool rtas_hp_event;
+
 unsigned long pseries_memory_block_size(void)
 {
 	struct device_node *np;
@@ -64,6 +69,67 @@ unsigned long pseries_memory_block_size(void)
 	return memblock_size;
 }
 
+static void dlpar_free_drconf_property(struct property *prop)
+{
+	kfree(prop->name);
+	kfree(prop->value);
+	kfree(prop);
+}
+
+static struct property *dlpar_clone_drconf_property(struct device_node *dn)
+{
+	struct property *prop, *new_prop;
+	struct of_drconf_cell *lmbs;
+	u32 num_lmbs, *p;
+	int i;
+
+	prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+	if (!prop)
+		return NULL;
+
+	new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+	if (!new_prop)
+		return NULL;
+
+	new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+	new_prop->value = kmalloc(prop->length, GFP_KERNEL);
+	if (!new_prop->name || !new_prop->value) {
+		dlpar_free_drconf_property(new_prop);
+		return NULL;
+	}
+
+	memcpy(new_prop->value, prop->value, prop->length);
+	new_prop->length = prop->length;
+
+	/* Convert the property to cpu endian-ness */
+	p = new_prop->value;
+	*p = be32_to_cpu(*p);
+
+	num_lmbs = *p++;
+	lmbs = (struct of_drconf_cell *)p;
+
+	for (i = 0; i < num_lmbs; i++) {
+		lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
+		lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
+		lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
+	}
+
+	return new_prop;
+}
+
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+	unsigned long section_nr;
+	struct mem_section *mem_sect;
+	struct memory_block *mem_block;
+
+	section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+	mem_sect = __nr_to_section(section_nr);
+
+	mem_block = find_memory_block(mem_sect);
+	return mem_block;
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
@@ -122,6 +188,173 @@ static int pseries_remove_mem_node(struct device_node *np)
 	pseries_remove_memblock(base, lmb_size);
 	return 0;
 }
+
+static bool lmb_is_removable(struct of_drconf_cell *lmb)
+{
+	int i, scns_per_block;
+	int rc = 1;
+	unsigned long pfn, block_sz;
+	u64 phys_addr;
+
+	if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+		return false;
+
+	block_sz = memory_block_size_bytes();
+	scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	phys_addr = lmb->base_addr;
+
+	for (i = 0; i < scns_per_block; i++) {
+		pfn = PFN_DOWN(phys_addr);
+		if (!pfn_present(pfn))
+			continue;
+
+		rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+		phys_addr += MIN_MEMORY_BLOCK_SIZE;
+	}
+
+	return rc ? true : false;
+}
+
+static int dlpar_add_lmb(struct of_drconf_cell *);
+
+static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+{
+	struct memory_block *mem_block;
+	unsigned long block_sz;
+	int nid, rc;
+
+	if (!lmb_is_removable(lmb))
+		return -EINVAL;
+
+	mem_block = lmb_to_memblock(lmb);
+	if (!mem_block)
+		return -EINVAL;
+
+	rc = device_offline(&mem_block->dev);
+	put_device(&mem_block->dev);
+	if (rc)
+		return rc;
+
+	block_sz = pseries_memory_block_size();
+	nid = memory_add_physaddr_to_nid(lmb->base_addr);
+
+	remove_memory(nid, lmb->base_addr, block_sz);
+
+	/* Update memory regions for memory remove */
+	memblock_remove(lmb->base_addr, block_sz);
+
+	dlpar_release_drc(lmb->drc_index);
+
+	lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+	return 0;
+}
+
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
+					struct property *prop)
+{
+	struct of_drconf_cell *lmbs;
+	int lmbs_removed = 0;
+	int lmbs_available = 0;
+	u32 num_lmbs, *p;
+	int i, rc;
+
+	pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);
+
+	if (lmbs_to_remove == 0)
+		return -EINVAL;
+
+	p = prop->value;
+	num_lmbs = *p++;
+	lmbs = (struct of_drconf_cell *)p;
+
+	/* Validate that there are enough LMBs to satisfy the request */
+	for (i = 0; i < num_lmbs; i++) {
+		if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+			lmbs_available++;
+	}
+
+	if (lmbs_available < lmbs_to_remove)
+		return -EINVAL;
+
+	for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) {
+		rc = dlpar_remove_lmb(&lmbs[i]);
+		if (rc)
+			continue;
+
+		lmbs_removed++;
+
+		/* Mark this lmb so we can add it later if all of the
+		 * requested LMBs cannot be removed.
+		 */
+		lmbs[i].reserved = 1;
+	}
+
+	if (lmbs_removed != lmbs_to_remove) {
+		pr_err("Memory hot-remove failed, adding LMB's back\n");
+
+		for (i = 0; i < num_lmbs; i++) {
+			if (!lmbs[i].reserved)
+				continue;
+
+			rc = dlpar_add_lmb(&lmbs[i]);
+			if (rc)
+				pr_err("Failed to add LMB back, drc index %x\n",
+				       lmbs[i].drc_index);
+
+			lmbs[i].reserved = 0;
+		}
+
+		rc = -EINVAL;
+	} else {
+		for (i = 0; i < num_lmbs; i++) {
+			if (!lmbs[i].reserved)
+				continue;
+
+			pr_info("Memory at %llx was hot-removed\n",
+				lmbs[i].base_addr);
+
+			lmbs[i].reserved = 0;
+		}
+		rc = 0;
+	}
+
+	return rc;
+}
+
+static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+{
+	struct of_drconf_cell *lmbs;
+	u32 num_lmbs, *p;
+	int lmb_found;
+	int i, rc;
+
+	pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);
+
+	p = prop->value;
+	num_lmbs = *p++;
+	lmbs = (struct of_drconf_cell *)p;
+
+	lmb_found = 0;
+	for (i = 0; i < num_lmbs; i++) {
+		if (lmbs[i].drc_index == drc_index) {
+			lmb_found = 1;
+			rc = dlpar_remove_lmb(&lmbs[i]);
+			break;
+		}
+	}
+
+	if (!lmb_found)
+		rc = -EINVAL;
+
+	if (rc)
+		pr_info("Failed to hot-remove memory at %llx\n",
+			lmbs[i].base_addr);
+	else
+		pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr);
+
+	return rc;
+}
+
 #else
 static inline int pseries_remove_memblock(unsigned long base,
 					  unsigned int memblock_size)
@@ -132,8 +365,261 @@ static inline int pseries_remove_mem_node(struct device_node *np)
 {
 	return 0;
 }
+static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
+static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
+{
+	return -EOPNOTSUPP;
+}
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
+					struct property *prop)
+{
+	return -EOPNOTSUPP;
+}
+static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+{
+	struct memory_block *mem_block;
+	unsigned long block_sz;
+	int nid, rc;
+
+	if (lmb->flags & DRCONF_MEM_ASSIGNED)
+		return -EINVAL;
+
+	block_sz = memory_block_size_bytes();
+
+	rc = dlpar_acquire_drc(lmb->drc_index);
+	if (rc)
+		return rc;
+
+	/* Find the node id for this address */
+	nid = memory_add_physaddr_to_nid(lmb->base_addr);
+
+	/* Add the memory */
+	rc = add_memory(nid, lmb->base_addr, block_sz);
+	if (rc) {
+		dlpar_release_drc(lmb->drc_index);
+		return rc;
+	}
+
+	/* Register this block of memory */
+	rc = memblock_add(lmb->base_addr, block_sz);
+	if (rc) {
+		remove_memory(nid, lmb->base_addr, block_sz);
+		dlpar_release_drc(lmb->drc_index);
+		return rc;
+	}
+
+	mem_block = lmb_to_memblock(lmb);
+	if (!mem_block) {
+		remove_memory(nid, lmb->base_addr, block_sz);
+		dlpar_release_drc(lmb->drc_index);
+		return -EINVAL;
+	}
+
+	rc = device_online(&mem_block->dev);
+	put_device(&mem_block->dev);
+	if (rc) {
+		remove_memory(nid, lmb->base_addr, block_sz);
+		dlpar_release_drc(lmb->drc_index);
+		return rc;
+	}
+
+	lmb->flags |= DRCONF_MEM_ASSIGNED;
+	return 0;
+}
+
+static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
+{
+	struct of_drconf_cell *lmbs;
+	u32 num_lmbs, *p;
+	int lmbs_available = 0;
+	int lmbs_added = 0;
+	int i, rc;
+
+	pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
+
+	if (lmbs_to_add == 0)
+		return -EINVAL;
+
+	p = prop->value;
+	num_lmbs = *p++;
+	lmbs = (struct of_drconf_cell *)p;
+
+	/* Validate that there are enough LMBs to satisfy the request */
+	for (i = 0; i < num_lmbs; i++) {
+		if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
+			lmbs_available++;
+	}
+
+	if (lmbs_available < lmbs_to_add)
+		return -EINVAL;
+
+	for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
+		rc = dlpar_add_lmb(&lmbs[i]);
+		if (rc)
+			continue;
+
+		lmbs_added++;
+
+		/* Mark this lmb so we can remove it later if all of the
+		 * requested LMBs cannot be added.
+		 */
+		lmbs[i].reserved = 1;
+	}
+
+	if (lmbs_added != lmbs_to_add) {
+		pr_err("Memory hot-add failed, removing any added LMBs\n");
+
+		for (i = 0; i < num_lmbs; i++) {
+			if (!lmbs[i].reserved)
+				continue;
+
+			rc = dlpar_remove_lmb(&lmbs[i]);
+			if (rc)
+				pr_err("Failed to remove LMB, drc index %x\n",
+				       be32_to_cpu(lmbs[i].drc_index));
+		}
+		rc = -EINVAL;
+	} else {
+		for (i = 0; i < num_lmbs; i++) {
+			if (!lmbs[i].reserved)
+				continue;
+
+			pr_info("Memory at %llx (drc index %x) was hot-added\n",
+				lmbs[i].base_addr, lmbs[i].drc_index);
+			lmbs[i].reserved = 0;
+		}
+	}
+
+	return rc;
+}
+
+static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
+{
+	struct of_drconf_cell *lmbs;
+	u32 num_lmbs, *p;
+	int i, lmb_found;
+	int rc;
+
+	pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
+
+	p = prop->value;
+	num_lmbs = *p++;
+	lmbs = (struct of_drconf_cell *)p;
+
+	lmb_found = 0;
+	for (i = 0; i < num_lmbs; i++) {
+		if (lmbs[i].drc_index == drc_index) {
+			lmb_found = 1;
+			rc = dlpar_add_lmb(&lmbs[i]);
+			break;
+		}
+	}
+
+	if (!lmb_found)
+		rc = -EINVAL;
+
+	if (rc)
+		pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
+	else
+		pr_info("Memory at %llx (drc index %x) was hot-added\n",
+			lmbs[i].base_addr, drc_index);
+
+	return rc;
+}
+
+static void dlpar_update_drconf_property(struct device_node *dn,
+					 struct property *prop)
+{
+	struct of_drconf_cell *lmbs;
+	u32 num_lmbs, *p;
+	int i;
+
+	/* Convert the property back to BE */
+	p = prop->value;
+	num_lmbs = *p;
+	*p = cpu_to_be32(*p);
+	p++;
+
+	lmbs = (struct of_drconf_cell *)p;
+	for (i = 0; i < num_lmbs; i++) {
+		lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
+		lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+		lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
+	}
+
+	rtas_hp_event = true;
+	of_update_property(dn, prop);
+	rtas_hp_event = false;
+}
+
+int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
+{
+	struct device_node *dn;
+	struct property *prop;
+	u32 count, drc_index;
+	int rc;
+
+	count = hp_elog->_drc_u.drc_count;
+	drc_index = hp_elog->_drc_u.drc_index;
+
+	lock_device_hotplug();
+
+	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!dn) {
+		rc = -EINVAL;
+		goto dlpar_memory_out;
+	}
+
+	prop = dlpar_clone_drconf_property(dn);
+	if (!prop) {
+		rc = -EINVAL;
+		goto dlpar_memory_out;
+	}
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
+			rc = dlpar_memory_add_by_count(count, prop);
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+			rc = dlpar_memory_add_by_index(drc_index, prop);
+		else
+			rc = -EINVAL;
+		break;
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
+			rc = dlpar_memory_remove_by_count(count, prop);
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+			rc = dlpar_memory_remove_by_index(drc_index, prop);
+		else
+			rc = -EINVAL;
+		break;
+	default:
+		pr_err("Invalid action (%d) specified\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+
+	if (rc)
+		dlpar_free_drconf_property(prop);
+	else
+		dlpar_update_drconf_property(dn, prop);
+
+dlpar_memory_out:
+	of_node_put(dn);
+	unlock_device_hotplug();
+	return rc;
+}
+
 static int pseries_add_mem_node(struct device_node *np)
 {
 	const char *type;
@@ -174,6 +660,9 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
 	__be32 *p;
 	int i, rc = -EINVAL;
 
+	if (rtas_hp_event)
+		return 0;
+
 	memblock_size = pseries_memory_block_size();
 	if (!memblock_size)
 		return -EINVAL;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index ccd53f91e8aa..74b5b8e239c8 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <linux/jump_label.h>
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/jump_label.h>
 
 	.section	".text"
 	
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 7803a19adb31..61d5a17f45c0 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -49,6 +49,7 @@
 #include <asm/mmzone.h>
 #include <asm/plpar_wrappers.h>
 
+#include "pseries.h"
 
 static void tce_invalidate_pSeries_sw(struct iommu_table *tbl,
 				      __be64 *startp, __be64 *endp)
@@ -1307,16 +1308,16 @@ void iommu_init_early_pSeries(void)
 			ppc_md.tce_free	 = tce_free_pSeriesLP;
 		}
 		ppc_md.tce_get   = tce_get_pSeriesLP;
-		ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
+		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
+		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
 		ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
 		ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
 	} else {
 		ppc_md.tce_build = tce_build_pSeries;
 		ppc_md.tce_free  = tce_free_pSeries;
 		ppc_md.tce_get   = tce_get_pseries;
-		ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeries;
-		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeries;
+		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
+		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
 	}
 
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b5682fd6c984..b7a67e3d2201 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -26,7 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/console.h>
 #include <linux/export.h>
-#include <linux/static_key.h>
+#include <linux/jump_label.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 90cf3dcbd9f2..ceb18d349459 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -25,10 +25,10 @@
 static struct kobject *mobility_kobj;
 
 struct update_props_workarea {
-	u32 phandle;
-	u32 state;
-	u64 reserved;
-	u32 nprops;
+	__be32 phandle;
+	__be32 state;
+	__be64 reserved;
+	__be32 nprops;
 } __packed;
 
 #define NODE_ACTION_MASK	0xff000000
@@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
 	return rc;
 }
 
-static int delete_dt_node(u32 phandle)
+static int delete_dt_node(__be32 phandle)
 {
 	struct device_node *dn;
 
-	dn = of_find_node_by_phandle(phandle);
+	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
 	if (!dn)
 		return -ENOENT;
 
@@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
 	return 0;
 }
 
-static int update_dt_node(u32 phandle, s32 scope)
+static int update_dt_node(__be32 phandle, s32 scope)
 {
 	struct update_props_workarea *upwa;
 	struct device_node *dn;
@@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
 	char *prop_data;
 	char *rtas_buf;
 	int update_properties_token;
+	u32 nprops;
 	u32 vd;
 
 	update_properties_token = rtas_token("ibm,update-properties");
@@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope)
 	if (!rtas_buf)
 		return -ENOMEM;
 
-	dn = of_find_node_by_phandle(phandle);
+	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
 	if (!dn) {
 		kfree(rtas_buf);
 		return -ENOENT;
@@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
 			break;
 
 		prop_data = rtas_buf + sizeof(*upwa);
+		nprops = be32_to_cpu(upwa->nprops);
 
 		/* On the first call to ibm,update-properties for a node the
 		 * the first property value descriptor contains an empty
@@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope)
 		 */
 		if (*prop_data == 0) {
 			prop_data++;
-			vd = *(u32 *)prop_data;
+			vd = be32_to_cpu(*(__be32 *)prop_data);
 			prop_data += vd + sizeof(vd);
-			upwa->nprops--;
+			nprops--;
 		}
 
-		for (i = 0; i < upwa->nprops; i++) {
+		for (i = 0; i < nprops; i++) {
 			char *prop_name;
 
 			prop_name = prop_data;
 			prop_data += strlen(prop_name) + 1;
-			vd = *(u32 *)prop_data;
+			vd = be32_to_cpu(*(__be32 *)prop_data);
 			prop_data += sizeof(vd);
 
 			switch (vd) {
@@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope)
 	return 0;
 }
 
-static int add_dt_node(u32 parent_phandle, u32 drc_index)
+static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 {
 	struct device_node *dn;
 	struct device_node *parent_dn;
 	int rc;
 
-	parent_dn = of_find_node_by_phandle(parent_phandle);
+	parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
 	if (!parent_dn)
 		return -ENOENT;
 
@@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
 int pseries_devicetree_update(s32 scope)
 {
 	char *rtas_buf;
-	u32 *data;
+	__be32 *data;
 	int update_nodes_token;
 	int rc;
 
@@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope)
 		if (rc && rc != 1)
 			break;
 
-		data = (u32 *)rtas_buf + 4;
-		while (*data & NODE_ACTION_MASK) {
+		data = (__be32 *)rtas_buf + 4;
+		while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
 			int i;
-			u32 action = *data & NODE_ACTION_MASK;
-			int node_count = *data & NODE_COUNT_MASK;
+			u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+			u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
 
 			data++;
 
 			for (i = 0; i < node_count; i++) {
-				u32 phandle = *data++;
-				u32 drc_index;
+				__be32 phandle = *data++;
+				__be32 drc_index;
 
 				switch (action) {
 				case DELETE_DT_NODE:
@@ -318,28 +320,34 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
 {
 	u64 streamid;
 	int rc;
-	int vasi_rc = 0;
 
 	rc = kstrtou64(buf, 0, &streamid);
 	if (rc)
 		return rc;
 
 	do {
-		rc = rtas_ibm_suspend_me(streamid, &vasi_rc);
-		if (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE)
+		rc = rtas_ibm_suspend_me(streamid);
+		if (rc == -EAGAIN)
 			ssleep(1);
-	} while (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE);
+	} while (rc == -EAGAIN);
 
 	if (rc)
 		return rc;
-	if (vasi_rc)
-		return vasi_rc;
 
 	post_mobility_fixup();
 	return count;
 }
 
+/*
+ * Used by drmgr to determine the kernel behavior of the migration interface.
+ *
+ * Version 1: Performs all PAPR requirements for migration including
+ *	firmware activation and device tree update.
+ */
+#define MIGRATION_API_VERSION	1
+
 static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store);
+static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
 
 static int __init mobility_sysfs_init(void)
 {
@@ -350,7 +358,13 @@ static int __init mobility_sysfs_init(void)
 		return -ENOMEM;
 
 	rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
+	if (rc)
+		pr_err("mobility: unable to create migration sysfs file (%d)\n", rc);
 
-	return rc;
+	rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
+	if (rc)
+		pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc);
+
+	return 0;
 }
 machine_device_initcall(pseries, mobility_sysfs_init);
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 691a154c286d..c8d24f9a6948 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -195,6 +195,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 {
 	struct device_node *dn;
+	struct pci_dn *pdn;
 	struct eeh_dev *edev;
 
 	/* Found our PE and assume 8 at that point. */
@@ -204,10 +205,11 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 		return NULL;
 
 	/* Get the top level device in the PE */
-	edev = of_node_to_eeh_dev(dn);
+	edev = pdn_to_eeh_dev(PCI_DN(dn));
 	if (edev->pe)
 		edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
-	dn = eeh_dev_to_of_node(edev);
+	pdn = eeh_dev_to_pdn(edev);
+	dn = pdn ? pdn->node : NULL;
 	if (!dn)
 		return NULL;
 
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 054a0ed5c7ee..9f8184175c86 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -20,7 +20,6 @@
 #include <linux/kmsg_dump.h>
 #include <linux/pstore.h>
 #include <linux/ctype.h>
-#include <linux/zlib.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
@@ -30,129 +29,17 @@
 /* Max bytes to read/write in one go */
 #define NVRW_CNT 0x20
 
-/*
- * Set oops header version to distinguish between old and new format header.
- * lnx,oops-log partition max size is 4000, header version > 4000 will
- * help in identifying new header.
- */
-#define OOPS_HDR_VERSION 5000
-
 static unsigned int nvram_size;
 static int nvram_fetch, nvram_store;
 static char nvram_buf[NVRW_CNT];	/* assume this is in the first 4GB */
 static DEFINE_SPINLOCK(nvram_lock);
 
-struct err_log_info {
-	__be32 error_type;
-	__be32 seq_num;
-};
-
-struct nvram_os_partition {
-	const char *name;
-	int req_size;	/* desired size, in bytes */
-	int min_size;	/* minimum acceptable size (0 means req_size) */
-	long size;	/* size of data portion (excluding err_log_info) */
-	long index;	/* offset of data portion of partition */
-	bool os_partition; /* partition initialized by OS, not FW */
-};
-
-static struct nvram_os_partition rtas_log_partition = {
-	.name = "ibm,rtas-log",
-	.req_size = 2079,
-	.min_size = 1055,
-	.index = -1,
-	.os_partition = true
-};
-
-static struct nvram_os_partition oops_log_partition = {
-	.name = "lnx,oops-log",
-	.req_size = 4000,
-	.min_size = 2000,
-	.index = -1,
-	.os_partition = true
-};
-
-static const char *pseries_nvram_os_partitions[] = {
-	"ibm,rtas-log",
-	"lnx,oops-log",
-	NULL
-};
-
-struct oops_log_info {
-	__be16 version;
-	__be16 report_length;
-	__be64 timestamp;
-} __attribute__((packed));
-
-static void oops_to_nvram(struct kmsg_dumper *dumper,
-			  enum kmsg_dump_reason reason);
-
-static struct kmsg_dumper nvram_kmsg_dumper = {
-	.dump = oops_to_nvram
-};
-
 /* See clobbering_unread_rtas_event() */
 #define NVRAM_RTAS_READ_TIMEOUT 5		/* seconds */
-static unsigned long last_unread_rtas_event;	/* timestamp */
-
-/*
- * For capturing and compressing an oops or panic report...
-
- * big_oops_buf[] holds the uncompressed text we're capturing.
- *
- * oops_buf[] holds the compressed text, preceded by a oops header.
- * oops header has u16 holding the version of oops header (to differentiate
- * between old and new format header) followed by u16 holding the length of
- * the compressed* text (*Or uncompressed, if compression fails.) and u64
- * holding the timestamp. oops_buf[] gets written to NVRAM.
- *
- * oops_log_info points to the header. oops_data points to the compressed text.
- *
- * +- oops_buf
- * |                                   +- oops_data
- * v                                   v
- * +-----------+-----------+-----------+------------------------+
- * | version   | length    | timestamp | text                   |
- * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes)   |
- * +-----------+-----------+-----------+------------------------+
- * ^
- * +- oops_log_info
- *
- * We preallocate these buffers during init to avoid kmalloc during oops/panic.
- */
-static size_t big_oops_buf_sz;
-static char *big_oops_buf, *oops_buf;
-static char *oops_data;
-static size_t oops_data_sz;
-
-/* Compression parameters */
-#define COMPR_LEVEL 6
-#define WINDOW_BITS 12
-#define MEM_LEVEL 4
-static struct z_stream_s stream;
+static time64_t last_unread_rtas_event;		/* timestamp */
 
 #ifdef CONFIG_PSTORE
-static struct nvram_os_partition of_config_partition = {
-	.name = "of-config",
-	.index = -1,
-	.os_partition = false
-};
-
-static struct nvram_os_partition common_partition = {
-	.name = "common",
-	.index = -1,
-	.os_partition = false
-};
-
-static enum pstore_type_id nvram_type_ids[] = {
-	PSTORE_TYPE_DMESG,
-	PSTORE_TYPE_PPC_RTAS,
-	PSTORE_TYPE_PPC_OF,
-	PSTORE_TYPE_PPC_COMMON,
-	-1
-};
-static int read_type;
-static unsigned long last_rtas_event;
+time64_t last_rtas_event;
 #endif
 
 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
@@ -246,132 +133,26 @@ static ssize_t pSeries_nvram_get_size(void)
 	return nvram_size ? nvram_size : -ENODEV;
 }
 
-
-/* nvram_write_os_partition, nvram_write_error_log
+/* nvram_write_error_log
  *
  * We need to buffer the error logs into nvram to ensure that we have
- * the failure information to decode.  If we have a severe error there
- * is no way to guarantee that the OS or the machine is in a state to
- * get back to user land and write the error to disk.  For example if
- * the SCSI device driver causes a Machine Check by writing to a bad
- * IO address, there is no way of guaranteeing that the device driver
- * is in any state that is would also be able to write the error data
- * captured to disk, thus we buffer it in NVRAM for analysis on the
- * next boot.
- *
- * In NVRAM the partition containing the error log buffer will looks like:
- * Header (in bytes):
- * +-----------+----------+--------+------------+------------------+
- * | signature | checksum | length | name       | data             |
- * |0          |1         |2      3|4         15|16        length-1|
- * +-----------+----------+--------+------------+------------------+
- *
- * The 'data' section would look like (in bytes):
- * +--------------+------------+-----------------------------------+
- * | event_logged | sequence # | error log                         |
- * |0            3|4          7|8                  error_log_size-1|
- * +--------------+------------+-----------------------------------+
- *
- * event_logged: 0 if event has not been logged to syslog, 1 if it has
- * sequence #: The unique sequence # for each event. (until it wraps)
- * error log: The error log from event_scan
+ * the failure information to decode.
  */
-static int nvram_write_os_partition(struct nvram_os_partition *part,
-				    char *buff, int length,
-				    unsigned int err_type,
-				    unsigned int error_log_cnt)
-{
-	int rc;
-	loff_t tmp_index;
-	struct err_log_info info;
-	
-	if (part->index == -1) {
-		return -ESPIPE;
-	}
-
-	if (length > part->size) {
-		length = part->size;
-	}
-
-	info.error_type = cpu_to_be32(err_type);
-	info.seq_num = cpu_to_be32(error_log_cnt);
-
-	tmp_index = part->index;
-
-	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
-	if (rc <= 0) {
-		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
-		return rc;
-	}
-
-	rc = ppc_md.nvram_write(buff, length, &tmp_index);
-	if (rc <= 0) {
-		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
-		return rc;
-	}
-	
-	return 0;
-}
-
 int nvram_write_error_log(char * buff, int length,
                           unsigned int err_type, unsigned int error_log_cnt)
 {
 	int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
 						err_type, error_log_cnt);
 	if (!rc) {
-		last_unread_rtas_event = get_seconds();
+		last_unread_rtas_event = ktime_get_real_seconds();
 #ifdef CONFIG_PSTORE
-		last_rtas_event = get_seconds();
+		last_rtas_event = ktime_get_real_seconds();
 #endif
 	}
 
 	return rc;
 }
 
-/* nvram_read_partition
- *
- * Reads nvram partition for at most 'length'
- */
-static int nvram_read_partition(struct nvram_os_partition *part, char *buff,
-				int length, unsigned int *err_type,
-				unsigned int *error_log_cnt)
-{
-	int rc;
-	loff_t tmp_index;
-	struct err_log_info info;
-	
-	if (part->index == -1)
-		return -1;
-
-	if (length > part->size)
-		length = part->size;
-
-	tmp_index = part->index;
-
-	if (part->os_partition) {
-		rc = ppc_md.nvram_read((char *)&info,
-					sizeof(struct err_log_info),
-					&tmp_index);
-		if (rc <= 0) {
-			pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
-			return rc;
-		}
-	}
-
-	rc = ppc_md.nvram_read(buff, length, &tmp_index);
-	if (rc <= 0) {
-		pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
-		return rc;
-	}
-
-	if (part->os_partition) {
-		*error_log_cnt = be32_to_cpu(info.seq_num);
-		*err_type = be32_to_cpu(info.error_type);
-	}
-
-	return 0;
-}
-
 /* nvram_read_error_log
  *
  * Reads nvram for error log for at most 'length'
@@ -407,67 +188,6 @@ int nvram_clear_error_log(void)
 	return 0;
 }
 
-/* pseries_nvram_init_os_partition
- *
- * This sets up a partition with an "OS" signature.
- *
- * The general strategy is the following:
- * 1.) If a partition with the indicated name already exists...
- *	- If it's large enough, use it.
- *	- Otherwise, recycle it and keep going.
- * 2.) Search for a free partition that is large enough.
- * 3.) If there's not a free partition large enough, recycle any obsolete
- * OS partitions and try again.
- * 4.) Will first try getting a chunk that will satisfy the requested size.
- * 5.) If a chunk of the requested size cannot be allocated, then try finding
- * a chunk that will satisfy the minum needed.
- *
- * Returns 0 on success, else -1.
- */
-static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
-									*part)
-{
-	loff_t p;
-	int size;
-
-	/* Look for ours */
-	p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
-
-	/* Found one but too small, remove it */
-	if (p && size < part->min_size) {
-		pr_info("nvram: Found too small %s partition,"
-					" removing it...\n", part->name);
-		nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
-		p = 0;
-	}
-
-	/* Create one if we didn't find */
-	if (!p) {
-		p = nvram_create_partition(part->name, NVRAM_SIG_OS,
-					part->req_size, part->min_size);
-		if (p == -ENOSPC) {
-			pr_info("nvram: No room to create %s partition, "
-				"deleting any obsolete OS partitions...\n",
-				part->name);
-			nvram_remove_partition(NULL, NVRAM_SIG_OS,
-						pseries_nvram_os_partitions);
-			p = nvram_create_partition(part->name, NVRAM_SIG_OS,
-					part->req_size, part->min_size);
-		}
-	}
-
-	if (p <= 0) {
-		pr_err("nvram: Failed to find or create %s"
-		       " partition, err %d\n", part->name, (int)p);
-		return -1;
-	}
-
-	part->index = p;
-	part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
-	
-	return 0;
-}
-
 /*
  * Are we using the ibm,rtas-log for oops/panic reports?  And if so,
  * would logging this oops/panic overwrite an RTAS event that rtas_errd
@@ -476,321 +196,14 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
  * We assume that if rtas_errd hasn't read the RTAS event in
  * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
  */
-static int clobbering_unread_rtas_event(void)
+int clobbering_unread_rtas_event(void)
 {
 	return (oops_log_partition.index == rtas_log_partition.index
 		&& last_unread_rtas_event
-		&& get_seconds() - last_unread_rtas_event <=
+		&& ktime_get_real_seconds() - last_unread_rtas_event <=
 						NVRAM_RTAS_READ_TIMEOUT);
 }
 
-/* Derived from logfs_compress() */
-static int nvram_compress(const void *in, void *out, size_t inlen,
-							size_t outlen)
-{
-	int err, ret;
-
-	ret = -EIO;
-	err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
-						MEM_LEVEL, Z_DEFAULT_STRATEGY);
-	if (err != Z_OK)
-		goto error;
-
-	stream.next_in = in;
-	stream.avail_in = inlen;
-	stream.total_in = 0;
-	stream.next_out = out;
-	stream.avail_out = outlen;
-	stream.total_out = 0;
-
-	err = zlib_deflate(&stream, Z_FINISH);
-	if (err != Z_STREAM_END)
-		goto error;
-
-	err = zlib_deflateEnd(&stream);
-	if (err != Z_OK)
-		goto error;
-
-	if (stream.total_out >= stream.total_in)
-		goto error;
-
-	ret = stream.total_out;
-error:
-	return ret;
-}
-
-/* Compress the text from big_oops_buf into oops_buf. */
-static int zip_oops(size_t text_len)
-{
-	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
-	int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
-								oops_data_sz);
-	if (zipped_len < 0) {
-		pr_err("nvram: compression failed; returned %d\n", zipped_len);
-		pr_err("nvram: logging uncompressed oops/panic report\n");
-		return -1;
-	}
-	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
-	oops_hdr->report_length = cpu_to_be16(zipped_len);
-	oops_hdr->timestamp = cpu_to_be64(get_seconds());
-	return 0;
-}
-
-#ifdef CONFIG_PSTORE
-static int nvram_pstore_open(struct pstore_info *psi)
-{
-	/* Reset the iterator to start reading partitions again */
-	read_type = -1;
-	return 0;
-}
-
-/**
- * nvram_pstore_write - pstore write callback for nvram
- * @type:               Type of message logged
- * @reason:             reason behind dump (oops/panic)
- * @id:                 identifier to indicate the write performed
- * @part:               pstore writes data to registered buffer in parts,
- *                      part number will indicate the same.
- * @count:              Indicates oops count
- * @compressed:         Flag to indicate the log is compressed
- * @size:               number of bytes written to the registered buffer
- * @psi:                registered pstore_info structure
- *
- * Called by pstore_dump() when an oops or panic report is logged in the
- * printk buffer.
- * Returns 0 on successful write.
- */
-static int nvram_pstore_write(enum pstore_type_id type,
-				enum kmsg_dump_reason reason,
-				u64 *id, unsigned int part, int count,
-				bool compressed, size_t size,
-				struct pstore_info *psi)
-{
-	int rc;
-	unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
-	struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
-
-	/* part 1 has the recent messages from printk buffer */
-	if (part > 1 || type != PSTORE_TYPE_DMESG ||
-				clobbering_unread_rtas_event())
-		return -1;
-
-	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
-	oops_hdr->report_length = cpu_to_be16(size);
-	oops_hdr->timestamp = cpu_to_be64(get_seconds());
-
-	if (compressed)
-		err_type = ERR_TYPE_KERNEL_PANIC_GZ;
-
-	rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
-		(int) (sizeof(*oops_hdr) + size), err_type, count);
-
-	if (rc != 0)
-		return rc;
-
-	*id = part;
-	return 0;
-}
-
-/*
- * Reads the oops/panic report, rtas, of-config and common partition.
- * Returns the length of the data we read from each partition.
- * Returns 0 if we've been called before.
- */
-static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
-				int *count, struct timespec *time, char **buf,
-				bool *compressed, struct pstore_info *psi)
-{
-	struct oops_log_info *oops_hdr;
-	unsigned int err_type, id_no, size = 0;
-	struct nvram_os_partition *part = NULL;
-	char *buff = NULL;
-	int sig = 0;
-	loff_t p;
-
-	read_type++;
-
-	switch (nvram_type_ids[read_type]) {
-	case PSTORE_TYPE_DMESG:
-		part = &oops_log_partition;
-		*type = PSTORE_TYPE_DMESG;
-		break;
-	case PSTORE_TYPE_PPC_RTAS:
-		part = &rtas_log_partition;
-		*type = PSTORE_TYPE_PPC_RTAS;
-		time->tv_sec = last_rtas_event;
-		time->tv_nsec = 0;
-		break;
-	case PSTORE_TYPE_PPC_OF:
-		sig = NVRAM_SIG_OF;
-		part = &of_config_partition;
-		*type = PSTORE_TYPE_PPC_OF;
-		*id = PSTORE_TYPE_PPC_OF;
-		time->tv_sec = 0;
-		time->tv_nsec = 0;
-		break;
-	case PSTORE_TYPE_PPC_COMMON:
-		sig = NVRAM_SIG_SYS;
-		part = &common_partition;
-		*type = PSTORE_TYPE_PPC_COMMON;
-		*id = PSTORE_TYPE_PPC_COMMON;
-		time->tv_sec = 0;
-		time->tv_nsec = 0;
-		break;
-	default:
-		return 0;
-	}
-
-	if (!part->os_partition) {
-		p = nvram_find_partition(part->name, sig, &size);
-		if (p <= 0) {
-			pr_err("nvram: Failed to find partition %s, "
-				"err %d\n", part->name, (int)p);
-			return 0;
-		}
-		part->index = p;
-		part->size = size;
-	}
-
-	buff = kmalloc(part->size, GFP_KERNEL);
-
-	if (!buff)
-		return -ENOMEM;
-
-	if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
-		kfree(buff);
-		return 0;
-	}
-
-	*count = 0;
-
-	if (part->os_partition)
-		*id = id_no;
-
-	if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
-		size_t length, hdr_size;
-
-		oops_hdr = (struct oops_log_info *)buff;
-		if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) {
-			/* Old format oops header had 2-byte record size */
-			hdr_size = sizeof(u16);
-			length = be16_to_cpu(oops_hdr->version);
-			time->tv_sec = 0;
-			time->tv_nsec = 0;
-		} else {
-			hdr_size = sizeof(*oops_hdr);
-			length = be16_to_cpu(oops_hdr->report_length);
-			time->tv_sec = be64_to_cpu(oops_hdr->timestamp);
-			time->tv_nsec = 0;
-		}
-		*buf = kmalloc(length, GFP_KERNEL);
-		if (*buf == NULL)
-			return -ENOMEM;
-		memcpy(*buf, buff + hdr_size, length);
-		kfree(buff);
-
-		if (err_type == ERR_TYPE_KERNEL_PANIC_GZ)
-			*compressed = true;
-		else
-			*compressed = false;
-		return length;
-	}
-
-	*buf = buff;
-	return part->size;
-}
-
-static struct pstore_info nvram_pstore_info = {
-	.owner = THIS_MODULE,
-	.name = "nvram",
-	.open = nvram_pstore_open,
-	.read = nvram_pstore_read,
-	.write = nvram_pstore_write,
-};
-
-static int nvram_pstore_init(void)
-{
-	int rc = 0;
-
-	nvram_pstore_info.buf = oops_data;
-	nvram_pstore_info.bufsize = oops_data_sz;
-
-	spin_lock_init(&nvram_pstore_info.buf_lock);
-
-	rc = pstore_register(&nvram_pstore_info);
-	if (rc != 0)
-		pr_err("nvram: pstore_register() failed, defaults to "
-				"kmsg_dump; returned %d\n", rc);
-
-	return rc;
-}
-#else
-static int nvram_pstore_init(void)
-{
-	return -1;
-}
-#endif
-
-static void __init nvram_init_oops_partition(int rtas_partition_exists)
-{
-	int rc;
-
-	rc = pseries_nvram_init_os_partition(&oops_log_partition);
-	if (rc != 0) {
-		if (!rtas_partition_exists)
-			return;
-		pr_notice("nvram: Using %s partition to log both"
-			" RTAS errors and oops/panic reports\n",
-			rtas_log_partition.name);
-		memcpy(&oops_log_partition, &rtas_log_partition,
-						sizeof(rtas_log_partition));
-	}
-	oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
-	if (!oops_buf) {
-		pr_err("nvram: No memory for %s partition\n",
-						oops_log_partition.name);
-		return;
-	}
-	oops_data = oops_buf + sizeof(struct oops_log_info);
-	oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
-
-	rc = nvram_pstore_init();
-
-	if (!rc)
-		return;
-
-	/*
-	 * Figure compression (preceded by elimination of each line's <n>
-	 * severity prefix) will reduce the oops/panic report to at most
-	 * 45% of its original size.
-	 */
-	big_oops_buf_sz = (oops_data_sz * 100) / 45;
-	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
-	if (big_oops_buf) {
-		stream.workspace =  kmalloc(zlib_deflate_workspacesize(
-					WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
-		if (!stream.workspace) {
-			pr_err("nvram: No memory for compression workspace; "
-				"skipping compression of %s partition data\n",
-				oops_log_partition.name);
-			kfree(big_oops_buf);
-			big_oops_buf = NULL;
-		}
-	} else {
-		pr_err("No memory for uncompressed %s data; "
-			"skipping compression\n", oops_log_partition.name);
-		stream.workspace = NULL;
-	}
-
-	rc = kmsg_dump_register(&nvram_kmsg_dumper);
-	if (rc != 0) {
-		pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
-		kfree(oops_buf);
-		kfree(big_oops_buf);
-		kfree(stream.workspace);
-	}
-}
-
 static int __init pseries_nvram_init_log_partitions(void)
 {
 	int rc;
@@ -798,7 +211,7 @@ static int __init pseries_nvram_init_log_partitions(void)
 	/* Scan nvram for partitions */
 	nvram_scan_partitions();
 
-	rc = pseries_nvram_init_os_partition(&rtas_log_partition);
+	rc = nvram_init_os_partition(&rtas_log_partition);
 	nvram_init_oops_partition(rc == 0);
 	return 0;
 }
@@ -834,72 +247,3 @@ int __init pSeries_nvram_init(void)
 	return 0;
 }
 
-
-/*
- * This is our kmsg_dump callback, called after an oops or panic report
- * has been written to the printk buffer.  We want to capture as much
- * of the printk buffer as possible.  First, capture as much as we can
- * that we think will compress sufficiently to fit in the lnx,oops-log
- * partition.  If that's too much, go back and capture uncompressed text.
- */
-static void oops_to_nvram(struct kmsg_dumper *dumper,
-			  enum kmsg_dump_reason reason)
-{
-	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
-	static unsigned int oops_count = 0;
-	static bool panicking = false;
-	static DEFINE_SPINLOCK(lock);
-	unsigned long flags;
-	size_t text_len;
-	unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
-	int rc = -1;
-
-	switch (reason) {
-	case KMSG_DUMP_RESTART:
-	case KMSG_DUMP_HALT:
-	case KMSG_DUMP_POWEROFF:
-		/* These are almost always orderly shutdowns. */
-		return;
-	case KMSG_DUMP_OOPS:
-		break;
-	case KMSG_DUMP_PANIC:
-		panicking = true;
-		break;
-	case KMSG_DUMP_EMERG:
-		if (panicking)
-			/* Panic report already captured. */
-			return;
-		break;
-	default:
-		pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
-		       __func__, (int) reason);
-		return;
-	}
-
-	if (clobbering_unread_rtas_event())
-		return;
-
-	if (!spin_trylock_irqsave(&lock, flags))
-		return;
-
-	if (big_oops_buf) {
-		kmsg_dump_get_buffer(dumper, false,
-				     big_oops_buf, big_oops_buf_sz, &text_len);
-		rc = zip_oops(text_len);
-	}
-	if (rc != 0) {
-		kmsg_dump_rewind(dumper);
-		kmsg_dump_get_buffer(dumper, false,
-				     oops_data, oops_data_sz, &text_len);
-		err_type = ERR_TYPE_KERNEL_PANIC;
-		oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
-		oops_hdr->report_length = cpu_to_be16(text_len);
-		oops_hdr->timestamp = cpu_to_be64(get_seconds());
-	}
-
-	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
-		(int) (sizeof(*oops_hdr) + text_len), err_type,
-		++oops_count);
-
-	spin_unlock_irqrestore(&lock, flags);
-}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 89e23811199c..5d4a3df59d0c 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -32,6 +32,8 @@
 #include <asm/firmware.h>
 #include <asm/eeh.h>
 
+#include "pseries.h"
+
 static struct pci_bus *
 find_bus_among_children(struct pci_bus *bus,
                         struct device_node *dn)
@@ -75,6 +77,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
 		return NULL;
 	rtas_setup_phb(phb);
 	pci_process_bridge_OF_ranges(phb, dn, 0);
+	phb->controller_ops = pseries_pci_controller_ops;
 
 	pci_devs_phb_init_dynamic(phb);
 
@@ -82,7 +85,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
 	eeh_dev_phb_init_dynamic(phb);
 
 	if (dn->child)
-		eeh_add_device_tree_early(dn);
+		eeh_add_device_tree_early(PCI_DN(dn));
 
 	pcibios_scan_phb(phb);
 	pcibios_finish_adding_to_bus(phb->bus);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1796c5438cc6..8411c27293e4 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -11,6 +11,7 @@
 #define _PSERIES_PSERIES_H
 
 #include <linux/interrupt.h>
+#include <asm/rtas.h>
 
 struct device_node;
 
@@ -60,11 +61,24 @@ extern struct device_node *dlpar_configure_connector(__be32,
 						struct device_node *);
 extern int dlpar_attach_node(struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
+extern int dlpar_acquire_drc(u32 drc_index);
+extern int dlpar_release_drc(u32 drc_index);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+#else
+static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
+#endif
 
 /* PCI root bridge prepare function override for pseries */
 struct pci_host_bridge;
 int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
 
+extern struct pci_controller_ops pseries_pci_controller_ops;
+
 unsigned long pseries_memory_block_size(void);
 
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e445b6701f50..df6a7041922b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -265,7 +265,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
 			update_dn_pci_info(np, pci->phb);
 
 			/* Create EEH device for the OF node */
-			eeh_dev_init(np, pci->phb);
+			eeh_dev_init(PCI_DN(np), pci->phb);
 		}
 		break;
 	default:
@@ -461,6 +461,47 @@ static long pseries_little_endian_exceptions(void)
 }
 #endif
 
+static void __init find_and_init_phbs(void)
+{
+	struct device_node *node;
+	struct pci_controller *phb;
+	struct device_node *root = of_find_node_by_path("/");
+
+	for_each_child_of_node(root, node) {
+		if (node->type == NULL || (strcmp(node->type, "pci") != 0 &&
+					   strcmp(node->type, "pciex") != 0))
+			continue;
+
+		phb = pcibios_alloc_controller(node);
+		if (!phb)
+			continue;
+		rtas_setup_phb(phb);
+		pci_process_bridge_OF_ranges(phb, node, 0);
+		isa_bridge_find_early(phb);
+		phb->controller_ops = pseries_pci_controller_ops;
+	}
+
+	of_node_put(root);
+	pci_devs_phb_init();
+
+	/*
+	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
+	 * in chosen.
+	 */
+	if (of_chosen) {
+		const int *prop;
+
+		prop = of_get_property(of_chosen,
+				"linux,pci-probe-only", NULL);
+		if (prop) {
+			if (*prop)
+				pci_add_flags(PCI_PROBE_ONLY);
+			else
+				pci_clear_flags(PCI_PROBE_ONLY);
+		}
+	}
+}
+
 static void __init pSeries_setup_arch(void)
 {
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -793,6 +834,10 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
 void pSeries_final_fixup(void) { }
 #endif
 
+struct pci_controller_ops pseries_pci_controller_ops = {
+	.probe_mode		= pSeries_pci_probe_mode,
+};
+
 define_machine(pseries) {
 	.name			= "pSeries",
 	.probe			= pSeries_probe,
@@ -801,7 +846,6 @@ define_machine(pseries) {
 	.show_cpuinfo		= pSeries_show_cpuinfo,
 	.log_error		= pSeries_log_error,
 	.pcibios_fixup		= pSeries_final_fixup,
-	.pci_probe_mode		= pSeries_pci_probe_mode,
 	.restart		= rtas_restart,
 	.halt			= rtas_halt,
 	.panic			= rtas_os_term,
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index a3555b10c1a5..6932ea803e33 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -197,16 +197,14 @@ static void pSeries_cause_ipi_mux(int cpu, unsigned long data)
 		xics_cause_ipi(cpu, data);
 }
 
-static __init int pSeries_smp_probe(void)
+static __init void pSeries_smp_probe(void)
 {
-	int ret = xics_smp_probe();
+	xics_smp_probe();
 
 	if (cpu_has_feature(CPU_FTR_DBELL)) {
 		xics_cause_ipi = smp_ops->cause_ipi;
 		smp_ops->cause_ipi = pSeries_cause_ipi_mux;
 	}
-
-	return ret;
 }
 
 static struct smp_ops_t pSeries_mpic_smp_ops = {
diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl
deleted file mode 100755
index 3f46e8b9c56d..000000000000
--- a/arch/powerpc/relocs_check.pl
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright © 2009 IBM Corporation
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version
-# 2 of the License, or (at your option) any later version.
-
-# This script checks the relocations of a vmlinux for "suspicious"
-# relocations.
-
-use strict;
-use warnings;
-
-if ($#ARGV != 1) {
-	die "$0 [path to objdump] [path to vmlinux]\n";
-}
-
-# Have Kbuild supply the path to objdump so we handle cross compilation.
-my $objdump = shift;
-my $vmlinux = shift;
-my $bad_relocs_count = 0;
-my $bad_relocs = "";
-my $old_binutils = 0;
-
-open(FD, "$objdump -R $vmlinux|") or die;
-while (<FD>) {
-	study $_;
-
-	# Only look at relocation lines.
-	next if (!/\s+R_/);
-
-	# These relocations are okay
-	# On PPC64:
-	# 	R_PPC64_RELATIVE, R_PPC64_NONE, R_PPC64_ADDR64
-	# On PPC:
-	# 	R_PPC_RELATIVE, R_PPC_ADDR16_HI, 
-	# 	R_PPC_ADDR16_HA,R_PPC_ADDR16_LO,
-	# 	R_PPC_NONE
-
-	next if (/\bR_PPC64_RELATIVE\b/ or /\bR_PPC64_NONE\b/ or
-	         /\bR_PPC64_ADDR64\s+mach_/);
-	next if (/\bR_PPC_ADDR16_LO\b/ or /\bR_PPC_ADDR16_HI\b/ or
-		 /\bR_PPC_ADDR16_HA\b/ or /\bR_PPC_RELATIVE\b/ or
-		 /\bR_PPC_NONE\b/);
-
-	# If we see this type of relocation it's an idication that
-	# we /may/ be using an old version of binutils.
-	if (/R_PPC64_UADDR64/) {
-		$old_binutils++;
-	}
-
-	$bad_relocs_count++;
-	$bad_relocs .= $_;
-}
-
-if ($bad_relocs_count) {
-	print "WARNING: $bad_relocs_count bad relocations\n";
-	print $bad_relocs;
-}
-
-if ($old_binutils) {
-	print "WARNING: You need at least binutils >= 2.19 to build a ".
-	      "CONFIG_RELOCATABLE kernel\n";
-}
diff --git a/arch/powerpc/relocs_check.sh b/arch/powerpc/relocs_check.sh
new file mode 100755
index 000000000000..2e4ebd0e25b3
--- /dev/null
+++ b/arch/powerpc/relocs_check.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+# Copyright © 2015 IBM Corporation
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks the relocations of a vmlinux for "suspicious"
+# relocations.
+
+# based on relocs_check.pl
+# Copyright © 2009 IBM Corporation
+
+if [ $# -lt 2 ]; then
+	echo "$0 [path to objdump] [path to vmlinux]" 1>&2
+	exit 1
+fi
+
+# Have Kbuild supply the path to objdump so we handle cross compilation.
+objdump="$1"
+vmlinux="$2"
+
+bad_relocs=$(
+"$objdump" -R "$vmlinux" |
+	# Only look at relocation lines.
+	grep -E '\<R_' |
+	# These relocations are okay
+	# On PPC64:
+	#	R_PPC64_RELATIVE, R_PPC64_NONE
+	#	R_PPC64_ADDR64 mach_<name>
+	# On PPC:
+	#	R_PPC_RELATIVE, R_PPC_ADDR16_HI,
+	#	R_PPC_ADDR16_HA,R_PPC_ADDR16_LO,
+	#	R_PPC_NONE
+	grep -F -w -v 'R_PPC64_RELATIVE
+R_PPC64_NONE
+R_PPC_ADDR16_LO
+R_PPC_ADDR16_HI
+R_PPC_ADDR16_HA
+R_PPC_RELATIVE
+R_PPC_NONE' |
+	grep -E -v '\<R_PPC64_ADDR64[[:space:]]+mach_'
+)
+
+if [ -z "$bad_relocs" ]; then
+	exit 0
+fi
+
+num_bad=$(echo "$bad_relocs" | wc -l)
+echo "WARNING: $num_bad bad relocations"
+echo "$bad_relocs"
+
+# If we see this type of relocation it's an idication that
+# we /may/ be using an old version of binutils.
+if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then
+	echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel"
+fi
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 9e5353ff6d1b..d00a5663e312 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -369,7 +369,7 @@ static int dart_dma_set_mask(struct device *dev, u64 dma_mask)
 	return 0;
 }
 
-void __init iommu_init_early_dart(void)
+void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
 {
 	struct device_node *dn;
 
@@ -395,8 +395,8 @@ void __init iommu_init_early_dart(void)
 	if (dart_is_u4)
 		ppc_md.dma_set_mask = dart_dma_set_mask;
 
-	ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart;
-	ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart;
+	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
+	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
 
 	/* Setup pci_dma ops */
 	set_pci_dma_ops(&dma_iommu_ops);
@@ -404,8 +404,8 @@ void __init iommu_init_early_dart(void)
 
  bail:
 	/* If init failed, use direct iommu and null setup functions */
-	ppc_md.pci_dma_dev_setup = NULL;
-	ppc_md.pci_dma_bus_setup = NULL;
+	controller_ops->dma_dev_setup = NULL;
+	controller_ops->dma_bus_setup = NULL;
 
 	/* Setup pci_dma ops */
 	set_pci_dma_ops(&dma_direct_ops);
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
index 2d8a101b6b9e..121e26fffd50 100644
--- a/arch/powerpc/sysdev/dcr.c
+++ b/arch/powerpc/sysdev/dcr.c
@@ -54,7 +54,7 @@ bool dcr_map_ok_generic(dcr_host_t host)
 	else if (host.type == DCR_HOST_MMIO)
 		return dcr_map_ok_mmio(host.host.mmio);
 	else
-		return 0;
+		return false;
 }
 EXPORT_SYMBOL_GPL(dcr_map_ok_generic);
 
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 4bbb4b8dfd09..f086c6f22dc9 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -162,7 +162,17 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
 	msg->address_lo = lower_32_bits(address);
 	msg->address_hi = upper_32_bits(address);
 
-	msg->data = hwirq;
+	/*
+	 * MPIC version 2.0 has erratum PIC1. It causes
+	 * that neither MSI nor MSI-X can work fine.
+	 * This is a workaround to allow MSI-X to function
+	 * properly. It only works for MSI-X, we prevent
+	 * MSI on buggy chips in fsl_setup_msi_irqs().
+	 */
+	if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
+		msg->data = __swab32(hwirq);
+	else
+		msg->data = hwirq;
 
 	pr_debug("%s: allocated srs: %d, ibs: %d\n", __func__,
 		 (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK,
@@ -180,8 +190,16 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	struct msi_msg msg;
 	struct fsl_msi *msi_data;
 
-	if (type == PCI_CAP_ID_MSIX)
-		pr_debug("fslmsi: MSI-X untested, trying anyway.\n");
+	if (type == PCI_CAP_ID_MSI) {
+		/*
+		 * MPIC version 2.0 has erratum PIC1. For now MSI
+		 * could not work. So check to prevent MSI from
+		 * being used on the board with this erratum.
+		 */
+		list_for_each_entry(msi_data, &msi_head, list)
+			if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
+				return -EINVAL;
+	}
 
 	/*
 	 * If the PCI node has an fsl,msi property, then we need to use it
@@ -446,6 +464,11 @@ static int fsl_of_msi_probe(struct platform_device *dev)
 
 	msi->feature = features->fsl_pic_ip;
 
+	/* For erratum PIC1 on MPIC version 2.0*/
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) == FSL_PIC_IP_MPIC
+			&& (fsl_mpic_primary_get_version() == 0x0200))
+		msi->feature |= MSI_HW_ERRATA_ENDIAN;
+
 	/*
 	 * Remember the phandle, so that we can match with any PCI nodes
 	 * that have an "fsl,msi" property.
diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
index 420cfcbdac01..a67359d993e5 100644
--- a/arch/powerpc/sysdev/fsl_msi.h
+++ b/arch/powerpc/sysdev/fsl_msi.h
@@ -27,6 +27,8 @@
 #define FSL_PIC_IP_IPIC   0x00000002
 #define FSL_PIC_IP_VMPIC  0x00000003
 
+#define MSI_HW_ERRATA_ENDIAN 0x00000010
+
 struct fsl_msi_cascade_data;
 
 struct fsl_msi {
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 4b74c276e427..9a8fcf0d79d7 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -111,6 +111,18 @@ static struct pci_ops fsl_indirect_pcie_ops =
 #define MAX_PHYS_ADDR_BITS	40
 static u64 pci64_dma_offset = 1ull << MAX_PHYS_ADDR_BITS;
 
+#ifdef CONFIG_SWIOTLB
+static void setup_swiotlb_ops(struct pci_controller *hose)
+{
+	if (ppc_swiotlb_enable) {
+		hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
+		set_pci_dma_ops(&swiotlb_dma_ops);
+	}
+}
+#else
+static inline void setup_swiotlb_ops(struct pci_controller *hose) {}
+#endif
+
 static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
@@ -548,6 +560,9 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
 	/* Setup PEX window registers */
 	setup_pci_atmu(hose);
 
+	/* Set up controller operations */
+	setup_swiotlb_ops(hose);
+
 	return 0;
 
 no_bridge:
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index bbfbbf2025fd..b2b8447a227a 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -655,7 +655,6 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d)
 static inline void mpic_eoi(struct mpic *mpic)
 {
 	mpic_cpu_write(MPIC_INFO(CPU_EOI), 0);
-	(void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
 }
 
 /*
@@ -1676,31 +1675,6 @@ void __init mpic_init(struct mpic *mpic)
 		mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
 }
 
-void __init mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio)
-{
-	u32 v;
-
-	v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1);
-	v &= ~MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK;
-	v |= MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO(clock_ratio);
-	mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v);
-}
-
-void __init mpic_set_serial_int(struct mpic *mpic, int enable)
-{
-	unsigned long flags;
-	u32 v;
-
-	raw_spin_lock_irqsave(&mpic_lock, flags);
-	v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1);
-	if (enable)
-		v |= MPIC_GREG_GLOBAL_CONF_1_SIE;
-	else
-		v &= ~MPIC_GREG_GLOBAL_CONF_1_SIE;
-	mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v);
-	raw_spin_unlock_irqrestore(&mpic_lock, flags);
-}
-
 void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 {
 	struct mpic *mpic = mpic_find(irq);
@@ -1923,7 +1897,7 @@ void smp_mpic_message_pass(int cpu, int msg)
 		       msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask);
 }
 
-int __init smp_mpic_probe(void)
+void __init smp_mpic_probe(void)
 {
 	int nr_cpus;
 
@@ -1935,8 +1909,6 @@ int __init smp_mpic_probe(void)
 
 	if (nr_cpus > 1)
 		mpic_request_ipis();
-
-	return nr_cpus;
 }
 
 void smp_mpic_setup_cpu(int cpu)
diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c
index d09994164daf..7ea0174f6d3d 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_io.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_io.c
@@ -190,28 +190,3 @@ int par_io_of_config(struct device_node *np)
 	return 0;
 }
 EXPORT_SYMBOL(par_io_of_config);
-
-#ifdef DEBUG
-static void dump_par_io(void)
-{
-	unsigned int i;
-
-	printk(KERN_INFO "%s: par_io=%p\n", __func__, par_io);
-	for (i = 0; i < num_par_io_ports; i++) {
-		printk(KERN_INFO "	cpodr[%u]=%08x\n", i,
-			in_be32(&par_io[i].cpodr));
-		printk(KERN_INFO "	cpdata[%u]=%08x\n", i,
-			in_be32(&par_io[i].cpdata));
-		printk(KERN_INFO "	cpdir1[%u]=%08x\n", i,
-			in_be32(&par_io[i].cpdir1));
-		printk(KERN_INFO "	cpdir2[%u]=%08x\n", i,
-			in_be32(&par_io[i].cpdir2));
-		printk(KERN_INFO "	cppar1[%u]=%08x\n", i,
-			in_be32(&par_io[i].cppar1));
-		printk(KERN_INFO "	cppar2[%u]=%08x\n", i,
-			in_be32(&par_io[i].cppar2));
-	}
-
-}
-EXPORT_SYMBOL(dump_par_io);
-#endif /* DEBUG */
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
index befaf1123f7f..5f91628209eb 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
@@ -43,11 +43,6 @@ u32 ucc_slow_get_qe_cr_subblock(int uccs_num)
 }
 EXPORT_SYMBOL(ucc_slow_get_qe_cr_subblock);
 
-void ucc_slow_poll_transmitter_now(struct ucc_slow_private * uccs)
-{
-	out_be16(&uccs->us_regs->utodr, UCC_SLOW_TOD);
-}
-
 void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs)
 {
 	struct ucc_slow_info *us_info = uccs->us_info;
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 125743b58c70..878a54036a25 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -140,15 +140,13 @@ static void xics_request_ipi(void)
 			   IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
 }
 
-int __init xics_smp_probe(void)
+void __init xics_smp_probe(void)
 {
 	/* Setup cause_ipi callback  based on which ICP is used */
 	smp_ops->cause_ipi = icp_ops->cause_ipi;
 
 	/* Register all the IPIs */
 	xics_request_ipi();
-
-	return num_possible_cpus();
 }
 
 #endif /* CONFIG_SMP */
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 647c3eccc3d0..2938934c6518 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -4,6 +4,5 @@ obj-$(CONFIG_KVM)		+= kvm/
 obj-$(CONFIG_CRYPTO_HW)		+= crypto/
 obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
-obj-$(CONFIG_MATHEMU)		+= math-emu/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373cd5badf1c..8e58c614c37d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -35,7 +35,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
 	def_bool y
 
 config ARCH_DMA_ADDR_T_64BIT
-	def_bool 64BIT
+	def_bool y
 
 config GENERIC_LOCKBREAK
 	def_bool y if SMP && PREEMPT
@@ -59,12 +59,13 @@ config PCI_QUIRKS
 	def_bool n
 
 config ARCH_SUPPORTS_UPROBES
-	def_bool 64BIT
+	def_bool y
 
 config S390
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -110,19 +111,19 @@ config S390
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
-	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
-	select HAVE_BPF_JIT if 64BIT && PACK_STACK
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_DYNAMIC_FTRACE if 64BIT
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_FUNCTION_GRAPH_TRACER if 64BIT
-	select HAVE_FUNCTION_TRACER if 64BIT
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
@@ -132,7 +133,8 @@ config S390
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
-	select HAVE_KVM if 64BIT
+	select HAVE_KVM
+	select HAVE_LIVEPATCH
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MEMBLOCK_PHYS_MAP
@@ -141,7 +143,6 @@ config S390
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_UID16 if 32BIT
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
@@ -155,10 +156,17 @@ config S390
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
+config PGTABLE_LEVELS
+	int
+	default 4 if 64BIT
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
 
+source "kernel/livepatch/Kconfig"
+
 menu "Processor type and features"
 
 config HAVE_MARCH_Z900_FEATURES
@@ -190,18 +198,11 @@ config HAVE_MARCH_Z13_FEATURES
 
 choice
 	prompt "Processor type"
-	default MARCH_G5
-
-config MARCH_G5
-	bool "System/390 model G5 and G6"
-	depends on !64BIT
-	help
-	  Select this to build a 31 bit kernel that works
-	  on all ESA/390 and z/Architecture machines.
+	default MARCH_Z900
 
 config MARCH_Z900
 	bool "IBM zSeries model z800 and z900"
-	select HAVE_MARCH_Z900_FEATURES if 64BIT
+	select HAVE_MARCH_Z900_FEATURES
 	help
 	  Select this to enable optimizations for model z800/z900 (2064 and
 	  2066 series). This will enable some optimizations that are not
@@ -209,7 +210,7 @@ config MARCH_Z900
 
 config MARCH_Z990
 	bool "IBM zSeries model z890 and z990"
-	select HAVE_MARCH_Z990_FEATURES if 64BIT
+	select HAVE_MARCH_Z990_FEATURES
 	help
 	  Select this to enable optimizations for model z890/z990 (2084 and
 	  2086 series). The kernel will be slightly faster but will not work
@@ -217,7 +218,7 @@ config MARCH_Z990
 
 config MARCH_Z9_109
 	bool "IBM System z9"
-	select HAVE_MARCH_Z9_109_FEATURES if 64BIT
+	select HAVE_MARCH_Z9_109_FEATURES
 	help
 	  Select this to enable optimizations for IBM System z9 (2094 and
 	  2096 series). The kernel will be slightly faster but will not work
@@ -225,7 +226,7 @@ config MARCH_Z9_109
 
 config MARCH_Z10
 	bool "IBM System z10"
-	select HAVE_MARCH_Z10_FEATURES if 64BIT
+	select HAVE_MARCH_Z10_FEATURES
 	help
 	  Select this to enable optimizations for IBM System z10 (2097 and
 	  2098 series). The kernel will be slightly faster but will not work
@@ -233,7 +234,7 @@ config MARCH_Z10
 
 config MARCH_Z196
 	bool "IBM zEnterprise 114 and 196"
-	select HAVE_MARCH_Z196_FEATURES if 64BIT
+	select HAVE_MARCH_Z196_FEATURES
 	help
 	  Select this to enable optimizations for IBM zEnterprise 114 and 196
 	  (2818 and 2817 series). The kernel will be slightly faster but will
@@ -241,7 +242,7 @@ config MARCH_Z196
 
 config MARCH_ZEC12
 	bool "IBM zBC12 and zEC12"
-	select HAVE_MARCH_ZEC12_FEATURES if 64BIT
+	select HAVE_MARCH_ZEC12_FEATURES
 	help
 	  Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
 	  2827 series). The kernel will be slightly faster but will not work on
@@ -249,7 +250,7 @@ config MARCH_ZEC12
 
 config MARCH_Z13
 	bool "IBM z13"
-	select HAVE_MARCH_Z13_FEATURES if 64BIT
+	select HAVE_MARCH_Z13_FEATURES
 	help
 	  Select this to enable optimizations for IBM z13 (2964 series).
 	  The kernel will be slightly faster but will not work on older
@@ -257,9 +258,6 @@ config MARCH_Z13
 
 endchoice
 
-config MARCH_G5_TUNE
-	def_bool TUNE_G5 || MARCH_G5 && TUNE_DEFAULT
-
 config MARCH_Z900_TUNE
 	def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT
 
@@ -298,9 +296,6 @@ config TUNE_DEFAULT
 	  Tune the generated code for the target processor for which the kernel
 	  will be compiled.
 
-config TUNE_G5
-	bool "System/390 model G5 and G6"
-
 config TUNE_Z900
 	bool "IBM zSeries model z800 and z900"
 
@@ -326,21 +321,14 @@ endchoice
 
 config 64BIT
 	def_bool y
-	prompt "64 bit kernel"
-	help
-	  Select this option if you have an IBM z/Architecture machine
-	  and want to use the 64 bit addressing mode.
-
-config 32BIT
-	def_bool y if !64BIT
 
 config COMPAT
 	def_bool y
 	prompt "Kernel support for 31 bit emulation"
-	depends on 64BIT
 	select COMPAT_BINFMT_ELF if BINFMT_ELF
 	select ARCH_WANT_OLD_COMPAT_IPC
 	select COMPAT_OLD_SIGACTION
+	depends on MULTIUSER
 	help
 	  Select this option if you want to enable your system kernel to
 	  handle system-calls from ELF binaries for 31 bit ESA.  This option
@@ -376,8 +364,7 @@ config NR_CPUS
 	int "Maximum number of CPUs (2-512)"
 	range 2 512
 	depends on SMP
-	default "32" if !64BIT
-	default "64" if 64BIT
+	default "64"
 	help
 	  This allows you to specify the maximum number of CPUs which this
 	  kernel will support. The maximum supported value is 512 and the
@@ -418,15 +405,6 @@ config SCHED_TOPOLOGY
 
 source kernel/Kconfig.preempt
 
-config MATHEMU
-	def_bool y
-	prompt "IEEE FPU emulation"
-	depends on MARCH_G5
-	help
-	  This option is required for IEEE compliant floating point arithmetic
-	  on older ESA/390 machines. Say Y unless you know your machine doesn't
-	  need this.
-
 source kernel/Kconfig.hz
 
 endmenu
@@ -437,7 +415,6 @@ config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_VMEMMAP_ENABLE
 	select SPARSEMEM_VMEMMAP
-	select SPARSEMEM_STATIC if !64BIT
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
@@ -453,7 +430,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y
-	depends on 64BIT
 
 config FORCE_MAX_ZONEORDER
 	int
@@ -528,7 +504,6 @@ config QDIO
 
 menuconfig PCI
 	bool "PCI support"
-	depends on 64BIT
 	select HAVE_DMA_ATTRS
 	select PCI_MSI
 	help
@@ -598,7 +573,6 @@ config CHSC_SCH
 
 config SCM_BUS
 	def_bool y
-	depends on 64BIT
 	prompt "SCM bus driver"
 	help
 	  Bus driver for Storage Class Memory.
@@ -620,7 +594,7 @@ menu "Dump support"
 
 config CRASH_DUMP
 	bool "kernel crash dumps"
-	depends on 64BIT && SMP
+	depends on SMP
 	select KEXEC
 	help
 	  Generate crash dump after being started by kexec.
@@ -659,7 +633,7 @@ endmenu
 menu "Power Management"
 
 config ARCH_HIBERNATION_POSSIBLE
-	def_bool y if 64BIT
+	def_bool y
 
 source "kernel/power/Kconfig"
 
@@ -810,7 +784,6 @@ source "arch/s390/kvm/Kconfig"
 config S390_GUEST
 	def_bool y
 	prompt "s390 support for virtio devices"
-	depends on 64BIT
 	select TTY
 	select VIRTUALIZATION
 	select VIRTIO
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index acb6859c6a95..667b1bca5681 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -13,15 +13,6 @@
 # Copyright (C) 1994 by Linus Torvalds
 #
 
-ifndef CONFIG_64BIT
-LD_BFD		:= elf32-s390
-LDFLAGS		:= -m elf_s390
-KBUILD_CFLAGS	+= -m31
-KBUILD_AFLAGS	+= -m31
-UTS_MACHINE	:= s390
-STACK_SIZE	:= 8192
-CHECKFLAGS	+= -D__s390__ -msize-long
-else
 LD_BFD		:= elf64-s390
 LDFLAGS		:= -m elf64_s390
 KBUILD_AFLAGS_MODULE += -fPIC
@@ -31,11 +22,9 @@ KBUILD_AFLAGS	+= -m64
 UTS_MACHINE	:= s390x
 STACK_SIZE	:= 16384
 CHECKFLAGS	+= -D__s390__ -D__s390x__
-endif
 
 export LD_BFD
 
-mflags-$(CONFIG_MARCH_G5)     := -march=g5
 mflags-$(CONFIG_MARCH_Z900)   := -march=z900
 mflags-$(CONFIG_MARCH_Z990)   := -march=z990
 mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
@@ -47,7 +36,6 @@ mflags-$(CONFIG_MARCH_Z13)   := -march=z13
 aflags-y += $(mflags-y)
 cflags-y += $(mflags-y)
 
-cflags-$(CONFIG_MARCH_G5_TUNE)		+= -mtune=g5
 cflags-$(CONFIG_MARCH_Z900_TUNE)	+= -mtune=z900
 cflags-$(CONFIG_MARCH_Z990_TUNE)	+= -mtune=z990
 cflags-$(CONFIG_MARCH_Z9_109_TUNE)	+= -mtune=z9-109
@@ -104,7 +92,7 @@ KBUILD_AFLAGS	+= $(aflags-y)
 OBJCOPYFLAGS	:= -O binary
 
 head-y		:= arch/s390/kernel/head.o
-head-y		+= arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o)
+head-y		+= arch/s390/kernel/head64.o
 
 # See arch/s390/Kbuild for content of core part of the kernel
 core-y		+= arch/s390/
@@ -129,9 +117,7 @@ zfcpdump:
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 vdso_install:
-ifeq ($(CONFIG_64BIT),y)
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
-endif
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
 
 archclean:
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index f90d1fc6d603..d4788111c161 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -4,13 +4,11 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-BITS := $(if $(CONFIG_64BIT),64,31)
-
 targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
-targets += misc.o piggy.o sizes.h head$(BITS).o
+targets += misc.o piggy.o sizes.h head.o
 
-KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
 KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
@@ -19,7 +17,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 GCOV_PROFILE := n
 
 OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o)
-OBJECTS += $(obj)/head$(BITS).o $(obj)/misc.o $(obj)/piggy.o
+OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
 
 LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
@@ -34,8 +32,8 @@ quiet_cmd_sizes = GEN $@
 $(obj)/sizes.h: vmlinux
 	$(call if_changed,sizes)
 
-AFLAGS_head$(BITS).o += -I$(obj)
-$(obj)/head$(BITS).o: $(obj)/sizes.h
+AFLAGS_head.o += -I$(obj)
+$(obj)/head.o: $(obj)/sizes.h
 
 CFLAGS_misc.o += -I$(obj)
 $(obj)/misc.o: $(obj)/sizes.h
diff --git a/arch/s390/boot/compressed/head64.S b/arch/s390/boot/compressed/head.S
index f86a4eef28a9..f86a4eef28a9 100644
--- a/arch/s390/boot/compressed/head64.S
+++ b/arch/s390/boot/compressed/head.S
diff --git a/arch/s390/boot/compressed/head31.S b/arch/s390/boot/compressed/head31.S
deleted file mode 100644
index e8c9e18b8039..000000000000
--- a/arch/s390/boot/compressed/head31.S
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Startup glue code to uncompress the kernel
- *
- * Copyright IBM Corp. 2010
- *
- *   Author(s):	Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-#include "sizes.h"
-
-__HEAD
-ENTRY(startup_continue)
-	basr	%r13,0			# get base
-.LPG1:
-	# setup stack
-	l	%r15,.Lstack-.LPG1(%r13)
-	ahi	%r15,-96
-	l	%r1,.Ldecompress-.LPG1(%r13)
-	basr	%r14,%r1
-	# setup registers for memory mover & branch to target
-	lr	%r4,%r2
-	l	%r2,.Loffset-.LPG1(%r13)
-	la	%r4,0(%r2,%r4)
-	l	%r3,.Lmvsize-.LPG1(%r13)
-	lr	%r5,%r3
-	# move the memory mover someplace safe
-	la	%r1,0x200
-	mvc	0(mover_end-mover,%r1),mover-.LPG1(%r13)
-	# decompress image is started at 0x11000
-	lr	%r6,%r2
-	br	%r1
-mover:
-	mvcle	%r2,%r4,0
-	jo	mover
-	br	%r6
-mover_end:
-
-	.align	8
-.Lstack:
-	.long	0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
-.Ldecompress:
-	.long	decompress_kernel
-.Loffset:
-	.long	0x11000
-.Lmvsize:
-	.long	SZ__bss_start
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 8e1fb8239287..747735f83426 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -1,12 +1,7 @@
 #include <asm-generic/vmlinux.lds.h>
 
-#ifdef CONFIG_64BIT
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
 OUTPUT_ARCH(s390:64-bit)
-#else
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-#endif
 
 ENTRY(startup)
 
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 6c5cc6da7111..ba3b2aefddf5 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -369,14 +369,10 @@ static inline int crypt_s390_func_available(int func,
 
 	if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
 		return 0;
-
-	if (facility_mask & CRYPT_S390_MSA3 &&
-	    (!test_facility(2) || !test_facility(76)))
+	if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76))
 		return 0;
-	if (facility_mask & CRYPT_S390_MSA4 &&
-	    (!test_facility(2) || !test_facility(77)))
+	if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
 		return 0;
-
 	switch (func & CRYPT_S390_OP_MASK) {
 	case CRYPT_S390_KM:
 		ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index d4c0d3717543..24c747a0fcc3 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -19,13 +19,9 @@
 static void diag0c(struct hypfs_diag0c_entry *entry)
 {
 	asm volatile (
-#ifdef CONFIG_64BIT
 		"	sam31\n"
 		"	diag	%0,%0,0x0c\n"
 		"	sam64\n"
-#else
-		"	diag %0,%0,0x0c\n"
-#endif
 		: /* no output register */
 		: "a" (entry)
 		: "memory");
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 99824ff8dd35..3f5c799b7fb5 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
@@ -437,8 +437,6 @@ struct dentry *hypfs_create_str(struct dentry *dir,
 static const struct file_operations hypfs_file_ops = {
 	.open		= hypfs_open,
 	.release	= hypfs_release,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= hypfs_read_iter,
 	.write_iter	= hypfs_write_iter,
 	.llseek		= no_llseek,
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index 32a705987156..16887c5fd989 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -9,28 +9,6 @@
 
 #include <asm/io.h>
 
-#ifndef CONFIG_64BIT
-
-#define APPLDATA_START_INTERVAL_REC	0x00	/* Function codes for */
-#define APPLDATA_STOP_REC		0x01	/* DIAG 0xDC	      */
-#define APPLDATA_GEN_EVENT_REC		0x02
-#define APPLDATA_START_CONFIG_REC	0x03
-
-/*
- * Parameter list for DIAGNOSE X'DC'
- */
-struct appldata_parameter_list {
-	u16 diag;		/* The DIAGNOSE code X'00DC'	      */
-	u8  function;		/* The function code for the DIAGNOSE */
-	u8  parlist_length;	/* Length of the parameter list       */
-	u32 product_id_addr;	/* Address of the 16-byte product ID  */
-	u16 reserved;
-	u16 buffer_length;	/* Length of the application data buffer  */
-	u32 buffer_addr;	/* Address of the application data buffer */
-} __attribute__ ((packed));
-
-#else /* CONFIG_64BIT */
-
 #define APPLDATA_START_INTERVAL_REC	0x80
 #define APPLDATA_STOP_REC		0x81
 #define APPLDATA_GEN_EVENT_REC		0x82
@@ -51,8 +29,6 @@ struct appldata_parameter_list {
 	u64 buffer_addr;
 } __attribute__ ((packed));
 
-#endif /* CONFIG_64BIT */
-
 struct appldata_product_id {
 	char prod_nr[7];	/* product number */
 	u16  prod_fn;		/* product function */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index fa934fe080c1..adbe3802e377 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -160,8 +160,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 
 #define ATOMIC64_INIT(i)  { (i) }
 
-#ifdef CONFIG_64BIT
-
 #define __ATOMIC64_NO_BARRIER	"\n"
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -274,99 +272,6 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
 
 #undef __ATOMIC64_LOOP
 
-#else /* CONFIG_64BIT */
-
-typedef struct {
-	long long counter;
-} atomic64_t;
-
-static inline long long atomic64_read(const atomic64_t *v)
-{
-	register_pair rp;
-
-	asm volatile(
-		"	lm	%0,%N0,%1"
-		: "=&d" (rp) : "Q" (v->counter)	);
-	return rp.pair;
-}
-
-static inline void atomic64_set(atomic64_t *v, long long i)
-{
-	register_pair rp = {.pair = i};
-
-	asm volatile(
-		"	stm	%1,%N1,%0"
-		: "=Q" (v->counter) : "d" (rp) );
-}
-
-static inline long long atomic64_xchg(atomic64_t *v, long long new)
-{
-	register_pair rp_new = {.pair = new};
-	register_pair rp_old;
-
-	asm volatile(
-		"	lm	%0,%N0,%1\n"
-		"0:	cds	%0,%2,%1\n"
-		"	jl	0b\n"
-		: "=&d" (rp_old), "+Q" (v->counter)
-		: "d" (rp_new)
-		: "cc");
-	return rp_old.pair;
-}
-
-static inline long long atomic64_cmpxchg(atomic64_t *v,
-					 long long old, long long new)
-{
-	register_pair rp_old = {.pair = old};
-	register_pair rp_new = {.pair = new};
-
-	asm volatile(
-		"	cds	%0,%2,%1"
-		: "+&d" (rp_old), "+Q" (v->counter)
-		: "d" (rp_new)
-		: "cc");
-	return rp_old.pair;
-}
-
-
-static inline long long atomic64_add_return(long long i, atomic64_t *v)
-{
-	long long old, new;
-
-	do {
-		old = atomic64_read(v);
-		new = old + i;
-	} while (atomic64_cmpxchg(v, old, new) != old);
-	return new;
-}
-
-static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v)
-{
-	long long old, new;
-
-	do {
-		old = atomic64_read(v);
-		new = old | mask;
-	} while (atomic64_cmpxchg(v, old, new) != old);
-}
-
-static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v)
-{
-	long long old, new;
-
-	do {
-		old = atomic64_read(v);
-		new = old & mask;
-	} while (atomic64_cmpxchg(v, old, new) != old);
-}
-
-static inline void atomic64_add(long long i, atomic64_t *v)
-{
-	atomic64_add_return(i, v);
-}
-
-#endif /* CONFIG_64BIT */
-
 static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
 {
 	long long c, old;
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 520542477678..9b68e98a724f 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -51,32 +51,6 @@
 
 #define __BITOPS_NO_BARRIER	"\n"
 
-#ifndef CONFIG_64BIT
-
-#define __BITOPS_OR		"or"
-#define __BITOPS_AND		"nr"
-#define __BITOPS_XOR		"xr"
-#define __BITOPS_BARRIER	"\n"
-
-#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier)	\
-({								\
-	unsigned long __old, __new;				\
-								\
-	typecheck(unsigned long *, (__addr));			\
-	asm volatile(						\
-		"	l	%0,%2\n"			\
-		"0:	lr	%1,%0\n"			\
-		__op_string "	%1,%3\n"			\
-		"	cs	%0,%1,%2\n"			\
-		"	jl	0b"				\
-		: "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
-		: "d" (__val)					\
-		: "cc", "memory");				\
-	__old;							\
-})
-
-#else /* CONFIG_64BIT */
-
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 
 #define __BITOPS_OR		"laog"
@@ -125,8 +99,6 @@
 
 #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
 
-#endif /* CONFIG_64BIT */
-
 #define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
 static inline unsigned long *
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 6259895fcd97..4eadec466b8c 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -80,15 +80,10 @@ extern void __cmpxchg_double_called_with_bad_pointer(void);
 ({									\
 	__typeof__(p1) __p1 = (p1);					\
 	__typeof__(p2) __p2 = (p2);					\
-	int __ret;							\
 	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
 	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
 	VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
-	if (sizeof(long) == 4)						\
-		__ret = __cmpxchg_double_4(__p1, __p2, o1, o2, n1, n2);	\
-	else								\
-		__ret = __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2);	\
-	__ret;								\
+	__cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2);			\
 })
 
 #define system_has_cmpxchg_double()	1
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index b91e960e4045..221b454c734a 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -22,15 +22,7 @@ typedef unsigned long long __nocast cputime64_t;
 
 static inline unsigned long __div(unsigned long long n, unsigned long base)
 {
-#ifndef CONFIG_64BIT
-	register_pair rp;
-
-	rp.pair = n >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
-	return rp.subreg.odd;
-#else /* CONFIG_64BIT */
 	return n / base;
-#endif /* CONFIG_64BIT */
 }
 
 #define cputime_one_jiffy		jiffies_to_cputime(1)
@@ -101,17 +93,8 @@ static inline void cputime_to_timespec(const cputime_t cputime,
 				       struct timespec *value)
 {
 	unsigned long long __cputime = (__force unsigned long long) cputime;
-#ifndef CONFIG_64BIT
-	register_pair rp;
-
-	rp.pair = __cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_SEC / 2));
-	value->tv_nsec = rp.subreg.even * NSEC_PER_USEC / CPUTIME_PER_USEC;
-	value->tv_sec = rp.subreg.odd;
-#else
 	value->tv_nsec = (__cputime % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC;
 	value->tv_sec = __cputime / CPUTIME_PER_SEC;
-#endif
 }
 
 /*
@@ -129,17 +112,8 @@ static inline void cputime_to_timeval(const cputime_t cputime,
 				      struct timeval *value)
 {
 	unsigned long long __cputime = (__force unsigned long long) cputime;
-#ifndef CONFIG_64BIT
-	register_pair rp;
-
-	rp.pair = __cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_USEC / 2));
-	value->tv_usec = rp.subreg.even / CPUTIME_PER_USEC;
-	value->tv_sec = rp.subreg.odd;
-#else
 	value->tv_usec = (__cputime % CPUTIME_PER_SEC) / CPUTIME_PER_USEC;
 	value->tv_sec = __cputime / CPUTIME_PER_SEC;
-#endif
 }
 
 /*
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 31ab9f346d7e..cfad7fca01d6 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -9,20 +9,12 @@
 
 #include <linux/bug.h>
 
-#ifdef CONFIG_64BIT
-# define __CTL_LOAD	"lctlg"
-# define __CTL_STORE	"stctg"
-#else
-# define __CTL_LOAD	"lctl"
-# define __CTL_STORE	"stctl"
-#endif
-
 #define __ctl_load(array, low, high) {					\
 	typedef struct { char _[sizeof(array)]; } addrtype;		\
 									\
 	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
 	asm volatile(							\
-		__CTL_LOAD " %1,%2,%0\n"				\
+		"	lctlg	%1,%2,%0\n"				\
 		: : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\
 }
 
@@ -31,7 +23,7 @@
 									\
 	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
 	asm volatile(							\
-		__CTL_STORE " %1,%2,%0\n"				\
+		"	stctg	%1,%2,%0\n"				\
 		: "=Q" (*(addrtype *)(&array))				\
 		: "i" (low), "i" (high));				\
 }
@@ -60,9 +52,7 @@ void smp_ctl_clear_bit(int cr, int bit);
 union ctlreg0 {
 	unsigned long val;
 	struct {
-#ifdef CONFIG_64BIT
 		unsigned long	   : 32;
-#endif
 		unsigned long	   : 3;
 		unsigned long lap  : 1; /* Low-address-protection control */
 		unsigned long	   : 4;
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 709955ddaa4d..9d395961e713 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -42,7 +42,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	if (!dev->dma_mask)
-		return 0;
+		return false;
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c9df40b5c0ac..3ad48f22de78 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -107,11 +107,7 @@
 /*
  * These are used to set parameters in the core dumps.
  */
-#ifndef CONFIG_64BIT
-#define ELF_CLASS	ELFCLASS32
-#else /* CONFIG_64BIT */
 #define ELF_CLASS	ELFCLASS64
-#endif /* CONFIG_64BIT */
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_S390
 
@@ -161,10 +157,11 @@ extern unsigned int vdso_enabled;
 /* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
    use of this is to invoke "./ld.so someprog" to test out a new version of
    the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-extern unsigned long randomize_et_dyn(void);
-#define ELF_ET_DYN_BASE		randomize_et_dyn()
+   that it will "exec", and that there is sufficient room for the brk. 64-bit
+   tasks are aligned to 4GB. */
+#define ELF_ET_DYN_BASE (is_32bit_task() ? \
+				(STACK_TOP / 3 * 2) : \
+				(STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports. */
@@ -211,7 +208,7 @@ do {								\
 
 extern unsigned long mmap_rnd_mask;
 
-#define STACK_RND_MASK	(mmap_rnd_mask)
+#define STACK_RND_MASK	(test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
 
 #define ARCH_DLINFO							    \
 do {									    \
@@ -225,9 +222,6 @@ struct linux_binprm;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 int arch_setup_additional_pages(struct linux_binprm *, int);
 
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs);
 
 #endif
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index ea5a6e45fd93..a7b2d7504049 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -19,11 +19,7 @@
 #include <asm/cio.h>
 #include <asm/uaccess.h>
 
-#ifdef CONFIG_64BIT
 #define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
-#else
-#define IDA_SIZE_LOG 11 /* 11 for 2k , 12 for 4k */
-#endif
 #define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
 
 /*
@@ -32,11 +28,7 @@
 static inline int
 idal_is_needed(void *vaddr, unsigned int length)
 {
-#ifdef CONFIG_64BIT
 	return ((__pa(vaddr) + length - 1) >> 31) != 0;
-#else
-	return 0;
-#endif
 }
 
 
@@ -77,7 +69,6 @@ static inline unsigned long *idal_create_words(unsigned long *idaws,
 static inline int
 set_normalized_cda(struct ccw1 * ccw, void *vaddr)
 {
-#ifdef CONFIG_64BIT
 	unsigned int nridaws;
 	unsigned long *idal;
 
@@ -93,7 +84,6 @@ set_normalized_cda(struct ccw1 * ccw, void *vaddr)
 		ccw->flags |= CCW_FLAG_IDA;
 		vaddr = idal;
 	}
-#endif
 	ccw->cda = (__u32)(unsigned long) vaddr;
 	return 0;
 }
@@ -104,12 +94,10 @@ set_normalized_cda(struct ccw1 * ccw, void *vaddr)
 static inline void
 clear_normalized_cda(struct ccw1 * ccw)
 {
-#ifdef CONFIG_64BIT
 	if (ccw->flags & CCW_FLAG_IDA) {
 		kfree((void *)(unsigned long) ccw->cda);
 		ccw->flags &= ~CCW_FLAG_IDA;
 	}
-#endif
 	ccw->cda = 0;
 }
 
@@ -181,12 +169,8 @@ idal_buffer_free(struct idal_buffer *ib)
 static inline int
 __idal_buffer_is_needed(struct idal_buffer *ib)
 {
-#ifdef CONFIG_64BIT
 	return ib->size > (4096ul << ib->page_order) ||
 		idal_is_needed(ib->data[0], ib->size);
-#else
-	return ib->size > (4096ul << ib->page_order);
-#endif
 }
 
 /*
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 343ea7c987aa..ff95d15a2384 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -57,7 +57,6 @@ enum interruption_class {
 	IRQIO_TAP,
 	IRQIO_VMR,
 	IRQIO_LCS,
-	IRQIO_CLW,
 	IRQIO_CTC,
 	IRQIO_APB,
 	IRQIO_ADM,
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 58642fd29c87..69972b7957ee 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -1,19 +1,13 @@
 #ifndef _ASM_S390_JUMP_LABEL_H
 #define _ASM_S390_JUMP_LABEL_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 6
 #define JUMP_LABEL_NOP_OFFSET 2
 
-#ifdef CONFIG_64BIT
-#define ASM_PTR ".quad"
-#define ASM_ALIGN ".balign 8"
-#else
-#define ASM_PTR ".long"
-#define ASM_ALIGN ".balign 4"
-#endif
-
 /*
  * We use a brcl 0,2 instruction for jump labels at compile time so it
  * can be easily distinguished from a hotpatch generated instruction.
@@ -22,8 +16,8 @@ static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm_volatile_goto("0:	brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
 		".pushsection __jump_table, \"aw\"\n"
-		ASM_ALIGN "\n"
-		ASM_PTR " 0b, %l[label], %0\n"
+		".balign 8\n"
+		".quad 0b, %l[label], %0\n"
 		".popsection\n"
 		: : "X" (key) : : label);
 	return false;
@@ -39,4 +33,5 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d84559e31f32..d01fc588b5c3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -172,7 +172,9 @@ struct kvm_s390_sie_block {
 	__u32	fac;			/* 0x01a0 */
 	__u8	reserved1a4[20];	/* 0x01a4 */
 	__u64	cbrlo;			/* 0x01b8 */
-	__u8	reserved1c0[30];	/* 0x01c0 */
+	__u8	reserved1c0[8];		/* 0x01c0 */
+	__u32	ecd;			/* 0x01c8 */
+	__u8	reserved1cc[18];	/* 0x01cc */
 	__u64	pp;			/* 0x01de */
 	__u8	reserved1e6[2];		/* 0x01e6 */
 	__u64	itdba;			/* 0x01e8 */
@@ -183,11 +185,17 @@ struct kvm_s390_itdb {
 	__u8	data[256];
 } __packed;
 
+struct kvm_s390_vregs {
+	__vector128 vrs[32];
+	__u8	reserved200[512];	/* for future vector expansion */
+} __packed;
+
 struct sie_page {
 	struct kvm_s390_sie_block sie_block;
 	__u8 reserved200[1024];		/* 0x0200 */
 	struct kvm_s390_itdb itdb;	/* 0x0600 */
-	__u8 reserved700[2304];		/* 0x0700 */
+	__u8 reserved700[1280];		/* 0x0700 */
+	struct kvm_s390_vregs vregs;	/* 0x0c00 */
 } __packed;
 
 struct kvm_vcpu_stat {
@@ -238,6 +246,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_sigp_stop;
 	u32 instruction_sigp_stop_store_status;
 	u32 instruction_sigp_store_status;
+	u32 instruction_sigp_store_adtl_status;
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
@@ -270,6 +279,7 @@ struct kvm_vcpu_stat {
 #define PGM_SPECIAL_OPERATION		0x13
 #define PGM_OPERAND			0x15
 #define PGM_TRACE_TABEL			0x16
+#define PGM_VECTOR_PROCESSING		0x1b
 #define PGM_SPACE_SWITCH		0x1c
 #define PGM_HFP_SQUARE_ROOT		0x1d
 #define PGM_PC_TRANSLATION_SPEC		0x1f
@@ -334,6 +344,11 @@ enum irq_types {
 	IRQ_PEND_COUNT
 };
 
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
 /*
  * Repressible (non-floating) machine check interrupts
  * subclass bits in MCIC
@@ -411,13 +426,32 @@ struct kvm_s390_local_interrupt {
 	unsigned long pending_irqs;
 };
 
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT   8
+#define FIRQ_LIST_VIRTIO   9
+#define FIRQ_LIST_COUNT   10
+#define FIRQ_CNTR_IO       0
+#define FIRQ_CNTR_SERVICE  1
+#define FIRQ_CNTR_VIRTIO   2
+#define FIRQ_CNTR_PFAULT   3
+#define FIRQ_MAX_COUNT     4
+
 struct kvm_s390_float_interrupt {
+	unsigned long pending_irqs;
 	spinlock_t lock;
-	struct list_head list;
-	atomic_t active;
+	struct list_head lists[FIRQ_LIST_COUNT];
+	int counters[FIRQ_MAX_COUNT];
+	struct kvm_s390_mchk_info mchk;
+	struct kvm_s390_ext_info srv_signal;
 	int next_rr_cpu;
 	unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
-	unsigned int irq_count;
 };
 
 struct kvm_hw_wp_info_arch {
@@ -465,6 +499,7 @@ struct kvm_vcpu_arch {
 	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
 	s390_fp_regs      guest_fpregs;
+	struct kvm_s390_vregs	*host_vregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
@@ -515,15 +550,15 @@ struct s390_io_adapter {
 #define S390_ARCH_FAC_MASK_SIZE_U64 \
 	(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
 
-struct s390_model_fac {
-	/* facilities used in SIE context */
-	__u64 sie[S390_ARCH_FAC_LIST_SIZE_U64];
-	/* subset enabled by kvm */
-	__u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64];
+struct kvm_s390_fac {
+	/* facility list requested by guest */
+	__u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
+	/* facility mask supported by kvm & hosting machine */
+	__u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
 };
 
 struct kvm_s390_cpu_model {
-	struct s390_model_fac *fac;
+	struct kvm_s390_fac *fac;
 	struct cpuid cpu_id;
 	unsigned short ibc;
 };
@@ -553,6 +588,7 @@ struct kvm_arch{
 	int use_cmma;
 	int user_cpu_state_ctrl;
 	int user_sigp;
+	int user_stsi;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
new file mode 100644
index 000000000000..7aa799134a11
--- /dev/null
+++ b/arch/s390/include/asm/livepatch.h
@@ -0,0 +1,43 @@
+/*
+ * livepatch.h - s390-specific Kernel Live Patching Core
+ *
+ *  Copyright (c) 2013-2015 SUSE
+ *   Authors: Jiri Kosina
+ *	      Vojtech Pavlik
+ *	      Jiri Slaby
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef ASM_LIVEPATCH_H
+#define ASM_LIVEPATCH_H
+
+#include <linux/module.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+	return 0;
+}
+
+static inline int klp_write_module_reloc(struct module *mod, unsigned long
+		type, unsigned long loc, unsigned long value)
+{
+	/* not supported yet */
+	return -ENOSYS;
+}
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->psw.addr = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 34fbcac61133..663f23e37460 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -13,163 +13,6 @@
 #include <asm/cpu.h>
 #include <asm/types.h>
 
-#ifdef CONFIG_32BIT
-
-#define LC_ORDER 0
-#define LC_PAGES 1
-
-struct save_area {
-	u32	ext_save;
-	u64	timer;
-	u64	clk_cmp;
-	u8	pad1[24];
-	u8	psw[8];
-	u32	pref_reg;
-	u8	pad2[20];
-	u32	acc_regs[16];
-	u64	fp_regs[4];
-	u32	gp_regs[16];
-	u32	ctrl_regs[16];
-} __packed;
-
-struct save_area_ext {
-	struct save_area	sa;
-	__vector128		vx_regs[32];
-};
-
-struct _lowcore {
-	psw_t	restart_psw;			/* 0x0000 */
-	psw_t	restart_old_psw;		/* 0x0008 */
-	__u8	pad_0x0010[0x0014-0x0010];	/* 0x0010 */
-	__u32	ipl_parmblock_ptr;		/* 0x0014 */
-	psw_t	external_old_psw;		/* 0x0018 */
-	psw_t	svc_old_psw;			/* 0x0020 */
-	psw_t	program_old_psw;		/* 0x0028 */
-	psw_t	mcck_old_psw;			/* 0x0030 */
-	psw_t	io_old_psw;			/* 0x0038 */
-	__u8	pad_0x0040[0x0058-0x0040];	/* 0x0040 */
-	psw_t	external_new_psw;		/* 0x0058 */
-	psw_t	svc_new_psw;			/* 0x0060 */
-	psw_t	program_new_psw;		/* 0x0068 */
-	psw_t	mcck_new_psw;			/* 0x0070 */
-	psw_t	io_new_psw;			/* 0x0078 */
-	__u32	ext_params;			/* 0x0080 */
-	__u16	ext_cpu_addr;			/* 0x0084 */
-	__u16	ext_int_code;			/* 0x0086 */
-	__u16	svc_ilc;			/* 0x0088 */
-	__u16	svc_code;			/* 0x008a */
-	__u16	pgm_ilc;			/* 0x008c */
-	__u16	pgm_code;			/* 0x008e */
-	__u32	trans_exc_code;			/* 0x0090 */
-	__u16	mon_class_num;			/* 0x0094 */
-	__u8	per_code;			/* 0x0096 */
-	__u8	per_atmid;			/* 0x0097 */
-	__u32	per_address;			/* 0x0098 */
-	__u32	monitor_code;			/* 0x009c */
-	__u8	exc_access_id;			/* 0x00a0 */
-	__u8	per_access_id;			/* 0x00a1 */
-	__u8	op_access_id;			/* 0x00a2 */
-	__u8	ar_mode_id;			/* 0x00a3 */
-	__u8	pad_0x00a4[0x00b8-0x00a4];	/* 0x00a4 */
-	__u16	subchannel_id;			/* 0x00b8 */
-	__u16	subchannel_nr;			/* 0x00ba */
-	__u32	io_int_parm;			/* 0x00bc */
-	__u32	io_int_word;			/* 0x00c0 */
-	__u8	pad_0x00c4[0x00c8-0x00c4];	/* 0x00c4 */
-	__u32	stfl_fac_list;			/* 0x00c8 */
-	__u8	pad_0x00cc[0x00d4-0x00cc];	/* 0x00cc */
-	__u32	extended_save_area_addr;	/* 0x00d4 */
-	__u32	cpu_timer_save_area[2];		/* 0x00d8 */
-	__u32	clock_comp_save_area[2];	/* 0x00e0 */
-	__u32	mcck_interruption_code[2];	/* 0x00e8 */
-	__u8	pad_0x00f0[0x00f4-0x00f0];	/* 0x00f0 */
-	__u32	external_damage_code;		/* 0x00f4 */
-	__u32	failing_storage_address;	/* 0x00f8 */
-	__u8	pad_0x00fc[0x0100-0x00fc];	/* 0x00fc */
-	psw_t	psw_save_area;			/* 0x0100 */
-	__u32	prefixreg_save_area;		/* 0x0108 */
-	__u8	pad_0x010c[0x0120-0x010c];	/* 0x010c */
-
-	/* CPU register save area: defined by architecture */
-	__u32	access_regs_save_area[16];	/* 0x0120 */
-	__u32	floating_pt_save_area[8];	/* 0x0160 */
-	__u32	gpregs_save_area[16];		/* 0x0180 */
-	__u32	cregs_save_area[16];		/* 0x01c0 */
-
-	/* Save areas. */
-	__u32	save_area_sync[8];		/* 0x0200 */
-	__u32	save_area_async[8];		/* 0x0220 */
-	__u32	save_area_restart[1];		/* 0x0240 */
-
-	/* CPU flags. */
-	__u32	cpu_flags;			/* 0x0244 */
-
-	/* Return psws. */
-	psw_t	return_psw;			/* 0x0248 */
-	psw_t	return_mcck_psw;		/* 0x0250 */
-
-	/* CPU time accounting values */
-	__u64	sync_enter_timer;		/* 0x0258 */
-	__u64	async_enter_timer;		/* 0x0260 */
-	__u64	mcck_enter_timer;		/* 0x0268 */
-	__u64	exit_timer;			/* 0x0270 */
-	__u64	user_timer;			/* 0x0278 */
-	__u64	system_timer;			/* 0x0280 */
-	__u64	steal_timer;			/* 0x0288 */
-	__u64	last_update_timer;		/* 0x0290 */
-	__u64	last_update_clock;		/* 0x0298 */
-	__u64	int_clock;			/* 0x02a0 */
-	__u64	mcck_clock;			/* 0x02a8 */
-	__u64	clock_comparator;		/* 0x02b0 */
-
-	/* Current process. */
-	__u32	current_task;			/* 0x02b8 */
-	__u32	thread_info;			/* 0x02bc */
-	__u32	kernel_stack;			/* 0x02c0 */
-
-	/* Interrupt, panic and restart stack. */
-	__u32	async_stack;			/* 0x02c4 */
-	__u32	panic_stack;			/* 0x02c8 */
-	__u32	restart_stack;			/* 0x02cc */
-
-	/* Restart function and parameter. */
-	__u32	restart_fn;			/* 0x02d0 */
-	__u32	restart_data;			/* 0x02d4 */
-	__u32	restart_source;			/* 0x02d8 */
-
-	/* Address space pointer. */
-	__u32	kernel_asce;			/* 0x02dc */
-	__u32	user_asce;			/* 0x02e0 */
-	__u32	current_pid;			/* 0x02e4 */
-
-	/* SMP info area */
-	__u32	cpu_nr;				/* 0x02e8 */
-	__u32	softirq_pending;		/* 0x02ec */
-	__u32	percpu_offset;			/* 0x02f0 */
-	__u32	machine_flags;			/* 0x02f4 */
-	__u8	pad_0x02f8[0x02fc-0x02f8];	/* 0x02f8 */
-	__u32	spinlock_lockval;		/* 0x02fc */
-
-	__u8	pad_0x0300[0x0e00-0x0300];	/* 0x0300 */
-
-	/*
-	 * 0xe00 contains the address of the IPL Parameter Information
-	 * block. Dump tools need IPIB for IPL after dump.
-	 * Note: do not change the position of any fields in 0x0e00-0x0f00
-	 */
-	__u32	ipib;				/* 0x0e00 */
-	__u32	ipib_checksum;			/* 0x0e04 */
-	__u32	vmcore_info;			/* 0x0e08 */
-	__u8	pad_0x0e0c[0x0e18-0x0e0c];	/* 0x0e0c */
-	__u32	os_info;			/* 0x0e18 */
-	__u8	pad_0x0e1c[0x0f00-0x0e1c];	/* 0x0e1c */
-
-	/* Extended facility list */
-	__u64	stfle_fac_list[32];		/* 0x0f00 */
-} __packed;
-
-#else /* CONFIG_32BIT */
-
 #define LC_ORDER 1
 #define LC_PAGES 2
 
@@ -354,8 +197,6 @@ struct _lowcore {
 	__u8	vector_save_area[1024];		/* 0x1c00 */
 } __packed;
 
-#endif /* CONFIG_32BIT */
-
 #define S390_lowcore (*((struct _lowcore *) 0))
 
 extern struct _lowcore *lowcore_ptr[];
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index 9977e08df5bd..b55a59e1d134 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -8,7 +8,7 @@
 
 #include <uapi/asm/mman.h>
 
-#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
+#ifndef __ASSEMBLY__
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
 #define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
 #endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index f49b71954654..d25d9ff10ba8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -19,9 +19,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	atomic_set(&mm->context.attach_count, 0);
 	mm->context.flush_mm = 0;
 	mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
-#ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
-#endif
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
 	mm->context.asce_limit = STACK_TOP_MAX;
@@ -62,6 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	int cpu = smp_processor_id();
 
+	S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
 	if (prev == next)
 		return;
 	if (MACHINE_HAS_TLB_LC)
@@ -73,7 +72,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	atomic_dec(&prev->context.attach_count);
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
-	S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
 }
 
 #define finish_arch_post_lock_switch finish_arch_post_lock_switch
@@ -110,10 +108,8 @@ static inline void activate_mm(struct mm_struct *prev,
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
 				 struct mm_struct *mm)
 {
-#ifdef CONFIG_64BIT
 	if (oldmm->context.asce_limit < mm->context.asce_limit)
 		crst_table_downgrade(mm, oldmm->context.asce_limit);
-#endif
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 7b2ac6e44166..53eacbd4f09b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -37,16 +37,7 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end
 #endif
 }
 
-static inline void clear_page(void *page)
-{
-	register unsigned long reg1 asm ("1") = 0;
-	register void *reg2 asm ("2") = page;
-	register unsigned long reg3 asm ("3") = 4096;
-	asm volatile(
-		"	mvcl	2,0"
-		: "+d" (reg2), "+d" (reg3) : "d" (reg1)
-		: "memory", "cc");
-}
+#define clear_page(page)	memset((page), 0, PAGE_SIZE)
 
 /*
  * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index ef803c202d42..a648338c434a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -7,6 +7,7 @@
 #define PCI_BAR_COUNT	6
 
 #include <linux/pci.h>
+#include <linux/mutex.h>
 #include <asm-generic/pci.h>
 #include <asm-generic/pci-dma-compat.h>
 #include <asm/pci_clp.h>
@@ -44,10 +45,6 @@ struct zpci_fmb {
 	u64 rpcit_ops;
 	u64 dma_rbytes;
 	u64 dma_wbytes;
-	/* software counters */
-	atomic64_t allocated_pages;
-	atomic64_t mapped_pages;
-	atomic64_t unmapped_pages;
 } __packed __aligned(16);
 
 enum zpci_state {
@@ -80,6 +77,7 @@ struct zpci_dev {
 	u8		pft;		/* pci function type */
 	u16		domain;
 
+	struct mutex lock;
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
 	u32 uid;			/* user defined id */
 	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
@@ -111,6 +109,10 @@ struct zpci_dev {
 	/* Function measurement block */
 	struct zpci_fmb *fmb;
 	u16		fmb_update;	/* update interval */
+	/* software counters */
+	atomic64_t allocated_pages;
+	atomic64_t mapped_pages;
+	atomic64_t unmapped_pages;
 
 	enum pci_bus_speed max_bus_speed;
 
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 933355e0d091..6d6556ca24aa 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -10,8 +10,6 @@
  */
 #define __my_cpu_offset S390_lowcore.percpu_offset
 
-#ifdef CONFIG_64BIT
-
 /*
  * For 64 bit module code, the module may be more than 4G above the
  * per cpu area, use weak definitions to force the compiler to
@@ -183,8 +181,6 @@
 #define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
 #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
 
-#endif /* CONFIG_64BIT */
-
 #include <asm-generic/percpu.h>
 
 #endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 159a8ec6da9a..4cb19fe76dd9 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -9,8 +9,6 @@
 #ifndef _ASM_S390_PERF_EVENT_H
 #define _ASM_S390_PERF_EVENT_H
 
-#ifdef CONFIG_64BIT
-
 #include <linux/perf_event.h>
 #include <linux/device.h>
 #include <asm/cpu_mf.h>
@@ -92,5 +90,4 @@ struct sf_raw_sample {
 int perf_reserve_sampling(void);
 void perf_release_sampling(void);
 
-#endif /* CONFIG_64BIT */
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 3009c2ba46d2..51e7fb634ebc 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -33,11 +33,7 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 	*s = val;
 	n = (n / 256) - 1;
 	asm volatile(
-#ifdef CONFIG_64BIT
 		"	mvc	8(248,%0),0(%0)\n"
-#else
-		"	mvc	4(252,%0),0(%0)\n"
-#endif
 		"0:	mvc	256(256,%0),0(%0)\n"
 		"	la	%0,256(%0)\n"
 		"	brct	%1,0b\n"
@@ -50,24 +46,6 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 	clear_table(crst, entry, sizeof(unsigned long)*2048);
 }
 
-#ifndef CONFIG_64BIT
-
-static inline unsigned long pgd_entry_type(struct mm_struct *mm)
-{
-	return _SEGMENT_ENTRY_EMPTY;
-}
-
-#define pud_alloc_one(mm,address)		({ BUG(); ((pud_t *)2); })
-#define pud_free(mm, x)				do { } while (0)
-
-#define pmd_alloc_one(mm,address)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x)				do { } while (0)
-
-#define pgd_populate(mm, pgd, pud)		BUG()
-#define pud_populate(mm, pud, pmd)		BUG()
-
-#else /* CONFIG_64BIT */
-
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
 	if (mm->context.asce_limit <= (1UL << 31))
@@ -119,8 +97,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
 }
 
-#endif /* CONFIG_64BIT */
-
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	spin_lock_init(&mm->context.list_lock);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e08ec38f8c6e..989cfae9e202 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -66,15 +66,9 @@ extern unsigned long zero_page_mask;
  * table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
  */
-#ifndef CONFIG_64BIT
-# define PMD_SHIFT	20
-# define PUD_SHIFT	20
-# define PGDIR_SHIFT	20
-#else /* CONFIG_64BIT */
-# define PMD_SHIFT	20
-# define PUD_SHIFT	31
-# define PGDIR_SHIFT	42
-#endif /* CONFIG_64BIT */
+#define PMD_SHIFT	20
+#define PUD_SHIFT	31
+#define PGDIR_SHIFT	42
 
 #define PMD_SIZE        (1UL << PMD_SHIFT)
 #define PMD_MASK        (~(PMD_SIZE-1))
@@ -90,15 +84,8 @@ extern unsigned long zero_page_mask;
  * that leads to 1024 pte per pgd
  */
 #define PTRS_PER_PTE	256
-#ifndef CONFIG_64BIT
-#define __PAGETABLE_PUD_FOLDED
-#define PTRS_PER_PMD	1
-#define __PAGETABLE_PMD_FOLDED
-#define PTRS_PER_PUD	1
-#else /* CONFIG_64BIT */
 #define PTRS_PER_PMD	2048
 #define PTRS_PER_PUD	2048
-#endif /* CONFIG_64BIT */
 #define PTRS_PER_PGD	2048
 
 #define FIRST_USER_ADDRESS  0UL
@@ -127,23 +114,19 @@ extern struct page *vmemmap;
 
 #define VMEM_MAX_PHYS ((unsigned long) vmemmap)
 
-#ifdef CONFIG_64BIT
 extern unsigned long MODULES_VADDR;
 extern unsigned long MODULES_END;
 #define MODULES_VADDR	MODULES_VADDR
 #define MODULES_END	MODULES_END
 #define MODULES_LEN	(1UL << 31)
-#endif
 
 static inline int is_module_addr(void *addr)
 {
-#ifdef CONFIG_64BIT
 	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
 	if (addr < (void *)MODULES_VADDR)
 		return 0;
 	if (addr > (void *)MODULES_END)
 		return 0;
-#endif
 	return 1;
 }
 
@@ -284,56 +267,6 @@ static inline int is_module_addr(void *addr)
  * pte_swap    is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
  */
 
-#ifndef CONFIG_64BIT
-
-/* Bits in the segment table address-space-control-element */
-#define _ASCE_SPACE_SWITCH	0x80000000UL	/* space switch event	    */
-#define _ASCE_ORIGIN_MASK	0x7ffff000UL	/* segment table origin	    */
-#define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
-#define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
-#define _ASCE_TABLE_LENGTH	0x7f	/* 128 x 64 entries = 8k	    */
-
-/* Bits in the segment table entry */
-#define _SEGMENT_ENTRY_BITS	0x7fffffffUL	/* Valid segment table bits */
-#define _SEGMENT_ENTRY_ORIGIN	0x7fffffc0UL	/* page table origin	    */
-#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
-#define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
-#define _SEGMENT_ENTRY_COMMON	0x10	/* common segment bit		    */
-#define _SEGMENT_ENTRY_PTL	0x0f	/* page table length		    */
-
-#define _SEGMENT_ENTRY_DIRTY	0	/* No sw dirty bit for 31-bit */
-#define _SEGMENT_ENTRY_YOUNG	0	/* No sw young bit for 31-bit */
-#define _SEGMENT_ENTRY_READ	0	/* No sw read bit for 31-bit */
-#define _SEGMENT_ENTRY_WRITE	0	/* No sw write bit for 31-bit */
-#define _SEGMENT_ENTRY_LARGE	0	/* No large pages for 31-bit */
-#define _SEGMENT_ENTRY_BITS_LARGE 0
-#define _SEGMENT_ENTRY_ORIGIN_LARGE 0
-
-#define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
-#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)
-
-/*
- * Segment table entry encoding (I = invalid, R = read-only bit):
- *		..R...I.....
- * prot-none	..1...1.....
- * read-only	..1...0.....
- * read-write	..0...0.....
- * empty	..0...1.....
- */
-
-/* Page status table bits for virtualization */
-#define PGSTE_ACC_BITS	0xf0000000UL
-#define PGSTE_FP_BIT	0x08000000UL
-#define PGSTE_PCL_BIT	0x00800000UL
-#define PGSTE_HR_BIT	0x00400000UL
-#define PGSTE_HC_BIT	0x00200000UL
-#define PGSTE_GR_BIT	0x00040000UL
-#define PGSTE_GC_BIT	0x00020000UL
-#define PGSTE_UC_BIT	0x00008000UL	/* user dirty (migration) */
-#define PGSTE_IN_BIT	0x00004000UL	/* IPTE notify bit */
-
-#else /* CONFIG_64BIT */
-
 /* Bits in the segment/region table address-space-control-element */
 #define _ASCE_ORIGIN		~0xfffUL/* segment table origin		    */
 #define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
@@ -417,8 +350,6 @@ static inline int is_module_addr(void *addr)
 #define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
 #define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
 
-#endif /* CONFIG_64BIT */
-
 /* Guest Page State used for virtualization */
 #define _PGSTE_GPS_ZERO		0x0000000080000000UL
 #define _PGSTE_GPS_USAGE_MASK	0x0000000003000000UL
@@ -509,19 +440,6 @@ static inline int mm_use_skey(struct mm_struct *mm)
 /*
  * pgd/pmd/pte query functions
  */
-#ifndef CONFIG_64BIT
-
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline int pgd_none(pgd_t pgd)    { return 0; }
-static inline int pgd_bad(pgd_t pgd)     { return 0; }
-
-static inline int pud_present(pud_t pud) { return 1; }
-static inline int pud_none(pud_t pud)	 { return 0; }
-static inline int pud_large(pud_t pud)	 { return 0; }
-static inline int pud_bad(pud_t pud)	 { return 0; }
-
-#else /* CONFIG_64BIT */
-
 static inline int pgd_present(pgd_t pgd)
 {
 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
@@ -583,8 +501,6 @@ static inline int pud_bad(pud_t pud)
 	return (pud_val(pud) & mask) != 0;
 }
 
-#endif /* CONFIG_64BIT */
-
 static inline int pmd_present(pmd_t pmd)
 {
 	return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
@@ -916,18 +832,14 @@ static inline int pte_unused(pte_t pte)
 
 static inline void pgd_clear(pgd_t *pgd)
 {
-#ifdef CONFIG_64BIT
 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
 		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
-#endif
 }
 
 static inline void pud_clear(pud_t *pud)
 {
-#ifdef CONFIG_64BIT
 	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
 		pud_val(*pud) = _REGION3_ENTRY_EMPTY;
-#endif
 }
 
 static inline void pmd_clear(pmd_t *pmdp)
@@ -1026,10 +938,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-#ifndef CONFIG_64BIT
-	/* pto in ESA mode must point to the start of the segment table */
-	pto &= 0x7ffffc00;
-#endif
 	/* Invalidation + global TLB flush for the pte */
 	asm volatile(
 		"	ipte	%2,%3"
@@ -1040,10 +948,6 @@ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-#ifndef CONFIG_64BIT
-	/* pto in ESA mode must point to the start of the segment table */
-	pto &= 0x7ffffc00;
-#endif
 	/* Invalidation + local TLB flush for the pte */
 	asm volatile(
 		"	.insn rrf,0xb2210000,%2,%3,0,1"
@@ -1054,10 +958,6 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-#ifndef CONFIG_64BIT
-	/* pto in ESA mode must point to the start of the segment table */
-	pto &= 0x7ffffc00;
-#endif
 	/* Invalidate a range of ptes + global TLB flush of the ptes */
 	do {
 		asm volatile(
@@ -1376,17 +1276,6 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-#ifndef CONFIG_64BIT
-
-#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
-#define pud_deref(pmd) ({ BUG(); 0UL; })
-#define pgd_deref(pmd) ({ BUG(); 0UL; })
-
-#define pud_offset(pgd, address) ((pud_t *) pgd)
-#define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address))
-
-#else /* CONFIG_64BIT */
-
 #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
 #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
 #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
@@ -1407,8 +1296,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 	return pmd + pmd_index(address);
 }
 
-#endif /* CONFIG_64BIT */
-
 #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
 #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
 #define pte_page(x) pfn_to_page(pte_pfn(x))
@@ -1729,11 +1616,9 @@ static inline int has_transparent_hugepage(void)
  *  0000000000111111111122222222223333333333444444444455 5555 5 55566 66
  *  0123456789012345678901234567890123456789012345678901 2345 6 78901 23
  */
-#ifndef CONFIG_64BIT
-#define __SWP_OFFSET_MASK (~0UL >> 12)
-#else
+
 #define __SWP_OFFSET_MASK (~0UL >> 11)
-#endif
+
 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 {
 	pte_t pte;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e7cbbdcdee13..dedb6218544b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -19,7 +19,6 @@
 #define _CIF_ASCE		(1<<CIF_ASCE)
 #define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
 
-
 #ifndef __ASSEMBLY__
 
 #include <linux/linkage.h>
@@ -66,13 +65,6 @@ extern void execve_tail(void);
 /*
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
  */
-#ifndef CONFIG_64BIT
-
-#define TASK_SIZE		(1UL << 31)
-#define TASK_MAX_SIZE		(1UL << 31)
-#define TASK_UNMAPPED_BASE	(1UL << 30)
-
-#else /* CONFIG_64BIT */
 
 #define TASK_SIZE_OF(tsk)	((tsk)->mm->context.asce_limit)
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
@@ -80,15 +72,8 @@ extern void execve_tail(void);
 #define TASK_SIZE		TASK_SIZE_OF(current)
 #define TASK_MAX_SIZE		(1UL << 53)
 
-#endif /* CONFIG_64BIT */
-
-#ifndef CONFIG_64BIT
-#define STACK_TOP		(1UL << 31)
-#define STACK_TOP_MAX		(1UL << 31)
-#else /* CONFIG_64BIT */
 #define STACK_TOP		(1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
 #define STACK_TOP_MAX		(1UL << 42)
-#endif /* CONFIG_64BIT */
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
@@ -115,10 +100,8 @@ struct thread_struct {
 	/* cpu runtime instrumentation */
 	struct runtime_instr_cb *ri_cb;
 	int ri_signum;
-#ifdef CONFIG_64BIT
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
 	__vector128 *vxrs;		/* Vector register save area */
-#endif
 };
 
 /* Flag to disable transactions. */
@@ -181,11 +164,7 @@ struct task_struct;
 struct mm_struct;
 struct seq_file;
 
-#ifdef CONFIG_64BIT
-extern void show_cacheinfo(struct seq_file *m);
-#else
-static inline void show_cacheinfo(struct seq_file *m) { }
-#endif
+void show_cacheinfo(struct seq_file *m);
 
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
@@ -229,11 +208,7 @@ static inline void psw_set_key(unsigned int key)
  */
 static inline void __load_psw(psw_t psw)
 {
-#ifndef CONFIG_64BIT
-	asm volatile("lpsw  %0" : : "Q" (psw) : "cc");
-#else
 	asm volatile("lpswe %0" : : "Q" (psw) : "cc");
-#endif
 }
 
 /*
@@ -247,22 +222,12 @@ static inline void __load_psw_mask (unsigned long mask)
 
 	psw.mask = mask;
 
-#ifndef CONFIG_64BIT
-	asm volatile(
-		"	basr	%0,0\n"
-		"0:	ahi	%0,1f-0b\n"
-		"	st	%0,%O1+4(%R1)\n"
-		"	lpsw	%1\n"
-		"1:"
-		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
-#else /* CONFIG_64BIT */
 	asm volatile(
 		"	larl	%0,1f\n"
 		"	stg	%0,%O1+8(%R1)\n"
 		"	lpswe	%1\n"
 		"1:"
 		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
-#endif /* CONFIG_64BIT */
 }
 
 /*
@@ -270,20 +235,12 @@ static inline void __load_psw_mask (unsigned long mask)
  */
 static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
 {
-#ifndef CONFIG_64BIT
-	if (psw.addr & PSW_ADDR_AMODE)
-		/* 31 bit mode */
-		return (psw.addr - ilc) | PSW_ADDR_AMODE;
-	/* 24 bit mode */
-	return (psw.addr - ilc) & ((1UL << 24) - 1);
-#else
 	unsigned long mask;
 
 	mask = (psw.mask & PSW_MASK_EA) ? -1UL :
 	       (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
 					  (1UL << 24) - 1;
 	return (psw.addr - ilc) & mask;
-#endif
 }
 
 /*
@@ -305,26 +262,6 @@ static inline void __noreturn disabled_wait(unsigned long code)
          * Store status and then load disabled wait psw,
          * the processor is dead afterwards
          */
-#ifndef CONFIG_64BIT
-	asm volatile(
-		"	stctl	0,0,0(%2)\n"
-		"	ni	0(%2),0xef\n"	/* switch off protection */
-		"	lctl	0,0,0(%2)\n"
-		"	stpt	0xd8\n"		/* store timer */
-		"	stckc	0xe0\n"		/* store clock comparator */
-		"	stpx	0x108\n"	/* store prefix register */
-		"	stam	0,15,0x120\n"	/* store access registers */
-		"	std	0,0x160\n"	/* store f0 */
-		"	std	2,0x168\n"	/* store f2 */
-		"	std	4,0x170\n"	/* store f4 */
-		"	std	6,0x178\n"	/* store f6 */
-		"	stm	0,15,0x180\n"	/* store general registers */
-		"	stctl	0,15,0x1c0\n"	/* store control registers */
-		"	oi	0x1c0,0x10\n"	/* fake protection bit */
-		"	lpsw	0(%1)"
-		: "=m" (ctl_buf)
-		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc");
-#else /* CONFIG_64BIT */
 	asm volatile(
 		"	stctg	0,0,0(%2)\n"
 		"	ni	4(%2),0xef\n"	/* switch off protection */
@@ -357,7 +294,6 @@ static inline void __noreturn disabled_wait(unsigned long code)
 		"	lpswe	0(%1)"
 		: "=m" (ctl_buf)
 		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
-#endif /* CONFIG_64BIT */
 	while (1);
 }
 
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index be317feff7ac..6feda2599282 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -40,12 +40,8 @@ struct psw_bits {
 	unsigned long long ri	: 1; /* Runtime Instrumentation */
 	unsigned long long	: 6;
 	unsigned long long eaba : 2; /* Addressing Mode */
-#ifdef CONFIG_64BIT
 	unsigned long long	: 31;
 	unsigned long long ia	: 64;/* Instruction Address */
-#else
-	unsigned long long ia	: 31;/* Instruction Address */
-#endif
 };
 
 enum {
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 06f3034605a1..998b61cd0e56 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -211,11 +211,6 @@ struct qdio_buffer_element {
 	u8 scount;
 	u8 sflags;
 	u32 length;
-#ifdef CONFIG_32BIT
-	/* private: */
-	void *res2;
-	/* public: */
-#endif
 	void *addr;
 } __attribute__ ((packed, aligned(16)));
 
@@ -232,11 +227,6 @@ struct qdio_buffer {
  * @sbal: absolute SBAL address
  */
 struct sl_element {
-#ifdef CONFIG_32BIT
-	/* private: */
-	unsigned long reserved;
-	/* public: */
-#endif
 	unsigned long sbal;
 } __attribute__ ((packed));
 
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
index 830da737ff85..402ad6df4897 100644
--- a/arch/s390/include/asm/runtime_instr.h
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -72,27 +72,19 @@ static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
 
 static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
 {
-#ifdef CONFIG_64BIT
 	if (cb_prev)
 		store_runtime_instr_cb(cb_prev);
-#endif
 }
 
 static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
 				 struct runtime_instr_cb *cb_prev)
 {
-#ifdef CONFIG_64BIT
 	if (cb_next)
 		load_runtime_instr_cb(cb_next);
 	else if (cb_prev)
 		load_runtime_instr_cb(&runtime_instr_empty_cb);
-#endif
 }
 
-#ifdef CONFIG_64BIT
-extern void exit_thread_runtime_instr(void);
-#else
-static inline void exit_thread_runtime_instr(void) { }
-#endif
+void exit_thread_runtime_instr(void);
 
 #endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 487f9b64efb9..4b43ee7e6776 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -39,17 +39,10 @@
 #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
 #endif
 
-#ifndef CONFIG_64BIT
-#define RWSEM_UNLOCKED_VALUE	0x00000000
-#define RWSEM_ACTIVE_BIAS	0x00000001
-#define RWSEM_ACTIVE_MASK	0x0000ffff
-#define RWSEM_WAITING_BIAS	(-0x00010000)
-#else /* CONFIG_64BIT */
 #define RWSEM_UNLOCKED_VALUE	0x0000000000000000L
 #define RWSEM_ACTIVE_BIAS	0x0000000000000001L
 #define RWSEM_ACTIVE_MASK	0x00000000ffffffffL
 #define RWSEM_WAITING_BIAS	(-0x0000000100000000L)
-#endif /* CONFIG_64BIT */
 #define RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
 #define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 
@@ -61,19 +54,11 @@ static inline void __down_read(struct rw_semaphore *sem)
 	signed long old, new;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	ahi	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	aghi	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS)
 		: "cc", "memory");
@@ -89,15 +74,6 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 	signed long old, new;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	ltr	%1,%0\n"
-		"	jm	1f\n"
-		"	ahi	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b\n"
-		"1:"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	ltgr	%1,%0\n"
 		"	jm	1f\n"
@@ -105,7 +81,6 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 		"	csg	%0,%1,%2\n"
 		"	jl	0b\n"
 		"1:"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS)
 		: "cc", "memory");
@@ -121,19 +96,11 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 
 	tmp = RWSEM_ACTIVE_WRITE_BIAS;
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	a	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	ag	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "m" (tmp)
 		: "cc", "memory");
@@ -154,19 +121,11 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 	signed long old;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%1\n"
-		"0:	ltr	%0,%0\n"
-		"	jnz	1f\n"
-		"	cs	%0,%3,%1\n"
-		"	jl	0b\n"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%1\n"
 		"0:	ltgr	%0,%0\n"
 		"	jnz	1f\n"
 		"	csg	%0,%3,%1\n"
 		"	jl	0b\n"
-#endif /* CONFIG_64BIT */
 		"1:"
 		: "=&d" (old), "=Q" (sem->count)
 		: "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS)
@@ -182,19 +141,11 @@ static inline void __up_read(struct rw_semaphore *sem)
 	signed long old, new;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	ahi	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	aghi	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS)
 		: "cc", "memory");
@@ -212,19 +163,11 @@ static inline void __up_write(struct rw_semaphore *sem)
 
 	tmp = -RWSEM_ACTIVE_WRITE_BIAS;
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	a	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	ag	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "m" (tmp)
 		: "cc", "memory");
@@ -242,19 +185,11 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 
 	tmp = -RWSEM_WAITING_BIAS;
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	a	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	ag	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "m" (tmp)
 		: "cc", "memory");
@@ -270,19 +205,11 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
 	signed long old, new;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	ar	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	agr	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "d" (delta)
 		: "cc", "memory");
@@ -296,19 +223,11 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 	signed long old, new;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	l	%0,%2\n"
-		"0:	lr	%1,%0\n"
-		"	ar	%1,%4\n"
-		"	cs	%0,%1,%2\n"
-		"	jl	0b"
-#else /* CONFIG_64BIT */
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	agr	%1,%4\n"
 		"	csg	%0,%1,%2\n"
 		"	jl	0b"
-#endif /* CONFIG_64BIT */
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "d" (delta)
 		: "cc", "memory");
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b8d1e54b4733..b8ffc1bd0a9f 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -15,19 +15,11 @@
 #include <asm/lowcore.h>
 #include <asm/types.h>
 
-#ifndef CONFIG_64BIT
-#define IPL_DEVICE        (*(unsigned long *)  (0x10404))
-#define INITRD_START      (*(unsigned long *)  (0x1040C))
-#define INITRD_SIZE       (*(unsigned long *)  (0x10414))
-#define OLDMEM_BASE	  (*(unsigned long *)  (0x1041C))
-#define OLDMEM_SIZE	  (*(unsigned long *)  (0x10424))
-#else /* CONFIG_64BIT */
 #define IPL_DEVICE        (*(unsigned long *)  (0x10400))
 #define INITRD_START      (*(unsigned long *)  (0x10408))
 #define INITRD_SIZE       (*(unsigned long *)  (0x10410))
 #define OLDMEM_BASE	  (*(unsigned long *)  (0x10418))
 #define OLDMEM_SIZE	  (*(unsigned long *)  (0x10420))
-#endif /* CONFIG_64BIT */
 #define COMMAND_LINE      ((char *)            (0x10480))
 
 extern int memory_end_set;
@@ -68,26 +60,8 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_PFMF	MACHINE_HAS_EDAT1
 #define MACHINE_HAS_HPAGE	MACHINE_HAS_EDAT1
 
-#ifndef CONFIG_64BIT
-#define MACHINE_HAS_IEEE	(S390_lowcore.machine_flags & MACHINE_FLAG_IEEE)
-#define MACHINE_HAS_CSP		(S390_lowcore.machine_flags & MACHINE_FLAG_CSP)
-#define MACHINE_HAS_IDTE	(0)
-#define MACHINE_HAS_DIAG44	(1)
-#define MACHINE_HAS_MVPG	(S390_lowcore.machine_flags & MACHINE_FLAG_MVPG)
-#define MACHINE_HAS_EDAT1	(0)
-#define MACHINE_HAS_EDAT2	(0)
-#define MACHINE_HAS_LPP		(0)
-#define MACHINE_HAS_TOPOLOGY	(0)
-#define MACHINE_HAS_TE		(0)
-#define MACHINE_HAS_TLB_LC	(0)
-#define MACHINE_HAS_VX		(0)
-#define MACHINE_HAS_CAD		(0)
-#else /* CONFIG_64BIT */
-#define MACHINE_HAS_IEEE	(1)
-#define MACHINE_HAS_CSP		(1)
 #define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
 #define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
-#define MACHINE_HAS_MVPG	(1)
 #define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
 #define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
 #define MACHINE_HAS_LPP		(S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
@@ -96,7 +70,6 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
 #define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
 #define MACHINE_HAS_CAD		(S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
-#endif /* CONFIG_64BIT */
 
 /*
  * Console mode. Override with conmode=
@@ -135,19 +108,11 @@ extern void (*_machine_power_off)(void);
 
 #else /* __ASSEMBLY__ */
 
-#ifndef CONFIG_64BIT
-#define IPL_DEVICE        0x10404
-#define INITRD_START      0x1040C
-#define INITRD_SIZE       0x10414
-#define OLDMEM_BASE	  0x1041C
-#define OLDMEM_SIZE	  0x10424
-#else /* CONFIG_64BIT */
 #define IPL_DEVICE        0x10400
 #define INITRD_START      0x10408
 #define INITRD_SIZE       0x10410
 #define OLDMEM_BASE	  0x10418
 #define OLDMEM_SIZE	  0x10420
-#endif /* CONFIG_64BIT */
 #define COMMAND_LINE      0x10480
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h
index 5959bfb3b693..c8b7cf9d6279 100644
--- a/arch/s390/include/asm/sfp-util.h
+++ b/arch/s390/include/asm/sfp-util.h
@@ -51,7 +51,6 @@
 	wl = __wl;					\
 })
 
-#ifdef CONFIG_64BIT
 #define udiv_qrnnd(q, r, n1, n0, d)			\
   do { unsigned long __n;				\
        unsigned int __r, __d;				\
@@ -60,15 +59,6 @@
     (q) = __n / __d;					\
     (r) = __n % __d;					\
   } while (0)
-#else
-#define udiv_qrnnd(q, r, n1, n0, d)			\
-  do { unsigned int __r;				\
-    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));		\
-    (r) = __r;						\
-  } while (0)
-extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int,
-				   unsigned int , unsigned int);
-#endif
 
 #define UDIV_NEEDS_NORMALIZATION 0
 
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
index a60d085ddb4d..487428b6d099 100644
--- a/arch/s390/include/asm/sparsemem.h
+++ b/arch/s390/include/asm/sparsemem.h
@@ -1,16 +1,7 @@
 #ifndef _ASM_S390_SPARSEMEM_H
 #define _ASM_S390_SPARSEMEM_H
 
-#ifdef CONFIG_64BIT
-
 #define SECTION_SIZE_BITS	28
 #define MAX_PHYSMEM_BITS	46
 
-#else
-
-#define SECTION_SIZE_BITS	25
-#define MAX_PHYSMEM_BITS	31
-
-#endif /* CONFIG_64BIT */
-
 #endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 2542a7e4c8b4..d62e7a69605f 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -18,9 +18,6 @@ static inline int test_fp_ctl(u32 fpc)
 	u32 orig_fpc;
 	int rc;
 
-	if (!MACHINE_HAS_IEEE)
-		return 0;
-
 	asm volatile(
 		"	efpc    %1\n"
 		"	sfpc	%2\n"
@@ -35,9 +32,6 @@ static inline int test_fp_ctl(u32 fpc)
 
 static inline void save_fp_ctl(u32 *fpc)
 {
-	if (!MACHINE_HAS_IEEE)
-		return;
-
 	asm volatile(
 		"       stfpc   %0\n"
 		: "+Q" (*fpc));
@@ -47,9 +41,6 @@ static inline int restore_fp_ctl(u32 *fpc)
 {
 	int rc;
 
-	if (!MACHINE_HAS_IEEE)
-		return 0;
-
 	asm volatile(
 		"	lfpc    %1\n"
 		"0:	la	%0,0\n"
@@ -65,8 +56,6 @@ static inline void save_fp_regs(freg_t *fprs)
 	asm volatile("std 2,%0" : "=Q" (fprs[2]));
 	asm volatile("std 4,%0" : "=Q" (fprs[4]));
 	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	if (!MACHINE_HAS_IEEE)
-		return;
 	asm volatile("std 1,%0" : "=Q" (fprs[1]));
 	asm volatile("std 3,%0" : "=Q" (fprs[3]));
 	asm volatile("std 5,%0" : "=Q" (fprs[5]));
@@ -87,8 +76,6 @@ static inline void restore_fp_regs(freg_t *fprs)
 	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
 	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
 	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	if (!MACHINE_HAS_IEEE)
-		return;
 	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
 	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
 	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
@@ -140,22 +127,18 @@ static inline void restore_vx_regs(__vector128 *vxrs)
 
 static inline void save_fp_vx_regs(struct task_struct *task)
 {
-#ifdef CONFIG_64BIT
 	if (task->thread.vxrs)
 		save_vx_regs(task->thread.vxrs);
 	else
-#endif
-	save_fp_regs(task->thread.fp_regs.fprs);
+		save_fp_regs(task->thread.fp_regs.fprs);
 }
 
 static inline void restore_fp_vx_regs(struct task_struct *task)
 {
-#ifdef CONFIG_64BIT
 	if (task->thread.vxrs)
 		restore_vx_regs(task->thread.vxrs);
 	else
-#endif
-	restore_fp_regs(task->thread.fp_regs.fprs);
+		restore_fp_regs(task->thread.fp_regs.fprs);
 }
 
 static inline void save_access_regs(unsigned int *acrs)
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 5bc12598ae9e..6ba0bf928909 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -95,6 +95,6 @@ static inline int syscall_get_arch(void)
 	if (test_tsk_thread_flag(current, TIF_31BIT))
 		return AUDIT_ARCH_S390;
 #endif
-	return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;
+	return AUDIT_ARCH_S390X;
 }
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index ef1df718642d..4c27ec764c36 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -10,13 +10,8 @@
 /*
  * Size of kernel stack for each process
  */
-#ifndef CONFIG_64BIT
-#define THREAD_ORDER 1
-#define ASYNC_ORDER  1
-#else /* CONFIG_64BIT */
 #define THREAD_ORDER 2
 #define ASYNC_ORDER  2
-#endif /* CONFIG_64BIT */
 
 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
 #define ASYNC_SIZE  (PAGE_SIZE << ASYNC_ORDER)
@@ -34,7 +29,6 @@
  */
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		sys_call_table;	/* System call table address */
 	unsigned int		cpu;		/* current CPU */
@@ -51,7 +45,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
@@ -66,6 +59,8 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *) S390_lowcore.thread_info;
 }
 
+void arch_release_task_struct(struct task_struct *tsk);
+
 #define THREAD_SIZE_ORDER THREAD_ORDER
 
 #endif
@@ -99,10 +94,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_31BIT		(1<<TIF_31BIT)
 #define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
 
-#ifdef CONFIG_64BIT
 #define is_32bit_task()		(test_thread_flag(TIF_31BIT))
-#else
-#define is_32bit_task()		(1)
-#endif
 
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 06d8741ad6f4..7a92e69c50bc 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -118,12 +118,10 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 				unsigned long address)
 {
-#ifdef CONFIG_64BIT
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
 		return;
 	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	tlb_remove_table(tlb, pmd);
-#endif
 }
 
 /*
@@ -136,11 +134,9 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 				unsigned long address)
 {
-#ifdef CONFIG_64BIT
 	if (tlb->mm->context.asce_limit <= (1UL << 42))
 		return;
 	tlb_remove_table(tlb, pud);
-#endif
 }
 
 #define tlb_start_vma(tlb, vma)			do { } while (0)
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 16c9c88658c8..ca148f7c3eaa 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -49,13 +49,6 @@ static inline void __tlb_flush_global(void)
 	register unsigned long reg4 asm("4");
 	long dummy;
 
-#ifndef CONFIG_64BIT
-	if (!MACHINE_HAS_CSP) {
-		smp_ptlb_all();
-		return;
-	}
-#endif /* CONFIG_64BIT */
-
 	dummy = 0;
 	reg2 = reg3 = 0;
 	reg4 = ((unsigned long) &dummy) + 1;
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index dccef3ca91fa..6740f4f9781f 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -8,21 +8,4 @@
 
 #include <uapi/asm/types.h>
 
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-
-#ifndef __ASSEMBLY__
-
-#ifndef CONFIG_64BIT
-typedef union {
-	unsigned long long pair;
-	struct {
-		unsigned long even;
-		unsigned long odd;
-	} subreg;
-} register_pair;
-
-#endif /* ! CONFIG_64BIT   */
-#endif /* __ASSEMBLY__  */
 #endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index cd4c68e0398d..d64a7a62164f 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -372,5 +372,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
 }
 
 int copy_to_user_real(void __user *dest, void *src, unsigned long count);
+void s390_kernel_write(void *dst, const void *src, size_t size);
 
 #endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 651886353551..91f56b1d8156 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -9,11 +9,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#ifndef CONFIG_64BIT
-#define __IGNORE_select
-#else
 #define __IGNORE_time
-#endif
 
 /* Ignore NUMA system calls. Not wired up on s390. */
 #define __IGNORE_mbind
@@ -43,10 +39,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-# ifndef CONFIG_64BIT
-#   define __ARCH_WANT_STAT64
-#   define __ARCH_WANT_SYS_TIME
-# endif
 # ifdef CONFIG_COMPAT
 #   define __ARCH_WANT_COMPAT_SYS_TIME
 # endif
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a62526d09201..787acd4f9668 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -42,10 +42,8 @@ struct vdso_per_cpu_data {
 
 extern struct vdso_data *vdso_data;
 
-#ifdef CONFIG_64BIT
 int vdso_alloc_per_cpu(struct _lowcore *lowcore);
 void vdso_free_per_cpu(struct _lowcore *lowcore);
-#endif
 
 #endif /* __ASSEMBLY__ */
 #endif /* __S390_VDSO_H__ */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9c77e60b9a26..ef1a5fcc6c66 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -150,6 +150,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_CRS    (1UL << 3)
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
@@ -164,6 +165,9 @@ struct kvm_sync_regs {
 	__u64 pft;	/* pfault token [PFAULT] */
 	__u64 pfs;	/* pfault select [PFAULT] */
 	__u64 pfc;	/* pfault compare [PFAULT] */
+	__u64 vrs[32][2];	/* vector registers */
+	__u8  reserved[512];	/* for future vector expansion */
+	__u32 fpc;	/* only valid with vector registers */
 };
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index d4096fdfc6ab..ee69c0854c88 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -230,7 +230,7 @@
  * and returns a key, which can be used to find a mnemonic name
  * of the instruction in the icpt_insn_codes table.
  */
-#define icpt_insn_decoder(insn)			\
+#define icpt_insn_decoder(insn) (		\
 	INSN_DECODE_IPA0(0x01, insn, 48, 0xff)	\
 	INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)	\
 	INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)	\
@@ -239,6 +239,6 @@
 	INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)	\
 	INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)	\
 	INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)	\
-	INSN_DECODE(insn)
+	INSN_DECODE(insn))
 
 #endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 31fab2676fe9..ffb87617a36c 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -26,25 +26,21 @@ CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
 #
 CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
-CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
+CFLAGS_sysinfo.o += -w
 
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y	+= dumpstack.o
+obj-y	+= runtime_instr.o cache.o dumpstack.o
+obj-y	+= entry.o reipl.o relocate_kernel.o
 
-obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
-obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
-obj-y	+= $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
-
-extra-y				+= head.o vmlinux.lds
-extra-y				+= $(if $(CONFIG_64BIT),head64.o,head31.o)
+extra-y				+= head.o head64.o vmlinux.lds
 
 obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SCHED_BOOK)	+= topology.o
-obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp_asm64.o
+obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
 obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o
@@ -56,13 +52,9 @@ obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 
-ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
-						perf_cpum_cf_events.o
-obj-y				+= runtime_instr.o cache.o
-endif
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o
 
 # vdso
-obj-$(CONFIG_64BIT)		+= vdso64/
-obj-$(CONFIG_32BIT)		+= vdso32/
+obj-y				+= vdso64/
 obj-$(CONFIG_COMPAT)		+= vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e07e91605353..c7d1b9d09011 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -34,7 +34,6 @@ int main(void)
 	DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
 	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
-	DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
 	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
 	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
 	DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
@@ -166,11 +165,9 @@ int main(void)
 	DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
 	DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
 	DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
-#ifdef CONFIG_32BIT
-	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
-#else /* CONFIG_32BIT */
 	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
 	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+	DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
 	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
 	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
@@ -183,6 +180,5 @@ int main(void)
 	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
 	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
 	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
-#endif /* CONFIG_32BIT */
 	return 0;
 }
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index f74a53d339b0..daed3fde42ec 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -11,8 +11,6 @@
 #include <asm/ptrace.h>
 #include <asm/sigp.h>
 
-#ifdef CONFIG_64BIT
-
 ENTRY(s390_base_mcck_handler)
 	basr	%r13,0
 0:	lg	%r15,__LC_PANIC_STACK	# load panic stack
@@ -131,77 +129,3 @@ ENTRY(diag308_reset)
 .Lfpctl:
 	.long	0
 	.previous
-
-#else /* CONFIG_64BIT */
-
-ENTRY(s390_base_mcck_handler)
-	basr	%r13,0
-0:	l	%r15,__LC_PANIC_STACK	# load panic stack
-	ahi	%r15,-STACK_FRAME_OVERHEAD
-	l	%r1,2f-0b(%r13)
-	l	%r1,0(%r1)
-	ltr	%r1,%r1
-	jz	1f
-	basr	%r14,%r1
-1:	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA
-	lpsw	__LC_MCK_OLD_PSW
-
-2:	.long	s390_base_mcck_handler_fn
-
-	.section .bss
-	.align 4
-	.globl	s390_base_mcck_handler_fn
-s390_base_mcck_handler_fn:
-	.long	0
-	.previous
-
-ENTRY(s390_base_ext_handler)
-	stm	%r0,%r15,__LC_SAVE_AREA_ASYNC
-	basr	%r13,0
-0:	ahi	%r15,-STACK_FRAME_OVERHEAD
-	l	%r1,2f-0b(%r13)
-	l	%r1,0(%r1)
-	ltr	%r1,%r1
-	jz	1f
-	basr	%r14,%r1
-1:	lm	%r0,%r15,__LC_SAVE_AREA_ASYNC
-	ni	__LC_EXT_OLD_PSW+1,0xfd	# clear wait state bit
-	lpsw	__LC_EXT_OLD_PSW
-
-2:	.long	s390_base_ext_handler_fn
-
-	.section .bss
-	.align 4
-	.globl	s390_base_ext_handler_fn
-s390_base_ext_handler_fn:
-	.long	0
-	.previous
-
-ENTRY(s390_base_pgm_handler)
-	stm	%r0,%r15,__LC_SAVE_AREA_SYNC
-	basr	%r13,0
-0:	ahi	%r15,-STACK_FRAME_OVERHEAD
-	l	%r1,2f-0b(%r13)
-	l	%r1,0(%r1)
-	ltr	%r1,%r1
-	jz	1f
-	basr	%r14,%r1
-	lm	%r0,%r15,__LC_SAVE_AREA_SYNC
-	lpsw	__LC_PGM_OLD_PSW
-
-1:	lpsw	disabled_wait_psw-0b(%r13)
-
-2:	.long	s390_base_pgm_handler_fn
-
-disabled_wait_psw:
-	.align	8
-	.long	0x000a0000,0x00000000 + s390_base_pgm_handler
-
-	.section .bss
-	.align 4
-	.globl	s390_base_pgm_handler_fn
-s390_base_pgm_handler_fn:
-	.long	0
-	.previous
-
-#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 0969d113b3d6..bff5e3b6d822 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -70,6 +70,8 @@ void show_cacheinfo(struct seq_file *m)
 	struct cacheinfo *cache;
 	int idx;
 
+	if (!test_facility(34))
+		return;
 	get_online_cpus();
 	this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
@@ -159,6 +161,8 @@ int populate_cache_leaves(unsigned int cpu)
 	union cache_topology ct;
 	enum cache_type ctype;
 
+	if (!test_facility(34))
+		return -EOPNOTSUPP;
 	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
 	for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
 	     idx < this_cpu_ci->num_leaves; idx++, level++) {
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index bc1df12dd4f8..fe8d6924efaa 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -370,16 +370,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	return (void __user *)((sp - frame_size) & -8ul);
 }
 
-static inline int map_signal(int sig)
-{
-	if (current_thread_info()->exec_domain
-	    && current_thread_info()->exec_domain->signal_invmap
-	    && sig < 32)
-		return current_thread_info()->exec_domain->signal_invmap[sig];
-        else
-		return sig;
-}
-
 static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 			 struct pt_regs *regs)
 {
@@ -449,7 +439,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 		(regs->psw.mask & ~PSW_MASK_ASC);
 	regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
 
-	regs->gprs[2] = map_signal(sig);
+	regs->gprs[2] = sig;
 	regs->gprs[3] = (__force __u64) &frame->sc;
 
 	/* We forgot to include these in the sigcontext.
@@ -532,7 +522,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 		(regs->psw.mask & ~PSW_MASK_ASC);
 	regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
 
-	regs->gprs[2] = map_signal(ksig->sig);
+	regs->gprs[2] = ksig->sig;
 	regs->gprs[3] = (__force __u64) &frame->info;
 	regs->gprs[4] = (__force __u64) &frame->uc;
 	regs->gprs[5] = task_thread_info(current)->last_break;
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index d7b0c4d27880..199ec92ef4fe 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -27,13 +27,9 @@ static int diag8_noresponse(int cmdlen)
 	register unsigned long reg3 asm ("3") = cmdlen;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	diag	%1,%0,0x8\n"
-#else /* CONFIG_64BIT */
 		"	sam31\n"
 		"	diag	%1,%0,0x8\n"
 		"	sam64\n"
-#endif /* CONFIG_64BIT */
 		: "+d" (reg3) : "d" (reg2) : "cc");
 	return reg3;
 }
@@ -46,17 +42,11 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
 	register unsigned long reg5 asm ("5") = *rlen;
 
 	asm volatile(
-#ifndef CONFIG_64BIT
-		"	diag	%2,%0,0x8\n"
-		"	brc	8,1f\n"
-		"	ar	%1,%4\n"
-#else /* CONFIG_64BIT */
 		"	sam31\n"
 		"	diag	%2,%0,0x8\n"
 		"	sam64\n"
 		"	brc	8,1f\n"
 		"	agr	%1,%4\n"
-#endif /* CONFIG_64BIT */
 		"1:\n"
 		: "+d" (reg4), "+d" (reg5)
 		: "d" (reg2), "d" (reg3), "d" (*rlen) : "cc");
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 8237fc07ac79..2f69243bf700 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -18,13 +18,9 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
 	int rc = 0;
 
 	asm volatile(
-#ifdef CONFIG_64BIT
 		"   sam31\n"
 		"   diag    %2,2,0x14\n"
 		"   sam64\n"
-#else
-		"   diag    %2,2,0x14\n"
-#endif
 		"   ipm     %0\n"
 		"   srl     %0,28\n"
 		: "=d" (rc), "+d" (_ry2)
@@ -52,7 +48,6 @@ int diag210(struct diag210 *addr)
 	spin_lock_irqsave(&diag210_lock, flags);
 	diag210_tmp = *addr;
 
-#ifdef CONFIG_64BIT
 	asm volatile(
 		"	lhi	%0,-1\n"
 		"	sam31\n"
@@ -62,16 +57,6 @@ int diag210(struct diag210 *addr)
 		"1:	sam64\n"
 		EX_TABLE(0b, 1b)
 		: "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
-#else
-	asm volatile(
-		"	lhi	%0,-1\n"
-		"	diag	%1,0,0x210\n"
-		"0:	ipm	%0\n"
-		"	srl	%0,28\n"
-		"1:\n"
-		EX_TABLE(0b, 1b)
-		: "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
-#endif
 
 	*addr = diag210_tmp;
 	spin_unlock_irqrestore(&diag210_lock, flags);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 533430307da8..8140d10c6785 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -32,12 +32,6 @@
 #include <asm/debug.h>
 #include <asm/irq.h>
 
-#ifndef CONFIG_64BIT
-#define ONELONG "%08lx: "
-#else /* CONFIG_64BIT */
-#define ONELONG "%016lx: "
-#endif /* CONFIG_64BIT */
-
 enum {
 	UNUSED,	/* Indicates the end of the operand list */
 	R_8,	/* GPR starting at position 8 */
@@ -536,12 +530,10 @@ static char *long_insn_name[] = {
 };
 
 static struct s390_insn opcode[] = {
-#ifdef CONFIG_64BIT
 	{ "bprp", 0xc5, INSTR_MII_UPI },
 	{ "bpp", 0xc7, INSTR_SMI_U0RDP },
 	{ "trtr", 0xd0, INSTR_SS_L0RDRD },
 	{ "lmd", 0xef, INSTR_SS_RRRDRD3 },
-#endif
 	{ "spm", 0x04, INSTR_RR_R0 },
 	{ "balr", 0x05, INSTR_RR_RR },
 	{ "bctr", 0x06, INSTR_RR_RR },
@@ -725,11 +717,9 @@ static struct s390_insn opcode[] = {
 };
 
 static struct s390_insn opcode_01[] = {
-#ifdef CONFIG_64BIT
 	{ "ptff", 0x04, INSTR_E },
 	{ "pfpo", 0x0a, INSTR_E },
 	{ "sam64", 0x0e, INSTR_E },
-#endif
 	{ "pr", 0x01, INSTR_E },
 	{ "upt", 0x02, INSTR_E },
 	{ "sckpf", 0x07, INSTR_E },
@@ -741,7 +731,6 @@ static struct s390_insn opcode_01[] = {
 };
 
 static struct s390_insn opcode_a5[] = {
-#ifdef CONFIG_64BIT
 	{ "iihh", 0x00, INSTR_RI_RU },
 	{ "iihl", 0x01, INSTR_RI_RU },
 	{ "iilh", 0x02, INSTR_RI_RU },
@@ -758,12 +747,10 @@ static struct s390_insn opcode_a5[] = {
 	{ "llihl", 0x0d, INSTR_RI_RU },
 	{ "llilh", 0x0e, INSTR_RI_RU },
 	{ "llill", 0x0f, INSTR_RI_RU },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_a7[] = {
-#ifdef CONFIG_64BIT
 	{ "tmhh", 0x02, INSTR_RI_RU },
 	{ "tmhl", 0x03, INSTR_RI_RU },
 	{ "brctg", 0x07, INSTR_RI_RP },
@@ -771,7 +758,6 @@ static struct s390_insn opcode_a7[] = {
 	{ "aghi", 0x0b, INSTR_RI_RI },
 	{ "mghi", 0x0d, INSTR_RI_RI },
 	{ "cghi", 0x0f, INSTR_RI_RI },
-#endif
 	{ "tmlh", 0x00, INSTR_RI_RU },
 	{ "tmll", 0x01, INSTR_RI_RU },
 	{ "brc", 0x04, INSTR_RI_UP },
@@ -785,18 +771,15 @@ static struct s390_insn opcode_a7[] = {
 };
 
 static struct s390_insn opcode_aa[] = {
-#ifdef CONFIG_64BIT
 	{ { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
 	{ "rion", 0x01, INSTR_RI_RI },
 	{ "tric", 0x02, INSTR_RI_RI },
 	{ "rioff", 0x03, INSTR_RI_RI },
 	{ { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_b2[] = {
-#ifdef CONFIG_64BIT
 	{ "stckf", 0x7c, INSTR_S_RD },
 	{ "lpp", 0x80, INSTR_S_RD },
 	{ "lcctl", 0x84, INSTR_S_RD },
@@ -819,7 +802,6 @@ static struct s390_insn opcode_b2[] = {
 	{ "tend", 0xf8, INSTR_S_00 },
 	{ "niai", 0xfa, INSTR_IE_UU },
 	{ { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD },
-#endif
 	{ "stidp", 0x02, INSTR_S_RD },
 	{ "sck", 0x04, INSTR_S_RD },
 	{ "stck", 0x05, INSTR_S_RD },
@@ -908,7 +890,6 @@ static struct s390_insn opcode_b2[] = {
 };
 
 static struct s390_insn opcode_b3[] = {
-#ifdef CONFIG_64BIT
 	{ "maylr", 0x38, INSTR_RRF_F0FF },
 	{ "mylr", 0x39, INSTR_RRF_F0FF },
 	{ "mayr", 0x3a, INSTR_RRF_F0FF },
@@ -996,7 +977,6 @@ static struct s390_insn opcode_b3[] = {
 	{ "qaxtr", 0xfd, INSTR_RRF_FUFF },
 	{ "iextr", 0xfe, INSTR_RRF_F0FR },
 	{ "rrxtr", 0xff, INSTR_RRF_FFRU },
-#endif
 	{ "lpebr", 0x00, INSTR_RRE_FF },
 	{ "lnebr", 0x01, INSTR_RRE_FF },
 	{ "ltebr", 0x02, INSTR_RRE_FF },
@@ -1091,7 +1071,6 @@ static struct s390_insn opcode_b3[] = {
 };
 
 static struct s390_insn opcode_b9[] = {
-#ifdef CONFIG_64BIT
 	{ "lpgr", 0x00, INSTR_RRE_RR },
 	{ "lngr", 0x01, INSTR_RRE_RR },
 	{ "ltgr", 0x02, INSTR_RRE_RR },
@@ -1204,7 +1183,6 @@ static struct s390_insn opcode_b9[] = {
 	{ "srk", 0xf9, INSTR_RRF_R0RR2 },
 	{ "alrk", 0xfa, INSTR_RRF_R0RR2 },
 	{ "slrk", 0xfb, INSTR_RRF_R0RR2 },
-#endif
 	{ "kmac", 0x1e, INSTR_RRE_RR },
 	{ "lrvr", 0x1f, INSTR_RRE_RR },
 	{ "km", 0x2e, INSTR_RRE_RR },
@@ -1224,7 +1202,6 @@ static struct s390_insn opcode_b9[] = {
 };
 
 static struct s390_insn opcode_c0[] = {
-#ifdef CONFIG_64BIT
 	{ "lgfi", 0x01, INSTR_RIL_RI },
 	{ "xihf", 0x06, INSTR_RIL_RU },
 	{ "xilf", 0x07, INSTR_RIL_RU },
@@ -1236,7 +1213,6 @@ static struct s390_insn opcode_c0[] = {
 	{ "oilf", 0x0d, INSTR_RIL_RU },
 	{ "llihf", 0x0e, INSTR_RIL_RU },
 	{ "llilf", 0x0f, INSTR_RIL_RU },
-#endif
 	{ "larl", 0x00, INSTR_RIL_RP },
 	{ "brcl", 0x04, INSTR_RIL_UP },
 	{ "brasl", 0x05, INSTR_RIL_RP },
@@ -1244,7 +1220,6 @@ static struct s390_insn opcode_c0[] = {
 };
 
 static struct s390_insn opcode_c2[] = {
-#ifdef CONFIG_64BIT
 	{ "msgfi", 0x00, INSTR_RIL_RI },
 	{ "msfi", 0x01, INSTR_RIL_RI },
 	{ "slgfi", 0x04, INSTR_RIL_RU },
@@ -1257,12 +1232,10 @@ static struct s390_insn opcode_c2[] = {
 	{ "cfi", 0x0d, INSTR_RIL_RI },
 	{ "clgfi", 0x0e, INSTR_RIL_RU },
 	{ "clfi", 0x0f, INSTR_RIL_RU },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_c4[] = {
-#ifdef CONFIG_64BIT
 	{ "llhrl", 0x02, INSTR_RIL_RP },
 	{ "lghrl", 0x04, INSTR_RIL_RP },
 	{ "lhrl", 0x05, INSTR_RIL_RP },
@@ -1274,12 +1247,10 @@ static struct s390_insn opcode_c4[] = {
 	{ "lrl", 0x0d, INSTR_RIL_RP },
 	{ { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP },
 	{ "strl", 0x0f, INSTR_RIL_RP },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_c6[] = {
-#ifdef CONFIG_64BIT
 	{ "exrl", 0x00, INSTR_RIL_RP },
 	{ "pfdrl", 0x02, INSTR_RIL_UP },
 	{ "cghrl", 0x04, INSTR_RIL_RP },
@@ -1292,35 +1263,29 @@ static struct s390_insn opcode_c6[] = {
 	{ "crl", 0x0d, INSTR_RIL_RP },
 	{ { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP },
 	{ "clrl", 0x0f, INSTR_RIL_RP },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_c8[] = {
-#ifdef CONFIG_64BIT
 	{ "mvcos", 0x00, INSTR_SSF_RRDRD },
 	{ "ectg", 0x01, INSTR_SSF_RRDRD },
 	{ "csst", 0x02, INSTR_SSF_RRDRD },
 	{ "lpd", 0x04, INSTR_SSF_RRDRD2 },
 	{ "lpdg", 0x05, INSTR_SSF_RRDRD2 },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_cc[] = {
-#ifdef CONFIG_64BIT
 	{ "brcth", 0x06, INSTR_RIL_RP },
 	{ "aih", 0x08, INSTR_RIL_RI },
 	{ "alsih", 0x0a, INSTR_RIL_RI },
 	{ { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI },
 	{ "cih", 0x0d, INSTR_RIL_RI },
 	{ "clih", 0x0f, INSTR_RIL_RI },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_e3[] = {
-#ifdef CONFIG_64BIT
 	{ "ltg", 0x02, INSTR_RXY_RRRD },
 	{ "lrag", 0x03, INSTR_RXY_RRRD },
 	{ "lg", 0x04, INSTR_RXY_RRRD },
@@ -1414,7 +1379,6 @@ static struct s390_insn opcode_e3[] = {
 	{ "clhf", 0xcf, INSTR_RXY_RRRD },
 	{ { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD },
 	{ { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD },
-#endif
 	{ "lrv", 0x1e, INSTR_RXY_RRRD },
 	{ "lrvh", 0x1f, INSTR_RXY_RRRD },
 	{ "strv", 0x3e, INSTR_RXY_RRRD },
@@ -1426,7 +1390,6 @@ static struct s390_insn opcode_e3[] = {
 };
 
 static struct s390_insn opcode_e5[] = {
-#ifdef CONFIG_64BIT
 	{ "strag", 0x02, INSTR_SSE_RDRD },
 	{ "mvhhi", 0x44, INSTR_SIL_RDI },
 	{ "mvghi", 0x48, INSTR_SIL_RDI },
@@ -1439,7 +1402,6 @@ static struct s390_insn opcode_e5[] = {
 	{ { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU },
 	{ { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU },
 	{ { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU },
-#endif
 	{ "lasp", 0x00, INSTR_SSE_RDRD },
 	{ "tprot", 0x01, INSTR_SSE_RDRD },
 	{ "mvcsk", 0x0e, INSTR_SSE_RDRD },
@@ -1448,7 +1410,6 @@ static struct s390_insn opcode_e5[] = {
 };
 
 static struct s390_insn opcode_e7[] = {
-#ifdef CONFIG_64BIT
 	{ "lcbb", 0x27, INSTR_RXE_RRRDM },
 	{ "vgef", 0x13, INSTR_VRV_VVRDM },
 	{ "vgeg", 0x12, INSTR_VRV_VVRDM },
@@ -1588,11 +1549,9 @@ static struct s390_insn opcode_e7[] = {
 	{ "vfsq", 0xce, INSTR_VRR_VV000MM },
 	{ "vfs", 0xe2, INSTR_VRR_VVV00MM },
 	{ "vftci", 0x4a, INSTR_VRI_VVIMM },
-#endif
 };
 
 static struct s390_insn opcode_eb[] = {
-#ifdef CONFIG_64BIT
 	{ "lmg", 0x04, INSTR_RSY_RRRD },
 	{ "srag", 0x0a, INSTR_RSY_RRRD },
 	{ "slag", 0x0b, INSTR_RSY_RRRD },
@@ -1659,7 +1618,6 @@ static struct s390_insn opcode_eb[] = {
 	{ "stric", 0x61, INSTR_RSY_RDRM },
 	{ "mric", 0x62, INSTR_RSY_RDRM },
 	{ { 0, LONG_INSN_STCCTM }, 0x17, INSTR_RSY_RMRD },
-#endif
 	{ "rll", 0x1d, INSTR_RSY_RRRD },
 	{ "mvclu", 0x8e, INSTR_RSY_RRRD },
 	{ "tp", 0xc0, INSTR_RSL_R0RD },
@@ -1667,7 +1625,6 @@ static struct s390_insn opcode_eb[] = {
 };
 
 static struct s390_insn opcode_ec[] = {
-#ifdef CONFIG_64BIT
 	{ "brxhg", 0x44, INSTR_RIE_RRP },
 	{ "brxlg", 0x45, INSTR_RIE_RRP },
 	{ { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU },
@@ -1701,12 +1658,10 @@ static struct s390_insn opcode_ec[] = {
 	{ "clgib", 0xfd, INSTR_RIS_RURDU },
 	{ "cib", 0xfe, INSTR_RIS_RURDI },
 	{ "clib", 0xff, INSTR_RIS_RURDU },
-#endif
 	{ "", 0, INSTR_INVALID }
 };
 
 static struct s390_insn opcode_ed[] = {
-#ifdef CONFIG_64BIT
 	{ "mayl", 0x38, INSTR_RXF_FRRDF },
 	{ "myl", 0x39, INSTR_RXF_FRRDF },
 	{ "may", 0x3a, INSTR_RXF_FRRDF },
@@ -1731,7 +1686,6 @@ static struct s390_insn opcode_ed[] = {
 	{ "czxt", 0xa9, INSTR_RSL_LRDFU },
 	{ "cdzt", 0xaa, INSTR_RSL_LRDFU },
 	{ "cxzt", 0xab, INSTR_RSL_LRDFU },
-#endif
 	{ "ldeb", 0x04, INSTR_RXE_FRRD },
 	{ "lxdb", 0x05, INSTR_RXE_FRRD },
 	{ "lxeb", 0x06, INSTR_RXE_FRRD },
@@ -2051,7 +2005,7 @@ void show_code(struct pt_regs *regs)
 		else
 			*ptr++ = ' ';
 		addr = regs->psw.addr + start - 32;
-		ptr += sprintf(ptr, ONELONG, addr);
+		ptr += sprintf(ptr, "%016lx: ", addr);
 		if (start + opsize >= end)
 			break;
 		for (i = 0; i < opsize; i++)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index a99852e96a77..dc8e20473484 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -18,16 +18,6 @@
 #include <asm/dis.h>
 #include <asm/ipl.h>
 
-#ifndef CONFIG_64BIT
-#define LONG "%08lx "
-#define FOURLONG "%08lx %08lx %08lx %08lx\n"
-static int kstack_depth_to_print = 12;
-#else /* CONFIG_64BIT */
-#define LONG "%016lx "
-#define FOURLONG "%016lx %016lx %016lx %016lx\n"
-static int kstack_depth_to_print = 20;
-#endif /* CONFIG_64BIT */
-
 /*
  * For show_trace we have tree different stack to consider:
  *   - the panic stack which is used if the kernel stack has overflown
@@ -115,12 +105,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 	else
 		stack = sp;
 
-	for (i = 0; i < kstack_depth_to_print; i++) {
+	for (i = 0; i < 20; i++) {
 		if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		if ((i * sizeof(long) % 32) == 0)
 			printk("%s       ", i == 0 ? "" : "\n");
-		printk(LONG, *stack++);
+		printk("%016lx ", *stack++);
 	}
 	printk("\n");
 	show_trace(task, sp);
@@ -128,10 +118,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 
 static void show_last_breaking_event(struct pt_regs *regs)
 {
-#ifdef CONFIG_64BIT
 	printk("Last Breaking-Event-Address:\n");
 	printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
-#endif
 }
 
 static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
@@ -155,16 +143,14 @@ void show_registers(struct pt_regs *regs)
 	       mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
 	       mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
 	       mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
-#ifdef CONFIG_64BIT
 	printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
-#endif
-	printk("\n%s GPRS: " FOURLONG, mode,
+	printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
 	       regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
-	printk("           " FOURLONG,
+	printk("           %016lx %016lx %016lx %016lx\n",
 	       regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
-	printk("           " FOURLONG,
+	printk("           %016lx %016lx %016lx %016lx\n",
 	       regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
-	printk("           " FOURLONG,
+	printk("           %016lx %016lx %016lx %016lx\n",
 	       regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
 	show_code(regs);
 }
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4427ab7ac23a..549a73a4b543 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -64,7 +64,6 @@ asm(
 	"	.align	4\n"
 	"	.type	savesys_ipl_nss, @function\n"
 	"savesys_ipl_nss:\n"
-#ifdef CONFIG_64BIT
 	"	stmg	6,15,48(15)\n"
 	"	lgr	14,3\n"
 	"	sam31\n"
@@ -72,13 +71,6 @@ asm(
 	"	sam64\n"
 	"	lgr	2,14\n"
 	"	lmg	6,15,48(15)\n"
-#else
-	"	stm	6,15,24(15)\n"
-	"	lr	14,3\n"
-	"	diag	2,14,0x8\n"
-	"	lr	2,14\n"
-	"	lm	6,15,24(15)\n"
-#endif
 	"	br	14\n"
 	"	.size	savesys_ipl_nss, .-savesys_ipl_nss\n"
 	"	.previous\n");
@@ -240,7 +232,6 @@ static noinline __init void detect_machine_type(void)
 
 static __init void setup_topology(void)
 {
-#ifdef CONFIG_64BIT
 	int max_mnest;
 
 	if (!test_facility(11))
@@ -251,7 +242,6 @@ static __init void setup_topology(void)
 			break;
 	}
 	topology_max_mnest = max_mnest;
-#endif
 }
 
 static void early_pgm_check_handler(void)
@@ -290,58 +280,6 @@ static noinline __init void setup_facility_list(void)
 	      ARRAY_SIZE(S390_lowcore.stfle_fac_list));
 }
 
-static __init void detect_mvpg(void)
-{
-#ifndef CONFIG_64BIT
-	int rc;
-
-	asm volatile(
-		"	la	0,0\n"
-		"	mvpg	%2,%2\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG;
-#endif
-}
-
-static __init void detect_ieee(void)
-{
-#ifndef CONFIG_64BIT
-	int rc, tmp;
-
-	asm volatile(
-		"	efpc	%1,0\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE;
-#endif
-}
-
-static __init void detect_csp(void)
-{
-#ifndef CONFIG_64BIT
-	int rc;
-
-	asm volatile(
-		"	la	0,0\n"
-		"	la	1,0\n"
-		"	la	2,4\n"
-		"	csp	0,2\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_CSP;
-#endif
-}
-
 static __init void detect_diag9c(void)
 {
 	unsigned int cpu_address;
@@ -360,7 +298,6 @@ static __init void detect_diag9c(void)
 
 static __init void detect_diag44(void)
 {
-#ifdef CONFIG_64BIT
 	int rc;
 
 	asm volatile(
@@ -371,12 +308,10 @@ static __init void detect_diag44(void)
 		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
 	if (!rc)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-#endif
 }
 
 static __init void detect_machine_facilities(void)
 {
-#ifdef CONFIG_64BIT
 	if (test_facility(8)) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
 		__ctl_set_bit(0, 23);
@@ -393,7 +328,6 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
 	if (test_facility(129))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
-#endif
 }
 
 static int __init cad_setup(char *str)
@@ -501,9 +435,6 @@ void __init startup_init(void)
 	ipl_update_parameters();
 	setup_boot_command_line();
 	create_kernel_nss();
-	detect_mvpg();
-	detect_ieee();
-	detect_csp();
 	detect_diag9c();
 	detect_diag44();
 	detect_machine_facilities();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 398329b2b518..99b44acbfcc7 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -22,27 +22,28 @@
 #include <asm/irq.h>
 
 __PT_R0      =	__PT_GPRS
-__PT_R1      =	__PT_GPRS + 4
-__PT_R2      =	__PT_GPRS + 8
-__PT_R3      =	__PT_GPRS + 12
-__PT_R4      =	__PT_GPRS + 16
-__PT_R5      =	__PT_GPRS + 20
-__PT_R6      =	__PT_GPRS + 24
-__PT_R7      =	__PT_GPRS + 28
-__PT_R8      =	__PT_GPRS + 32
-__PT_R9      =	__PT_GPRS + 36
-__PT_R10     =	__PT_GPRS + 40
-__PT_R11     =	__PT_GPRS + 44
-__PT_R12     =	__PT_GPRS + 48
-__PT_R13     =	__PT_GPRS + 524
-__PT_R14     =	__PT_GPRS + 56
-__PT_R15     =	__PT_GPRS + 60
+__PT_R1      =	__PT_GPRS + 8
+__PT_R2      =	__PT_GPRS + 16
+__PT_R3      =	__PT_GPRS + 24
+__PT_R4      =	__PT_GPRS + 32
+__PT_R5      =	__PT_GPRS + 40
+__PT_R6      =	__PT_GPRS + 48
+__PT_R7      =	__PT_GPRS + 56
+__PT_R8      =	__PT_GPRS + 64
+__PT_R9      =	__PT_GPRS + 72
+__PT_R10     =	__PT_GPRS + 80
+__PT_R11     =	__PT_GPRS + 88
+__PT_R12     =	__PT_GPRS + 96
+__PT_R13     =	__PT_GPRS + 104
+__PT_R14     =	__PT_GPRS + 112
+__PT_R15     =	__PT_GPRS + 120
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
-STACK_INIT  = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
-_TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+		   _TIF_UPROBE)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
 _CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
@@ -53,16 +54,14 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
 	basr	%r2,%r0
-	l	%r1,BASED(.Lc_hardirqs_on)
-	basr	%r14,%r1		# call trace_hardirqs_on_caller
+	brasl	%r14,trace_hardirqs_on_caller
 #endif
 	.endm
 
 	.macro	TRACE_IRQS_OFF
 #ifdef CONFIG_TRACE_IRQFLAGS
 	basr	%r2,%r0
-	l	%r1,BASED(.Lc_hardirqs_off)
-	basr	%r14,%r1		# call trace_hardirqs_off_caller
+	brasl	%r14,trace_hardirqs_off_caller
 #endif
 	.endm
 
@@ -70,73 +69,104 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #ifdef CONFIG_LOCKDEP
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jz	.+10
-	l	%r1,BASED(.Lc_lockdep_sys_exit)
-	basr	%r14,%r1		# call lockdep_sys_exit
+	brasl	%r14,lockdep_sys_exit
+#endif
+	.endm
+
+	.macro LPP newpp
+#if IS_ENABLED(CONFIG_KVM)
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.+8
+	.insn	s,0xb2800000,\newpp
+#endif
+	.endm
+
+	.macro	HANDLE_SIE_INTERCEPT scratch,reason
+#if IS_ENABLED(CONFIG_KVM)
+	tmhh	%r8,0x0001		# interrupting from user ?
+	jnz	.+62
+	lgr	\scratch,%r9
+	slg	\scratch,BASED(.Lsie_critical)
+	clg	\scratch,BASED(.Lsie_critical_length)
+	.if	\reason==1
+	# Some program interrupts are suppressing (e.g. protection).
+	# We must also check the instruction after SIE in that case.
+	# do_protection_exception will rewind to .Lrewind_pad
+	jh	.+42
+	.else
+	jhe	.+42
+	.endif
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	LPP	__SF_EMPTY+16(%r15)		# set host id
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
 #endif
 	.endm
 
 	.macro	CHECK_STACK stacksize,savearea
 #ifdef CONFIG_CHECK_STACK
 	tml	%r15,\stacksize - CONFIG_STACK_GUARD
-	la	%r14,\savearea
+	lghi	%r14,\savearea
 	jz	stack_overflow
 #endif
 	.endm
 
 	.macro	SWITCH_ASYNC savearea,stack,shift
-	tmh	%r8,0x0001		# interrupting from user ?
+	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
-	lr	%r14,%r9
-	sl	%r14,BASED(.Lc_critical_start)
-	cl	%r14,BASED(.Lc_critical_length)
+	lgr	%r14,%r9
+	slg	%r14,BASED(.Lcritical_start)
+	clg	%r14,BASED(.Lcritical_length)
 	jhe	0f
-	la	%r11,\savearea		# inside critical section, do cleanup
-	bras	%r14,cleanup_critical
-	tmh	%r8,0x0001		# retest problem state after cleanup
+	lghi	%r11,\savearea		# inside critical section, do cleanup
+	brasl	%r14,cleanup_critical
+	tmhh	%r8,0x0001		# retest problem state after cleanup
 	jnz	1f
-0:	l	%r14,\stack		# are we already on the target stack?
-	slr	%r14,%r15
-	sra	%r14,\shift
+0:	lg	%r14,\stack		# are we already on the target stack?
+	slgr	%r14,%r15
+	srag	%r14,%r14,\shift
 	jnz	1f
 	CHECK_STACK 1<<\shift,\savearea
-	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	2f
-1:	l	%r15,\stack		# load target stack
+1:	lg	%r15,\stack		# load target stack
 2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
-	.macro	ADD64 high,low,timer
-	al	\high,\timer
-	al	\low,4+\timer
-	brc	12,.+8
-	ahi	\high,1
-	.endm
-
-	.macro	SUB64 high,low,timer
-	sl	\high,\timer
-	sl	\low,4+\timer
-	brc	3,.+8
-	ahi	\high,-1
+	.macro UPDATE_VTIME scratch,enter_timer
+	lg	\scratch,__LC_EXIT_TIMER
+	slg	\scratch,\enter_timer
+	alg	\scratch,__LC_USER_TIMER
+	stg	\scratch,__LC_USER_TIMER
+	lg	\scratch,__LC_LAST_UPDATE_TIMER
+	slg	\scratch,__LC_EXIT_TIMER
+	alg	\scratch,__LC_SYSTEM_TIMER
+	stg	\scratch,__LC_SYSTEM_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
 	.endm
 
-	.macro	UPDATE_VTIME high,low,enter_timer
-	lm	\high,\low,__LC_EXIT_TIMER
-	SUB64	\high,\low,\enter_timer
-	ADD64	\high,\low,__LC_USER_TIMER
-	stm	\high,\low,__LC_USER_TIMER
-	lm	\high,\low,__LC_LAST_UPDATE_TIMER
-	SUB64	\high,\low,__LC_EXIT_TIMER
-	ADD64	\high,\low,__LC_SYSTEM_TIMER
-	stm	\high,\low,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
+	.macro	LAST_BREAK scratch
+	srag	\scratch,%r10,23
+	jz	.+10
+	stg	%r10,__TI_last_break(%r12)
 	.endm
 
 	.macro REENABLE_IRQS
-	st	%r8,__LC_RETURN_PSW
+	stg	%r8,__LC_RETURN_PSW
 	ni	__LC_RETURN_PSW,0xbf
 	ssm	__LC_RETURN_PSW
 	.endm
 
+	.macro STCK savearea
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+	.insn	s,0xb27c0000,\savearea		# store clock fast
+#else
+	.insn	s,0xb2050000,\savearea		# store clock
+#endif
+	.endm
+
 	.section .kprobes.text, "ax"
 
 /*
@@ -147,19 +177,19 @@ _PIF_WORK	= (_PIF_PER_TRAP)
  *  gpr2 = prev
  */
 ENTRY(__switch_to)
-	stm	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
-	st	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
-	l	%r4,__THREAD_info(%r2)		# get thread_info of prev
-	l	%r5,__THREAD_info(%r3)		# get thread_info of next
-	lr	%r15,%r5
-	ahi	%r15,STACK_INIT			# end of kernel stack of next
-	st	%r3,__LC_CURRENT		# store task struct of next
-	st	%r5,__LC_THREAD_INFO		# store thread info of next
-	st	%r15,__LC_KERNEL_STACK		# store end of kernel stack
+	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
+	stg	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
+	lg	%r4,__THREAD_info(%r2)		# get thread_info of prev
+	lg	%r5,__THREAD_info(%r3)		# get thread_info of next
+	lgr	%r15,%r5
+	aghi	%r15,STACK_INIT			# end of kernel stack of next
+	stg	%r3,__LC_CURRENT		# store task struct of next
+	stg	%r5,__LC_THREAD_INFO		# store thread info of next
+	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
-	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3)	# store pid of next
-	l	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
-	lm	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
+	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
+	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
 .L__critical_start:
@@ -170,75 +200,83 @@ ENTRY(__switch_to)
 
 ENTRY(system_call)
 	stpt	__LC_SYNC_ENTER_TIMER
-.Lsysc_stm:
-	stm	%r8,%r15,__LC_SAVE_AREA_SYNC
-	l	%r12,__LC_THREAD_INFO
-	l	%r13,__LC_SVC_NEW_PSW+4
-	lhi	%r14,_PIF_SYSCALL
+.Lsysc_stmg:
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	lghi	%r14,_PIF_SYSCALL
 .Lsysc_per:
-	l	%r15,__LC_KERNEL_STACK
+	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
 .Lsysc_vtime:
-	UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
-	stm	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
-	mvc	__PT_PSW(8,%r11),__LC_SVC_OLD_PSW
+	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
+	LAST_BREAK %r13
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
-	st	%r14,__PT_FLAGS(%r11)
+	stg	%r14,__PT_FLAGS(%r11)
 .Lsysc_do_svc:
-	l	%r10,__TI_sysc_table(%r12)	# 31 bit system call table
-	lh	%r8,__PT_INT_CODE+2(%r11)
-	sla	%r8,2				# shift and test for svc0
+	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
+	llgh	%r8,__PT_INT_CODE+2(%r11)
+	slag	%r8,%r8,2			# shift and test for svc 0
 	jnz	.Lsysc_nr_ok
 	# svc 0: system call number in %r1
-	cl	%r1,BASED(.Lnr_syscalls)
+	llgfr	%r1,%r1				# clear high word in r1
+	cghi	%r1,NR_syscalls
 	jnl	.Lsysc_nr_ok
 	sth	%r1,__PT_INT_CODE+2(%r11)
-	lr	%r8,%r1
-	sla	%r8,2
+	slag	%r8,%r1,2
 .Lsysc_nr_ok:
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	st	%r2,__PT_ORIG_GPR2(%r11)
-	st	%r7,STACK_FRAME_OVERHEAD(%r15)
-	l	%r9,0(%r8,%r10)			# get system call addr.
-	tm	__TI_flags+3(%r12),_TIF_TRACE
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	stg	%r2,__PT_ORIG_GPR2(%r11)
+	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
+	lgf	%r9,0(%r8,%r10)			# get system call add.
+	tm	__TI_flags+7(%r12),_TIF_TRACE
 	jnz	.Lsysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
-	st	%r2,__PT_R2(%r11)		# store return value
+	stg	%r2,__PT_R2(%r11)		# store return value
 
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
 	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
 	jno	.Lsysc_restore
-	tm	__PT_FLAGS+3(%r11),_PIF_WORK
+	tm	__PT_FLAGS+7(%r11),_PIF_WORK
 	jnz	.Lsysc_work
-	tm	__TI_flags+3(%r12),_TIF_WORK
-	jnz	.Lsysc_work			# check for thread work
-	tm	__LC_CPU_FLAGS+3,_CIF_WORK
+	tm	__TI_flags+7(%r12),_TIF_WORK
+	jnz	.Lsysc_work			# check for work
+	tm	__LC_CPU_FLAGS+7,_CIF_WORK
 	jnz	.Lsysc_work
 .Lsysc_restore:
-	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
+	lg	%r14,__LC_VDSO_PER_CPU
+	lmg	%r0,%r10,__PT_R0(%r11)
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
 	stpt	__LC_EXIT_TIMER
-	lm	%r0,%r15,__PT_R0(%r11)
-	lpsw	__LC_RETURN_PSW
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+	lmg	%r11,%r15,__PT_R11(%r11)
+	lpswe	__LC_RETURN_PSW
 .Lsysc_done:
 
 #
 # One of the work bits is on. Find out which one.
 #
 .Lsysc_work:
-	tm	__LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
 	jo	.Lsysc_mcck_pending
-	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	.Lsysc_reschedule
-	tm	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
+#ifdef CONFIG_UPROBES
+	tm	__TI_flags+7(%r12),_TIF_UPROBE
+	jo	.Lsysc_uprobe_notify
+#endif
+	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	jo	.Lsysc_singlestep
-	tm	__TI_flags+3(%r12),_TIF_SIGPENDING
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
 	jo	.Lsysc_sigpending
-	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
-	tm	__LC_CPU_FLAGS+3,_CIF_ASCE
+	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
 
@@ -246,109 +284,109 @@ ENTRY(system_call)
 # _TIF_NEED_RESCHED is set, call schedule
 #
 .Lsysc_reschedule:
-	l	%r1,BASED(.Lc_schedule)
-	la	%r14,BASED(.Lsysc_return)
-	br	%r1			# call schedule
+	larl	%r14,.Lsysc_return
+	jg	schedule
 
 #
 # _CIF_MCCK_PENDING is set, call handler
 #
 .Lsysc_mcck_pending:
-	l	%r1,BASED(.Lc_handle_mcck)
-	la	%r14,BASED(.Lsysc_return)
-	br	%r1			# TIF bit will be cleared by handler
+	larl	%r14,.Lsysc_return
+	jg	s390_handle_mcck	# TIF bit will be cleared by handler
 
 #
 # _CIF_ASCE is set, load user space asce
 #
 .Lsysc_uaccess:
-	ni	__LC_CPU_FLAGS+3,255-_CIF_ASCE
-	lctl	%c1,%c1,__LC_USER_ASCE	# load primary asce
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	.Lsysc_return
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
 .Lsysc_sigpending:
-	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Lc_do_signal)
-	basr	%r14,%r1		# call do_signal
-	tm	__PT_FLAGS+3(%r11),_PIF_SYSCALL
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_signal
+	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
 	jno	.Lsysc_return
-	lm	%r2,%r7,__PT_R2(%r11)	# load svc arguments
-	l	%r10,__TI_sysc_table(%r12)	# 31 bit system call table
-	xr	%r8,%r8			# svc 0 returns -ENOSYS
-	clc	__PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
+	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
+	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
+	lghi	%r8,0			# svc 0 returns -ENOSYS
+	llgh	%r1,__PT_INT_CODE+2(%r11)	# load new svc number
+	cghi	%r1,NR_syscalls
 	jnl	.Lsysc_nr_ok		# invalid svc number -> do svc 0
-	lh	%r8,__PT_INT_CODE+2(%r11)	# load new svc number
-	sla	%r8,2
+	slag	%r8,%r1,2
 	j	.Lsysc_nr_ok		# restart svc
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
 #
 .Lsysc_notify_resume:
-	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Lc_do_notify_resume)
-	la	%r14,BASED(.Lsysc_return)
-	br	%r1			# call do_notify_resume
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return
+	jg	do_notify_resume
+
+#
+# _TIF_UPROBE is set, call uprobe_notify_resume
+#
+#ifdef CONFIG_UPROBES
+.Lsysc_uprobe_notify:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return
+	jg	uprobe_notify_resume
+#endif
 
 #
 # _PIF_PER_TRAP is set, call do_per_trap
 #
 .Lsysc_singlestep:
-	ni	__PT_FLAGS+3(%r11),255-_PIF_PER_TRAP
-	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Lc_do_per_trap)
-	la	%r14,BASED(.Lsysc_return)
-	br	%r1			# call do_per_trap
+	ni	__PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return
+	jg	do_per_trap
 
 #
 # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
 # and after the system call
 #
 .Lsysc_tracesys:
-	l	%r1,BASED(.Lc_trace_enter)
-	lr	%r2,%r11		# pass pointer to pt_regs
+	lgr	%r2,%r11		# pass pointer to pt_regs
 	la	%r3,0
-	xr	%r0,%r0
-	icm	%r0,3,__PT_INT_CODE+2(%r11)
-	st	%r0,__PT_R2(%r11)
-	basr	%r14,%r1		# call do_syscall_trace_enter
-	cl	%r2,BASED(.Lnr_syscalls)
-	jnl	.Lsysc_tracenogo
-	lr	%r8,%r2
-	sll	%r8,2
-	l	%r9,0(%r8,%r10)
+	llgh	%r0,__PT_INT_CODE+2(%r11)
+	stg	%r0,__PT_R2(%r11)
+	brasl	%r14,do_syscall_trace_enter
+	lghi	%r0,NR_syscalls
+	clgr	%r0,%r2
+	jnh	.Lsysc_tracenogo
+	sllg	%r8,%r2,2
+	lgf	%r9,0(%r8,%r10)
 .Lsysc_tracego:
-	lm	%r3,%r7,__PT_R3(%r11)
-	st	%r7,STACK_FRAME_OVERHEAD(%r15)
-	l	%r2,__PT_ORIG_GPR2(%r11)
+	lmg	%r3,%r7,__PT_R3(%r11)
+	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
+	lg	%r2,__PT_ORIG_GPR2(%r11)
 	basr	%r14,%r9		# call sys_xxx
-	st	%r2,__PT_R2(%r11)	# store return value
+	stg	%r2,__PT_R2(%r11)	# store return value
 .Lsysc_tracenogo:
-	tm	__TI_flags+3(%r12),_TIF_TRACE
+	tm	__TI_flags+7(%r12),_TIF_TRACE
 	jz	.Lsysc_return
-	l	%r1,BASED(.Lc_trace_exit)
-	lr	%r2,%r11		# pass pointer to pt_regs
-	la	%r14,BASED(.Lsysc_return)
-	br	%r1			# call do_syscall_trace_exit
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return
+	jg	do_syscall_trace_exit
 
 #
 # a new process exits the kernel with ret_from_fork
 #
 ENTRY(ret_from_fork)
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	l	%r12,__LC_THREAD_INFO
-	l	%r13,__LC_SVC_NEW_PSW+4
-	l	%r1,BASED(.Lc_schedule_tail)
-	basr	%r14,%r1		# call schedule_tail
+	lg	%r12,__LC_THREAD_INFO
+	brasl	%r14,schedule_tail
 	TRACE_IRQS_ON
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
 	jne	.Lsysc_tracenogo
 	# it's a kernel thread
-	lm	%r9,%r10,__PT_R9(%r11)	# load gprs
+	lmg	%r9,%r10,__PT_R9(%r11)	# load gprs
 ENTRY(kernel_thread_starter)
 	la	%r2,0(%r10)
 	basr	%r14,%r9
@@ -360,46 +398,54 @@ ENTRY(kernel_thread_starter)
 
 ENTRY(pgm_check_handler)
 	stpt	__LC_SYNC_ENTER_TIMER
-	stm	%r8,%r15,__LC_SAVE_AREA_SYNC
-	l	%r12,__LC_THREAD_INFO
-	l	%r13,__LC_SVC_NEW_PSW+4
-	lm	%r8,%r9,__LC_PGM_OLD_PSW
-	tmh	%r8,0x0001		# test problem state bit
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call
+	lmg	%r8,%r9,__LC_PGM_OLD_PSW
+	HANDLE_SIE_INTERCEPT %r14,1
+	tmhh	%r8,0x0001		# test problem state bit
 	jnz	1f			# -> fault in user space
-	tmh	%r8,0x4000		# PER bit set in old PSW ?
+	tmhh	%r8,0x4000		# PER bit set in old PSW ?
 	jnz	0f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
 0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
-	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	2f
-1:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
-	l	%r15,__LC_KERNEL_STACK
+1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
+	LAST_BREAK %r14
+	lg	%r15,__LC_KERNEL_STACK
+	lg	%r14,__TI_task(%r12)
+	lghi	%r13,__LC_PGM_TDB
+	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
+	jz	2f
+	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
 2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stm	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
-	stm	%r8,%r9,__PT_PSW(%r11)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(4,%r11),__LC_PGM_ILC
-	mvc	__PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE
-	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
+	mvc	__PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jz	0f
-	l	%r1,__TI_task(%r12)
-	tmh	%r8,0x0001		# kernel per event ?
+	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
-	oi	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
-	mvc	__THREAD_per_address(4,%r1),__LC_PER_ADDRESS
-	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CODE
-	mvc	__THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
+	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
+	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
+	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
 0:	REENABLE_IRQS
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r1,BASED(.Lc_jump_table)
-	la	%r10,0x7f
-	n	%r10,__PT_INT_CODE(%r11)
-	je	.Lsysc_return
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	larl	%r1,pgm_check_table
+	llgh	%r10,__PT_INT_CODE+2(%r11)
+	nill	%r10,0x007f
 	sll	%r10,2
-	l	%r1,0(%r10,%r1)		# load address of handler routine
-	lr	%r2,%r11		# pass pointer to pt_regs
+	je	.Lsysc_return
+	lgf	%r1,0(%r10,%r1)		# load address of handler routine
+	lgr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
 	j	.Lsysc_return
 
@@ -408,54 +454,55 @@ ENTRY(pgm_check_handler)
 #
 .Lpgm_kprobe:
 	REENABLE_IRQS
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r1,BASED(.Lc_do_per_trap)
-	lr	%r2,%r11		# pass pointer to pt_regs
-	basr	%r14,%r1		# call do_per_trap
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_per_trap
 	j	.Lsysc_return
 
 #
 # single stepped system call
 #
 .Lpgm_svcper:
-	mvc	__LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
-	mvc	__LC_RETURN_PSW+4(4),BASED(.Lc_sysc_per)
-	lhi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
-	lpsw	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
+	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+	larl	%r14,.Lsysc_per
+	stg	%r14,__LC_RETURN_PSW+8
+	lghi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
+	lpswe	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
 
 /*
  * IO interrupt handler routine
  */
-
 ENTRY(io_int_handler)
-	stck	__LC_INT_CLOCK
+	STCK	__LC_INT_CLOCK
 	stpt	__LC_ASYNC_ENTER_TIMER
-	stm	%r8,%r15,__LC_SAVE_AREA_ASYNC
-	l	%r12,__LC_THREAD_INFO
-	l	%r13,__LC_SVC_NEW_PSW+4
-	lm	%r8,%r9,__LC_IO_OLD_PSW
-	tmh	%r8,0x0001		# interrupting from user ?
+	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call
+	lmg	%r8,%r9,__LC_IO_OLD_PSW
+	HANDLE_SIE_INTERCEPT %r14,2
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+	tmhh	%r8,0x0001		# interrupting from user?
 	jz	.Lio_skip
-	UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
+	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK %r14
 .Lio_skip:
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	stm	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
-	stm	%r8,%r9,__PT_PSW(%r11)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 .Lio_loop:
-	l	%r1,BASED(.Lc_do_IRQ)
-	lr	%r2,%r11		# pass pointer to pt_regs
-	lhi	%r3,IO_INTERRUPT
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	lghi	%r3,IO_INTERRUPT
 	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
 	jz	.Lio_call
-	lhi	%r3,THIN_INTERRUPT
+	lghi	%r3,THIN_INTERRUPT
 .Lio_call:
-	basr	%r14,%r1		# call do_IRQ
-	tm	__LC_MACHINE_FLAGS+2,0x10	# MACHINE_FLAG_LPAR
+	brasl	%r14,do_IRQ
+	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
 	jz	.Lio_return
 	tpi	0
 	jz	.Lio_return
@@ -465,21 +512,26 @@ ENTRY(io_int_handler)
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
 .Lio_tif:
-	tm	__TI_flags+3(%r12),_TIF_WORK
+	tm	__TI_flags+7(%r12),_TIF_WORK
 	jnz	.Lio_work		# there is work to do (signals etc.)
-	tm	__LC_CPU_FLAGS+3,_CIF_WORK
+	tm	__LC_CPU_FLAGS+7,_CIF_WORK
 	jnz	.Lio_work
 .Lio_restore:
-	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
+	lg	%r14,__LC_VDSO_PER_CPU
+	lmg	%r0,%r10,__PT_R0(%r11)
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
 	stpt	__LC_EXIT_TIMER
-	lm	%r0,%r15,__PT_R0(%r11)
-	lpsw	__LC_RETURN_PSW
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+	lmg	%r11,%r15,__PT_R11(%r11)
+	lpswe	__LC_RETURN_PSW
 .Lio_done:
 
 #
 # There is work todo, find out in which context we have been interrupted:
 # 1) if we return to user space we can do all _TIF_WORK work
-# 2) if we return to kernel code and preemptive scheduling is enabled check
+# 2) if we return to kernel code and kvm is enabled check if we need to
+#    modify the psw to leave SIE
+# 3) if we return to kernel code and preemptive scheduling is enabled check
 #    the preemption counter and if it is zero call preempt_schedule_irq
 # Before any work can be done, a switch to the kernel stack is required.
 #
@@ -489,21 +541,20 @@ ENTRY(io_int_handler)
 #ifdef CONFIG_PREEMPT
 	# check for preemptive scheduling
 	icm	%r0,15,__TI_precount(%r12)
-	jnz	.Lio_restore		# preemption disabled
-	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
+	jnz	.Lio_restore		# preemption is disabled
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jno	.Lio_restore
 	# switch to kernel stack
-	l	%r1,__PT_R15(%r11)
-	ahi	%r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	lg	%r1,__PT_R15(%r11)
+	aghi	%r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
+	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
-	lr	%r15,%r1
+	lgr	%r15,%r1
 	# TRACE_IRQS_ON already done at .Lio_return, call
 	# TRACE_IRQS_OFF to keep things symmetrical
 	TRACE_IRQS_OFF
-	l	%r1,BASED(.Lc_preempt_irq)
-	basr	%r14,%r1		# call preempt_schedule_irq
+	brasl	%r14,preempt_schedule_irq
 	j	.Lio_return
 #else
 	j	.Lio_restore
@@ -513,25 +564,25 @@ ENTRY(io_int_handler)
 # Need to do work before returning to userspace, switch to kernel stack
 #
 .Lio_work_user:
-	l	%r1,__LC_KERNEL_STACK
+	lg	%r1,__LC_KERNEL_STACK
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
+	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
-	lr	%r15,%r1
+	lgr	%r15,%r1
 
 #
 # One of the work bits is on. Find out which one.
 #
 .Lio_work_tif:
-	tm	__LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
 	jo	.Lio_mcck_pending
-	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	.Lio_reschedule
-	tm	__TI_flags+3(%r12),_TIF_SIGPENDING
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
 	jo	.Lio_sigpending
-	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
-	tm	__LC_CPU_FLAGS+3,_CIF_ASCE
+	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
 
@@ -540,8 +591,7 @@ ENTRY(io_int_handler)
 #
 .Lio_mcck_pending:
 	# TRACE_IRQS_ON already done at .Lio_return
-	l	%r1,BASED(.Lc_handle_mcck)
-	basr	%r14,%r1		# TIF bit will be cleared by handler
+	brasl	%r14,s390_handle_mcck	# TIF bit will be cleared by handler
 	TRACE_IRQS_OFF
 	j	.Lio_return
 
@@ -549,8 +599,8 @@ ENTRY(io_int_handler)
 # _CIF_ASCE is set, load user space asce
 #
 .Lio_uaccess:
-	ni	__LC_CPU_FLAGS+3,255-_CIF_ASCE
-	lctl	%c1,%c1,__LC_USER_ASCE	# load primary asce
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	.Lio_return
 
 #
@@ -558,35 +608,32 @@ ENTRY(io_int_handler)
 #
 .Lio_reschedule:
 	# TRACE_IRQS_ON already done at .Lio_return
-	l	%r1,BASED(.Lc_schedule)
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	basr	%r14,%r1		# call scheduler
+	brasl	%r14,schedule		# call scheduler
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
 	j	.Lio_return
 
 #
-# _TIF_SIGPENDING is set, call do_signal
+# _TIF_SIGPENDING or is set, call do_signal
 #
 .Lio_sigpending:
 	# TRACE_IRQS_ON already done at .Lio_return
-	l	%r1,BASED(.Lc_do_signal)
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	lr	%r2,%r11		# pass pointer to pt_regs
-	basr	%r14,%r1		# call do_signal
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_signal
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
 	j	.Lio_return
 
 #
-# _TIF_SIGPENDING is set, call do_signal
+# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
 #
 .Lio_notify_resume:
 	# TRACE_IRQS_ON already done at .Lio_return
-	l	%r1,BASED(.Lc_do_notify_resume)
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	lr	%r2,%r11		# pass pointer to pt_regs
-	basr	%r14,%r1		# call do_notify_resume
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_notify_resume
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
 	j	.Lio_return
@@ -594,45 +641,47 @@ ENTRY(io_int_handler)
 /*
  * External interrupt handler routine
  */
-
 ENTRY(ext_int_handler)
-	stck	__LC_INT_CLOCK
+	STCK	__LC_INT_CLOCK
 	stpt	__LC_ASYNC_ENTER_TIMER
-	stm	%r8,%r15,__LC_SAVE_AREA_ASYNC
-	l	%r12,__LC_THREAD_INFO
-	l	%r13,__LC_SVC_NEW_PSW+4
-	lm	%r8,%r9,__LC_EXT_OLD_PSW
-	tmh	%r8,0x0001		# interrupting from user ?
+	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call
+	lmg	%r8,%r9,__LC_EXT_OLD_PSW
+	HANDLE_SIE_INTERCEPT %r14,3
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+	tmhh	%r8,0x0001		# interrupting from user ?
 	jz	.Lext_skip
-	UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
+	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK %r14
 .Lext_skip:
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	stm	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
-	stm	%r8,%r9,__PT_PSW(%r11)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	lghi	%r1,__LC_EXT_PARAMS2
 	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
-	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
+	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
-	l	%r1,BASED(.Lc_do_IRQ)
-	lr	%r2,%r11		# pass pointer to pt_regs
-	lhi	%r3,EXT_INTERRUPT
-	basr	%r14,%r1		# call do_IRQ
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	lghi	%r3,EXT_INTERRUPT
+	brasl	%r14,do_IRQ
 	j	.Lio_return
 
 /*
  * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
  */
 ENTRY(psw_idle)
-	st	%r3,__SF_EMPTY(%r15)
-	basr	%r1,0
-	la	%r1,.Lpsw_idle_lpsw+4-.(%r1)
-	st	%r1,__SF_EMPTY+4(%r15)
-	oi	__SF_EMPTY+4(%r15),0x80
-	stck	__CLOCK_IDLE_ENTER(%r2)
+	stg	%r3,__SF_EMPTY(%r15)
+	larl	%r1,.Lpsw_idle_lpsw+4
+	stg	%r1,__SF_EMPTY+8(%r15)
+	STCK	__CLOCK_IDLE_ENTER(%r2)
 	stpt	__TIMER_IDLE_ENTER(%r2)
 .Lpsw_idle_lpsw:
-	lpsw	__SF_EMPTY(%r15)
+	lpswe	__SF_EMPTY(%r15)
 	br	%r14
 .Lpsw_idle_end:
 
@@ -641,17 +690,19 @@ ENTRY(psw_idle)
 /*
  * Machine check handler routines
  */
-
 ENTRY(mcck_int_handler)
-	stck	__LC_MCCK_CLOCK
-	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
-	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA	# revalidate gprs
-	l	%r12,__LC_THREAD_INFO
-	l	%r13,__LC_SVC_NEW_PSW+4
-	lm	%r8,%r9,__LC_MCK_OLD_PSW
+	STCK	__LC_MCCK_CLOCK
+	la	%r1,4095		# revalidate r1
+	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
+	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call
+	lmg	%r8,%r9,__LC_MCK_OLD_PSW
+	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
-	la	%r14,__LC_CPU_TIMER_SAVE_AREA
+	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
 	jo	3f
@@ -669,76 +720,76 @@ ENTRY(mcck_int_handler)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	.Lmcck_panic		# no -> skip cleanup critical
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
 	tm	%r8,0x0001		# interrupting from user ?
 	jz	.Lmcck_skip
-	UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER
+	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
+	LAST_BREAK %r14
 .Lmcck_skip:
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT
-	stm	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
-	stm	%r8,%r9,__PT_PSW(%r11)
-	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r1,BASED(.Lc_do_machine_check)
-	lr	%r2,%r11		# pass pointer to pt_regs
-	basr	%r14,%r1		# call s390_do_machine_check
+	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),0(%r14)
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,s390_do_machine_check
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jno	.Lmcck_return
-	l	%r1,__LC_KERNEL_STACK	# switch to kernel stack
+	lg	%r1,__LC_KERNEL_STACK	# switch to kernel stack
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	lr	%r15,%r1
+	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+	la	%r11,STACK_FRAME_OVERHEAD(%r1)
+	lgr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	tm	__LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
 	jno	.Lmcck_return
 	TRACE_IRQS_OFF
-	l	%r1,BASED(.Lc_handle_mcck)
-	basr	%r14,%r1		# call s390_handle_mcck
+	brasl	%r14,s390_handle_mcck
 	TRACE_IRQS_ON
 .Lmcck_return:
-	mvc	__LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
+	lg	%r14,__LC_VDSO_PER_CPU
+	lmg	%r0,%r10,__PT_R0(%r11)
+	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
-	lm	%r0,%r15,__PT_R0(%r11)
 	stpt	__LC_EXIT_TIMER
-	lpsw	__LC_RETURN_MCCK_PSW
-0:	lm	%r0,%r15,__PT_R0(%r11)
-	lpsw	__LC_RETURN_MCCK_PSW
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+0:	lmg	%r11,%r15,__PT_R11(%r11)
+	lpswe	__LC_RETURN_MCCK_PSW
 
 .Lmcck_panic:
-	l	%r14,__LC_PANIC_STACK
-	slr	%r14,%r15
-	sra	%r14,PAGE_SHIFT
+	lg	%r14,__LC_PANIC_STACK
+	slgr	%r14,%r15
+	srag	%r14,%r14,PAGE_SHIFT
 	jz	0f
-	l	%r15,__LC_PANIC_STACK
-	j	.Lmcck_skip
-0:	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	lg	%r15,__LC_PANIC_STACK
+0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	.Lmcck_skip
 
 #
 # PSW restart interrupt handler
 #
 ENTRY(restart_int_handler)
-	st	%r15,__LC_SAVE_AREA_RESTART
-	l	%r15,__LC_RESTART_STACK
-	ahi	%r15,-__PT_SIZE			# create pt_regs on stack
+	stg	%r15,__LC_SAVE_AREA_RESTART
+	lg	%r15,__LC_RESTART_STACK
+	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
 	xc	0(__PT_SIZE,%r15),0(%r15)
-	stm	%r0,%r14,__PT_R0(%r15)
-	mvc	__PT_R15(4,%r15),__LC_SAVE_AREA_RESTART
-	mvc	__PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw
-	ahi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack
+	stmg	%r0,%r14,__PT_R0(%r15)
+	mvc	__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+	mvc	__PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
+	aghi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack
 	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
-	l	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
-	l	%r2,__LC_RESTART_DATA
-	l	%r3,__LC_RESTART_SOURCE
-	ltr	%r3,%r3				# test source cpu address
+	lg	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
+	lg	%r2,__LC_RESTART_DATA
+	lg	%r3,__LC_RESTART_SOURCE
+	ltgr	%r3,%r3				# test source cpu address
 	jm	1f				# negative -> skip source stop
 0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
 	brc	10,0b				# wait for status stored
 1:	basr	%r14,%r1			# call function
 	stap	__SF_EMPTY(%r15)		# store cpu address
-	lh	%r3,__SF_EMPTY(%r15)
+	llgh	%r3,__SF_EMPTY(%r15)
 2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
 	brc	2,2b
 3:	j	3b
@@ -752,215 +803,257 @@ ENTRY(restart_int_handler)
  * Setup a pt_regs so that show_trace can provide a good call trace.
  */
 stack_overflow:
-	l	%r15,__LC_PANIC_STACK	# change to panic stack
+	lg	%r15,__LC_PANIC_STACK	# change to panic stack
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stm	%r0,%r7,__PT_R0(%r11)
-	stm	%r8,%r9,__PT_PSW(%r11)
-	mvc	__PT_R8(32,%r11),0(%r14)
-	l	%r1,BASED(1f)
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	lr	%r2,%r11		# pass pointer to pt_regs
-	br	%r1			# branch to kernel_stack_overflow
-1:	.long	kernel_stack_overflow
+	stmg	%r0,%r7,__PT_R0(%r11)
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_R8(64,%r11),0(%r14)
+	stg	%r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	jg	kernel_stack_overflow
 #endif
 
+	.align	8
 .Lcleanup_table:
-	.long	system_call + 0x80000000
-	.long	.Lsysc_do_svc + 0x80000000
-	.long	.Lsysc_tif + 0x80000000
-	.long	.Lsysc_restore + 0x80000000
-	.long	.Lsysc_done + 0x80000000
-	.long	.Lio_tif + 0x80000000
-	.long	.Lio_restore + 0x80000000
-	.long	.Lio_done + 0x80000000
-	.long	psw_idle + 0x80000000
-	.long	.Lpsw_idle_end + 0x80000000
+	.quad	system_call
+	.quad	.Lsysc_do_svc
+	.quad	.Lsysc_tif
+	.quad	.Lsysc_restore
+	.quad	.Lsysc_done
+	.quad	.Lio_tif
+	.quad	.Lio_restore
+	.quad	.Lio_done
+	.quad	psw_idle
+	.quad	.Lpsw_idle_end
 
 cleanup_critical:
-	cl	%r9,BASED(.Lcleanup_table)	# system_call
+	clg	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
-	cl	%r9,BASED(.Lcleanup_table+4)	# .Lsysc_do_svc
+	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
 	jl	.Lcleanup_system_call
-	cl	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_tif
+	clg	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_tif
 	jl	0f
-	cl	%r9,BASED(.Lcleanup_table+12)	# .Lsysc_restore
+	clg	%r9,BASED(.Lcleanup_table+24)	# .Lsysc_restore
 	jl	.Lcleanup_sysc_tif
-	cl	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_done
+	clg	%r9,BASED(.Lcleanup_table+32)	# .Lsysc_done
 	jl	.Lcleanup_sysc_restore
-	cl	%r9,BASED(.Lcleanup_table+20)	# .Lio_tif
+	clg	%r9,BASED(.Lcleanup_table+40)	# .Lio_tif
 	jl	0f
-	cl	%r9,BASED(.Lcleanup_table+24)	# .Lio_restore
+	clg	%r9,BASED(.Lcleanup_table+48)	# .Lio_restore
 	jl	.Lcleanup_io_tif
-	cl	%r9,BASED(.Lcleanup_table+28)	# .Lio_done
+	clg	%r9,BASED(.Lcleanup_table+56)	# .Lio_done
 	jl	.Lcleanup_io_restore
-	cl	%r9,BASED(.Lcleanup_table+32)	# psw_idle
+	clg	%r9,BASED(.Lcleanup_table+64)	# psw_idle
 	jl	0f
-	cl	%r9,BASED(.Lcleanup_table+36)	# .Lpsw_idle_end
+	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
 	jl	.Lcleanup_idle
 0:	br	%r14
 
+
 .Lcleanup_system_call:
 	# check if stpt has been executed
-	cl	%r9,BASED(.Lcleanup_system_call_insn)
+	clg	%r9,BASED(.Lcleanup_system_call_insn)
 	jh	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	chi	%r11,__LC_SAVE_AREA_ASYNC
+	cghi	%r11,__LC_SAVE_AREA_ASYNC
 	je	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0:	# check if stm has been executed
-	cl	%r9,BASED(.Lcleanup_system_call_insn+4)
+0:	# check if stmg has been executed
+	clg	%r9,BASED(.Lcleanup_system_call_insn+8)
 	jh	0f
-	mvc	__LC_SAVE_AREA_SYNC(32),0(%r11)
-0:	# set up saved registers r12, and r13
-	st	%r12,16(%r11)		# r12 thread-info pointer
-	st	%r13,20(%r11)		# r13 literal-pool pointer
-	# check if the user time calculation has been done
-	cl	%r9,BASED(.Lcleanup_system_call_insn+8)
+	mvc	__LC_SAVE_AREA_SYNC(64),0(%r11)
+0:	# check if base register setup + TIF bit load has been done
+	clg	%r9,BASED(.Lcleanup_system_call_insn+16)
+	jhe	0f
+	# set up saved registers r10 and r12
+	stg	%r10,16(%r11)		# r10 last break
+	stg	%r12,32(%r11)		# r12 thread-info pointer
+0:	# check if the user time update has been done
+	clg	%r9,BASED(.Lcleanup_system_call_insn+24)
 	jh	0f
-	l	%r10,__LC_EXIT_TIMER
-	l	%r15,__LC_EXIT_TIMER+4
-	SUB64	%r10,%r15,__LC_SYNC_ENTER_TIMER
-	ADD64	%r10,%r15,__LC_USER_TIMER
-	st	%r10,__LC_USER_TIMER
-	st	%r15,__LC_USER_TIMER+4
-0:	# check if the system time calculation has been done
-	cl	%r9,BASED(.Lcleanup_system_call_insn+12)
+	lg	%r15,__LC_EXIT_TIMER
+	slg	%r15,__LC_SYNC_ENTER_TIMER
+	alg	%r15,__LC_USER_TIMER
+	stg	%r15,__LC_USER_TIMER
+0:	# check if the system time update has been done
+	clg	%r9,BASED(.Lcleanup_system_call_insn+32)
 	jh	0f
-	l	%r10,__LC_LAST_UPDATE_TIMER
-	l	%r15,__LC_LAST_UPDATE_TIMER+4
-	SUB64	%r10,%r15,__LC_EXIT_TIMER
-	ADD64	%r10,%r15,__LC_SYSTEM_TIMER
-	st	%r10,__LC_SYSTEM_TIMER
-	st	%r15,__LC_SYSTEM_TIMER+4
+	lg	%r15,__LC_LAST_UPDATE_TIMER
+	slg	%r15,__LC_EXIT_TIMER
+	alg	%r15,__LC_SYSTEM_TIMER
+	stg	%r15,__LC_SYSTEM_TIMER
 0:	# update accounting time stamp
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	# set up saved register 11
-	l	%r15,__LC_KERNEL_STACK
+	# do LAST_BREAK
+	lg	%r9,16(%r11)
+	srag	%r9,%r9,23
+	jz	0f
+	mvc	__TI_last_break(8,%r12),16(%r11)
+0:	# set up saved register r11
+	lg	%r15,__LC_KERNEL_STACK
 	la	%r9,STACK_FRAME_OVERHEAD(%r15)
-	st	%r9,12(%r11)		# r11 pt_regs pointer
+	stg	%r9,24(%r11)		# r11 pt_regs pointer
 	# fill pt_regs
-	mvc	__PT_R8(32,%r9),__LC_SAVE_AREA_SYNC
-	stm	%r0,%r7,__PT_R0(%r9)
-	mvc	__PT_PSW(8,%r9),__LC_SVC_OLD_PSW
+	mvc	__PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
+	stmg	%r0,%r7,__PT_R0(%r9)
+	mvc	__PT_PSW(16,%r9),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
-	xc	__PT_FLAGS(4,%r9),__PT_FLAGS(%r9)
-	mvi	__PT_FLAGS+3(%r9),_PIF_SYSCALL
-	# setup saved register 15
-	st	%r15,28(%r11)		# r15 stack pointer
+	xc	__PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+	mvi	__PT_FLAGS+7(%r9),_PIF_SYSCALL
+	# setup saved register r15
+	stg	%r15,56(%r11)		# r15 stack pointer
 	# set new psw address and exit
-	l	%r9,BASED(.Lcleanup_table+4)	# .Lsysc_do_svc + 0x80000000
+	larl	%r9,.Lsysc_do_svc
 	br	%r14
 .Lcleanup_system_call_insn:
-	.long	system_call + 0x80000000
-	.long	.Lsysc_stm + 0x80000000
-	.long	.Lsysc_vtime + 0x80000000 + 36
-	.long	.Lsysc_vtime + 0x80000000 + 76
+	.quad	system_call
+	.quad	.Lsysc_stmg
+	.quad	.Lsysc_per
+	.quad	.Lsysc_vtime+18
+	.quad	.Lsysc_vtime+42
 
 .Lcleanup_sysc_tif:
-	l	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_tif + 0x80000000
+	larl	%r9,.Lsysc_tif
 	br	%r14
 
 .Lcleanup_sysc_restore:
-	cl	%r9,BASED(.Lcleanup_sysc_restore_insn)
-	jhe	0f
-	l	%r9,12(%r11)		# get saved pointer to pt_regs
-	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
-	mvc	0(32,%r11),__PT_R8(%r9)
-	lm	%r0,%r7,__PT_R0(%r9)
-0:	lm	%r8,%r9,__LC_RETURN_PSW
+	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
+	je	0f
+	lg	%r9,24(%r11)		# get saved pointer to pt_regs
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
+	mvc	0(64,%r11),__PT_R8(%r9)
+	lmg	%r0,%r7,__PT_R0(%r9)
+0:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
 .Lcleanup_sysc_restore_insn:
-	.long	.Lsysc_done - 4 + 0x80000000
+	.quad	.Lsysc_done - 4
 
 .Lcleanup_io_tif:
-	l	%r9,BASED(.Lcleanup_table+20)	# .Lio_tif + 0x80000000
+	larl	%r9,.Lio_tif
 	br	%r14
 
 .Lcleanup_io_restore:
-	cl	%r9,BASED(.Lcleanup_io_restore_insn)
-	jhe	0f
-	l	%r9,12(%r11)		# get saved r11 pointer to pt_regs
-	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
-	mvc	0(32,%r11),__PT_R8(%r9)
-	lm	%r0,%r7,__PT_R0(%r9)
-0:	lm	%r8,%r9,__LC_RETURN_PSW
+	clg	%r9,BASED(.Lcleanup_io_restore_insn)
+	je	0f
+	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
+	mvc	0(64,%r11),__PT_R8(%r9)
+	lmg	%r0,%r7,__PT_R0(%r9)
+0:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
 .Lcleanup_io_restore_insn:
-	.long	.Lio_done - 4 + 0x80000000
+	.quad	.Lio_done - 4
 
 .Lcleanup_idle:
 	# copy interrupt clock & cpu timer
 	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
 	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
-	chi	%r11,__LC_SAVE_AREA_ASYNC
+	cghi	%r11,__LC_SAVE_AREA_ASYNC
 	je	0f
 	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
 	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0:	# check if stck has been executed
-	cl	%r9,BASED(.Lcleanup_idle_insn)
+0:	# check if stck & stpt have been executed
+	clg	%r9,BASED(.Lcleanup_idle_insn)
 	jhe	1f
 	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
-	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3)
+	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
 1:	# account system time going idle
-	lm	%r9,%r10,__LC_STEAL_TIMER
-	ADD64	%r9,%r10,__CLOCK_IDLE_ENTER(%r2)
-	SUB64	%r9,%r10,__LC_LAST_UPDATE_CLOCK
-	stm	%r9,%r10,__LC_STEAL_TIMER
+	lg	%r9,__LC_STEAL_TIMER
+	alg	%r9,__CLOCK_IDLE_ENTER(%r2)
+	slg	%r9,__LC_LAST_UPDATE_CLOCK
+	stg	%r9,__LC_STEAL_TIMER
 	mvc	__LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
-	lm	%r9,%r10,__LC_SYSTEM_TIMER
-	ADD64	%r9,%r10,__LC_LAST_UPDATE_TIMER
-	SUB64	%r9,%r10,__TIMER_IDLE_ENTER(%r2)
-	stm	%r9,%r10,__LC_SYSTEM_TIMER
+	lg	%r9,__LC_SYSTEM_TIMER
+	alg	%r9,__LC_LAST_UPDATE_TIMER
+	slg	%r9,__TIMER_IDLE_ENTER(%r2)
+	stg	%r9,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
 	# prepare return psw
-	n	%r8,BASED(.Lcleanup_idle_wait)	# clear irq & wait state bits
-	l	%r9,24(%r11)			# return from psw_idle
+	nihh	%r8,0xfcfd		# clear irq & wait state bits
+	lg	%r9,48(%r11)		# return from psw_idle
 	br	%r14
 .Lcleanup_idle_insn:
-	.long	.Lpsw_idle_lpsw + 0x80000000
-.Lcleanup_idle_wait:
-	.long	0xfcfdffff
+	.quad	.Lpsw_idle_lpsw
 
 /*
  * Integer constants
  */
-	.align	4
-.Lnr_syscalls:
-	.long	NR_syscalls
-.Lvtimer_max:
-	.quad	0x7fffffffffffffff
+	.align	8
+.Lcritical_start:
+	.quad	.L__critical_start
+.Lcritical_length:
+	.quad	.L__critical_end - .L__critical_start
+
 
+#if IS_ENABLED(CONFIG_KVM)
 /*
- * Symbol constants
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
  */
-.Lc_do_machine_check:	.long	s390_do_machine_check
-.Lc_handle_mcck:	.long	s390_handle_mcck
-.Lc_do_IRQ:		.long	do_IRQ
-.Lc_do_signal:		.long	do_signal
-.Lc_do_notify_resume:	.long	do_notify_resume
-.Lc_do_per_trap:	.long	do_per_trap
-.Lc_jump_table:		.long	pgm_check_table
-.Lc_schedule:		.long	schedule
-#ifdef CONFIG_PREEMPT
-.Lc_preempt_irq:	.long	preempt_schedule_irq
-#endif
-.Lc_trace_enter:	.long	do_syscall_trace_enter
-.Lc_trace_exit:		.long	do_syscall_trace_exit
-.Lc_schedule_tail:	.long	schedule_tail
-.Lc_sysc_per:		.long	.Lsysc_per + 0x80000000
-#ifdef CONFIG_TRACE_IRQFLAGS
-.Lc_hardirqs_on:	.long	trace_hardirqs_on_caller
-.Lc_hardirqs_off:	.long	trace_hardirqs_off_caller
-#endif
-#ifdef CONFIG_LOCKDEP
-.Lc_lockdep_sys_exit:	.long	lockdep_sys_exit
+ENTRY(sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
+	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),1		# last exit...
+	jnz	.Lsie_done
+	LPP	__SF_EMPTY(%r15)		# set guest id
+	sie	0(%r14)
+.Lsie_done:
+	LPP	__SF_EMPTY+16(%r15)		# set host id
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+.Lrewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
+	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	br	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit
+
+	.align	8
+.Lsie_critical:
+	.quad	.Lsie_gmap
+.Lsie_critical_length:
+	.quad	.Lsie_done - .Lsie_gmap
+
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
 #endif
-.Lc_critical_start:	.long	.L__critical_start + 0x80000000
-.Lc_critical_length:	.long	.L__critical_end - .L__critical_start
 
-		.section .rodata, "a"
-#define SYSCALL(esa,esame,emu)	.long esa
+	.section .rodata, "a"
+#define SYSCALL(esame,emu)	.long esame
 	.globl	sys_call_table
 sys_call_table:
 #include "syscalls.S"
 #undef SYSCALL
+
+#ifdef CONFIG_COMPAT
+
+#define SYSCALL(esame,emu)	.long emu
+	.globl	sys_call_table_emu
+sys_call_table_emu:
+#include "syscalls.S"
+#undef SYSCALL
+#endif
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
deleted file mode 100644
index c329446a951d..000000000000
--- a/arch/s390/kernel/entry64.S
+++ /dev/null
@@ -1,1059 +0,0 @@
-/*
- *    S390 low-level entry points.
- *
- *    Copyright IBM Corp. 1999, 2012
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *		 Hartmut Penner (hp@de.ibm.com),
- *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- *		 Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/page.h>
-#include <asm/sigp.h>
-#include <asm/irq.h>
-
-__PT_R0      =	__PT_GPRS
-__PT_R1      =	__PT_GPRS + 8
-__PT_R2      =	__PT_GPRS + 16
-__PT_R3      =	__PT_GPRS + 24
-__PT_R4      =	__PT_GPRS + 32
-__PT_R5      =	__PT_GPRS + 40
-__PT_R6      =	__PT_GPRS + 48
-__PT_R7      =	__PT_GPRS + 56
-__PT_R8      =	__PT_GPRS + 64
-__PT_R9      =	__PT_GPRS + 72
-__PT_R10     =	__PT_GPRS + 80
-__PT_R11     =	__PT_GPRS + 88
-__PT_R12     =	__PT_GPRS + 96
-__PT_R13     =	__PT_GPRS + 104
-__PT_R14     =	__PT_GPRS + 112
-__PT_R15     =	__PT_GPRS + 120
-
-STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
-STACK_SIZE  = 1 << STACK_SHIFT
-STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-
-_TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		   _TIF_UPROBE)
-_TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
-		   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
-_PIF_WORK	= (_PIF_PER_TRAP)
-
-#define BASED(name) name-system_call(%r13)
-
-	.macro	TRACE_IRQS_ON
-#ifdef CONFIG_TRACE_IRQFLAGS
-	basr	%r2,%r0
-	brasl	%r14,trace_hardirqs_on_caller
-#endif
-	.endm
-
-	.macro	TRACE_IRQS_OFF
-#ifdef CONFIG_TRACE_IRQFLAGS
-	basr	%r2,%r0
-	brasl	%r14,trace_hardirqs_off_caller
-#endif
-	.endm
-
-	.macro	LOCKDEP_SYS_EXIT
-#ifdef CONFIG_LOCKDEP
-	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jz	.+10
-	brasl	%r14,lockdep_sys_exit
-#endif
-	.endm
-
-	.macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.+8
-	.insn	s,0xb2800000,\newpp
-#endif
-	.endm
-
-	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	.+62
-	lgr	\scratch,%r9
-	slg	\scratch,BASED(.Lsie_critical)
-	clg	\scratch,BASED(.Lsie_critical_length)
-	.if	\reason==1
-	# Some program interrupts are suppressing (e.g. protection).
-	# We must also check the instruction after SIE in that case.
-	# do_protection_exception will rewind to .Lrewind_pad
-	jh	.+42
-	.else
-	jhe	.+42
-	.endif
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	larl	%r9,sie_exit			# skip forward to sie_exit
-	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
-#endif
-	.endm
-
-	.macro	CHECK_STACK stacksize,savearea
-#ifdef CONFIG_CHECK_STACK
-	tml	%r15,\stacksize - CONFIG_STACK_GUARD
-	lghi	%r14,\savearea
-	jz	stack_overflow
-#endif
-	.endm
-
-	.macro	SWITCH_ASYNC savearea,stack,shift
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	1f
-	lgr	%r14,%r9
-	slg	%r14,BASED(.Lcritical_start)
-	clg	%r14,BASED(.Lcritical_length)
-	jhe	0f
-	lghi	%r11,\savearea		# inside critical section, do cleanup
-	brasl	%r14,cleanup_critical
-	tmhh	%r8,0x0001		# retest problem state after cleanup
-	jnz	1f
-0:	lg	%r14,\stack		# are we already on the target stack?
-	slgr	%r14,%r15
-	srag	%r14,%r14,\shift
-	jnz	1f
-	CHECK_STACK 1<<\shift,\savearea
-	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	lg	%r15,\stack		# load target stack
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	.endm
-
-	.macro UPDATE_VTIME scratch,enter_timer
-	lg	\scratch,__LC_EXIT_TIMER
-	slg	\scratch,\enter_timer
-	alg	\scratch,__LC_USER_TIMER
-	stg	\scratch,__LC_USER_TIMER
-	lg	\scratch,__LC_LAST_UPDATE_TIMER
-	slg	\scratch,__LC_EXIT_TIMER
-	alg	\scratch,__LC_SYSTEM_TIMER
-	stg	\scratch,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
-	.endm
-
-	.macro	LAST_BREAK scratch
-	srag	\scratch,%r10,23
-	jz	.+10
-	stg	%r10,__TI_last_break(%r12)
-	.endm
-
-	.macro REENABLE_IRQS
-	stg	%r8,__LC_RETURN_PSW
-	ni	__LC_RETURN_PSW,0xbf
-	ssm	__LC_RETURN_PSW
-	.endm
-
-	.macro STCK savearea
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
-	.insn	s,0xb27c0000,\savearea		# store clock fast
-#else
-	.insn	s,0xb2050000,\savearea		# store clock
-#endif
-	.endm
-
-	.section .kprobes.text, "ax"
-
-/*
- * Scheduler resume function, called by switch_to
- *  gpr2 = (task_struct *) prev
- *  gpr3 = (task_struct *) next
- * Returns:
- *  gpr2 = prev
- */
-ENTRY(__switch_to)
-	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
-	stg	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
-	lg	%r4,__THREAD_info(%r2)		# get thread_info of prev
-	lg	%r5,__THREAD_info(%r3)		# get thread_info of next
-	lgr	%r15,%r5
-	aghi	%r15,STACK_INIT			# end of kernel stack of next
-	stg	%r3,__LC_CURRENT		# store task struct of next
-	stg	%r5,__LC_THREAD_INFO		# store thread info of next
-	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
-	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
-	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
-	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
-	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
-	br	%r14
-
-.L__critical_start:
-/*
- * SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
- */
-
-ENTRY(system_call)
-	stpt	__LC_SYNC_ENTER_TIMER
-.Lsysc_stmg:
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	lg	%r10,__LC_LAST_BREAK
-	lg	%r12,__LC_THREAD_INFO
-	lghi	%r14,_PIF_SYSCALL
-.Lsysc_per:
-	lg	%r15,__LC_KERNEL_STACK
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-.Lsysc_vtime:
-	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
-	LAST_BREAK %r13
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
-	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
-	stg	%r14,__PT_FLAGS(%r11)
-.Lsysc_do_svc:
-	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
-	llgh	%r8,__PT_INT_CODE+2(%r11)
-	slag	%r8,%r8,2			# shift and test for svc 0
-	jnz	.Lsysc_nr_ok
-	# svc 0: system call number in %r1
-	llgfr	%r1,%r1				# clear high word in r1
-	cghi	%r1,NR_syscalls
-	jnl	.Lsysc_nr_ok
-	sth	%r1,__PT_INT_CODE+2(%r11)
-	slag	%r8,%r1,2
-.Lsysc_nr_ok:
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	stg	%r2,__PT_ORIG_GPR2(%r11)
-	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
-	lgf	%r9,0(%r8,%r10)			# get system call add.
-	tm	__TI_flags+7(%r12),_TIF_TRACE
-	jnz	.Lsysc_tracesys
-	basr	%r14,%r9			# call sys_xxxx
-	stg	%r2,__PT_R2(%r11)		# store return value
-
-.Lsysc_return:
-	LOCKDEP_SYS_EXIT
-.Lsysc_tif:
-	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	.Lsysc_restore
-	tm	__PT_FLAGS+7(%r11),_PIF_WORK
-	jnz	.Lsysc_work
-	tm	__TI_flags+7(%r12),_TIF_WORK
-	jnz	.Lsysc_work			# check for work
-	tm	__LC_CPU_FLAGS+7,_CIF_WORK
-	jnz	.Lsysc_work
-.Lsysc_restore:
-	lg	%r14,__LC_VDSO_PER_CPU
-	lmg	%r0,%r10,__PT_R0(%r11)
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
-	stpt	__LC_EXIT_TIMER
-	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
-	lmg	%r11,%r15,__PT_R11(%r11)
-	lpswe	__LC_RETURN_PSW
-.Lsysc_done:
-
-#
-# One of the work bits is on. Find out which one.
-#
-.Lsysc_work:
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
-	jo	.Lsysc_mcck_pending
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
-	jo	.Lsysc_reschedule
-#ifdef CONFIG_UPROBES
-	tm	__TI_flags+7(%r12),_TIF_UPROBE
-	jo	.Lsysc_uprobe_notify
-#endif
-	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
-	jo	.Lsysc_singlestep
-	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
-	jo	.Lsysc_sigpending
-	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
-	jo	.Lsysc_notify_resume
-	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
-	jo	.Lsysc_uaccess
-	j	.Lsysc_return		# beware of critical section cleanup
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-.Lsysc_reschedule:
-	larl	%r14,.Lsysc_return
-	jg	schedule
-
-#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lsysc_mcck_pending:
-	larl	%r14,.Lsysc_return
-	jg	s390_handle_mcck	# TIF bit will be cleared by handler
-
-#
-# _CIF_ASCE is set, load user space asce
-#
-.Lsysc_uaccess:
-	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	j	.Lsysc_return
-
-#
-# _TIF_SIGPENDING is set, call do_signal
-#
-.Lsysc_sigpending:
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	brasl	%r14,do_signal
-	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
-	jno	.Lsysc_return
-	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
-	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
-	lghi	%r8,0			# svc 0 returns -ENOSYS
-	llgh	%r1,__PT_INT_CODE+2(%r11)	# load new svc number
-	cghi	%r1,NR_syscalls
-	jnl	.Lsysc_nr_ok		# invalid svc number -> do svc 0
-	slag	%r8,%r1,2
-	j	.Lsysc_nr_ok		# restart svc
-
-#
-# _TIF_NOTIFY_RESUME is set, call do_notify_resume
-#
-.Lsysc_notify_resume:
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,.Lsysc_return
-	jg	do_notify_resume
-
-#
-# _TIF_UPROBE is set, call uprobe_notify_resume
-#
-#ifdef CONFIG_UPROBES
-.Lsysc_uprobe_notify:
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,.Lsysc_return
-	jg	uprobe_notify_resume
-#endif
-
-#
-# _PIF_PER_TRAP is set, call do_per_trap
-#
-.Lsysc_singlestep:
-	ni	__PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,.Lsysc_return
-	jg	do_per_trap
-
-#
-# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
-# and after the system call
-#
-.Lsysc_tracesys:
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	la	%r3,0
-	llgh	%r0,__PT_INT_CODE+2(%r11)
-	stg	%r0,__PT_R2(%r11)
-	brasl	%r14,do_syscall_trace_enter
-	lghi	%r0,NR_syscalls
-	clgr	%r0,%r2
-	jnh	.Lsysc_tracenogo
-	sllg	%r8,%r2,2
-	lgf	%r9,0(%r8,%r10)
-.Lsysc_tracego:
-	lmg	%r3,%r7,__PT_R3(%r11)
-	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
-	lg	%r2,__PT_ORIG_GPR2(%r11)
-	basr	%r14,%r9		# call sys_xxx
-	stg	%r2,__PT_R2(%r11)	# store return value
-.Lsysc_tracenogo:
-	tm	__TI_flags+7(%r12),_TIF_TRACE
-	jz	.Lsysc_return
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,.Lsysc_return
-	jg	do_syscall_trace_exit
-
-#
-# a new process exits the kernel with ret_from_fork
-#
-ENTRY(ret_from_fork)
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	lg	%r12,__LC_THREAD_INFO
-	brasl	%r14,schedule_tail
-	TRACE_IRQS_ON
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	jne	.Lsysc_tracenogo
-	# it's a kernel thread
-	lmg	%r9,%r10,__PT_R9(%r11)	# load gprs
-ENTRY(kernel_thread_starter)
-	la	%r2,0(%r10)
-	basr	%r14,%r9
-	j	.Lsysc_tracenogo
-
-/*
- * Program check handler routine
- */
-
-ENTRY(pgm_check_handler)
-	stpt	__LC_SYNC_ENTER_TIMER
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	lg	%r10,__LC_LAST_BREAK
-	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
-	lmg	%r8,%r9,__LC_PGM_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,1
-	tmhh	%r8,0x0001		# test problem state bit
-	jnz	1f			# -> fault in user space
-	tmhh	%r8,0x4000		# PER bit set in old PSW ?
-	jnz	0f			# -> enabled, can't be a double fault
-	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jnz	.Lpgm_svcper		# -> single stepped svc
-0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
-	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
-	LAST_BREAK %r14
-	lg	%r15,__LC_KERNEL_STACK
-	lg	%r14,__TI_task(%r12)
-	lghi	%r13,__LC_PGM_TDB
-	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
-	jz	2f
-	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-	stmg	%r8,%r9,__PT_PSW(%r11)
-	mvc	__PT_INT_CODE(4,%r11),__LC_PGM_ILC
-	mvc	__PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
-	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-	stg	%r10,__PT_ARGS(%r11)
-	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	0f
-	tmhh	%r8,0x0001		# kernel per event ?
-	jz	.Lpgm_kprobe
-	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
-	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
-	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
-	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0:	REENABLE_IRQS
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	larl	%r1,pgm_check_table
-	llgh	%r10,__PT_INT_CODE+2(%r11)
-	nill	%r10,0x007f
-	sll	%r10,2
-	je	.Lsysc_return
-	lgf	%r1,0(%r10,%r1)		# load address of handler routine
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	basr	%r14,%r1		# branch to interrupt-handler
-	j	.Lsysc_return
-
-#
-# PER event in supervisor state, must be kprobes
-#
-.Lpgm_kprobe:
-	REENABLE_IRQS
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	brasl	%r14,do_per_trap
-	j	.Lsysc_return
-
-#
-# single stepped system call
-#
-.Lpgm_svcper:
-	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
-	larl	%r14,.Lsysc_per
-	stg	%r14,__LC_RETURN_PSW+8
-	lghi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
-	lpswe	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
-
-/*
- * IO interrupt handler routine
- */
-ENTRY(io_int_handler)
-	STCK	__LC_INT_CLOCK
-	stpt	__LC_ASYNC_ENTER_TIMER
-	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
-	lg	%r10,__LC_LAST_BREAK
-	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
-	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,2
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user?
-	jz	.Lio_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lio_skip:
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
-	stmg	%r8,%r9,__PT_PSW(%r11)
-	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-	TRACE_IRQS_OFF
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-.Lio_loop:
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	lghi	%r3,IO_INTERRUPT
-	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
-	jz	.Lio_call
-	lghi	%r3,THIN_INTERRUPT
-.Lio_call:
-	brasl	%r14,do_IRQ
-	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
-	jz	.Lio_return
-	tpi	0
-	jz	.Lio_return
-	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-	j	.Lio_loop
-.Lio_return:
-	LOCKDEP_SYS_EXIT
-	TRACE_IRQS_ON
-.Lio_tif:
-	tm	__TI_flags+7(%r12),_TIF_WORK
-	jnz	.Lio_work		# there is work to do (signals etc.)
-	tm	__LC_CPU_FLAGS+7,_CIF_WORK
-	jnz	.Lio_work
-.Lio_restore:
-	lg	%r14,__LC_VDSO_PER_CPU
-	lmg	%r0,%r10,__PT_R0(%r11)
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
-	stpt	__LC_EXIT_TIMER
-	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
-	lmg	%r11,%r15,__PT_R11(%r11)
-	lpswe	__LC_RETURN_PSW
-.Lio_done:
-
-#
-# There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK work
-# 2) if we return to kernel code and kvm is enabled check if we need to
-#    modify the psw to leave SIE
-# 3) if we return to kernel code and preemptive scheduling is enabled check
-#    the preemption counter and if it is zero call preempt_schedule_irq
-# Before any work can be done, a switch to the kernel stack is required.
-#
-.Lio_work:
-	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jo	.Lio_work_user		# yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
-	# check for preemptive scheduling
-	icm	%r0,15,__TI_precount(%r12)
-	jnz	.Lio_restore		# preemption is disabled
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
-	jno	.Lio_restore
-	# switch to kernel stack
-	lg	%r1,__PT_R15(%r11)
-	aghi	%r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-	la	%r11,STACK_FRAME_OVERHEAD(%r1)
-	lgr	%r15,%r1
-	# TRACE_IRQS_ON already done at .Lio_return, call
-	# TRACE_IRQS_OFF to keep things symmetrical
-	TRACE_IRQS_OFF
-	brasl	%r14,preempt_schedule_irq
-	j	.Lio_return
-#else
-	j	.Lio_restore
-#endif
-
-#
-# Need to do work before returning to userspace, switch to kernel stack
-#
-.Lio_work_user:
-	lg	%r1,__LC_KERNEL_STACK
-	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-	la	%r11,STACK_FRAME_OVERHEAD(%r1)
-	lgr	%r15,%r1
-
-#
-# One of the work bits is on. Find out which one.
-#
-.Lio_work_tif:
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
-	jo	.Lio_mcck_pending
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
-	jo	.Lio_reschedule
-	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
-	jo	.Lio_sigpending
-	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
-	jo	.Lio_notify_resume
-	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
-	jo	.Lio_uaccess
-	j	.Lio_return		# beware of critical section cleanup
-
-#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lio_mcck_pending:
-	# TRACE_IRQS_ON already done at .Lio_return
-	brasl	%r14,s390_handle_mcck	# TIF bit will be cleared by handler
-	TRACE_IRQS_OFF
-	j	.Lio_return
-
-#
-# _CIF_ASCE is set, load user space asce
-#
-.Lio_uaccess:
-	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	j	.Lio_return
-
-#
-# _TIF_NEED_RESCHED is set, call schedule
-#
-.Lio_reschedule:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	brasl	%r14,schedule		# call scheduler
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
-	j	.Lio_return
-
-#
-# _TIF_SIGPENDING or is set, call do_signal
-#
-.Lio_sigpending:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	brasl	%r14,do_signal
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
-	j	.Lio_return
-
-#
-# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
-#
-.Lio_notify_resume:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	brasl	%r14,do_notify_resume
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
-	j	.Lio_return
-
-/*
- * External interrupt handler routine
- */
-ENTRY(ext_int_handler)
-	STCK	__LC_INT_CLOCK
-	stpt	__LC_ASYNC_ENTER_TIMER
-	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
-	lg	%r10,__LC_LAST_BREAK
-	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
-	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,3
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	.Lext_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lext_skip:
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
-	stmg	%r8,%r9,__PT_PSW(%r11)
-	lghi	%r1,__LC_EXT_PARAMS2
-	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
-	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
-	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
-	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-	TRACE_IRQS_OFF
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	lghi	%r3,EXT_INTERRUPT
-	brasl	%r14,do_IRQ
-	j	.Lio_return
-
-/*
- * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
- */
-ENTRY(psw_idle)
-	stg	%r3,__SF_EMPTY(%r15)
-	larl	%r1,.Lpsw_idle_lpsw+4
-	stg	%r1,__SF_EMPTY+8(%r15)
-	STCK	__CLOCK_IDLE_ENTER(%r2)
-	stpt	__TIMER_IDLE_ENTER(%r2)
-.Lpsw_idle_lpsw:
-	lpswe	__SF_EMPTY(%r15)
-	br	%r14
-.Lpsw_idle_end:
-
-.L__critical_end:
-
-/*
- * Machine check handler routines
- */
-ENTRY(mcck_int_handler)
-	STCK	__LC_MCCK_CLOCK
-	la	%r1,4095		# revalidate r1
-	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
-	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
-	lg	%r10,__LC_LAST_BREAK
-	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
-	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,4
-	tm	__LC_MCCK_CODE,0x80	# system damage?
-	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
-	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
-	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
-	jo	3f
-	la	%r14,__LC_SYNC_ENTER_TIMER
-	clc	0(8,%r14),__LC_ASYNC_ENTER_TIMER
-	jl	0f
-	la	%r14,__LC_ASYNC_ENTER_TIMER
-0:	clc	0(8,%r14),__LC_EXIT_TIMER
-	jl	1f
-	la	%r14,__LC_EXIT_TIMER
-1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER
-	jl	2f
-	la	%r14,__LC_LAST_UPDATE_TIMER
-2:	spt	0(%r14)
-	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
-	jno	.Lmcck_panic		# no -> skip cleanup critical
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
-	tm	%r8,0x0001		# interrupting from user ?
-	jz	.Lmcck_skip
-	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
-	LAST_BREAK %r14
-.Lmcck_skip:
-	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),0(%r14)
-	stmg	%r8,%r9,__PT_PSW(%r11)
-	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	brasl	%r14,s390_do_machine_check
-	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jno	.Lmcck_return
-	lg	%r1,__LC_KERNEL_STACK	# switch to kernel stack
-	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-	la	%r11,STACK_FRAME_OVERHEAD(%r1)
-	lgr	%r15,%r1
-	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
-	jno	.Lmcck_return
-	TRACE_IRQS_OFF
-	brasl	%r14,s390_handle_mcck
-	TRACE_IRQS_ON
-.Lmcck_return:
-	lg	%r14,__LC_VDSO_PER_CPU
-	lmg	%r0,%r10,__PT_R0(%r11)
-	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
-	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
-	jno	0f
-	stpt	__LC_EXIT_TIMER
-	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
-0:	lmg	%r11,%r15,__PT_R11(%r11)
-	lpswe	__LC_RETURN_MCCK_PSW
-
-.Lmcck_panic:
-	lg	%r14,__LC_PANIC_STACK
-	slgr	%r14,%r15
-	srag	%r14,%r14,PAGE_SHIFT
-	jz	0f
-	lg	%r15,__LC_PANIC_STACK
-0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	.Lmcck_skip
-
-#
-# PSW restart interrupt handler
-#
-ENTRY(restart_int_handler)
-	stg	%r15,__LC_SAVE_AREA_RESTART
-	lg	%r15,__LC_RESTART_STACK
-	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
-	xc	0(__PT_SIZE,%r15),0(%r15)
-	stmg	%r0,%r14,__PT_R0(%r15)
-	mvc	__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
-	mvc	__PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
-	aghi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack
-	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
-	lg	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
-	lg	%r2,__LC_RESTART_DATA
-	lg	%r3,__LC_RESTART_SOURCE
-	ltgr	%r3,%r3				# test source cpu address
-	jm	1f				# negative -> skip source stop
-0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
-	brc	10,0b				# wait for status stored
-1:	basr	%r14,%r1			# call function
-	stap	__SF_EMPTY(%r15)		# store cpu address
-	llgh	%r3,__SF_EMPTY(%r15)
-2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
-	brc	2,2b
-3:	j	3b
-
-	.section .kprobes.text, "ax"
-
-#ifdef CONFIG_CHECK_STACK
-/*
- * The synchronous or the asynchronous stack overflowed. We are dead.
- * No need to properly save the registers, we are going to panic anyway.
- * Setup a pt_regs so that show_trace can provide a good call trace.
- */
-stack_overflow:
-	lg	%r15,__LC_PANIC_STACK	# change to panic stack
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stmg	%r0,%r7,__PT_R0(%r11)
-	stmg	%r8,%r9,__PT_PSW(%r11)
-	mvc	__PT_R8(64,%r11),0(%r14)
-	stg	%r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lgr	%r2,%r11		# pass pointer to pt_regs
-	jg	kernel_stack_overflow
-#endif
-
-	.align	8
-.Lcleanup_table:
-	.quad	system_call
-	.quad	.Lsysc_do_svc
-	.quad	.Lsysc_tif
-	.quad	.Lsysc_restore
-	.quad	.Lsysc_done
-	.quad	.Lio_tif
-	.quad	.Lio_restore
-	.quad	.Lio_done
-	.quad	psw_idle
-	.quad	.Lpsw_idle_end
-
-cleanup_critical:
-	clg	%r9,BASED(.Lcleanup_table)	# system_call
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
-	jl	.Lcleanup_system_call
-	clg	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_tif
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+24)	# .Lsysc_restore
-	jl	.Lcleanup_sysc_tif
-	clg	%r9,BASED(.Lcleanup_table+32)	# .Lsysc_done
-	jl	.Lcleanup_sysc_restore
-	clg	%r9,BASED(.Lcleanup_table+40)	# .Lio_tif
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+48)	# .Lio_restore
-	jl	.Lcleanup_io_tif
-	clg	%r9,BASED(.Lcleanup_table+56)	# .Lio_done
-	jl	.Lcleanup_io_restore
-	clg	%r9,BASED(.Lcleanup_table+64)	# psw_idle
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
-	jl	.Lcleanup_idle
-0:	br	%r14
-
-
-.Lcleanup_system_call:
-	# check if stpt has been executed
-	clg	%r9,BASED(.Lcleanup_system_call_insn)
-	jh	0f
-	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	cghi	%r11,__LC_SAVE_AREA_ASYNC
-	je	0f
-	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0:	# check if stmg has been executed
-	clg	%r9,BASED(.Lcleanup_system_call_insn+8)
-	jh	0f
-	mvc	__LC_SAVE_AREA_SYNC(64),0(%r11)
-0:	# check if base register setup + TIF bit load has been done
-	clg	%r9,BASED(.Lcleanup_system_call_insn+16)
-	jhe	0f
-	# set up saved registers r10 and r12
-	stg	%r10,16(%r11)		# r10 last break
-	stg	%r12,32(%r11)		# r12 thread-info pointer
-0:	# check if the user time update has been done
-	clg	%r9,BASED(.Lcleanup_system_call_insn+24)
-	jh	0f
-	lg	%r15,__LC_EXIT_TIMER
-	slg	%r15,__LC_SYNC_ENTER_TIMER
-	alg	%r15,__LC_USER_TIMER
-	stg	%r15,__LC_USER_TIMER
-0:	# check if the system time update has been done
-	clg	%r9,BASED(.Lcleanup_system_call_insn+32)
-	jh	0f
-	lg	%r15,__LC_LAST_UPDATE_TIMER
-	slg	%r15,__LC_EXIT_TIMER
-	alg	%r15,__LC_SYSTEM_TIMER
-	stg	%r15,__LC_SYSTEM_TIMER
-0:	# update accounting time stamp
-	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	# do LAST_BREAK
-	lg	%r9,16(%r11)
-	srag	%r9,%r9,23
-	jz	0f
-	mvc	__TI_last_break(8,%r12),16(%r11)
-0:	# set up saved register r11
-	lg	%r15,__LC_KERNEL_STACK
-	la	%r9,STACK_FRAME_OVERHEAD(%r15)
-	stg	%r9,24(%r11)		# r11 pt_regs pointer
-	# fill pt_regs
-	mvc	__PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
-	stmg	%r0,%r7,__PT_R0(%r9)
-	mvc	__PT_PSW(16,%r9),__LC_SVC_OLD_PSW
-	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
-	xc	__PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
-	mvi	__PT_FLAGS+7(%r9),_PIF_SYSCALL
-	# setup saved register r15
-	stg	%r15,56(%r11)		# r15 stack pointer
-	# set new psw address and exit
-	larl	%r9,.Lsysc_do_svc
-	br	%r14
-.Lcleanup_system_call_insn:
-	.quad	system_call
-	.quad	.Lsysc_stmg
-	.quad	.Lsysc_per
-	.quad	.Lsysc_vtime+18
-	.quad	.Lsysc_vtime+42
-
-.Lcleanup_sysc_tif:
-	larl	%r9,.Lsysc_tif
-	br	%r14
-
-.Lcleanup_sysc_restore:
-	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
-	je	0f
-	lg	%r9,24(%r11)		# get saved pointer to pt_regs
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
-	mvc	0(64,%r11),__PT_R8(%r9)
-	lmg	%r0,%r7,__PT_R0(%r9)
-0:	lmg	%r8,%r9,__LC_RETURN_PSW
-	br	%r14
-.Lcleanup_sysc_restore_insn:
-	.quad	.Lsysc_done - 4
-
-.Lcleanup_io_tif:
-	larl	%r9,.Lio_tif
-	br	%r14
-
-.Lcleanup_io_restore:
-	clg	%r9,BASED(.Lcleanup_io_restore_insn)
-	je	0f
-	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
-	mvc	0(64,%r11),__PT_R8(%r9)
-	lmg	%r0,%r7,__PT_R0(%r9)
-0:	lmg	%r8,%r9,__LC_RETURN_PSW
-	br	%r14
-.Lcleanup_io_restore_insn:
-	.quad	.Lio_done - 4
-
-.Lcleanup_idle:
-	# copy interrupt clock & cpu timer
-	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
-	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
-	cghi	%r11,__LC_SAVE_AREA_ASYNC
-	je	0f
-	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
-	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0:	# check if stck & stpt have been executed
-	clg	%r9,BASED(.Lcleanup_idle_insn)
-	jhe	1f
-	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
-	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1:	# account system time going idle
-	lg	%r9,__LC_STEAL_TIMER
-	alg	%r9,__CLOCK_IDLE_ENTER(%r2)
-	slg	%r9,__LC_LAST_UPDATE_CLOCK
-	stg	%r9,__LC_STEAL_TIMER
-	mvc	__LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
-	lg	%r9,__LC_SYSTEM_TIMER
-	alg	%r9,__LC_LAST_UPDATE_TIMER
-	slg	%r9,__TIMER_IDLE_ENTER(%r2)
-	stg	%r9,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
-	# prepare return psw
-	nihh	%r8,0xfcfd		# clear irq & wait state bits
-	lg	%r9,48(%r11)		# return from psw_idle
-	br	%r14
-.Lcleanup_idle_insn:
-	.quad	.Lpsw_idle_lpsw
-
-/*
- * Integer constants
- */
-	.align	8
-.Lcritical_start:
-	.quad	.L__critical_start
-.Lcritical_length:
-	.quad	.L__critical_end - .L__critical_start
-
-
-#if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
-	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
-	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
-	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-	lg	%r14,__LC_GMAP			# get gmap pointer
-	ltgr	%r14,%r14
-	jz	.Lsie_gmap
-	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-.Lsie_gmap:
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
-	tm	__SIE_PROG20+3(%r14),1		# last exit...
-	jnz	.Lsie_done
-	LPP	__SF_EMPTY(%r15)		# set guest id
-	sie	0(%r14)
-.Lsie_done:
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
-	nop	0
-	.globl sie_exit
-sie_exit:
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
-	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
-	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
-	br	%r14
-.Lsie_fault:
-	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
-	j	sie_exit
-
-	.align	8
-.Lsie_critical:
-	.quad	.Lsie_gmap
-.Lsie_critical_length:
-	.quad	.Lsie_done - .Lsie_gmap
-
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
-	EX_TABLE(sie_exit,.Lsie_fault)
-#endif
-
-		.section .rodata, "a"
-#define SYSCALL(esa,esame,emu)	.long esame
-	.globl	sys_call_table
-sys_call_table:
-#include "syscalls.S"
-#undef SYSCALL
-
-#ifdef CONFIG_COMPAT
-
-#define SYSCALL(esa,esame,emu)	.long emu
-	.globl	sys_call_table_emu
-sys_call_table_emu:
-#include "syscalls.S"
-#undef SYSCALL
-#endif
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 82c19899574f..e0eaf11134b4 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -57,6 +57,44 @@
 
 unsigned long ftrace_plt;
 
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#ifdef CC_USING_HOTPATCH
+	/* brcl 0,0 */
+	insn->opc = 0xc004;
+	insn->disp = 0;
+#else
+	/* stg r14,8(r15) */
+	insn->opc = 0xe3e0;
+	insn->disp = 0xf0080024;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+	if (insn->opc == BREAKPOINT_INSTRUCTION)
+		return 1;
+#endif
+	return 0;
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+	insn->opc = BREAKPOINT_INSTRUCTION;
+	insn->disp = KPROBE_ON_FTRACE_NOP;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+	insn->opc = BREAKPOINT_INSTRUCTION;
+	insn->disp = KPROBE_ON_FTRACE_CALL;
+#endif
+}
+
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 		       unsigned long addr)
 {
@@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		return -EFAULT;
 	if (addr == MCOUNT_ADDR) {
 		/* Initial code replacement */
-#ifdef CC_USING_HOTPATCH
-		/* We expect to see brcl 0,0 */
-		ftrace_generate_nop_insn(&orig);
-#else
-		/* We expect to see stg r14,8(r15) */
-		orig.opc = 0xe3e0;
-		orig.disp = 0xf0080024;
-#endif
+		ftrace_generate_orig_insn(&orig);
 		ftrace_generate_nop_insn(&new);
-	} else if (old.opc == BREAKPOINT_INSTRUCTION) {
+	} else if (is_kprobe_on_ftrace(&old)) {
 		/*
 		 * If we find a breakpoint instruction, a kprobe has been
 		 * placed at the beginning of the function. We write the
@@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		 * bytes of the original instruction so that the kprobes
 		 * handler can execute a nop, if it reaches this breakpoint.
 		 */
-		new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
-		orig.disp = KPROBE_ON_FTRACE_CALL;
-		new.disp = KPROBE_ON_FTRACE_NOP;
+		ftrace_generate_kprobe_call_insn(&orig);
+		ftrace_generate_kprobe_nop_insn(&new);
 	} else {
 		/* Replace ftrace call with a nop. */
 		ftrace_generate_call_insn(&orig, rec->ip);
@@ -100,8 +130,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	/* Verify that the to be replaced code matches what we expect. */
 	if (memcmp(&orig, &old, sizeof(old)))
 		return -EINVAL;
-	if (probe_kernel_write((void *) rec->ip, &new, sizeof(new)))
-		return -EPERM;
+	s390_kernel_write((void *) rec->ip, &new, sizeof(new));
 	return 0;
 }
 
@@ -111,7 +140,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
 		return -EFAULT;
-	if (old.opc == BREAKPOINT_INSTRUCTION) {
+	if (is_kprobe_on_ftrace(&old)) {
 		/*
 		 * If we find a breakpoint instruction, a kprobe has been
 		 * placed at the beginning of the function. We write the
@@ -119,9 +148,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * bytes of the original instruction so that the kprobes
 		 * handler can execute a brasl if it reaches this breakpoint.
 		 */
-		new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
-		orig.disp = KPROBE_ON_FTRACE_NOP;
-		new.disp = KPROBE_ON_FTRACE_CALL;
+		ftrace_generate_kprobe_nop_insn(&orig);
+		ftrace_generate_kprobe_call_insn(&new);
 	} else {
 		/* Replace nop with an ftrace call. */
 		ftrace_generate_nop_insn(&orig);
@@ -130,8 +158,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	/* Verify that the to be replaced code matches what we expect. */
 	if (memcmp(&orig, &old, sizeof(old)))
 		return -EINVAL;
-	if (probe_kernel_write((void *) rec->ip, &new, sizeof(new)))
-		return -EPERM;
+	s390_kernel_write((void *) rec->ip, &new, sizeof(new));
 	return 0;
 }
 
@@ -202,14 +229,16 @@ int ftrace_enable_ftrace_graph_caller(void)
 {
 	u8 op = 0x04; /* set mask field to zero */
 
-	return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+	s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+	return 0;
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
 	u8 op = 0xf4; /* set mask field to all ones */
 
-	return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+	s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+	return 0;
 }
 
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 132f4c9ade60..59b7c6470567 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -27,11 +27,7 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
-#ifdef CONFIG_64BIT
 #define ARCH_OFFSET	4
-#else
-#define ARCH_OFFSET	0
-#endif
 
 __HEAD
 
@@ -67,7 +63,6 @@ __HEAD
 # subroutine to set architecture mode
 #
 .Lsetmode:
-#ifdef CONFIG_64BIT
 	mvi	__LC_AR_MODE_ID,1	# set esame flag
 	slr	%r0,%r0 		# set cpuid to zero
 	lhi	%r1,2			# mode 2 = esame (dump)
@@ -76,16 +71,12 @@ __HEAD
 	.fill	16,4,0x0
 0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
 	sam31				# switch to 31 bit addressing mode
-#else
-	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
-#endif
 	br	%r14
 
 #
 # subroutine to wait for end I/O
 #
 .Lirqwait:
-#ifdef CONFIG_64BIT
 	mvc	0x1f0(16),.Lnewpsw	# set up IO interrupt psw
 	lpsw	.Lwaitpsw
 .Lioint:
@@ -93,15 +84,6 @@ __HEAD
 	.align	8
 .Lnewpsw:
 	.quad	0x0000000080000000,.Lioint
-#else
-	mvc	0x78(8),.Lnewpsw	# set up IO interrupt psw
-	lpsw	.Lwaitpsw
-.Lioint:
-	br	%r14
-	.align	8
-.Lnewpsw:
-	.long	0x00080000,0x80000000+.Lioint
-#endif
 .Lwaitpsw:
 	.long	0x020a0000,0x80000000+.Lioint
 
@@ -375,7 +357,6 @@ ENTRY(startup)
 ENTRY(startup_kdump)
 	j	.Lep_startup_kdump
 .Lep_startup_normal:
-#ifdef CONFIG_64BIT
 	mvi	__LC_AR_MODE_ID,1	# set esame flag
 	slr	%r0,%r0 		# set cpuid to zero
 	lhi	%r1,2			# mode 2 = esame (dump)
@@ -384,9 +365,6 @@ ENTRY(startup_kdump)
 	.fill	16,4,0x0
 0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
 	sam31				# switch to 31 bit addressing mode
-#else
-	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
-#endif
 	basr	%r13,0			# get base
 .LPG0:
 	xc	0x200(256),0x200	# partially clear lowcore
@@ -396,7 +374,6 @@ ENTRY(startup_kdump)
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
 	xc	__LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
-#ifndef CONFIG_MARCH_G5
 	# check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
 	.insn	s,0xb2b10000,0		# store facilities @ __LC_STFL_FAC_LIST
 	tm	__LC_STFL_FAC_LIST,0x01	# stfle available ?
@@ -435,7 +412,6 @@ ENTRY(startup_kdump)
 # the kernel will crash. Format is number of facility words with bits set,
 # followed by the facility words.
 
-#if defined(CONFIG_64BIT)
 #if defined(CONFIG_MARCH_Z13)
 	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
 #elif defined(CONFIG_MARCH_ZEC12)
@@ -451,35 +427,10 @@ ENTRY(startup_kdump)
 #elif defined(CONFIG_MARCH_Z900)
 	.long 1, 0xc0000000
 #endif
-#else
-#if defined(CONFIG_MARCH_ZEC12)
-	.long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z196)
-	.long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z10)
-	.long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z9_109)
-	.long 1, 0x8100c880
-#elif defined(CONFIG_MARCH_Z990)
-	.long 1, 0x80002000
-#elif defined(CONFIG_MARCH_Z900)
-	.long 1, 0x80000000
-#endif
-#endif
 4:
-#endif
-
-#ifdef CONFIG_64BIT
 	/* Continue with 64bit startup code in head64.S */
 	sam64				# switch to 64 bit mode
 	jg	startup_continue
-#else
-	/* Continue with 31bit startup code in head31.S */
-	l	%r13,5f-.LPG0(%r13)
-	b	0(%r13)
-	.align	8
-5:	.long	startup_continue
-#endif
 
 	.align	8
 6:	.long	0x7fffffff,0xffffffff
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
deleted file mode 100644
index 6dbe80983a24..000000000000
--- a/arch/s390/kernel/head31.S
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright IBM Corp. 2005, 2010
- *
- *   Author(s):	Hartmut Penner <hp@de.ibm.com>
- *		Martin Schwidefsky <schwidefsky@de.ibm.com>
- *		Rob van der Heij <rvdhei@iae.nl>
- *		Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-
-__HEAD
-ENTRY(startup_continue)
-	basr	%r13,0			# get base
-.LPG1:
-
-	l	%r1,.Lbase_cc-.LPG1(%r13)
-	mvc	0(8,%r1),__LC_LAST_UPDATE_CLOCK
-	lctl	%c0,%c15,.Lctl-.LPG1(%r13) # load control registers
-	l	%r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
-					# move IPL device to lowcore
-#
-# Setup stack
-#
-	l	%r15,.Linittu-.LPG1(%r13)
-	st	%r15,__LC_THREAD_INFO	# cache thread info in lowcore
-	mvc	__LC_CURRENT(4),__TI_task(%r15)
-	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
-	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
-	ahi	%r15,-96
-#
-# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
-# and create a kernel NSS if the SAVESYS= parm is defined
-#
-	l	%r14,.Lstartup_init-.LPG1(%r13)
-	basr	%r14,%r14
-	lpsw  .Lentry-.LPG1(13)		# jump to _stext in primary-space,
-					# virtual and never return ...
-	.align	8
-.Lentry:.long	0x00080000,0x80000000 + _stext
-.Lctl:	.long	0x04b50000		# cr0: various things
-	.long	0			# cr1: primary space segment table
-	.long	.Lduct			# cr2: dispatchable unit control table
-	.long	0			# cr3: instruction authorization
-	.long	0			# cr4: instruction authorization
-	.long	.Lduct			# cr5: primary-aste origin
-	.long	0			# cr6:	I/O interrupts
-	.long	0			# cr7:	secondary space segment table
-	.long	0			# cr8:	access registers translation
-	.long	0			# cr9:	tracing off
-	.long	0			# cr10: tracing off
-	.long	0			# cr11: tracing off
-	.long	0			# cr12: tracing off
-	.long	0			# cr13: home space segment table
-	.long	0xc0000000		# cr14: machine check handling off
-	.long	0			# cr15: linkage stack operations
-.Lbss_bgn:  .long __bss_start
-.Lbss_end:  .long _end
-.Lparmaddr: .long PARMAREA
-.Linittu:   .long init_thread_union
-.Lstartup_init:
-	    .long startup_init
-	.align	64
-.Lduct:	.long	0,0,0,0,.Lduald,0,0,0
-	.long	0,0,0,0,0,0,0,0
-	.align	128
-.Lduald:.rept	8
-	.long	0x80000000,0,0,0	# invalid access-list entries
-	.endr
-.Lbase_cc:
-	.long	sched_clock_base_cc
-
-ENTRY(_ehead)
-
-	.org	0x100000 - 0x11000	# head.o ends at 0x11000
-#
-# startup-code, running in absolute addressing mode
-#
-ENTRY(_stext)
-	basr	%r13,0			# get base
-.LPG3:
-# check control registers
-	stctl	%c0,%c15,0(%r15)
-	oi	2(%r15),0x60		# enable sigp emergency & external call
-	oi	0(%r15),0x10		# switch on low address protection
-	lctl	%c0,%c15,0(%r15)
-
-#
-	lam	0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess
-	l	%r14,.Lstart-.LPG3(%r13)
-	basr	%r14,%r14		# call start_kernel
-#
-# We returned from start_kernel ?!? PANIK
-#
-	basr	%r13,0
-	lpsw	.Ldw-.(%r13)		# load disabled wait psw
-#
-	.align	8
-.Ldw:	.long	0x000a0000,0x00000000
-.Lstart:.long	start_kernel
-.Laregs:.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
index 085a95eb315f..d05950f02c34 100644
--- a/arch/s390/kernel/head_kdump.S
+++ b/arch/s390/kernel/head_kdump.S
@@ -92,17 +92,9 @@ startup_kdump_relocated:
 #else
 .align 2
 .Lep_startup_kdump:
-#ifdef CONFIG_64BIT
 	larl	%r13,startup_kdump_crash
 	lpswe	0(%r13)
 .align 8
 startup_kdump_crash:
 	.quad	0x0002000080000000,0x0000000000000000 + startup_kdump_crash
-#else
-	basr	%r13,0
-0:	lpsw	startup_kdump_crash-0b(%r13)
-.align 8
-startup_kdump_crash:
-	.long	0x000a0000,0x00000000 + startup_kdump_crash
-#endif /* CONFIG_64BIT */
 #endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 5c8651f36509..52fbef91d1d9 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -182,24 +182,21 @@ EXPORT_SYMBOL_GPL(diag308);
 
 /* SYSFS */
 
-#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
+#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...)		\
 static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj,	\
 		struct kobj_attribute *attr,				\
 		char *page)						\
 {									\
-	return sprintf(page, _format, _value);				\
-}									\
+	return snprintf(page, PAGE_SIZE, _format, ##args);		\
+}
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
+IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value)			\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
-	__ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL);
+	__ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL)
 
 #define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)	\
-static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj,	\
-		struct kobj_attribute *attr,				\
-		char *page)						\
-{									\
-	return sprintf(page, _fmt_out,					\
-			(unsigned long long) _value);			\
-}									\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)	\
 static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 		struct kobj_attribute *attr,				\
 		const char *buf, size_t len)				\
@@ -213,15 +210,10 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
 	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
 			sys_##_prefix##_##_name##_show,			\
-			sys_##_prefix##_##_name##_store);
+			sys_##_prefix##_##_name##_store)
 
 #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
-static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj,	\
-		struct kobj_attribute *attr,				\
-		char *page)						\
-{									\
-	return sprintf(page, _fmt_out, _value);				\
-}									\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value)			\
 static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 		struct kobj_attribute *attr,				\
 		const char *buf, size_t len)				\
@@ -233,7 +225,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
 	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
 			sys_##_prefix##_##_name##_show,			\
-			sys_##_prefix##_##_name##_store);
+			sys_##_prefix##_##_name##_store)
 
 static void make_attrs_ro(struct attribute **attrs)
 {
@@ -415,15 +407,9 @@ static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
 	return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
 					IPL_PARMBLOCK_SIZE);
 }
-
-static struct bin_attribute ipl_parameter_attr = {
-	.attr = {
-		.name = "binary_parameter",
-		.mode = S_IRUGO,
-	},
-	.size = PAGE_SIZE,
-	.read = &ipl_parameter_read,
-};
+static struct bin_attribute ipl_parameter_attr =
+	__BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
+		   PAGE_SIZE);
 
 static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *attr, char *buf,
@@ -434,14 +420,13 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
 
 	return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
+static struct bin_attribute ipl_scp_data_attr =
+	__BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
 
-static struct bin_attribute ipl_scp_data_attr = {
-	.attr = {
-		.name = "scp_data",
-		.mode = S_IRUGO,
-	},
-	.size = PAGE_SIZE,
-	.read = ipl_scp_data_read,
+static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+	&ipl_parameter_attr,
+	&ipl_scp_data_attr,
+	NULL,
 };
 
 /* FCP ipl device attributes */
@@ -484,6 +469,7 @@ static struct attribute *ipl_fcp_attrs[] = {
 
 static struct attribute_group ipl_fcp_attr_group = {
 	.attrs = ipl_fcp_attrs,
+	.bin_attrs = ipl_fcp_bin_attrs,
 };
 
 /* CCW ipl device attributes */
@@ -540,28 +526,6 @@ static struct attribute_group ipl_unknown_attr_group = {
 
 static struct kset *ipl_kset;
 
-static int __init ipl_register_fcp_files(void)
-{
-	int rc;
-
-	rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
-	if (rc)
-		goto out;
-	rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
-	if (rc)
-		goto out_ipl_parm;
-	rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_scp_data_attr);
-	if (!rc)
-		goto out;
-
-	sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
-
-out_ipl_parm:
-	sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
-out:
-	return rc;
-}
-
 static void __ipl_run(void *unused)
 {
 	diag308(DIAG308_IPL, NULL);
@@ -596,7 +560,7 @@ static int __init ipl_init(void)
 		break;
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
-		rc = ipl_register_fcp_files();
+		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
 		break;
 	case IPL_TYPE_NSS:
 		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group);
@@ -744,15 +708,13 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
 
 	return count;
 }
+static struct bin_attribute sys_reipl_fcp_scp_data_attr =
+	__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
+		   reipl_fcp_scpdata_write, PAGE_SIZE);
 
-static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
-	.attr = {
-		.name = "scp_data",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = PAGE_SIZE,
-	.read = reipl_fcp_scpdata_read,
-	.write = reipl_fcp_scpdata_write,
+static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+	&sys_reipl_fcp_scp_data_attr,
+	NULL,
 };
 
 DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
@@ -841,6 +803,7 @@ static struct attribute *reipl_fcp_attrs[] = {
 
 static struct attribute_group reipl_fcp_attr_group = {
 	.attrs = reipl_fcp_attrs,
+	.bin_attrs = reipl_fcp_bin_attrs,
 };
 
 /* CCW reipl device attributes */
@@ -1261,15 +1224,6 @@ static int __init reipl_fcp_init(void)
 		return rc;
 	}
 
-	rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj,
-				   &sys_reipl_fcp_scp_data_attr);
-	if (rc) {
-		sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
-		kset_unregister(reipl_fcp_kset);
-		free_page((unsigned long) reipl_block_fcp);
-		return rc;
-	}
-
 	if (ipl_info.type == IPL_TYPE_FCP) {
 		memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
 		/*
@@ -1713,9 +1667,7 @@ static ssize_t on_reboot_store(struct kobject *kobj,
 {
 	return set_trigger(buf, &on_reboot_trigger, len);
 }
-
-static struct kobj_attribute on_reboot_attr =
-	__ATTR(on_reboot, 0644, on_reboot_show, on_reboot_store);
+static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot);
 
 static void do_machine_restart(char *__unused)
 {
@@ -1741,9 +1693,7 @@ static ssize_t on_panic_store(struct kobject *kobj,
 {
 	return set_trigger(buf, &on_panic_trigger, len);
 }
-
-static struct kobj_attribute on_panic_attr =
-	__ATTR(on_panic, 0644, on_panic_show, on_panic_store);
+static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic);
 
 static void do_panic(void)
 {
@@ -1769,9 +1719,7 @@ static ssize_t on_restart_store(struct kobject *kobj,
 {
 	return set_trigger(buf, &on_restart_trigger, len);
 }
-
-static struct kobj_attribute on_restart_attr =
-	__ATTR(on_restart, 0644, on_restart_show, on_restart_store);
+static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
 
 static void __do_restart(void *ignore)
 {
@@ -1808,10 +1756,7 @@ static ssize_t on_halt_store(struct kobject *kobj,
 {
 	return set_trigger(buf, &on_halt_trigger, len);
 }
-
-static struct kobj_attribute on_halt_attr =
-	__ATTR(on_halt, 0644, on_halt_show, on_halt_store);
-
+static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt);
 
 static void do_machine_halt(void)
 {
@@ -1837,10 +1782,7 @@ static ssize_t on_poff_store(struct kobject *kobj,
 {
 	return set_trigger(buf, &on_poff_trigger, len);
 }
-
-static struct kobj_attribute on_poff_attr =
-	__ATTR(on_poff, 0644, on_poff_show, on_poff_store);
-
+static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff);
 
 static void do_machine_power_off(void)
 {
@@ -1850,26 +1792,27 @@ static void do_machine_power_off(void)
 }
 void (*_machine_power_off)(void) = do_machine_power_off;
 
+static struct attribute *shutdown_action_attrs[] = {
+	&on_restart_attr.attr,
+	&on_reboot_attr.attr,
+	&on_panic_attr.attr,
+	&on_halt_attr.attr,
+	&on_poff_attr.attr,
+	NULL,
+};
+
+static struct attribute_group shutdown_action_attr_group = {
+	.attrs = shutdown_action_attrs,
+};
+
 static void __init shutdown_triggers_init(void)
 {
 	shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL,
 						    firmware_kobj);
 	if (!shutdown_actions_kset)
 		goto fail;
-	if (sysfs_create_file(&shutdown_actions_kset->kobj,
-			      &on_reboot_attr.attr))
-		goto fail;
-	if (sysfs_create_file(&shutdown_actions_kset->kobj,
-			      &on_panic_attr.attr))
-		goto fail;
-	if (sysfs_create_file(&shutdown_actions_kset->kobj,
-			      &on_halt_attr.attr))
-		goto fail;
-	if (sysfs_create_file(&shutdown_actions_kset->kobj,
-			      &on_poff_attr.attr))
-		goto fail;
-	if (sysfs_create_file(&shutdown_actions_kset->kobj,
-			      &on_restart_attr.attr))
+	if (sysfs_create_group(&shutdown_actions_kset->kobj,
+			       &shutdown_action_attr_group))
 		goto fail;
 	return;
 fail:
@@ -2062,12 +2005,10 @@ static void do_reset_calls(void)
 {
 	struct reset_call *reset;
 
-#ifdef CONFIG_64BIT
 	if (diag308_set_works) {
 		diag308_reset();
 		return;
 	}
-#endif
 	list_for_each_entry(reset, &rcall, list)
 		reset->fn();
 }
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f238720690f3..e9d9addfaa44 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -56,7 +56,7 @@ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
  * /proc/interrupts.
  * In addition this list contains non external / I/O events like NMIs.
  */
-static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
+static const struct irq_class irqclass_sub_desc[] = {
 	{.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
 	{.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
 	{.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
@@ -79,7 +79,6 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
 	{.irq = IRQIO_TAP,  .name = "TAP", .desc = "[I/O] Tape"},
 	{.irq = IRQIO_VMR,  .name = "VMR", .desc = "[I/O] Unit Record Devices"},
 	{.irq = IRQIO_LCS,  .name = "LCS", .desc = "[I/O] LCS"},
-	{.irq = IRQIO_CLW,  .name = "CLW", .desc = "[I/O] CLAW"},
 	{.irq = IRQIO_CTC,  .name = "CTC", .desc = "[I/O] CTC"},
 	{.irq = IRQIO_APB,  .name = "APB", .desc = "[I/O] AP Bus"},
 	{.irq = IRQIO_ADM,  .name = "ADM", .desc = "[I/O] EADM Subchannel"},
@@ -94,6 +93,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
 
 void __init init_IRQ(void)
 {
+	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
 	init_cio_interrupts();
 	init_airq_interrupts();
 	init_ext_interrupts();
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index cb2d51e779df..a90299600483 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -36,16 +36,20 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
 	insn->offset = (entry->target - entry->code) >> 1;
 }
 
-static void jump_label_bug(struct jump_entry *entry, struct insn *insn)
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+			   struct insn *new)
 {
 	unsigned char *ipc = (unsigned char *)entry->code;
-	unsigned char *ipe = (unsigned char *)insn;
+	unsigned char *ipe = (unsigned char *)expected;
+	unsigned char *ipn = (unsigned char *)new;
 
 	pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
 	pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
 		 ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
 	pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
 		 ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
+	pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
+		 ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
 	panic("Corrupted kernel text");
 }
 
@@ -69,12 +73,12 @@ static void __jump_label_transform(struct jump_entry *entry,
 	}
 	if (init) {
 		if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
-			jump_label_bug(entry, &old);
+			jump_label_bug(entry, &orignop, &new);
 	} else {
 		if (memcmp((void *)entry->code, &old, sizeof(old)))
-			jump_label_bug(entry, &old);
+			jump_label_bug(entry, &old, &new);
 	}
-	probe_kernel_write((void *)entry->code, &new, sizeof(new));
+	s390_kernel_write((void *)entry->code, &new, sizeof(new));
 }
 
 static int __sm_arch_jump_label_transform(void *data)
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f516edc1fbe3..389db56a2208 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -178,7 +178,7 @@ static int swap_instruction(void *data)
 	}
 skip_ftrace:
 	kcb->kprobe_status = KPROBE_SWAP_INST;
-	probe_kernel_write(p->addr, &new_insn, len);
+	s390_kernel_write(p->addr, &new_insn, len);
 	kcb->kprobe_status = status;
 	return 0;
 }
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 36154a2f1814..0c1a679314dd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -38,13 +38,8 @@
 #define DEBUGP(fmt , ...)
 #endif
 
-#ifndef CONFIG_64BIT
-#define PLT_ENTRY_SIZE 12
-#else /* CONFIG_64BIT */
 #define PLT_ENTRY_SIZE 20
-#endif /* CONFIG_64BIT */
 
-#ifdef CONFIG_64BIT
 void *module_alloc(unsigned long size)
 {
 	if (PAGE_ALIGN(size) > MODULES_LEN)
@@ -53,7 +48,6 @@ void *module_alloc(unsigned long size)
 				    GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
 				    __builtin_return_address(0));
 }
-#endif
 
 void module_arch_freeing_init(struct module *mod)
 {
@@ -323,17 +317,11 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 			unsigned int *ip;
 			ip = me->module_core + me->arch.plt_offset +
 				info->plt_offset;
-#ifndef CONFIG_64BIT
-			ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */
-			ip[1] = 0x100607f1;
-			ip[2] = val;
-#else /* CONFIG_64BIT */
 			ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
 			ip[1] = 0x100a0004;
 			ip[2] = 0x07f10000;
 			ip[3] = (unsigned int) (val >> 32);
 			ip[4] = (unsigned int) val;
-#endif /* CONFIG_64BIT */
 			info->plt_initialized = 1;
 		}
 		if (r_type == R_390_PLTOFF16 ||
@@ -436,6 +424,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
+	jump_label_apply_nops(me);
 	vfree(me->arch.syminfo);
 	me->arch.syminfo = NULL;
 	return 0;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 3f51cf4e8f02..505c17c0ae1a 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -117,55 +117,36 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		 */
 		kill_task = 1;
 	}
-#ifndef CONFIG_64BIT
+	fpt_save_area = &S390_lowcore.floating_pt_save_area;
+	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+	if (!mci->fc) {
+		/*
+		 * Floating point control register can't be restored.
+		 * Task will be terminated.
+		 */
+		asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+		kill_task = 1;
+	} else
+		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
+
 	asm volatile(
 		"	ld	0,0(%0)\n"
-		"	ld	2,8(%0)\n"
-		"	ld	4,16(%0)\n"
-		"	ld	6,24(%0)"
-		: : "a" (&S390_lowcore.floating_pt_save_area));
-#endif
-
-	if (MACHINE_HAS_IEEE) {
-#ifdef CONFIG_64BIT
-		fpt_save_area = &S390_lowcore.floating_pt_save_area;
-		fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
-#else
-		fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
-		fpt_creg_save_area = fpt_save_area + 128;
-#endif
-		if (!mci->fc) {
-			/*
-			 * Floating point control register can't be restored.
-			 * Task will be terminated.
-			 */
-			asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
-			kill_task = 1;
-
-		} else
-			asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
-
-		asm volatile(
-			"	ld	0,0(%0)\n"
-			"	ld	1,8(%0)\n"
-			"	ld	2,16(%0)\n"
-			"	ld	3,24(%0)\n"
-			"	ld	4,32(%0)\n"
-			"	ld	5,40(%0)\n"
-			"	ld	6,48(%0)\n"
-			"	ld	7,56(%0)\n"
-			"	ld	8,64(%0)\n"
-			"	ld	9,72(%0)\n"
-			"	ld	10,80(%0)\n"
-			"	ld	11,88(%0)\n"
-			"	ld	12,96(%0)\n"
-			"	ld	13,104(%0)\n"
-			"	ld	14,112(%0)\n"
-			"	ld	15,120(%0)\n"
-			: : "a" (fpt_save_area));
-	}
-
-#ifdef CONFIG_64BIT
+		"	ld	1,8(%0)\n"
+		"	ld	2,16(%0)\n"
+		"	ld	3,24(%0)\n"
+		"	ld	4,32(%0)\n"
+		"	ld	5,40(%0)\n"
+		"	ld	6,48(%0)\n"
+		"	ld	7,56(%0)\n"
+		"	ld	8,64(%0)\n"
+		"	ld	9,72(%0)\n"
+		"	ld	10,80(%0)\n"
+		"	ld	11,88(%0)\n"
+		"	ld	12,96(%0)\n"
+		"	ld	13,104(%0)\n"
+		"	ld	14,112(%0)\n"
+		"	ld	15,120(%0)\n"
+		: : "a" (fpt_save_area));
 	/* Revalidate vector registers */
 	if (MACHINE_HAS_VX && current->thread.vxrs) {
 		if (!mci->vr) {
@@ -178,7 +159,6 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		restore_vx_regs((__vector128 *)
 				S390_lowcore.vector_save_area_addr);
 	}
-#endif
 	/* Revalidate access registers */
 	asm volatile(
 		"	lam	0,15,0(%0)"
@@ -198,21 +178,14 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		 */
 		s390_handle_damage("invalid control registers.");
 	} else {
-#ifdef CONFIG_64BIT
 		asm volatile(
 			"	lctlg	0,15,0(%0)"
 			: : "a" (&S390_lowcore.cregs_save_area));
-#else
-		asm volatile(
-			"	lctl	0,15,0(%0)"
-			: : "a" (&S390_lowcore.cregs_save_area));
-#endif
 	}
 	/*
 	 * We don't even try to revalidate the TOD register, since we simply
 	 * can't write something sensible into that register.
 	 */
-#ifdef CONFIG_64BIT
 	/*
 	 * See if we can revalidate the TOD programmable register with its
 	 * old contents (should be zero) otherwise set it to zero.
@@ -228,7 +201,6 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			"	sckpf"
 			: : "a" (&S390_lowcore.tod_progreg_save_area)
 			: "0", "cc");
-#endif
 	/* Revalidate clock comparator register */
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	/* Check if old PSW is valid */
@@ -280,19 +252,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 		if (mci->b) {
 			/* Processing backup -> verify if we can survive this */
 			u64 z_mcic, o_mcic, t_mcic;
-#ifdef CONFIG_64BIT
 			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
 			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
 				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
 				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
 				  1ULL<<16);
-#else
-			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
-				  1ULL<<29);
-			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
-				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
-				  1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
-#endif
 			t_mcic = *(u64 *)mci;
 
 			if (((t_mcic & z_mcic) != 0) ||
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c3f8d157cb0d..e6a1578fc000 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
 
 static struct attribute *cpumsf_pmu_events_attr[] = {
 	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
-	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+	NULL,
 	NULL,
 };
 
@@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void)
 		return -EINVAL;
 	}
 
-	if (si.ad)
+	if (si.ad) {
 		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+		cpumsf_pmu_events_attr[1] =
+			CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+	}
 
 	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
 	if (!sfdbg)
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index f6f8886399f6..036aa01d06a9 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -6,19 +6,13 @@
 
 #include <linux/linkage.h>
 
-#ifdef CONFIG_32BIT
-#define PGM_CHECK_64BIT(handler) .long default_trap_handler
-#else
-#define PGM_CHECK_64BIT(handler) .long handler
-#endif
-
 #define PGM_CHECK(handler)	.long handler
 #define PGM_CHECK_DEFAULT	PGM_CHECK(default_trap_handler)
 
 /*
  * The program check table contains exactly 128 (0x00-0x7f) entries. Each
- * line defines the 31 and/or 64 bit function to be called corresponding
- * to the program check interruption code.
+ * line defines the function to be called corresponding to the program check
+ * interruption code.
  */
 .section .rodata, "a"
 ENTRY(pgm_check_table)
@@ -46,10 +40,10 @@ PGM_CHECK_DEFAULT			/* 14 */
 PGM_CHECK(operand_exception)		/* 15 */
 PGM_CHECK_DEFAULT			/* 16 */
 PGM_CHECK_DEFAULT			/* 17 */
-PGM_CHECK_64BIT(transaction_exception)	/* 18 */
+PGM_CHECK(transaction_exception)	/* 18 */
 PGM_CHECK_DEFAULT			/* 19 */
 PGM_CHECK_DEFAULT			/* 1a */
-PGM_CHECK_64BIT(vector_exception)	/* 1b */
+PGM_CHECK(vector_exception)		/* 1b */
 PGM_CHECK(space_switch_exception)	/* 1c */
 PGM_CHECK(hfp_sqrt_exception)		/* 1d */
 PGM_CHECK_DEFAULT			/* 1e */
@@ -78,10 +72,10 @@ PGM_CHECK_DEFAULT			/* 34 */
 PGM_CHECK_DEFAULT			/* 35 */
 PGM_CHECK_DEFAULT			/* 36 */
 PGM_CHECK_DEFAULT			/* 37 */
-PGM_CHECK_64BIT(do_dat_exception)	/* 38 */
-PGM_CHECK_64BIT(do_dat_exception)	/* 39 */
-PGM_CHECK_64BIT(do_dat_exception)	/* 3a */
-PGM_CHECK_64BIT(do_dat_exception)	/* 3b */
+PGM_CHECK(do_dat_exception)		/* 38 */
+PGM_CHECK(do_dat_exception)		/* 39 */
+PGM_CHECK(do_dat_exception)		/* 3a */
+PGM_CHECK(do_dat_exception)		/* 3b */
 PGM_CHECK_DEFAULT			/* 3c */
 PGM_CHECK_DEFAULT			/* 3d */
 PGM_CHECK_DEFAULT			/* 3e */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 13fc0978ca7e..dc5edc29b73a 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -79,13 +79,11 @@ void release_thread(struct task_struct *dead_task)
 {
 }
 
-#ifdef CONFIG_64BIT
 void arch_release_task_struct(struct task_struct *tsk)
 {
 	if (tsk->thread.vxrs)
 		kfree(tsk->thread.vxrs);
 }
-#endif
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 		unsigned long arg, struct task_struct *p)
@@ -144,19 +142,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
-#ifndef CONFIG_64BIT
-	/*
-	 * save fprs to current->thread.fp_regs to merge them with
-	 * the emulated registers and then copy the result to the child.
-	 */
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	save_fp_regs(current->thread.fp_regs.fprs);
-	memcpy(&p->thread.fp_regs, &current->thread.fp_regs,
-	       sizeof(s390_fp_regs));
-	/* Set a new TLS ?  */
-	if (clone_flags & CLONE_SETTLS)
-		p->thread.acrs[0] = frame->childregs.gprs[6];
-#else /* CONFIG_64BIT */
 	/* Save the fpu registers to new thread structure. */
 	save_fp_ctl(&p->thread.fp_regs.fpc);
 	save_fp_regs(p->thread.fp_regs.fprs);
@@ -172,15 +157,13 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 			p->thread.acrs[1] = (unsigned int)tls;
 		}
 	}
-#endif /* CONFIG_64BIT */
 	return 0;
 }
 
 asmlinkage void execve_tail(void)
 {
 	current->thread.fp_regs.fpc = 0;
-	if (MACHINE_HAS_IEEE)
-		asm volatile("sfpc %0,%0" : : "d" (0));
+	asm volatile("sfpc %0,%0" : : "d" (0));
 }
 
 /*
@@ -188,18 +171,8 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-#ifndef CONFIG_64BIT
-	/*
-	 * save fprs to current->thread.fp_regs to merge them with
-	 * the emulated registers and then copy the result to the dump.
-	 */
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	save_fp_regs(current->thread.fp_regs.fprs);
-	memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs));
-#else /* CONFIG_64BIT */
 	save_fp_ctl(&fpregs->fpc);
 	save_fp_regs(fpregs->fprs);
-#endif /* CONFIG_64BIT */
 	return 1;
 }
 EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 26108232fcaa..dc488e13b7e3 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -18,7 +18,7 @@
 
 static DEFINE_PER_CPU(struct cpuid, cpu_id);
 
-void cpu_relax(void)
+void notrace cpu_relax(void)
 {
 	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
 		asm volatile("diag 0,0,0x44");
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index eabfb4594517..d363c9c322a1 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,7 +44,6 @@ void update_cr_regs(struct task_struct *task)
 	struct thread_struct *thread = &task->thread;
 	struct per_regs old, new;
 
-#ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
 		unsigned long cr, cr_new;
@@ -80,7 +79,6 @@ void update_cr_regs(struct task_struct *task)
 				__ctl_load(cr_new, 2, 2);
 		}
 	}
-#endif
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
 	new.start = thread->per_user.start;
@@ -93,10 +91,8 @@ void update_cr_regs(struct task_struct *task)
 			new.control |= PER_EVENT_BRANCH;
 		else
 			new.control |= PER_EVENT_IFETCH;
-#ifdef CONFIG_64BIT
 		new.control |= PER_CONTROL_SUSPENSION;
 		new.control |= PER_EVENT_TRANSACTION_END;
-#endif
 		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
 			new.control |= PER_EVENT_IFETCH;
 		new.start = 0;
@@ -146,11 +142,7 @@ void ptrace_disable(struct task_struct *task)
 	task->thread.per_flags = 0;
 }
 
-#ifndef CONFIG_64BIT
-# define __ADDR_MASK 3
-#else
-# define __ADDR_MASK 7
-#endif
+#define __ADDR_MASK 7
 
 static inline unsigned long __peek_user_per(struct task_struct *child,
 					    addr_t addr)
@@ -223,7 +215,6 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		 * access registers are stored in the thread structure
 		 */
 		offset = addr - (addr_t) &dummy->regs.acrs;
-#ifdef CONFIG_64BIT
 		/*
 		 * Very special case: old & broken 64 bit gdb reading
 		 * from acrs[15]. Result is a 64 bit value. Read the
@@ -232,8 +223,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		if (addr == (addr_t) &dummy->regs.acrs[15])
 			tmp = ((unsigned long) child->thread.acrs[15]) << 32;
 		else
-#endif
-		tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
+			tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
 
 	} else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
 		/*
@@ -261,12 +251,10 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		 * or the child->thread.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-#ifdef CONFIG_64BIT
 		if (child->thread.vxrs)
 			tmp = *(addr_t *)
 			       ((addr_t) child->thread.vxrs + 2*offset);
 		else
-#endif
 			tmp = *(addr_t *)
 			       ((addr_t) &child->thread.fp_regs.fprs + offset);
 
@@ -293,11 +281,9 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
 	 * an alignment of 4. Programmers from hell...
 	 */
 	mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
 	if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
 	    addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
 		mask = 3;
-#endif
 	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
 		return -EIO;
 
@@ -370,7 +356,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * access registers are stored in the thread structure
 		 */
 		offset = addr - (addr_t) &dummy->regs.acrs;
-#ifdef CONFIG_64BIT
 		/*
 		 * Very special case: old & broken 64 bit gdb writing
 		 * to acrs[15] with a 64 bit value. Ignore the lower
@@ -380,8 +365,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		if (addr == (addr_t) &dummy->regs.acrs[15])
 			child->thread.acrs[15] = (unsigned int) (data >> 32);
 		else
-#endif
-		*(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
+			*(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
 
 	} else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
 		/*
@@ -411,12 +395,10 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * or the child->thread.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-#ifdef CONFIG_64BIT
 		if (child->thread.vxrs)
 			*(addr_t *)((addr_t)
 				child->thread.vxrs + 2*offset) = data;
 		else
-#endif
 			*(addr_t *)((addr_t)
 				&child->thread.fp_regs.fprs + offset) = data;
 
@@ -441,11 +423,9 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
 	 * an alignment of 4. Programmers from hell indeed...
 	 */
 	mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
 	if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
 	    addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
 		mask = 3;
-#endif
 	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
 		return -EIO;
 
@@ -649,12 +629,10 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		 * or the child->thread.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-#ifdef CONFIG_64BIT
 		if (child->thread.vxrs)
 			tmp = *(__u32 *)
 			       ((addr_t) child->thread.vxrs + 2*offset);
 		else
-#endif
 			tmp = *(__u32 *)
 			       ((addr_t) &child->thread.fp_regs.fprs + offset);
 
@@ -776,12 +754,10 @@ static int __poke_user_compat(struct task_struct *child,
 		 * or the child->thread.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-#ifdef CONFIG_64BIT
 		if (child->thread.vxrs)
 			*(__u32 *)((addr_t)
 				child->thread.vxrs + 2*offset) = tmp;
 		else
-#endif
 			*(__u32 *)((addr_t)
 				&child->thread.fp_regs.fprs + offset) = tmp;
 
@@ -979,16 +955,13 @@ static int s390_fpregs_get(struct task_struct *target,
 	if (target == current) {
 		save_fp_ctl(&target->thread.fp_regs.fpc);
 		save_fp_regs(target->thread.fp_regs.fprs);
-	}
-#ifdef CONFIG_64BIT
-	else if (target->thread.vxrs) {
+	} else if (target->thread.vxrs) {
 		int i;
 
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			target->thread.fp_regs.fprs[i] =
 				*(freg_t *)(target->thread.vxrs + i);
 	}
-#endif
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				   &target->thread.fp_regs, 0, -1);
 }
@@ -1026,23 +999,18 @@ static int s390_fpregs_set(struct task_struct *target,
 		if (target == current) {
 			restore_fp_ctl(&target->thread.fp_regs.fpc);
 			restore_fp_regs(target->thread.fp_regs.fprs);
-		}
-#ifdef CONFIG_64BIT
-		else if (target->thread.vxrs) {
+		} else if (target->thread.vxrs) {
 			int i;
 
 			for (i = 0; i < __NUM_VXRS_LOW; i++)
 				*(freg_t *)(target->thread.vxrs + i) =
 					target->thread.fp_regs.fprs[i];
 		}
-#endif
 	}
 
 	return rc;
 }
 
-#ifdef CONFIG_64BIT
-
 static int s390_last_break_get(struct task_struct *target,
 			       const struct user_regset *regset,
 			       unsigned int pos, unsigned int count,
@@ -1182,8 +1150,6 @@ static int s390_vxrs_high_set(struct task_struct *target,
 	return rc;
 }
 
-#endif
-
 static int s390_system_call_get(struct task_struct *target,
 				const struct user_regset *regset,
 				unsigned int pos, unsigned int count,
@@ -1229,7 +1195,6 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_system_call_get,
 		.set = s390_system_call_set,
 	},
-#ifdef CONFIG_64BIT
 	{
 		.core_note_type = NT_S390_LAST_BREAK,
 		.n = 1,
@@ -1262,7 +1227,6 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
-#endif
 };
 
 static const struct user_regset_view user_s390_view = {
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index dd8016b0477e..52aab0bd84f8 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -1,7 +1,7 @@
 /*
- *  S390 version
- *    Copyright IBM Corp. 2000
- *    Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ *    Copyright IBM Corp 2000, 2011
+ *    Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ *		 Denis Joseph Barrow,
  */
 
 #include <linux/linkage.h>
@@ -9,43 +9,90 @@
 #include <asm/sigp.h>
 
 #
-# store_status: Empty implementation until kdump is supported on 31 bit
+# store_status
+#
+# Prerequisites to run this function:
+# - Prefix register is set to zero
+# - Original prefix register is stored in "dump_prefix_page"
+# - Lowcore protection is off
 #
 ENTRY(store_status)
-		br	%r14
+	/* Save register one and load save area base */
+	stg	%r1,__LC_SAVE_AREA_RESTART
+	lghi	%r1,SAVE_AREA_BASE
+	/* General purpose registers */
+	stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	lg	%r2,__LC_SAVE_AREA_RESTART
+	stg	%r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
+	/* Control registers */
+	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Access registers */
+	stam	%a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Floating point registers */
+	std	%f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Floating point control register */
+	stfpc	__LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* CPU timer */
+	stpt	__LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Saved prefix register */
+	larl	%r2,dump_prefix_page
+	mvc	__LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
+	/* Clock comparator - seven bytes */
+	larl	%r2,.Lclkcmp
+	stckc	0(%r2)
+	mvc	__LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
+	/* Program status word */
+	epsw	%r2,%r3
+	st	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
+	st	%r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
+	larl	%r2,store_status
+	stg	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
+	br	%r14
+
+	.section .bss
+	.align	8
+.Lclkcmp:	.quad	0x0000000000000000
+	.previous
 
 #
 # do_reipl_asm
 # Parameter: r2 = schid of reipl device
 #
+
 ENTRY(do_reipl_asm)
 		basr	%r13,0
-.Lpg0:		lpsw	.Lnewpsw-.Lpg0(%r13)
-.Lpg1:		# do store status of all registers
+.Lpg0:		lpswe	.Lnewpsw-.Lpg0(%r13)
+.Lpg1:		brasl	%r14,store_status
 
-		stm	%r0,%r15,__LC_GPREGS_SAVE_AREA
-		stctl	%c0,%c15,__LC_CREGS_SAVE_AREA
-		stam	%a0,%a15,__LC_AREGS_SAVE_AREA
-		l	%r10,.Ldump_pfx-.Lpg0(%r13)
-		mvc	__LC_PREFIX_SAVE_AREA(4),0(%r10)
-		stckc	.Lclkcmp-.Lpg0(%r13)
-		mvc	__LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13)
-		stpt	__LC_CPU_TIMER_SAVE_AREA
-		st	%r13, __LC_PSW_SAVE_AREA+4
-		lctl	%c6,%c6,.Lall-.Lpg0(%r13)
-		lr	%r1,%r2
-		mvc	__LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13)
+		lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
+		lgr	%r1,%r2
+		mvc	__LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
 		stsch	.Lschib-.Lpg0(%r13)
 		oi	.Lschib+5-.Lpg0(%r13),0x84
-.Lecs:  	xi	.Lschib+27-.Lpg0(%r13),0x01
+.Lecs:		xi	.Lschib+27-.Lpg0(%r13),0x01
 		msch	.Lschib-.Lpg0(%r13)
-		lhi	%r0,5
+		lghi	%r0,5
 .Lssch:		ssch	.Liplorb-.Lpg0(%r13)
 		jz	.L001
 		brct	%r0,.Lssch
 		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L001:		mvc	__LC_IO_NEW_PSW(8),.Lionew-.Lpg0(%r13)
-.Ltpi:		lpsw	.Lwaitpsw-.Lpg0(%r13)
+.L001:		mvc	__LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
+.Ltpi:		lpswe	.Lwaitpsw-.Lpg0(%r13)
 .Lcont:		c	%r1,__LC_SUBCHANNEL_ID
 		jnz	.Ltpi
 		clc	__LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
@@ -58,20 +105,36 @@ ENTRY(do_reipl_asm)
 		jz	.L003
 		bas	%r14,.Ldisab-.Lpg0(%r13)
 .L003:		st	%r1,__LC_SUBCHANNEL_ID
+		lhi	%r1,0		 # mode 0 = esa
+		slr	%r0,%r0		 # set cpuid to zero
+		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
 		lpsw	0
-		sigp	0,0,SIGP_RESTART
-.Ldisab:	st	%r14,.Ldispsw+4-.Lpg0(%r13)
-		lpsw	.Ldispsw-.Lpg0(%r13)
+.Ldisab:	sll	%r14,1
+		srl	%r14,1		 # need to kill hi bit to avoid specification exceptions.
+		st	%r14,.Ldispsw+12-.Lpg0(%r13)
+		lpswe	.Ldispsw-.Lpg0(%r13)
 		.align	8
-.Lclkcmp:	.quad	0x0000000000000000
-.Lall:		.long	0xff000000
-.Ldump_pfx:	.long	dump_prefix_page
-		.align	8
-.Lnewpsw:	.long	0x00080000,0x80000000+.Lpg1
-.Lpcnew:	.long	0x00080000,0x80000000+.Lecs
-.Lionew:	.long	0x00080000,0x80000000+.Lcont
-.Lwaitpsw:	.long	0x020a0000,0x00000000+.Ltpi
-.Ldispsw:	.long	0x000a0000,0x00000000
+.Lall:		.quad	0x00000000ff000000
+		.align	16
+/*
+ * These addresses have to be 31 bit otherwise
+ * the sigp will throw a specifcation exception
+ * when switching to ESA mode as bit 31 be set
+ * in the ESA psw.
+ * Bit 31 of the addresses has to be 0 for the
+ * 31bit lpswe instruction a fact they appear to have
+ * omitted from the pop.
+ */
+.Lnewpsw:	.quad	0x0000000080000000
+		.quad	.Lpg1
+.Lpcnew:	.quad	0x0000000080000000
+		.quad	.Lecs
+.Lionew:	.quad	0x0000000080000000
+		.quad	.Lcont
+.Lwaitpsw:	.quad	0x0202000080000000
+		.quad	.Ltpi
+.Ldispsw:	.quad	0x0002000080000000
+		.quad	0x0000000000000000
 .Liplccws:	.long	0x02000000,0x60000018
 		.long	0x08000008,0x20000001
 .Liplorb:	.long	0x0049504c,0x0040ff80
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
deleted file mode 100644
index dc3b1273c4dc..000000000000
--- a/arch/s390/kernel/reipl64.S
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *    Copyright IBM Corp 2000, 2011
- *    Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
- *		 Denis Joseph Barrow,
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/sigp.h>
-
-#
-# store_status
-#
-# Prerequisites to run this function:
-# - Prefix register is set to zero
-# - Original prefix register is stored in "dump_prefix_page"
-# - Lowcore protection is off
-#
-ENTRY(store_status)
-	/* Save register one and load save area base */
-	stg	%r1,__LC_SAVE_AREA_RESTART
-	lghi	%r1,SAVE_AREA_BASE
-	/* General purpose registers */
-	stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	lg	%r2,__LC_SAVE_AREA_RESTART
-	stg	%r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
-	/* Control registers */
-	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	/* Access registers */
-	stam	%a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	/* Floating point registers */
-	std	%f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	std	%f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	/* Floating point control register */
-	stfpc	__LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	/* CPU timer */
-	stpt	__LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	/* Saved prefix register */
-	larl	%r2,dump_prefix_page
-	mvc	__LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
-	/* Clock comparator - seven bytes */
-	larl	%r2,.Lclkcmp
-	stckc	0(%r2)
-	mvc	__LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
-	/* Program status word */
-	epsw	%r2,%r3
-	st	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
-	st	%r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
-	larl	%r2,store_status
-	stg	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
-	br	%r14
-
-	.section .bss
-	.align	8
-.Lclkcmp:	.quad	0x0000000000000000
-	.previous
-
-#
-# do_reipl_asm
-# Parameter: r2 = schid of reipl device
-#
-
-ENTRY(do_reipl_asm)
-		basr	%r13,0
-.Lpg0:		lpswe	.Lnewpsw-.Lpg0(%r13)
-.Lpg1:		brasl	%r14,store_status
-
-		lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
-		lgr	%r1,%r2
-		mvc	__LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
-		stsch	.Lschib-.Lpg0(%r13)
-		oi	.Lschib+5-.Lpg0(%r13),0x84
-.Lecs:  	xi	.Lschib+27-.Lpg0(%r13),0x01
-		msch	.Lschib-.Lpg0(%r13)
-		lghi	%r0,5
-.Lssch:		ssch	.Liplorb-.Lpg0(%r13)
-		jz	.L001
-		brct	%r0,.Lssch
-		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L001:		mvc	__LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
-.Ltpi:		lpswe	.Lwaitpsw-.Lpg0(%r13)
-.Lcont:		c	%r1,__LC_SUBCHANNEL_ID
-		jnz	.Ltpi
-		clc	__LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
-		jnz	.Ltpi
-		tsch	.Liplirb-.Lpg0(%r13)
-		tm	.Liplirb+9-.Lpg0(%r13),0xbf
-		jz	.L002
-		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L002:		tm	.Liplirb+8-.Lpg0(%r13),0xf3
-		jz	.L003
-		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L003:		st	%r1,__LC_SUBCHANNEL_ID
-		lhi	%r1,0		 # mode 0 = esa
-		slr	%r0,%r0 	 # set cpuid to zero
-		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
-		lpsw	0
-.Ldisab:	sll	%r14,1
-		srl	%r14,1		 # need to kill hi bit to avoid specification exceptions.
-		st	%r14,.Ldispsw+12-.Lpg0(%r13)
-		lpswe	.Ldispsw-.Lpg0(%r13)
-		.align	8
-.Lall:		.quad	0x00000000ff000000
-		.align	16
-/*
- * These addresses have to be 31 bit otherwise
- * the sigp will throw a specifcation exception
- * when switching to ESA mode as bit 31 be set
- * in the ESA psw.
- * Bit 31 of the addresses has to be 0 for the
- * 31bit lpswe instruction a fact they appear to have
- * omitted from the pop.
- */
-.Lnewpsw:	.quad	0x0000000080000000
-		.quad	.Lpg1
-.Lpcnew:	.quad	0x0000000080000000
-		.quad	.Lecs
-.Lionew:	.quad	0x0000000080000000
-		.quad	.Lcont
-.Lwaitpsw:	.quad	0x0202000080000000
-		.quad	.Ltpi
-.Ldispsw:	.quad	0x0002000080000000
-		.quad	0x0000000000000000
-.Liplccws:	.long	0x02000000,0x60000018
-		.long	0x08000008,0x20000001
-.Liplorb:	.long	0x0049504c,0x0040ff80
-		.long	0x00000000+.Liplccws
-.Lschib:	.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-.Liplirb:	.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index f4e6f20e117a..cfac28330b03 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -19,7 +19,8 @@
  * %r7 = PAGE_SIZE
  * %r8 holds the source address
  * %r9 = PAGE_SIZE
- * %r10 is a page mask
+ *
+ * 0xf000 is a page_mask
  */
 
 	.text
@@ -27,46 +28,47 @@ ENTRY(relocate_kernel)
 		basr	%r13,0		# base address
 	.base:
 		stnsm	sys_msk-.base(%r13),0xfb	# disable DAT
-		stctl	%c0,%c15,ctlregs-.base(%r13)
-		stm	%r0,%r15,gprregs-.base(%r13)
+		stctg	%c0,%c15,ctlregs-.base(%r13)
+		stmg	%r0,%r15,gprregs-.base(%r13)
+		lghi	%r0,3
+		sllg	%r0,%r0,31
+		stg	%r0,0x1d0(%r0)
+		la	%r0,.back_pgm-.base(%r13)
+		stg	%r0,0x1d8(%r0)
 		la	%r1,load_psw-.base(%r13)
 		mvc	0(8,%r0),0(%r1)
 		la	%r0,.back-.base(%r13)
 		st	%r0,4(%r0)
 		oi	4(%r0),0x80
-		mvc	0x68(8,%r0),0(%r1)
-		la	%r0,.back_pgm-.base(%r13)
-		st	%r0,0x6c(%r0)
-		oi	0x6c(%r0),0x80
-		lhi	%r0,0
+		lghi	%r0,0
 		diag	%r0,%r0,0x308
 	.back:
+		lhi	%r1,1		# mode 1 = esame
+		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
+		sam64			# switch to 64 bit addressing mode
 		basr	%r13,0
 	.back_base:
 		oi	have_diag308-.back_base(%r13),0x01
-		lctl	%c0,%c15,ctlregs-.back_base(%r13)
-		lm	%r0,%r15,gprregs-.back_base(%r13)
-		j	.start_reloc
+		lctlg	%c0,%c15,ctlregs-.back_base(%r13)
+		lmg	%r0,%r15,gprregs-.back_base(%r13)
+		j	.top
 	.back_pgm:
-		lm	%r0,%r15,gprregs-.base(%r13)
-	.start_reloc:
-		lhi	%r10,-1		# preparing the mask
-		sll	%r10,12		# shift it such that it becomes 0xf000
+		lmg	%r0,%r15,gprregs-.base(%r13)
 	.top:
-		lhi	%r7,4096	# load PAGE_SIZE in r7
-		lhi	%r9,4096	# load PAGE_SIZE in r9
-		l	%r5,0(%r2)	# read another word for indirection page
-		ahi	%r2,4		# increment pointer
+		lghi	%r7,4096	# load PAGE_SIZE in r7
+		lghi	%r9,4096	# load PAGE_SIZE in r9
+		lg	%r5,0(%r2)	# read another word for indirection page
+		aghi	%r2,8		# increment pointer
 		tml	%r5,0x1		# is it a destination page?
 		je	.indir_check	# NO, goto "indir_check"
-		lr	%r6,%r5		# r6 = r5
-		nr	%r6,%r10	# mask it out and...
+		lgr	%r6,%r5		# r6 = r5
+		nill	%r6,0xf000	# mask it out and...
 		j	.top		# ...next iteration
 	.indir_check:
 		tml	%r5,0x2		# is it a indirection page?
 		je	.done_test	# NO, goto "done_test"
-		nr	%r5,%r10	# YES, mask out,
-		lr	%r2,%r5		# move it into the right register,
+		nill	%r5,0xf000	# YES, mask out,
+		lgr	%r2,%r5		# move it into the right register,
 		j	.top		# and read next...
 	.done_test:
 		tml	%r5,0x4		# is it the done indicator?
@@ -75,13 +77,13 @@ ENTRY(relocate_kernel)
 	.source_test:
 		tml	%r5,0x8		# it should be a source indicator...
 		je	.top		# NO, ignore it...
-		lr	%r8,%r5		# r8 = r5
-		nr	%r8,%r10	# masking
+		lgr	%r8,%r5		# r8 = r5
+		nill	%r8,0xf000	# masking
 	0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
 		jo	0b
 		j	.top
 	.done:
-		sr	%r0,%r0		# clear register r0
+		sgr	%r0,%r0		# clear register r0
 		la	%r4,load_psw-.base(%r13)	# load psw-address into the register
 		o	%r3,4(%r4)	# or load address into psw
 		st	%r3,4(%r4)
@@ -90,8 +92,9 @@ ENTRY(relocate_kernel)
 		jno	.no_diag308
 		diag	%r0,%r0,0x308
 	.no_diag308:
-		sr	%r1,%r1		# clear %r1
-		sr	%r2,%r2		# clear %r2
+		sam31			# 31 bit mode
+		sr	%r1,%r1		# erase register r1
+		sr	%r2,%r2		# erase register r2
 		sigp	%r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
 		lpsw	0		# hopefully start new kernel...
 
@@ -102,11 +105,11 @@ ENTRY(relocate_kernel)
 		.quad	0
 	ctlregs:
 		.rept	16
-		.long	0
+		.quad	0
 		.endr
 	gprregs:
 		.rept	16
-		.long	0
+		.quad	0
 		.endr
 	have_diag308:
 		.byte	0
diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S
deleted file mode 100644
index cfac28330b03..000000000000
--- a/arch/s390/kernel/relocate_kernel64.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright IBM Corp. 2005
- *
- * Author(s): Rolf Adelsberger,
- *	      Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/sigp.h>
-
-/*
- * moves the new kernel to its destination...
- * %r2 = pointer to first kimage_entry_t
- * %r3 = start address - where to jump to after the job is done...
- *
- * %r5 will be used as temp. storage
- * %r6 holds the destination address
- * %r7 = PAGE_SIZE
- * %r8 holds the source address
- * %r9 = PAGE_SIZE
- *
- * 0xf000 is a page_mask
- */
-
-	.text
-ENTRY(relocate_kernel)
-		basr	%r13,0		# base address
-	.base:
-		stnsm	sys_msk-.base(%r13),0xfb	# disable DAT
-		stctg	%c0,%c15,ctlregs-.base(%r13)
-		stmg	%r0,%r15,gprregs-.base(%r13)
-		lghi	%r0,3
-		sllg	%r0,%r0,31
-		stg	%r0,0x1d0(%r0)
-		la	%r0,.back_pgm-.base(%r13)
-		stg	%r0,0x1d8(%r0)
-		la	%r1,load_psw-.base(%r13)
-		mvc	0(8,%r0),0(%r1)
-		la	%r0,.back-.base(%r13)
-		st	%r0,4(%r0)
-		oi	4(%r0),0x80
-		lghi	%r0,0
-		diag	%r0,%r0,0x308
-	.back:
-		lhi	%r1,1		# mode 1 = esame
-		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
-		sam64			# switch to 64 bit addressing mode
-		basr	%r13,0
-	.back_base:
-		oi	have_diag308-.back_base(%r13),0x01
-		lctlg	%c0,%c15,ctlregs-.back_base(%r13)
-		lmg	%r0,%r15,gprregs-.back_base(%r13)
-		j	.top
-	.back_pgm:
-		lmg	%r0,%r15,gprregs-.base(%r13)
-	.top:
-		lghi	%r7,4096	# load PAGE_SIZE in r7
-		lghi	%r9,4096	# load PAGE_SIZE in r9
-		lg	%r5,0(%r2)	# read another word for indirection page
-		aghi	%r2,8		# increment pointer
-		tml	%r5,0x1		# is it a destination page?
-		je	.indir_check	# NO, goto "indir_check"
-		lgr	%r6,%r5		# r6 = r5
-		nill	%r6,0xf000	# mask it out and...
-		j	.top		# ...next iteration
-	.indir_check:
-		tml	%r5,0x2		# is it a indirection page?
-		je	.done_test	# NO, goto "done_test"
-		nill	%r5,0xf000	# YES, mask out,
-		lgr	%r2,%r5		# move it into the right register,
-		j	.top		# and read next...
-	.done_test:
-		tml	%r5,0x4		# is it the done indicator?
-		je	.source_test	# NO! Well, then it should be the source indicator...
-		j	.done		# ok, lets finish it here...
-	.source_test:
-		tml	%r5,0x8		# it should be a source indicator...
-		je	.top		# NO, ignore it...
-		lgr	%r8,%r5		# r8 = r5
-		nill	%r8,0xf000	# masking
-	0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
-		jo	0b
-		j	.top
-	.done:
-		sgr	%r0,%r0		# clear register r0
-		la	%r4,load_psw-.base(%r13)	# load psw-address into the register
-		o	%r3,4(%r4)	# or load address into psw
-		st	%r3,4(%r4)
-		mvc	0(8,%r0),0(%r4)	# copy psw to absolute address 0
-		tm	have_diag308-.base(%r13),0x01
-		jno	.no_diag308
-		diag	%r0,%r0,0x308
-	.no_diag308:
-		sam31			# 31 bit mode
-		sr	%r1,%r1		# erase register r1
-		sr	%r2,%r2		# erase register r2
-		sigp	%r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
-		lpsw	0		# hopefully start new kernel...
-
-		.align	8
-	load_psw:
-		.long	0x00080000,0x80000000
-	sys_msk:
-		.quad	0
-	ctlregs:
-		.rept	16
-		.quad	0
-		.endr
-	gprregs:
-		.rept	16
-		.quad	0
-		.endr
-	have_diag308:
-		.byte	0
-		.align	8
-	relocate_kernel_end:
-	.align 8
-	.globl	relocate_kernel_len
-	relocate_kernel_len:
-		.quad	relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index 7e77e03378f3..43c3169ea49c 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -36,21 +36,17 @@ _sclp_wait_int:
 	ahi	%r15,-96			# create stack frame
 	la	%r8,LC_EXT_NEW_PSW		# register int handler
 	la	%r9,.LextpswS1-.LbaseS1(%r13)
-#ifdef CONFIG_64BIT
 	tm	LC_AR_MODE_ID,1
 	jno	.Lesa1
 	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
 	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
 .Lesa1:
-#endif
 	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
 	mvc	0(16,%r8),0(%r9)
-#ifdef CONFIG_64BIT
 	epsw	%r6,%r7				# set current addressing mode
 	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
 	nilh	%r7,0x8000
 	stm	%r6,%r7,0(%r8)
-#endif
 	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
 	ltr	%r2,%r2
 	jz	.LsetctS1
@@ -92,10 +88,8 @@ _sclp_wait_int:
 	.long	0, 0, 0, 0			# old ext int PSW
 .LextpswS1:
 	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
-#ifdef CONFIG_64BIT
 .LextpswS1_64:
 	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
-#endif
 .LwaitpswS1:
 	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
 .LtimeS1:
@@ -272,13 +266,11 @@ _sclp_print:
 ENTRY(_sclp_print_early)
 	stm	%r6,%r15,24(%r15)		# save registers
 	ahi	%r15,-96			# create stack frame
-#ifdef CONFIG_64BIT
 	tm	LC_AR_MODE_ID,1
 	jno	.Lesa2
 	ahi	%r15,-80
 	stmh	%r6,%r15,96(%r15)		# store upper register halves
 .Lesa2:
-#endif
 	lr	%r10,%r2			# save string pointer
 	lhi	%r2,0
 	bras	%r14,_sclp_setup		# enable console
@@ -291,14 +283,12 @@ ENTRY(_sclp_print_early)
 	lhi	%r2,1
 	bras	%r14,_sclp_setup		# disable console
 .LendS5:
-#ifdef CONFIG_64BIT
 	tm	LC_AR_MODE_ID,1
 	jno	.Lesa3
 	lgfr	%r2,%r2				# sign extend return value
 	lmh	%r6,%r15,96(%r15)		# restore upper register halves
 	ahi	%r15,80
 .Lesa3:
-#endif
 	lm	%r6,%r15,120(%r15)		# restore registers
 	br	%r14
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index a5ea8bc17cb3..7262fe438c99 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -92,10 +92,8 @@ EXPORT_SYMBOL(VMALLOC_END);
 struct page *vmemmap;
 EXPORT_SYMBOL(vmemmap);
 
-#ifdef CONFIG_64BIT
 unsigned long MODULES_VADDR;
 unsigned long MODULES_END;
-#endif
 
 /* An array with a pointer to the lowcore of every CPU. */
 struct _lowcore *lowcore_ptr[NR_CPUS];
@@ -334,19 +332,10 @@ static void __init setup_lowcore(void)
 	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
 	       MAX_FACILITY_BIT/8);
-#ifndef CONFIG_64BIT
-	if (MACHINE_HAS_IEEE) {
-		lc->extended_save_area_addr = (__u32)
-			__alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
-		/* enable extended save area */
-		__ctl_set_bit(14, 29);
-	}
-#else
 	if (MACHINE_HAS_VX)
 		lc->vector_save_area_addr =
 			(unsigned long) &lc->vector_save_area;
 	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
-#endif
 	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
 	lc->async_enter_timer = S390_lowcore.async_enter_timer;
 	lc->exit_timer = S390_lowcore.exit_timer;
@@ -450,7 +439,6 @@ static void __init setup_memory_end(void)
 	unsigned long vmax, vmalloc_size, tmp;
 
 	/* Choose kernel address space layout: 2, 3, or 4 levels. */
-#ifdef CONFIG_64BIT
 	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
 	tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
 	tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
@@ -462,12 +450,6 @@ static void __init setup_memory_end(void)
 	MODULES_END = vmax;
 	MODULES_VADDR = MODULES_END - MODULES_LEN;
 	VMALLOC_END = MODULES_VADDR;
-#else
-	vmalloc_size = VMALLOC_END ?: 96UL << 20;
-	vmax = 1UL << 31;		/* 2-level kernel page table */
-	/* vmalloc area is at the end of the kernel address space. */
-	VMALLOC_END = vmax;
-#endif
 	VMALLOC_START = vmax - vmalloc_size;
 
 	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
@@ -754,7 +736,6 @@ static void __init setup_hwcaps(void)
 	if (MACHINE_HAS_HPAGE)
 		elf_hwcap |= HWCAP_S390_HPAGE;
 
-#if defined(CONFIG_64BIT)
 	/*
 	 * 64-bit register support for 31-bit processes
 	 * HWCAP_S390_HIGH_GPRS is bit 9.
@@ -772,22 +753,15 @@ static void __init setup_hwcaps(void)
 	 */
 	if (test_facility(129))
 		elf_hwcap |= HWCAP_S390_VXRS;
-#endif
-
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
 	switch (cpu_id.machine) {
 	case 0x9672:
-#if !defined(CONFIG_64BIT)
-	default:	/* Use "g5" as default for 31 bit kernels. */
-#endif
 		strcpy(elf_platform, "g5");
 		break;
 	case 0x2064:
 	case 0x2066:
-#if defined(CONFIG_64BIT)
 	default:	/* Use "z900" as default for 64 bit kernels. */
-#endif
 		strcpy(elf_platform, "z900");
 		break;
 	case 0x2084:
@@ -839,19 +813,6 @@ void __init setup_arch(char **cmdline_p)
         /*
          * print what head.S has found out about the machine
          */
-#ifndef CONFIG_64BIT
-	if (MACHINE_IS_VM)
-		pr_info("Linux is running as a z/VM "
-			"guest operating system in 31-bit mode\n");
-	else if (MACHINE_IS_LPAR)
-		pr_info("Linux is running natively in 31-bit mode\n");
-	if (MACHINE_HAS_IEEE)
-		pr_info("The hardware system has IEEE compatible "
-			"floating point units\n");
-	else
-		pr_info("The hardware system has no IEEE compatible "
-			"floating point units\n");
-#else /* CONFIG_64BIT */
 	if (MACHINE_IS_VM)
 		pr_info("Linux is running as a z/VM "
 			"guest operating system in 64-bit mode\n");
@@ -859,7 +820,6 @@ void __init setup_arch(char **cmdline_p)
 		pr_info("Linux is running under KVM in 64-bit mode\n");
 	else if (MACHINE_IS_LPAR)
 		pr_info("Linux is running natively in 64-bit mode\n");
-#endif /* CONFIG_64BIT */
 
 	/* Have one command line that is parsed and saved in /proc/cmdline */
 	/* boot_command_line has been already set up in early.c */
@@ -930,35 +890,3 @@ void __init setup_arch(char **cmdline_p)
 	/* Add system specific data to the random pool */
 	setup_randomness();
 }
-
-#ifdef CONFIG_32BIT
-static int no_removal_warning __initdata;
-
-static int __init parse_no_removal_warning(char *str)
-{
-	no_removal_warning = 1;
-	return 0;
-}
-__setup("no_removal_warning", parse_no_removal_warning);
-
-static int __init removal_warning(void)
-{
-	if (no_removal_warning)
-		return 0;
-	printk(KERN_ALERT "\n\n");
-	printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
-	printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
-	printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
-	printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
-	printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
-	printk(KERN_CONT "please let us know. Please write to:\n");
-	printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
-	printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
-	printk(KERN_CONT "Thank you!\n\n");
-	printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
-	printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
-	schedule_timeout_uninterruptible(300 * HZ);
-	return 0;
-}
-early_initcall(removal_warning);
-#endif
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index b3ae6f70c6d6..c551f22ce066 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -106,7 +106,6 @@ static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
 	save_fp_ctl(&current->thread.fp_regs.fpc);
-#ifdef CONFIG_64BIT
 	if (current->thread.vxrs) {
 		int i;
 
@@ -115,7 +114,6 @@ static void store_sigregs(void)
 			current->thread.fp_regs.fprs[i] =
 				*(freg_t *)(current->thread.vxrs + i);
 	} else
-#endif
 		save_fp_regs(current->thread.fp_regs.fprs);
 }
 
@@ -124,7 +122,6 @@ static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
 	/* restore_fp_ctl is done in restore_sigregs */
-#ifdef CONFIG_64BIT
 	if (current->thread.vxrs) {
 		int i;
 
@@ -133,7 +130,6 @@ static void load_sigregs(void)
 				current->thread.fp_regs.fprs[i];
 		restore_vx_regs(current->thread.vxrs);
 	} else
-#endif
 		restore_fp_regs(current->thread.fp_regs.fprs);
 }
 
@@ -200,7 +196,6 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 static int save_sigregs_ext(struct pt_regs *regs,
 			    _sigregs_ext __user *sregs_ext)
 {
-#ifdef CONFIG_64BIT
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
 
@@ -215,14 +210,12 @@ static int save_sigregs_ext(struct pt_regs *regs,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
-#endif
 	return 0;
 }
 
 static int restore_sigregs_ext(struct pt_regs *regs,
 			       _sigregs_ext __user *sregs_ext)
 {
-#ifdef CONFIG_64BIT
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
 
@@ -237,7 +230,6 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
 	}
-#endif
 	return 0;
 }
 
@@ -309,16 +301,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	return (void __user *)((sp - frame_size) & -8ul);
 }
 
-static inline int map_signal(int sig)
-{
-	if (current_thread_info()->exec_domain
-	    && current_thread_info()->exec_domain->signal_invmap
-	    && sig < 32)
-		return current_thread_info()->exec_domain->signal_invmap[sig];
-	else
-		return sig;
-}
-
 static int setup_frame(int sig, struct k_sigaction *ka,
 		       sigset_t *set, struct pt_regs * regs)
 {
@@ -386,7 +368,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 		(regs->psw.mask & ~PSW_MASK_ASC);
 	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
 
-	regs->gprs[2] = map_signal(sig);
+	regs->gprs[2] = sig;
 	regs->gprs[3] = (unsigned long) &frame->sc;
 
 	/* We forgot to include these in the sigcontext.
@@ -416,13 +398,11 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	 * included in the signal frame on a 31-bit system.
 	 */
 	uc_flags = 0;
-#ifdef CONFIG_64BIT
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
 		if (current->thread.vxrs)
 			uc_flags |= UC_VXRS;
 	}
-#endif
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
 		return -EFAULT;
@@ -468,7 +448,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 		(regs->psw.mask & ~PSW_MASK_ASC);
 	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
 
-	regs->gprs[2] = map_signal(ksig->sig);
+	regs->gprs[2] = ksig->sig;
 	regs->gprs[3] = (unsigned long) &frame->info;
 	regs->gprs[4] = (unsigned long) &frame->uc;
 	regs->gprs[5] = task_thread_info(current)->last_break;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index db8f1115a3bf..efd2c1968000 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -198,19 +198,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
-#ifndef CONFIG_64BIT
-	if (MACHINE_HAS_IEEE) {
-		lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
-		if (!lc->extended_save_area_addr)
-			goto out;
-	}
-#else
 	if (MACHINE_HAS_VX)
 		lc->vector_save_area_addr =
 			(unsigned long) &lc->vector_save_area;
 	if (vdso_alloc_per_cpu(lc))
 		goto out;
-#endif
 	lowcore_ptr[cpu] = lc;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
 	return 0;
@@ -229,16 +221,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 {
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[pcpu - pcpu_devices] = NULL;
-#ifndef CONFIG_64BIT
-	if (MACHINE_HAS_IEEE) {
-		struct _lowcore *lc = pcpu->lowcore;
-
-		free_page((unsigned long) lc->extended_save_area_addr);
-		lc->extended_save_area_addr = 0;
-	}
-#else
 	vdso_free_per_cpu(pcpu->lowcore);
-#endif
 	if (pcpu == &pcpu_devices[0])
 		return;
 	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
@@ -492,22 +475,6 @@ void arch_send_call_function_single_ipi(int cpu)
 	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
 }
 
-#ifndef CONFIG_64BIT
-/*
- * this function sends a 'purge tlb' signal to another CPU.
- */
-static void smp_ptlb_callback(void *info)
-{
-	__tlb_flush_local();
-}
-
-void smp_ptlb_all(void)
-{
-	on_each_cpu(smp_ptlb_callback, NULL, 1);
-}
-EXPORT_SYMBOL(smp_ptlb_all);
-#endif /* ! CONFIG_64BIT */
-
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -851,7 +818,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_prepare_secondary(pcpu, cpu);
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
-	while (!cpu_online(cpu))
+	/* Wait until cpu puts itself in the online & active maps */
+	while (!cpu_online(cpu) || !cpu_active(cpu))
 		cpu_relax();
 	return 0;
 }
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 1c4c5accd220..d3236c9e226b 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -138,6 +138,8 @@ int pfn_is_nosave(unsigned long pfn)
 {
 	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
 	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
+	unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
+	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
 
 	/* Always save lowcore pages (LC protection might be enabled). */
 	if (pfn <= LC_PAGES)
@@ -145,6 +147,8 @@ int pfn_is_nosave(unsigned long pfn)
 	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
 		return 1;
 	/* Skip memory holes and read-only pages (NSS, DCSS, ...). */
+	if (pfn >= stext_pfn && pfn <= eshared_pfn)
+		return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
 	if (tprot(PFN_PHYS(pfn)))
 		return 1;
 	return 0;
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp.S
index 6b09fdffbd2f..ca6294645dd3 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp.S
@@ -177,6 +177,17 @@ restart_entry:
 	lhi	%r1,1
 	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE
 	sam64
+#ifdef CONFIG_SMP
+	larl	%r1,smp_cpu_mt_shift
+	icm	%r1,15,0(%r1)
+	jz	smt_done
+	llgfr	%r1,%r1
+smt_loop:
+	sigp	%r1,%r0,SIGP_SET_MULTI_THREADING
+	brc	8,smt_done			/* accepted */
+	brc	2,smt_loop			/* busy, try again */
+smt_done:
+#endif
 	larl	%r1,.Lnew_pgm_check_psw
 	lpswe	0(%r1)
 pgm_check_entry:
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index 23eb222c1658..f145490cce54 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -76,7 +76,6 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
 	return sys_ipc(call, first, second, third, ptr, third);
 }
 
-#ifdef CONFIG_64BIT
 SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
 {
 	unsigned int ret;
@@ -90,51 +89,3 @@ SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
 
 	return ret;
 }
-#endif /* CONFIG_64BIT */
-
-/*
- * Wrapper function for sys_fadvise64/fadvise64_64
- */
-#ifndef CONFIG_64BIT
-
-SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low,
-		size_t, len, int, advice)
-{
-	return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low,
-			len, advice);
-}
-
-struct fadvise64_64_args {
-	int fd;
-	long long offset;
-	long long len;
-	int advice;
-};
-
-SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
-{
-	struct fadvise64_64_args a;
-
-	if ( copy_from_user(&a, args, sizeof(a)) )
-		return -EFAULT;
-	return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
-}
-
-/*
- * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last
- * 64 bit argument "len" is split into the upper and lower 32 bits. The
- * system call wrapper in the user space loads the value to %r6/%r7.
- * The code in entry.S keeps the values in %r2 - %r6 where they are and
- * stores %r7 to 96(%r15). But the standard C linkage requires that
- * the whole 64 bit value for len is stored on the stack and doesn't
- * use %r6 at all. So s390_fallocate has to convert the arguments from
- *   %r2: fd, %r3: mode, %r4/%r5: offset, %r6/96(%r15)-99(%r15): len
- * to
- *   %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len
- */
-SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset,
-			       u32, len_high, u32, len_low)
-{
-	return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low);
-}
-#endif
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 939ec474b1dd..1acad02681c4 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -1,365 +1,365 @@
 /*
  * definitions for sys_call_table, each line represents an
- * entry in the table in the form 
- * SYSCALL(31 bit syscall, 64 bit syscall, 31 bit emulated syscall)
+ * entry in the table in the form
+ * SYSCALL(64 bit syscall, 31 bit emulated syscall)
  *
- * this file is meant to be included from entry.S and entry64.S
+ * this file is meant to be included from entry.S
  */
 
-#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall)
+#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall)
 
-NI_SYSCALL							/* 0 */
-SYSCALL(sys_exit,sys_exit,compat_sys_exit)
-SYSCALL(sys_fork,sys_fork,sys_fork)
-SYSCALL(sys_read,sys_read,compat_sys_s390_read)
-SYSCALL(sys_write,sys_write,compat_sys_s390_write)
-SYSCALL(sys_open,sys_open,compat_sys_open)			/* 5 */
-SYSCALL(sys_close,sys_close,compat_sys_close)
-SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall)
-SYSCALL(sys_creat,sys_creat,compat_sys_creat)
-SYSCALL(sys_link,sys_link,compat_sys_link)
-SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink)		/* 10 */
-SYSCALL(sys_execve,sys_execve,compat_sys_execve)
-SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir)
-SYSCALL(sys_time,sys_ni_syscall,compat_sys_time)		/* old time syscall */
-SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod)
-SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod)			/* 15 */
-SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16)	/* old lchown16 syscall*/
-NI_SYSCALL							/* old break syscall holder */
-NI_SYSCALL							/* old stat syscall holder */
-SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek)
-SYSCALL(sys_getpid,sys_getpid,sys_getpid)			/* 20 */
-SYSCALL(sys_mount,sys_mount,compat_sys_mount)
-SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount)
-SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16)	/* old setuid16 syscall*/
-SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16)	/* old getuid16 syscall*/
-SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime)		/* 25 old stime syscall */
-SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace)
-SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm)
-NI_SYSCALL							/* old fstat syscall */
-SYSCALL(sys_pause,sys_pause,sys_pause)
-SYSCALL(sys_utime,sys_utime,compat_sys_utime)		/* 30 */
-NI_SYSCALL							/* old stty syscall */
-NI_SYSCALL							/* old gtty syscall */
-SYSCALL(sys_access,sys_access,compat_sys_access)
-SYSCALL(sys_nice,sys_nice,compat_sys_nice)
-NI_SYSCALL							/* 35 old ftime syscall */
-SYSCALL(sys_sync,sys_sync,sys_sync)
-SYSCALL(sys_kill,sys_kill,compat_sys_kill)
-SYSCALL(sys_rename,sys_rename,compat_sys_rename)
-SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir)
-SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir)		/* 40 */
-SYSCALL(sys_dup,sys_dup,compat_sys_dup)
-SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe)
-SYSCALL(sys_times,sys_times,compat_sys_times)
-NI_SYSCALL							/* old prof syscall */
-SYSCALL(sys_brk,sys_brk,compat_sys_brk)				/* 45 */
-SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16)	/* old setgid16 syscall*/
-SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16)	/* old getgid16 syscall*/
-SYSCALL(sys_signal,sys_signal,compat_sys_signal)
-SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16)	/* old geteuid16 syscall */
-SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16)	/* 50 old getegid16 syscall */
-SYSCALL(sys_acct,sys_acct,compat_sys_acct)
-SYSCALL(sys_umount,sys_umount,compat_sys_umount)
-NI_SYSCALL							/* old lock syscall */
-SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl)
-SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl)		/* 55 */
-NI_SYSCALL							/* intel mpx syscall */
-SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid)
-NI_SYSCALL							/* old ulimit syscall */
-NI_SYSCALL							/* old uname syscall */
-SYSCALL(sys_umask,sys_umask,compat_sys_umask)			/* 60 */
-SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot)
-SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat)
-SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2)
-SYSCALL(sys_getppid,sys_getppid,sys_getppid)
-SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp)			/* 65 */
-SYSCALL(sys_setsid,sys_setsid,sys_setsid)
-SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction)
-NI_SYSCALL							/* old sgetmask syscall*/
-NI_SYSCALL							/* old ssetmask syscall*/
-SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */
-SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */
-SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend)
-SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending)
-SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname)
-SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit)	/* 75 */
-SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit)
-SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage)
-SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday)
-SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday)
-SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16)	/* 80 old getgroups16 syscall */
-SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16)	/* old setgroups16 syscall */
-NI_SYSCALL							/* old select syscall */
-SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink)
-NI_SYSCALL							/* old lstat syscall */
-SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink)		/* 85 */
-SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib)
-SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon)
-SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot)
-SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir)	/* old readdir syscall */
-SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap)	/* 90 */
-SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap)
-SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate)
-SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate)
-SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod)
-SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16)	/* 95 old fchown16 syscall*/
-SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority)
-SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority)
-NI_SYSCALL							/* old profil syscall */
-SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs)
-SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs)	/* 100 */
-NI_SYSCALL							/* ioperm for i386 */
-SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall)
-SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog)
-SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer)
-SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer)	/* 105 */
-SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat)
-SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat)
-SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat)
-NI_SYSCALL							/* old uname syscall */
-SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie)	/* 110 */
-SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup)
-NI_SYSCALL							/* old "idle" system call */
-NI_SYSCALL							/* vm86old for i386 */
-SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4)
-SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff)		/* 115 */
-SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo)
-SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc)
-SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync)
-SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn)
-SYSCALL(sys_clone,sys_clone,compat_sys_clone)			/* 120 */
-SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname)
-SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname)
-NI_SYSCALL							/* modify_ldt for i386 */
-SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex)
-SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect)		/* 125 */
-SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask)
-NI_SYSCALL							/* old "create module" */
-SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module)
-SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module)
-NI_SYSCALL							/* 130: old get_kernel_syms */
-SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl)
-SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid)
-SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir)
-SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush)
-SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs)		/* 135 */
-SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality)
-NI_SYSCALL							/* for afs_syscall */
-SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16)	/* old setfsuid16 syscall */
-SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16)	/* old setfsgid16 syscall */
-SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek)		/* 140 */
-SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents)
-SYSCALL(sys_select,sys_select,compat_sys_select)
-SYSCALL(sys_flock,sys_flock,compat_sys_flock)
-SYSCALL(sys_msync,sys_msync,compat_sys_msync)
-SYSCALL(sys_readv,sys_readv,compat_sys_readv)		/* 145 */
-SYSCALL(sys_writev,sys_writev,compat_sys_writev)
-SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid)
-SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync)
-SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl)
-SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock)			/* 150 */
-SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock)
-SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall)
-SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall)
-SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam)
-SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam)	/* 155 */
-SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler)
-SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler)
-SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield)
-SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
-SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min)	/* 160 */
-SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
-SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep)
-SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap)
-SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16)	/* old setresuid16 syscall */
-SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16)	/* 165 old getresuid16 syscall */
-NI_SYSCALL							/* for vm86 */
-NI_SYSCALL							/* old sys_query_module */
-SYSCALL(sys_poll,sys_poll,compat_sys_poll)
-NI_SYSCALL							/* old nfsservctl */
-SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16)	/* 170 old setresgid16 syscall */
-SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16)	/* old getresgid16 syscall */
-SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl)
-SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn)
-SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction)
-SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */
-SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending)
-SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
-SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
-SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
-SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64)		/* 180 */
-SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64)
-SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16)	/* old chown16 syscall */
-SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd)
-SYSCALL(sys_capget,sys_capget,compat_sys_capget)
-SYSCALL(sys_capset,sys_capset,compat_sys_capset)		/* 185 */
-SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack)
-SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile)
-NI_SYSCALL							/* streams1 */
-NI_SYSCALL							/* streams2 */
-SYSCALL(sys_vfork,sys_vfork,sys_vfork)				/* 190 */
-SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit)
-SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2)
-SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64)
-SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64)
-SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64)		/* 195 */
-SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64)
-SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64)
-SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown)
-SYSCALL(sys_getuid,sys_getuid,sys_getuid)
-SYSCALL(sys_getgid,sys_getgid,sys_getgid)			/* 200 */
-SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid)
-SYSCALL(sys_getegid,sys_getegid,sys_getegid)
-SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid)
-SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid)
-SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups)	/* 205 */
-SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups)
-SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown)
-SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid)
-SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid)
-SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid)	/* 210 */
-SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid)
-SYSCALL(sys_chown,sys_chown,compat_sys_chown)
-SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid)
-SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid)
-SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid)	/* 215 */
-SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid)
-SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root)
-SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore)
-SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise)
-SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64)	/* 220 */
-SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64)
-SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead)
-SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64)
-SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr)
-SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr)	/* 225 */
-SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr)
-SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr)
-SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr)
-SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr)
-SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr)	/* 230 */
-SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr)
-SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr)
-SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr)
-SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr)
-SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr)	/* 235 */
-SYSCALL(sys_gettid,sys_gettid,sys_gettid)
-SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill)
-SYSCALL(sys_futex,sys_futex,compat_sys_futex)
-SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity)
-SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity)	/* 240 */
-SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill)
-NI_SYSCALL							/* reserved for TUX */
-SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup)
-SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy)
-SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents)	/* 245 */
-SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit)
-SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel)
-SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group)
-SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create)
-SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl)	/* 250 */
-SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait)
-SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address)
-SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64)
-SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create)
-SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime)	/* 255 */
-SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime)
-SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun)
-SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete)
-SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime)
-SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime)	/* 260 */
-SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres)
-SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep)
-NI_SYSCALL							/* reserved for vserver */
-SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64)
-SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64)
-SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64)
-SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL							/* 268 sys_mbind */
-NI_SYSCALL							/* 269 sys_get_mempolicy */
-NI_SYSCALL							/* 270 sys_set_mempolicy */
-SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open)
-SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink)
-SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend)
-SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive)
-SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */
-SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr)
-SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load)
-SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key)
-SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key)
-SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl)		/* 280 */
-SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid)
-SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set)
-SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get)
-SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init)
-SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
-SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL							/* 287 sys_migrate_pages */
-SYSCALL(sys_openat,sys_openat,compat_sys_openat)
-SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat)
-SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat)	/* 290 */
-SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat)
-SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat)
-SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64)
-SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat)
-SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat)	/* 295 */
-SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat)
-SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat)
-SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat)
-SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat)
-SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat)	/* 300 */
-SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6)
-SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll)
-SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare)
-SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list)
-SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list)
-SYSCALL(sys_splice,sys_splice,compat_sys_splice)
-SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range)
-SYSCALL(sys_tee,sys_tee,compat_sys_tee)
-SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL							/* 310 sys_move_pages */
-SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu)
-SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait)
-SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes)
-SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate)
-SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat)	/* 315 */
-SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd)
+NI_SYSCALL						/* 0 */
+SYSCALL(sys_exit,compat_sys_exit)
+SYSCALL(sys_fork,sys_fork)
+SYSCALL(sys_read,compat_sys_s390_read)
+SYSCALL(sys_write,compat_sys_s390_write)
+SYSCALL(sys_open,compat_sys_open)			/* 5 */
+SYSCALL(sys_close,compat_sys_close)
+SYSCALL(sys_restart_syscall,sys_restart_syscall)
+SYSCALL(sys_creat,compat_sys_creat)
+SYSCALL(sys_link,compat_sys_link)
+SYSCALL(sys_unlink,compat_sys_unlink)			/* 10 */
+SYSCALL(sys_execve,compat_sys_execve)
+SYSCALL(sys_chdir,compat_sys_chdir)
+SYSCALL(sys_ni_syscall,compat_sys_time)			/* old time syscall */
+SYSCALL(sys_mknod,compat_sys_mknod)
+SYSCALL(sys_chmod,compat_sys_chmod)			/* 15 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_lchown16)	/* old lchown16 syscall*/
+NI_SYSCALL						/* old break syscall holder */
+NI_SYSCALL						/* old stat syscall holder */
+SYSCALL(sys_lseek,compat_sys_lseek)
+SYSCALL(sys_getpid,sys_getpid)				/* 20 */
+SYSCALL(sys_mount,compat_sys_mount)
+SYSCALL(sys_oldumount,compat_sys_oldumount)
+SYSCALL(sys_ni_syscall,compat_sys_s390_setuid16)	/* old setuid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_getuid16)	/* old getuid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_stime)		/* 25 old stime syscall */
+SYSCALL(sys_ptrace,compat_sys_ptrace)
+SYSCALL(sys_alarm,compat_sys_alarm)
+NI_SYSCALL						/* old fstat syscall */
+SYSCALL(sys_pause,sys_pause)
+SYSCALL(sys_utime,compat_sys_utime)			/* 30 */
+NI_SYSCALL						/* old stty syscall */
+NI_SYSCALL						/* old gtty syscall */
+SYSCALL(sys_access,compat_sys_access)
+SYSCALL(sys_nice,compat_sys_nice)
+NI_SYSCALL						/* 35 old ftime syscall */
+SYSCALL(sys_sync,sys_sync)
+SYSCALL(sys_kill,compat_sys_kill)
+SYSCALL(sys_rename,compat_sys_rename)
+SYSCALL(sys_mkdir,compat_sys_mkdir)
+SYSCALL(sys_rmdir,compat_sys_rmdir)			/* 40 */
+SYSCALL(sys_dup,compat_sys_dup)
+SYSCALL(sys_pipe,compat_sys_pipe)
+SYSCALL(sys_times,compat_sys_times)
+NI_SYSCALL						/* old prof syscall */
+SYSCALL(sys_brk,compat_sys_brk)				/* 45 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setgid16)	/* old setgid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_getgid16)	/* old getgid16 syscall*/
+SYSCALL(sys_signal,compat_sys_signal)
+SYSCALL(sys_ni_syscall,compat_sys_s390_geteuid16)	/* old geteuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getegid16)	/* 50 old getegid16 syscall */
+SYSCALL(sys_acct,compat_sys_acct)
+SYSCALL(sys_umount,compat_sys_umount)
+NI_SYSCALL						/* old lock syscall */
+SYSCALL(sys_ioctl,compat_sys_ioctl)
+SYSCALL(sys_fcntl,compat_sys_fcntl)			/* 55 */
+NI_SYSCALL						/* intel mpx syscall */
+SYSCALL(sys_setpgid,compat_sys_setpgid)
+NI_SYSCALL						/* old ulimit syscall */
+NI_SYSCALL						/* old uname syscall */
+SYSCALL(sys_umask,compat_sys_umask)			/* 60 */
+SYSCALL(sys_chroot,compat_sys_chroot)
+SYSCALL(sys_ustat,compat_sys_ustat)
+SYSCALL(sys_dup2,compat_sys_dup2)
+SYSCALL(sys_getppid,sys_getppid)
+SYSCALL(sys_getpgrp,sys_getpgrp)			/* 65 */
+SYSCALL(sys_setsid,sys_setsid)
+SYSCALL(sys_sigaction,compat_sys_sigaction)
+NI_SYSCALL						/* old sgetmask syscall*/
+NI_SYSCALL						/* old ssetmask syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_setreuid16)	/* old setreuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setregid16)	/* old setregid16 syscall */
+SYSCALL(sys_sigsuspend,compat_sys_sigsuspend)
+SYSCALL(sys_sigpending,compat_sys_sigpending)
+SYSCALL(sys_sethostname,compat_sys_sethostname)
+SYSCALL(sys_setrlimit,compat_sys_setrlimit)		/* 75 */
+SYSCALL(sys_getrlimit,compat_sys_old_getrlimit)
+SYSCALL(sys_getrusage,compat_sys_getrusage)
+SYSCALL(sys_gettimeofday,compat_sys_gettimeofday)
+SYSCALL(sys_settimeofday,compat_sys_settimeofday)
+SYSCALL(sys_ni_syscall,compat_sys_s390_getgroups16)	/* 80 old getgroups16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setgroups16)	/* old setgroups16 syscall */
+NI_SYSCALL						/* old select syscall */
+SYSCALL(sys_symlink,compat_sys_symlink)
+NI_SYSCALL						/* old lstat syscall */
+SYSCALL(sys_readlink,compat_sys_readlink)		/* 85 */
+SYSCALL(sys_uselib,compat_sys_uselib)
+SYSCALL(sys_swapon,compat_sys_swapon)
+SYSCALL(sys_reboot,compat_sys_reboot)
+SYSCALL(sys_ni_syscall,compat_sys_old_readdir)		/* old readdir syscall */
+SYSCALL(sys_old_mmap,compat_sys_s390_old_mmap)		/* 90 */
+SYSCALL(sys_munmap,compat_sys_munmap)
+SYSCALL(sys_truncate,compat_sys_truncate)
+SYSCALL(sys_ftruncate,compat_sys_ftruncate)
+SYSCALL(sys_fchmod,compat_sys_fchmod)
+SYSCALL(sys_ni_syscall,compat_sys_s390_fchown16)	/* 95 old fchown16 syscall*/
+SYSCALL(sys_getpriority,compat_sys_getpriority)
+SYSCALL(sys_setpriority,compat_sys_setpriority)
+NI_SYSCALL						/* old profil syscall */
+SYSCALL(sys_statfs,compat_sys_statfs)
+SYSCALL(sys_fstatfs,compat_sys_fstatfs)			/* 100 */
+NI_SYSCALL						/* ioperm for i386 */
+SYSCALL(sys_socketcall,compat_sys_socketcall)
+SYSCALL(sys_syslog,compat_sys_syslog)
+SYSCALL(sys_setitimer,compat_sys_setitimer)
+SYSCALL(sys_getitimer,compat_sys_getitimer)		/* 105 */
+SYSCALL(sys_newstat,compat_sys_newstat)
+SYSCALL(sys_newlstat,compat_sys_newlstat)
+SYSCALL(sys_newfstat,compat_sys_newfstat)
+NI_SYSCALL						/* old uname syscall */
+SYSCALL(sys_lookup_dcookie,compat_sys_lookup_dcookie)	/* 110 */
+SYSCALL(sys_vhangup,sys_vhangup)
+NI_SYSCALL						/* old "idle" system call */
+NI_SYSCALL						/* vm86old for i386 */
+SYSCALL(sys_wait4,compat_sys_wait4)
+SYSCALL(sys_swapoff,compat_sys_swapoff)			/* 115 */
+SYSCALL(sys_sysinfo,compat_sys_sysinfo)
+SYSCALL(sys_s390_ipc,compat_sys_s390_ipc)
+SYSCALL(sys_fsync,compat_sys_fsync)
+SYSCALL(sys_sigreturn,compat_sys_sigreturn)
+SYSCALL(sys_clone,compat_sys_clone)			/* 120 */
+SYSCALL(sys_setdomainname,compat_sys_setdomainname)
+SYSCALL(sys_newuname,compat_sys_newuname)
+NI_SYSCALL						/* modify_ldt for i386 */
+SYSCALL(sys_adjtimex,compat_sys_adjtimex)
+SYSCALL(sys_mprotect,compat_sys_mprotect)		/* 125 */
+SYSCALL(sys_sigprocmask,compat_sys_sigprocmask)
+NI_SYSCALL						/* old "create module" */
+SYSCALL(sys_init_module,compat_sys_init_module)
+SYSCALL(sys_delete_module,compat_sys_delete_module)
+NI_SYSCALL						/* 130: old get_kernel_syms */
+SYSCALL(sys_quotactl,compat_sys_quotactl)
+SYSCALL(sys_getpgid,compat_sys_getpgid)
+SYSCALL(sys_fchdir,compat_sys_fchdir)
+SYSCALL(sys_bdflush,compat_sys_bdflush)
+SYSCALL(sys_sysfs,compat_sys_sysfs)			/* 135 */
+SYSCALL(sys_s390_personality,compat_sys_s390_personality)
+NI_SYSCALL						/* for afs_syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setfsuid16)	/* old setfsuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setfsgid16)	/* old setfsgid16 syscall */
+SYSCALL(sys_llseek,compat_sys_llseek)			/* 140 */
+SYSCALL(sys_getdents,compat_sys_getdents)
+SYSCALL(sys_select,compat_sys_select)
+SYSCALL(sys_flock,compat_sys_flock)
+SYSCALL(sys_msync,compat_sys_msync)
+SYSCALL(sys_readv,compat_sys_readv)			/* 145 */
+SYSCALL(sys_writev,compat_sys_writev)
+SYSCALL(sys_getsid,compat_sys_getsid)
+SYSCALL(sys_fdatasync,compat_sys_fdatasync)
+SYSCALL(sys_sysctl,compat_sys_sysctl)
+SYSCALL(sys_mlock,compat_sys_mlock)			/* 150 */
+SYSCALL(sys_munlock,compat_sys_munlock)
+SYSCALL(sys_mlockall,compat_sys_mlockall)
+SYSCALL(sys_munlockall,sys_munlockall)
+SYSCALL(sys_sched_setparam,compat_sys_sched_setparam)
+SYSCALL(sys_sched_getparam,compat_sys_sched_getparam)	/* 155 */
+SYSCALL(sys_sched_setscheduler,compat_sys_sched_setscheduler)
+SYSCALL(sys_sched_getscheduler,compat_sys_sched_getscheduler)
+SYSCALL(sys_sched_yield,sys_sched_yield)
+SYSCALL(sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
+SYSCALL(sys_sched_get_priority_min,compat_sys_sched_get_priority_min)	/* 160 */
+SYSCALL(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
+SYSCALL(sys_nanosleep,compat_sys_nanosleep)
+SYSCALL(sys_mremap,compat_sys_mremap)
+SYSCALL(sys_ni_syscall,compat_sys_s390_setresuid16)	/* old setresuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getresuid16)	/* 165 old getresuid16 syscall */
+NI_SYSCALL						/* for vm86 */
+NI_SYSCALL						/* old sys_query_module */
+SYSCALL(sys_poll,compat_sys_poll)
+NI_SYSCALL						/* old nfsservctl */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setresgid16)	/* 170 old setresgid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getresgid16)	/* old getresgid16 syscall */
+SYSCALL(sys_prctl,compat_sys_prctl)
+SYSCALL(sys_rt_sigreturn,compat_sys_rt_sigreturn)
+SYSCALL(sys_rt_sigaction,compat_sys_rt_sigaction)
+SYSCALL(sys_rt_sigprocmask,compat_sys_rt_sigprocmask)	/* 175 */
+SYSCALL(sys_rt_sigpending,compat_sys_rt_sigpending)
+SYSCALL(sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
+SYSCALL(sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
+SYSCALL(sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
+SYSCALL(sys_pread64,compat_sys_s390_pread64)		/* 180 */
+SYSCALL(sys_pwrite64,compat_sys_s390_pwrite64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_chown16)		/* old chown16 syscall */
+SYSCALL(sys_getcwd,compat_sys_getcwd)
+SYSCALL(sys_capget,compat_sys_capget)
+SYSCALL(sys_capset,compat_sys_capset)			/* 185 */
+SYSCALL(sys_sigaltstack,compat_sys_sigaltstack)
+SYSCALL(sys_sendfile64,compat_sys_sendfile)
+NI_SYSCALL						/* streams1 */
+NI_SYSCALL						/* streams2 */
+SYSCALL(sys_vfork,sys_vfork)				/* 190 */
+SYSCALL(sys_getrlimit,compat_sys_getrlimit)
+SYSCALL(sys_mmap2,compat_sys_s390_mmap2)
+SYSCALL(sys_ni_syscall,compat_sys_s390_truncate64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_ftruncate64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_stat64)		/* 195 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_lstat64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_fstat64)
+SYSCALL(sys_lchown,compat_sys_lchown)
+SYSCALL(sys_getuid,sys_getuid)
+SYSCALL(sys_getgid,sys_getgid)				/* 200 */
+SYSCALL(sys_geteuid,sys_geteuid)
+SYSCALL(sys_getegid,sys_getegid)
+SYSCALL(sys_setreuid,compat_sys_setreuid)
+SYSCALL(sys_setregid,compat_sys_setregid)
+SYSCALL(sys_getgroups,compat_sys_getgroups)		/* 205 */
+SYSCALL(sys_setgroups,compat_sys_setgroups)
+SYSCALL(sys_fchown,compat_sys_fchown)
+SYSCALL(sys_setresuid,compat_sys_setresuid)
+SYSCALL(sys_getresuid,compat_sys_getresuid)
+SYSCALL(sys_setresgid,compat_sys_setresgid)		/* 210 */
+SYSCALL(sys_getresgid,compat_sys_getresgid)
+SYSCALL(sys_chown,compat_sys_chown)
+SYSCALL(sys_setuid,compat_sys_setuid)
+SYSCALL(sys_setgid,compat_sys_setgid)
+SYSCALL(sys_setfsuid,compat_sys_setfsuid)		/* 215 */
+SYSCALL(sys_setfsgid,compat_sys_setfsgid)
+SYSCALL(sys_pivot_root,compat_sys_pivot_root)
+SYSCALL(sys_mincore,compat_sys_mincore)
+SYSCALL(sys_madvise,compat_sys_madvise)
+SYSCALL(sys_getdents64,compat_sys_getdents64)		/* 220 */
+SYSCALL(sys_ni_syscall,compat_sys_fcntl64)
+SYSCALL(sys_readahead,compat_sys_s390_readahead)
+SYSCALL(sys_ni_syscall,compat_sys_sendfile64)
+SYSCALL(sys_setxattr,compat_sys_setxattr)
+SYSCALL(sys_lsetxattr,compat_sys_lsetxattr)		/* 225 */
+SYSCALL(sys_fsetxattr,compat_sys_fsetxattr)
+SYSCALL(sys_getxattr,compat_sys_getxattr)
+SYSCALL(sys_lgetxattr,compat_sys_lgetxattr)
+SYSCALL(sys_fgetxattr,compat_sys_fgetxattr)
+SYSCALL(sys_listxattr,compat_sys_listxattr)		/* 230 */
+SYSCALL(sys_llistxattr,compat_sys_llistxattr)
+SYSCALL(sys_flistxattr,compat_sys_flistxattr)
+SYSCALL(sys_removexattr,compat_sys_removexattr)
+SYSCALL(sys_lremovexattr,compat_sys_lremovexattr)
+SYSCALL(sys_fremovexattr,compat_sys_fremovexattr)	/* 235 */
+SYSCALL(sys_gettid,sys_gettid)
+SYSCALL(sys_tkill,compat_sys_tkill)
+SYSCALL(sys_futex,compat_sys_futex)
+SYSCALL(sys_sched_setaffinity,compat_sys_sched_setaffinity)
+SYSCALL(sys_sched_getaffinity,compat_sys_sched_getaffinity)	/* 240 */
+SYSCALL(sys_tgkill,compat_sys_tgkill)
+NI_SYSCALL						/* reserved for TUX */
+SYSCALL(sys_io_setup,compat_sys_io_setup)
+SYSCALL(sys_io_destroy,compat_sys_io_destroy)
+SYSCALL(sys_io_getevents,compat_sys_io_getevents)	/* 245 */
+SYSCALL(sys_io_submit,compat_sys_io_submit)
+SYSCALL(sys_io_cancel,compat_sys_io_cancel)
+SYSCALL(sys_exit_group,compat_sys_exit_group)
+SYSCALL(sys_epoll_create,compat_sys_epoll_create)
+SYSCALL(sys_epoll_ctl,compat_sys_epoll_ctl)		/* 250 */
+SYSCALL(sys_epoll_wait,compat_sys_epoll_wait)
+SYSCALL(sys_set_tid_address,compat_sys_set_tid_address)
+SYSCALL(sys_fadvise64_64,compat_sys_s390_fadvise64)
+SYSCALL(sys_timer_create,compat_sys_timer_create)
+SYSCALL(sys_timer_settime,compat_sys_timer_settime)	/* 255 */
+SYSCALL(sys_timer_gettime,compat_sys_timer_gettime)
+SYSCALL(sys_timer_getoverrun,compat_sys_timer_getoverrun)
+SYSCALL(sys_timer_delete,compat_sys_timer_delete)
+SYSCALL(sys_clock_settime,compat_sys_clock_settime)
+SYSCALL(sys_clock_gettime,compat_sys_clock_gettime)	/* 260 */
+SYSCALL(sys_clock_getres,compat_sys_clock_getres)
+SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
+NI_SYSCALL						/* reserved for vserver */
+SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
+SYSCALL(sys_statfs64,compat_sys_statfs64)
+SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
+SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
+NI_SYSCALL						/* 268 sys_mbind */
+NI_SYSCALL						/* 269 sys_get_mempolicy */
+NI_SYSCALL						/* 270 sys_set_mempolicy */
+SYSCALL(sys_mq_open,compat_sys_mq_open)
+SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
+SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
+SYSCALL(sys_mq_timedreceive,compat_sys_mq_timedreceive)
+SYSCALL(sys_mq_notify,compat_sys_mq_notify)		/* 275 */
+SYSCALL(sys_mq_getsetattr,compat_sys_mq_getsetattr)
+SYSCALL(sys_kexec_load,compat_sys_kexec_load)
+SYSCALL(sys_add_key,compat_sys_add_key)
+SYSCALL(sys_request_key,compat_sys_request_key)
+SYSCALL(sys_keyctl,compat_sys_keyctl)			/* 280 */
+SYSCALL(sys_waitid,compat_sys_waitid)
+SYSCALL(sys_ioprio_set,compat_sys_ioprio_set)
+SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
+SYSCALL(sys_inotify_init,sys_inotify_init)
+SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
+SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
+NI_SYSCALL						/* 287 sys_migrate_pages */
+SYSCALL(sys_openat,compat_sys_openat)
+SYSCALL(sys_mkdirat,compat_sys_mkdirat)
+SYSCALL(sys_mknodat,compat_sys_mknodat)			/* 290 */
+SYSCALL(sys_fchownat,compat_sys_fchownat)
+SYSCALL(sys_futimesat,compat_sys_futimesat)
+SYSCALL(sys_newfstatat,compat_sys_s390_fstatat64)
+SYSCALL(sys_unlinkat,compat_sys_unlinkat)
+SYSCALL(sys_renameat,compat_sys_renameat)		/* 295 */
+SYSCALL(sys_linkat,compat_sys_linkat)
+SYSCALL(sys_symlinkat,compat_sys_symlinkat)
+SYSCALL(sys_readlinkat,compat_sys_readlinkat)
+SYSCALL(sys_fchmodat,compat_sys_fchmodat)
+SYSCALL(sys_faccessat,compat_sys_faccessat)		/* 300 */
+SYSCALL(sys_pselect6,compat_sys_pselect6)
+SYSCALL(sys_ppoll,compat_sys_ppoll)
+SYSCALL(sys_unshare,compat_sys_unshare)
+SYSCALL(sys_set_robust_list,compat_sys_set_robust_list)
+SYSCALL(sys_get_robust_list,compat_sys_get_robust_list)
+SYSCALL(sys_splice,compat_sys_splice)
+SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
+SYSCALL(sys_tee,compat_sys_tee)
+SYSCALL(sys_vmsplice,compat_sys_vmsplice)
+NI_SYSCALL						/* 310 sys_move_pages */
+SYSCALL(sys_getcpu,compat_sys_getcpu)
+SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
+SYSCALL(sys_utimes,compat_sys_utimes)
+SYSCALL(sys_fallocate,compat_sys_s390_fallocate)
+SYSCALL(sys_utimensat,compat_sys_utimensat)		/* 315 */
+SYSCALL(sys_signalfd,compat_sys_signalfd)
 NI_SYSCALL						/* 317 old sys_timer_fd */
-SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd)
-SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create)
-SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
-SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime)
-SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4)
-SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2)
-SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1)
-SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */
-SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3)
-SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1)
-SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv)
-SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev)
-SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
-SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open)
-SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init)
-SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark)
-SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64)
-SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
-SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at)
-SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime)
-SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs)
-SYSCALL(sys_setns,sys_setns,compat_sys_setns)
-SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
-SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev)
-SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
-SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp)
-SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
-SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
-SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
-SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
-SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
-SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
-SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
-SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
-SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
-SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
-SYSCALL(sys_execveat,sys_execveat,compat_sys_execveat)
+SYSCALL(sys_eventfd,compat_sys_eventfd)
+SYSCALL(sys_timerfd_create,compat_sys_timerfd_create)
+SYSCALL(sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
+SYSCALL(sys_timerfd_gettime,compat_sys_timerfd_gettime)
+SYSCALL(sys_signalfd4,compat_sys_signalfd4)
+SYSCALL(sys_eventfd2,compat_sys_eventfd2)
+SYSCALL(sys_inotify_init1,compat_sys_inotify_init1)
+SYSCALL(sys_pipe2,compat_sys_pipe2)			/* 325 */
+SYSCALL(sys_dup3,compat_sys_dup3)
+SYSCALL(sys_epoll_create1,compat_sys_epoll_create1)
+SYSCALL(sys_preadv,compat_sys_preadv)
+SYSCALL(sys_pwritev,compat_sys_pwritev)
+SYSCALL(sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
+SYSCALL(sys_perf_event_open,compat_sys_perf_event_open)
+SYSCALL(sys_fanotify_init,compat_sys_fanotify_init)
+SYSCALL(sys_fanotify_mark,compat_sys_fanotify_mark)
+SYSCALL(sys_prlimit64,compat_sys_prlimit64)
+SYSCALL(sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
+SYSCALL(sys_open_by_handle_at,compat_sys_open_by_handle_at)
+SYSCALL(sys_clock_adjtime,compat_sys_clock_adjtime)
+SYSCALL(sys_syncfs,compat_sys_syncfs)
+SYSCALL(sys_setns,compat_sys_setns)
+SYSCALL(sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
+SYSCALL(sys_process_vm_writev,compat_sys_process_vm_writev)
+SYSCALL(sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
+SYSCALL(sys_kcmp,compat_sys_kcmp)
+SYSCALL(sys_finit_module,compat_sys_finit_module)
+SYSCALL(sys_sched_setattr,compat_sys_sched_setattr)	/* 345 */
+SYSCALL(sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,compat_sys_renameat2)
+SYSCALL(sys_seccomp,compat_sys_seccomp)
+SYSCALL(sys_getrandom,compat_sys_getrandom)
+SYSCALL(sys_memfd_create,compat_sys_memfd_create)	/* 350 */
+SYSCALL(sys_bpf,compat_sys_bpf)
+SYSCALL(sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
+SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
+SYSCALL(sys_execveat,compat_sys_execveat)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 20660dddb2d6..170ddd2018b3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
 {
 	u64 nsecps;
 
-	if (tk->tkr.clock != &clocksource_tod)
+	if (tk->tkr_mono.clock != &clocksource_tod)
 		return;
 
 	/* Make userspace gettimeofday spin until we're done. */
 	++vdso_data->tb_update_count;
 	smp_wmb();
-	vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+	vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
 	vdso_data->xtime_clock_sec = tk->xtime_sec;
-	vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+	vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
 	vdso_data->wtom_clock_sec =
 		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
-		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
-	nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+	vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+	nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
 	while (vdso_data->wtom_clock_nsec >= nsecps) {
 		vdso_data->wtom_clock_nsec -= nsecps;
 		vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
 
 	vdso_data->xtime_coarse_sec = tk->xtime_sec;
 	vdso_data->xtime_coarse_nsec =
-		(long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+		(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 	vdso_data->wtom_coarse_sec =
 		vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
 		vdso_data->wtom_coarse_sec++;
 	}
 
-	vdso_data->tk_mult = tk->tkr.mult;
-	vdso_data->tk_shift = tk->tkr.shift;
+	vdso_data->tk_mult = tk->tkr_mono.mult;
+	vdso_data->tk_shift = tk->tkr_mono.shift;
 	smp_wmb();
 	++vdso_data->tb_update_count;
 }
@@ -283,7 +283,7 @@ void __init time_init(void)
 	if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
 		panic("Couldn't request external interrupt 0x1406");
 
-	if (clocksource_register(&clocksource_tod) != 0)
+	if (__clocksource_register(&clocksource_tod) != 0)
 		panic("Could not register TOD clock source");
 
 	/* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 14da43b801d9..5728c5bd44a8 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -421,7 +421,7 @@ int topology_cpu_init(struct cpu *cpu)
 	return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
 }
 
-const struct cpumask *cpu_thread_mask(int cpu)
+static const struct cpumask *cpu_thread_mask(int cpu)
 {
 	return &per_cpu(cpu_topology, cpu).thread_mask;
 }
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index f081cf1157c3..4d96c9f53455 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -26,7 +26,6 @@ int show_unhandled_signals = 1;
 
 static inline void __user *get_trap_ip(struct pt_regs *regs)
 {
-#ifdef CONFIG_64BIT
 	unsigned long address;
 
 	if (regs->int_code & 0x200)
@@ -35,10 +34,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
 		address = regs->psw.addr;
 	return (void __user *)
 		((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
-#else
-	return (void __user *)
-		((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN);
-#endif
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -153,11 +148,8 @@ DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
 	      "privileged operation")
 DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 	      "special operation exception")
-
-#ifdef CONFIG_64BIT
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
 	      "transaction constraint exception")
-#endif
 
 static inline void do_fp_trap(struct pt_regs *regs, int fpc)
 {
@@ -182,7 +174,7 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc)
 void translation_exception(struct pt_regs *regs)
 {
 	/* May never happen. */
-	die(regs, "Translation exception");
+	panic("Translation exception");
 }
 
 void illegal_op(struct pt_regs *regs)
@@ -211,29 +203,6 @@ void illegal_op(struct pt_regs *regs)
 		} else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
 			is_uprobe_insn = 1;
 #endif
-#ifdef CONFIG_MATHEMU
-		} else if (opcode[0] == 0xb3) {
-			if (get_user(*((__u16 *) (opcode+2)), location+1))
-				return;
-			signal = math_emu_b3(opcode, regs);
-                } else if (opcode[0] == 0xed) {
-			if (get_user(*((__u32 *) (opcode+2)),
-				     (__u32 __user *)(location+1)))
-				return;
-			signal = math_emu_ed(opcode, regs);
-		} else if (*((__u16 *) opcode) == 0xb299) {
-			if (get_user(*((__u16 *) (opcode+2)), location+1))
-				return;
-			signal = math_emu_srnm(opcode, regs);
-		} else if (*((__u16 *) opcode) == 0xb29c) {
-			if (get_user(*((__u16 *) (opcode+2)), location+1))
-				return;
-			signal = math_emu_stfpc(opcode, regs);
-		} else if (*((__u16 *) opcode) == 0xb29d) {
-			if (get_user(*((__u16 *) (opcode+2)), location+1))
-				return;
-			signal = math_emu_lfpc(opcode, regs);
-#endif
 		} else
 			signal = SIGILL;
 	}
@@ -247,71 +216,14 @@ void illegal_op(struct pt_regs *regs)
 			       3, SIGTRAP) != NOTIFY_STOP)
 			signal = SIGILL;
 	}
-
-#ifdef CONFIG_MATHEMU
-        if (signal == SIGFPE)
-		do_fp_trap(regs, current->thread.fp_regs.fpc);
-	else if (signal == SIGSEGV)
-		do_trap(regs, signal, SEGV_MAPERR, "user address fault");
-	else
-#endif
 	if (signal)
 		do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
 }
 NOKPROBE_SYMBOL(illegal_op);
 
-#ifdef CONFIG_MATHEMU
-void specification_exception(struct pt_regs *regs)
-{
-        __u8 opcode[6];
-	__u16 __user *location = NULL;
-	int signal = 0;
-
-	location = (__u16 __user *) get_trap_ip(regs);
-
-	if (user_mode(regs)) {
-		get_user(*((__u16 *) opcode), location);
-		switch (opcode[0]) {
-		case 0x28: /* LDR Rx,Ry   */
-			signal = math_emu_ldr(opcode);
-			break;
-		case 0x38: /* LER Rx,Ry   */
-			signal = math_emu_ler(opcode);
-			break;
-		case 0x60: /* STD R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_std(opcode, regs);
-			break;
-		case 0x68: /* LD R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_ld(opcode, regs);
-			break;
-		case 0x70: /* STE R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_ste(opcode, regs);
-			break;
-		case 0x78: /* LE R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_le(opcode, regs);
-			break;
-		default:
-			signal = SIGILL;
-			break;
-                }
-        } else
-		signal = SIGILL;
-
-        if (signal == SIGFPE)
-		do_fp_trap(regs, current->thread.fp_regs.fpc);
-	else if (signal)
-		do_trap(regs, signal, ILL_ILLOPN, "specification exception");
-}
-#else
 DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 	      "specification exception");
-#endif
 
-#ifdef CONFIG_64BIT
 int alloc_vector_registers(struct task_struct *tsk)
 {
 	__vector128 *vxrs;
@@ -377,7 +289,6 @@ static int __init disable_vector_extension(char *str)
 	return 1;
 }
 __setup("novx", disable_vector_extension);
-#endif
 
 void data_exception(struct pt_regs *regs)
 {
@@ -386,65 +297,7 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	if (MACHINE_HAS_IEEE)
-		asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
-
-#ifdef CONFIG_MATHEMU
-	else if (user_mode(regs)) {
-        	__u8 opcode[6];
-		get_user(*((__u16 *) opcode), location);
-		switch (opcode[0]) {
-		case 0x28: /* LDR Rx,Ry   */
-			signal = math_emu_ldr(opcode);
-			break;
-		case 0x38: /* LER Rx,Ry   */
-			signal = math_emu_ler(opcode);
-			break;
-		case 0x60: /* STD R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_std(opcode, regs);
-			break;
-		case 0x68: /* LD R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_ld(opcode, regs);
-			break;
-		case 0x70: /* STE R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_ste(opcode, regs);
-			break;
-		case 0x78: /* LE R,D(X,B) */
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_le(opcode, regs);
-			break;
-		case 0xb3:
-			get_user(*((__u16 *) (opcode+2)), location+1);
-			signal = math_emu_b3(opcode, regs);
-			break;
-                case 0xed:
-			get_user(*((__u32 *) (opcode+2)),
-				 (__u32 __user *)(location+1));
-			signal = math_emu_ed(opcode, regs);
-			break;
-	        case 0xb2:
-			if (opcode[1] == 0x99) {
-				get_user(*((__u16 *) (opcode+2)), location+1);
-				signal = math_emu_srnm(opcode, regs);
-			} else if (opcode[1] == 0x9c) {
-				get_user(*((__u16 *) (opcode+2)), location+1);
-				signal = math_emu_stfpc(opcode, regs);
-			} else if (opcode[1] == 0x9d) {
-				get_user(*((__u16 *) (opcode+2)), location+1);
-				signal = math_emu_lfpc(opcode, regs);
-			} else
-				signal = SIGILL;
-			break;
-		default:
-			signal = SIGILL;
-			break;
-                }
-        }
-#endif 
-#ifdef CONFIG_64BIT
+	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
 	/* Check for vector register enablement */
 	if (MACHINE_HAS_VX && !current->thread.vxrs &&
 	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
@@ -454,13 +307,11 @@ void data_exception(struct pt_regs *regs)
 		clear_pt_regs_flag(regs, PIF_PER_TRAP);
 		return;
 	}
-#endif
-
 	if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
 		signal = SIGILL;
-        if (signal == SIGFPE)
+	if (signal == SIGFPE)
 		do_fp_trap(regs, current->thread.fp_regs.fpc);
 	else if (signal)
 		do_trap(regs, signal, ILL_ILLOPN, "data exception");
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index cc7328080b60..66956c09d5bf 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -188,7 +188,9 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len)
 	else if (put_user(*(input), __ptr))		\
 		__rc = EMU_ADDRESSING;			\
 	if (__rc == 0)					\
-		sim_stor_event(regs, __ptr, mask + 1);	\
+		sim_stor_event(regs,			\
+			       (void __force *)__ptr,	\
+			       mask + 1);		\
 	__rc;						\
 })
 
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 0bbb7e027c5a..0d58269ff425 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -32,19 +32,17 @@
 #include <asm/vdso.h>
 #include <asm/facility.h>
 
-#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
+#ifdef CONFIG_COMPAT
 extern char vdso32_start, vdso32_end;
 static void *vdso32_kbase = &vdso32_start;
 static unsigned int vdso32_pages;
 static struct page **vdso32_pagelist;
 #endif
 
-#ifdef CONFIG_64BIT
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
 static unsigned int vdso64_pages;
 static struct page **vdso64_pagelist;
-#endif /* CONFIG_64BIT */
 
 /*
  * Should the kernel map a VDSO page into processes and pass its
@@ -87,7 +85,6 @@ static void vdso_init_data(struct vdso_data *vd)
 	vd->ectg_available = test_facility(31);
 }
 
-#ifdef CONFIG_64BIT
 /*
  * Allocate/free per cpu vdso data.
  */
@@ -169,7 +166,6 @@ static void vdso_init_cr5(void)
 	cr5 = offsetof(struct _lowcore, paste);
 	__ctl_load(cr5, 5, 5);
 }
-#endif /* CONFIG_64BIT */
 
 /*
  * This is called from binfmt_elf, we create the special vma for the
@@ -191,7 +187,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!uses_interp)
 		return 0;
 
-#ifdef CONFIG_64BIT
 	vdso_pagelist = vdso64_pagelist;
 	vdso_pages = vdso64_pages;
 #ifdef CONFIG_COMPAT
@@ -200,11 +195,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		vdso_pages = vdso32_pages;
 	}
 #endif
-#else
-	vdso_pagelist = vdso32_pagelist;
-	vdso_pages = vdso32_pages;
-#endif
-
 	/*
 	 * vDSO has a problem and was disabled, just don't "enable" it for
 	 * the process
@@ -268,7 +258,7 @@ static int __init vdso_init(void)
 	if (!vdso_enabled)
 		return 0;
 	vdso_init_data(vdso_data);
-#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
+#ifdef CONFIG_COMPAT
 	/* Calculate the size of the 32 bit vDSO */
 	vdso32_pages = ((&vdso32_end - &vdso32_start
 			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
@@ -287,7 +277,6 @@ static int __init vdso_init(void)
 	vdso32_pagelist[vdso32_pages] = NULL;
 #endif
 
-#ifdef CONFIG_64BIT
 	/* Calculate the size of the 64 bit vDSO */
 	vdso64_pages = ((&vdso64_end - &vdso64_start
 			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
@@ -307,7 +296,6 @@ static int __init vdso_init(void)
 	if (vdso_alloc_per_cpu(&S390_lowcore))
 		BUG();
 	vdso_init_cr5();
-#endif /* CONFIG_64BIT */
 
 	get_page(virt_to_page(vdso_data));
 
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 35b13ed0af5f..445657fe658c 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -6,17 +6,10 @@
 #include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
 
-#ifndef CONFIG_64BIT
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(startup)
-jiffies = jiffies_64 + 4;
-#else
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
 OUTPUT_ARCH(s390:64-bit)
 ENTRY(startup)
 jiffies = jiffies_64;
-#endif
 
 PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 9254afff250c..fc7ec95848c3 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
@@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 	 * - gpr 3 contains the virtqueue index (passed as datamatch)
 	 * - gpr 4 contains the index on the bus (optionally)
 	 */
-	ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+	ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
 				      vcpu->run->s.regs.gprs[2] & 0xffffffff,
 				      8, &vcpu->run->s.regs.gprs[3],
 				      vcpu->run->s.regs.gprs[4]);
@@ -230,7 +230,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-	int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+	int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 267523cac6de..a7559f7207df 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -10,6 +10,7 @@
 #include <asm/pgtable.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include <asm/switch_to.h>
 
 union asce {
 	unsigned long val;
@@ -207,6 +208,54 @@ union raddress {
 	unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
+union alet {
+	u32 val;
+	struct {
+		u32 reserved : 7;
+		u32 p        : 1;
+		u32 alesn    : 8;
+		u32 alen     : 16;
+	};
+};
+
+union ald {
+	u32 val;
+	struct {
+		u32     : 1;
+		u32 alo : 24;
+		u32 all : 7;
+	};
+};
+
+struct ale {
+	unsigned long i      : 1; /* ALEN-Invalid Bit */
+	unsigned long        : 5;
+	unsigned long fo     : 1; /* Fetch-Only Bit */
+	unsigned long p      : 1; /* Private Bit */
+	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
+	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
+	unsigned long        : 32;
+	unsigned long        : 1;
+	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
+	unsigned long        : 6;
+	unsigned long astesn : 32; /* ASTE Sequence Number */
+} __packed;
+
+struct aste {
+	unsigned long i      : 1; /* ASX-Invalid Bit */
+	unsigned long ato    : 29; /* Authority-Table Origin */
+	unsigned long        : 1;
+	unsigned long b      : 1; /* Base-Space Bit */
+	unsigned long ax     : 16; /* Authorization Index */
+	unsigned long atl    : 12; /* Authority-Table Length */
+	unsigned long        : 2;
+	unsigned long ca     : 1; /* Controlled-ASN Bit */
+	unsigned long ra     : 1; /* Reusable-ASN Bit */
+	unsigned long asce   : 64; /* Address-Space-Control Element */
+	unsigned long ald    : 32;
+	unsigned long astesn : 32;
+	/* .. more fields there */
+} __packed;
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -307,15 +356,157 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
 		ipte_unlock_simple(vcpu);
 }
 
-static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
+			  int write)
+{
+	union alet alet;
+	struct ale ale;
+	struct aste aste;
+	unsigned long ald_addr, authority_table_addr;
+	union ald ald;
+	int eax, rc;
+	u8 authority_table;
+
+	if (ar >= NUM_ACRS)
+		return -EINVAL;
+
+	save_access_regs(vcpu->run->s.regs.acrs);
+	alet.val = vcpu->run->s.regs.acrs[ar];
+
+	if (ar == 0 || alet.val == 0) {
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
+	} else if (alet.val == 1) {
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
+	}
+
+	if (alet.reserved)
+		return PGM_ALET_SPECIFICATION;
+
+	if (alet.p)
+		ald_addr = vcpu->arch.sie_block->gcr[5];
+	else
+		ald_addr = vcpu->arch.sie_block->gcr[2];
+	ald_addr &= 0x7fffffc0;
+
+	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+	if (rc)
+		return rc;
+
+	if (alet.alen / 8 > ald.all)
+		return PGM_ALEN_TRANSLATION;
+
+	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+		return PGM_ADDRESSING;
+
+	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+			     sizeof(struct ale));
+	if (rc)
+		return rc;
+
+	if (ale.i == 1)
+		return PGM_ALEN_TRANSLATION;
+	if (ale.alesn != alet.alesn)
+		return PGM_ALE_SEQUENCE;
+
+	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+	if (rc)
+		return rc;
+
+	if (aste.i)
+		return PGM_ASTE_VALIDITY;
+	if (aste.astesn != ale.astesn)
+		return PGM_ASTE_SEQUENCE;
+
+	if (ale.p == 1) {
+		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+		if (ale.aleax != eax) {
+			if (eax / 16 > aste.atl)
+				return PGM_EXTENDED_AUTHORITY;
+
+			authority_table_addr = aste.ato * 4 + eax / 4;
+
+			rc = read_guest_real(vcpu, authority_table_addr,
+					     &authority_table,
+					     sizeof(u8));
+			if (rc)
+				return rc;
+
+			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+				return PGM_EXTENDED_AUTHORITY;
+		}
+	}
+
+	if (ale.fo == 1 && write)
+		return PGM_PROTECTION;
+
+	asce->val = aste.asce;
+	return 0;
+}
+
+struct trans_exc_code_bits {
+	unsigned long addr : 52; /* Translation-exception Address */
+	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+	unsigned long	   : 6;
+	unsigned long b60  : 1;
+	unsigned long b61  : 1;
+	unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+	FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
+	FSI_STORE   = 1, /* Exception was due to store operation */
+	FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+			 ar_t ar, int write)
 {
+	int rc;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	struct trans_exc_code_bits *tec_bits;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+	tec_bits->as = psw_bits(*psw).as;
+
+	if (!psw_bits(*psw).t) {
+		asce->val = 0;
+		asce->r = 1;
+		return 0;
+	}
+
 	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
 	case PSW_AS_PRIMARY:
-		return vcpu->arch.sie_block->gcr[1];
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
 	case PSW_AS_SECONDARY:
-		return vcpu->arch.sie_block->gcr[7];
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
 	case PSW_AS_HOME:
-		return vcpu->arch.sie_block->gcr[13];
+		asce->val = vcpu->arch.sie_block->gcr[13];
+		return 0;
+	case PSW_AS_ACCREG:
+		rc = ar_translation(vcpu, asce, ar, write);
+		switch (rc) {
+		case PGM_ALEN_TRANSLATION:
+		case PGM_ALE_SEQUENCE:
+		case PGM_ASTE_VALIDITY:
+		case PGM_ASTE_SEQUENCE:
+		case PGM_EXTENDED_AUTHORITY:
+			vcpu->arch.pgm.exc_access_id = ar;
+			break;
+		case PGM_PROTECTION:
+			tec_bits->b60 = 1;
+			tec_bits->b61 = 1;
+			break;
+		}
+		if (rc > 0)
+			pgm->code = rc;
+		return rc;
 	}
 	return 0;
 }
@@ -330,10 +521,11 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  * @vcpu: virtual cpu
  * @gva: guest virtual address
  * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
  * @write: indicates if access is a write access
  *
  * Translate a guest virtual address into a guest absolute address by means
- * of dynamic address translation as specified by the architecuture.
+ * of dynamic address translation as specified by the architecture.
  * If the resulting absolute address is not available in the configuration
  * an addressing exception is indicated and @gpa will not be changed.
  *
@@ -345,7 +537,8 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  *	      by the architecture
  */
 static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
-				     unsigned long *gpa, int write)
+				     unsigned long *gpa, const union asce asce,
+				     int write)
 {
 	union vaddress vaddr = {.addr = gva};
 	union raddress raddr = {.addr = gva};
@@ -354,12 +547,10 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	union ctlreg0 ctlreg0;
 	unsigned long ptr;
 	int edat1, edat2;
-	union asce asce;
 
 	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
 	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
 	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
-	asce.val = get_vcpu_asce(vcpu);
 	if (asce.r)
 		goto real_address;
 	ptr = asce.origin * 4096;
@@ -506,48 +697,30 @@ static inline int is_low_address(unsigned long ga)
 	return (ga & ~0x11fful) == 0;
 }
 
-static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+					  const union asce asce)
 {
 	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	union asce asce;
 
 	if (!ctlreg0.lap)
 		return 0;
-	asce.val = get_vcpu_asce(vcpu);
 	if (psw_bits(*psw).t && asce.p)
 		return 0;
 	return 1;
 }
 
-struct trans_exc_code_bits {
-	unsigned long addr : 52; /* Translation-exception Address */
-	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
-	unsigned long	   : 7;
-	unsigned long b61  : 1;
-	unsigned long as   : 2;  /* ASCE Identifier */
-};
-
-enum {
-	FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
-	FSI_STORE   = 1, /* Exception was due to store operation */
-	FSI_FETCH   = 2  /* Exception was due to fetch operation */
-};
-
 static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
 			    unsigned long *pages, unsigned long nr_pages,
-			    int write)
+			    const union asce asce, int write)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	struct trans_exc_code_bits *tec_bits;
 	int lap_enabled, rc;
 
-	memset(pgm, 0, sizeof(*pgm));
 	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
-	tec_bits->as = psw_bits(*psw).as;
-	lap_enabled = low_address_protection_enabled(vcpu);
+	lap_enabled = low_address_protection_enabled(vcpu, asce);
 	while (nr_pages) {
 		ga = kvm_s390_logical_to_effective(vcpu, ga);
 		tec_bits->addr = ga >> PAGE_SHIFT;
@@ -557,7 +730,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
 		}
 		ga &= PAGE_MASK;
 		if (psw_bits(*psw).t) {
-			rc = guest_translate(vcpu, ga, pages, write);
+			rc = guest_translate(vcpu, ga, pages, asce, write);
 			if (rc < 0)
 				return rc;
 			if (rc == PGM_PROTECTION)
@@ -578,7 +751,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
 	return 0;
 }
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		 unsigned long len, int write)
 {
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -591,20 +764,19 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
 
 	if (!len)
 		return 0;
-	/* Access register mode is not supported yet. */
-	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-		return -EOPNOTSUPP;
+	rc = get_vcpu_asce(vcpu, &asce, ar, write);
+	if (rc)
+		return rc;
 	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
 	pages = pages_array;
 	if (nr_pages > ARRAY_SIZE(pages_array))
 		pages = vmalloc(nr_pages * sizeof(unsigned long));
 	if (!pages)
 		return -ENOMEM;
-	asce.val = get_vcpu_asce(vcpu);
 	need_ipte_lock = psw_bits(*psw).t && !asce.r;
 	if (need_ipte_lock)
 		ipte_lock(vcpu);
-	rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+	rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
 	for (idx = 0; idx < nr_pages && !rc; idx++) {
 		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
 		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -652,7 +824,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * Note: The IPTE lock is not taken during this function, so the caller
  * has to take care of this.
  */
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
 			    unsigned long *gpa, int write)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
@@ -661,26 +833,21 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
 	union asce asce;
 	int rc;
 
-	/* Access register mode is not supported yet. */
-	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-		return -EOPNOTSUPP;
-
 	gva = kvm_s390_logical_to_effective(vcpu, gva);
-	memset(pgm, 0, sizeof(*pgm));
 	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	tec->as = psw_bits(*psw).as;
-	tec->fsi = write ? FSI_STORE : FSI_FETCH;
+	rc = get_vcpu_asce(vcpu, &asce, ar, write);
 	tec->addr = gva >> PAGE_SHIFT;
-	if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+	if (rc)
+		return rc;
+	if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
 		if (write) {
 			rc = pgm->code = PGM_PROTECTION;
 			return rc;
 		}
 	}
 
-	asce.val = get_vcpu_asce(vcpu);
 	if (psw_bits(*psw).t && !asce.r) {	/* Use DAT? */
-		rc = guest_translate(vcpu, gva, gpa, write);
+		rc = guest_translate(vcpu, gva, gpa, asce, write);
 		if (rc > 0) {
 			if (rc == PGM_PROTECTION)
 				tec->b61 = 1;
@@ -697,28 +864,51 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
 }
 
 /**
- * kvm_s390_check_low_addr_protection - check for low-address protection
- * @ga: Guest address
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write)
+{
+	unsigned long gpa;
+	unsigned long currlen;
+	int rc = 0;
+
+	ipte_lock(vcpu);
+	while (length > 0 && !rc) {
+		currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+		rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+		gva += currlen;
+		length -= currlen;
+	}
+	ipte_unlock(vcpu);
+
+	return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @gra: Guest real address
  *
  * Checks whether an address is subject to low-address protection and set
  * up vcpu->arch.pgm accordingly if necessary.
  *
  * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
  */
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	struct trans_exc_code_bits *tec_bits;
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 
-	if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+	if (!ctlreg0.lap || !is_low_address(gra))
 		return 0;
 
 	memset(pgm, 0, sizeof(*pgm));
 	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
 	tec_bits->fsi = FSI_STORE;
 	tec_bits->as = psw_bits(*psw).as;
-	tec_bits->addr = ga >> PAGE_SHIFT;
+	tec_bits->addr = gra >> PAGE_SHIFT;
 	pgm->code = PGM_PROTECTION;
 
 	return pgm->code;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 0149cf15058a..ef03726cc661 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -156,9 +156,11 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 }
 
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
-			    unsigned long *gpa, int write);
+			    ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write);
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		 unsigned long len, int write);
 
 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
@@ -168,6 +170,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * write_guest - copy data from kernel space to guest space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: source address in kernel space
  * @len: number of bytes to copy
  *
@@ -176,8 +179,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * If DAT is off data will be copied to guest real or absolute memory.
  * If DAT is on data will be copied to the address space as specified by
  * the address space bits of the PSW:
- * Primary, secondory or home space (access register mode is currently not
- * implemented).
+ * Primary, secondary, home space or access register mode.
  * The addressing mode of the PSW is also inspected, so that address wrap
  * around is taken into account for 24-, 31- and 64-bit addressing mode,
  * if the to be copied data crosses page boundaries in guest address space.
@@ -210,16 +212,17 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  *	 if data has been changed in guest space in case of an exception.
  */
 static inline __must_check
-int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		unsigned long len)
 {
-	return access_guest(vcpu, ga, data, len, 1);
+	return access_guest(vcpu, ga, ar, data, len, 1);
 }
 
 /**
  * read_guest - copy data from guest space to kernel space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: destination address in kernel space
  * @len: number of bytes to copy
  *
@@ -229,10 +232,10 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
  * data will be copied from guest space to kernel space.
  */
 static inline __must_check
-int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 	       unsigned long len)
 {
-	return access_guest(vcpu, ga, data, len, 0);
+	return access_guest(vcpu, ga, ar, data, len, 0);
 }
 
 /**
@@ -330,6 +333,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 void ipte_lock(struct kvm_vcpu *vcpu);
 void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3e8d4092ce30..e97b3455d7e6 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -191,8 +191,8 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
 	if (!wp_info->old_data)
 		return -ENOMEM;
 	/* try to backup the original value */
-	ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
-			 wp_info->len);
+	ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+			     wp_info->len);
 	if (ret) {
 		kfree(wp_info->old_data);
 		wp_info->old_data = NULL;
@@ -362,8 +362,8 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
 			continue;
 
 		/* refetch the wp data and compare it to the old value */
-		if (!read_guest(vcpu, wp_info->phys_addr, temp,
-				wp_info->len)) {
+		if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+				    wp_info->len)) {
 			if (memcmp(temp, wp_info->old_data, wp_info->len)) {
 				kfree(temp);
 				return wp_info;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index bebd2157edd0..9e3779e3e496 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -165,6 +165,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
 		pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
 		pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
 		break;
+	case PGM_VECTOR_PROCESSING:
 	case PGM_DATA:
 		pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
 		break;
@@ -319,7 +320,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
 	/* Make sure that the source is paged-in */
 	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
-				     &srcaddr, 0);
+				     reg2, &srcaddr, 0);
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -328,7 +329,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
 	/* Make sure that the destination is paged-in */
 	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
-				     &dstaddr, 1);
+				     reg1, &dstaddr, 1);
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 073b5f387d1d..9de47265ef73 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
 /*
  * handling kvm guest interrupts
  *
- * Copyright IBM Corp. 2008,2014
+ * Copyright IBM Corp. 2008, 2015
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -17,9 +17,12 @@
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
+#include <asm/dis.h>
 #include <asm/uaccess.h>
 #include <asm/sclp.h>
+#include <asm/isc.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
@@ -32,11 +35,6 @@
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 
-static int is_ioint(u64 type)
-{
-	return ((type & 0xfffe0000u) != 0xfffe0000u);
-}
-
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -72,70 +70,45 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static u64 int_word_to_isc_bits(u32 int_word)
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.sie_block->ckc <
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+		return 0;
+	return ckc_interrupts_enabled(vcpu);
+}
+
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	return !psw_extint_disabled(vcpu) &&
+	       (vcpu->arch.sie_block->gcr[0] & 0x400ul);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.sie_block->cputm >> 63) &&
+	       cpu_timer_interrupts_enabled(vcpu);
+}
+
+static inline int is_ioirq(unsigned long irq_type)
 {
-	u8 isc = (int_word & 0x38000000) >> 27;
+	return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
+		(irq_type <= IRQ_PEND_IO_ISC_7));
+}
 
+static uint64_t isc_to_isc_bits(int isc)
+{
 	return (0x80 >> isc) << 24;
 }
 
-static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt_info *inti)
+static inline u8 int_word_to_isc(u32 int_word)
 {
-	switch (inti->type) {
-	case KVM_S390_INT_EXTERNAL_CALL:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
-			return 1;
-		return 0;
-	case KVM_S390_INT_EMERGENCY:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
-			return 1;
-		return 0;
-	case KVM_S390_INT_CLOCK_COMP:
-		return ckc_interrupts_enabled(vcpu);
-	case KVM_S390_INT_CPU_TIMER:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
-			return 1;
-		return 0;
-	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_INIT:
-	case KVM_S390_INT_PFAULT_DONE:
-	case KVM_S390_INT_VIRTIO:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
-			return 1;
-		return 0;
-	case KVM_S390_PROGRAM_INT:
-	case KVM_S390_SIGP_STOP:
-	case KVM_S390_SIGP_SET_PREFIX:
-	case KVM_S390_RESTART:
-		return 1;
-	case KVM_S390_MCHK:
-		if (psw_mchk_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
-			return 1;
-		return 0;
-	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (psw_ioint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[6] &
-		    int_word_to_isc_bits(inti->io.io_int_word))
-			return 1;
-		return 0;
-	default:
-		printk(KERN_WARNING "illegal interrupt type %llx\n",
-		       inti->type);
-		BUG();
-	}
-	return 0;
+	return (int_word & 0x38000000) >> 27;
+}
+
+static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.float_int.pending_irqs;
 }
 
 static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
@@ -143,12 +116,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
 	return vcpu->arch.local_int.pending_irqs;
 }
 
-static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+				   unsigned long active_mask)
+{
+	int i;
+
+	for (i = 0; i <= MAX_ISC; i++)
+		if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+			active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+
+	return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 {
-	unsigned long active_mask = pending_local_irqs(vcpu);
+	unsigned long active_mask;
+
+	active_mask = pending_local_irqs(vcpu);
+	active_mask |= pending_floating_irqs(vcpu);
 
 	if (psw_extint_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (psw_ioint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_IO_MASK;
+	else
+		active_mask = disable_iscs(vcpu, active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
 		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
@@ -157,8 +149,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
 		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
 		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+		__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
 	if (psw_mchk_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_MCHK_MASK;
+	if (!(vcpu->arch.sie_block->gcr[14] &
+	      vcpu->kvm->arch.float_int.mchk.cr14))
+		__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
 
 	/*
 	 * STOP irqs will never be actively delivered. They are triggered via
@@ -200,6 +197,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+		return;
+	else if (psw_ioint_disabled(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_IO_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
 	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
@@ -226,47 +233,17 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
 		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
 }
 
-/* Set interception request for non-deliverable local interrupts */
-static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
 {
+	set_intercept_indicators_io(vcpu);
 	set_intercept_indicators_ext(vcpu);
 	set_intercept_indicators_mchk(vcpu);
 	set_intercept_indicators_stop(vcpu);
 }
 
-static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt_info *inti)
-{
-	switch (inti->type) {
-	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_DONE:
-	case KVM_S390_INT_VIRTIO:
-		if (psw_extint_disabled(vcpu))
-			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
-		else
-			vcpu->arch.sie_block->lctl |= LCTL_CR0;
-		break;
-	case KVM_S390_MCHK:
-		if (psw_mchk_disabled(vcpu))
-			vcpu->arch.sie_block->ictl |= ICTL_LPSW;
-		else
-			vcpu->arch.sie_block->lctl |= LCTL_CR14;
-		break;
-	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (psw_ioint_disabled(vcpu))
-			__set_cpuflag(vcpu, CPUSTAT_IO_INT);
-		else
-			vcpu->arch.sie_block->lctl |= LCTL_CR6;
-		break;
-	default:
-		BUG();
-	}
-}
-
 static u16 get_ilc(struct kvm_vcpu *vcpu)
 {
-	const unsigned short table[] = { 2, 4, 4, 6 };
-
 	switch (vcpu->arch.sie_block->icptcode) {
 	case ICPT_INST:
 	case ICPT_INSTPROGI:
@@ -274,7 +251,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
 	case ICPT_PARTEXEC:
 	case ICPT_IOINST:
 		/* last instruction only stored for these icptcodes */
-		return table[vcpu->arch.sie_block->ipa >> 14];
+		return insn_length(vcpu->arch.sie_block->ipa >> 8);
 	case ICPT_PROGI:
 		return vcpu->arch.sie_block->pgmilc;
 	default:
@@ -350,38 +327,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
 
 static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_mchk_info mchk;
-	int rc;
+	struct kvm_s390_mchk_info mchk = {};
+	unsigned long adtl_status_addr;
+	int deliver = 0;
+	int rc = 0;
 
+	spin_lock(&fi->lock);
 	spin_lock(&li->lock);
-	mchk = li->irq.mchk;
+	if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+	    test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+		/*
+		 * If there was an exigent machine check pending, then any
+		 * repressible machine checks that might have been pending
+		 * are indicated along with it, so always clear bits for
+		 * repressible and exigent interrupts
+		 */
+		mchk = li->irq.mchk;
+		clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+		clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+		memset(&li->irq.mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
 	/*
-	 * If there was an exigent machine check pending, then any repressible
-	 * machine checks that might have been pending are indicated along
-	 * with it, so always clear both bits
+	 * We indicate floating repressible conditions along with
+	 * other pending conditions. Channel Report Pending and Channel
+	 * Subsystem damage are the only two and and are indicated by
+	 * bits in mcic and masked in cr14.
 	 */
-	clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
-	clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
-	memset(&li->irq.mchk, 0, sizeof(mchk));
+	if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		mchk.mcic |= fi->mchk.mcic;
+		mchk.cr14 |= fi->mchk.cr14;
+		memset(&fi->mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
 	spin_unlock(&li->lock);
+	spin_unlock(&fi->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-		   mchk.mcic);
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-					 mchk.cr14, mchk.mcic);
-
-	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-	rc |= put_guest_lc(vcpu, mchk.mcic,
-			   (u64 __user *) __LC_MCCK_CODE);
-	rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
-			   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-			     &mchk.fixed_logout, sizeof(mchk.fixed_logout));
-	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (deliver) {
+		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+			   mchk.mcic);
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_MCHK,
+						 mchk.cr14, mchk.mcic);
+
+		rc  = kvm_s390_vcpu_store_status(vcpu,
+						 KVM_S390_STORE_STATUS_PREFIXED);
+		rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
+				    &adtl_status_addr,
+				    sizeof(unsigned long));
+		rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
+						      adtl_status_addr);
+		rc |= put_guest_lc(vcpu, mchk.mcic,
+				   (u64 __user *) __LC_MCCK_CODE);
+		rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+				   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+				     &mchk.fixed_logout,
+				     sizeof(mchk.fixed_logout));
+		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
+	}
 	return rc ? -EFAULT : 0;
 }
 
@@ -484,7 +495,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_pgm_info pgm_info;
-	int rc = 0;
+	int rc = 0, nullifying = false;
 	u16 ilc = get_ilc(vcpu);
 
 	spin_lock(&li->lock);
@@ -509,6 +520,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	case PGM_LX_TRANSLATION:
 	case PGM_PRIMARY_AUTHORITY:
 	case PGM_SECONDARY_AUTHORITY:
+		nullifying = true;
+		/* fall through */
 	case PGM_SPACE_SWITCH:
 		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
@@ -521,6 +534,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	case PGM_EXTENDED_AUTHORITY:
 		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
 				  (u8 *)__LC_EXC_ACCESS_ID);
+		nullifying = true;
 		break;
 	case PGM_ASCE_TYPE:
 	case PGM_PAGE_TRANSLATION:
@@ -534,6 +548,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
 				   (u8 *)__LC_OP_ACCESS_ID);
+		nullifying = true;
 		break;
 	case PGM_MONITOR:
 		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
@@ -541,6 +556,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
 				   (u64 *)__LC_MON_CODE);
 		break;
+	case PGM_VECTOR_PROCESSING:
 	case PGM_DATA:
 		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
 				  (u32 *)__LC_DATA_EXC_CODE);
@@ -551,6 +567,15 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		break;
+	case PGM_STACK_FULL:
+	case PGM_STACK_EMPTY:
+	case PGM_STACK_SPECIFICATION:
+	case PGM_STACK_TYPE:
+	case PGM_STACK_OPERATION:
+	case PGM_TRACE_TABEL:
+	case PGM_CRYPTO_OPERATION:
+		nullifying = true;
+		break;
 	}
 
 	if (pgm_info.code & PGM_PER) {
@@ -564,7 +589,12 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 				   (u8 *) __LC_PER_ACCESS_ID);
 	}
 
+	if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
+		kvm_s390_rewind_psw(vcpu, ilc);
+
 	rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+				 (u64 *) __LC_LAST_BREAK);
 	rc |= put_guest_lc(vcpu, pgm_info.code,
 			   (u16 *)__LC_PGM_INT_CODE);
 	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -574,16 +604,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
-					  struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_ext_info ext;
+	int rc = 0;
+
+	spin_lock(&fi->lock);
+	if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+		spin_unlock(&fi->lock);
+		return 0;
+	}
+	ext = fi->srv_signal;
+	memset(&fi->srv_signal, 0, sizeof(ext));
+	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
 	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-		   inti->ext.ext_params);
+		   ext.ext_params);
 	vcpu->stat.deliver_service_signal++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-					 inti->ext.ext_params, 0);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+					 ext.ext_params, 0);
 
 	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
 	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
@@ -591,106 +632,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+	rc |= put_guest_lc(vcpu, ext.ext_params,
 			   (u32 *)__LC_EXT_PARAMS);
+
 	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
-					   struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	int rc = 0;
 
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-					 KVM_S390_INT_PFAULT_DONE, 0,
-					 inti->ext.ext_params2);
+	spin_lock(&fi->lock);
+	inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				KVM_S390_INT_PFAULT_DONE, 0,
+				inti->ext.ext_params2);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+	}
+	if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+		clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
-	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-			   (u64 *)__LC_EXT_PARAMS2);
+	if (inti) {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+				(u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, PFAULT_DONE,
+				(u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				(u64 *)__LC_EXT_PARAMS2);
+		kfree(inti);
+	}
 	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
-					 struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	int rc = 0;
 
-	VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-		   inti->ext.ext_params, inti->ext.ext_params2);
-	vcpu->stat.deliver_virtio_interrupt++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-					 inti->ext.ext_params,
-					 inti->ext.ext_params2);
+	spin_lock(&fi->lock);
+	inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		VCPU_EVENT(vcpu, 4,
+			   "interrupt: virtio parm:%x,parm64:%llx",
+			   inti->ext.ext_params, inti->ext.ext_params2);
+		vcpu->stat.deliver_virtio_interrupt++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				inti->type,
+				inti->ext.ext_params,
+				inti->ext.ext_params2);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+	}
+	if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+		clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
-	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-			   (u32 *)__LC_EXT_PARAMS);
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-			   (u64 *)__LC_EXT_PARAMS2);
+	if (inti) {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+				(u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+				(u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				(u32 *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				(u64 *)__LC_EXT_PARAMS2);
+		kfree(inti);
+	}
 	return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
-				     struct kvm_s390_interrupt_info *inti)
+				     unsigned long irq_type)
 {
-	int rc;
+	struct list_head *isc_list;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int rc = 0;
 
-	VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-	vcpu->stat.deliver_io_int++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-					 ((__u32)inti->io.subchannel_id << 16) |
-						inti->io.subchannel_nr,
-					 ((__u64)inti->io.io_int_parm << 32) |
-						inti->io.io_int_word);
-
-	rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-			   (u16 *)__LC_SUBCHANNEL_ID);
-	rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-			   (u16 *)__LC_SUBCHANNEL_NR);
-	rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-			   (u32 *)__LC_IO_INT_PARM);
-	rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-			   (u32 *)__LC_IO_INT_WORD);
-	rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	return rc ? -EFAULT : 0;
-}
+	fi = &vcpu->kvm->arch.float_int;
 
-static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
-					   struct kvm_s390_interrupt_info *inti)
-{
-	struct kvm_s390_mchk_info *mchk = &inti->mchk;
-	int rc;
+	spin_lock(&fi->lock);
+	isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+	inti = list_first_entry_or_null(isc_list,
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+		vcpu->stat.deliver_io_int++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				inti->type,
+				((__u32)inti->io.subchannel_id << 16) |
+				inti->io.subchannel_nr,
+				((__u64)inti->io.io_int_parm << 32) |
+				inti->io.io_int_word);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+	}
+	if (list_empty(isc_list))
+		clear_bit(irq_type, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (inti) {
+		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+				(u16 *)__LC_SUBCHANNEL_ID);
+		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+				(u16 *)__LC_SUBCHANNEL_NR);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+				(u32 *)__LC_IO_INT_PARM);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+				(u32 *)__LC_IO_INT_WORD);
+		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		kfree(inti);
+	}
 
-	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-		   mchk->mcic);
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-					 mchk->cr14, mchk->mcic);
-
-	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-	rc |= put_guest_lc(vcpu, mchk->mcic,
-			(u64 __user *) __LC_MCCK_CODE);
-	rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
-			(u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-			     &mchk->fixed_logout, sizeof(mchk->fixed_logout));
-	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	return rc ? -EFAULT : 0;
 }
 
@@ -698,6 +779,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
 
 static const deliver_irq_t deliver_irq_funcs[] = {
 	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+	[IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
 	[IRQ_PEND_PROG]           = __deliver_prog,
 	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
 	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
@@ -706,36 +788,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
 	[IRQ_PEND_RESTART]        = __deliver_restart,
 	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
 	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+	[IRQ_PEND_EXT_SERVICE]    = __deliver_service,
+	[IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
+	[IRQ_PEND_VIRTIO]         = __deliver_virtio,
 };
 
-static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
-					   struct kvm_s390_interrupt_info *inti)
-{
-	int rc;
-
-	switch (inti->type) {
-	case KVM_S390_INT_SERVICE:
-		rc = __deliver_service(vcpu, inti);
-		break;
-	case KVM_S390_INT_PFAULT_DONE:
-		rc = __deliver_pfault_done(vcpu, inti);
-		break;
-	case KVM_S390_INT_VIRTIO:
-		rc = __deliver_virtio(vcpu, inti);
-		break;
-	case KVM_S390_MCHK:
-		rc = __deliver_mchk_floating(vcpu, inti);
-		break;
-	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		rc = __deliver_io(vcpu, inti);
-		break;
-	default:
-		BUG();
-	}
-
-	return rc;
-}
-
 /* Check whether an external call is pending (deliverable or not) */
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -751,21 +808,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
-	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-	struct kvm_s390_interrupt_info  *inti;
 	int rc;
 
-	rc = !!deliverable_local_irqs(vcpu);
-
-	if ((!rc) && atomic_read(&fi->active)) {
-		spin_lock(&fi->lock);
-		list_for_each_entry(inti, &fi->list, list)
-			if (__interrupt_is_deliverable(vcpu, inti)) {
-				rc = 1;
-				break;
-			}
-		spin_unlock(&fi->lock);
-	}
+	rc = !!deliverable_irqs(vcpu);
 
 	if (!rc && kvm_cpu_has_pending_timer(vcpu))
 		rc = 1;
@@ -784,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-		return 0;
-	if (!ckc_interrupts_enabled(vcpu))
-		return 0;
-	return 1;
+	return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -884,60 +924,45 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-	struct kvm_s390_interrupt_info  *n, *inti = NULL;
 	deliver_irq_t func;
-	int deliver;
 	int rc = 0;
 	unsigned long irq_type;
-	unsigned long deliverable_irqs;
+	unsigned long irqs;
 
 	__reset_intercept_indicators(vcpu);
 
 	/* pending ckc conditions might have been invalidated */
 	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-	if (kvm_cpu_has_pending_timer(vcpu))
+	if (ckc_irq_pending(vcpu))
 		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 
+	/* pending cpu timer conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	if (cpu_timer_irq_pending(vcpu))
+		set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
 	do {
-		deliverable_irqs = deliverable_local_irqs(vcpu);
+		irqs = deliverable_irqs(vcpu);
 		/* bits are in the order of interrupt priority */
-		irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+		irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
 		if (irq_type == IRQ_PEND_COUNT)
 			break;
-		func = deliver_irq_funcs[irq_type];
-		if (!func) {
-			WARN_ON_ONCE(func == NULL);
-			clear_bit(irq_type, &li->pending_irqs);
-			continue;
+		if (is_ioirq(irq_type)) {
+			rc = __deliver_io(vcpu, irq_type);
+		} else {
+			func = deliver_irq_funcs[irq_type];
+			if (!func) {
+				WARN_ON_ONCE(func == NULL);
+				clear_bit(irq_type, &li->pending_irqs);
+				continue;
+			}
+			rc = func(vcpu);
 		}
-		rc = func(vcpu);
-	} while (!rc && irq_type != IRQ_PEND_COUNT);
+		if (rc)
+			break;
+	} while (!rc);
 
-	set_intercept_indicators_local(vcpu);
-
-	if (!rc && atomic_read(&fi->active)) {
-		do {
-			deliver = 0;
-			spin_lock(&fi->lock);
-			list_for_each_entry_safe(inti, n, &fi->list, list) {
-				if (__interrupt_is_deliverable(vcpu, inti)) {
-					list_del(&inti->list);
-					fi->irq_count--;
-					deliver = 1;
-					break;
-				}
-				__set_intercept_indicator(vcpu, inti);
-			}
-			if (list_empty(&fi->list))
-				atomic_set(&fi->active, 0);
-			spin_unlock(&fi->lock);
-			if (deliver) {
-				rc = __deliver_floating_interrupt(vcpu, inti);
-				kfree(inti);
-			}
-		} while (!rc && deliver);
-	}
+	set_intercept_indicators(vcpu);
 
 	return rc;
 }
@@ -1172,80 +1197,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+						  int isc, u32 schid)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	struct kvm_s390_interrupt_info *iter;
+	u16 id = (schid & 0xffff0000U) >> 16;
+	u16 nr = schid & 0x0000ffffU;
 
+	spin_lock(&fi->lock);
+	list_for_each_entry(iter, isc_list, list) {
+		if (schid && (id != iter->io.subchannel_id ||
+			      nr != iter->io.subchannel_nr))
+			continue;
+		/* found an appropriate entry */
+		list_del_init(&iter->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+		if (list_empty(isc_list))
+			clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+		spin_unlock(&fi->lock);
+		return iter;
+	}
+	spin_unlock(&fi->lock);
+	return NULL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ */
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-						    u64 cr6, u64 schid)
+						    u64 isc_mask, u32 schid)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int isc;
+
+	for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+		if (isc_mask & isc_to_isc_bits(isc))
+			inti = get_io_int(kvm, isc, schid);
+	}
+	return inti;
+}
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int __inject_service(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+	/*
+	 * Early versions of the QEMU s390 bios will inject several
+	 * service interrupts after another without handling a
+	 * condition code indicating busy.
+	 * We will silently ignore those superfluous sccb values.
+	 * A future version of QEMU will take care of serialization
+	 * of servc requests
+	 */
+	if (fi->srv_signal.ext_params & SCCB_MASK)
+		goto out;
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+	set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+	spin_unlock(&fi->lock);
+	kfree(inti);
+	return 0;
+}
+
+static int __inject_virtio(struct kvm *kvm,
+			    struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+	list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+	set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+				 struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_PFAULT] >=
+		(ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_PFAULT] += 1;
+	list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+	set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+				struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+	fi->mchk.mcic |= inti->mchk.mcic;
+	set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	kfree(inti);
+	return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info *inti, *iter;
+	struct list_head *list;
+	int isc;
 
-	if ((!schid && !cr6) || (schid && cr6))
-		return NULL;
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
-	inti = NULL;
-	list_for_each_entry(iter, &fi->list, list) {
-		if (!is_ioint(iter->type))
-			continue;
-		if (cr6 &&
-		    ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
-			continue;
-		if (schid) {
-			if (((schid & 0x00000000ffff0000) >> 16) !=
-			    iter->io.subchannel_id)
-				continue;
-			if ((schid & 0x000000000000ffff) !=
-			    iter->io.subchannel_nr)
-				continue;
-		}
-		inti = iter;
-		break;
-	}
-	if (inti) {
-		list_del_init(&inti->list);
-		fi->irq_count--;
+	if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
 	}
-	if (list_empty(&fi->list))
-		atomic_set(&fi->active, 0);
+	fi->counters[FIRQ_CNTR_IO] += 1;
+
+	isc = int_word_to_isc(inti->io.io_int_word);
+	list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	list_add_tail(&inti->list, list);
+	set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
 	spin_unlock(&fi->lock);
-	return inti;
+	return 0;
 }
 
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info *iter;
 	struct kvm_vcpu *dst_vcpu = NULL;
 	int sigcpu;
-	int rc = 0;
+	u64 type = READ_ONCE(inti->type);
+	int rc;
 
 	fi = &kvm->arch.float_int;
-	spin_lock(&fi->lock);
-	if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+
+	switch (type) {
+	case KVM_S390_MCHK:
+		rc = __inject_float_mchk(kvm, inti);
+		break;
+	case KVM_S390_INT_VIRTIO:
+		rc = __inject_virtio(kvm, inti);
+		break;
+	case KVM_S390_INT_SERVICE:
+		rc = __inject_service(kvm, inti);
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		rc = __inject_pfault_done(kvm, inti);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		rc = __inject_io(kvm, inti);
+		break;
+	default:
 		rc = -EINVAL;
-		goto unlock_fi;
 	}
-	fi->irq_count++;
-	if (!is_ioint(inti->type)) {
-		list_add_tail(&inti->list, &fi->list);
-	} else {
-		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+	if (rc)
+		return rc;
 
-		/* Keep I/O interrupts sorted in isc order. */
-		list_for_each_entry(iter, &fi->list, list) {
-			if (!is_ioint(iter->type))
-				continue;
-			if (int_word_to_isc_bits(iter->io.io_int_word)
-			    <= isc_bits)
-				continue;
-			break;
-		}
-		list_add_tail(&inti->list, &iter->list);
-	}
-	atomic_set(&fi->active, 1);
-	if (atomic_read(&kvm->online_vcpus) == 0)
-		goto unlock_fi;
 	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
 	if (sigcpu == KVM_MAX_VCPUS) {
 		do {
@@ -1257,7 +1384,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
 	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
-	switch (inti->type) {
+	switch (type) {
 	case KVM_S390_MCHK:
 		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 		break;
@@ -1270,9 +1397,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	}
 	spin_unlock(&li->lock);
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
-unlock_fi:
-	spin_unlock(&fi->lock);
-	return rc;
+	return 0;
+
 }
 
 int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1332,10 +1458,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	return rc;
 }
 
-void kvm_s390_reinject_io_int(struct kvm *kvm,
+int kvm_s390_reinject_io_int(struct kvm *kvm,
 			      struct kvm_s390_interrupt_info *inti)
 {
-	__inject_vm(kvm, inti);
+	return __inject_vm(kvm, inti);
 }
 
 int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
@@ -1388,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
 	spin_unlock(&li->lock);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
-	spin_lock(&li->lock);
 	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
 		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
@@ -1433,83 +1557,130 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	default:
 		rc = -EINVAL;
 	}
+
+	return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	spin_lock(&li->lock);
+	rc = do_inject_vcpu(vcpu, irq);
 	spin_unlock(&li->lock);
 	if (!rc)
 		kvm_s390_vcpu_wakeup(vcpu);
 	return rc;
 }
 
-void kvm_s390_clear_float_irqs(struct kvm *kvm)
+static inline void clear_irq_list(struct list_head *_list)
 {
-	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info	*n, *inti = NULL;
+	struct kvm_s390_interrupt_info *inti, *n;
 
-	fi = &kvm->arch.float_int;
-	spin_lock(&fi->lock);
-	list_for_each_entry_safe(inti, n, &fi->list, list) {
+	list_for_each_entry_safe(inti, n, _list, list) {
 		list_del(&inti->list);
 		kfree(inti);
 	}
-	fi->irq_count = 0;
-	atomic_set(&fi->active, 0);
-	spin_unlock(&fi->lock);
 }
 
-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
-				   u8 *addr)
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+		       struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
-	struct kvm_s390_irq irq = {0};
-
-	irq.type = inti->type;
+	irq->type = inti->type;
 	switch (inti->type) {
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
-	case KVM_S390_INT_SERVICE:
-		irq.u.ext = inti->ext;
+		irq->u.ext = inti->ext;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		irq.u.io = inti->io;
+		irq->u.io = inti->io;
 		break;
-	case KVM_S390_MCHK:
-		irq.u.mchk = inti->mchk;
-		break;
-	default:
-		return -EINVAL;
 	}
+}
 
-	if (copy_to_user(uptr, &irq, sizeof(irq)))
-		return -EFAULT;
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int i;
 
-	return 0;
-}
+	spin_lock(&fi->lock);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		clear_irq_list(&fi->lists[i]);
+	for (i = 0; i < FIRQ_MAX_COUNT; i++)
+		fi->counters[i] = 0;
+	spin_unlock(&fi->lock);
+};
 
-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 {
 	struct kvm_s390_interrupt_info *inti;
 	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_irq *buf;
+	struct kvm_s390_irq *irq;
+	int max_irqs;
 	int ret = 0;
 	int n = 0;
+	int i;
+
+	if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+		return -EINVAL;
+
+	/*
+	 * We are already using -ENOMEM to signal
+	 * userspace it may retry with a bigger buffer,
+	 * so we need to use something else for this case
+	 */
+	buf = vzalloc(len);
+	if (!buf)
+		return -ENOBUFS;
+
+	max_irqs = len / sizeof(struct kvm_s390_irq);
 
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
-
-	list_for_each_entry(inti, &fi->list, list) {
-		if (len < sizeof(struct kvm_s390_irq)) {
+	for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+		list_for_each_entry(inti, &fi->lists[i], list) {
+			if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out;
+			}
+			inti_to_irq(inti, &buf[n]);
+			n++;
+		}
+	}
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+		if (n == max_irqs) {
 			/* signal userspace to try again */
 			ret = -ENOMEM;
-			break;
+			goto out;
 		}
-		ret = copy_irq_to_user(inti, buf);
-		if (ret)
-			break;
-		buf += sizeof(struct kvm_s390_irq);
-		len -= sizeof(struct kvm_s390_irq);
+		irq = (struct kvm_s390_irq *) &buf[n];
+		irq->type = KVM_S390_INT_SERVICE;
+		irq->u.ext = fi->srv_signal;
 		n++;
 	}
+	if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out;
+		}
+		irq = (struct kvm_s390_irq *) &buf[n];
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = fi->mchk;
+		n++;
+}
 
+out:
 	spin_unlock(&fi->lock);
+	if (!ret && n > 0) {
+		if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+			ret = -EFAULT;
+	}
+	vfree(buf);
 
 	return ret < 0 ? ret : n;
 }
@@ -1520,7 +1691,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 	switch (attr->group) {
 	case KVM_DEV_FLIC_GET_ALL_IRQS:
-		r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+		r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
 					  attr->attr);
 		break;
 	default:
@@ -1952,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
 {
 	return -EINVAL;
 }
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_irq *buf;
+	int r = 0;
+	int n;
+
+	buf = vmalloc(len);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user((void *) buf, irqstate, len)) {
+		r = -EFAULT;
+		goto out_free;
+	}
+
+	/*
+	 * Don't allow setting the interrupt state
+	 * when there are already interrupts pending
+	 */
+	spin_lock(&li->lock);
+	if (li->pending_irqs) {
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
+	for (n = 0; n < len / sizeof(*buf); n++) {
+		r = do_inject_vcpu(vcpu, &buf[n]);
+		if (r)
+			break;
+	}
+
+out_unlock:
+	spin_unlock(&li->lock);
+out_free:
+	vfree(buf);
+
+	return r;
+}
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+			    struct kvm_s390_irq *irq,
+			    unsigned long irq_type)
+{
+	switch (irq_type) {
+	case IRQ_PEND_MCHK_EX:
+	case IRQ_PEND_MCHK_REP:
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = li->irq.mchk;
+		break;
+	case IRQ_PEND_PROG:
+		irq->type = KVM_S390_PROGRAM_INT;
+		irq->u.pgm = li->irq.pgm;
+		break;
+	case IRQ_PEND_PFAULT_INIT:
+		irq->type = KVM_S390_INT_PFAULT_INIT;
+		irq->u.ext = li->irq.ext;
+		break;
+	case IRQ_PEND_EXT_EXTERNAL:
+		irq->type = KVM_S390_INT_EXTERNAL_CALL;
+		irq->u.extcall = li->irq.extcall;
+		break;
+	case IRQ_PEND_EXT_CLOCK_COMP:
+		irq->type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case IRQ_PEND_EXT_CPU_TIMER:
+		irq->type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case IRQ_PEND_SIGP_STOP:
+		irq->type = KVM_S390_SIGP_STOP;
+		irq->u.stop = li->irq.stop;
+		break;
+	case IRQ_PEND_RESTART:
+		irq->type = KVM_S390_RESTART;
+		break;
+	case IRQ_PEND_SET_PREFIX:
+		irq->type = KVM_S390_SIGP_SET_PREFIX;
+		irq->u.prefix = li->irq.prefix;
+		break;
+	}
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	unsigned long pending_irqs;
+	struct kvm_s390_irq irq;
+	unsigned long irq_type;
+	int cpuaddr;
+	int n = 0;
+
+	spin_lock(&li->lock);
+	pending_irqs = li->pending_irqs;
+	memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+	       sizeof(sigp_emerg_pending));
+	spin_unlock(&li->lock);
+
+	for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+		memset(&irq, 0, sizeof(irq));
+		if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+			continue;
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+		for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+			memset(&irq, 0, sizeof(irq));
+			if (n + sizeof(irq) > len)
+				return -ENOBUFS;
+			irq.type = KVM_S390_INT_EMERGENCY;
+			irq.u.emerg.code = cpuaddr;
+			if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+				return -EFAULT;
+			n += sizeof(irq);
+		}
+	}
+
+	if ((sigp_ctrl & SIGP_CTRL_C) &&
+	    (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+	     CPUSTAT_ECALL_PEND)) {
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		memset(&irq, 0, sizeof(irq));
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	return n;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0c3623927563..afa2bd750ffc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -25,11 +25,13 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
+#include <asm/isc.h>
 #include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -38,6 +40,11 @@
 #include "trace.h"
 #include "trace-s390.h"
 
+#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+			   (KVM_MAX_VCPUS + LOCAL_IRQS))
+
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -87,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
@@ -101,8 +109,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[] = {
-	0xff82fffbf4fc2000UL,
-	0x005c000000000000UL,
+	0xffe6fffbfcfdfc40UL,
+	0x205c800000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
@@ -165,16 +173,22 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_S390_CSS_SUPPORT:
-	case KVM_CAP_IRQFD:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_S390_IRQCHIP:
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_S390_INJECT_IRQ:
 	case KVM_CAP_S390_USER_SIGP:
+	case KVM_CAP_S390_USER_STSI:
+	case KVM_CAP_S390_SKEYS:
+	case KVM_CAP_S390_IRQ_STATE:
 		r = 1;
 		break;
+	case KVM_CAP_S390_MEM_OP:
+		r = MEM_OP_MAX_SIZE;
+		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
@@ -185,6 +199,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_COW:
 		r = MACHINE_HAS_ESOP;
 		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		r = MACHINE_HAS_VX;
+		break;
 	default:
 		r = 0;
 	}
@@ -265,6 +282,18 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		kvm->arch.user_sigp = 1;
 		r = 0;
 		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		if (MACHINE_HAS_VX) {
+			set_kvm_facility(kvm->arch.model.fac->mask, 129);
+			set_kvm_facility(kvm->arch.model.fac->list, 129);
+			r = 0;
+		} else
+			r = -EINVAL;
+		break;
+	case KVM_CAP_S390_USER_STSI:
+		kvm->arch.user_stsi = 1;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -522,7 +551,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
 		       sizeof(struct cpuid));
 		kvm->arch.model.ibc = proc->ibc;
-		memcpy(kvm->arch.model.fac->kvm, proc->fac_list,
+		memcpy(kvm->arch.model.fac->list, proc->fac_list,
 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
 	} else
 		ret = -EFAULT;
@@ -556,7 +585,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 	}
 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
 	proc->ibc = kvm->arch.model.ibc;
-	memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE);
+	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 		ret = -EFAULT;
 	kfree(proc);
@@ -576,10 +605,10 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 	}
 	get_cpu_id((struct cpuid *) &mach->cpuid);
 	mach->ibc = sclp_get_ibc();
-	memcpy(&mach->fac_mask, kvm_s390_fac_list_mask,
-	       kvm_s390_fac_list_mask_size() * sizeof(u64));
+	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
-	       S390_ARCH_FAC_LIST_SIZE_U64);
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 		ret = -EFAULT;
 	kfree(mach);
@@ -709,6 +738,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	return ret;
 }
 
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	unsigned long curkey;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Is this guest using storage keys? */
+	if (!mm_use_skey(current->mm))
+		return KVM_S390_GET_SKEYS_NONE;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		curkey = get_guest_storage_key(current->mm, hva);
+		if (IS_ERR_VALUE(curkey)) {
+			r = curkey;
+			goto out;
+		}
+		keys[i] = curkey;
+	}
+
+	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+			 sizeof(uint8_t) * args->count);
+	if (r)
+		r = -EFAULT;
+out:
+	kvfree(keys);
+	return r;
+}
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+			   sizeof(uint8_t) * args->count);
+	if (r) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Enable storage key handling for the guest */
+	s390_enable_skey();
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		/* Lowest order bit is reserved */
+		if (keys[i] & 0x01) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		r = set_guest_storage_key(current->mm, hva,
+					  (unsigned long)keys[i], 0);
+		if (r)
+			goto out;
+	}
+out:
+	kvfree(keys);
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -768,6 +899,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_s390_vm_has_attr(kvm, &attr);
 		break;
 	}
+	case KVM_S390_GET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_get_skeys(kvm, &args);
+		break;
+	}
+	case KVM_S390_SET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_set_skeys(kvm, &args);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -778,15 +929,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 static int kvm_s390_query_ap_config(u8 *config)
 {
 	u32 fcn_code = 0x04000000UL;
-	u32 cc;
+	u32 cc = 0;
 
+	memset(config, 0, 128);
 	asm volatile(
 		"lgr 0,%1\n"
 		"lgr 2,%2\n"
 		".long 0xb2af0000\n"		/* PQAP(QCI) */
-		"ipm %0\n"
+		"0: ipm %0\n"
 		"srl %0,28\n"
-		: "=r" (cc)
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: "+r" (cc)
 		: "r" (fcn_code), "r" (config)
 		: "cc", "0", "2", "memory"
 	);
@@ -839,9 +993,13 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
 
 	kvm_s390_set_crycb_format(kvm);
 
-	/* Disable AES/DEA protected key functions by default */
-	kvm->arch.crypto.aes_kw = 0;
-	kvm->arch.crypto.dea_kw = 0;
+	/* Enable AES/DEA protected key functions by default */
+	kvm->arch.crypto.aes_kw = 1;
+	kvm->arch.crypto.dea_kw = 1;
+	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 
 	return 0;
 }
@@ -881,53 +1039,43 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
 	if (!kvm->arch.dbf)
-		goto out_nodbf;
+		goto out_err;
 
 	/*
 	 * The architectural maximum amount of facilities is 16 kbit. To store
 	 * this amount, 2 kbyte of memory is required. Thus we need a full
-	 * page to hold the active copy (arch.model.fac->sie) and the current
-	 * facilities set (arch.model.fac->kvm). Its address size has to be
+	 * page to hold the guest facility list (arch.model.fac->list) and the
+	 * facility mask (arch.model.fac->mask). Its address size has to be
 	 * 31 bits and word aligned.
 	 */
 	kvm->arch.model.fac =
-		(struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
 	if (!kvm->arch.model.fac)
-		goto out_nofac;
-
-	memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list,
-	       S390_ARCH_FAC_LIST_SIZE_U64);
-
-	/*
-	 * If this KVM host runs *not* in a LPAR, relax the facility bits
-	 * of the kvm facility mask by all missing facilities. This will allow
-	 * to determine the right CPU model by means of the remaining facilities.
-	 * Live guest migration must prohibit the migration of KVMs running in
-	 * a LPAR to non LPAR hosts.
-	 */
-	if (!MACHINE_IS_LPAR)
-		for (i = 0; i < kvm_s390_fac_list_mask_size(); i++)
-			kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i];
+		goto out_err;
 
-	/*
-	 * Apply the kvm facility mask to limit the kvm supported/tolerated
-	 * facility list.
-	 */
+	/* Populate the facility mask initially. */
+	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
 		if (i < kvm_s390_fac_list_mask_size())
-			kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i];
+			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
 		else
-			kvm->arch.model.fac->kvm[i] = 0UL;
+			kvm->arch.model.fac->mask[i] = 0UL;
 	}
 
+	/* Populate the facility list initially. */
+	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+
 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
 	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
 
 	if (kvm_s390_crypto_init(kvm) < 0)
-		goto out_crypto;
+		goto out_err;
 
 	spin_lock_init(&kvm->arch.float_int.lock);
-	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
 	init_waitqueue_head(&kvm->arch.ipte_wq);
 	mutex_init(&kvm->arch.ipte_mutex);
 
@@ -939,7 +1087,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	} else {
 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
 		if (!kvm->arch.gmap)
-			goto out_nogmap;
+			goto out_err;
 		kvm->arch.gmap->private = kvm;
 		kvm->arch.gmap->pfault_enabled = 0;
 	}
@@ -951,15 +1099,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	spin_lock_init(&kvm->arch.start_stop_lock);
 
 	return 0;
-out_nogmap:
+out_err:
 	kfree(kvm->arch.crypto.crycb);
-out_crypto:
 	free_page((unsigned long)kvm->arch.model.fac);
-out_nofac:
 	debug_unregister(kvm->arch.dbf);
-out_nodbf:
 	free_page((unsigned long)(kvm->arch.sca));
-out_err:
 	return rc;
 }
 
@@ -1039,6 +1183,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 				    KVM_SYNC_CRS |
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_PFAULT;
+	if (test_kvm_facility(vcpu->kvm, 129))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 
 	if (kvm_is_ucontrol(vcpu->kvm))
 		return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1049,10 +1195,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	save_fp_regs(vcpu->arch.host_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129))
+		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		save_fp_regs(vcpu->arch.host_fpregs.fprs);
 	save_access_regs(vcpu->arch.host_acrs);
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		restore_fp_ctl(&vcpu->run->s.regs.fpc);
+		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1062,11 +1216,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		save_fp_ctl(&vcpu->run->s.regs.fpc);
+		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129))
+		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1134,6 +1296,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+	vcpu->arch.cpu_id = model->cpu_id;
+	vcpu->arch.sie_block->ibc = model->ibc;
+	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int rc = 0;
@@ -1142,6 +1313,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 						    CPUSTAT_SM |
 						    CPUSTAT_STOPPED |
 						    CPUSTAT_GED);
+	kvm_s390_vcpu_setup_model(vcpu);
+
 	vcpu->arch.sie_block->ecb   = 6;
 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= 0x10;
@@ -1152,8 +1325,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->eca |= 1;
 	if (sclp_has_sigpif())
 		vcpu->arch.sie_block->eca |= 0x10000000U;
-	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
-				      ICTL_TPROT;
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		vcpu->arch.sie_block->eca |= 0x00020000;
+		vcpu->arch.sie_block->ecd |= 0x20000000;
+	}
+	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
 	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1163,13 +1339,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 
-	mutex_lock(&vcpu->kvm->lock);
-	vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
-	memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm,
-	       S390_ARCH_FAC_LIST_SIZE_BYTE);
-	vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
-	mutex_unlock(&vcpu->kvm->lock);
-
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
 	return rc;
@@ -1197,6 +1366,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1212,7 +1382,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
 	}
-	vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie;
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -1732,6 +1901,31 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	u8 opcode;
+	int rc;
+
+	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+	trace_kvm_s390_sie_fault(vcpu);
+
+	/*
+	 * We want to inject an addressing exception, which is defined as a
+	 * suppressing or terminating exception. However, since we came here
+	 * by a DAT access exception, the PSW still points to the faulting
+	 * instruction since DAT exceptions are nullifying. So we've got
+	 * to look up the current opcode to get the length of the instruction
+	 * to be able to forward the PSW.
+	 */
+	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
+
+	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+}
+
 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 {
 	int rc = -1;
@@ -1763,11 +1957,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 		}
 	}
 
-	if (rc == -1) {
-		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-		trace_kvm_s390_sie_fault(vcpu);
-		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-	}
+	if (rc == -1)
+		rc = vcpu_post_run_fault_in_sie(vcpu);
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 
@@ -1983,6 +2174,35 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
+/*
+ * store additional status at address
+ */
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+					unsigned long gpa)
+{
+	/* Only bits 0-53 are used for address formation */
+	if (!(gpa & ~0x3ff))
+		return 0;
+
+	return write_guest_abs(vcpu, gpa & ~0x3ff,
+			       (void *)&vcpu->run->s.regs.vrs, 512);
+}
+
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!test_kvm_facility(vcpu->kvm, 129))
+		return 0;
+
+	/*
+	 * The guest VXRS are in the host VXRs due to the lazy
+	 * copying in vcpu load/put. Let's update our copies before we save
+	 * it into the save area.
+	 */
+	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+
+	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
+}
+
 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 {
 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -2107,6 +2327,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	return r;
 }
 
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+				  struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void *tmpbuf = NULL;
+	int r, srcu_idx;
+	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+				    | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+	if (mop->flags & ~supported_flags)
+		return -EINVAL;
+
+	if (mop->size > MEM_OP_MAX_SIZE)
+		return -E2BIG;
+
+	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+		tmpbuf = vmalloc(mop->size);
+		if (!tmpbuf)
+			return -ENOMEM;
+	}
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+			break;
+		}
+		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		if (r == 0) {
+			if (copy_to_user(uaddr, tmpbuf, mop->size))
+				r = -EFAULT;
+		}
+		break;
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+			break;
+		}
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			break;
+		}
+		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+	vfree(tmpbuf);
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2116,6 +2395,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	long r;
 
 	switch (ioctl) {
+	case KVM_S390_IRQ: {
+		struct kvm_s390_irq s390irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+			break;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
 		struct kvm_s390_irq s390irq;
@@ -2206,6 +2494,47 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+	case KVM_S390_MEM_OP: {
+		struct kvm_s390_mem_op mem_op;
+
+		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+		else
+			r = -EFAULT;
+		break;
+	}
+	case KVM_S390_SET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+		    irq_state.len == 0 ||
+		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_set_irq_state(vcpu,
+					   (void __user *) irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	case KVM_S390_GET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len == 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_get_irq_state(vcpu,
+					   (__u8 __user *)  irq_state.buf,
+					   irq_state.len);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 985c2114d7ef..ca108b90ae56 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,16 +70,22 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
-static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+typedef u8 __bitwise ar_t;
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
 	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+	if (ar)
+		*ar = base2;
+
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
-					      u64 *address1, u64 *address2)
+					      u64 *address1, u64 *address2,
+					      ar_t *ar_b1, ar_t *ar_b2)
 {
 	u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
 	u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
@@ -88,6 +94,11 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 
 	*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
 	*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+	if (ar_b1)
+		*ar_b1 = base1;
+	if (ar_b2)
+		*ar_b2 = base2;
 }
 
 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
@@ -98,7 +109,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2
 		*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
 }
 
-static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
 	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
@@ -107,14 +118,20 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
 	if (disp2 & 0x80000)
 		disp2+=0xfff00000;
 
+	if (ar)
+		*ar = base2;
+
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
 }
 
-static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
 	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+	if (ar)
+		*ar = base2;
+
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
@@ -125,10 +142,22 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
 	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 }
 
-/* test availability of facility in a kvm intance */
+/* test availability of facility in a kvm instance */
 static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
 {
-	return __test_facility(nr, kvm->arch.model.fac->kvm);
+	return __test_facility(nr, kvm->arch.model.fac->mask) &&
+		__test_facility(nr, kvm->arch.model.fac->list);
+}
+
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return -EINVAL;
+	ptr = (unsigned char *) fac_list + (nr >> 3);
+	*ptr |= (0x80UL >> (nr & 7));
+	return 0;
 }
 
 /* are cpu states controlled by user space */
@@ -149,9 +178,9 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-						    u64 cr6, u64 schid);
-void kvm_s390_reinject_io_int(struct kvm *kvm,
-			      struct kvm_s390_interrupt_info *inti);
+						    u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in intercept.c */
@@ -176,7 +205,10 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 /* implemented in kvm-s390.c */
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+					unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
@@ -240,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
 extern struct kvm_device_ops kvm_flic_ops;
 int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+			   void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+			   __u8 __user *buf, int len);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index bdd9b5b17e03..d22d8ee1ff9d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -36,15 +36,16 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *cpup;
 	s64 hostclk, val;
 	int i, rc;
+	ar_t ar;
 	u64 op2;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	op2 = kvm_s390_get_base_disp_s(vcpu);
+	op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (op2 & 7)	/* Operand must be on a doubleword boundary */
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, op2, &val, sizeof(val));
+	rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -68,20 +69,21 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 	u64 operand2;
 	u32 address;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_spx++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	/* must be word boundary */
 	if (operand2 & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* get the value */
-	rc = read_guest(vcpu, operand2, &address, sizeof(address));
+	rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -107,13 +109,14 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	u64 operand2;
 	u32 address;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_stpx++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	/* must be word boundary */
 	if (operand2 & 3)
@@ -122,7 +125,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	address = kvm_s390_get_prefix(vcpu);
 
 	/* get the value */
-	rc = write_guest(vcpu, operand2, &address, sizeof(address));
+	rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -136,18 +139,19 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 	u16 vcpu_id = vcpu->vcpu_id;
 	u64 ga;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_stap++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_s(vcpu);
+	ga = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	if (ga & 1)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+	rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -207,7 +211,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
 	kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
 	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
 	addr = kvm_s390_logical_to_effective(vcpu, addr);
-	if (kvm_s390_check_low_addr_protection(vcpu, addr))
+	if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
 		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 	addr = kvm_s390_real_to_abs(vcpu, addr);
 
@@ -229,18 +233,20 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 	struct kvm_s390_interrupt_info *inti;
 	unsigned long len;
 	u32 tpi_data[3];
-	int cc, rc;
+	int rc;
 	u64 addr;
+	ar_t ar;
 
-	rc = 0;
-	addr = kvm_s390_get_base_disp_s(vcpu);
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	cc = 0;
+
 	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
-	if (!inti)
-		goto no_interrupt;
-	cc = 1;
+	if (!inti) {
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
 	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
 	tpi_data[1] = inti->io.io_int_parm;
 	tpi_data[2] = inti->io.io_int_word;
@@ -250,40 +256,51 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 		 * provided area.
 		 */
 		len = sizeof(tpi_data) - 4;
-		rc = write_guest(vcpu, addr, &tpi_data, len);
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
+		rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+		if (rc) {
+			rc = kvm_s390_inject_prog_cond(vcpu, rc);
+			goto reinject_interrupt;
+		}
 	} else {
 		/*
 		 * Store the three-word I/O interruption code into
 		 * the appropriate lowcore area.
 		 */
 		len = sizeof(tpi_data);
-		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+			/* failed writes to the low core are not recoverable */
 			rc = -EFAULT;
+			goto reinject_interrupt;
+		}
 	}
+
+	/* irq was successfully handed to the guest */
+	kfree(inti);
+	kvm_s390_set_psw_cc(vcpu, 1);
+	return 0;
+reinject_interrupt:
 	/*
 	 * If we encounter a problem storing the interruption code, the
 	 * instruction is suppressed from the guest's view: reinject the
 	 * interrupt.
 	 */
-	if (!rc)
+	if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
 		kfree(inti);
-	else
-		kvm_s390_reinject_io_int(vcpu->kvm, inti);
-no_interrupt:
-	/* Set condition code and we're done. */
-	if (!rc)
-		kvm_s390_set_psw_cc(vcpu, cc);
+		rc = -EFAULT;
+	}
+	/* don't set the cc, a pgm irq was injected or we drop to user space */
 	return rc ? -EFAULT : 0;
 }
 
 static int handle_tsch(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_interrupt_info *inti = NULL;
+	const u64 isc_mask = 0xffUL << 24; /* all iscs set */
 
-	inti = kvm_s390_get_io_int(vcpu->kvm, 0,
-				   vcpu->run->s.regs.gprs[1]);
+	/* a valid schid has at least one bit set */
+	if (vcpu->run->s.regs.gprs[1])
+		inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+					   vcpu->run->s.regs.gprs[1]);
 
 	/*
 	 * Prepare exit to userspace.
@@ -348,7 +365,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 	 * We need to shift the lower 32 facility bits (bit 0-31) from a u64
 	 * into a u32 memory representation. They will remain bits 0-31.
 	 */
-	fac = *vcpu->kvm->arch.model.fac->sie >> 32;
+	fac = *vcpu->kvm->arch.model.fac->list >> 32;
 	rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
 			    &fac, sizeof(fac));
 	if (rc)
@@ -386,15 +403,16 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 	psw_compat_t new_psw;
 	u64 addr;
 	int rc;
+	ar_t ar;
 
 	if (gpsw->mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	addr = kvm_s390_get_base_disp_s(vcpu);
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (!(new_psw.mask & PSW32_MASK_BASE))
@@ -412,14 +430,15 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 	psw_t new_psw;
 	u64 addr;
 	int rc;
+	ar_t ar;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	addr = kvm_s390_get_base_disp_s(vcpu);
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	vcpu->arch.sie_block->gpsw = new_psw;
@@ -433,18 +452,19 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
 	u64 stidp_data = vcpu->arch.stidp_data;
 	u64 operand2;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_stidp++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	if (operand2 & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+	rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -467,6 +487,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 	for (n = mem->count - 1; n > 0 ; n--)
 		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
 
+	memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
 	mem->vm[0].cpus_total = cpus;
 	mem->vm[0].cpus_configured = cpus;
 	mem->vm[0].cpus_standby = 0;
@@ -478,6 +499,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 	ASCEBC(mem->vm[0].cpi, 16);
 }
 
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+				 u8 fc, u8 sel1, u16 sel2)
+{
+	vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+	vcpu->run->s390_stsi.addr = addr;
+	vcpu->run->s390_stsi.ar = ar;
+	vcpu->run->s390_stsi.fc = fc;
+	vcpu->run->s390_stsi.sel1 = sel1;
+	vcpu->run->s390_stsi.sel2 = sel2;
+}
+
 static int handle_stsi(struct kvm_vcpu *vcpu)
 {
 	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
@@ -486,6 +518,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	unsigned long mem = 0;
 	u64 operand2;
 	int rc = 0;
+	ar_t ar;
 
 	vcpu->stat.instruction_stsi++;
 	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -508,7 +541,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	if (operand2 & 0xfff)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -532,16 +565,20 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 		break;
 	}
 
-	rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+	rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
 	if (rc) {
 		rc = kvm_s390_inject_prog_cond(vcpu, rc);
 		goto out;
 	}
+	if (vcpu->kvm->arch.user_stsi) {
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		rc = -EREMOTE;
+	}
 	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
 	free_page(mem);
 	kvm_s390_set_psw_cc(vcpu, 0);
 	vcpu->run->s.regs.gprs[0] = 0;
-	return 0;
+	return rc;
 out_no_data:
 	kvm_s390_set_psw_cc(vcpu, 3);
 out:
@@ -670,7 +707,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	}
 
 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_protection(vcpu, start))
+		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
 			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 	}
 
@@ -776,13 +813,14 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	int reg, rc, nr_regs;
 	u32 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_lctl++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rs(vcpu);
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -791,7 +829,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
-	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
@@ -814,13 +852,14 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	int reg, rc, nr_regs;
 	u32 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_stctl++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rs(vcpu);
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -836,7 +875,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
 	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -847,13 +886,14 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	int reg, rc, nr_regs;
 	u64 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_lctlg++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rsy(vcpu);
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -862,7 +902,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
-	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
@@ -884,13 +924,14 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	int reg, rc, nr_regs;
 	u64 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_stctg++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rsy(vcpu);
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -906,7 +947,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
 	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -931,13 +972,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 	unsigned long hva, gpa;
 	int ret = 0, cc = 0;
 	bool writable;
+	ar_t ar;
 
 	vcpu->stat.instruction_tprot++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
 
 	/* we only handle the Linux memory detection case:
 	 * access key == 0
@@ -946,11 +988,11 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
 		ipte_lock(vcpu);
-	ret = guest_translate_address(vcpu, address1, &gpa, 1);
+	ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
 	if (ret == PGM_PROTECTION) {
 		/* Write protected? Try again with read-only... */
 		cc = 1;
-		ret = guest_translate_address(vcpu, address1, &gpa, 0);
+		ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
 	}
 	if (ret) {
 		if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 23b1e86b2122..72e58bd2bee7 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -393,6 +393,9 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
 	case SIGP_STORE_STATUS_AT_ADDRESS:
 		vcpu->stat.instruction_sigp_store_status++;
 		break;
+	case SIGP_STORE_ADDITIONAL_STATUS:
+		vcpu->stat.instruction_sigp_store_adtl_status++;
+		break;
 	case SIGP_SET_PREFIX:
 		vcpu->stat.instruction_sigp_prefix++;
 		break;
@@ -431,7 +434,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	order_code = kvm_s390_get_base_disp_rs(vcpu);
+	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 	if (handle_sigp_order_in_user_space(vcpu, order_code))
 		return -EOPNOTSUPP;
 
@@ -473,7 +476,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
 	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
 	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
 	struct kvm_vcpu *dest_vcpu;
-	u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 
 	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
 
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 653a7ec09ef5..3208d33a48cb 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -10,6 +10,13 @@
 #define TRACE_INCLUDE_FILE trace-s390
 
 /*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR kvm_s390
+
+/*
  * Trace point for the creation of the kvm instance.
  */
 TRACE_EVENT(kvm_s390_create_vm,
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index a01df233856f..0e8fefe5b0ce 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,8 +3,7 @@
 #
 
 lib-y += delay.o string.o uaccess.o find.o
-obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
-obj-$(CONFIG_64BIT) += mem64.o
+obj-y += mem.o
 lib-$(CONFIG_SMP) += spinlock.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c
deleted file mode 100644
index 261152f83242..000000000000
--- a/arch/s390/lib/div64.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- *  __div64_32 implementation for 31 bit.
- *
- *    Copyright IBM Corp. 2006
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-
-#ifdef CONFIG_MARCH_G5
-
-/*
- * Function to divide an unsigned 64 bit integer by an unsigned
- * 31 bit integer using signed 64/32 bit division.
- */
-static uint32_t __div64_31(uint64_t *n, uint32_t base)
-{
-	register uint32_t reg2 asm("2");
-	register uint32_t reg3 asm("3");
-	uint32_t *words = (uint32_t *) n;
-	uint32_t tmp;
-
-	/* Special case base==1, remainder = 0, quotient = n */
-	if (base == 1)
-		return 0;
-	/*
-	 * Special case base==0 will cause a fixed point divide exception
-	 * on the dr instruction and may not happen anyway. For the
-	 * following calculation we can assume base > 1. The first
-	 * signed 64 / 32 bit division with an upper half of 0 will
-	 * give the correct upper half of the 64 bit quotient.
-	 */
-	reg2 = 0UL;
-	reg3 = words[0];
-	asm volatile(
-		"	dr	%0,%2\n"
-		: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );
-	words[0] = reg3;
-	reg3 = words[1];
-	/*
-	 * To get the lower half of the 64 bit quotient and the 32 bit
-	 * remainder we have to use a little trick. Since we only have
-	 * a signed division the quotient can get too big. To avoid this
-	 * the 64 bit dividend is halved, then the signed division will
-	 * work. Afterwards the quotient and the remainder are doubled.
-	 * If the last bit of the dividend has been one the remainder
-	 * is increased by one then checked against the base. If the
-	 * remainder has overflown subtract base and increase the
-	 * quotient. Simple, no ?
-	 */
-	asm volatile(
-		"	nr	%2,%1\n"
-		"	srdl	%0,1\n"
-		"	dr	%0,%3\n"
-		"	alr	%0,%0\n"
-		"	alr	%1,%1\n"
-		"	alr	%0,%2\n"
-		"	clr	%0,%3\n"
-		"	jl	0f\n"
-		"	slr	%0,%3\n"
-		"	ahi	%1,1\n"
-		"0:\n"
-		: "+d" (reg2), "+d" (reg3), "=d" (tmp)
-		: "d" (base), "2" (1UL) : "cc" );
-	words[1] = reg3;
-	return reg2;
-}
-
-/*
- * Function to divide an unsigned 64 bit integer by an unsigned
- * 32 bit integer using the unsigned 64/31 bit division.
- */
-uint32_t __div64_32(uint64_t *n, uint32_t base)
-{
-	uint32_t r;
-
-	/*
-	 * If the most significant bit of base is set, divide n by
-	 * (base/2). That allows to use 64/31 bit division and gives a
-	 * good approximation of the result: n = (base/2)*q + r. The
-	 * result needs to be corrected with two simple transformations.
-	 * If base is already < 2^31-1 __div64_31 can be used directly.
-	 */
-	r = __div64_31(n, ((signed) base < 0) ? (base/2) : base);
-	if ((signed) base < 0) {
-		uint64_t q = *n;
-		/*
-		 * First transformation:
-		 * n = (base/2)*q + r
-		 *   = ((base/2)*2)*(q/2) + ((q&1) ? (base/2) : 0) + r
-		 * Since r < (base/2), r + (base/2) < base.
-		 * With q1 = (q/2) and r1 = r + ((q&1) ? (base/2) : 0)
-		 * n = ((base/2)*2)*q1 + r1 with r1 < base.
-		 */
-		if (q & 1)
-			r += base/2;
-		q >>= 1;
-		/*
-		 * Second transformation. ((base/2)*2) could have lost the
-		 * last bit.
-		 * n = ((base/2)*2)*q1 + r1
-		 *   = base*q1 - ((base&1) ? q1 : 0) + r1
-		 */
-		if (base & 1) {
-			int64_t rx = r - q;
-			/*
-			 * base is >= 2^31. The worst case for the while
-			 * loop is n=2^64-1 base=2^31+1. That gives a
-			 * maximum for q=(2^64-1)/2^31 = 0x1ffffffff. Since
-			 * base >= 2^31 the loop is finished after a maximum
-			 * of three iterations.
-			 */
-			while (rx < 0) {
-				rx += base;
-				q--;
-			}
-			r = rx;
-		}
-		*n = q;
-	}
-	return r;
-}
-
-#else /* MARCH_G5 */
-
-uint32_t __div64_32(uint64_t *n, uint32_t base)
-{
-	register uint32_t reg2 asm("2");
-	register uint32_t reg3 asm("3");
-	uint32_t *words = (uint32_t *) n;
-
-	reg2 = 0UL;
-	reg3 = words[0];
-	asm volatile(
-		"	dlr	%0,%2\n"
-		: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );
-	words[0] = reg3;
-	reg3 = words[1];
-	asm volatile(
-		"	dlr	%0,%2\n"
-		: "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" );
-	words[1] = reg3;
-	return reg2;
-}
-
-#endif /* MARCH_G5 */
diff --git a/arch/s390/lib/mem64.S b/arch/s390/lib/mem.S
index c6d553e85ab1..c6d553e85ab1 100644
--- a/arch/s390/lib/mem64.S
+++ b/arch/s390/lib/mem.S
diff --git a/arch/s390/lib/mem32.S b/arch/s390/lib/mem32.S
deleted file mode 100644
index 14ca9244b615..000000000000
--- a/arch/s390/lib/mem32.S
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * String handling functions.
- *
- * Copyright IBM Corp. 2012
- */
-
-#include <linux/linkage.h>
-
-/*
- * memset implementation
- *
- * This code corresponds to the C construct below. We do distinguish
- * between clearing (c == 0) and setting a memory array (c != 0) simply
- * because nearly all memset invocations in the kernel clear memory and
- * the xc instruction is preferred in such cases.
- *
- * void *memset(void *s, int c, size_t n)
- * {
- *	if (likely(c == 0))
- *		return __builtin_memset(s, 0, n);
- *	return __builtin_memset(s, c, n);
- * }
- */
-ENTRY(memset)
-	basr	%r5,%r0
-.Lmemset_base:
-	ltr	%r4,%r4
-	bzr	%r14
-	ltr	%r3,%r3
-	jnz	.Lmemset_fill
-	ahi	%r4,-1
-	lr	%r3,%r4
-	srl	%r3,8
-	ltr	%r3,%r3
-	lr	%r1,%r2
-	je	.Lmemset_clear_rest
-.Lmemset_clear_loop:
-	xc	0(256,%r1),0(%r1)
-	la	%r1,256(%r1)
-	brct	%r3,.Lmemset_clear_loop
-.Lmemset_clear_rest:
-	ex	%r4,.Lmemset_xc-.Lmemset_base(%r5)
-	br	%r14
-.Lmemset_fill:
-	stc	%r3,0(%r2)
-	chi	%r4,1
-	lr	%r1,%r2
-	ber	%r14
-	ahi	%r4,-2
-	lr	%r3,%r4
-	srl	%r3,8
-	ltr	%r3,%r3
-	je	.Lmemset_fill_rest
-.Lmemset_fill_loop:
-	mvc	1(256,%r1),0(%r1)
-	la	%r1,256(%r1)
-	brct	%r3,.Lmemset_fill_loop
-.Lmemset_fill_rest:
-	ex	%r4,.Lmemset_mvc-.Lmemset_base(%r5)
-	br	%r14
-.Lmemset_xc:
-	xc	0(1,%r1),0(%r1)
-.Lmemset_mvc:
-	mvc	1(1,%r1),0(%r1)
-
-/*
- * memcpy implementation
- *
- * void *memcpy(void *dest, const void *src, size_t n)
- */
-ENTRY(memcpy)
-	basr	%r5,%r0
-.Lmemcpy_base:
-	ltr	%r4,%r4
-	bzr	%r14
-	ahi	%r4,-1
-	lr	%r0,%r4
-	srl	%r0,8
-	ltr	%r0,%r0
-	lr	%r1,%r2
-	jnz	.Lmemcpy_loop
-.Lmemcpy_rest:
-	ex	%r4,.Lmemcpy_mvc-.Lmemcpy_base(%r5)
-	br	%r14
-.Lmemcpy_loop:
-	mvc	0(256,%r1),0(%r3)
-	la	%r1,256(%r1)
-	la	%r3,256(%r3)
-	brct	%r0,.Lmemcpy_loop
-	j	.Lmemcpy_rest
-.Lmemcpy_mvc:
-	mvc	0(1,%r1),0(%r3)
diff --git a/arch/s390/lib/qrnnd.S b/arch/s390/lib/qrnnd.S
deleted file mode 100644
index d321329130ec..000000000000
--- a/arch/s390/lib/qrnnd.S
+++ /dev/null
@@ -1,78 +0,0 @@
-# S/390 __udiv_qrnnd
-
-#include <linux/linkage.h>
-
-# r2 : &__r
-# r3 : upper half of 64 bit word n
-# r4 : lower half of 64 bit word n
-# r5 : divisor d
-# the reminder r of the division is to be stored to &__r and
-# the quotient q is to be returned
-
-	.text
-ENTRY(__udiv_qrnnd)
-	st    %r2,24(%r15)	  # store pointer to reminder for later
-	lr    %r0,%r3		  # reload n
-	lr    %r1,%r4
-	ltr   %r2,%r5		  # reload and test divisor
-	jp    5f
-	# divisor >= 0x80000000
-	srdl  %r0,2		  # n/4
-	srl   %r2,1		  # d/2
-	slr   %r1,%r2		  # special case if last bit of d is set
-	brc   3,0f		  #  (n/4) div (n/2) can overflow by 1
-	ahi   %r0,-1		  #  trick: subtract n/2, then divide
-0:	dr    %r0,%r2		  # signed division
-	ahi   %r1,1		  #  trick part 2: add 1 to the quotient
-	# now (n >> 2) = (d >> 1) * %r1 + %r0
-	lhi   %r3,1
-	nr    %r3,%r1		  # test last bit of q
-	jz    1f
-	alr   %r0,%r2		  # add (d>>1) to r
-1:	srl   %r1,1		  # q >>= 1
-	# now (n >> 2) = (d&-2) * %r1 + %r0
-	lhi   %r3,1
-	nr    %r3,%r5		  # test last bit of d
-	jz    2f
-	slr   %r0,%r1		  # r -= q
-	brc   3,2f		  # borrow ?
-	alr   %r0,%r5		  # r += d
-	ahi   %r1,-1
-2:	# now (n >> 2) = d * %r1 + %r0
-	alr   %r1,%r1		  # q <<= 1
-	alr   %r0,%r0		  # r <<= 1
-	brc   12,3f		  # overflow on r ?
-	slr   %r0,%r5		  # r -= d
-	ahi   %r1,1		  # q += 1
-3:	lhi   %r3,2
-	nr    %r3,%r4		  # test next to last bit of n
-	jz    4f
-	ahi   %r0,1		  # r += 1
-4:	clr   %r0,%r5		  # r >= d ?
-	jl    6f
-	slr   %r0,%r5		  # r -= d
-	ahi   %r1,1		  # q += 1
-	# now (n >> 1) = d * %r1 + %r0
-	j     6f
-5:	# divisor < 0x80000000
-	srdl  %r0,1
-	dr    %r0,%r2		  # signed division
-	# now (n >> 1) = d * %r1 + %r0
-6:	alr   %r1,%r1		  # q <<= 1
-	alr   %r0,%r0		  # r <<= 1
-	brc   12,7f		  # overflow on r ?
-	slr   %r0,%r5		  # r -= d
-	ahi   %r1,1		  # q += 1
-7:	lhi   %r3,1
-	nr    %r3,%r4		  # isolate last bit of n
-	alr   %r0,%r3		  # r += (n & 1)
-	clr   %r0,%r5		  # r >= d ?
-	jl    8f
-	slr   %r0,%r5		  # r -= d
-	ahi   %r1,1		  # q += 1
-8:	# now n = d * %r1 + %r0
-	l     %r2,24(%r15)
-	st    %r0,0(%r2)
-	lr    %r2,%r1
-	br    %r14
-	.end	__udiv_qrnnd
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 53dd5d7a0c96..4614d415bb58 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,20 +15,6 @@
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
 
-#ifndef CONFIG_64BIT
-#define AHI	"ahi"
-#define ALR	"alr"
-#define CLR	"clr"
-#define LHI	"lhi"
-#define SLR	"slr"
-#else
-#define AHI	"aghi"
-#define ALR	"algr"
-#define CLR	"clgr"
-#define LHI	"lghi"
-#define SLR	"slgr"
-#endif
-
 static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
 
 static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
@@ -41,29 +27,29 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
 	asm volatile(
 		"0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
 		"9: jz    7f\n"
-		"1:"ALR"  %0,%3\n"
-		"  "SLR"  %1,%3\n"
-		"  "SLR"  %2,%3\n"
+		"1: algr  %0,%3\n"
+		"   slgr  %1,%3\n"
+		"   slgr  %2,%3\n"
 		"   j     0b\n"
 		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
 		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
-		"  "SLR"  %4,%1\n"
-		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   slgr  %4,%1\n"
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
 		"   jnh   4f\n"
 		"3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
-		"10:"SLR"  %0,%4\n"
-		"  "ALR"  %2,%4\n"
-		"4:"LHI"  %4,-1\n"
-		"  "ALR"  %4,%0\n"	/* copy remaining size, subtract 1 */
+		"10:slgr  %0,%4\n"
+		"   algr  %2,%4\n"
+		"4: lghi  %4,-1\n"
+		"   algr  %4,%0\n"	/* copy remaining size, subtract 1 */
 		"   bras  %3,6f\n"	/* memset loop */
 		"   xc    0(1,%2),0(%2)\n"
 		"5: xc    0(256,%2),0(%2)\n"
 		"   la    %2,256(%2)\n"
-		"6:"AHI"  %4,-256\n"
+		"6: aghi  %4,-256\n"
 		"   jnm   5b\n"
 		"   ex    %4,0(%3)\n"
 		"   j     8f\n"
-		"7:"SLR"  %0,%0\n"
+		"7:slgr  %0,%0\n"
 		"8:\n"
 		EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
 		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
@@ -82,32 +68,32 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 		"   sacf  0\n"
 		"0: mvcp  0(%0,%2),0(%1),%3\n"
 		"10:jz    8f\n"
-		"1:"ALR"  %0,%3\n"
+		"1: algr  %0,%3\n"
 		"   la    %1,256(%1)\n"
 		"   la    %2,256(%2)\n"
 		"2: mvcp  0(%0,%2),0(%1),%3\n"
 		"11:jnz   1b\n"
 		"   j     8f\n"
 		"3: la    %4,255(%1)\n"	/* %4 = ptr + 255 */
-		"  "LHI"  %3,-4096\n"
+		"   lghi  %3,-4096\n"
 		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
-		"  "SLR"  %4,%1\n"
-		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   slgr  %4,%1\n"
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
 		"   jnh   5f\n"
 		"4: mvcp  0(%4,%2),0(%1),%3\n"
-		"12:"SLR"  %0,%4\n"
-		"  "ALR"  %2,%4\n"
-		"5:"LHI"  %4,-1\n"
-		"  "ALR"  %4,%0\n"	/* copy remaining size, subtract 1 */
+		"12:slgr  %0,%4\n"
+		"   algr  %2,%4\n"
+		"5: lghi  %4,-1\n"
+		"   algr  %4,%0\n"	/* copy remaining size, subtract 1 */
 		"   bras  %3,7f\n"	/* memset loop */
 		"   xc    0(1,%2),0(%2)\n"
 		"6: xc    0(256,%2),0(%2)\n"
 		"   la    %2,256(%2)\n"
-		"7:"AHI"  %4,-256\n"
+		"7: aghi  %4,-256\n"
 		"   jnm   6b\n"
 		"   ex    %4,0(%3)\n"
 		"   j     9f\n"
-		"8:"SLR"  %0,%0\n"
+		"8:slgr  %0,%0\n"
 		"9: sacf  768\n"
 		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
 		EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
@@ -134,19 +120,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
 	asm volatile(
 		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
 		"6: jz    4f\n"
-		"1:"ALR"  %0,%3\n"
-		"  "SLR"  %1,%3\n"
-		"  "SLR"  %2,%3\n"
+		"1: algr  %0,%3\n"
+		"   slgr  %1,%3\n"
+		"   slgr  %2,%3\n"
 		"   j     0b\n"
 		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
 		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
-		"  "SLR"  %4,%1\n"
-		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   slgr  %4,%1\n"
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
 		"   jnh   5f\n"
 		"3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
-		"7:"SLR"  %0,%4\n"
+		"7: slgr  %0,%4\n"
 		"   j     5f\n"
-		"4:"SLR"  %0,%0\n"
+		"4: slgr  %0,%0\n"
 		"5:\n"
 		EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
 		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
@@ -165,22 +151,22 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 		"   sacf  0\n"
 		"0: mvcs  0(%0,%1),0(%2),%3\n"
 		"7: jz    5f\n"
-		"1:"ALR"  %0,%3\n"
+		"1: algr  %0,%3\n"
 		"   la    %1,256(%1)\n"
 		"   la    %2,256(%2)\n"
 		"2: mvcs  0(%0,%1),0(%2),%3\n"
 		"8: jnz   1b\n"
 		"   j     5f\n"
 		"3: la    %4,255(%1)\n" /* %4 = ptr + 255 */
-		"  "LHI"  %3,-4096\n"
+		"   lghi  %3,-4096\n"
 		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
-		"  "SLR"  %4,%1\n"
-		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   slgr  %4,%1\n"
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
 		"   jnh   6f\n"
 		"4: mvcs  0(%4,%1),0(%2),%3\n"
-		"9:"SLR"  %0,%4\n"
+		"9: slgr  %0,%4\n"
 		"   j     6f\n"
-		"5:"SLR"  %0,%0\n"
+		"5: slgr  %0,%0\n"
 		"6: sacf  768\n"
 		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
 		EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
@@ -208,11 +194,11 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use
 	asm volatile(
 		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
 		"   jz	  2f\n"
-		"1:"ALR"  %0,%3\n"
-		"  "SLR"  %1,%3\n"
-		"  "SLR"  %2,%3\n"
+		"1: algr  %0,%3\n"
+		"   slgr  %1,%3\n"
+		"   slgr  %2,%3\n"
 		"   j	  0b\n"
-		"2:"SLR"  %0,%0\n"
+		"2:slgr  %0,%0\n"
 		"3: \n"
 		EX_TABLE(0b,3b)
 		: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
@@ -228,23 +214,23 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
 	load_kernel_asce();
 	asm volatile(
 		"   sacf  256\n"
-		"  "AHI"  %0,-1\n"
+		"   aghi  %0,-1\n"
 		"   jo	  5f\n"
 		"   bras  %3,3f\n"
-		"0:"AHI"  %0,257\n"
+		"0: aghi  %0,257\n"
 		"1: mvc	  0(1,%1),0(%2)\n"
 		"   la	  %1,1(%1)\n"
 		"   la	  %2,1(%2)\n"
-		"  "AHI"  %0,-1\n"
+		"   aghi  %0,-1\n"
 		"   jnz	  1b\n"
 		"   j	  5f\n"
 		"2: mvc	  0(256,%1),0(%2)\n"
 		"   la	  %1,256(%1)\n"
 		"   la	  %2,256(%2)\n"
-		"3:"AHI"  %0,-256\n"
+		"3: aghi  %0,-256\n"
 		"   jnm	  2b\n"
 		"4: ex	  %0,1b-0b(%3)\n"
-		"5: "SLR"  %0,%0\n"
+		"5: slgr  %0,%0\n"
 		"6: sacf  768\n"
 		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
 		: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
@@ -269,18 +255,18 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
 	asm volatile(
 		"0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
 		"   jz	  4f\n"
-		"1:"ALR"  %0,%2\n"
-		"  "SLR"  %1,%2\n"
+		"1: algr  %0,%2\n"
+		"   slgr  %1,%2\n"
 		"   j	  0b\n"
 		"2: la	  %3,4095(%1)\n"/* %4 = to + 4095 */
 		"   nr	  %3,%2\n"	/* %4 = (to + 4095) & -4096 */
-		"  "SLR"  %3,%1\n"
-		"  "CLR"  %0,%3\n"	/* copy crosses next page boundary? */
+		"   slgr  %3,%1\n"
+		"   clgr  %0,%3\n"	/* copy crosses next page boundary? */
 		"   jnh	  5f\n"
 		"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
-		"  "SLR"  %0,%3\n"
+		"   slgr  %0,%3\n"
 		"   j	  5f\n"
-		"4:"SLR"  %0,%0\n"
+		"4:slgr  %0,%0\n"
 		"5:\n"
 		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
 		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
@@ -295,28 +281,28 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
 	load_kernel_asce();
 	asm volatile(
 		"   sacf  256\n"
-		"  "AHI"  %0,-1\n"
+		"   aghi  %0,-1\n"
 		"   jo    5f\n"
 		"   bras  %3,3f\n"
 		"   xc    0(1,%1),0(%1)\n"
-		"0:"AHI"  %0,257\n"
+		"0: aghi  %0,257\n"
 		"   la    %2,255(%1)\n" /* %2 = ptr + 255 */
 		"   srl   %2,12\n"
 		"   sll   %2,12\n"	/* %2 = (ptr + 255) & -4096 */
-		"  "SLR"  %2,%1\n"
-		"  "CLR"  %0,%2\n"	/* clear crosses next page boundary? */
+		"   slgr  %2,%1\n"
+		"   clgr  %0,%2\n"	/* clear crosses next page boundary? */
 		"   jnh   5f\n"
-		"  "AHI"  %2,-1\n"
+		"   aghi  %2,-1\n"
 		"1: ex    %2,0(%3)\n"
-		"  "AHI"  %2,1\n"
-		"  "SLR"  %0,%2\n"
+		"   aghi  %2,1\n"
+		"   slgr  %0,%2\n"
 		"   j     5f\n"
 		"2: xc    0(256,%1),0(%1)\n"
 		"   la    %1,256(%1)\n"
-		"3:"AHI"  %0,-256\n"
+		"3: aghi  %0,-256\n"
 		"   jnm   2b\n"
 		"4: ex    %0,0(%3)\n"
-		"5: "SLR"  %0,%0\n"
+		"5: slgr  %0,%0\n"
 		"6: sacf  768\n"
 		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
 		: "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
@@ -341,12 +327,12 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
 	asm volatile(
 		"   la    %2,0(%1)\n"
 		"   la    %3,0(%0,%1)\n"
-		"  "SLR"  %0,%0\n"
+		"   slgr  %0,%0\n"
 		"   sacf  256\n"
 		"0: srst  %3,%2\n"
 		"   jo    0b\n"
 		"   la    %0,1(%3)\n"	/* strnlen_user results includes \0 */
-		"  "SLR"  %0,%1\n"
+		"   slgr  %0,%1\n"
 		"1: sacf  768\n"
 		EX_TABLE(0b,1b)
 		: "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
@@ -399,7 +385,7 @@ early_param("uaccess_primary", parse_uaccess_pt);
 
 static int __init uaccess_init(void)
 {
-	if (IS_ENABLED(CONFIG_64BIT) && !uaccess_primary && test_facility(27))
+	if (!uaccess_primary && test_facility(27))
 		static_key_slow_inc(&have_mvcos);
 	return 0;
 }
diff --git a/arch/s390/lib/ucmpdi2.c b/arch/s390/lib/ucmpdi2.c
deleted file mode 100644
index 3e05ff532582..000000000000
--- a/arch/s390/lib/ucmpdi2.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <linux/module.h>
-
-union ull_union {
-	unsigned long long ull;
-	struct {
-		unsigned int high;
-		unsigned int low;
-	} ui;
-};
-
-int __ucmpdi2(unsigned long long a, unsigned long long b)
-{
-	union ull_union au = {.ull = a};
-	union ull_union bu = {.ull = b};
-
-	if (au.ui.high < bu.ui.high)
-		return 0;
-	else if (au.ui.high > bu.ui.high)
-		return 2;
-	if (au.ui.low < bu.ui.low)
-		return 0;
-	else if (au.ui.low > bu.ui.low)
-		return 2;
-	return 1;
-}
-EXPORT_SYMBOL(__ucmpdi2);
diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile
deleted file mode 100644
index 51d399549f60..000000000000
--- a/arch/s390/math-emu/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the FPU instruction emulation.
-#
-
-obj-$(CONFIG_MATHEMU) := math.o
-
-ccflags-y := -I$(src) -Iinclude/math-emu -w
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
deleted file mode 100644
index a6ba0d724335..000000000000
--- a/arch/s390/math-emu/math.c
+++ /dev/null
@@ -1,2255 +0,0 @@
-/*
- *  S390 version
- *    Copyright IBM Corp. 1999, 2001
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *
- * 'math.c' emulates IEEE instructions on a S390 processor
- *          that does not have the IEEE fpu (all processors before G5).
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/uaccess.h>
-#include <asm/lowcore.h>
-
-#include <asm/sfp-util.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-#include <math-emu/double.h>
-#include <math-emu/quad.h>
-
-#define FPC_VALID_MASK		0xF8F8FF03
-
-/*
- * I miss a macro to round a floating point number to the
- * nearest integer in the same floating point format.
- */
-#define _FP_TO_FPINT_ROUND(fs, wc, X)					\
-  do {									\
-    switch (X##_c)							\
-      {									\
-      case FP_CLS_NORMAL:						\
-        if (X##_e > _FP_FRACBITS_##fs + _FP_EXPBIAS_##fs)		\
-          { /* floating point number has no bits after the dot. */	\
-          }								\
-        else if (X##_e <= _FP_FRACBITS_##fs + _FP_EXPBIAS_##fs &&	\
-                 X##_e > _FP_EXPBIAS_##fs)				\
-	  { /* some bits before the dot, some after it. */		\
-            _FP_FRAC_SRS_##wc(X, _FP_WFRACBITS_##fs,			\
-                              X##_e - _FP_EXPBIAS_##fs			\
-                              + _FP_FRACBITS_##fs);			\
-	    _FP_ROUND(wc, X);						\
-	    _FP_FRAC_SLL_##wc(X, X##_e - _FP_EXPBIAS_##fs		\
-                              + _FP_FRACBITS_##fs);			\
-          }								\
-        else								\
-          { /* all bits after the dot. */				\
-	    FP_SET_EXCEPTION(FP_EX_INEXACT);				\
-            X##_c = FP_CLS_ZERO;					\
-	  }								\
-        break;								\
-      case FP_CLS_NAN:							\
-      case FP_CLS_INF:							\
-      case FP_CLS_ZERO:							\
-        break;								\
-      }									\
-  } while (0)
-
-#define FP_TO_FPINT_ROUND_S(X)	_FP_TO_FPINT_ROUND(S,1,X)
-#define FP_TO_FPINT_ROUND_D(X)	_FP_TO_FPINT_ROUND(D,2,X)
-#define FP_TO_FPINT_ROUND_Q(X)	_FP_TO_FPINT_ROUND(Q,4,X)
-
-typedef union {
-        long double ld;
-        struct {
-                __u64 high;
-                __u64 low;
-        } w;
-} mathemu_ldcv;
-
-#ifdef CONFIG_SYSCTL
-int sysctl_ieee_emulation_warnings=1;
-#endif
-
-#define mathemu_put_user(x, p) \
-        do { \
-                if (put_user((x),(p))) \
-                        return SIGSEGV; \
-        } while (0)
-
-#define mathemu_get_user(x, p) \
-        do { \
-                if (get_user((x),(p))) \
-                        return SIGSEGV; \
-        } while (0)
-
-#define mathemu_copy_from_user(d, s, n)\
-        do { \
-                if (copy_from_user((d),(s),(n)) != 0) \
-                        return SIGSEGV; \
-        } while (0)
-
-#define mathemu_copy_to_user(d, s, n) \
-        do { \
-                if (copy_to_user((d),(s),(n)) != 0) \
-                        return SIGSEGV; \
-        } while (0)
-
-static void display_emulation_not_implemented(struct pt_regs *regs, char *instr)
-{
-        __u16 *location;
-        
-#ifdef CONFIG_SYSCTL
-        if(sysctl_ieee_emulation_warnings)
-#endif
-        {
-                location = (__u16 *)(regs->psw.addr-S390_lowcore.pgm_ilc);
-                printk("%s ieee fpu instruction not emulated "
-                       "process name: %s pid: %d \n",
-                       instr, current->comm, current->pid);
-                printk("%s's PSW:    %08lx %08lx\n", instr,
-                       (unsigned long) regs->psw.mask,
-                       (unsigned long) location);
-        }
-}
-
-static inline void emu_set_CC (struct pt_regs *regs, int cc)
-{
-        regs->psw.mask = (regs->psw.mask & 0xFFFFCFFF) | ((cc&3) << 12);
-}
-
-/*
- * Set the condition code in the user psw.
- *  0 : Result is zero
- *  1 : Result is less than zero
- *  2 : Result is greater than zero
- *  3 : Result is NaN or INF
- */
-static inline void emu_set_CC_cs(struct pt_regs *regs, int class, int sign)
-{
-        switch (class) {
-        case FP_CLS_NORMAL:
-        case FP_CLS_INF:
-                emu_set_CC(regs, sign ? 1 : 2);
-                break;
-        case FP_CLS_ZERO:
-                emu_set_CC(regs, 0);
-                break;
-        case FP_CLS_NAN:
-                emu_set_CC(regs, 3);
-                break;
-        }
-}
-
-/* Add long double */
-static int emu_axbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QB, &cvt.ld);
-        FP_ADD_Q(QR, QA, QB);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        emu_set_CC_cs(regs, QR_c, QR_s);
-        return _fex;
-}
-
-/* Add double */
-static int emu_adbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_ADD_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Add double */
-static int emu_adb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, val);
-        FP_ADD_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Add float */
-static int emu_aebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_ADD_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Add float */
-static int emu_aeb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, val);
-        FP_ADD_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Compare long double */
-static int emu_cxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB);
-	mathemu_ldcv cvt;
-        int IR;
-
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_RAW_QP(QA, &cvt.ld);
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_RAW_QP(QB, &cvt.ld);
-        FP_CMP_Q(IR, QA, QB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        return 0;
-}
-
-/* Compare double */
-static int emu_cdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DB);
-        int IR;
-
-        FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_RAW_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_CMP_D(IR, DA, DB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        return 0;
-}
-
-/* Compare double */
-static int emu_cdb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DB);
-        int IR;
-
-        FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_RAW_DP(DB, val);
-        FP_CMP_D(IR, DA, DB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        return 0;
-}
-
-/* Compare float */
-static int emu_cebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SB);
-        int IR;
-
-        FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_RAW_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_CMP_S(IR, SA, SB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        return 0;
-}
-
-/* Compare float */
-static int emu_ceb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SB);
-        int IR;
-
-        FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_RAW_SP(SB, val);
-        FP_CMP_S(IR, SA, SB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        return 0;
-}
-
-/* Compare and signal long double */
-static int emu_kxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int IR;
-
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_RAW_QP(QA, &cvt.ld);
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QB, &cvt.ld);
-        FP_CMP_Q(IR, QA, QB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        if (IR == 3)
-                FP_SET_EXCEPTION (FP_EX_INVALID);
-        return _fex;
-}
-
-/* Compare and signal double */
-static int emu_kdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DB);
-        FP_DECL_EX;
-        int IR;
-
-        FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_RAW_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_CMP_D(IR, DA, DB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        if (IR == 3)
-                FP_SET_EXCEPTION (FP_EX_INVALID);
-        return _fex;
-}
-
-/* Compare and signal double */
-static int emu_kdb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DB);
-        FP_DECL_EX;
-        int IR;
-
-        FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_RAW_DP(DB, val);
-        FP_CMP_D(IR, DA, DB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        if (IR == 3)
-                FP_SET_EXCEPTION (FP_EX_INVALID);
-        return _fex;
-}
-
-/* Compare and signal float */
-static int emu_kebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SB);
-        FP_DECL_EX;
-        int IR;
-
-        FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_RAW_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_CMP_S(IR, SA, SB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        if (IR == 3)
-                FP_SET_EXCEPTION (FP_EX_INVALID);
-        return _fex;
-}
-
-/* Compare and signal float */
-static int emu_keb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SB);
-        FP_DECL_EX;
-        int IR;
-
-        FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_RAW_SP(SB, val);
-        FP_CMP_S(IR, SA, SB, 3);
-        /*
-         * IR == -1 if DA < DB, IR == 0 if DA == DB,
-         * IR == 1 if DA > DB and IR == 3 if unorderded
-         */
-        emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR);
-        if (IR == 3)
-                FP_SET_EXCEPTION (FP_EX_INVALID);
-        return _fex;
-}
-
-/* Convert from fixed long double */
-static int emu_cxfbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        __s32 si;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        si = regs->gprs[ry];
-        FP_FROM_INT_Q(QR, si, 32, int);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Convert from fixed double */
-static int emu_cdfbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DR);
-        FP_DECL_EX;
-        __s32 si;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        si = regs->gprs[ry];
-        FP_FROM_INT_D(DR, si, 32, int);
-        FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Convert from fixed float */
-static int emu_cefbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SR);
-        FP_DECL_EX;
-        __s32 si;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        si = regs->gprs[ry];
-        FP_FROM_INT_S(SR, si, 32, int);
-        FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Convert to fixed long double */
-static int emu_cfxbr (struct pt_regs *regs, int rx, int ry, int mask) {
-        FP_DECL_Q(QA);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        __s32 si;
-        int mode;
-
-	if (mask == 0)
-		mode = current->thread.fp_regs.fpc & 3;
-	else if (mask == 1)
-		mode = FP_RND_NEAREST;
-	else
-		mode = mask - 4;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        FP_TO_INT_ROUND_Q(si, QA, 32, 1);
-        regs->gprs[rx] = si;
-        emu_set_CC_cs(regs, QA_c, QA_s);
-        return _fex;
-}
-
-/* Convert to fixed double */
-static int emu_cfdbr (struct pt_regs *regs, int rx, int ry, int mask) {
-        FP_DECL_D(DA);
-        FP_DECL_EX;
-        __s32 si;
-        int mode;
-
-	if (mask == 0)
-		mode = current->thread.fp_regs.fpc & 3;
-	else if (mask == 1)
-		mode = FP_RND_NEAREST;
-	else
-		mode = mask - 4;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-        FP_TO_INT_ROUND_D(si, DA, 32, 1);
-        regs->gprs[rx] = si;
-        emu_set_CC_cs(regs, DA_c, DA_s);
-        return _fex;
-}
-
-/* Convert to fixed float */
-static int emu_cfebr (struct pt_regs *regs, int rx, int ry, int mask) {
-        FP_DECL_S(SA);
-        FP_DECL_EX;
-        __s32 si;
-        int mode;
-
-	if (mask == 0)
-		mode = current->thread.fp_regs.fpc & 3;
-	else if (mask == 1)
-		mode = FP_RND_NEAREST;
-	else
-		mode = mask - 4;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-        FP_TO_INT_ROUND_S(si, SA, 32, 1);
-        regs->gprs[rx] = si;
-        emu_set_CC_cs(regs, SA_c, SA_s);
-        return _fex;
-}
-
-/* Divide long double */
-static int emu_dxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QB, &cvt.ld);
-        FP_DIV_Q(QR, QA, QB);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Divide double */
-static int emu_ddbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_DIV_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Divide double */
-static int emu_ddb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, val);
-        FP_DIV_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Divide float */
-static int emu_debr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_DIV_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Divide float */
-static int emu_deb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, val);
-        FP_DIV_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Divide to integer double */
-static int emu_didbr (struct pt_regs *regs, int rx, int ry, int mask) {
-        display_emulation_not_implemented(regs, "didbr");
-        return 0;
-}
-
-/* Divide to integer float */
-static int emu_diebr (struct pt_regs *regs, int rx, int ry, int mask) {
-        display_emulation_not_implemented(regs, "diebr");
-        return 0;
-}
-
-/* Extract fpc */
-static int emu_efpc (struct pt_regs *regs, int rx, int ry) {
-        regs->gprs[rx] = current->thread.fp_regs.fpc;
-        return 0;
-}
-
-/* Load and test long double */
-static int emu_ltxbr (struct pt_regs *regs, int rx, int ry) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-	mathemu_ldcv cvt;
-        FP_DECL_Q(QA);
-        FP_DECL_EX;
-
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        fp_regs->fprs[rx].ui = fp_regs->fprs[ry].ui;
-        fp_regs->fprs[rx+2].ui = fp_regs->fprs[ry+2].ui;
-        emu_set_CC_cs(regs, QA_c, QA_s);
-        return _fex;
-}
-
-/* Load and test double */
-static int emu_ltdbr (struct pt_regs *regs, int rx, int ry) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        FP_DECL_D(DA);
-        FP_DECL_EX;
-
-        FP_UNPACK_DP(DA, &fp_regs->fprs[ry].d);
-        fp_regs->fprs[rx].ui = fp_regs->fprs[ry].ui;
-        emu_set_CC_cs(regs, DA_c, DA_s);
-        return _fex;
-}
-
-/* Load and test double */
-static int emu_ltebr (struct pt_regs *regs, int rx, int ry) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        FP_DECL_S(SA);
-        FP_DECL_EX;
-
-        FP_UNPACK_SP(SA, &fp_regs->fprs[ry].f);
-        fp_regs->fprs[rx].ui = fp_regs->fprs[ry].ui;
-        emu_set_CC_cs(regs, SA_c, SA_s);
-        return _fex;
-}
-
-/* Load complement long double */
-static int emu_lcxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-	FP_NEG_Q(QR, QA);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        emu_set_CC_cs(regs, QR_c, QR_s);
-        return _fex;
-}
-
-/* Load complement double */
-static int emu_lcdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-	FP_NEG_D(DR, DA);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Load complement float */
-static int emu_lcebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-	FP_NEG_S(SR, SA);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Load floating point integer long double */
-static int emu_fixbr (struct pt_regs *regs, int rx, int ry, int mask) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        FP_DECL_Q(QA);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        __s32 si;
-        int mode;
-
-	if (mask == 0)
-		mode = fp_regs->fpc & 3;
-	else if (mask == 1)
-		mode = FP_RND_NEAREST;
-	else
-		mode = mask - 4;
-        cvt.w.high = fp_regs->fprs[ry].ui;
-        cvt.w.low = fp_regs->fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-	FP_TO_FPINT_ROUND_Q(QA);
-	FP_PACK_QP(&cvt.ld, QA);
-	fp_regs->fprs[rx].ui = cvt.w.high;
-	fp_regs->fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Load floating point integer double */
-static int emu_fidbr (struct pt_regs *regs, int rx, int ry, int mask) {
-	/* FIXME: rounding mode !! */
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        FP_DECL_D(DA);
-        FP_DECL_EX;
-        __s32 si;
-        int mode;
-
-	if (mask == 0)
-		mode = fp_regs->fpc & 3;
-	else if (mask == 1)
-		mode = FP_RND_NEAREST;
-	else
-		mode = mask - 4;
-        FP_UNPACK_DP(DA, &fp_regs->fprs[ry].d);
-	FP_TO_FPINT_ROUND_D(DA);
-	FP_PACK_DP(&fp_regs->fprs[rx].d, DA);
-        return _fex;
-}
-
-/* Load floating point integer float */
-static int emu_fiebr (struct pt_regs *regs, int rx, int ry, int mask) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        FP_DECL_S(SA);
-        FP_DECL_EX;
-        __s32 si;
-        int mode;
-
-	if (mask == 0)
-		mode = fp_regs->fpc & 3;
-	else if (mask == 1)
-		mode = FP_RND_NEAREST;
-	else
-		mode = mask - 4;
-        FP_UNPACK_SP(SA, &fp_regs->fprs[ry].f);
-	FP_TO_FPINT_ROUND_S(SA);
-	FP_PACK_SP(&fp_regs->fprs[rx].f, SA);
-        return _fex;
-}
-
-/* Load lengthened double to long double */
-static int emu_lxdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-	FP_CONV (Q, D, 4, 2, QR, DA);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Load lengthened double to long double */
-static int emu_lxdb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, val);
-	FP_CONV (Q, D, 4, 2, QR, DA);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Load lengthened float to long double */
-static int emu_lxebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-	FP_CONV (Q, S, 4, 1, QR, SA);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Load lengthened float to long double */
-static int emu_lxeb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, val);
-	FP_CONV (Q, S, 4, 1, QR, SA);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Load lengthened float to double */
-static int emu_ldebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_D(DR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-	FP_CONV (D, S, 2, 1, DR, SA);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Load lengthened float to double */
-static int emu_ldeb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_D(DR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, val);
-	FP_CONV (D, S, 2, 1, DR, SA);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Load negative long double */
-static int emu_lnxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        if (QA_s == 0) {
-		FP_NEG_Q(QR, QA);
-		FP_PACK_QP(&cvt.ld, QR);
-		current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-		current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-	} else {
-		current->thread.fp_regs.fprs[rx].ui =
-			current->thread.fp_regs.fprs[ry].ui;
-		current->thread.fp_regs.fprs[rx+2].ui =
-			current->thread.fp_regs.fprs[ry+2].ui;
-	}
-	emu_set_CC_cs(regs, QR_c, QR_s);
-        return _fex;
-}
-
-/* Load negative double */
-static int emu_lndbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-        if (DA_s == 0) {
-		FP_NEG_D(DR, DA);
-		FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-	} else
-		current->thread.fp_regs.fprs[rx].ui =
-			current->thread.fp_regs.fprs[ry].ui;
-	emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Load negative float */
-static int emu_lnebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-        if (SA_s == 0) {
-		FP_NEG_S(SR, SA);
-		FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-	} else
-		current->thread.fp_regs.fprs[rx].ui =
-			current->thread.fp_regs.fprs[ry].ui;
-	emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Load positive long double */
-static int emu_lpxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        if (QA_s != 0) {
-		FP_NEG_Q(QR, QA);
-		FP_PACK_QP(&cvt.ld, QR);
-		current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-		current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-	} else{
-		current->thread.fp_regs.fprs[rx].ui =
-			current->thread.fp_regs.fprs[ry].ui;
-		current->thread.fp_regs.fprs[rx+2].ui =
-			current->thread.fp_regs.fprs[ry+2].ui;
-	}
-	emu_set_CC_cs(regs, QR_c, QR_s);
-        return _fex;
-}
-
-/* Load positive double */
-static int emu_lpdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-        if (DA_s != 0) {
-		FP_NEG_D(DR, DA);
-		FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-	} else
-		current->thread.fp_regs.fprs[rx].ui =
-			current->thread.fp_regs.fprs[ry].ui;
-	emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Load positive float */
-static int emu_lpebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-        if (SA_s != 0) {
-		FP_NEG_S(SR, SA);
-		FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-	} else
-		current->thread.fp_regs.fprs[rx].ui =
-			current->thread.fp_regs.fprs[ry].ui;
-	emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Load rounded long double to double */
-static int emu_ldxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_D(DR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-	FP_CONV (D, Q, 2, 4, DR, QA);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].f, DR);
-        return _fex;
-}
-
-/* Load rounded long double to float */
-static int emu_lexbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_S(SR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-	FP_CONV (S, Q, 1, 4, SR, QA);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Load rounded double to float */
-static int emu_ledbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_S(SR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-	FP_CONV (S, D, 1, 2, SR, DA);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Multiply long double */
-static int emu_mxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QB, &cvt.ld);
-        FP_MUL_Q(QR, QA, QB);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Multiply double */
-static int emu_mdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_MUL_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Multiply double */
-static int emu_mdb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, val);
-        FP_MUL_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Multiply double to long double */
-static int emu_mxdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
-	FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-	FP_CONV (Q, D, 4, 2, QA, DA);
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-	FP_CONV (Q, D, 4, 2, QB, DA);
-        FP_MUL_Q(QR, QA, QB);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Multiply double to long double */
-static int emu_mxdb (struct pt_regs *regs, int rx, long double *val) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        FP_UNPACK_QP(QB, val);
-        FP_MUL_Q(QR, QA, QB);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        return _fex;
-}
-
-/* Multiply float */
-static int emu_meebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_MUL_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Multiply float */
-static int emu_meeb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, val);
-        FP_MUL_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        return _fex;
-}
-
-/* Multiply float to double */
-static int emu_mdebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-	FP_CONV (D, S, 2, 1, DA, SA);
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-	FP_CONV (D, S, 2, 1, DB, SA);
-        FP_MUL_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Multiply float to double */
-static int emu_mdeb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-	FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-	FP_CONV (D, S, 2, 1, DA, SA);
-        FP_UNPACK_SP(SA, val);
-	FP_CONV (D, S, 2, 1, DB, SA);
-        FP_MUL_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        return _fex;
-}
-
-/* Multiply and add double */
-static int emu_madbr (struct pt_regs *regs, int rx, int ry, int rz) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_UNPACK_DP(DC, &current->thread.fp_regs.fprs[rz].d);
-        FP_MUL_D(DR, DA, DB);
-        FP_ADD_D(DR, DR, DC);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rz].d, DR);
-        return _fex;
-}
-
-/* Multiply and add double */
-static int emu_madb (struct pt_regs *regs, int rx, double *val, int rz) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, val);
-        FP_UNPACK_DP(DC, &current->thread.fp_regs.fprs[rz].d);
-        FP_MUL_D(DR, DA, DB);
-        FP_ADD_D(DR, DR, DC);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rz].d, DR);
-        return _fex;
-}
-
-/* Multiply and add float */
-static int emu_maebr (struct pt_regs *regs, int rx, int ry, int rz) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_UNPACK_SP(SC, &current->thread.fp_regs.fprs[rz].f);
-        FP_MUL_S(SR, SA, SB);
-        FP_ADD_S(SR, SR, SC);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rz].f, SR);
-        return _fex;
-}
-
-/* Multiply and add float */
-static int emu_maeb (struct pt_regs *regs, int rx, float *val, int rz) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, val);
-        FP_UNPACK_SP(SC, &current->thread.fp_regs.fprs[rz].f);
-        FP_MUL_S(SR, SA, SB);
-        FP_ADD_S(SR, SR, SC);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rz].f, SR);
-        return _fex;
-}
-
-/* Multiply and subtract double */
-static int emu_msdbr (struct pt_regs *regs, int rx, int ry, int rz) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_UNPACK_DP(DC, &current->thread.fp_regs.fprs[rz].d);
-        FP_MUL_D(DR, DA, DB);
-        FP_SUB_D(DR, DR, DC);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rz].d, DR);
-        return _fex;
-}
-
-/* Multiply and subtract double */
-static int emu_msdb (struct pt_regs *regs, int rx, double *val, int rz) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, val);
-        FP_UNPACK_DP(DC, &current->thread.fp_regs.fprs[rz].d);
-        FP_MUL_D(DR, DA, DB);
-        FP_SUB_D(DR, DR, DC);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rz].d, DR);
-        return _fex;
-}
-
-/* Multiply and subtract float */
-static int emu_msebr (struct pt_regs *regs, int rx, int ry, int rz) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_UNPACK_SP(SC, &current->thread.fp_regs.fprs[rz].f);
-        FP_MUL_S(SR, SA, SB);
-        FP_SUB_S(SR, SR, SC);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rz].f, SR);
-        return _fex;
-}
-
-/* Multiply and subtract float */
-static int emu_mseb (struct pt_regs *regs, int rx, float *val, int rz) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, val);
-        FP_UNPACK_SP(SC, &current->thread.fp_regs.fprs[rz].f);
-        FP_MUL_S(SR, SA, SB);
-        FP_SUB_S(SR, SR, SC);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rz].f, SR);
-        return _fex;
-}
-
-/* Set floating point control word */
-static int emu_sfpc (struct pt_regs *regs, int rx, int ry) {
-        __u32 temp;
-
-        temp = regs->gprs[rx];
-        if ((temp & ~FPC_VALID_MASK) != 0)
-		return SIGILL;
-	current->thread.fp_regs.fpc = temp;
-        return 0;
-}
-
-/* Square root long double */
-static int emu_sqxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-	FP_SQRT_Q(QR, QA);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        emu_set_CC_cs(regs, QR_c, QR_s);
-        return _fex;
-}
-
-/* Square root double */
-static int emu_sqdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d);
-	FP_SQRT_D(DR, DA);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Square root double */
-static int emu_sqdb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, val);
-	FP_SQRT_D(DR, DA);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Square root float */
-static int emu_sqebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f);
-	FP_SQRT_S(SR, SA);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Square root float */
-static int emu_sqeb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, val);
-	FP_SQRT_S(SR, SA);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Subtract long double */
-static int emu_sxbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
-        FP_DECL_EX;
-	mathemu_ldcv cvt;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_QP(QA, &cvt.ld);
-        cvt.w.high = current->thread.fp_regs.fprs[ry].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui;
-        FP_UNPACK_QP(QB, &cvt.ld);
-        FP_SUB_Q(QR, QA, QB);
-        FP_PACK_QP(&cvt.ld, QR);
-        current->thread.fp_regs.fprs[rx].ui = cvt.w.high;
-        current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low;
-        emu_set_CC_cs(regs, QR_c, QR_s);
-        return _fex;
-}
-
-/* Subtract double */
-static int emu_sdbr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d);
-        FP_SUB_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Subtract double */
-static int emu_sdb (struct pt_regs *regs, int rx, double *val) {
-        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-        FP_UNPACK_DP(DB, val);
-        FP_SUB_D(DR, DA, DB);
-	FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR);
-        emu_set_CC_cs(regs, DR_c, DR_s);
-        return _fex;
-}
-
-/* Subtract float */
-static int emu_sebr (struct pt_regs *regs, int rx, int ry) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f);
-        FP_SUB_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Subtract float */
-static int emu_seb (struct pt_regs *regs, int rx, float *val) {
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        int mode;
-
-	mode = current->thread.fp_regs.fpc & 3;
-        FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-        FP_UNPACK_SP(SB, val);
-        FP_SUB_S(SR, SA, SB);
-	FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR);
-        emu_set_CC_cs(regs, SR_c, SR_s);
-        return _fex;
-}
-
-/* Test data class long double */
-static int emu_tcxb (struct pt_regs *regs, int rx, long val) {
-        FP_DECL_Q(QA);
-	mathemu_ldcv cvt;
-	int bit;
-
-        cvt.w.high = current->thread.fp_regs.fprs[rx].ui;
-        cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui;
-        FP_UNPACK_RAW_QP(QA, &cvt.ld);
-	switch (QA_e) {
-	default:
-		bit = 8;		/* normalized number */
-		break;
-	case 0:
-		if (_FP_FRAC_ZEROP_4(QA))
-			bit = 10;	/* zero */
-		else
-			bit = 6;	/* denormalized number */
-		break;
-	case _FP_EXPMAX_Q:
-		if (_FP_FRAC_ZEROP_4(QA))
-			bit = 4;	/* infinity */
-		else if (_FP_FRAC_HIGH_RAW_Q(QA) & _FP_QNANBIT_Q)
-			bit = 2;	/* quiet NAN */
-		else
-			bit = 0;	/* signaling NAN */
-		break;
-	}
-	if (!QA_s)
-		bit++;
-	emu_set_CC(regs, ((__u32) val >> bit) & 1);
-        return 0;
-}
-
-/* Test data class double */
-static int emu_tcdb (struct pt_regs *regs, int rx, long val) {
-        FP_DECL_D(DA);
-	int bit;
-
-        FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d);
-	switch (DA_e) {
-	default:
-		bit = 8;		/* normalized number */
-		break;
-	case 0:
-		if (_FP_FRAC_ZEROP_2(DA))
-			bit = 10;	/* zero */
-		else
-			bit = 6;	/* denormalized number */
-		break;
-	case _FP_EXPMAX_D:
-		if (_FP_FRAC_ZEROP_2(DA))
-			bit = 4;	/* infinity */
-		else if (_FP_FRAC_HIGH_RAW_D(DA) & _FP_QNANBIT_D)
-			bit = 2;	/* quiet NAN */
-		else
-			bit = 0;	/* signaling NAN */
-		break;
-	}
-	if (!DA_s)
-		bit++;
-	emu_set_CC(regs, ((__u32) val >> bit) & 1);
-        return 0;
-}
-
-/* Test data class float */
-static int emu_tceb (struct pt_regs *regs, int rx, long val) {
-        FP_DECL_S(SA);
-	int bit;
-
-        FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f);
-	switch (SA_e) {
-	default:
-		bit = 8;		/* normalized number */
-		break;
-	case 0:
-		if (_FP_FRAC_ZEROP_1(SA))
-			bit = 10;	/* zero */
-		else
-			bit = 6;	/* denormalized number */
-		break;
-	case _FP_EXPMAX_S:
-		if (_FP_FRAC_ZEROP_1(SA))
-			bit = 4;	/* infinity */
-		else if (_FP_FRAC_HIGH_RAW_S(SA) & _FP_QNANBIT_S)
-			bit = 2;	/* quiet NAN */
-		else
-			bit = 0;	/* signaling NAN */
-		break;
-	}
-	if (!SA_s)
-		bit++;
-	emu_set_CC(regs, ((__u32) val >> bit) & 1);
-        return 0;
-}
-
-static inline void emu_load_regd(int reg) {
-	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
-                return;
-	asm volatile(		/* load reg from fp_regs.fprs[reg] */
-		"	bras	1,0f\n"
-		"	ld	0,0(%1)\n"
-		"0:	ex	%0,0(1)"
-		: /* no output */
-		: "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
-		: "1");
-}
-
-static inline void emu_load_rege(int reg) {
-	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
-                return;
-	asm volatile(		/* load reg from fp_regs.fprs[reg] */
-		"	bras	1,0f\n"
-		"	le	0,0(%1)\n"
-		"0:	ex	%0,0(1)"
-		: /* no output */
-		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-		: "1");
-}
-
-static inline void emu_store_regd(int reg) {
-	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
-                return;
-	asm volatile(		/* store reg to fp_regs.fprs[reg] */
-		"	bras	1,0f\n"
-		"	std	0,0(%1)\n"
-		"0:	ex	%0,0(1)"
-		: /* no output */
-		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
-		: "1");
-}
-
-
-static inline void emu_store_rege(int reg) {
-	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
-                return;
-	asm volatile(		/* store reg to fp_regs.fprs[reg] */
-		"	bras	1,0f\n"
-		"	ste	0,0(%1)\n"
-		"0:	ex	%0,0(1)"
-		: /* no output */
-		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-		: "1");
-}
-
-int math_emu_b3(__u8 *opcode, struct pt_regs * regs) {
-        int _fex = 0;
-        static const __u8 format_table[256] = {
-                [0x00] = 0x03,[0x01] = 0x03,[0x02] = 0x03,[0x03] = 0x03,
-		[0x04] = 0x0f,[0x05] = 0x0d,[0x06] = 0x0e,[0x07] = 0x0d,
-		[0x08] = 0x03,[0x09] = 0x03,[0x0a] = 0x03,[0x0b] = 0x03,
-                [0x0c] = 0x0f,[0x0d] = 0x03,[0x0e] = 0x06,[0x0f] = 0x06,
-		[0x10] = 0x02,[0x11] = 0x02,[0x12] = 0x02,[0x13] = 0x02,
-		[0x14] = 0x03,[0x15] = 0x02,[0x16] = 0x01,[0x17] = 0x03,
-                [0x18] = 0x02,[0x19] = 0x02,[0x1a] = 0x02,[0x1b] = 0x02,
-		[0x1c] = 0x02,[0x1d] = 0x02,[0x1e] = 0x05,[0x1f] = 0x05,
-		[0x40] = 0x01,[0x41] = 0x01,[0x42] = 0x01,[0x43] = 0x01,
-                [0x44] = 0x12,[0x45] = 0x0d,[0x46] = 0x11,[0x47] = 0x04,
-		[0x48] = 0x01,[0x49] = 0x01,[0x4a] = 0x01,[0x4b] = 0x01,
-		[0x4c] = 0x01,[0x4d] = 0x01,[0x53] = 0x06,[0x57] = 0x06,
-                [0x5b] = 0x05,[0x5f] = 0x05,[0x84] = 0x13,[0x8c] = 0x13,
-		[0x94] = 0x09,[0x95] = 0x08,[0x96] = 0x07,[0x98] = 0x0c,
-		[0x99] = 0x0b,[0x9a] = 0x0a
-        };
-        static const void *jump_table[256]= {
-                [0x00] = emu_lpebr,[0x01] = emu_lnebr,[0x02] = emu_ltebr,
-                [0x03] = emu_lcebr,[0x04] = emu_ldebr,[0x05] = emu_lxdbr,
-                [0x06] = emu_lxebr,[0x07] = emu_mxdbr,[0x08] = emu_kebr,
-                [0x09] = emu_cebr, [0x0a] = emu_aebr, [0x0b] = emu_sebr,
-                [0x0c] = emu_mdebr,[0x0d] = emu_debr, [0x0e] = emu_maebr,
-                [0x0f] = emu_msebr,[0x10] = emu_lpdbr,[0x11] = emu_lndbr, 
-                [0x12] = emu_ltdbr,[0x13] = emu_lcdbr,[0x14] = emu_sqebr,
-                [0x15] = emu_sqdbr,[0x16] = emu_sqxbr,[0x17] = emu_meebr,
-                [0x18] = emu_kdbr, [0x19] = emu_cdbr, [0x1a] = emu_adbr,
-                [0x1b] = emu_sdbr, [0x1c] = emu_mdbr, [0x1d] = emu_ddbr,  
-                [0x1e] = emu_madbr,[0x1f] = emu_msdbr,[0x40] = emu_lpxbr,
-                [0x41] = emu_lnxbr,[0x42] = emu_ltxbr,[0x43] = emu_lcxbr,
-                [0x44] = emu_ledbr,[0x45] = emu_ldxbr,[0x46] = emu_lexbr,
-                [0x47] = emu_fixbr,[0x48] = emu_kxbr, [0x49] = emu_cxbr,  
-                [0x4a] = emu_axbr, [0x4b] = emu_sxbr, [0x4c] = emu_mxbr,
-                [0x4d] = emu_dxbr, [0x53] = emu_diebr,[0x57] = emu_fiebr,
-                [0x5b] = emu_didbr,[0x5f] = emu_fidbr,[0x84] = emu_sfpc,
-                [0x8c] = emu_efpc, [0x94] = emu_cefbr,[0x95] = emu_cdfbr, 
-                [0x96] = emu_cxfbr,[0x98] = emu_cfebr,[0x99] = emu_cfdbr,
-                [0x9a] = emu_cfxbr
-        };
-
-        switch (format_table[opcode[1]]) {
-        case 1: /* RRE format, long double operation */
-                if (opcode[3] & 0x22)
-			return SIGILL;
-                emu_store_regd((opcode[3] >> 4) & 15);
-                emu_store_regd(((opcode[3] >> 4) & 15) + 2);
-                emu_store_regd(opcode[3] & 15);
-                emu_store_regd((opcode[3] & 15) + 2);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *,int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(((opcode[3] >> 4) & 15) + 2);
-                emu_load_regd(opcode[3] & 15);
-                emu_load_regd((opcode[3] & 15) + 2);
-		break;
-        case 2: /* RRE format, double operation */
-                emu_store_regd((opcode[3] >> 4) & 15);
-                emu_store_regd(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(opcode[3] & 15);
-		break;
-        case 3: /* RRE format, float operation */
-                emu_store_rege((opcode[3] >> 4) & 15);
-                emu_store_rege(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_rege((opcode[3] >> 4) & 15);
-                emu_load_rege(opcode[3] & 15);
-		break;
-        case 4: /* RRF format, long double operation */
-                if (opcode[3] & 0x22)
-			return SIGILL;
-                emu_store_regd((opcode[3] >> 4) & 15);
-                emu_store_regd(((opcode[3] >> 4) & 15) + 2);
-                emu_store_regd(opcode[3] & 15);
-                emu_store_regd((opcode[3] & 15) + 2);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(((opcode[3] >> 4) & 15) + 2);
-                emu_load_regd(opcode[3] & 15);
-                emu_load_regd((opcode[3] & 15) + 2);
-		break;
-        case 5: /* RRF format, double operation */
-                emu_store_regd((opcode[2] >> 4) & 15);
-                emu_store_regd((opcode[3] >> 4) & 15);
-                emu_store_regd(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4);
-                emu_load_regd((opcode[2] >> 4) & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(opcode[3] & 15);
-		break;
-        case 6: /* RRF format, float operation */
-                emu_store_rege((opcode[2] >> 4) & 15);
-                emu_store_rege((opcode[3] >> 4) & 15);
-                emu_store_rege(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4);
-                emu_load_rege((opcode[2] >> 4) & 15);
-                emu_load_rege((opcode[3] >> 4) & 15);
-                emu_load_rege(opcode[3] & 15);
-		break;
-        case 7: /* RRE format, cxfbr instruction */
-                /* call the emulation function */
-                if (opcode[3] & 0x20)
-			return SIGILL;
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(((opcode[3] >> 4) & 15) + 2);
-		break;
-        case 8: /* RRE format, cdfbr instruction */
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-		break;
-        case 9: /* RRE format, cefbr instruction */
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_rege((opcode[3] >> 4) & 15);
-		break;
-        case 10: /* RRF format, cfxbr instruction */
-                if ((opcode[2] & 128) == 128 || (opcode[2] & 96) == 32)
-			/* mask of { 2,3,8-15 } is invalid */
-			return SIGILL;
-                if (opcode[3] & 2)
-			return SIGILL;
-                emu_store_regd(opcode[3] & 15);
-                emu_store_regd((opcode[3] & 15) + 2);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4);
-		break;
-        case 11: /* RRF format, cfdbr instruction */
-                if ((opcode[2] & 128) == 128 || (opcode[2] & 96) == 32)
-			/* mask of { 2,3,8-15 } is invalid */
-			return SIGILL;
-                emu_store_regd(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4);
-		break;
-        case 12: /* RRF format, cfebr instruction */
-                if ((opcode[2] & 128) == 128 || (opcode[2] & 96) == 32)
-			/* mask of { 2,3,8-15 } is invalid */
-			return SIGILL;
-                emu_store_rege(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4);
-		break;
-        case 13: /* RRE format, ldxbr & mdxbr instruction */
-                /* double store but long double load */
-                if (opcode[3] & 0x20)
-			return SIGILL;
-                emu_store_regd((opcode[3] >> 4) & 15);
-                emu_store_regd(opcode[3]  & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(((opcode[3] >> 4) & 15) + 2);
-		break;
-        case 14: /* RRE format, ldxbr & mdxbr instruction */
-                /* float store but long double load */
-                if (opcode[3] & 0x20)
-			return SIGILL;
-                emu_store_rege((opcode[3] >> 4) & 15);
-                emu_store_rege(opcode[3]  & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                emu_load_regd(((opcode[3] >> 4) & 15) + 2);
-		break;
-        case 15: /* RRE format, ldebr & mdebr instruction */
-                /* float store but double load */
-                emu_store_rege((opcode[3] >> 4) & 15);
-                emu_store_rege(opcode[3]  & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-		break;
-        case 16: /* RRE format, ldxbr instruction */
-                /* long double store but double load */
-                if (opcode[3] & 2)
-			return SIGILL;
-                emu_store_regd(opcode[3] & 15);
-                emu_store_regd((opcode[3] & 15) + 2);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_regd((opcode[3] >> 4) & 15);
-                break;
-        case 17: /* RRE format, ldxbr instruction */
-                /* long double store but float load */
-                if (opcode[3] & 2)
-			return SIGILL;
-                emu_store_regd(opcode[3] & 15);
-                emu_store_regd((opcode[3] & 15) + 2);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_rege((opcode[3] >> 4) & 15);
-                break;
-        case 18: /* RRE format, ledbr instruction */
-                /* double store but float load */
-                emu_store_regd(opcode[3] & 15);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                emu_load_rege((opcode[3] >> 4) & 15);
-                break;
-        case 19: /* RRE format, efpc & sfpc instruction */
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, int))
-			jump_table[opcode[1]])
-                        (regs, opcode[3] >> 4, opcode[3] & 15);
-                break;
-        default: /* invalid operation */
-                return SIGILL;
-        }
-	if (_fex != 0) {
-		current->thread.fp_regs.fpc |= _fex;
-		if (current->thread.fp_regs.fpc & (_fex << 8))
-			return SIGFPE;
-	}
-	return 0;
-}
-
-static void* calc_addr(struct pt_regs *regs, int rx, int rb, int disp)
-{
-        addr_t addr;
-
-        rx &= 15;
-        rb &= 15;
-        addr = disp & 0xfff;
-        addr += (rx != 0) ? regs->gprs[rx] : 0;  /* + index */
-        addr += (rb != 0) ? regs->gprs[rb] : 0;  /* + base  */
-        return (void*) addr;
-}
-    
-int math_emu_ed(__u8 *opcode, struct pt_regs * regs) {
-        int _fex = 0;
-
-        static const __u8 format_table[256] = {
-                [0x04] = 0x06,[0x05] = 0x05,[0x06] = 0x07,[0x07] = 0x05,
-		[0x08] = 0x02,[0x09] = 0x02,[0x0a] = 0x02,[0x0b] = 0x02,
-		[0x0c] = 0x06,[0x0d] = 0x02,[0x0e] = 0x04,[0x0f] = 0x04,
-                [0x10] = 0x08,[0x11] = 0x09,[0x12] = 0x0a,[0x14] = 0x02,
-		[0x15] = 0x01,[0x17] = 0x02,[0x18] = 0x01,[0x19] = 0x01,
-		[0x1a] = 0x01,[0x1b] = 0x01,[0x1c] = 0x01,[0x1d] = 0x01,
-                [0x1e] = 0x03,[0x1f] = 0x03,
-        };
-        static const void *jump_table[]= {
-                [0x04] = emu_ldeb,[0x05] = emu_lxdb,[0x06] = emu_lxeb,
-                [0x07] = emu_mxdb,[0x08] = emu_keb, [0x09] = emu_ceb,
-                [0x0a] = emu_aeb, [0x0b] = emu_seb, [0x0c] = emu_mdeb,
-                [0x0d] = emu_deb, [0x0e] = emu_maeb,[0x0f] = emu_mseb,
-                [0x10] = emu_tceb,[0x11] = emu_tcdb,[0x12] = emu_tcxb,
-                [0x14] = emu_sqeb,[0x15] = emu_sqdb,[0x17] = emu_meeb,
-                [0x18] = emu_kdb, [0x19] = emu_cdb, [0x1a] = emu_adb,
-                [0x1b] = emu_sdb, [0x1c] = emu_mdb, [0x1d] = emu_ddb,
-                [0x1e] = emu_madb,[0x1f] = emu_msdb
-        };
-
-        switch (format_table[opcode[5]]) {
-        case 1: /* RXE format, double constant */ {
-                __u64 *dxb, temp;
-                __u32 opc;
-
-                emu_store_regd((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_copy_from_user(&temp, dxb, 8);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, double *))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (double *) &temp);
-                emu_load_regd((opcode[1] >> 4) & 15);
-                break;
-        }
-        case 2: /* RXE format, float constant */ {
-                __u32 *dxb, temp;
-                __u32 opc;
-
-                emu_store_rege((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_get_user(temp, dxb);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, float *))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (float *) &temp);
-                emu_load_rege((opcode[1] >> 4) & 15);
-                break;
-        }
-        case 3: /* RXF format, double constant */ {
-                __u64 *dxb, temp;
-                __u32 opc;
-
-                emu_store_regd((opcode[1] >> 4) & 15);
-                emu_store_regd((opcode[4] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_copy_from_user(&temp, dxb, 8);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, double *, int))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (double *) &temp, opcode[4] >> 4);
-                emu_load_regd((opcode[1] >> 4) & 15);
-                break;
-        }
-        case 4: /* RXF format, float constant */ {
-                __u32 *dxb, temp;
-                __u32 opc;
-
-                emu_store_rege((opcode[1] >> 4) & 15);
-                emu_store_rege((opcode[4] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_get_user(temp, dxb);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, float *, int))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (float *) &temp, opcode[4] >> 4);
-                emu_load_rege((opcode[4] >> 4) & 15);
-                break;
-        }
-        case 5: /* RXE format, double constant */
-                /* store double and load long double */ 
-        {
-                __u64 *dxb, temp;
-                __u32 opc;
-                if ((opcode[1] >> 4) & 0x20)
-			return SIGILL;
-                emu_store_regd((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_copy_from_user(&temp, dxb, 8);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, double *))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (double *) &temp);
-                emu_load_regd((opcode[1] >> 4) & 15);
-                emu_load_regd(((opcode[1] >> 4) & 15) + 2);
-                break;
-        }
-        case 6: /* RXE format, float constant */
-                /* store float and load double */ 
-        {
-                __u32 *dxb, temp;
-                __u32 opc;
-                emu_store_rege((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_get_user(temp, dxb);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, float *))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (float *) &temp);
-                emu_load_regd((opcode[1] >> 4) & 15);
-                break;
-        }
-        case 7: /* RXE format, float constant */
-                /* store float and load long double */ 
-        {
-                __u32 *dxb, temp;
-                __u32 opc;
-                if ((opcode[1] >> 4) & 0x20)
-			return SIGILL;
-                emu_store_rege((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                mathemu_get_user(temp, dxb);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, float *))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, (float *) &temp);
-                emu_load_regd((opcode[1] >> 4) & 15);
-                emu_load_regd(((opcode[1] >> 4) & 15) + 2);
-                break;
-        }
-        case 8: /* RXE format, RX address used as int value */ {
-                __u64 dxb;
-                __u32 opc;
-
-                emu_store_rege((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u64) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, long))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, dxb);
-                break;
-        }
-        case 9: /* RXE format, RX address used as int value */ {
-                __u64 dxb;
-                __u32 opc;
-
-                emu_store_regd((opcode[1] >> 4) & 15);
-                opc = *((__u32 *) opcode);
-                dxb = (__u64) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, long))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, dxb);
-                break;
-        }
-        case 10: /* RXE format, RX address used as int value */ {
-                __u64 dxb;
-                __u32 opc;
-
-                if ((opcode[1] >> 4) & 2)
-			return SIGILL;
-                emu_store_regd((opcode[1] >> 4) & 15);
-                emu_store_regd(((opcode[1] >> 4) & 15) + 2);
-                opc = *((__u32 *) opcode);
-                dxb = (__u64) calc_addr(regs, opc >> 16, opc >> 12, opc);
-                /* call the emulation function */
-                _fex = ((int (*)(struct pt_regs *, int, long))
-			jump_table[opcode[5]])
-                        (regs, opcode[1] >> 4, dxb);
-                break;
-        }
-        default: /* invalid operation */
-                return SIGILL;
-        }
-	if (_fex != 0) {
-		current->thread.fp_regs.fpc |= _fex;
-		if (current->thread.fp_regs.fpc & (_fex << 8))
-			return SIGFPE;
-	}
-	return 0;
-}
-
-/*
- * Emulate LDR Rx,Ry with Rx or Ry not in {0, 2, 4, 6}
- */
-int math_emu_ldr(__u8 *opcode) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        __u16 opc = *((__u16 *) opcode);
-
-        if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
-                /* we got an exception therefore ry can't be in {0,2,4,6} */
-		asm volatile(		/* load rx from fp_regs.fprs[ry] */
-			"	bras	1,0f\n"
-			"	ld	0,0(%1)\n"
-			"0:	ex	%0,0(1)"
-			: /* no output */
-			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].d)
-			: "1");
-        } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-		asm volatile (		/* store ry to fp_regs.fprs[rx] */
-			"	bras	1,0f\n"
-			"	std	0,0(%1)\n"
-			"0:	ex	%0,0(1)"
-			: /* no output */
-			: "a" ((opc & 0xf) << 4),
-			  "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
-			: "1");
-        } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
-                fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
-	return 0;
-}
-
-/*
- * Emulate LER Rx,Ry with Rx or Ry not in {0, 2, 4, 6}
- */
-int math_emu_ler(__u8 *opcode) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        __u16 opc = *((__u16 *) opcode);
-
-        if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
-                /* we got an exception therefore ry can't be in {0,2,4,6} */
-		asm volatile(		/* load rx from fp_regs.fprs[ry] */
-			"	bras	1,0f\n"
-			"	le	0,0(%1)\n"
-			"0:	ex	%0,0(1)"
-			: /* no output */
-			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].f)
-			: "1");
-        } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-		asm volatile(		/* store ry to fp_regs.fprs[rx] */
-			"	bras	1,0f\n"
-			"	ste	0,0(%1)\n"
-			"0:	ex	%0,0(1)"
-			: /* no output */
-			: "a" ((opc & 0xf) << 4),
-			  "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
-			: "1");
-        } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
-                fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
-	return 0;
-}
-
-/*
- * Emulate LD R,D(X,B) with R not in {0, 2, 4, 6}
- */
-int math_emu_ld(__u8 *opcode, struct pt_regs * regs) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        __u32 opc = *((__u32 *) opcode);
-        __u64 *dxb;
-
-        dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-        mathemu_copy_from_user(&fp_regs->fprs[(opc >> 20) & 0xf].d, dxb, 8);
-	return 0;
-}
-
-/*
- * Emulate LE R,D(X,B) with R not in {0, 2, 4, 6}
- */
-int math_emu_le(__u8 *opcode, struct pt_regs * regs) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        __u32 opc = *((__u32 *) opcode);
-        __u32 *mem, *dxb;
-
-        dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-        mem = (__u32 *) (&fp_regs->fprs[(opc >> 20) & 0xf].f);
-        mathemu_get_user(mem[0], dxb);
-	return 0;
-}
-
-/*
- * Emulate STD R,D(X,B) with R not in {0, 2, 4, 6}
- */
-int math_emu_std(__u8 *opcode, struct pt_regs * regs) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        __u32 opc = *((__u32 *) opcode);
-        __u64 *dxb;
-
-        dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-        mathemu_copy_to_user(dxb, &fp_regs->fprs[(opc >> 20) & 0xf].d, 8);
-	return 0;
-}
-
-/*
- * Emulate STE R,D(X,B) with R not in {0, 2, 4, 6}
- */
-int math_emu_ste(__u8 *opcode, struct pt_regs * regs) {
-        s390_fp_regs *fp_regs = &current->thread.fp_regs;
-        __u32 opc = *((__u32 *) opcode);
-        __u32 *mem, *dxb;
-
-        dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc);
-        mem = (__u32 *) (&fp_regs->fprs[(opc >> 20) & 0xf].f);
-        mathemu_put_user(mem[0], dxb);
-	return 0;
-}
-
-/*
- * Emulate LFPC D(B)
- */
-int math_emu_lfpc(__u8 *opcode, struct pt_regs *regs) {
-        __u32 opc = *((__u32 *) opcode);
-        __u32 *dxb, temp;
-
-        dxb= (__u32 *) calc_addr(regs, 0, opc>>12, opc);
-        mathemu_get_user(temp, dxb);
-        if ((temp & ~FPC_VALID_MASK) != 0)
-		return SIGILL;
-	current->thread.fp_regs.fpc = temp;
-        return 0;
-}
-
-/*
- * Emulate STFPC D(B)
- */
-int math_emu_stfpc(__u8 *opcode, struct pt_regs *regs) {
-        __u32 opc = *((__u32 *) opcode);
-        __u32 *dxb;
-
-        dxb= (__u32 *) calc_addr(regs, 0, opc>>12, opc);
-        mathemu_put_user(current->thread.fp_regs.fpc, dxb);
-        return 0;
-}
-
-/*
- * Emulate SRNM D(B)
- */
-int math_emu_srnm(__u8 *opcode, struct pt_regs *regs) {
-        __u32 opc = *((__u32 *) opcode);
-        __u32 temp;
-
-        temp = calc_addr(regs, 0, opc>>12, opc);
-	current->thread.fp_regs.fpc &= ~3;
-        current->thread.fp_regs.fpc |= (temp & 3);
-        return 0;
-}
-
-/* broken compiler ... */
-long long
-__negdi2 (long long u)
-{
-
-  union lll {
-    long long ll;
-    long s[2];
-  };
-
-  union lll w,uu;
-
-  uu.ll = u;
-
-  w.s[1] = -uu.s[1];
-  w.s[0] = -uu.s[0] - ((int) w.s[1] != 0);
-
-  return w.ll;
-}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index d46cadeda204..8556d6be9b54 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -18,9 +18,7 @@ enum address_markers_idx {
 	KERNEL_END_NR,
 	VMEMMAP_NR,
 	VMALLOC_NR,
-#ifdef CONFIG_64BIT
 	MODULES_NR,
-#endif
 };
 
 static struct addr_marker address_markers[] = {
@@ -29,9 +27,7 @@ static struct addr_marker address_markers[] = {
 	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
 	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
 	[VMALLOC_NR]	  = {0, "vmalloc Area"},
-#ifdef CONFIG_64BIT
 	[MODULES_NR]	  = {0, "Modules Area"},
-#endif
 	{ -1, NULL }
 };
 
@@ -127,12 +123,6 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
 	}
 }
 
-#ifdef CONFIG_64BIT
-#define _PMD_PROT_MASK _SEGMENT_ENTRY_PROTECT
-#else
-#define _PMD_PROT_MASK 0
-#endif
-
 static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
 			   pud_t *pud, unsigned long addr)
 {
@@ -145,7 +135,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
 		pmd = pmd_offset(pud, addr);
 		if (!pmd_none(*pmd)) {
 			if (pmd_large(*pmd)) {
-				prot = pmd_val(*pmd) & _PMD_PROT_MASK;
+				prot = pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT;
 				note_page(m, st, prot, 3);
 			} else
 				walk_pte_level(m, st, pmd, addr);
@@ -155,12 +145,6 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
 	}
 }
 
-#ifdef CONFIG_64BIT
-#define _PUD_PROT_MASK _REGION3_ENTRY_RO
-#else
-#define _PUD_PROT_MASK 0
-#endif
-
 static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 			   pgd_t *pgd, unsigned long addr)
 {
@@ -173,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
 		pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud))
 			if (pud_large(*pud)) {
-				prot = pud_val(*pud) & _PUD_PROT_MASK;
+				prot = pud_val(*pud) & _REGION3_ENTRY_RO;
 				note_page(m, st, prot, 2);
 			} else
 				walk_pmd_level(m, st, pud, addr);
@@ -230,13 +214,9 @@ static int pt_dump_init(void)
 	 * kernel ASCE. We need this to keep the page table walker functions
 	 * from accessing non-existent entries.
 	 */
-#ifdef CONFIG_32BIT
-	max_addr = 1UL << 31;
-#else
 	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
 	max_addr = 1UL << (max_addr * 11 + 31);
 	address_markers[MODULES_NR].start_address = MODULES_VADDR;
-#endif
 	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
 	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
 	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 519bba716cc3..23c496957c22 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -51,7 +51,6 @@ struct qout64 {
 	struct qrange range[6];
 };
 
-#ifdef CONFIG_64BIT
 struct qrange_old {
 	unsigned int start; /* last byte type */
 	unsigned int end;   /* last byte reserved */
@@ -65,7 +64,6 @@ struct qout64_old {
 	int segrcnt;
 	struct qrange_old range[6];
 };
-#endif
 
 struct qin64 {
 	char qopcode;
@@ -103,7 +101,6 @@ static int scode_set;
 static int
 dcss_set_subcodes(void)
 {
-#ifdef CONFIG_64BIT
 	char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
 	unsigned long rx, ry;
 	int rc;
@@ -135,7 +132,6 @@ dcss_set_subcodes(void)
 		segext_scode = DCSS_SEGEXTX;
 		return 0;
 	}
-#endif
 	/* Diag x'64' new subcodes are not supported, set to old subcodes */
 	loadshr_scode = DCSS_LOADNOLY;
 	loadnsr_scode = DCSS_LOADNSR;
@@ -208,7 +204,6 @@ dcss_diag(int *func, void *parameter,
 	rx = (unsigned long) parameter;
 	ry = (unsigned long) *func;
 
-#ifdef CONFIG_64BIT
 	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
 	if (*func > DCSS_SEGEXT)
 		asm volatile(
@@ -225,13 +220,6 @@ dcss_diag(int *func, void *parameter,
 			"	ipm	%2\n"
 			"	srl	%2,28\n"
 			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
-#else
-	asm volatile(
-		"	diag	%0,%1,0x64\n"
-		"	ipm	%2\n"
-		"	srl	%2,28\n"
-		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
-#endif
 	*ret1 = rx;
 	*ret2 = ry;
 	return rc;
@@ -281,7 +269,6 @@ query_segment_type (struct dcss_segment *seg)
 		goto out_free;
 	}
 
-#ifdef CONFIG_64BIT
 	/* Only old format of output area of Diagnose x'64' is supported,
 	   copy data for the new format. */
 	if (segext_scode == DCSS_SEGEXT) {
@@ -307,7 +294,6 @@ query_segment_type (struct dcss_segment *seg)
 		}
 		kfree(qout_old);
 	}
-#endif
 	if (qout->segcnt > 6) {
 		rc = -EOPNOTSUPP;
 		goto out_free;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3ff86533f7db..76515bcea2f1 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -36,15 +36,9 @@
 #include <asm/facility.h>
 #include "../kernel/entry.h"
 
-#ifndef CONFIG_64BIT
-#define __FAIL_ADDR_MASK 0x7ffff000
-#define __SUBCODE_MASK 0x0200
-#define __PF_RES_FIELD 0ULL
-#else /* CONFIG_64BIT */
 #define __FAIL_ADDR_MASK -4096L
 #define __SUBCODE_MASK 0x0600
 #define __PF_RES_FIELD 0x8000000000000000ULL
-#endif /* CONFIG_64BIT */
 
 #define VM_FAULT_BADCONTEXT	0x010000
 #define VM_FAULT_BADMAP		0x020000
@@ -54,7 +48,6 @@
 
 static unsigned long store_indication __read_mostly;
 
-#ifdef CONFIG_64BIT
 static int __init fault_init(void)
 {
 	if (test_facility(75))
@@ -62,7 +55,6 @@ static int __init fault_init(void)
 	return 0;
 }
 early_initcall(fault_init);
-#endif
 
 static inline int notify_page_fault(struct pt_regs *regs)
 {
@@ -133,7 +125,6 @@ static int bad_address(void *p)
 	return probe_kernel_address((unsigned long *)p, dummy);
 }
 
-#ifdef CONFIG_64BIT
 static void dump_pagetable(unsigned long asce, unsigned long address)
 {
 	unsigned long *table = __va(asce & PAGE_MASK);
@@ -187,33 +178,6 @@ bad:
 	pr_cont("BAD\n");
 }
 
-#else /* CONFIG_64BIT */
-
-static void dump_pagetable(unsigned long asce, unsigned long address)
-{
-	unsigned long *table = __va(asce & PAGE_MASK);
-
-	pr_alert("AS:%08lx ", asce);
-	table = table + ((address >> 20) & 0x7ff);
-	if (bad_address(table))
-		goto bad;
-	pr_cont("S:%08lx ", *table);
-	if (*table & _SEGMENT_ENTRY_INVALID)
-		goto out;
-	table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
-	table = table + ((address >> 12) & 0xff);
-	if (bad_address(table))
-		goto bad;
-	pr_cont("P:%08lx ", *table);
-out:
-	pr_cont("\n");
-	return;
-bad:
-	pr_cont("BAD\n");
-}
-
-#endif /* CONFIG_64BIT */
-
 static void dump_fault_info(struct pt_regs *regs)
 {
 	unsigned long asce;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 5c586c78ca8d..1eb41bb3010c 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -106,11 +106,9 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 	pmd_t *pmdp, pmd;
 
 	pmdp = (pmd_t *) pudp;
-#ifdef CONFIG_64BIT
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
 		pmdp = (pmd_t *) pud_deref(pud);
 	pmdp += pmd_index(addr);
-#endif
 	do {
 		pmd = *pmdp;
 		barrier();
@@ -145,11 +143,9 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
 	pud_t *pudp, pud;
 
 	pudp = (pud_t *) pgdp;
-#ifdef CONFIG_64BIT
 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
 		pudp = (pud_t *) pgd_deref(pgd);
 	pudp += pud_index(addr);
-#endif
 	do {
 		pud = *pudp;
 		barrier();
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index d35b15113b17..80875c43a4a4 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -105,7 +105,6 @@ void __init paging_init(void)
 	unsigned long pgd_type, asce_bits;
 
 	init_mm.pgd = swapper_pg_dir;
-#ifdef CONFIG_64BIT
 	if (VMALLOC_END > (1UL << 42)) {
 		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
 		pgd_type = _REGION2_ENTRY_EMPTY;
@@ -113,10 +112,6 @@ void __init paging_init(void)
 		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
 		pgd_type = _REGION3_ENTRY_EMPTY;
 	}
-#else
-	asce_bits = _ASCE_TABLE_LENGTH;
-	pgd_type = _SEGMENT_ENTRY_EMPTY;
-#endif
 	S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
 	clear_table((unsigned long *) init_mm.pgd, pgd_type,
 		    sizeof(unsigned long)*2048);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 2eb34bdfc613..8a993a53fcd6 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -1,7 +1,7 @@
 /*
  * Access kernel memory without faulting -- s390 specific implementation.
  *
- * Copyright IBM Corp. 2009
+ * Copyright IBM Corp. 2009, 2015
  *
  *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
  *
@@ -16,51 +16,55 @@
 #include <asm/ctl_reg.h>
 #include <asm/io.h>
 
-/*
- * This function writes to kernel memory bypassing DAT and possible
- * write protection. It copies one to four bytes from src to dst
- * using the stura instruction.
- * Returns the number of bytes copied or -EFAULT.
- */
-static long probe_kernel_write_odd(void *dst, const void *src, size_t size)
+static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
 {
-	unsigned long count, aligned;
-	int offset, mask;
-	int rc = -EFAULT;
+	unsigned long aligned, offset, count;
+	char tmp[8];
 
-	aligned = (unsigned long) dst & ~3UL;
-	offset = (unsigned long) dst & 3;
-	count = min_t(unsigned long, 4 - offset, size);
-	mask = (0xf << (4 - count)) & 0xf;
-	mask >>= offset;
+	aligned = (unsigned long) dst & ~7UL;
+	offset = (unsigned long) dst & 7UL;
+	size = min(8UL - offset, size);
+	count = size - 1;
 	asm volatile(
 		"	bras	1,0f\n"
-		"	icm	0,0,0(%3)\n"
-		"0:	l	0,0(%1)\n"
-		"	lra	%1,0(%1)\n"
-		"1:	ex	%2,0(1)\n"
-		"2:	stura	0,%1\n"
-		"	la	%0,0\n"
-		"3:\n"
-		EX_TABLE(0b,3b) EX_TABLE(1b,3b) EX_TABLE(2b,3b)
-		: "+d" (rc), "+a" (aligned)
-		: "a" (mask), "a" (src) : "cc", "memory", "0", "1");
-	return rc ? rc : count;
+		"	mvc	0(1,%4),0(%5)\n"
+		"0:	mvc	0(8,%3),0(%0)\n"
+		"	ex	%1,0(1)\n"
+		"	lg	%1,0(%3)\n"
+		"	lra	%0,0(%0)\n"
+		"	sturg	%1,%0\n"
+		: "+&a" (aligned), "+&a" (count), "=m" (tmp)
+		: "a" (&tmp), "a" (&tmp[offset]), "a" (src)
+		: "cc", "memory", "1");
+	return size;
 }
 
-long probe_kernel_write(void *dst, const void *src, size_t size)
+/*
+ * s390_kernel_write - write to kernel memory bypassing DAT
+ * @dst: destination address
+ * @src: source address
+ * @size: number of bytes to copy
+ *
+ * This function writes to kernel memory bypassing DAT and possible page table
+ * write protection. It writes to the destination using the sturg instruction.
+ * Therefore we have a read-modify-write sequence: the function reads eight
+ * bytes from destination at an eight byte boundary, modifies the bytes
+ * requested and writes the result back in a loop.
+ *
+ * Note: this means that this function may not be called concurrently on
+ *	 several cpus with overlapping words, since this may potentially
+ *	 cause data corruption.
+ */
+void notrace s390_kernel_write(void *dst, const void *src, size_t size)
 {
-	long copied = 0;
+	long copied;
 
 	while (size) {
-		copied = probe_kernel_write_odd(dst, src, size);
-		if (copied < 0)
-			break;
+		copied = s390_kernel_write_odd(dst, src, size);
 		dst += copied;
 		src += copied;
 		size -= copied;
 	}
-	return copied < 0 ? -EFAULT : 0;
 }
 
 static int __memcpy_real(void *dest, void *src, size_t count)
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
index 5535cfe0ee11..0f3604395805 100644
--- a/arch/s390/mm/mem_detect.c
+++ b/arch/s390/mm/mem_detect.c
@@ -36,10 +36,6 @@ void __init detect_memory_memblock(void)
 	memsize = rzm * rnmax;
 	if (!rzm)
 		rzm = 1ULL << 17;
-	if (IS_ENABLED(CONFIG_32BIT)) {
-		rzm = min(ADDR2G, rzm);
-		memsize = min(ADDR2G, memsize);
-	}
 	max_physmem_end = memsize;
 	addr = 0;
 	/* keep memblock lists close to the kernel */
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 179a2c20b01f..6e552af08c76 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -32,7 +32,7 @@
 #include <asm/pgalloc.h>
 
 unsigned long mmap_rnd_mask;
-unsigned long mmap_align_mask;
+static unsigned long mmap_align_mask;
 
 static unsigned long stack_maxrandom_size(void)
 {
@@ -60,22 +60,20 @@ static inline int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_rnd(void)
+unsigned long arch_mmap_rnd(void)
 {
-	if (!(current->flags & PF_RANDOMIZE))
-		return 0;
 	if (is_32bit_task())
 		return (get_random_int() & 0x7ff) << PAGE_SHIFT;
 	else
 		return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base_legacy(void)
+static unsigned long mmap_base_legacy(unsigned long rnd)
 {
-	return TASK_UNMAPPED_BASE + mmap_rnd();
+	return TASK_UNMAPPED_BASE + rnd;
 }
 
-static inline unsigned long mmap_base(void)
+static inline unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
 
@@ -84,7 +82,7 @@ static inline unsigned long mmap_base(void)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 	gap &= PAGE_MASK;
-	return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap;
+	return STACK_TOP - stack_maxrandom_size() - rnd - gap;
 }
 
 unsigned long
@@ -179,40 +177,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	return addr;
 }
 
-unsigned long randomize_et_dyn(void)
-{
-	unsigned long base;
-
-	base = STACK_TOP / 3 * 2;
-	if (!is_32bit_task())
-		/* Align to 4GB */
-		base &= ~((1UL << 32) - 1);
-	return base + mmap_rnd();
-}
-
-#ifndef CONFIG_64BIT
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
-	/*
-	 * Fall back to the standard layout if the personality
-	 * bit is set, or if the expected stack growth is unlimited:
-	 */
-	if (mmap_is_legacy()) {
-		mm->mmap_base = mmap_base_legacy();
-		mm->get_unmapped_area = arch_get_unmapped_area;
-	} else {
-		mm->mmap_base = mmap_base();
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-	}
-}
-
-#else
-
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
 	if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
@@ -273,15 +237,20 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
 	/*
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
 	if (mmap_is_legacy()) {
-		mm->mmap_base = mmap_base_legacy();
+		mm->mmap_base = mmap_base_legacy(random_factor);
 		mm->get_unmapped_area = s390_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base();
+		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
 	}
 }
@@ -317,5 +286,3 @@ static int __init setup_mmap_rnd(void)
 	return 0;
 }
 early_initcall(setup_mmap_rnd);
-
-#endif
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 426c9d462d1c..749c98407b41 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -109,7 +109,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
 {
 	int i;
 
-	if (test_facility(13) && IS_ENABLED(CONFIG_64BIT)) {
+	if (test_facility(13)) {
 		__ptep_ipte_range(address, nr - 1, pte);
 		return;
 	}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b2c1542f2ba2..33f589459113 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -27,14 +27,8 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#ifndef CONFIG_64BIT
-#define ALLOC_ORDER	1
-#define FRAG_MASK	0x0f
-#else
 #define ALLOC_ORDER	2
 #define FRAG_MASK	0x03
-#endif
-
 
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
@@ -50,7 +44,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
 	free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
-#ifdef CONFIG_64BIT
 static void __crst_table_upgrade(void *arg)
 {
 	struct mm_struct *mm = arg;
@@ -140,7 +133,6 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 	if (current->active_mm == mm)
 		set_user_asce(mm);
 }
-#endif
 
 #ifdef CONFIG_PGSTE
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b1593c2f751a..ef7d6c8fea66 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,12 +38,10 @@ static inline pud_t *vmem_pud_alloc(void)
 {
 	pud_t *pud = NULL;
 
-#ifdef CONFIG_64BIT
 	pud = vmem_alloc_pages(2);
 	if (!pud)
 		return NULL;
 	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
-#endif
 	return pud;
 }
 
@@ -51,12 +49,10 @@ static inline pmd_t *vmem_pmd_alloc(void)
 {
 	pmd_t *pmd = NULL;
 
-#ifdef CONFIG_64BIT
 	pmd = vmem_alloc_pages(2);
 	if (!pmd)
 		return NULL;
 	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
-#endif
 	return pmd;
 }
 
@@ -98,7 +94,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 			pgd_populate(&init_mm, pg_dir, pu_dir);
 		}
 		pu_dir = pud_offset(pg_dir, address);
-#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
+#ifndef CONFIG_DEBUG_PAGEALLOC
 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
 			pud_val(*pu_dir) = __pa(address) |
@@ -115,7 +111,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 			pud_populate(&init_mm, pu_dir, pm_dir);
 		}
 		pm_dir = pmd_offset(pu_dir, address);
-#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
+#ifndef CONFIG_DEBUG_PAGEALLOC
 		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
 		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
 			pmd_val(*pm_dir) = __pa(address) |
@@ -222,7 +218,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
 		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir)) {
-#ifdef CONFIG_64BIT
 			/* Use 1MB frames for vmemmap if available. We always
 			 * use large frames even if they are only partially
 			 * used.
@@ -240,7 +235,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 				address = (address + PMD_SIZE) & PMD_MASK;
 				continue;
 			}
-#endif
 			pt_dir = vmem_pte_alloc(address);
 			if (!pt_dir)
 				goto out;
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
index ba44c9f55346..a1c917d881ec 100644
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -1,134 +1,115 @@
 /*
  * BPF Jit compiler for s390, help functions.
  *
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012,2015
  *
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
  */
+
 #include <linux/linkage.h>
+#include "bpf_jit.h"
 
 /*
  * Calling convention:
- * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved
- *   %r2: skb pointer
- *   %r3: offset parameter
- *   %r5: BPF A accumulator
- *   %r8: return address
- *   %r9: save register for skb pointer
- *   %r10: skb->data
- *   %r11: skb->len - skb->data_len (headlen)
- *   %r12: BPF X accumulator
+ * registers %r7-%r10, %r11,%r13, and %r15 are call saved
+ *
+ * Input (64 bit):
+ *   %r3 (%b2) = offset into skb data
+ *   %r6 (%b5) = return address
+ *   %r7 (%b6) = skb pointer
+ *   %r12      = skb data pointer
+ *
+ * Output:
+ *   %r14= %b0 = return value (read skb value)
+ *
+ * Work registers: %r2,%r4,%r5,%r14
  *
  * skb_copy_bits takes 4 parameters:
  *   %r2 = skb pointer
  *   %r3 = offset into skb data
  *   %r4 = pointer to temp buffer
  *   %r5 = length to copy
+ *   Return value in %r2: 0 = ok
+ *
+ * bpf_internal_load_pointer_neg_helper takes 3 parameters:
+ *   %r2 = skb pointer
+ *   %r3 = offset into data
+ *   %r4 = length to copy
+ *   Return value in %r2: Pointer to data
  */
-#define SKBDATA	%r8
-
-	/* A = *(u32 *) (skb->data+K+X) */
-ENTRY(sk_load_word_ind)
-	ar	%r3,%r12		# offset += X
-	bmr	%r8			# < 0 -> return with cc
-
-	/* A = *(u32 *) (skb->data+K) */
-ENTRY(sk_load_word)
-	llgfr	%r1,%r3			# extend offset
-	ahi	%r3,4			# offset + 4
-	clr	%r11,%r3		# hlen <= offset + 4 ?
-	jl	sk_load_word_slow
-	l	%r5,0(%r1,%r10)		# get word from skb
-	xr	%r1,%r1			# set cc to zero
-	br	%r8
 
-sk_load_word_slow:
-	lgr	%r9,%r2			# save %r2
-	lgr	%r3,%r1			# offset
-	la	%r4,160(%r15)		# pointer to temp buffer
-	lghi	%r5,4			# 4 bytes
-	brasl	%r14,skb_copy_bits	# get data from skb
-	l	%r5,160(%r15)		# load result from temp buffer
-	ltgr	%r2,%r2			# set cc to (%r2 != 0)
-	lgr	%r2,%r9			# restore %r2
-	br	%r8
+#define SKF_MAX_NEG_OFF	-0x200000	/* SKF_LL_OFF from filter.h */
 
-	/* A = *(u16 *) (skb->data+K+X) */
-ENTRY(sk_load_half_ind)
-	ar	%r3,%r12		# offset += X
-	bmr	%r8			# < 0 -> return with cc
-
-	/* A = *(u16 *) (skb->data+K) */
-ENTRY(sk_load_half)
-	llgfr	%r1,%r3			# extend offset
-	ahi	%r3,2			# offset + 2
-	clr	%r11,%r3		# hlen <= offset + 2 ?
-	jl	sk_load_half_slow
-	llgh	%r5,0(%r1,%r10)		# get half from skb
-	xr	%r1,%r1			# set cc to zero
-	br	%r8
-
-sk_load_half_slow:
-	lgr	%r9,%r2			# save %r2
-	lgr	%r3,%r1			# offset
-	la	%r4,162(%r15)		# pointer to temp buffer
-	lghi	%r5,2			# 2 bytes
-	brasl	%r14,skb_copy_bits	# get data from skb
-	xc	160(2,%r15),160(%r15)
-	l	%r5,160(%r15)		# load result from temp buffer
-	ltgr	%r2,%r2			# set cc to (%r2 != 0)
-	lgr	%r2,%r9			# restore %r2
-	br	%r8
+/*
+ * Load SIZE bytes from SKB
+ */
+#define sk_load_common(NAME, SIZE, LOAD)				\
+ENTRY(sk_load_##NAME);							\
+	ltgr	%r3,%r3;		/* Is offset negative? */	\
+	jl	sk_load_##NAME##_slow_neg;				\
+ENTRY(sk_load_##NAME##_pos);						\
+	aghi	%r3,SIZE;		/* Offset + SIZE */		\
+	clg	%r3,STK_OFF_HLEN(%r15);	/* Offset + SIZE > hlen? */	\
+	jh	sk_load_##NAME##_slow;					\
+	LOAD	%r14,-SIZE(%r3,%r12);	/* Get data from skb */		\
+	b	OFF_OK(%r6);		/* Return */			\
+									\
+sk_load_##NAME##_slow:;							\
+	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
+	aghi	%r3,-SIZE;		/* Arg2 = offset */		\
+	la	%r4,STK_OFF_TMP(%r15);	/* Arg3 = temp bufffer */	\
+	lghi	%r5,SIZE;		/* Arg4 = size */		\
+	brasl	%r14,skb_copy_bits;	/* Get data from skb */		\
+	LOAD	%r14,STK_OFF_TMP(%r15);	/* Load from temp bufffer */	\
+	ltgr	%r2,%r2;		/* Set cc to (%r2 != 0) */	\
+	br	%r6;			/* Return */
 
-	/* A = *(u8 *) (skb->data+K+X) */
-ENTRY(sk_load_byte_ind)
-	ar	%r3,%r12		# offset += X
-	bmr	%r8			# < 0 -> return with cc
+sk_load_common(word, 4, llgf)	/* r14 = *(u32 *) (skb->data+offset) */
+sk_load_common(half, 2, llgh)	/* r14 = *(u16 *) (skb->data+offset) */
 
-	/* A = *(u8 *) (skb->data+K) */
+/*
+ * Load 1 byte from SKB (optimized version)
+ */
+	/* r14 = *(u8 *) (skb->data+offset) */
 ENTRY(sk_load_byte)
-	llgfr	%r1,%r3			# extend offset
-	clr	%r11,%r3		# hlen < offset ?
-	jle	sk_load_byte_slow
-	lhi	%r5,0
-	ic	%r5,0(%r1,%r10)		# get byte from skb
-	xr	%r1,%r1			# set cc to zero
-	br	%r8
+	ltgr	%r3,%r3			# Is offset negative?
+	jl	sk_load_byte_slow_neg
+ENTRY(sk_load_byte_pos)
+	clg	%r3,STK_OFF_HLEN(%r15)	# Offset >= hlen?
+	jnl	sk_load_byte_slow
+	llgc	%r14,0(%r3,%r12)	# Get byte from skb
+	b	OFF_OK(%r6)		# Return OK
 
 sk_load_byte_slow:
-	lgr	%r9,%r2			# save %r2
-	lgr	%r3,%r1			# offset
-	la	%r4,163(%r15)		# pointer to temp buffer
-	lghi	%r5,1			# 1 byte
-	brasl	%r14,skb_copy_bits	# get data from skb
-	xc	160(3,%r15),160(%r15)
-	l	%r5,160(%r15)		# load result from temp buffer
-	ltgr	%r2,%r2			# set cc to (%r2 != 0)
-	lgr	%r2,%r9			# restore %r2
-	br	%r8
+	lgr	%r2,%r7			# Arg1 = skb pointer
+					# Arg2 = offset
+	la	%r4,STK_OFF_TMP(%r15)	# Arg3 = pointer to temp buffer
+	lghi	%r5,1			# Arg4 = size (1 byte)
+	brasl	%r14,skb_copy_bits	# Get data from skb
+	llgc	%r14,STK_OFF_TMP(%r15)	# Load result from temp buffer
+	ltgr	%r2,%r2			# Set cc to (%r2 != 0)
+	br	%r6			# Return cc
+
+#define sk_negative_common(NAME, SIZE, LOAD)				\
+sk_load_##NAME##_slow_neg:;						\
+	cgfi	%r3,SKF_MAX_NEG_OFF;					\
+	jl	bpf_error;						\
+	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
+					/* Arg2 = offset */		\
+	lghi	%r4,SIZE;		/* Arg3 = size */		\
+	brasl	%r14,bpf_internal_load_pointer_neg_helper;		\
+	ltgr	%r2,%r2;						\
+	jz	bpf_error;						\
+	LOAD	%r14,0(%r2);		/* Get data from pointer */	\
+	xr	%r3,%r3;		/* Set cc to zero */		\
+	br	%r6;			/* Return cc */
 
-	/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
-ENTRY(sk_load_byte_msh)
-	llgfr	%r1,%r3			# extend offset
-	clr	%r11,%r3		# hlen < offset ?
-	jle	sk_load_byte_msh_slow
-	lhi	%r12,0
-	ic	%r12,0(%r1,%r10)	# get byte from skb
-	nill	%r12,0x0f
-	sll	%r12,2
-	xr	%r1,%r1			# set cc to zero
-	br	%r8
+sk_negative_common(word, 4, llgf)
+sk_negative_common(half, 2, llgh)
+sk_negative_common(byte, 1, llgc)
 
-sk_load_byte_msh_slow:
-	lgr	%r9,%r2			# save %r2
-	lgr	%r3,%r1			# offset
-	la	%r4,163(%r15)		# pointer to temp buffer
-	lghi	%r5,1			# 1 byte
-	brasl	%r14,skb_copy_bits	# get data from skb
-	xc	160(3,%r15),160(%r15)
-	l	%r12,160(%r15)		# load result from temp buffer
-	nill	%r12,0x0f
-	sll	%r12,2
-	ltgr	%r2,%r2			# set cc to (%r2 != 0)
-	lgr	%r2,%r9			# restore %r2
-	br	%r8
+bpf_error:
+# force a return 0 from jit handler
+	ltgr	%r15,%r15	# Set condition code
+	br	%r6
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
new file mode 100644
index 000000000000..ba8593a515ba
--- /dev/null
+++ b/arch/s390/net/bpf_jit.h
@@ -0,0 +1,58 @@
+/*
+ * BPF Jit compiler defines
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef __ARCH_S390_NET_BPF_JIT_H
+#define __ARCH_S390_NET_BPF_JIT_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Stackframe layout (packed stack):
+ *
+ *				    ^ high
+ *	      +---------------+     |
+ *	      | old backchain |     |
+ *	      +---------------+     |
+ *	      |   r15 - r6    |     |
+ * BFP	   -> +===============+     |
+ *	      |		      |     |
+ *	      |   BPF stack   |     |
+ *	      |		      |     |
+ *	      +---------------+     |
+ *	      | 8 byte hlen   |     |
+ * R15+168 -> +---------------+     |
+ *	      | 4 byte align  |     |
+ *	      +---------------+     |
+ *	      | 4 byte temp   |     |
+ *	      | for bpf_jit.S |     |
+ * R15+160 -> +---------------+     |
+ *	      | new backchain |     |
+ * R15+152 -> +---------------+     |
+ *	      | + 152 byte SA |     |
+ * R15	   -> +---------------+     + low
+ *
+ * We get 160 bytes stack space from calling function, but only use
+ * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
+ */
+#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_OFF_TMP	160	/* Offset of tmp buffer on stack */
+#define STK_OFF_HLEN	168	/* Offset of SKB header length on stack */
+
+/* Offset to skip condition code check */
+#define OFF_OK		4
+
+#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bbd1981cc150..7690dc8e1ab5 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1,817 +1,1209 @@
 /*
  * BPF Jit compiler for s390.
  *
- * Copyright IBM Corp. 2012
+ * Minimum build requirements:
+ *
+ *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
+ *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
+ *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
+ *  - PACK_STACK
+ *  - 64BIT
+ *
+ * Copyright IBM Corp. 2012,2015
  *
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
  */
+
+#define KMSG_COMPONENT "bpf_jit"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/netdevice.h>
-#include <linux/if_vlan.h>
 #include <linux/filter.h>
 #include <linux/init.h>
 #include <asm/cacheflush.h>
-#include <asm/facility.h>
 #include <asm/dis.h>
+#include "bpf_jit.h"
 
-/*
- * Conventions:
- *   %r2 = skb pointer
- *   %r3 = offset parameter
- *   %r4 = scratch register / length parameter
- *   %r5 = BPF A accumulator
- *   %r8 = return address
- *   %r9 = save register for skb pointer
- *   %r10 = skb->data
- *   %r11 = skb->len - skb->data_len (headlen)
- *   %r12 = BPF X accumulator
- *   %r13 = literal pool pointer
- *   0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS
- */
 int bpf_jit_enable __read_mostly;
 
+struct bpf_jit {
+	u32 seen;		/* Flags to remember seen eBPF instructions */
+	u32 seen_reg[16];	/* Array to remember which registers are used */
+	u32 *addrs;		/* Array with relative instruction addresses */
+	u8 *prg_buf;		/* Start of program */
+	int size;		/* Size of program and literal pool */
+	int size_prg;		/* Size of program */
+	int prg;		/* Current position in program */
+	int lit_start;		/* Start of literal pool */
+	int lit;		/* Current position in literal pool */
+	int base_ip;		/* Base address for literal pool */
+	int ret0_ip;		/* Address of return 0 */
+	int exit_ip;		/* Address of exit */
+};
+
+#define BPF_SIZE_MAX	4096	/* Max size for program */
+
+#define SEEN_SKB	1	/* skb access */
+#define SEEN_MEM	2	/* use mem[] for temporary storage */
+#define SEEN_RET0	4	/* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL	8	/* code uses literals */
+#define SEEN_FUNC	16	/* calls C functions */
+#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+
 /*
- * assembly code in arch/x86/net/bpf_jit.S
+ * s390 registers
  */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+#define REG_W0		(__MAX_BPF_REG+0)	/* Work register 1 (even) */
+#define REG_W1		(__MAX_BPF_REG+1)	/* Work register 2 (odd) */
+#define REG_SKB_DATA	(__MAX_BPF_REG+2)	/* SKB data register */
+#define REG_L		(__MAX_BPF_REG+3)	/* Literal pool register */
+#define REG_15		(__MAX_BPF_REG+4)	/* Register 15 */
+#define REG_0		REG_W0			/* Register 0 */
+#define REG_2		BPF_REG_1		/* Register 2 */
+#define REG_14		BPF_REG_0		/* Register 14 */
 
-struct bpf_jit {
-	unsigned int seen;
-	u8 *start;
-	u8 *prg;
-	u8 *mid;
-	u8 *lit;
-	u8 *end;
-	u8 *base_ip;
-	u8 *ret0_ip;
-	u8 *exit_ip;
-	unsigned int off_load_word;
-	unsigned int off_load_half;
-	unsigned int off_load_byte;
-	unsigned int off_load_bmsh;
-	unsigned int off_load_iword;
-	unsigned int off_load_ihalf;
-	unsigned int off_load_ibyte;
+/*
+ * Mapping of BPF registers to s390 registers
+ */
+static const int reg2hex[] = {
+	/* Return code */
+	[BPF_REG_0]	= 14,
+	/* Function parameters */
+	[BPF_REG_1]	= 2,
+	[BPF_REG_2]	= 3,
+	[BPF_REG_3]	= 4,
+	[BPF_REG_4]	= 5,
+	[BPF_REG_5]	= 6,
+	/* Call saved registers */
+	[BPF_REG_6]	= 7,
+	[BPF_REG_7]	= 8,
+	[BPF_REG_8]	= 9,
+	[BPF_REG_9]	= 10,
+	/* BPF stack pointer */
+	[BPF_REG_FP]	= 13,
+	/* SKB data pointer */
+	[REG_SKB_DATA]	= 12,
+	/* Work registers for s390x backend */
+	[REG_W0]	= 0,
+	[REG_W1]	= 1,
+	[REG_L]		= 11,
+	[REG_15]	= 15,
 };
 
-#define BPF_SIZE_MAX	4096	/* Max size for program */
+static inline u32 reg(u32 dst_reg, u32 src_reg)
+{
+	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
+}
+
+static inline u32 reg_high(u32 reg)
+{
+	return reg2hex[reg] << 4;
+}
+
+static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
+{
+	u32 r1 = reg2hex[b1];
+
+	if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15)
+		jit->seen_reg[r1] = 1;
+}
+
+#define REG_SET_SEEN(b1)					\
+({								\
+	reg_set_seen(jit, b1);					\
+})
+
+#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
+
+/*
+ * EMIT macros for code generation
+ */
+
+#define _EMIT2(op)						\
+({								\
+	if (jit->prg_buf)					\
+		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
+	jit->prg += 2;						\
+})
 
-#define SEEN_DATAREF	1	/* might call external helpers */
-#define SEEN_XREG	2	/* ebx is used */
-#define SEEN_MEM	4	/* use mem[] for temporary storage */
-#define SEEN_RET0	8	/* pc_ret0 points to a valid return 0 */
-#define SEEN_LITERAL	16	/* code uses literals */
-#define SEEN_LOAD_WORD	32	/* code uses sk_load_word */
-#define SEEN_LOAD_HALF	64	/* code uses sk_load_half */
-#define SEEN_LOAD_BYTE	128	/* code uses sk_load_byte */
-#define SEEN_LOAD_BMSH	256	/* code uses sk_load_byte_msh */
-#define SEEN_LOAD_IWORD	512	/* code uses sk_load_word_ind */
-#define SEEN_LOAD_IHALF	1024	/* code uses sk_load_half_ind */
-#define SEEN_LOAD_IBYTE	2048	/* code uses sk_load_byte_ind */
-
-#define EMIT2(op)					\
-({							\
-	if (jit->prg + 2 <= jit->mid)			\
-		*(u16 *) jit->prg = op;			\
-	jit->prg += 2;					\
+#define EMIT2(op, b1, b2)					\
+({								\
+	_EMIT2(op | reg(b1, b2));				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
 })
 
-#define EMIT4(op)					\
-({							\
-	if (jit->prg + 4 <= jit->mid)			\
-		*(u32 *) jit->prg = op;			\
-	jit->prg += 4;					\
+#define _EMIT4(op)						\
+({								\
+	if (jit->prg_buf)					\
+		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
+	jit->prg += 4;						\
 })
 
-#define EMIT4_DISP(op, disp)				\
-({							\
-	unsigned int __disp = (disp) & 0xfff;		\
-	EMIT4(op | __disp);				\
+#define EMIT4(op, b1, b2)					\
+({								\
+	_EMIT4(op | reg(b1, b2));				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
 })
 
-#define EMIT4_IMM(op, imm)				\
-({							\
-	unsigned int __imm = (imm) & 0xffff;		\
-	EMIT4(op | __imm);				\
+#define EMIT4_RRF(op, b1, b2, b3)				\
+({								\
+	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
 })
 
-#define EMIT4_PCREL(op, pcrel)				\
-({							\
-	long __pcrel = ((pcrel) >> 1) & 0xffff;		\
-	EMIT4(op | __pcrel);				\
+#define _EMIT4_DISP(op, disp)					\
+({								\
+	unsigned int __disp = (disp) & 0xfff;			\
+	_EMIT4(op | __disp);					\
 })
 
-#define EMIT6(op1, op2)					\
-({							\
-	if (jit->prg + 6 <= jit->mid) {			\
-		*(u32 *) jit->prg = op1;		\
-		*(u16 *) (jit->prg + 4) = op2;		\
-	}						\
-	jit->prg += 6;					\
+#define EMIT4_DISP(op, b1, b2, disp)				\
+({								\
+	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
+		    reg_high(b2) << 8, disp);			\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
 })
 
-#define EMIT6_DISP(op1, op2, disp)			\
-({							\
-	unsigned int __disp = (disp) & 0xfff;		\
-	EMIT6(op1 | __disp, op2);			\
+#define EMIT4_IMM(op, b1, imm)					\
+({								\
+	unsigned int __imm = (imm) & 0xffff;			\
+	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
+	REG_SET_SEEN(b1);					\
 })
 
-#define EMIT6_IMM(op, imm)				\
-({							\
-	unsigned int __imm = (imm);			\
-	EMIT6(op | (__imm >> 16), __imm & 0xffff);	\
+#define EMIT4_PCREL(op, pcrel)					\
+({								\
+	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
+	_EMIT4(op | __pcrel);					\
 })
 
-#define EMIT_CONST(val)					\
-({							\
-	unsigned int ret;				\
-	ret = (unsigned int) (jit->lit - jit->base_ip);	\
-	jit->seen |= SEEN_LITERAL;			\
-	if (jit->lit + 4 <= jit->end)			\
-		*(u32 *) jit->lit = val;		\
-	jit->lit += 4;					\
-	ret;						\
+#define _EMIT6(op1, op2)					\
+({								\
+	if (jit->prg_buf) {					\
+		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
+		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
+	}							\
+	jit->prg += 6;						\
 })
 
-#define EMIT_FN_CONST(bit, fn)				\
-({							\
-	unsigned int ret;				\
-	ret = (unsigned int) (jit->lit - jit->base_ip);	\
-	if (jit->seen & bit) {				\
-		jit->seen |= SEEN_LITERAL;		\
-		if (jit->lit + 8 <= jit->end)		\
-			*(void **) jit->lit = fn;	\
-		jit->lit += 8;				\
-	}						\
-	ret;						\
+#define _EMIT6_DISP(op1, op2, disp)				\
+({								\
+	unsigned int __disp = (disp) & 0xfff;			\
+	_EMIT6(op1 | __disp, op2);				\
 })
 
-static void bpf_jit_fill_hole(void *area, unsigned int size)
+#define EMIT6_DISP(op1, op2, b1, b2, b3, disp)			\
+({								\
+	_EMIT6_DISP(op1 | reg(b1, b2) << 16 |			\
+		    reg_high(b3) << 8, op2, disp);		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define _EMIT6_DISP_LH(op1, op2, disp)				\
+({								\
+	unsigned int __disp_h = ((u32)disp) & 0xff000;		\
+	unsigned int __disp_l = ((u32)disp) & 0x00fff;		\
+	_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4);		\
+})
+
+#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
+({								\
+	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
+		       reg_high(b3) << 8, op2, disp);		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
+({								\
+	/* Branch instruction needs 6 bytes */			\
+	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
+	_EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask);	\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define _EMIT6_IMM(op, imm)					\
+({								\
+	unsigned int __imm = (imm);				\
+	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
+})
+
+#define EMIT6_IMM(op, b1, imm)					\
+({								\
+	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
+	REG_SET_SEEN(b1);					\
+})
+
+#define EMIT_CONST_U32(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit - jit->base_ip;				\
+	jit->seen |= SEEN_LITERAL;				\
+	if (jit->prg_buf)					\
+		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
+	jit->lit += 4;						\
+	ret;							\
+})
+
+#define EMIT_CONST_U64(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit - jit->base_ip;				\
+	jit->seen |= SEEN_LITERAL;				\
+	if (jit->prg_buf)					\
+		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
+	jit->lit += 8;						\
+	ret;							\
+})
+
+#define EMIT_ZERO(b1)						\
+({								\
+	/* llgfr %dst,%dst (zero extend to 64 bit) */		\
+	EMIT4(0xb9160000, b1, b1);				\
+	REG_SET_SEEN(b1);					\
+})
+
+/*
+ * Fill whole space with illegal instructions
+ */
+static void jit_fill_hole(void *area, unsigned int size)
 {
-	/* Fill whole space with illegal instructions */
 	memset(area, 0, size);
 }
 
-static void bpf_jit_prologue(struct bpf_jit *jit)
+/*
+ * Save registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+	u32 off = 72 + (rs - 6) * 8;
+
+	if (rs == re)
+		/* stg %rs,off(%r15) */
+		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
+	else
+		/* stmg %rs,%re,off(%r15) */
+		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
+}
+
+/*
+ * Restore registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
 {
-	/* Save registers and create stack frame if necessary */
-	if (jit->seen & SEEN_DATAREF) {
-		/* stmg %r8,%r15,88(%r15) */
-		EMIT6(0xeb8ff058, 0x0024);
-		/* lgr %r14,%r15 */
-		EMIT4(0xb90400ef);
-		/* aghi %r15,<offset> */
-		EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80);
-		/* stg %r14,152(%r15) */
-		EMIT6(0xe3e0f098, 0x0024);
-	} else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
-		/* stmg %r12,%r13,120(%r15) */
-		EMIT6(0xebcdf078, 0x0024);
-	else if (jit->seen & SEEN_XREG)
-		/* stg %r12,120(%r15) */
-		EMIT6(0xe3c0f078, 0x0024);
-	else if (jit->seen & SEEN_LITERAL)
-		/* stg %r13,128(%r15) */
-		EMIT6(0xe3d0f080, 0x0024);
+	u32 off = 72 + (rs - 6) * 8;
+
+	if (jit->seen & SEEN_STACK)
+		off += STK_OFF;
+
+	if (rs == re)
+		/* lg %rs,off(%r15) */
+		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
+	else
+		/* lmg %rs,%re,off(%r15) */
+		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
+}
 
+/*
+ * Return first seen register (from start)
+ */
+static int get_start(struct bpf_jit *jit, int start)
+{
+	int i;
+
+	for (i = start; i <= 15; i++) {
+		if (jit->seen_reg[i])
+			return i;
+	}
+	return 0;
+}
+
+/*
+ * Return last seen register (from start) (gap >= 2)
+ */
+static int get_end(struct bpf_jit *jit, int start)
+{
+	int i;
+
+	for (i = start; i < 15; i++) {
+		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+			return i - 1;
+	}
+	return jit->seen_reg[15] ? 15 : 14;
+}
+
+#define REGS_SAVE	1
+#define REGS_RESTORE	0
+/*
+ * Save and restore clobbered registers (6-15) on stack.
+ * We save/restore registers in chunks with gap >= 2 registers.
+ */
+static void save_restore_regs(struct bpf_jit *jit, int op)
+{
+
+	int re = 6, rs;
+
+	do {
+		rs = get_start(jit, re);
+		if (!rs)
+			break;
+		re = get_end(jit, rs + 1);
+		if (op == REGS_SAVE)
+			save_regs(jit, rs, re);
+		else
+			restore_regs(jit, rs, re);
+		re++;
+	} while (re <= 15);
+}
+
+/*
+ * Emit function prologue
+ *
+ * Save registers and create stack frame if necessary.
+ * See stack frame layout desription in "bpf_jit.h"!
+ */
+static void bpf_jit_prologue(struct bpf_jit *jit)
+{
+	/* Save registers */
+	save_restore_regs(jit, REGS_SAVE);
 	/* Setup literal pool */
 	if (jit->seen & SEEN_LITERAL) {
 		/* basr %r13,0 */
-		EMIT2(0x0dd0);
+		EMIT2(0x0d00, REG_L, REG_0);
 		jit->base_ip = jit->prg;
 	}
-	jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word);
-	jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half);
-	jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte);
-	jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh);
-	jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind);
-	jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind);
-	jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind);
-
-	/* Filter needs to access skb data */
-	if (jit->seen & SEEN_DATAREF) {
-		/* l %r11,<len>(%r2) */
-		EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len));
-		/* s %r11,<data_len>(%r2) */
-		EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len));
-		/* lg %r10,<data>(%r2) */
-		EMIT6_DISP(0xe3a02000, 0x0004,
-			   offsetof(struct sk_buff, data));
+	/* Setup stack and backchain */
+	if (jit->seen & SEEN_STACK) {
+		/* lgr %bfp,%r15 (BPF frame pointer) */
+		EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+		/* aghi %r15,-STK_OFF */
+		EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
+		if (jit->seen & SEEN_FUNC)
+			/* stg %bfp,152(%r15) (backchain) */
+			EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+				      REG_15, 152);
+	}
+	/*
+	 * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
+	 * we store the SKB header length on the stack and the SKB data
+	 * pointer in REG_SKB_DATA.
+	 */
+	if (jit->seen & SEEN_SKB) {
+		/* Header length: llgf %w1,<len>(%b1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
+			      offsetof(struct sk_buff, len));
+		/* s %w1,<data_len>(%b1) */
+		EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
+			   offsetof(struct sk_buff, data_len));
+		/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
+			      STK_OFF_HLEN);
+		/* lg %skb_data,data_off(%b1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
+			      BPF_REG_1, offsetof(struct sk_buff, data));
 	}
+	/* BPF compatibility: clear A (%b7) and X (%b8) registers */
+	if (REG_SEEN(BPF_REG_7))
+		/* lghi %b7,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_7, 0);
+	if (REG_SEEN(BPF_REG_8))
+		/* lghi %b8,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_8, 0);
 }
 
+/*
+ * Function epilogue
+ */
 static void bpf_jit_epilogue(struct bpf_jit *jit)
 {
 	/* Return 0 */
 	if (jit->seen & SEEN_RET0) {
 		jit->ret0_ip = jit->prg;
-		/* lghi %r2,0 */
-		EMIT4(0xa7290000);
+		/* lghi %b0,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
 	}
 	jit->exit_ip = jit->prg;
+	/* Load exit code: lgr %r2,%b0 */
+	EMIT4(0xb9040000, REG_2, BPF_REG_0);
 	/* Restore registers */
-	if (jit->seen & SEEN_DATAREF)
-		/* lmg %r8,%r15,<offset>(%r15) */
-		EMIT6_DISP(0xeb8ff000, 0x0004,
-			   (jit->seen & SEEN_MEM) ? 200 : 168);
-	else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
-		/* lmg %r12,%r13,120(%r15) */
-		EMIT6(0xebcdf078, 0x0004);
-	else if (jit->seen & SEEN_XREG)
-		/* lg %r12,120(%r15) */
-		EMIT6(0xe3c0f078, 0x0004);
-	else if (jit->seen & SEEN_LITERAL)
-		/* lg %r13,128(%r15) */
-		EMIT6(0xe3d0f080, 0x0004);
+	save_restore_regs(jit, REGS_RESTORE);
 	/* br %r14 */
-	EMIT2(0x07fe);
+	_EMIT2(0x07fe);
 }
 
 /*
- * make sure we dont leak kernel information to user
+ * Compile one eBPF instruction into s390x code
  */
-static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
+static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 {
-	/* Clear temporary memory if (seen & SEEN_MEM) */
-	if (jit->seen & SEEN_MEM)
-		/* xc 0(64,%r15),0(%r15) */
-		EMIT6(0xd73ff000, 0xf000);
-	/* Clear X if (seen & SEEN_XREG) */
-	if (jit->seen & SEEN_XREG)
-		/* lhi %r12,0 */
-		EMIT4(0xa7c80000);
-	/* Clear A if the first register does not set it. */
-	switch (filter[0].code) {
-	case BPF_LD | BPF_W | BPF_ABS:
-	case BPF_LD | BPF_H | BPF_ABS:
-	case BPF_LD | BPF_B | BPF_ABS:
-	case BPF_LD | BPF_W | BPF_LEN:
-	case BPF_LD | BPF_W | BPF_IND:
-	case BPF_LD | BPF_H | BPF_IND:
-	case BPF_LD | BPF_B | BPF_IND:
-	case BPF_LD | BPF_IMM:
-	case BPF_LD | BPF_MEM:
-	case BPF_MISC | BPF_TXA:
-	case BPF_RET | BPF_K:
-		/* first instruction sets A register */
-		break;
-	default: /* A = 0 */
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-	}
-}
+	struct bpf_insn *insn = &fp->insnsi[i];
+	int jmp_off, last, insn_count = 1;
+	unsigned int func_addr, mask;
+	u32 dst_reg = insn->dst_reg;
+	u32 src_reg = insn->src_reg;
+	u32 *addrs = jit->addrs;
+	s32 imm = insn->imm;
+	s16 off = insn->off;
 
-static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
-			unsigned int *addrs, int i, int last)
-{
-	unsigned int K;
-	int offset;
-	unsigned int mask;
-	u16 code;
-
-	K = filter->k;
-	code = bpf_anc_helper(filter);
-
-	switch (code) {
-	case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
-		jit->seen |= SEEN_XREG;
-		/* ar %r5,%r12 */
-		EMIT2(0x1a5c);
-		break;
-	case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
-		if (!K)
+	switch (insn->code) {
+	/*
+	 * BPF_MOV
+	 */
+	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
+		/* llgfr %dst,%src */
+		EMIT4(0xb9160000, dst_reg, src_reg);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+		/* lgr %dst,%src */
+		EMIT4(0xb9040000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
+		/* llilf %dst,imm */
+		EMIT6_IMM(0xc00f0000, dst_reg, imm);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
+		/* lgfi %dst,imm */
+		EMIT6_IMM(0xc0010000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_LD 64
+	 */
+	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+	{
+		/* 16 byte instruction that uses two 'struct bpf_insn' */
+		u64 imm64;
+
+		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
+		/* lg %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm64));
+		insn_count = 2;
+		break;
+	}
+	/*
+	 * BPF_ADD
+	 */
+	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
+		/* ar %dst,%src */
+		EMIT2(0x1a00, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
+		/* agr %dst,%src */
+		EMIT4(0xb9080000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
+		if (!imm)
 			break;
-		if (K <= 16383)
-			/* ahi %r5,<K> */
-			EMIT4_IMM(0xa75a0000, K);
-		else if (test_facility(21))
-			/* alfi %r5,<K> */
-			EMIT6_IMM(0xc25b0000, K);
-		else
-			/* a %r5,<d(K)>(%r13) */
-			EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
+		/* alfi %dst,imm */
+		EMIT6_IMM(0xc20b0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
+		if (!imm)
+			break;
+		/* agfi %dst,imm */
+		EMIT6_IMM(0xc2080000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_SUB
+	 */
+	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
+		/* sr %dst,%src */
+		EMIT2(0x1b00, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
 		break;
-	case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
-		jit->seen |= SEEN_XREG;
-		/* sr %r5,%r12 */
-		EMIT2(0x1b5c);
+	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
+		/* sgr %dst,%src */
+		EMIT4(0xb9090000, dst_reg, src_reg);
 		break;
-	case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
-		if (!K)
+	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
+		if (!imm)
 			break;
-		if (K <= 16384)
-			/* ahi %r5,-K */
-			EMIT4_IMM(0xa75a0000, -K);
-		else if (test_facility(21))
-			/* alfi %r5,-K */
-			EMIT6_IMM(0xc25b0000, -K);
-		else
-			/* s %r5,<d(K)>(%r13) */
-			EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
-		break;
-	case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
-		jit->seen |= SEEN_XREG;
-		/* msr %r5,%r12 */
-		EMIT4(0xb252005c);
-		break;
-	case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
-		if (K <= 16383)
-			/* mhi %r5,K */
-			EMIT4_IMM(0xa75c0000, K);
-		else if (test_facility(34))
-			/* msfi %r5,<K> */
-			EMIT6_IMM(0xc2510000, K);
-		else
-			/* ms %r5,<d(K)>(%r13) */
-			EMIT4_DISP(0x7150d000, EMIT_CONST(K));
+		/* alfi %dst,-imm */
+		EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+		EMIT_ZERO(dst_reg);
 		break;
-	case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
-		jit->seen |= SEEN_XREG | SEEN_RET0;
-		/* ltr %r12,%r12 */
-		EMIT2(0x12cc);
-		/* jz <ret0> */
-		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
-		/* lhi %r4,0 */
-		EMIT4(0xa7480000);
-		/* dlr %r4,%r12 */
-		EMIT4(0xb997004c);
-		break;
-	case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
-		if (K == 1)
+	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
+		if (!imm)
+			break;
+		/* agfi %dst,-imm */
+		EMIT6_IMM(0xc2080000, dst_reg, -imm);
+		break;
+	/*
+	 * BPF_MUL
+	 */
+	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
+		/* msr %dst,%src */
+		EMIT4(0xb2520000, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
+		/* msgr %dst,%src */
+		EMIT4(0xb90c0000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
+		if (imm == 1)
+			break;
+		/* msfi %r5,imm */
+		EMIT6_IMM(0xc2010000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
+		if (imm == 1)
 			break;
-		/* lhi %r4,0 */
-		EMIT4(0xa7480000);
-		/* dl %r4,<d(K)>(%r13) */
-		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
-		break;
-	case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
-		jit->seen |= SEEN_XREG | SEEN_RET0;
-		/* ltr %r12,%r12 */
-		EMIT2(0x12cc);
+		/* msgfi %dst,imm */
+		EMIT6_IMM(0xc2000000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_DIV / BPF_MOD
+	 */
+	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
+	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		jit->seen |= SEEN_RET0;
+		/* ltr %src,%src (if src == 0 goto fail) */
+		EMIT2(0x1200, src_reg, src_reg);
+		/* jz <ret0> */
+		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+		/* lhi %w0,0 */
+		EMIT4_IMM(0xa7080000, REG_W0, 0);
+		/* lr %w1,%dst */
+		EMIT2(0x1800, REG_W1, dst_reg);
+		/* dlr %w0,%src */
+		EMIT4(0xb9970000, REG_W0, src_reg);
+		/* llgfr %dst,%rc */
+		EMIT4(0xb9160000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */
+	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		jit->seen |= SEEN_RET0;
+		/* ltgr %src,%src (if src == 0 goto fail) */
+		EMIT4(0xb9020000, src_reg, src_reg);
 		/* jz <ret0> */
-		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
-		/* lhi %r4,0 */
-		EMIT4(0xa7480000);
-		/* dlr %r4,%r12 */
-		EMIT4(0xb997004c);
-		/* lr %r5,%r4 */
-		EMIT2(0x1854);
-		break;
-	case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
-		if (K == 1) {
-			/* lhi %r5,0 */
-			EMIT4(0xa7580000);
+		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+		/* lghi %w0,0 */
+		EMIT4_IMM(0xa7090000, REG_W0, 0);
+		/* lgr %w1,%dst */
+		EMIT4(0xb9040000, REG_W1, dst_reg);
+		/* llgfr %dst,%src (u32 cast) */
+		EMIT4(0xb9160000, dst_reg, src_reg);
+		/* dlgr %w0,%dst */
+		EMIT4(0xb9870000, REG_W0, dst_reg);
+		/* lgr %dst,%rc */
+		EMIT4(0xb9040000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
+	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		if (imm == 1) {
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* lhgi %dst,0 */
+				EMIT4_IMM(0xa7090000, dst_reg, 0);
 			break;
 		}
-		/* lhi %r4,0 */
-		EMIT4(0xa7480000);
-		/* dl %r4,<d(K)>(%r13) */
-		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
-		/* lr %r5,%r4 */
-		EMIT2(0x1854);
-		break;
-	case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
-		jit->seen |= SEEN_XREG;
-		/* nr %r5,%r12 */
-		EMIT2(0x145c);
-		break;
-	case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
-		if (test_facility(21))
-			/* nilf %r5,<K> */
-			EMIT6_IMM(0xc05b0000, K);
-		else
-			/* n %r5,<d(K)>(%r13) */
-			EMIT4_DISP(0x5450d000, EMIT_CONST(K));
-		break;
-	case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
-		jit->seen |= SEEN_XREG;
-		/* or %r5,%r12 */
-		EMIT2(0x165c);
-		break;
-	case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
-		if (test_facility(21))
-			/* oilf %r5,<K> */
-			EMIT6_IMM(0xc05d0000, K);
-		else
-			/* o %r5,<d(K)>(%r13) */
-			EMIT4_DISP(0x5650d000, EMIT_CONST(K));
+		/* lhi %w0,0 */
+		EMIT4_IMM(0xa7080000, REG_W0, 0);
+		/* lr %w1,%dst */
+		EMIT2(0x1800, REG_W1, dst_reg);
+		/* dl %w0,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+			      EMIT_CONST_U32(imm));
+		/* llgfr %dst,%rc */
+		EMIT4(0xb9160000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */
+	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		if (imm == 1) {
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* lhgi %dst,0 */
+				EMIT4_IMM(0xa7090000, dst_reg, 0);
+			break;
+		}
+		/* lghi %w0,0 */
+		EMIT4_IMM(0xa7090000, REG_W0, 0);
+		/* lgr %w1,%dst */
+		EMIT4(0xb9040000, REG_W1, dst_reg);
+		/* dlg %w0,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+			      EMIT_CONST_U64((u32) imm));
+		/* lgr %dst,%rc */
+		EMIT4(0xb9040000, dst_reg, rc_reg);
+		break;
+	}
+	/*
+	 * BPF_AND
+	 */
+	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
+		/* nr %dst,%src */
+		EMIT2(0x1400, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+		/* ngr %dst,%src */
+		EMIT4(0xb9800000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
+		/* nilf %dst,imm */
+		EMIT6_IMM(0xc00b0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
 		break;
-	case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
-	case BPF_ALU | BPF_XOR | BPF_X:
-		jit->seen |= SEEN_XREG;
-		/* xr %r5,%r12 */
-		EMIT2(0x175c);
+	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+		/* ng %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
 		break;
-	case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
-		if (!K)
+	/*
+	 * BPF_OR
+	 */
+	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+		/* or %dst,%src */
+		EMIT2(0x1600, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+		/* ogr %dst,%src */
+		EMIT4(0xb9810000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
+		/* oilf %dst,imm */
+		EMIT6_IMM(0xc00d0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
+		/* og %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
+		break;
+	/*
+	 * BPF_XOR
+	 */
+	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
+		/* xr %dst,%src */
+		EMIT2(0x1700, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
+		/* xgr %dst,%src */
+		EMIT4(0xb9820000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
+		if (!imm)
 			break;
-		/* x %r5,<d(K)>(%r13) */
-		EMIT4_DISP(0x5750d000, EMIT_CONST(K));
+		/* xilf %dst,imm */
+		EMIT6_IMM(0xc0070000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
+		/* xg %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
+		break;
+	/*
+	 * BPF_LSH
+	 */
+	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
+		/* sll %dst,0(%src) */
+		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
 		break;
-	case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
-		jit->seen |= SEEN_XREG;
-		/* sll %r5,0(%r12) */
-		EMIT4(0x8950c000);
+	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
+		/* sllg %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
 		break;
-	case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
-		if (K == 0)
+	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
+		if (imm == 0)
 			break;
-		/* sll %r5,K */
-		EMIT4_DISP(0x89500000, K);
+		/* sll %dst,imm(%r0) */
+		EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+		EMIT_ZERO(dst_reg);
 		break;
-	case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
-		jit->seen |= SEEN_XREG;
-		/* srl %r5,0(%r12) */
-		EMIT4(0x8850c000);
+	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
+		if (imm == 0)
+			break;
+		/* sllg %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_RSH
+	 */
+	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
+		/* srl %dst,0(%src) */
+		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
 		break;
-	case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
-		if (K == 0)
+	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
+		/* srlg %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
+		if (imm == 0)
 			break;
-		/* srl %r5,K */
-		EMIT4_DISP(0x88500000, K);
-		break;
-	case BPF_ALU | BPF_NEG: /* A = -A */
-		/* lcr %r5,%r5 */
-		EMIT2(0x1355);
-		break;
-	case BPF_JMP | BPF_JA: /* ip += K */
-		offset = addrs[i + K] + jit->start - jit->prg;
-		EMIT4_PCREL(0xa7f40000, offset);
-		break;
-	case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
-		mask = 0x200000; /* jh */
-		goto kbranch;
-	case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
-		mask = 0xa00000; /* jhe */
-		goto kbranch;
-	case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
-		mask = 0x800000; /* je */
-kbranch:	/* Emit compare if the branch targets are different */
-		if (filter->jt != filter->jf) {
-			if (test_facility(21))
-				/* clfi %r5,<K> */
-				EMIT6_IMM(0xc25f0000, K);
-			else
-				/* cl %r5,<d(K)>(%r13) */
-				EMIT4_DISP(0x5550d000, EMIT_CONST(K));
-		}
-branch:		if (filter->jt == filter->jf) {
-			if (filter->jt == 0)
-				break;
-			/* j <jt> */
-			offset = addrs[i + filter->jt] + jit->start - jit->prg;
-			EMIT4_PCREL(0xa7f40000, offset);
+		/* srl %dst,imm(%r0) */
+		EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
+		if (imm == 0)
 			break;
-		}
-		if (filter->jt != 0) {
-			/* brc	<mask>,<jt> */
-			offset = addrs[i + filter->jt] + jit->start - jit->prg;
-			EMIT4_PCREL(0xa7040000 | mask, offset);
-		}
-		if (filter->jf != 0) {
-			/* brc	<mask^15>,<jf> */
-			offset = addrs[i + filter->jf] + jit->start - jit->prg;
-			EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
-		}
+		/* srlg %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
 		break;
-	case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
-		mask = 0x700000; /* jnz */
-		/* Emit test if the branch targets are different */
-		if (filter->jt != filter->jf) {
-			if (K > 65535) {
-				/* lr %r4,%r5 */
-				EMIT2(0x1845);
-				/* n %r4,<d(K)>(%r13) */
-				EMIT4_DISP(0x5440d000, EMIT_CONST(K));
-			} else
-				/* tmll %r5,K */
-				EMIT4_IMM(0xa7510000, K);
-		}
-		goto branch;
-	case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
-		mask = 0x200000; /* jh */
-		goto xbranch;
-	case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
-		mask = 0xa00000; /* jhe */
-		goto xbranch;
-	case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
-		mask = 0x800000; /* je */
-xbranch:	/* Emit compare if the branch targets are different */
-		if (filter->jt != filter->jf) {
-			jit->seen |= SEEN_XREG;
-			/* clr %r5,%r12 */
-			EMIT2(0x155c);
-		}
-		goto branch;
-	case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
-		mask = 0x700000; /* jnz */
-		/* Emit test if the branch targets are different */
-		if (filter->jt != filter->jf) {
-			jit->seen |= SEEN_XREG;
-			/* lr %r4,%r5 */
-			EMIT2(0x1845);
-			/* nr %r4,%r12 */
-			EMIT2(0x144c);
+	/*
+	 * BPF_ARSH
+	 */
+	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
+		/* srag %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
+		if (imm == 0)
+			break;
+		/* srag %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_NEG
+	 */
+	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
+		/* lcr %dst,%dst */
+		EMIT2(0x1300, dst_reg, dst_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+		/* lcgr %dst,%dst */
+		EMIT4(0xb9130000, dst_reg, dst_reg);
+		break;
+	/*
+	 * BPF_FROM_BE/LE
+	 */
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+		/* s390 is big endian, therefore only clear high order bytes */
+		switch (imm) {
+		case 16: /* dst = (u16) cpu_to_be16(dst) */
+			/* llghr %dst,%dst */
+			EMIT4(0xb9850000, dst_reg, dst_reg);
+			break;
+		case 32: /* dst = (u32) cpu_to_be32(dst) */
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 64: /* dst = (u64) cpu_to_be64(dst) */
+			break;
 		}
-		goto branch;
-	case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
-		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
-		offset = jit->off_load_word;
-		goto load_abs;
-	case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
-		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
-		offset = jit->off_load_half;
-		goto load_abs;
-	case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
-		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
-		offset = jit->off_load_byte;
-load_abs:	if ((int) K < 0)
-			goto out;
-call_fn:	/* lg %r1,<d(function)>(%r13) */
-		EMIT6_DISP(0xe310d000, 0x0004, offset);
-		/* l %r3,<d(K)>(%r13) */
-		EMIT4_DISP(0x5830d000, EMIT_CONST(K));
-		/* basr %r8,%r1 */
-		EMIT2(0x0d81);
-		/* jnz <ret0> */
-		EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
 		break;
-	case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
-		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
-		offset = jit->off_load_iword;
-		goto call_fn;
-	case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
-		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
-		offset = jit->off_load_ihalf;
-		goto call_fn;
-	case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
-		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
-		offset = jit->off_load_ibyte;
-		goto call_fn;
-	case BPF_LDX | BPF_B | BPF_MSH:
-		/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
-		jit->seen |= SEEN_RET0;
-		if ((int) K < 0) {
-			/* j <ret0> */
-			EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg));
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+		switch (imm) {
+		case 16: /* dst = (u16) cpu_to_le16(dst) */
+			/* lrvr %dst,%dst */
+			EMIT4(0xb91f0000, dst_reg, dst_reg);
+			/* srl %dst,16(%r0) */
+			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
+			/* llghr %dst,%dst */
+			EMIT4(0xb9850000, dst_reg, dst_reg);
+			break;
+		case 32: /* dst = (u32) cpu_to_le32(dst) */
+			/* lrvr %dst,%dst */
+			EMIT4(0xb91f0000, dst_reg, dst_reg);
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 64: /* dst = (u64) cpu_to_le64(dst) */
+			/* lrvgr %dst,%dst */
+			EMIT4(0xb90f0000, dst_reg, dst_reg);
 			break;
 		}
-		jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
-		offset = jit->off_load_bmsh;
-		goto call_fn;
-	case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-		/* l %r5,<d(len)>(%r2) */
-		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
-		break;
-	case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
-		jit->seen |= SEEN_XREG;
-		/* l %r12,<d(len)>(%r2) */
-		EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
-		break;
-	case BPF_LD | BPF_IMM: /* A = K */
-		if (K <= 16383)
-			/* lhi %r5,K */
-			EMIT4_IMM(0xa7580000, K);
-		else if (test_facility(21))
-			/* llilf %r5,<K> */
-			EMIT6_IMM(0xc05f0000, K);
-		else
-			/* l %r5,<d(K)>(%r13) */
-			EMIT4_DISP(0x5850d000, EMIT_CONST(K));
-		break;
-	case BPF_LDX | BPF_IMM: /* X = K */
-		jit->seen |= SEEN_XREG;
-		if (K <= 16383)
-			/* lhi %r12,<K> */
-			EMIT4_IMM(0xa7c80000, K);
-		else if (test_facility(21))
-			/* llilf %r12,<K> */
-			EMIT6_IMM(0xc0cf0000, K);
-		else
-			/* l %r12,<d(K)>(%r13) */
-			EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
 		break;
-	case BPF_LD | BPF_MEM: /* A = mem[K] */
+	/*
+	 * BPF_ST(X)
+	 */
+	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
+		/* stcy %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+		/* sthy %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+		/* sty %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+		/* stg %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+		/* lhi %w0,imm */
+		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
+		/* stcy %w0,off(dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+		/* lhi %w0,imm */
+		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
+		/* sthy %w0,off(dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
 		jit->seen |= SEEN_MEM;
-		/* l %r5,<K>(%r15) */
-		EMIT4_DISP(0x5850f000,
-			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
-		break;
-	case BPF_LDX | BPF_MEM: /* X = mem[K] */
-		jit->seen |= SEEN_XREG | SEEN_MEM;
-		/* l %r12,<K>(%r15) */
-		EMIT4_DISP(0x58c0f000,
-			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
-		break;
-	case BPF_MISC | BPF_TAX: /* X = A */
-		jit->seen |= SEEN_XREG;
-		/* lr %r12,%r5 */
-		EMIT2(0x18c5);
-		break;
-	case BPF_MISC | BPF_TXA: /* A = X */
-		jit->seen |= SEEN_XREG;
-		/* lr %r5,%r12 */
-		EMIT2(0x185c);
-		break;
-	case BPF_RET | BPF_K:
-		if (K == 0) {
-			jit->seen |= SEEN_RET0;
-			if (last)
-				break;
-			/* j <ret0> */
-			EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg);
-		} else {
-			if (K <= 16383)
-				/* lghi %r2,K */
-				EMIT4_IMM(0xa7290000, K);
-			else
-				/* llgf %r2,<K>(%r13) */
-				EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K));
-			/* j <exit> */
-			if (last && !(jit->seen & SEEN_RET0))
-				break;
-			EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
-		}
 		break;
-	case BPF_RET | BPF_A:
-		/* llgfr %r2,%r5 */
-		EMIT4(0xb9160025);
+	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+		/* llilf %w0,imm  */
+		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
+		/* sty %w0,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+		/* lgfi %w0,imm */
+		EMIT6_IMM(0xc0010000, REG_W0, imm);
+		/* stg %w0,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	/*
+	 * BPF_STX XADD (atomic_add)
+	 */
+	case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+		/* laal %w0,%src,off(%dst) */
+		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
+			      dst_reg, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+		/* laalg %w0,%src,off(%dst) */
+		EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
+			      dst_reg, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	/*
+	 * BPF_LDX
+	 */
+	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+		/* llgc %dst,0(off,%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+		/* llgh %dst,0(off,%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+		/* llgf %dst,off(%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+		break;
+	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+		/* lg %dst,0(off,%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+		break;
+	/*
+	 * BPF_JMP / CALL
+	 */
+	case BPF_JMP | BPF_CALL:
+	{
+		/*
+		 * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
+		 */
+		const u64 func = (u64)__bpf_call_base + imm;
+
+		REG_SET_SEEN(BPF_REG_5);
+		jit->seen |= SEEN_FUNC;
+		/* lg %w1,<d(imm)>(%l) */
+		EMIT6_DISP(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
+			   EMIT_CONST_U64(func));
+		/* basr %r14,%w1 */
+		EMIT2(0x0d00, REG_14, REG_W1);
+		/* lgr %b0,%r2: load return value into %b0 */
+		EMIT4(0xb9040000, BPF_REG_0, REG_2);
+		break;
+	}
+	case BPF_JMP | BPF_EXIT: /* return b0 */
+		last = (i == fp->len - 1) ? 1 : 0;
+		if (last && !(jit->seen & SEEN_RET0))
+			break;
 		/* j <exit> */
 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		break;
-	case BPF_ST: /* mem[K] = A */
-		jit->seen |= SEEN_MEM;
-		/* st %r5,<K>(%r15) */
-		EMIT4_DISP(0x5050f000,
-			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
-		break;
-	case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
-		jit->seen |= SEEN_XREG | SEEN_MEM;
-		/* st %r12,<K>(%r15) */
-		EMIT4_DISP(0x50c0f000,
-			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
-		break;
-	case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-		/* icm	%r5,3,<d(protocol)>(%r2) */
-		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
-		break;
-	case BPF_ANC | SKF_AD_IFINDEX:	/* if (!skb->dev) return 0;
-					 * A = skb->dev->ifindex */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-		jit->seen |= SEEN_RET0;
-		/* lg %r1,<d(dev)>(%r2) */
-		EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
-		/* ltgr %r1,%r1 */
-		EMIT4(0xb9020011);
-		/* jz <ret0> */
-		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
-		/* l %r5,<d(ifindex)>(%r1) */
-		EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
-		break;
-	case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-		/* l %r5,<d(mark)>(%r2) */
-		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
-		break;
-	case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-		/* icm	%r5,3,<d(queue_mapping)>(%r2) */
-		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
-		break;
-	case BPF_ANC | SKF_AD_HATYPE:	/* if (!skb->dev) return 0;
-					 * A = skb->dev->type */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-		jit->seen |= SEEN_RET0;
-		/* lg %r1,<d(dev)>(%r2) */
-		EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
-		/* ltgr %r1,%r1 */
-		EMIT4(0xb9020011);
-		/* jz <ret0> */
-		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-		/* icm	%r5,3,<d(type)>(%r1) */
-		EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
-		break;
-	case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-		/* l %r5,<d(hash)>(%r2) */
-		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
-		break;
-	case BPF_ANC | SKF_AD_VLAN_TAG:
-	case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-		/* icm	%r5,3,<d(vlan_tci)>(%r2) */
-		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
-		if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
-			/* nill %r5,0xefff */
-			EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
-		} else {
-			/* nill %r5,0x1000 */
-			EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT);
-			/* srl %r5,12 */
-			EMIT4_DISP(0x88500000, 12);
-		}
+	/*
+	 * Branch relative (number of skipped instructions) to offset on
+	 * condition.
+	 *
+	 * Condition code to mask mapping:
+	 *
+	 * CC | Description	   | Mask
+	 * ------------------------------
+	 * 0  | Operands equal	   |	8
+	 * 1  | First operand low  |	4
+	 * 2  | First operand high |	2
+	 * 3  | Unused		   |	1
+	 *
+	 * For s390x relative branches: ip = ip + off_bytes
+	 * For BPF relative branches:	insn = insn + off_insns + 1
+	 *
+	 * For example for s390x with offset 0 we jump to the branch
+	 * instruction itself (loop) and for BPF with offset 0 we
+	 * branch to the instruction behind the branch.
+	 */
+	case BPF_JMP | BPF_JA: /* if (true) */
+		mask = 0xf000; /* j */
+		goto branch_oc;
+	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
+		mask = 0x2000; /* jh */
+		goto branch_ks;
+	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
+		mask = 0xa000; /* jhe */
+		goto branch_ks;
+	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
+		mask = 0x2000; /* jh */
+		goto branch_ku;
+	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
+		mask = 0xa000; /* jhe */
+		goto branch_ku;
+	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
+		mask = 0x7000; /* jne */
+		goto branch_ku;
+	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
+		mask = 0x8000; /* je */
+		goto branch_ku;
+	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
+		mask = 0x7000; /* jnz */
+		/* lgfi %w1,imm (load sign extend imm) */
+		EMIT6_IMM(0xc0010000, REG_W1, imm);
+		/* ngr %w1,%dst */
+		EMIT4(0xb9800000, REG_W1, dst_reg);
+		goto branch_oc;
+
+	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
+		mask = 0x2000; /* jh */
+		goto branch_xs;
+	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
+		mask = 0xa000; /* jhe */
+		goto branch_xs;
+	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
+		mask = 0x2000; /* jh */
+		goto branch_xu;
+	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
+		mask = 0xa000; /* jhe */
+		goto branch_xu;
+	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
+		mask = 0x7000; /* jne */
+		goto branch_xu;
+	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
+		mask = 0x8000; /* je */
+		goto branch_xu;
+	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
+		mask = 0x7000; /* jnz */
+		/* ngrk %w1,%dst,%src */
+		EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
+		goto branch_oc;
+branch_ks:
+		/* lgfi %w1,imm (load sign extend imm) */
+		EMIT6_IMM(0xc0010000, REG_W1, imm);
+		/* cgrj %dst,%w1,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
+		break;
+branch_ku:
+		/* lgfi %w1,imm (load sign extend imm) */
+		EMIT6_IMM(0xc0010000, REG_W1, imm);
+		/* clgrj %dst,%w1,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
+		break;
+branch_xs:
+		/* cgrj %dst,%src,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
+		break;
+branch_xu:
+		/* clgrj %dst,%src,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
+		break;
+branch_oc:
+		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
+		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
+		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
 		break;
-	case BPF_ANC | SKF_AD_PKTTYPE:
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-		/* ic %r5,<d(pkt_type_offset)>(%r2) */
-		EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET());
-		/* srl %r5,5 */
-		EMIT4_DISP(0x88500000, 5);
-		break;
-	case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
-#ifdef CONFIG_SMP
-		/* l %r5,<d(cpu_nr)> */
-		EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
-#else
-		/* lhi %r5,0 */
-		EMIT4(0xa7580000);
-#endif
+	/*
+	 * BPF_LD
+	 */
+	case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
+	case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
+		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+			func_addr = __pa(sk_load_byte_pos);
+		else
+			func_addr = __pa(sk_load_byte);
+		goto call_fn;
+	case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
+	case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
+		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+			func_addr = __pa(sk_load_half_pos);
+		else
+			func_addr = __pa(sk_load_half);
+		goto call_fn;
+	case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
+	case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
+		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+			func_addr = __pa(sk_load_word_pos);
+		else
+			func_addr = __pa(sk_load_word);
+		goto call_fn;
+call_fn:
+		jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
+		REG_SET_SEEN(REG_14); /* Return address of possible func call */
+
+		/*
+		 * Implicit input:
+		 *  BPF_REG_6	 (R7) : skb pointer
+		 *  REG_SKB_DATA (R12): skb data pointer
+		 *
+		 * Calculated input:
+		 *  BPF_REG_2	 (R3) : offset of byte(s) to fetch in skb
+		 *  BPF_REG_5	 (R6) : return address
+		 *
+		 * Output:
+		 *  BPF_REG_0	 (R14): data read from skb
+		 *
+		 * Scratch registers (BPF_REG_1-5)
+		 */
+
+		/* Call function: llilf %w1,func_addr  */
+		EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
+
+		/* Offset: lgfi %b2,imm */
+		EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
+		if (BPF_MODE(insn->code) == BPF_IND)
+			/* agfr %b2,%src (%src is s32 here) */
+			EMIT4(0xb9180000, BPF_REG_2, src_reg);
+
+		/* basr %b5,%w1 (%b5 is call saved) */
+		EMIT2(0x0d00, BPF_REG_5, REG_W1);
+
+		/*
+		 * Note: For fast access we jump directly after the
+		 * jnz instruction from bpf_jit.S
+		 */
+		/* jnz <ret0> */
+		EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
 		break;
 	default: /* too complex, give up */
-		goto out;
+		pr_err("Unknown opcode %02x\n", insn->code);
+		return -1;
+	}
+	return insn_count;
+}
+
+/*
+ * Compile eBPF program into s390x code
+ */
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
+{
+	int i, insn_count;
+
+	jit->lit = jit->lit_start;
+	jit->prg = 0;
+
+	bpf_jit_prologue(jit);
+	for (i = 0; i < fp->len; i += insn_count) {
+		insn_count = bpf_jit_insn(jit, fp, i);
+		if (insn_count < 0)
+			return -1;
+		jit->addrs[i + 1] = jit->prg; /* Next instruction address */
 	}
-	addrs[i] = jit->prg - jit->start;
+	bpf_jit_epilogue(jit);
+
+	jit->lit_start = jit->prg;
+	jit->size = jit->lit;
+	jit->size_prg = jit->prg;
 	return 0;
-out:
-	return -1;
 }
 
+/*
+ * Classic BPF function stub. BPF programs will be converted into
+ * eBPF and then bpf_int_jit_compile() will be called.
+ */
 void bpf_jit_compile(struct bpf_prog *fp)
 {
-	struct bpf_binary_header *header = NULL;
-	unsigned long size, prg_len, lit_len;
-	struct bpf_jit jit, cjit;
-	unsigned int *addrs;
-	int pass, i;
+}
+
+/*
+ * Compile eBPF program "fp"
+ */
+void bpf_int_jit_compile(struct bpf_prog *fp)
+{
+	struct bpf_binary_header *header;
+	struct bpf_jit jit;
+	int pass;
 
 	if (!bpf_jit_enable)
 		return;
-	addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL);
-	if (addrs == NULL)
+	memset(&jit, 0, sizeof(jit));
+	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+	if (jit.addrs == NULL)
 		return;
-	memset(&jit, 0, sizeof(cjit));
-	memset(&cjit, 0, sizeof(cjit));
-
-	for (pass = 0; pass < 10; pass++) {
-		jit.prg = jit.start;
-		jit.lit = jit.mid;
-
-		bpf_jit_prologue(&jit);
-		bpf_jit_noleaks(&jit, fp->insns);
-		for (i = 0; i < fp->len; i++) {
-			if (bpf_jit_insn(&jit, fp->insns + i, addrs, i,
-					 i == fp->len - 1))
-				goto out;
-		}
-		bpf_jit_epilogue(&jit);
-		if (jit.start) {
-			WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit);
-			if (memcmp(&jit, &cjit, sizeof(jit)) == 0)
-				break;
-		} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
-			prg_len = jit.prg - jit.start;
-			lit_len = jit.lit - jit.mid;
-			size = prg_len + lit_len;
-			if (size >= BPF_SIZE_MAX)
-				goto out;
-			header = bpf_jit_binary_alloc(size, &jit.start,
-						      2, bpf_jit_fill_hole);
-			if (!header)
-				goto out;
-			jit.prg = jit.mid = jit.start + prg_len;
-			jit.lit = jit.end = jit.start + prg_len + lit_len;
-			jit.base_ip += (unsigned long) jit.start;
-			jit.exit_ip += (unsigned long) jit.start;
-			jit.ret0_ip += (unsigned long) jit.start;
-		}
-		cjit = jit;
+	/*
+	 * Three initial passes:
+	 *   - 1/2: Determine clobbered registers
+	 *   - 3:   Calculate program size and addrs arrray
+	 */
+	for (pass = 1; pass <= 3; pass++) {
+		if (bpf_jit_prog(&jit, fp))
+			goto free_addrs;
 	}
+	/*
+	 * Final pass: Allocate and generate program
+	 */
+	if (jit.size >= BPF_SIZE_MAX)
+		goto free_addrs;
+	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+	if (!header)
+		goto free_addrs;
+	if (bpf_jit_prog(&jit, fp))
+		goto free_addrs;
 	if (bpf_jit_enable > 1) {
-		bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
-		if (jit.start)
-			print_fn_code(jit.start, jit.mid - jit.start);
+		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
+		if (jit.prg_buf)
+			print_fn_code(jit.prg_buf, jit.size_prg);
 	}
-	if (jit.start) {
+	if (jit.prg_buf) {
 		set_memory_ro((unsigned long)header, header->pages);
-		fp->bpf_func = (void *) jit.start;
+		fp->bpf_func = (void *) jit.prg_buf;
 		fp->jited = true;
 	}
-out:
-	kfree(addrs);
+free_addrs:
+	kfree(jit.addrs);
 }
 
+/*
+ * Free eBPF program
+ */
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 524c4b615821..1bd23017191e 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -7,4 +7,4 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o
-oprofile-$(CONFIG_64BIT)	+= hwsampler.o
+oprofile-y +=	hwsampler.o
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9ffe645d5989..bc927a09a172 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -21,8 +21,6 @@
 
 extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
 
-#ifdef CONFIG_64BIT
-
 #include "hwsampler.h"
 #include "op_counter.h"
 
@@ -495,14 +493,10 @@ static void oprofile_hwsampler_exit(void)
 	hwsampler_shutdown();
 }
 
-#endif /* CONFIG_64BIT */
-
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	ops->backtrace = s390_backtrace;
 
-#ifdef CONFIG_64BIT
-
 	/*
 	 * -ENODEV is not reported to the caller.  The module itself
          * will use the timer mode sampling as fallback and this is
@@ -511,14 +505,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
 
 	return 0;
-#else
-	return -ENODEV;
-#endif
 }
 
 void oprofile_arch_exit(void)
 {
-#ifdef CONFIG_64BIT
 	oprofile_hwsampler_exit();
-#endif
 }
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 753a56731951..598f023cf8a6 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -190,6 +190,11 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
 		return -ENOMEM;
 	WARN_ON((u64) zdev->fmb & 0xf);
 
+	/* reset software counters */
+	atomic64_set(&zdev->allocated_pages, 0);
+	atomic64_set(&zdev->mapped_pages, 0);
+	atomic64_set(&zdev->unmapped_pages, 0);
+
 	args.fmb_addr = virt_to_phys(zdev->fmb);
 	return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
 }
@@ -287,7 +292,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 	addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
 	return (void __iomem *) addr + offset;
 }
-EXPORT_SYMBOL_GPL(pci_iomap_range);
+EXPORT_SYMBOL(pci_iomap_range);
 
 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 {
@@ -309,7 +314,7 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 	}
 	spin_unlock(&zpci_iomap_lock);
 }
-EXPORT_SYMBOL_GPL(pci_iounmap);
+EXPORT_SYMBOL(pci_iounmap);
 
 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
 		    int size, u32 *val)
@@ -483,9 +488,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
 	airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
 }
 
-static void zpci_map_resources(struct zpci_dev *zdev)
+static void zpci_map_resources(struct pci_dev *pdev)
 {
-	struct pci_dev *pdev = zdev->pdev;
 	resource_size_t len;
 	int i;
 
@@ -499,9 +503,8 @@ static void zpci_map_resources(struct zpci_dev *zdev)
 	}
 }
 
-static void zpci_unmap_resources(struct zpci_dev *zdev)
+static void zpci_unmap_resources(struct pci_dev *pdev)
 {
-	struct pci_dev *pdev = zdev->pdev;
 	resource_size_t len;
 	int i;
 
@@ -651,7 +654,7 @@ int pcibios_add_device(struct pci_dev *pdev)
 
 	zdev->pdev = pdev;
 	pdev->dev.groups = zpci_attr_groups;
-	zpci_map_resources(zdev);
+	zpci_map_resources(pdev);
 
 	for (i = 0; i < PCI_BAR_COUNT; i++) {
 		res = &pdev->resource[i];
@@ -663,6 +666,11 @@ int pcibios_add_device(struct pci_dev *pdev)
 	return 0;
 }
 
+void pcibios_release_device(struct pci_dev *pdev)
+{
+	zpci_unmap_resources(pdev);
+}
+
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
@@ -670,7 +678,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
 	zdev->pdev = pdev;
 	zpci_debug_init_device(zdev);
 	zpci_fmb_enable_device(zdev);
-	zpci_map_resources(zdev);
 
 	return pci_enable_resources(pdev, mask);
 }
@@ -679,7 +686,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
 {
 	struct zpci_dev *zdev = get_zdev(pdev);
 
-	zpci_unmap_resources(zdev);
 	zpci_fmb_disable_device(zdev);
 	zpci_debug_exit_device(zdev);
 	zdev->pdev = NULL;
@@ -688,7 +694,8 @@ void pcibios_disable_device(struct pci_dev *pdev)
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 static int zpci_restore(struct device *dev)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
 	int ret = 0;
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -698,7 +705,7 @@ static int zpci_restore(struct device *dev)
 	if (ret)
 		goto out;
 
-	zpci_map_resources(zdev);
+	zpci_map_resources(pdev);
 	zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
 			   zdev->start_dma + zdev->iommu_size - 1,
 			   (u64) zdev->dma_table);
@@ -709,12 +716,14 @@ out:
 
 static int zpci_freeze(struct device *dev)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
 		return 0;
 
 	zpci_unregister_ioat(zdev, 0);
+	zpci_unmap_resources(pdev);
 	return clp_disable_fh(zdev);
 }
 
@@ -776,8 +785,8 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
 		zpci_cleanup_bus_resources(zdev);
 		return -EIO;
 	}
-
 	zdev->bus->max_bus_speed = zdev->max_bus_speed;
+	pci_bus_add_devices(zdev->bus);
 	return 0;
 }
 
@@ -818,6 +827,7 @@ int zpci_create_device(struct zpci_dev *zdev)
 	if (rc)
 		goto out;
 
+	mutex_init(&zdev->lock);
 	if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
 		rc = zpci_enable_device(zdev);
 		if (rc)
@@ -909,8 +919,7 @@ static int __init pci_base_init(void)
 	if (!s390_pci_probe)
 		return 0;
 
-	if (!test_facility(2) || !test_facility(69)
-	    || !test_facility(71) || !test_facility(72))
+	if (!test_facility(69) || !test_facility(71) || !test_facility(72))
 		return 0;
 
 	rc = zpci_debug_init();
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 3229a2e570df..4129b0a5fd78 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -31,12 +31,25 @@ static char *pci_perf_names[] = {
 	"Refresh operations",
 	"DMA read bytes",
 	"DMA write bytes",
-	/* software counters */
+};
+
+static char *pci_sw_names[] = {
 	"Allocated pages",
 	"Mapped pages",
 	"Unmapped pages",
 };
 
+static void pci_sw_counter_show(struct seq_file *m)
+{
+	struct zpci_dev *zdev = m->private;
+	atomic64_t *counter = &zdev->allocated_pages;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
+		seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
+			   atomic64_read(counter));
+}
+
 static int pci_perf_show(struct seq_file *m, void *v)
 {
 	struct zpci_dev *zdev = m->private;
@@ -45,8 +58,13 @@ static int pci_perf_show(struct seq_file *m, void *v)
 
 	if (!zdev)
 		return 0;
-	if (!zdev->fmb)
-		return seq_printf(m, "FMB statistics disabled\n");
+
+	mutex_lock(&zdev->lock);
+	if (!zdev->fmb) {
+		mutex_unlock(&zdev->lock);
+		seq_puts(m, "FMB statistics disabled\n");
+		return 0;
+	}
 
 	/* header */
 	seq_printf(m, "FMB @ %p\n", zdev->fmb);
@@ -63,12 +81,9 @@ static int pci_perf_show(struct seq_file *m, void *v)
 		for (i = 4; i < 6; i++)
 			seq_printf(m, "%26s:\t%llu\n",
 				   pci_perf_names[i], *(stat + i));
-	/* software counters */
-	for (i = 6; i < ARRAY_SIZE(pci_perf_names); i++)
-		seq_printf(m, "%26s:\t%llu\n",
-			   pci_perf_names[i],
-			   atomic64_read((atomic64_t *) (stat + i)));
 
+	pci_sw_counter_show(m);
+	mutex_unlock(&zdev->lock);
 	return 0;
 }
 
@@ -86,19 +101,17 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
 	if (rc)
 		return rc;
 
+	mutex_lock(&zdev->lock);
 	switch (val) {
 	case 0:
 		rc = zpci_fmb_disable_device(zdev);
-		if (rc)
-			return rc;
 		break;
 	case 1:
 		rc = zpci_fmb_enable_device(zdev);
-		if (rc)
-			return rc;
 		break;
 	}
-	return count;
+	mutex_unlock(&zdev->lock);
+	return rc ? rc : count;
 }
 
 static int pci_perf_seq_open(struct inode *inode, struct file *filp)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 4cbb29a4d615..6fd8d5836138 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -300,7 +300,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 		flags |= ZPCI_TABLE_PROTECTED;
 
 	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
-		atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
+		atomic64_add(nr_pages, &zdev->mapped_pages);
 		return dma_addr + (offset & ~PAGE_MASK);
 	}
 
@@ -328,7 +328,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 		zpci_err_hex(&dma_addr, sizeof(dma_addr));
 	}
 
-	atomic64_add(npages, &zdev->fmb->unmapped_pages);
+	atomic64_add(npages, &zdev->unmapped_pages);
 	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 	dma_free_iommu(zdev, iommu_page_index, npages);
 }
@@ -357,7 +357,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 		return NULL;
 	}
 
-	atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
 	if (dma_handle)
 		*dma_handle = map;
 	return (void *) pa;
@@ -370,7 +370,7 @@ static void s390_dma_free(struct device *dev, size_t size,
 	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 
 	size = PAGE_ALIGN(size);
-	atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
 	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
 	free_pages((unsigned long) pa, get_order(size));
 }
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 8aa271b3d1ad..b1bb2b72302c 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -64,8 +64,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
 	if (copy_from_user(buf, user_buffer, length))
 		goto out;
 
-	memcpy_toio(io_addr, buf, length);
-	ret = 0;
+	ret = zpci_memcpy_toio(io_addr, buf, length);
 out:
 	if (buf != local_buf)
 		kfree(buf);
@@ -98,16 +97,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 		goto out;
 	io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
 
-	ret = -EFAULT;
-	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
+		ret = -EFAULT;
 		goto out;
-
-	memcpy_fromio(buf, io_addr, length);
-
-	if (copy_to_user(user_buffer, buf, length))
+	}
+	ret = zpci_memcpy_fromio(buf, io_addr, length);
+	if (ret)
 		goto out;
+	if (copy_to_user(user_buffer, buf, length))
+		ret = -EFAULT;
 
-	ret = 0;
 out:
 	if (buf != local_buf)
 		kfree(buf);
diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 33864fa2a8d4..7d9ffb15c477 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -28,7 +28,6 @@
  */
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		tp_value;	/* thread pointer */
 	__u32			cpu;		/* current CPU */
@@ -53,7 +52,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.cpu		= 0,			\
 	.preempt_count	= 1,			\
 	.addr_limit	= KERNEL_DS,		\
diff --git a/arch/score/kernel/asm-offsets.c b/arch/score/kernel/asm-offsets.c
index b4d5214a7a7e..52794f9421e2 100644
--- a/arch/score/kernel/asm-offsets.c
+++ b/arch/score/kernel/asm-offsets.c
@@ -100,7 +100,6 @@ void output_thread_info_defines(void)
 {
 	COMMENT("SCORE thread_info offsets.");
 	OFFSET(TI_TASK, thread_info, task);
-	OFFSET(TI_EXEC_DOMAIN, thread_info, exec_domain);
 	OFFSET(TI_FLAGS, thread_info, flags);
 	OFFSET(TI_TP_VALUE, thread_info, tp_value);
 	OFFSET(TI_CPU, thread_info, cpu);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index eb4ef274ae9b..50057fed819d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -162,6 +162,10 @@ config NEED_DMA_MAP_STATE
 config NEED_SG_DMA_LENGTH
 	def_bool y
 
+config PGTABLE_LEVELS
+	default 3 if X2TLB
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 1bc09ee7948f..d5462b7bc514 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -58,20 +58,23 @@ static void pcibios_scanbus(struct pci_channel *hose)
 
 	need_domain_info = need_domain_info || hose->index;
 	hose->need_domain_info = need_domain_info;
-	if (bus) {
-		next_busno = bus->busn_res.end + 1;
-		/* Don't allow 8-bit bus number overflow inside the hose -
-		   reserve some space for bridges. */
-		if (next_busno > 224) {
-			next_busno = 0;
-			need_domain_info = 1;
-		}
 
-		pci_bus_size_bridges(bus);
-		pci_bus_assign_resources(bus);
-	} else {
+	if (!bus) {
 		pci_free_resource_list(&resources);
+		return;
+	}
+
+	next_busno = bus->busn_res.end + 1;
+	/* Don't allow 8-bit bus number overflow inside the hose -
+	   reserve some space for bridges. */
+	if (next_busno > 224) {
+		next_busno = 0;
+		need_domain_info = 1;
 	}
+
+	pci_bus_size_bridges(bus);
+	pci_bus_assign_resources(bus);
+	pci_bus_add_devices(bus);
 }
 
 /*
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index b9d9489a5012..9f417feaf6e8 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -99,7 +99,7 @@ static inline int init_new_context(struct task_struct *tsk,
 {
 	int i;
 
-	for (i = 0; i < num_online_cpus(); i++)
+	for_each_online_cpu(i)
 		cpu_context(i, mm) = NO_CONTEXT;
 
 	return 0;
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 657c03919627..2afa321157be 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -27,7 +27,6 @@
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;
@@ -56,7 +55,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.status		= 0,			\
 	.cpu		= 0,			\
diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
index 542225fedb11..4bd44da910f3 100644
--- a/arch/sh/kernel/asm-offsets.c
+++ b/arch/sh/kernel/asm-offsets.c
@@ -21,7 +21,6 @@ int main(void)
 {
 	/* offsets into the thread_info struct */
 	DEFINE(TI_TASK,		offsetof(struct thread_info, task));
-	DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_FLAGS,	offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT,	offsetof(struct thread_info, preempt_count));
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 67a049e75ec1..9d209a07235e 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -993,7 +993,7 @@ static struct unwinder dwarf_unwinder = {
 	.rating = 150,
 };
 
-static void dwarf_unwinder_cleanup(void)
+static void __init dwarf_unwinder_cleanup(void)
 {
 	struct dwarf_fde *fde, *next_fde;
 	struct dwarf_cie *cie, *next_cie;
@@ -1009,6 +1009,10 @@ static void dwarf_unwinder_cleanup(void)
 	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
 		kfree(cie);
 
+	if (dwarf_reg_pool)
+		mempool_destroy(dwarf_reg_pool);
+	if (dwarf_frame_pool)
+		mempool_destroy(dwarf_frame_pool);
 	kmem_cache_destroy(dwarf_reg_cachep);
 	kmem_cache_destroy(dwarf_frame_cachep);
 }
@@ -1176,17 +1180,13 @@ static int __init dwarf_unwinder_init(void)
 			sizeof(struct dwarf_reg), 0,
 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
 
-	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
-					  mempool_alloc_slab,
-					  mempool_free_slab,
-					  dwarf_frame_cachep);
+	dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
+						    dwarf_frame_cachep);
 	if (!dwarf_frame_pool)
 		goto out;
 
-	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
-					 mempool_alloc_slab,
-					 mempool_free_slab,
-					 dwarf_reg_cachep);
+	dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ,
+						  dwarf_reg_cachep);
 	if (!dwarf_reg_pool)
 		goto out;
 
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 65a1ecd77f96..eb10ff84015c 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -124,7 +124,6 @@ void irq_ctx_init(int cpu)
 
 	irqctx = (union irq_ctx *)&hardirq_stack[cpu * THREAD_SIZE];
 	irqctx->tinfo.task		= NULL;
-	irqctx->tinfo.exec_domain	= NULL;
 	irqctx->tinfo.cpu		= cpu;
 	irqctx->tinfo.preempt_count	= HARDIRQ_OFFSET;
 	irqctx->tinfo.addr_limit	= MAKE_MM_SEG(0);
@@ -133,7 +132,6 @@ void irq_ctx_init(int cpu)
 
 	irqctx = (union irq_ctx *)&softirq_stack[cpu * THREAD_SIZE];
 	irqctx->tinfo.task		= NULL;
-	irqctx->tinfo.exec_domain	= NULL;
 	irqctx->tinfo.cpu		= cpu;
 	irqctx->tinfo.preempt_count	= 0;
 	irqctx->tinfo.addr_limit	= MAKE_MM_SEG(0);
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 0b34f2a704fe..f7c3d5c25caf 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -267,19 +267,12 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 {
 	struct sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
-	int signal;
 
 	frame = get_sigframe(&ksig->ka, regs->regs[15], sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
 
 	if (_NSIG_WORDS > 1)
@@ -313,7 +306,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 
 	/* Set up registers for signal handler */
 	regs->regs[15] = (unsigned long) frame;
-	regs->regs[4] = signal; /* Arg for signal handler */
+	regs->regs[4] = sig; /* Arg for signal handler */
 	regs->regs[5] = 0;
 	regs->regs[6] = (unsigned long) &frame->sc;
 
@@ -329,8 +322,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 	if (err)
 		return -EFAULT;
 
-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
 		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
@@ -342,19 +333,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
-	int signal;
 
 	frame = get_sigframe(&ksig->ka, regs->regs[15], sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 
 	/* Create the ucontext.  */
@@ -392,7 +376,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 
 	/* Set up registers for signal handler */
 	regs->regs[15] = (unsigned long) frame;
-	regs->regs[4] = signal; /* Arg for signal handler */
+	regs->regs[4] = sig; /* Arg for signal handler */
 	regs->regs[5] = (unsigned long) &frame->info;
 	regs->regs[6] = (unsigned long) &frame->uc;
 
@@ -408,8 +392,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	if (err)
 		return -EFAULT;
 
-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
 		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 71993c6a7d94..d8a3f0d22809 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -385,12 +385,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
 
 	/* Give up earlier as i386, in case */
@@ -441,7 +435,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
 	 * All edited pointers are subject to NEFF.
 	 */
 	regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame);
-	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
+	regs->regs[REG_ARG1] = sig; /* Arg for signal handler */
 
         /* FIXME:
            The glibc profiling support for SH-5 needs to be passed a sigcontext
@@ -457,11 +451,9 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
 
 	regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);
 
-	set_fs(USER_DS);
-
 	/* Broken %016Lx */
 	pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
-		 signal, current->comm, current->pid, frame,
+		 sig, current->comm, current->pid, frame,
 		 regs->pc >> 32, regs->pc & 0xffffffff,
 		 DEREF_REG_PR >> 32, DEREF_REG_PR & 0xffffffff);
 
@@ -473,19 +465,12 @@ static int setup_rt_frame(struct ksignal *kig, sigset_t *set,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
-	int signal;
 
 	frame = get_sigframe(&ksig->ka, regs->regs[REG_SP], sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
@@ -542,15 +527,13 @@ static int setup_rt_frame(struct ksignal *kig, sigset_t *set,
 	 * All edited pointers are subject to NEFF.
 	 */
 	regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame);
-	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
+	regs->regs[REG_ARG1] = sig; /* Arg for signal handler */
 	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info;
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
 	regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);
 
-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
-		 signal, current->comm, current->pid, frame,
+		 sig, current->comm, current->pid, frame,
 		 regs->pc >> 32, regs->pc & 0xffffffff,
 		 DEREF_REG_PR >> 32, DEREF_REG_PR & 0xffffffff);
 
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index fc5acfc93c92..de6be008fc01 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -363,7 +363,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 		smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1);
 	} else {
 		int i;
-		for (i = 0; i < num_online_cpus(); i++)
+		for_each_online_cpu(i)
 			if (smp_processor_id() != i)
 				cpu_context(i, mm) = 0;
 	}
@@ -400,7 +400,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
 		smp_call_function(flush_tlb_range_ipi, (void *)&fd, 1);
 	} else {
 		int i;
-		for (i = 0; i < num_online_cpus(); i++)
+		for_each_online_cpu(i)
 			if (smp_processor_id() != i)
 				cpu_context(i, mm) = 0;
 	}
@@ -443,7 +443,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 		smp_call_function(flush_tlb_page_ipi, (void *)&fd, 1);
 	} else {
 		int i;
-		for (i = 0; i < num_online_cpus(); i++)
+		for_each_online_cpu(i)
 			if (smp_processor_id() != i)
 				cpu_context(i, vma->vm_mm) = 0;
 	}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 96ac69c5eba0..e49502acbab4 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -86,6 +86,9 @@ config ARCH_DEFCONFIG
 	default "arch/sparc/configs/sparc32_defconfig" if SPARC32
 	default "arch/sparc/configs/sparc64_defconfig" if SPARC64
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+
 config IOMMU_HELPER
 	bool
 	default y if SPARC64
@@ -143,6 +146,10 @@ config GENERIC_ISA_DMA
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y if SPARC64
 
+config PGTABLE_LEVELS
+	default 4 if 64BIT
+	default 3
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 4f6725ff4c33..f5b6537306f0 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
 				   unsigned long reg_val);
 #endif
 
+
+#define HV_FAST_M7_GET_PERFREG	0x43
+#define HV_FAST_M7_SET_PERFREG	0x44
+
+#ifndef	__ASSEMBLY__
+unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
+				      unsigned long *reg_val);
+unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
+				      unsigned long reg_val);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER			0x00
 #define HV_CORE_PUTCHAR			0x01
@@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
 #define HV_GRP_SDIO			0x0108
 #define HV_GRP_SDIO_ERR			0x0109
 #define HV_GRP_REBOOT_DATA		0x0110
+#define HV_GRP_M7_PERF			0x0114
 #define HV_GRP_NIAG_PERF		0x0200
 #define HV_GRP_FIRE_PERF		0x0201
 #define HV_GRP_N2_CPU			0x0202
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 9b672be70dda..50d4840d9aeb 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -407,16 +407,16 @@ static inline void iounmap(volatile void __iomem *addr)
 {
 }
 
-#define ioread8(X)			readb(X)
-#define ioread16(X)			readw(X)
-#define ioread16be(X)			__raw_readw(X)
-#define ioread32(X)			readl(X)
-#define ioread32be(X)			__raw_readl(X)
-#define iowrite8(val,X)			writeb(val,X)
-#define iowrite16(val,X)		writew(val,X)
-#define iowrite16be(val,X)		__raw_writew(val,X)
-#define iowrite32(val,X)		writel(val,X)
-#define iowrite32be(val,X)		__raw_writel(val,X)
+#define ioread8			readb
+#define ioread16		readw
+#define ioread16be		__raw_readw
+#define ioread32		readl
+#define ioread32be		__raw_readl
+#define iowrite8		writeb
+#define iowrite16		writew
+#define iowrite16be		__raw_writew
+#define iowrite32		writel
+#define iowrite32be		__raw_writel
 
 /* Create a virtual mapping cookie for an IO port range */
 void __iomem *ioport_map(unsigned long port, unsigned int nr);
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index 2b9321ab064d..cd0d69fa7592 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -16,6 +16,7 @@
 #define IOPTE_WRITE   0x0000000000000002UL
 
 #define IOMMU_NUM_CTXS	4096
+#include <linux/iommu-common.h>
 
 struct iommu_arena {
 	unsigned long	*map;
@@ -24,11 +25,10 @@ struct iommu_arena {
 };
 
 struct iommu {
+	struct iommu_map_table	tbl;
 	spinlock_t		lock;
-	struct iommu_arena	arena;
-	void			(*flush_all)(struct iommu *);
+	u32			dma_addr_mask;
 	iopte_t			*page_table;
-	u32			page_table_map_base;
 	unsigned long		iommu_control;
 	unsigned long		iommu_tsbbase;
 	unsigned long		iommu_flush;
@@ -40,7 +40,6 @@ struct iommu {
 	unsigned long		dummy_page_pa;
 	unsigned long		ctx_lowest_free;
 	DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS);
-	u32			dma_addr_mask;
 };
 
 struct strbuf {
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index ec2e2e2aba7d..cc9b04a2b11b 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_SPARC_JUMP_LABEL_H
 #define _ASM_SPARC_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -22,8 +22,6 @@ l_yes:
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -32,4 +30,5 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/sparc/include/asm/seccomp.h b/arch/sparc/include/asm/seccomp.h
index adca1bce41d4..5ef8826d44f8 100644
--- a/arch/sparc/include/asm/seccomp.h
+++ b/arch/sparc/include/asm/seccomp.h
@@ -1,15 +1,10 @@
 #ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
 
 #include <linux/unistd.h>
 
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#define __NR_seccomp_read_32 __NR_read
-#define __NR_seccomp_write_32 __NR_write
-#define __NR_seccomp_exit_32 __NR_exit
 #define __NR_seccomp_sigreturn_32 __NR_sigreturn
 
+#include <asm-generic/seccomp.h>
+
 #endif /* _ASM_SECCOMP_H */
diff --git a/arch/sparc/include/asm/starfire.h b/arch/sparc/include/asm/starfire.h
index c100dc27a0a9..176fa0ad19f1 100644
--- a/arch/sparc/include/asm/starfire.h
+++ b/arch/sparc/include/asm/starfire.h
@@ -12,7 +12,6 @@
 extern int this_is_starfire;
 
 void check_if_starfire(void);
-int starfire_hard_smp_processor_id(void);
 void starfire_hookup(int);
 unsigned int starfire_translate(unsigned long imap, unsigned int upaid);
 
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index fd7bd0a440ca..229475f0d7ce 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -27,7 +27,6 @@
 struct thread_info {
 	unsigned long		uwinmask;
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	int			cpu;		/* cpu we're on */
 	int			preempt_count;	/* 0 => preemptable,
@@ -35,6 +34,8 @@ struct thread_info {
 	int			softirq_count;
 	int			hardirq_count;
 
+	u32 __unused;
+
 	/* Context switch saved kernel state. */
 	unsigned long ksp;	/* ... ksp __attribute__ ((aligned (8))); */
 	unsigned long kpc;
@@ -56,7 +57,6 @@ struct thread_info {
 {							\
 	.uwinmask	=	0,			\
 	.task		=	&tsk,			\
-	.exec_domain	=	&default_exec_domain,	\
 	.flags		=	0,			\
 	.cpu		=	0,			\
 	.preempt_count	=	INIT_PREEMPT_COUNT,	\
@@ -85,12 +85,11 @@ register struct thread_info *current_thread_info_reg asm("g6");
  */
 #define TI_UWINMASK	0x00	/* uwinmask */
 #define TI_TASK		0x04
-#define TI_EXECDOMAIN	0x08	/* exec_domain */
-#define TI_FLAGS	0x0c
-#define TI_CPU		0x10
-#define TI_PREEMPT	0x14	/* preempt_count */
-#define TI_SOFTIRQ	0x18	/* softirq_count */
-#define TI_HARDIRQ	0x1c	/* hardirq_count */
+#define TI_FLAGS	0x08
+#define TI_CPU		0x0c
+#define TI_PREEMPT	0x10	/* preempt_count */
+#define TI_SOFTIRQ	0x14	/* softirq_count */
+#define TI_HARDIRQ	0x18	/* hardirq_count */
 #define TI_KSP		0x20	/* ksp */
 #define TI_KPC		0x24	/* kpc (ldd'ed with kpc) */
 #define TI_KPSR		0x28	/* kpsr */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index ff455164732a..bde59825d06c 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -31,7 +31,6 @@
 #include <asm/types.h>
 
 struct task_struct;
-struct exec_domain;
 
 struct thread_info {
 	/* D$ line 1 */
@@ -44,7 +43,6 @@ struct thread_info {
 	/* D$ line 2 */
 	unsigned long		fault_address;
 	struct pt_regs		*kregs;
-	struct exec_domain	*exec_domain;
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u8			new_child;
 	__u8			current_ds;
@@ -80,18 +78,17 @@ struct thread_info {
 #define TI_KSP		0x00000018
 #define TI_FAULT_ADDR	0x00000020
 #define TI_KREGS	0x00000028
-#define TI_EXEC_DOMAIN	0x00000030
-#define TI_PRE_COUNT	0x00000038
-#define TI_NEW_CHILD	0x0000003c
-#define TI_CURRENT_DS	0x0000003d
-#define TI_CPU		0x0000003e
-#define TI_UTRAPS	0x00000040
-#define TI_REG_WINDOW	0x00000048
-#define TI_RWIN_SPTRS	0x000003c8
-#define TI_GSR		0x00000400
-#define TI_XFSR		0x00000438
-#define TI_KUNA_REGS	0x00000470
-#define TI_KUNA_INSN	0x00000478
+#define TI_PRE_COUNT	0x00000030
+#define TI_NEW_CHILD	0x00000034
+#define TI_CURRENT_DS	0x00000035
+#define TI_CPU		0x00000036
+#define TI_UTRAPS	0x00000038
+#define TI_REG_WINDOW	0x00000040
+#define TI_RWIN_SPTRS	0x000003c0
+#define TI_GSR		0x000003f8
+#define TI_XFSR		0x00000430
+#define TI_KUNA_REGS	0x00000468
+#define TI_KUNA_INSN	0x00000470
 #define TI_FPREGS	0x00000480
 
 /* We embed this in the uppermost byte of thread_info->flags */
@@ -119,7 +116,6 @@ struct thread_info {
 {							\
 	.task		=	&tsk,			\
 	.current_ds	=	ASI_P,			\
-	.exec_domain	=	&default_exec_domain,	\
 	.preempt_count	=	INIT_PREEMPT_COUNT,	\
 }
 
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 88d322b67fac..07cc49e541f4 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -98,11 +98,7 @@ void sun4v_do_mna(struct pt_regs *regs,
 void do_privop(struct pt_regs *regs);
 void do_privact(struct pt_regs *regs);
 void do_cee(struct pt_regs *regs);
-void do_cee_tl1(struct pt_regs *regs);
-void do_dae_tl1(struct pt_regs *regs);
-void do_iae_tl1(struct pt_regs *regs);
 void do_div0_tl1(struct pt_regs *regs);
-void do_fpdis_tl1(struct pt_regs *regs);
 void do_fpieee_tl1(struct pt_regs *regs);
 void do_fpother_tl1(struct pt_regs *regs);
 void do_ill_tl1(struct pt_regs *regs);
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 5c55145bfbf0..662500fa555f 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -48,6 +48,7 @@ static struct api_info api_table[] = {
 	{ .group = HV_GRP_VT_CPU,				},
 	{ .group = HV_GRP_T5_CPU,				},
 	{ .group = HV_GRP_DIAG,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_M7_PERF,				},
 };
 
 static DEFINE_SPINLOCK(hvapi_lock);
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index caedf8320416..afbaba52d2f1 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
 	retl
 	 nop
 ENDPROC(sun4v_t5_set_perfreg)
+
+ENTRY(sun4v_m7_get_perfreg)
+	mov	%o1, %o4
+	mov	HV_FAST_M7_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	nop
+ENDPROC(sun4v_m7_get_perfreg)
+
+ENTRY(sun4v_m7_set_perfreg)
+	mov	HV_FAST_M7_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	nop
+ENDPROC(sun4v_m7_set_perfreg)
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index bfa4d0c2df42..5320689c06e9 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -13,6 +13,7 @@
 #include <linux/errno.h>
 #include <linux/iommu-helper.h>
 #include <linux/bitmap.h>
+#include <linux/iommu-common.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
@@ -45,8 +46,9 @@
 			       "i" (ASI_PHYS_BYPASS_EC_E))
 
 /* Must be invoked under the IOMMU lock. */
-static void iommu_flushall(struct iommu *iommu)
+static void iommu_flushall(struct iommu_map_table *iommu_map_table)
 {
+	struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
 	if (iommu->iommu_flushinv) {
 		iommu_write(iommu->iommu_flushinv, ~(u64)0);
 	} else {
@@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
 	iopte_val(*iopte) = val;
 }
 
-/* Based almost entirely upon the ppc64 iommu allocator.  If you use the 'handle'
- * facility it must all be done in one pass while under the iommu lock.
- *
- * On sun4u platforms, we only flush the IOMMU once every time we've passed
- * over the entire page table doing allocations.  Therefore we only ever advance
- * the hint and cannot backtrack it.
- */
-unsigned long iommu_range_alloc(struct device *dev,
-				struct iommu *iommu,
-				unsigned long npages,
-				unsigned long *handle)
-{
-	unsigned long n, end, start, limit, boundary_size;
-	struct iommu_arena *arena = &iommu->arena;
-	int pass = 0;
-
-	/* This allocator was derived from x86_64's bit string search */
-
-	/* Sanity check */
-	if (unlikely(npages == 0)) {
-		if (printk_ratelimit())
-			WARN_ON(1);
-		return DMA_ERROR_CODE;
-	}
-
-	if (handle && *handle)
-		start = *handle;
-	else
-		start = arena->hint;
-
-	limit = arena->limit;
-
-	/* The case below can happen if we have a small segment appended
-	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the beginning and flush.
-	 */
-	if (start >= limit) {
-		start = 0;
-		if (iommu->flush_all)
-			iommu->flush_all(iommu);
-	}
-
- again:
-
-	if (dev)
-		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << IO_PAGE_SHIFT);
-	else
-		boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
-
-	n = iommu_area_alloc(arena->map, limit, start, npages,
-			     iommu->page_table_map_base >> IO_PAGE_SHIFT,
-			     boundary_size >> IO_PAGE_SHIFT, 0);
-	if (n == -1) {
-		if (likely(pass < 1)) {
-			/* First failure, rescan from the beginning.  */
-			start = 0;
-			if (iommu->flush_all)
-				iommu->flush_all(iommu);
-			pass++;
-			goto again;
-		} else {
-			/* Second failure, give up */
-			return DMA_ERROR_CODE;
-		}
-	}
-
-	end = n + npages;
-
-	arena->hint = end;
-
-	/* Update handle for SG allocations */
-	if (handle)
-		*handle = end;
-
-	return n;
-}
-
-void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
-{
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long entry;
-
-	entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
-
-	bitmap_clear(arena->map, entry, npages);
-}
-
 int iommu_table_init(struct iommu *iommu, int tsbsize,
 		     u32 dma_offset, u32 dma_addr_mask,
 		     int numa_node)
@@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
 	iommu->ctx_lowest_free = 1;
-	iommu->page_table_map_base = dma_offset;
+	iommu->tbl.table_map_base = dma_offset;
 	iommu->dma_addr_mask = dma_addr_mask;
 
 	/* Allocate and initialize the free area map.  */
 	sz = num_tsb_entries / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
-	if (!iommu->arena.map) {
-		printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
+	iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
+	if (!iommu->tbl.map)
 		return -ENOMEM;
-	}
-	memset(iommu->arena.map, 0, sz);
-	iommu->arena.limit = num_tsb_entries;
+	memset(iommu->tbl.map, 0, sz);
 
-	if (tlb_type != hypervisor)
-		iommu->flush_all = iommu_flushall;
+	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
+			    (tlb_type != hypervisor ? iommu_flushall : NULL),
+			    false, 1, false);
 
 	/* Allocate and initialize the dummy page which we
 	 * set inactive IO PTEs to point to.
@@ -235,18 +147,20 @@ out_free_dummy_page:
 	iommu->dummy_page = 0UL;
 
 out_free_map:
-	kfree(iommu->arena.map);
-	iommu->arena.map = NULL;
+	kfree(iommu->tbl.map);
+	iommu->tbl.map = NULL;
 
 	return -ENOMEM;
 }
 
-static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+static inline iopte_t *alloc_npages(struct device *dev,
+				    struct iommu *iommu,
 				    unsigned long npages)
 {
 	unsigned long entry;
 
-	entry = iommu_range_alloc(dev, iommu, npages, NULL);
+	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+				      (unsigned long)(-1), 0);
 	if (unlikely(entry == DMA_ERROR_CODE))
 		return NULL;
 
@@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 				   dma_addr_t *dma_addrp, gfp_t gfp,
 				   struct dma_attrs *attrs)
 {
-	unsigned long flags, order, first_page;
+	unsigned long order, first_page;
 	struct iommu *iommu;
 	struct page *page;
 	int npages, nid;
@@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 
 	iommu = dev->archdata.iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (unlikely(iopte == NULL)) {
 		free_pages(first_page, order);
 		return NULL;
 	}
 
-	*dma_addrp = (iommu->page_table_map_base +
+	*dma_addrp = (iommu->tbl.table_map_base +
 		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = (void *) first_page;
 	npages = size >> IO_PAGE_SHIFT;
@@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 				 struct dma_attrs *attrs)
 {
 	struct iommu *iommu;
-	unsigned long flags, order, npages;
+	unsigned long order, npages;
 
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	iommu = dev->archdata.iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
-
-	iommu_range_free(iommu, dvma, npages);
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
 
 	order = get_order(size);
 	if (order < 10)
@@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
 	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	base = alloc_npages(dev, iommu, npages);
+	spin_lock_irqsave(&iommu->lock, flags);
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
 		ctx = iommu_alloc_ctx(iommu);
@@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
 	if (unlikely(!base))
 		goto bad;
 
-	bus_addr = (iommu->page_table_map_base +
+	bus_addr = (iommu->tbl.table_map_base +
 		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
@@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 	base = iommu->page_table +
-		((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
 	bus_addr &= IO_PAGE_MASK;
 
 	spin_lock_irqsave(&iommu->lock, flags);
@@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	for (i = 0; i < npages; i++)
 		iopte_make_dummy(iommu, base + i);
 
-	iommu_range_free(iommu, bus_addr, npages);
-
 	iommu_free_ctx(iommu, ctx);
-
 	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
 }
 
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 	max_seg_size = dma_get_max_seg_size(dev);
 	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
-	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
+	base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long paddr, npages, entry, out_entry = 0, slen;
 		iopte_t *base;
@@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 		/* Allocate iommu entries for that segment */
 		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
 		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
-		entry = iommu_range_alloc(dev, iommu, npages, &handle);
+		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
 		if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 		base = iommu->page_table + entry;
 
 		/* Convert entry to a dma_addr_t */
-		dma_addr = iommu->page_table_map_base +
+		dma_addr = iommu->tbl.table_map_base +
 			(entry << IO_PAGE_SHIFT);
 		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
@@ -654,15 +562,17 @@ iommu_map_failed:
 			vaddr = s->dma_address & IO_PAGE_MASK;
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
-			iommu_range_free(iommu, vaddr, npages);
 
-			entry = (vaddr - iommu->page_table_map_base)
+			entry = (vaddr - iommu->tbl.table_map_base)
 				>> IO_PAGE_SHIFT;
 			base = iommu->page_table + entry;
 
 			for (j = 0; j < npages; j++)
 				iopte_make_dummy(iommu, base + j);
 
+			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+					     DMA_ERROR_CODE);
+
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
 		}
@@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
 	if (iommu->iommu_ctxflush) {
 		iopte_t *base;
 		u32 bus_addr;
+		struct iommu_map_table *tbl = &iommu->tbl;
 
 		bus_addr = sg->dma_address & IO_PAGE_MASK;
 		base = iommu->page_table +
-			((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+			((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);
 
 		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
 	}
@@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		if (!len)
 			break;
 		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
-		iommu_range_free(iommu, dma_handle, npages);
 
-		entry = ((dma_handle - iommu->page_table_map_base)
+		entry = ((dma_handle - iommu->tbl.table_map_base)
 			 >> IO_PAGE_SHIFT);
 		base = iommu->page_table + entry;
 
@@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		for (i = 0; i < npages; i++)
 			iopte_make_dummy(iommu, base + i);
 
+		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+				     DMA_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
@@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
 	if (iommu->iommu_ctxflush &&
 	    strbuf->strbuf_ctxflush) {
 		iopte_t *iopte;
+		struct iommu_map_table *tbl = &iommu->tbl;
 
 		iopte = iommu->page_table +
-			((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT);
+			((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
 		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
 	}
 
@@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
 	if (iommu->iommu_ctxflush &&
 	    strbuf->strbuf_ctxflush) {
 		iopte_t *iopte;
+		struct iommu_map_table *tbl = &iommu->tbl;
 
-		iopte = iommu->page_table +
-			((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		iopte = iommu->page_table + ((sglist[0].dma_address -
+			tbl->table_map_base) >> IO_PAGE_SHIFT);
 		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
 	}
 
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
index 1ec0de4156e7..f4be0d724fc6 100644
--- a/arch/sparc/kernel/iommu_common.h
+++ b/arch/sparc/kernel/iommu_common.h
@@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry,
 	return iommu_is_span_boundary(entry, nr, shift, boundary_size);
 }
 
-unsigned long iommu_range_alloc(struct device *dev,
-				struct iommu *iommu,
-				unsigned long npages,
-				unsigned long *handle);
-void iommu_range_free(struct iommu *iommu,
-		      dma_addr_t dma_addr,
-		      unsigned long npages);
-
 #endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 274a9f59d95c..7d3ca30fcd15 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/bitmap.h>
+#include <linux/iommu-common.h>
 
 #include <asm/hypervisor.h>
 #include <asm/iommu.h>
@@ -27,6 +28,10 @@
 #define DRV_MODULE_VERSION	"1.1"
 #define DRV_MODULE_RELDATE	"July 22, 2008"
 
+#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
+#define COOKIE_PGSZ_CODE_SHIFT	60ULL
+
+
 static char version[] =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 #define LDC_PACKET_SIZE		64
@@ -98,10 +103,10 @@ static const struct ldc_mode_ops stream_ops;
 int ldom_domaining_enabled;
 
 struct ldc_iommu {
-	/* Protects arena alloc/free.  */
+	/* Protects ldc_unmap.  */
 	spinlock_t			lock;
-	struct iommu_arena		arena;
 	struct ldc_mtable_entry		*page_table;
+	struct iommu_map_table		iommu_map_table;
 };
 
 struct ldc_channel {
@@ -998,31 +1003,59 @@ static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 	free_pages((unsigned long)q, order);
 }
 
+static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
+{
+	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
+	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
+
+	cookie &= ~COOKIE_PGSZ_CODE;
+
+	return (cookie >> (13ULL + (szcode * 3ULL)));
+}
+
+static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
+		      unsigned long entry, unsigned long npages)
+{
+	struct ldc_mtable_entry *base;
+	unsigned long i, shift;
+
+	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
+	base = iommu->page_table + entry;
+	for (i = 0; i < npages; i++) {
+		if (base->cookie)
+			sun4v_ldc_revoke(id, cookie + (i << shift),
+					 base->cookie);
+		base->mte = 0;
+	}
+}
+
 /* XXX Make this configurable... XXX */
 #define LDC_IOTABLE_SIZE	(8 * 1024)
 
-static int ldc_iommu_init(struct ldc_channel *lp)
+static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
 {
 	unsigned long sz, num_tsb_entries, tsbsize, order;
-	struct ldc_iommu *iommu = &lp->iommu;
+	struct ldc_iommu *ldc_iommu = &lp->iommu;
+	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
 	struct ldc_mtable_entry *table;
 	unsigned long hv_err;
 	int err;
 
 	num_tsb_entries = LDC_IOTABLE_SIZE;
 	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
-
-	spin_lock_init(&iommu->lock);
+	spin_lock_init(&ldc_iommu->lock);
 
 	sz = num_tsb_entries / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
-	if (!iommu->arena.map) {
+	iommu->map = kzalloc(sz, GFP_KERNEL);
+	if (!iommu->map) {
 		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
 		return -ENOMEM;
 	}
-
-	iommu->arena.limit = num_tsb_entries;
+	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
+			    NULL, false /* no large pool */,
+			    1 /* npools */,
+			    true /* skip span boundary check */);
 
 	order = get_order(tsbsize);
 
@@ -1037,7 +1070,7 @@ static int ldc_iommu_init(struct ldc_channel *lp)
 
 	memset(table, 0, PAGE_SIZE << order);
 
-	iommu->page_table = table;
+	ldc_iommu->page_table = table;
 
 	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
 					 num_tsb_entries);
@@ -1049,31 +1082,32 @@ static int ldc_iommu_init(struct ldc_channel *lp)
 
 out_free_table:
 	free_pages((unsigned long) table, order);
-	iommu->page_table = NULL;
+	ldc_iommu->page_table = NULL;
 
 out_free_map:
-	kfree(iommu->arena.map);
-	iommu->arena.map = NULL;
+	kfree(iommu->map);
+	iommu->map = NULL;
 
 	return err;
 }
 
 static void ldc_iommu_release(struct ldc_channel *lp)
 {
-	struct ldc_iommu *iommu = &lp->iommu;
+	struct ldc_iommu *ldc_iommu = &lp->iommu;
+	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
 	unsigned long num_tsb_entries, tsbsize, order;
 
 	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
 
-	num_tsb_entries = iommu->arena.limit;
+	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
 	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
 	order = get_order(tsbsize);
 
-	free_pages((unsigned long) iommu->page_table, order);
-	iommu->page_table = NULL;
+	free_pages((unsigned long) ldc_iommu->page_table, order);
+	ldc_iommu->page_table = NULL;
 
-	kfree(iommu->arena.map);
-	iommu->arena.map = NULL;
+	kfree(iommu->map);
+	iommu->map = NULL;
 }
 
 struct ldc_channel *ldc_alloc(unsigned long id,
@@ -1140,7 +1174,7 @@ struct ldc_channel *ldc_alloc(unsigned long id,
 
 	lp->id = id;
 
-	err = ldc_iommu_init(lp);
+	err = ldc_iommu_init(name, lp);
 	if (err)
 		goto out_free_ldc;
 
@@ -1885,40 +1919,6 @@ int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
 }
 EXPORT_SYMBOL(ldc_read);
 
-static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
-{
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long n, start, end, limit;
-	int pass;
-
-	limit = arena->limit;
-	start = arena->hint;
-	pass = 0;
-
-again:
-	n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
-	end = n + npages;
-	if (unlikely(end >= limit)) {
-		if (likely(pass < 1)) {
-			limit = start;
-			start = 0;
-			pass++;
-			goto again;
-		} else {
-			/* Scanned the whole thing, give up. */
-			return -1;
-		}
-	}
-	bitmap_set(arena->map, n, npages);
-
-	arena->hint = end;
-
-	return n;
-}
-
-#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
-#define COOKIE_PGSZ_CODE_SHIFT	60ULL
-
 static u64 pagesize_code(void)
 {
 	switch (PAGE_SIZE) {
@@ -1945,23 +1945,14 @@ static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
 		page_offset);
 }
 
-static u64 cookie_to_index(u64 cookie, unsigned long *shift)
-{
-	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
-
-	cookie &= ~COOKIE_PGSZ_CODE;
-
-	*shift = szcode * 3;
-
-	return (cookie >> (13ULL + (szcode * 3ULL)));
-}
 
 static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
 					     unsigned long npages)
 {
 	long entry;
 
-	entry = arena_alloc(iommu, npages);
+	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
+				      npages, NULL, (unsigned long)-1, 0);
 	if (unlikely(entry < 0))
 		return NULL;
 
@@ -2090,7 +2081,7 @@ int ldc_map_sg(struct ldc_channel *lp,
 	       struct ldc_trans_cookie *cookies, int ncookies,
 	       unsigned int map_perm)
 {
-	unsigned long i, npages, flags;
+	unsigned long i, npages;
 	struct ldc_mtable_entry *base;
 	struct cookie_state state;
 	struct ldc_iommu *iommu;
@@ -2109,9 +2100,7 @@ int ldc_map_sg(struct ldc_channel *lp,
 
 	iommu = &lp->iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	base = alloc_npages(iommu, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (!base)
 		return -ENOMEM;
@@ -2136,7 +2125,7 @@ int ldc_map_single(struct ldc_channel *lp,
 		   struct ldc_trans_cookie *cookies, int ncookies,
 		   unsigned int map_perm)
 {
-	unsigned long npages, pa, flags;
+	unsigned long npages, pa;
 	struct ldc_mtable_entry *base;
 	struct cookie_state state;
 	struct ldc_iommu *iommu;
@@ -2152,9 +2141,7 @@ int ldc_map_single(struct ldc_channel *lp,
 
 	iommu = &lp->iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	base = alloc_npages(iommu, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (!base)
 		return -ENOMEM;
@@ -2172,35 +2159,25 @@ int ldc_map_single(struct ldc_channel *lp,
 }
 EXPORT_SYMBOL(ldc_map_single);
 
+
 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
 			u64 cookie, u64 size)
 {
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long i, shift, index, npages;
-	struct ldc_mtable_entry *base;
+	unsigned long npages, entry;
 
 	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
-	index = cookie_to_index(cookie, &shift);
-	base = iommu->page_table + index;
 
-	BUG_ON(index > arena->limit ||
-	       (index + npages) > arena->limit);
-
-	for (i = 0; i < npages; i++) {
-		if (base->cookie)
-			sun4v_ldc_revoke(id, cookie + (i << shift),
-					 base->cookie);
-		base->mte = 0;
-		__clear_bit(index + i, arena->map);
-	}
+	entry = ldc_cookie_to_index(cookie, iommu);
+	ldc_demap(iommu, id, cookie, entry, npages);
+	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
 }
 
 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
 	       int ncookies)
 {
 	struct ldc_iommu *iommu = &lp->iommu;
-	unsigned long flags;
 	int i;
+	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 	for (i = 0; i < ncookies; i++) {
@@ -2313,7 +2290,7 @@ void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
 	if (len & (8UL - 1))
 		return ERR_PTR(-EINVAL);
 
-	buf = kzalloc(len, GFP_KERNEL);
+	buf = kzalloc(len, GFP_ATOMIC);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 899b7203a4e4..4371f72ff025 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -34,15 +34,17 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
 
 	root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info,
 				     &resources);
-	if (root_bus) {
-		/* Setup IRQs of all devices using custom routines */
-		pci_fixup_irqs(pci_common_swizzle, info->map_irq);
-
-		/* Assign devices with resources */
-		pci_assign_unassigned_resources();
-	} else {
+	if (!root_bus) {
 		pci_free_resource_list(&resources);
+		return;
 	}
+
+	/* Setup IRQs of all devices using custom routines */
+	pci_fixup_irqs(pci_common_swizzle, info->map_irq);
+
+	/* Assign devices with resources */
+	pci_assign_unassigned_resources();
+	pci_bus_add_devices(root_bus);
 }
 
 void pcibios_fixup_bus(struct pci_bus *pbus)
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 99632a87e697..26c80e18d7b1 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -130,26 +130,26 @@ static struct mdesc_mem_ops memblock_mdesc_ops = {
 static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
 {
 	unsigned int handle_size;
+	struct mdesc_handle *hp;
+	unsigned long addr;
 	void *base;
 
 	handle_size = (sizeof(struct mdesc_handle) -
 		       sizeof(struct mdesc_hdr) +
 		       mdesc_size);
 
+	/*
+	 * Allocation has to succeed because mdesc update would be missed
+	 * and such events are not retransmitted.
+	 */
 	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL);
-	if (base) {
-		struct mdesc_handle *hp;
-		unsigned long addr;
-
-		addr = (unsigned long)base;
-		addr = (addr + 15UL) & ~15UL;
-		hp = (struct mdesc_handle *) addr;
+	addr = (unsigned long)base;
+	addr = (addr + 15UL) & ~15UL;
+	hp = (struct mdesc_handle *) addr;
 
-		mdesc_handle_init(hp, handle_size, base);
-		return hp;
-	}
+	mdesc_handle_init(hp, handle_size, base);
 
-	return NULL;
+	return hp;
 }
 
 static void mdesc_kfree(struct mdesc_handle *hp)
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9ce5afe167ff..6f7251fd2eab 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -639,10 +639,7 @@ static void pci_claim_bus_resources(struct pci_bus *bus)
 				       (unsigned long long)r->end,
 				       (unsigned int)r->flags);
 
-			if (pci_claim_resource(dev, i) == 0)
-				continue;
-
-			pci_claim_bridge_resource(dev, i);
+			pci_claim_resource(dev, i);
 		}
 	}
 
@@ -677,11 +674,10 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
 	}
 
 	pci_of_scan_bus(pbm, node, bus);
-	pci_bus_add_devices(bus);
 	pci_bus_register_of_sysfs(bus);
 
 	pci_claim_bus_resources(bus);
-
+	pci_bus_add_devices(bus);
 	return bus;
 }
 
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 47ddbd496a1e..d2fe57dad433 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -15,6 +15,7 @@
 #include <linux/export.h>
 #include <linux/log2.h>
 #include <linux/of_device.h>
+#include <linux/iommu-common.h>
 
 #include <asm/iommu.h>
 #include <asm/irq.h>
@@ -155,15 +156,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 
 	iommu = dev->archdata.iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
-	entry = iommu_range_alloc(dev, iommu, npages, NULL);
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+				      (unsigned long)(-1), 0);
 
 	if (unlikely(entry == DMA_ERROR_CODE))
 		goto range_alloc_fail;
 
-	*dma_addrp = (iommu->page_table_map_base +
-		      (entry << IO_PAGE_SHIFT));
+	*dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
 	ret = (void *) first_page;
 	first_page = __pa(first_page);
 
@@ -188,45 +187,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 	return ret;
 
 iommu_map_fail:
-	/* Interrupts are disabled.  */
-	spin_lock(&iommu->lock);
-	iommu_range_free(iommu, *dma_addrp, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE);
 
 range_alloc_fail:
 	free_pages(first_page, order);
 	return NULL;
 }
 
+static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
+			       unsigned long npages)
+{
+	u32 devhandle = *(u32 *)demap_arg;
+	unsigned long num, flags;
+
+	local_irq_save(flags);
+	do {
+		num = pci_sun4v_iommu_demap(devhandle,
+					    HV_PCI_TSBID(0, entry),
+					    npages);
+
+		entry += num;
+		npages -= num;
+	} while (npages != 0);
+	local_irq_restore(flags);
+}
+
 static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 				 dma_addr_t dvma, struct dma_attrs *attrs)
 {
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
-	unsigned long flags, order, npages, entry;
+	unsigned long order, npages, entry;
 	u32 devhandle;
 
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	iommu = dev->archdata.iommu;
 	pbm = dev->archdata.host_controller;
 	devhandle = pbm->devhandle;
-	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
-
-	spin_lock_irqsave(&iommu->lock, flags);
-
-	iommu_range_free(iommu, dvma, npages);
-
-	do {
-		unsigned long num;
-
-		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
-					    npages);
-		entry += num;
-		npages -= num;
-	} while (npages != 0);
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
-
+	entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
+	dma_4v_iommu_demap(&devhandle, entry, npages);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
 	order = get_order(size);
 	if (order < 10)
 		free_pages((unsigned long)cpu, order);
@@ -253,15 +253,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
 	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 
-	spin_lock_irqsave(&iommu->lock, flags);
-	entry = iommu_range_alloc(dev, iommu, npages, NULL);
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+				      (unsigned long)(-1), 0);
 
 	if (unlikely(entry == DMA_ERROR_CODE))
 		goto bad;
 
-	bus_addr = (iommu->page_table_map_base +
-		    (entry << IO_PAGE_SHIFT));
+	bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
 	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
 	prot = HV_PCI_MAP_ATTR_READ;
@@ -290,11 +288,7 @@ bad:
 	return DMA_ERROR_CODE;
 
 iommu_map_fail:
-	/* Interrupts are disabled.  */
-	spin_lock(&iommu->lock);
-	iommu_range_free(iommu, bus_addr, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
-
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
 	return DMA_ERROR_CODE;
 }
 
@@ -304,7 +298,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 {
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
-	unsigned long flags, npages;
+	unsigned long npages;
 	long entry;
 	u32 devhandle;
 
@@ -321,22 +315,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 	bus_addr &= IO_PAGE_MASK;
-
-	spin_lock_irqsave(&iommu->lock, flags);
-
-	iommu_range_free(iommu, bus_addr, npages);
-
-	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
-	do {
-		unsigned long num;
-
-		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
-					    npages);
-		entry += num;
-		npages -= num;
-	} while (npages != 0);
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
+	dma_4v_iommu_demap(&devhandle, entry, npages);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
 }
 
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -371,14 +352,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 	/* Init first segment length for backout at failure */
 	outs->dma_length = 0;
 
-	spin_lock_irqsave(&iommu->lock, flags);
+	local_irq_save(flags);
 
 	iommu_batch_start(dev, prot, ~0UL);
 
 	max_seg_size = dma_get_max_seg_size(dev);
 	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
-	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
+	base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long paddr, npages, entry, out_entry = 0, slen;
 
@@ -391,7 +372,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 		/* Allocate iommu entries for that segment */
 		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
 		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
-		entry = iommu_range_alloc(dev, iommu, npages, &handle);
+		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
 		if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -404,8 +386,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 		iommu_batch_new_entry(entry);
 
 		/* Convert entry to a dma_addr_t */
-		dma_addr = iommu->page_table_map_base +
-			(entry << IO_PAGE_SHIFT);
+		dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
 		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
 		/* Insert into HW table */
@@ -451,7 +432,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 	if (unlikely(err < 0L))
 		goto iommu_map_failed;
 
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	local_irq_restore(flags);
 
 	if (outcount < incount) {
 		outs = sg_next(outs);
@@ -469,7 +450,8 @@ iommu_map_failed:
 			vaddr = s->dma_address & IO_PAGE_MASK;
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
-			iommu_range_free(iommu, vaddr, npages);
+			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+					     DMA_ERROR_CODE);
 			/* XXX demap? XXX */
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -477,7 +459,7 @@ iommu_map_failed:
 		if (s == outs)
 			break;
 	}
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	local_irq_restore(flags);
 
 	return 0;
 }
@@ -489,7 +471,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	struct pci_pbm_info *pbm;
 	struct scatterlist *sg;
 	struct iommu *iommu;
-	unsigned long flags;
+	unsigned long flags, entry;
 	u32 devhandle;
 
 	BUG_ON(direction == DMA_NONE);
@@ -498,33 +480,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	pbm = dev->archdata.host_controller;
 	devhandle = pbm->devhandle;
 	
-	spin_lock_irqsave(&iommu->lock, flags);
+	local_irq_save(flags);
 
 	sg = sglist;
 	while (nelems--) {
 		dma_addr_t dma_handle = sg->dma_address;
 		unsigned int len = sg->dma_length;
-		unsigned long npages, entry;
+		unsigned long npages;
+		struct iommu_map_table *tbl = &iommu->tbl;
+		unsigned long shift = IO_PAGE_SHIFT;
 
 		if (!len)
 			break;
 		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
-		iommu_range_free(iommu, dma_handle, npages);
-
-		entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
-		while (npages) {
-			unsigned long num;
-
-			num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
-						    npages);
-			entry += num;
-			npages -= num;
-		}
-
+		entry = ((dma_handle - tbl->table_map_base) >> shift);
+		dma_4v_iommu_demap(&devhandle, entry, npages);
+		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+				     DMA_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	local_irq_restore(flags);
 }
 
 static struct dma_map_ops sun4v_dma_ops = {
@@ -550,30 +526,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
 }
 
 static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
-					    struct iommu *iommu)
+					    struct iommu_map_table *iommu)
 {
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long i, cnt = 0;
+	struct iommu_pool *pool;
+	unsigned long i, pool_nr, cnt = 0;
 	u32 devhandle;
 
 	devhandle = pbm->devhandle;
-	for (i = 0; i < arena->limit; i++) {
-		unsigned long ret, io_attrs, ra;
-
-		ret = pci_sun4v_iommu_getmap(devhandle,
-					     HV_PCI_TSBID(0, i),
-					     &io_attrs, &ra);
-		if (ret == HV_EOK) {
-			if (page_in_phys_avail(ra)) {
-				pci_sun4v_iommu_demap(devhandle,
-						      HV_PCI_TSBID(0, i), 1);
-			} else {
-				cnt++;
-				__set_bit(i, arena->map);
+	for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
+		pool = &(iommu->pools[pool_nr]);
+		for (i = pool->start; i <= pool->end; i++) {
+			unsigned long ret, io_attrs, ra;
+
+			ret = pci_sun4v_iommu_getmap(devhandle,
+						     HV_PCI_TSBID(0, i),
+						     &io_attrs, &ra);
+			if (ret == HV_EOK) {
+				if (page_in_phys_avail(ra)) {
+					pci_sun4v_iommu_demap(devhandle,
+							      HV_PCI_TSBID(0,
+							      i), 1);
+				} else {
+					cnt++;
+					__set_bit(i, iommu->map);
+				}
 			}
 		}
 	}
-
 	return cnt;
 }
 
@@ -603,20 +582,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
 	iommu->ctx_lowest_free = 1;
-	iommu->page_table_map_base = dma_offset;
+	iommu->tbl.table_map_base = dma_offset;
 	iommu->dma_addr_mask = dma_mask;
 
 	/* Allocate and initialize the free area map.  */
 	sz = (num_tsb_entries + 7) / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
-	if (!iommu->arena.map) {
+	iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
+	if (!iommu->tbl.map) {
 		printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
 		return -ENOMEM;
 	}
-	iommu->arena.limit = num_tsb_entries;
-
-	sz = probe_existing_entries(pbm, iommu);
+	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
+			    NULL, false /* no large_pool */,
+			    0 /* default npools */,
+			    false /* want span boundary checking */);
+	sz = probe_existing_entries(pbm, &iommu->tbl);
 	if (sz)
 		printk("%s: Imported %lu TSB entries from OBP\n",
 		       pbm->name, sz);
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 6cc78c213c01..24384e1dc33d 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -391,12 +391,16 @@ static void __init pcic_pbm_scan_bus(struct linux_pcic *pcic)
 	struct linux_pbm_info *pbm = &pcic->pbm;
 
 	pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, &pcic_ops, pbm);
+	if (!pbm->pci_bus)
+		return;
+
 #if 0 /* deadwood transplanted from sparc64 */
 	pci_fill_in_pbm_cookies(pbm->pci_bus, pbm, pbm->prom_node);
 	pci_record_assignments(pbm, pbm->pci_bus);
 	pci_assign_unassigned(pbm, pbm->pci_bus);
 	pci_fixup_irq(pbm, pbm->pci_bus);
 #endif
+	pci_bus_add_devices(pbm->pci_bus);
 }
 
 /*
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 7e967c8018c8..eb978c77c76a 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
 	.pcr_nmi_disable	= PCR_N4_PICNPT,
 };
 
+static u64 m7_pcr_read(unsigned long reg_num)
+{
+	unsigned long val;
+
+	(void) sun4v_m7_get_perfreg(reg_num, &val);
+
+	return val;
+}
+
+static void m7_pcr_write(unsigned long reg_num, u64 val)
+{
+	(void) sun4v_m7_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops m7_pcr_ops = {
+	.read_pcr		= m7_pcr_read,
+	.write_pcr		= m7_pcr_write,
+	.read_pic		= n4_pic_read,
+	.write_pic		= n4_pic_write,
+	.nmi_picl_value		= n4_picl_value,
+	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE |
+				   PCR_N4_UTRACE | PCR_N4_TOE |
+				   (26 << PCR_N4_SL_SHIFT)),
+	.pcr_nmi_disable	= PCR_N4_PICNPT,
+};
 
 static unsigned long perf_hsvc_group;
 static unsigned long perf_hsvc_major;
@@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
 			perf_hsvc_group = HV_GRP_T5_CPU;
 			break;
 
+		case SUN4V_CHIP_SPARC_M7:
+			perf_hsvc_group = HV_GRP_M7_PERF;
+			break;
+
 		default:
 			return -ENODEV;
 		}
@@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
 		pcr_ops = &n5_pcr_ops;
 		break;
 
+	case SUN4V_CHIP_SPARC_M7:
+		pcr_ops = &m7_pcr_ops;
+		break;
+
 	default:
 		ret = -ENODEV;
 		break;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 46a5e4508752..59cf917a77b5 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -737,25 +737,9 @@ static void sparc_vt_write_pmc(int idx, u64 val)
 {
 	u64 pcr;
 
-	/* There seems to be an internal latch on the overflow event
-	 * on SPARC-T4 that prevents it from triggering unless you
-	 * update the PIC exactly as we do here.  The requirement
-	 * seems to be that you have to turn off event counting in the
-	 * PCR around the PIC update.
-	 *
-	 * For example, after the following sequence:
-	 *
-	 * 1) set PIC to -1
-	 * 2) enable event counting and overflow reporting in PCR
-	 * 3) overflow triggers, softint 15 handler invoked
-	 * 4) clear OV bit in PCR
-	 * 5) write PIC to -1
-	 *
-	 * a subsequent overflow event will not trigger.  This
-	 * sequence works on SPARC-T3 and previous chips.
-	 */
 	pcr = pcr_ops->read_pcr(idx);
-	pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
+	/* ensure ov and ntc are reset */
+	pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
 
 	pcr_ops->write_pic(idx, val & 0xffffffff);
 
@@ -792,6 +776,29 @@ static const struct sparc_pmu niagara4_pmu = {
 	.num_pic_regs	= 4,
 };
 
+static const struct sparc_pmu sparc_m7_pmu = {
+	.event_map	= niagara4_event_map,
+	.cache_map	= &niagara4_cache_map,
+	.max_events	= ARRAY_SIZE(niagara4_perfmon_event_map),
+	.read_pmc	= sparc_vt_read_pmc,
+	.write_pmc	= sparc_vt_write_pmc,
+	.upper_shift	= 5,
+	.lower_shift	= 5,
+	.event_mask	= 0x7ff,
+	.user_bit	= PCR_N4_UTRACE,
+	.priv_bit	= PCR_N4_STRACE,
+
+	/* We explicitly don't support hypervisor tracing. */
+	.hv_bit		= 0,
+
+	.irq_bit	= PCR_N4_TOE,
+	.upper_nop	= 0,
+	.lower_nop	= 0,
+	.flags		= 0,
+	.max_hw_events	= 4,
+	.num_pcrs	= 4,
+	.num_pic_regs	= 4,
+};
 static const struct sparc_pmu *sparc_pmu __read_mostly;
 
 static u64 event_encoding(u64 event_id, int idx)
@@ -960,6 +967,8 @@ out:
 	cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
 }
 
+static void sparc_pmu_start(struct perf_event *event, int flags);
+
 /* On this PMU each PIC has it's own PCR control register.  */
 static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
 {
@@ -972,20 +981,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
 		struct perf_event *cp = cpuc->event[i];
 		struct hw_perf_event *hwc = &cp->hw;
 		int idx = hwc->idx;
-		u64 enc;
 
 		if (cpuc->current_idx[i] != PIC_NO_INDEX)
 			continue;
 
-		sparc_perf_event_set_period(cp, hwc, idx);
 		cpuc->current_idx[i] = idx;
 
-		enc = perf_event_get_enc(cpuc->events[i]);
-		cpuc->pcr[idx] &= ~mask_for_index(idx);
-		if (hwc->state & PERF_HES_STOPPED)
-			cpuc->pcr[idx] |= nop_for_index(idx);
-		else
-			cpuc->pcr[idx] |= event_encoding(enc, idx);
+		sparc_pmu_start(cp, PERF_EF_RELOAD);
 	}
 out:
 	for (i = 0; i < cpuc->n_events; i++) {
@@ -1101,7 +1103,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
 	int i;
 
 	local_irq_save(flags);
-	perf_pmu_disable(event->pmu);
 
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event[i]) {
@@ -1127,7 +1128,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
 		}
 	}
 
-	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 }
 
@@ -1361,7 +1361,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	perf_pmu_disable(event->pmu);
 
 	n0 = cpuc->n_events;
 	if (n0 >= sparc_pmu->max_hw_events)
@@ -1394,7 +1393,6 @@ nocheck:
 
 	ret = 0;
 out:
-	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	return ret;
 }
@@ -1667,6 +1665,10 @@ static bool __init supported_pmu(void)
 		sparc_pmu = &niagara4_pmu;
 		return true;
 	}
+	if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+		sparc_pmu = &sparc_m7_pmu;
+		return true;
+	}
 	return false;
 }
 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 0be7bf978cb1..46a59643bb1c 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 			printk("             TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
 			       gp->tpc, gp->o7, gp->i7, gp->rpc);
 		}
+
+		touch_nmi_watchdog();
 	}
 
 	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
@@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void)
 		       (cpu == this_cpu ? '*' : ' '), cpu,
 		       pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
 		       pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
+
+		touch_nmi_watchdog();
 	}
 
 	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index da6f1a7fc4db..61139d9924ca 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1406,11 +1406,32 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 	scheduler_ipi();
 }
 
-/* This is a nop because we capture all other cpus
- * anyways when making the PROM active.
- */
+static void stop_this_cpu(void *dummy)
+{
+	prom_stopself();
+}
+
 void smp_send_stop(void)
 {
+	int cpu;
+
+	if (tlb_type == hypervisor) {
+		for_each_online_cpu(cpu) {
+			if (cpu == smp_processor_id())
+				continue;
+#ifdef CONFIG_SUN_LDOMS
+			if (ldom_domaining_enabled) {
+				unsigned long hv_err;
+				hv_err = sun4v_cpu_stop(cpu);
+				if (hv_err)
+					printk(KERN_ERR "sun4v_cpu_stop() "
+					       "failed err=%lu\n", hv_err);
+			} else
+#endif
+				prom_stopcpu_cpuid(cpu);
+		}
+	} else
+		smp_call_function(stop_this_cpu, NULL, 0);
 }
 
 /**
diff --git a/arch/sparc/kernel/starfire.c b/arch/sparc/kernel/starfire.c
index 82281a566bb8..167fdfd9c837 100644
--- a/arch/sparc/kernel/starfire.c
+++ b/arch/sparc/kernel/starfire.c
@@ -28,11 +28,6 @@ void check_if_starfire(void)
 		this_is_starfire = 1;
 }
 
-int starfire_hard_smp_processor_id(void)
-{
-	return upa_readl(0x1fff40000d0UL);
-}
-
 /*
  * Each Starfire board has 32 registers which perform translation
  * and delivery of traditional interrupt packets into the extended
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c85403d0496c..30e7ddb27a3a 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -333,7 +333,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second
 	long err;
 
 	/* No need for backward compatibility. We can start fresh... */
-	if (call <= SEMCTL) {
+	if (call <= SEMTIMEDOP) {
 		switch (call) {
 		case SEMOP:
 			err = sys_semtimedop(first, ptr,
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 2f80d23a0a44..8caf45ee81d9 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,24 +181,20 @@ static struct clocksource timer_cs = {
 	.rating	= 100,
 	.read	= timer_cs_read,
 	.mask	= CLOCKSOURCE_MASK(64),
-	.shift	= 2,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static __init int setup_timer_cs(void)
 {
 	timer_cs_enabled = 1;
-	timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
-	                                    timer_cs.shift);
-
-	return clocksource_register(&timer_cs);
+	return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
 }
 
 #ifdef CONFIG_SMP
 static void percpu_ce_setup(enum clock_event_mode mode,
 			struct clock_event_device *evt)
 {
-	int cpu = __first_cpu(evt->cpumask);
+	int cpu = cpumask_first(evt->cpumask);
 
 	switch (mode) {
 		case CLOCK_EVT_MODE_PERIODIC:
@@ -218,7 +214,7 @@ static void percpu_ce_setup(enum clock_event_mode mode,
 static int percpu_ce_set_next_event(unsigned long delta,
 				    struct clock_event_device *evt)
 {
-	int cpu = __first_cpu(evt->cpumask);
+	int cpu = cpumask_first(evt->cpumask);
 	unsigned int next = (unsigned int)delta;
 
 	sparc_config.load_profile_irq(cpu, next);
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index 6fd386c5232a..4f21df7d4f13 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -433,7 +433,6 @@ void trap_init(void)
 	/* Force linker to barf if mismatched */
 	if (TI_UWINMASK    != offsetof(struct thread_info, uwinmask) ||
 	    TI_TASK        != offsetof(struct thread_info, task) ||
-	    TI_EXECDOMAIN  != offsetof(struct thread_info, exec_domain) ||
 	    TI_FLAGS       != offsetof(struct thread_info, flags) ||
 	    TI_CPU         != offsetof(struct thread_info, cpu) ||
 	    TI_PREEMPT     != offsetof(struct thread_info, preempt_count) ||
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index a27651e866e7..d21cd625c0de 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2427,6 +2427,8 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
 		}
 		user_instruction_dump ((unsigned int __user *) regs->tpc);
 	}
+	if (panic_on_oops)
+		panic("Fatal exception");
 	if (regs->tstate & TSTATE_PRIV)
 		do_exit(SIGKILL);
 	do_exit(SIGSEGV);
@@ -2564,27 +2566,6 @@ void do_cee(struct pt_regs *regs)
 	die_if_kernel("TL0: Cache Error Exception", regs);
 }
 
-void do_cee_tl1(struct pt_regs *regs)
-{
-	exception_enter();
-	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-	die_if_kernel("TL1: Cache Error Exception", regs);
-}
-
-void do_dae_tl1(struct pt_regs *regs)
-{
-	exception_enter();
-	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-	die_if_kernel("TL1: Data Access Exception", regs);
-}
-
-void do_iae_tl1(struct pt_regs *regs)
-{
-	exception_enter();
-	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-	die_if_kernel("TL1: Instruction Access Exception", regs);
-}
-
 void do_div0_tl1(struct pt_regs *regs)
 {
 	exception_enter();
@@ -2592,13 +2573,6 @@ void do_div0_tl1(struct pt_regs *regs)
 	die_if_kernel("TL1: DIV0 Exception", regs);
 }
 
-void do_fpdis_tl1(struct pt_regs *regs)
-{
-	exception_enter();
-	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-	die_if_kernel("TL1: FPU Disabled", regs);
-}
-
 void do_fpieee_tl1(struct pt_regs *regs)
 {
 	exception_enter();
@@ -2717,8 +2691,6 @@ void __init trap_init(void)
 					       fault_address) ||
 		     TI_KREGS != offsetof(struct thread_info, kregs) ||
 		     TI_UTRAPS != offsetof(struct thread_info, utraps) ||
-		     TI_EXEC_DOMAIN != offsetof(struct thread_info,
-						exec_domain) ||
 		     TI_REG_WINDOW != offsetof(struct thread_info,
 					       reg_window) ||
 		     TI_RWIN_SPTRS != offsetof(struct thread_info,
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index b7f6334e159f..857ad4f8905f 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -8,9 +8,11 @@
 
 	.text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
-	mov		%o0, %g1
+	brz,pn		%o2, 99f
+	 mov		%o0, %g1
+
 	cmp		%o0, %o1
-	bleu,pt		%xcc, memcpy
+	bleu,pt		%xcc, 2f
 	 add		%o1, %o2, %g7
 	cmp		%g7, %o0
 	bleu,pt		%xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
 	stb		%g7, [%o0]
 	bne,pt		%icc, 1b
 	 sub		%o0, 1, %o0
-
+99:
 	retl
 	 mov		%g1, %o0
+
+	/* We can't just call memcpy for these memmove cases.  On some
+	 * chips the memcpy uses cache initializing stores and when dst
+	 * and src are close enough, those can clobber the source data
+	 * before we've loaded it in.
+	 */
+2:	or		%o0, %o1, %g7
+	or		%o2, %g7, %g7
+	andcc		%g7, 0x7, %g0
+	bne,pn		%xcc, 4f
+	 nop
+
+3:	ldx		[%o1], %g7
+	add		%o1, 8, %o1
+	subcc		%o2, 8, %o2
+	add		%o0, 8, %o0
+	bne,pt		%icc, 3b
+	 stx		%g7, [%o0 - 0x8]
+	ba,a,pt		%xcc, 99b
+
+4:	ldub		[%o1], %g7
+	add		%o1, 1, %o1
+	subcc		%o2, 1, %o2
+	add		%o0, 1, %o0
+	bne,pt		%icc, 4b
+	 stb		%g7, [%o0 - 0x1]
+	ba,a,pt		%xcc, 99b
 ENDPROC(memmove)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 3ea267c53320..4ca0d6ba5ec8 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2820,7 +2820,7 @@ static int __init report_memory(void)
 
 	return 0;
 }
-device_initcall(report_memory);
+arch_initcall(report_memory);
 
 #ifdef CONFIG_SMP
 #define do_flush_tlb_kernel_range	smp_flush_tlb_kernel_range
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 7cca41842a9e..a07e31b50d3f 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -27,6 +27,7 @@ config TILE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select HAVE_DEBUG_STACKOVERFLOW
 	select ARCH_WANT_FRAME_POINTERS
+	select HAVE_CONTEXT_TRACKING
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
@@ -147,6 +148,11 @@ config ARCH_DEFCONFIG
 	default "arch/tile/configs/tilepro_defconfig" if !TILEGX
 	default "arch/tile/configs/tilegx_defconfig" if TILEGX
 
+config PGTABLE_LEVELS
+	int
+	default 3 if 64BIT
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
index 6f00e9850636..ee186e13dfe6 100644
--- a/arch/tile/gxio/mpipe.c
+++ b/arch/tile/gxio/mpipe.c
@@ -456,7 +456,7 @@ int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
 EXPORT_SYMBOL_GPL(gxio_mpipe_equeue_init);
 
 int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
-			     const struct timespec *ts)
+			     const struct timespec64 *ts)
 {
 	cycles_t cycles = get_cycles();
 	return gxio_mpipe_set_timestamp_aux(context, (uint64_t)ts->tv_sec,
@@ -466,7 +466,7 @@ int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
 EXPORT_SYMBOL_GPL(gxio_mpipe_set_timestamp);
 
 int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
-			     struct timespec *ts)
+			     struct timespec64 *ts)
 {
 	int ret;
 	cycles_t cycles_prev, cycles_now, clock_rate;
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b4c488b65745..f5433e0e34e0 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
-generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
index 13a9bb81a8ab..738d239b792f 100644
--- a/arch/tile/include/asm/ftrace.h
+++ b/arch/tile/include/asm/ftrace.h
@@ -23,6 +23,8 @@
 #ifndef __ASSEMBLY__
 extern void __mcount(void);
 
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
diff --git a/arch/tile/include/asm/irq_work.h b/arch/tile/include/asm/irq_work.h
new file mode 100644
index 000000000000..48af33a61a2c
--- /dev/null
+++ b/arch/tile/include/asm/irq_work.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_IRQ_WORK_H
+#define __ASM_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+#ifdef CONFIG_SMP
+	extern bool self_interrupt_ok;
+	return self_interrupt_ok;
+#else
+	return false;
+#endif
+}
+
+#endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 9a326b64f7ae..735e7f144733 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -69,6 +69,7 @@ static inline int xy_to_cpu(int x, int y)
 #define MSG_TAG_STOP_CPU		2
 #define MSG_TAG_CALL_FUNCTION_MANY	3
 #define MSG_TAG_CALL_FUNCTION_SINGLE	4
+#define MSG_TAG_IRQ_WORK		5
 
 /* Hook for the generic smp_call_function_many() routine. */
 static inline void arch_send_call_function_ipi_mask(struct cpumask *mask)
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 96c14c1430d8..f804c39a5e4d 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -26,7 +26,6 @@
  */
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		status;		/* thread-synchronous flags */
 	__u32			homecache_cpu;	/* CPU we are homecached on */
@@ -51,7 +50,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
@@ -126,6 +124,7 @@ extern void _cpu_idle(void);
 #define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
 #define TIF_SYSCALL_TRACEPOINT	9	/* syscall tracepoint instrumentation */
 #define TIF_POLLING_NRFLAG	10	/* idle is polling for TIF_NEED_RESCHED */
+#define TIF_NOHZ		11	/* in adaptive nohz mode */
 
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
@@ -138,14 +137,16 @@ extern void _cpu_idle(void);
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_NOHZ		(1<<TIF_NOHZ)
 
 /* Work to do on any return to user space. */
 #define _TIF_ALLWORK_MASK \
-  (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\
-   _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_SINGLESTEP | \
+	 _TIF_ASYNC_TLB | _TIF_NOTIFY_RESUME | _TIF_NOHZ)
 
 /* Work to do at syscall entry. */
-#define _TIF_SYSCALL_ENTRY_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SYSCALL_ENTRY_WORK \
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* Work to do at syscall exit. */
 #define _TIF_SYSCALL_EXIT_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h
index e37cf4f0cffd..73e83a187866 100644
--- a/arch/tile/include/gxio/mpipe.h
+++ b/arch/tile/include/gxio/mpipe.h
@@ -1830,7 +1830,7 @@ extern int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr,
  *  code.
  */
 extern int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
-				    struct timespec *ts);
+				    struct timespec64 *ts);
 
 /* Set the timestamp of mPIPE.
  *
@@ -1840,7 +1840,7 @@ extern int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
  *  code.
  */
 extern int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
-				    const struct timespec *ts);
+				    const struct timespec64 *ts);
 
 /* Adjust the timestamp of mPIPE.
  *
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index dfcdeb61ba34..e0e6af4e783b 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -961,7 +961,11 @@ typedef enum {
   HV_INQ_TILES_HFH_CACHE       = 2,
 
   /** The set of tiles that can be legally used as a LOTAR for a PTE. */
-  HV_INQ_TILES_LOTAR           = 3
+  HV_INQ_TILES_LOTAR           = 3,
+
+  /** The set of "shared" driver tiles that the hypervisor may
+   *  periodically interrupt. */
+  HV_INQ_TILES_SHARED          = 4
 } HV_InqTileSet;
 
 /** Returns specific information about various sets of tiles within the
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 8c5abf2e4794..e8c2c04143cd 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -68,7 +68,7 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr
 	if (from->si_code < 0) {
 		err |= __put_user(from->si_pid, &to->si_pid);
 		err |= __put_user(from->si_uid, &to->si_uid);
-		err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
+		err |= __put_user(from->si_int, &to->si_int);
 	} else {
 		/*
 		 * First 32bits of unions are always present:
@@ -93,8 +93,7 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr
 			break;
 		case __SI_TIMER >> 16:
 			err |= __put_user(from->si_overrun, &to->si_overrun);
-			err |= __put_user(ptr_to_compat(from->si_ptr),
-					  &to->si_ptr);
+			err |= __put_user(from->si_int, &to->si_int);
 			break;
 			 /* This is not generated by the kernel as of now.  */
 		case __SI_RT >> 16:
@@ -110,19 +109,19 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr
 int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 {
 	int err;
-	u32 ptr32;
 
 	if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
 		return -EFAULT;
 
+	memset(to, 0, sizeof(*to));
+
 	err = __get_user(to->si_signo, &from->si_signo);
 	err |= __get_user(to->si_errno, &from->si_errno);
 	err |= __get_user(to->si_code, &from->si_code);
 
 	err |= __get_user(to->si_pid, &from->si_pid);
 	err |= __get_user(to->si_uid, &from->si_uid);
-	err |= __get_user(ptr32, &from->si_ptr);
-	to->si_ptr = compat_ptr(ptr32);
+	err |= __get_user(to->si_int, &from->si_int);
 
 	return err;
 }
@@ -196,19 +195,12 @@ int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	unsigned long restorer;
 	struct compat_rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
-	int usig;
 
 	frame = compat_get_sigframe(&ksig->ka, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto err;
 
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	/* Always write at least the signal number for the stack backtracer. */
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
 		/* At sigreturn time, restore the callee-save registers too. */
@@ -243,7 +235,7 @@ int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
 	regs->sp = ptr_to_compat_reg(frame);
 	regs->lr = restorer;
-	regs->regs[0] = (unsigned long) usig;
+	regs->regs[0] = (unsigned long) sig;
 	regs->regs[1] = ptr_to_compat_reg(&frame->info);
 	regs->regs[2] = ptr_to_compat_reg(&frame->uc);
 	regs->flags |= PT_FLAGS_CALLER_SAVES;
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
index 8d52d83cc516..0c0996175b1e 100644
--- a/arch/tile/kernel/ftrace.c
+++ b/arch/tile/kernel/ftrace.c
@@ -74,7 +74,11 @@ static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
 			create_JumpOff_X1(pcrel_by_instr);
 	}
 
-	if (addr == FTRACE_ADDR) {
+	/*
+	 * Also put { move r10, lr; jal ftrace_stub } in a bundle, which
+	 * is used to replace the instruction in address ftrace_call.
+	 */
+	if (addr == FTRACE_ADDR || addr == (unsigned long)ftrace_stub) {
 		/* opcode: or r10, lr, zero */
 		opcode_x0 =
 			create_Dest_X0(10) |
diff --git a/arch/tile/kernel/mcount_64.S b/arch/tile/kernel/mcount_64.S
index 3c2b8d5e1d1a..6c6702451962 100644
--- a/arch/tile/kernel/mcount_64.S
+++ b/arch/tile/kernel/mcount_64.S
@@ -81,7 +81,12 @@ STD_ENTRY(ftrace_caller)
 
 	/* arg1: self return address */
 	/* arg2: parent's return address */
-	{ move	r0, lr; move	r1, r10 }
+	/* arg3: ftrace_ops */
+	/* arg4: regs (but make it NULL) */
+	{ move	r0, lr;  moveli        r2, hw2_last(function_trace_op) }
+	{ move	r1, r10; shl16insli    r2, r2, hw1(function_trace_op) }
+	{ movei	r3, 0;   shl16insli    r2, r2, hw0(function_trace_op) }
+	ld	r2,r2
 
 	.global	ftrace_call
 ftrace_call:
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 325df47f114d..9475a74cd53a 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -339,6 +339,8 @@ int __init pcibios_init(void)
 			struct pci_bus *next_bus;
 			struct pci_dev *dev;
 
+			pci_bus_add_devices(root_bus);
+
 			list_for_each_entry(dev, &root_bus->devices, bus_list) {
 				/*
 				 * Find the PCI host controller, ie. the 1st
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 2c95f37ebbed..b1df847d0686 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1030,6 +1030,8 @@ int __init pcibios_init(void)
 alloc_mem_map_failed:
 			break;
 		}
+
+		pci_bus_add_devices(root_bus);
 	}
 
 	return 0;
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 48e5773dd0b7..b403c2e3e263 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
 #include <asm/homecache.h>
@@ -474,6 +475,8 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 	if (!user_mode(regs))
 		return 0;
 
+	user_exit();
+
 	/* Enable interrupts; they are disabled again on return to caller. */
 	local_irq_enable();
 
@@ -496,11 +499,12 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 		tracehook_notify_resume(regs);
 		return 1;
 	}
-	if (thread_info_flags & _TIF_SINGLESTEP) {
+	if (thread_info_flags & _TIF_SINGLESTEP)
 		single_step_once(regs);
-		return 0;
-	}
-	panic("work_pending: bad flags %#x\n", thread_info_flags);
+
+	user_enter();
+
+	return 0;
 }
 
 unsigned long get_wchan(struct task_struct *p)
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index de98c6ddf136..f84eed8243da 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/regset.h>
 #include <linux/elf.h>
 #include <linux/tracehook.h>
+#include <linux/context_tracking.h>
 #include <asm/traps.h>
 #include <arch/chip.h>
 
@@ -252,12 +253,21 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 int do_syscall_trace_enter(struct pt_regs *regs)
 {
-	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
+	u32 work = ACCESS_ONCE(current_thread_info()->flags);
+
+	/*
+	 * If TIF_NOHZ is set, we are required to call user_exit() before
+	 * doing anything that could touch RCU.
+	 */
+	if (work & _TIF_NOHZ)
+		user_exit();
+
+	if (work & _TIF_SYSCALL_TRACE) {
 		if (tracehook_report_syscall_entry(regs))
 			regs->regs[TREG_SYSCALL_NR] = -1;
 	}
 
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+	if (work & _TIF_SYSCALL_TRACEPOINT)
 		trace_sys_enter(regs, regs->regs[TREG_SYSCALL_NR]);
 
 	return regs->regs[TREG_SYSCALL_NR];
@@ -268,6 +278,12 @@ void do_syscall_trace_exit(struct pt_regs *regs)
 	long errno;
 
 	/*
+	 * We may come here right after calling schedule_user()
+	 * in which case we can be in RCU user mode.
+	 */
+	user_exit();
+
+	/*
 	 * The standard tile calling convention returns the value (or negative
 	 * errno) in r0, and zero (or positive errno) in r1.
 	 * It saves a couple of cycles on the hot path to do this work in
@@ -303,5 +319,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
 /* Handle synthetic interrupt delivered only by the simulator. */
 void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
 {
+	enum ctx_state prev_state = exception_enter();
 	send_sigtrap(current, regs);
+	exception_exit(prev_state);
 }
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index f1f579914952..6873f006f7d0 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -32,6 +32,7 @@
 #include <linux/hugetlb.h>
 #include <linux/start_kernel.h>
 #include <linux/screen_info.h>
+#include <linux/tick.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -773,7 +774,7 @@ static void __init zone_sizes_init(void)
 		 * though, there'll be no lowmem, so we just alloc_bootmem
 		 * the memmap.  There will be no percpu memory either.
 		 */
-		if (i != 0 && cpu_isset(i, isolnodes)) {
+		if (i != 0 && cpumask_test_cpu(i, &isolnodes)) {
 			node_memmap_pfn[i] =
 				alloc_bootmem_pfn(0, memmap_size, 0);
 			BUG_ON(node_percpu[i] != 0);
@@ -1390,6 +1391,28 @@ static int __init dataplane(char *str)
 
 early_param("dataplane", dataplane);
 
+#ifdef CONFIG_NO_HZ_FULL
+/* Warn if hypervisor shared cpus are marked as nohz_full. */
+static int __init check_nohz_full_cpus(void)
+{
+	struct cpumask shared;
+	int cpu;
+
+	if (hv_inquire_tiles(HV_INQ_TILES_SHARED,
+			     (HV_VirtAddr) shared.bits, sizeof(shared)) < 0) {
+		pr_warn("WARNING: No support for inquiring hv shared tiles\n");
+		return 0;
+	}
+	for_each_cpu(cpu, &shared) {
+		if (tick_nohz_full_cpu(cpu))
+			pr_warn("WARNING: nohz_full cpu %d receives hypervisor interrupts!\n",
+			       cpu);
+	}
+	return 0;
+}
+arch_initcall(check_nohz_full_cpus);
+#endif
+
 #ifdef CONFIG_CMDLINE_BOOL
 static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
 #endif
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 8a524e332c1a..87299a6cfec8 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -151,19 +151,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	unsigned long restorer;
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
-	int usig;
 
 	frame = get_sigframe(&ksig->ka, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto err;
 
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	/* Always write at least the signal number for the stack backtracer. */
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
 		/* At sigreturn time, restore the callee-save registers too. */
@@ -198,7 +191,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
 	regs->sp = (unsigned long) frame;
 	regs->lr = restorer;
-	regs->regs[0] = (unsigned long) usig;
+	regs->regs[0] = (unsigned long) sig;
 	regs->regs[1] = (unsigned long) &frame->info;
 	regs->regs[2] = (unsigned long) &frame->uc;
 	regs->flags |= PT_FLAGS_CALLER_SAVES;
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 862973074bf9..53f7b9def07b 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/err.h>
 #include <linux/prctl.h>
+#include <linux/context_tracking.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
 #include <asm/uaccess.h>
@@ -738,6 +739,7 @@ static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
 
 void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
 {
+	enum ctx_state prev_state = exception_enter();
 	unsigned long *ss_pc = this_cpu_ptr(&ss_saved_pc);
 	struct thread_info *info = (void *)current_thread_info();
 	int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
@@ -754,6 +756,7 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
 		__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
 		send_sigtrap(current, regs);
 	}
+	exception_exit(prev_state);
 }
 
 
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index d3c4ed780ce2..07e3ff5cc740 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irq_work.h>
 #include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/homecache.h>
@@ -33,6 +34,8 @@ EXPORT_SYMBOL(smp_topology);
 static unsigned long __iomem *ipi_mappings[NR_CPUS];
 #endif
 
+/* Does messaging work correctly to the local cpu? */
+bool self_interrupt_ok;
 
 /*
  * Top-level send_IPI*() functions to send messages to other cpus.
@@ -147,6 +150,10 @@ void evaluate_message(int tag)
 		generic_smp_call_function_single_interrupt();
 		break;
 
+	case MSG_TAG_IRQ_WORK: /* Invoke IRQ work */
+		irq_work_run();
+		break;
+
 	default:
 		panic("Unknown IPI message tag %d", tag);
 		break;
@@ -186,6 +193,15 @@ void flush_icache_range(unsigned long start, unsigned long end)
 EXPORT_SYMBOL(flush_icache_range);
 
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+	if (arch_irq_work_has_interrupt())
+		send_IPI_single(smp_processor_id(), MSG_TAG_IRQ_WORK);
+}
+#endif
+
+
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
@@ -203,8 +219,22 @@ static struct irqaction resched_action = {
 
 void __init ipi_init(void)
 {
+	int cpu = smp_processor_id();
+	HV_Recipient recip = { .y = cpu_y(cpu), .x = cpu_x(cpu),
+			       .state = HV_TO_BE_SENT };
+	int tag = MSG_TAG_CALL_FUNCTION_SINGLE;
+
+	/*
+	 * Test if we can message ourselves for arch_irq_work_raise.
+	 * This functionality is only available in the Tilera hypervisor
+	 * in versions 4.3.4 and following.
+	 */
+	if (hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag)) == 1)
+		self_interrupt_ok = true;
+	else
+		pr_warn("Older hypervisor: disabling fast irq_work_raise\n");
+
 #if CHIP_HAS_IPI()
-	int cpu;
 	/* Map IPI trigger MMIO addresses. */
 	for_each_possible_cpu(cpu) {
 		HV_Coord tile;
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 7ff5afdbd3aa..c42dce50acd8 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -108,14 +108,15 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 		   p->sp < PAGE_OFFSET && p->sp != 0) {
 		if (kbt->verbose)
 			pr_err("  <%s while in user mode>\n", fault);
-	} else if (kbt->verbose) {
-		pr_err("  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
-		       p->pc, p->sp, p->ex1);
-		p = NULL;
+	} else {
+		if (kbt->verbose)
+			pr_err("  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
+			       p->pc, p->sp, p->ex1);
+		return NULL;
 	}
-	if (!kbt->profile || ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) == 0)
-		return p;
-	return NULL;
+	if (kbt->profile && ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) != 0)
+		return NULL;
+	return p;
 }
 
 /* Is the pc pointing to a sigreturn trampoline? */
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d412b0856c0a..00178ecf9aea 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
 
 void update_vsyscall(struct timekeeper *tk)
 {
-	if (tk->tkr.clock != &cycle_counter_cs)
+	if (tk->tkr_mono.clock != &cycle_counter_cs)
 		return;
 
 	write_seqcount_begin(&vdso_data->tb_seq);
 
-	vdso_data->cycle_last		= tk->tkr.cycle_last;
-	vdso_data->mask			= tk->tkr.mask;
-	vdso_data->mult			= tk->tkr.mult;
-	vdso_data->shift		= tk->tkr.shift;
+	vdso_data->cycle_last		= tk->tkr_mono.cycle_last;
+	vdso_data->mask			= tk->tkr_mono.mask;
+	vdso_data->mult			= tk->tkr_mono.mult;
+	vdso_data->shift		= tk->tkr_mono.shift;
 
 	vdso_data->wall_time_sec	= tk->xtime_sec;
-	vdso_data->wall_time_snsec	= tk->tkr.xtime_nsec;
+	vdso_data->wall_time_snsec	= tk->tkr_mono.xtime_nsec;
 
 	vdso_data->monotonic_time_sec	= tk->xtime_sec
 					+ tk->wall_to_monotonic.tv_sec;
-	vdso_data->monotonic_time_snsec	= tk->tkr.xtime_nsec
+	vdso_data->monotonic_time_snsec	= tk->tkr_mono.xtime_nsec
 					+ ((u64)tk->wall_to_monotonic.tv_nsec
-						<< tk->tkr.shift);
+						<< tk->tkr_mono.shift);
 	while (vdso_data->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+					(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
 		vdso_data->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->tkr.shift;
+					((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
 		vdso_data->monotonic_time_sec++;
 	}
 
 	vdso_data->wall_time_coarse_sec	= tk->xtime_sec;
-	vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
-						 tk->tkr.shift);
+	vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+						 tk->tkr_mono.shift);
 
 	vdso_data->monotonic_time_coarse_sec =
 		vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index bf841ca517bb..312fc134c1cb 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -20,6 +20,7 @@
 #include <linux/reboot.h>
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+#include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/traps.h>
 #include <asm/setup.h>
@@ -253,6 +254,7 @@ static int do_bpt(struct pt_regs *regs)
 void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		       unsigned long reason)
 {
+	enum ctx_state prev_state = exception_enter();
 	siginfo_t info = { 0 };
 	int signo, code;
 	unsigned long address = 0;
@@ -261,7 +263,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 
 	/* Handle breakpoints, etc. */
 	if (is_kernel && fault_num == INT_ILL && do_bpt(regs))
-		return;
+		goto done;
 
 	/* Re-enable interrupts, if they were previously enabled. */
 	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
@@ -275,7 +277,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		const char *name;
 		char buf[100];
 		if (fixup_exception(regs))  /* ILL_TRANS or UNALIGN_DATA */
-			return;
+			goto done;
 		if (fault_num >= 0 &&
 		    fault_num < ARRAY_SIZE(int_name) &&
 		    int_name[fault_num] != NULL)
@@ -294,7 +296,6 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 			 fault_num, name, regs->pc, buf);
 		show_regs(regs);
 		do_exit(SIGKILL);  /* FIXME: implement i386 die() */
-		return;
 	}
 
 	switch (fault_num) {
@@ -308,7 +309,6 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 			pr_err("Unreadable instruction for INT_ILL: %#lx\n",
 			       regs->pc);
 			do_exit(SIGKILL);
-			return;
 		}
 		if (!special_ill(instr, &signo, &code)) {
 			signo = SIGILL;
@@ -319,7 +319,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	case INT_GPV:
 #if CHIP_HAS_TILE_DMA()
 		if (retry_gpv(reason))
-			return;
+			goto done;
 #endif
 		/*FALLTHROUGH*/
 	case INT_UDN_ACCESS:
@@ -346,7 +346,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 			if (!state ||
 			    (void __user *)(regs->pc) != state->buffer) {
 				single_step_once(regs);
-				return;
+				goto done;
 			}
 		}
 #endif
@@ -380,7 +380,6 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 #endif
 	default:
 		panic("Unexpected do_trap interrupt number %d", fault_num);
-		return;
 	}
 
 	info.si_signo = signo;
@@ -391,6 +390,9 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	if (signo != SIGTRAP)
 		trace_unhandled_signal("trap", regs, address, signo);
 	force_sig_info(signo, &info, current);
+
+done:
+	exception_exit(prev_state);
 }
 
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
index 7d9a83be0aca..d075f92ccee0 100644
--- a/arch/tile/kernel/unaligned.c
+++ b/arch/tile/kernel/unaligned.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/prctl.h>
+#include <linux/context_tracking.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
 #include <asm/uaccess.h>
@@ -1448,6 +1449,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 
 void do_unaligned(struct pt_regs *regs, int vecnum)
 {
+	enum ctx_state prev_state = exception_enter();
 	tilegx_bundle_bits __user  *pc;
 	tilegx_bundle_bits bundle;
 	struct thread_info *info = current_thread_info();
@@ -1487,12 +1489,11 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 						(int)unaligned_fixup,
 						(unsigned long long)regs->ex1,
 						(unsigned long long)regs->pc);
-				return;
+			} else {
+				/* Not fixable. Go panic. */
+				panic("Unalign exception in Kernel. pc=%lx",
+				      regs->pc);
 			}
-			/* Not fixable. Go panic. */
-			panic("Unalign exception in Kernel. pc=%lx",
-			      regs->pc);
-			return;
 		} else {
 			/*
 			 * Try to fix the exception. If we can't, panic the
@@ -1501,8 +1502,8 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 			bundle = GX_INSN_BSWAP(
 				*((tilegx_bundle_bits *)(regs->pc)));
 			jit_bundle_gen(regs, bundle, align_ctl);
-			return;
 		}
+		goto done;
 	}
 
 	/*
@@ -1526,7 +1527,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 
 		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
 		force_sig_info(info.si_signo, &info, current);
-		return;
+		goto done;
 	}
 
 
@@ -1543,7 +1544,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 		trace_unhandled_signal("segfault in unalign fixup", regs,
 				       (unsigned long)info.si_addr, SIGSEGV);
 		force_sig_info(info.si_signo, &info, current);
-		return;
+		goto done;
 	}
 
 	if (!info->unalign_jit_base) {
@@ -1578,7 +1579,7 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 
 		if (IS_ERR((void __force *)user_page)) {
 			pr_err("Out of kernel pages trying do_mmap\n");
-			return;
+			goto done;
 		}
 
 		/* Save the address in the thread_info struct */
@@ -1591,6 +1592,9 @@ void do_unaligned(struct pt_regs *regs, int vecnum)
 
 	/* Generate unalign JIT */
 	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
+
+done:
+	exception_exit(prev_state);
 }
 
 #endif /* __tilegx__ */
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 23f044e8a7ab..f7ddae3725a4 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -17,6 +17,7 @@
 #include <linux/binfmts.h>
 #include <linux/compat.h>
 #include <linux/mman.h>
+#include <linux/file.h>
 #include <linux/elf.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -39,30 +40,34 @@ static void sim_notify_exec(const char *binary_name)
 
 static int notify_exec(struct mm_struct *mm)
 {
+	int ret = 0;
 	char *buf, *path;
 	struct vm_area_struct *vma;
+	struct file *exe_file;
 
 	if (!sim_is_simulator())
 		return 1;
 
-	if (mm->exe_file == NULL)
-		return 0;
-
-	for (vma = current->mm->mmap; ; vma = vma->vm_next) {
-		if (vma == NULL)
-			return 0;
-		if (vma->vm_file == mm->exe_file)
-			break;
-	}
-
 	buf = (char *) __get_free_page(GFP_KERNEL);
 	if (buf == NULL)
 		return 0;
 
-	path = d_path(&mm->exe_file->f_path, buf, PAGE_SIZE);
-	if (IS_ERR(path)) {
-		free_page((unsigned long)buf);
-		return 0;
+	exe_file = get_mm_exe_file(mm);
+	if (exe_file == NULL)
+		goto done_free;
+
+	path = d_path(&exe_file->f_path, buf, PAGE_SIZE);
+	if (IS_ERR(path))
+		goto done_put;
+
+	down_read(&mm->mmap_sem);
+	for (vma = current->mm->mmap; ; vma = vma->vm_next) {
+		if (vma == NULL) {
+			up_read(&mm->mmap_sem);
+			goto done_put;
+		}
+		if (vma->vm_file == exe_file)
+			break;
 	}
 
 	/*
@@ -80,14 +85,20 @@ static int notify_exec(struct mm_struct *mm)
 			__insn_mtspr(SPR_SIM_CONTROL,
 				     (SIM_CONTROL_DLOPEN
 				      | (c << _SIM_CONTROL_OPERATOR_BITS)));
-			if (c == '\0')
+			if (c == '\0') {
+				ret = 1; /* success */
 				break;
+			}
 		}
 	}
+	up_read(&mm->mmap_sem);
 
 	sim_notify_exec(path);
+done_put:
+	fput(exe_file);
+done_free:
 	free_page((unsigned long)buf);
-	return 1;
+	return ret;
 }
 
 /* Notify a running simulator, if any, that we loaded an interpreter. */
@@ -109,8 +120,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	struct mm_struct *mm = current->mm;
 	int retval = 0;
 
-	down_write(&mm->mmap_sem);
-
 	/*
 	 * Notify the simulator that an exec just occurred.
 	 * If we can't find the filename of the mapping, just use
@@ -119,6 +128,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	if (!notify_exec(mm))
 		sim_notify_exec(bprm->filename);
 
+	down_write(&mm->mmap_sem);
+
 	retval = setup_vdso_pages();
 
 #ifndef __tilegx__
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 0f61a73534e6..e83cc999da02 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -35,6 +35,7 @@
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/context_tracking.h>
 
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
@@ -702,6 +703,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 		   unsigned long address, unsigned long write)
 {
 	int is_page_fault;
+	enum ctx_state prev_state = exception_enter();
 
 #ifdef CONFIG_KPROBES
 	/*
@@ -711,7 +713,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 	 */
 	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
 		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
-		return;
+		goto done;
 #endif
 
 #ifdef __tilegx__
@@ -750,7 +752,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 				 current->comm, current->pid, pc, address);
 			show_regs(regs);
 			do_group_exit(SIGKILL);
-			return;
 		}
 	}
 #else
@@ -834,12 +835,15 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			async->is_fault = is_page_fault;
 			async->is_write = write;
 			async->address = address;
-			return;
+			goto done;
 		}
 	}
 #endif
 
 	handle_page_fault(regs, fault_num, is_page_fault, address, write);
+
+done:
+	exception_exit(prev_state);
 }
 
 
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index ace32d7d3864..5bd252e3fdc5 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -233,9 +233,12 @@ static pgprot_t __init init_pgprot(ulong address)
 	if (kdata_huge)
 		return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
 
-	/* We map the aliased pages of permanent text inaccessible. */
+	/*
+	 * We map the aliased pages of permanent text so we can
+	 * update them if necessary, for ftrace, etc.
+	 */
 	if (address < (ulong) _sinittext - CODE_DELTA)
-		return PAGE_NONE;
+		return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
 
 	/* We map read-only data non-coherent for performance. */
 	if ((address >= (ulong) __start_rodata &&
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index a7520c90f62d..6e67847f5272 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -95,48 +95,6 @@ config MAGIC_SYSRQ
 	  The keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
-config SMP
-	bool "Symmetric multi-processing support"
-	default n
-	depends on BROKEN
-	help
-	  This option enables UML SMP support.
-	  It is NOT related to having a real SMP box. Not directly, at least.
-
-	  UML implements virtual SMP by allowing as many processes to run
-	  simultaneously on the host as there are virtual processors configured.
-
-	  Obviously, if the host is a uniprocessor, those processes will
-	  timeshare, but, inside UML, will appear to be running simultaneously.
-	  If the host is a multiprocessor, then UML processes may run
-	  simultaneously, depending on the host scheduler.
-
-	  This, however, is supported only in TT mode. So, if you use the SKAS
-	  patch on your host, switching to TT mode and enabling SMP usually
-	  gives	you worse performances.
-	  Also, since the support for SMP has been under-developed, there could
-	  be some bugs being exposed by enabling SMP.
-
-	  If you don't know what to do, say N.
-
-config NR_CPUS
-	int "Maximum number of CPUs (2-32)"
-	range 2 32
-	depends on SMP
-	default "32"
-
-config HIGHMEM
-	bool "Highmem support"
-	depends on !64BIT && BROKEN
-	default n
-	help
-	  This was used to allow UML to run with big amounts of memory.
-	  Currently it is unstable, so if unsure say N.
-
-	  To use big amounts of memory, it is recommended enable static
-	  linking (i.e. CONFIG_STATIC_LINK) - this should allow the
-	  guest to use up to 2.75G of memory.
-
 config KERNEL_STACK_ORDER
 	int "Kernel stack size order"
 	default 1 if 64BIT
@@ -155,3 +113,8 @@ config MMAPPER
 
 config NO_DMA
 	def_bool y
+
+config PGTABLE_LEVELS
+	int
+	default 3 if 3_LEVEL_PGTABLES
+	default 2
diff --git a/arch/um/Makefile b/arch/um/Makefile
index e4b1a9639c4d..17d4460b1af3 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -43,8 +43,8 @@ endif
 
 HOST_DIR := arch/$(HEADER_ARCH)
 
-include $(srctree)/$(ARCH_DIR)/Makefile-skas
-include $(srctree)/$(HOST_DIR)/Makefile.um
+include $(ARCH_DIR)/Makefile-skas
+include $(HOST_DIR)/Makefile.um
 
 core-y += $(HOST_DIR)/um/
 
@@ -73,7 +73,7 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
 	$(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
 
 #This will adjust *FLAGS accordingly to the platform.
-include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
+include $(ARCH_DIR)/Makefile-os-$(OS)
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
 		   -I$(srctree)/$(HOST_DIR)/include/uapi \
diff --git a/arch/um/Makefile-ia64 b/arch/um/Makefile-ia64
deleted file mode 100644
index f84dc23b0f6e..000000000000
--- a/arch/um/Makefile-ia64
+++ /dev/null
@@ -1 +0,0 @@
-START_ADDR = 0x1000000000000000
diff --git a/arch/um/Makefile-ppc b/arch/um/Makefile-ppc
deleted file mode 100644
index 66fd2003e165..000000000000
--- a/arch/um/Makefile-ppc
+++ /dev/null
@@ -1,9 +0,0 @@
-ifeq ($(CONFIG_HOST_2G_2G), y)
-START_ADDR = 0x80000000
-else
-START_ADDR = 0xc0000000
-endif
-ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__
-
-# The arch is ppc, but the elf32 name is powerpc
-ELF_SUBARCH = powerpc
diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h
index 3094ea3c73b0..1761fd75bf13 100644
--- a/arch/um/include/asm/fixmap.h
+++ b/arch/um/include/asm/fixmap.h
@@ -33,10 +33,6 @@
  * fix-mapped?
  */
 enum fixed_addresses {
-#ifdef CONFIG_HIGHMEM
-	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
-	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 2324b624f195..18eb9924dda3 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -47,11 +47,7 @@ extern unsigned long end_iomem;
 #define VMALLOC_OFFSET	(__va_space)
 #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
 #define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK)
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
-#else
-# define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
-#endif
+#define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #define MODULES_VADDR	VMALLOC_START
 #define MODULES_END	VMALLOC_END
 #define MODULES_LEN	(MODULES_VADDR - MODULES_END)
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index cbc5edd5a901..2d1e0dd5bb0b 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -98,16 +98,8 @@ struct cpuinfo_um {
 
 extern struct cpuinfo_um boot_cpu_data;
 
-#define my_cpu_data		cpu_data[smp_processor_id()]
-
-#ifdef CONFIG_SMP
-extern struct cpuinfo_um cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
-#else
 #define cpu_data (&boot_cpu_data)
 #define current_cpu_data boot_cpu_data
-#endif
-
 
 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
 extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index e4507938d8cf..9c3be355ed01 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -1,32 +1,6 @@
 #ifndef __UM_SMP_H
 #define __UM_SMP_H
 
-#ifdef CONFIG_SMP
-
-#include <linux/bitops.h>
-#include <asm/current.h>
-#include <linux/cpumask.h>
-
-#define raw_smp_processor_id() (current_thread->cpu)
-
-#define cpu_logical_map(n) (n)
-#define cpu_number_map(n) (n)
-extern int hard_smp_processor_id(void);
-#define NO_PROC_ID -1
-
-extern int ncpus;
-
-
-static inline void smp_cpus_done(unsigned int maxcpus)
-{
-}
-
-extern struct task_struct *idle_threads[NR_CPUS];
-
-#else
-
 #define hard_smp_processor_id()		0
 
 #endif
-
-#endif
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index e04114c4fcd9..b30c85b141d9 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -14,7 +14,6 @@
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
 	int			preempt_count;  /* 0 => preemptable,
@@ -28,7 +27,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task =		&tsk,			\
-	.exec_domain =	&default_exec_domain,	\
 	.flags =		0,		\
 	.cpu =		0,			\
 	.preempt_count = INIT_PREEMPT_COUNT,	\
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index 41c8c774ec10..ca1843e1df15 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -56,6 +56,7 @@ extern unsigned long brk_start;
 extern unsigned long host_task_size;
 
 extern int linux_main(int argc, char **argv);
+extern void uml_finishsetup(void);
 
 struct siginfo;
 extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 08eec0b691b0..d824528f6f62 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -174,7 +174,6 @@ extern unsigned long long os_makedev(unsigned major, unsigned minor);
 
 /* start_up.c */
 extern void os_early_checks(void);
-extern void can_do_skas(void);
 extern void os_check_bugs(void);
 extern void check_host_supports_tls(int *supports_tls, int *tls_min);
 
@@ -187,7 +186,6 @@ extern int os_process_parent(int pid);
 extern void os_stop_process(int pid);
 extern void os_kill_process(int pid, int reap_child);
 extern void os_kill_ptraced_process(int pid, int reap_child);
-extern long os_ptrace_ldt(long pid, long addr, long data);
 
 extern int os_getpid(void);
 extern int os_getpgrp(void);
diff --git a/arch/um/include/shared/skas/proc_mm.h b/arch/um/include/shared/skas/proc_mm.h
deleted file mode 100644
index 902809209603..000000000000
--- a/arch/um/include/shared/skas/proc_mm.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_PROC_MM_H
-#define __SKAS_PROC_MM_H
-
-#define MM_MMAP 54
-#define MM_MUNMAP 55
-#define MM_MPROTECT 56
-#define MM_COPY_SEGMENTS 57
-
-struct mm_mmap {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-struct mm_munmap {
-	unsigned long addr;
-	unsigned long len;
-};
-
-struct mm_mprotect {
-	unsigned long addr;
-	unsigned long len;
-	unsigned int prot;
-};
-
-struct proc_mm_op {
-	int op;
-	union {
-		struct mm_mmap mmap;
-		struct mm_munmap munmap;
-	        struct mm_mprotect mprotect;
-		int copy_segments;
-	} u;
-};
-
-#endif
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index c45df961c874..911f3c45ad1f 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -9,13 +9,10 @@
 #include <sysdep/ptrace.h>
 
 extern int userspace_pid[];
-extern int proc_mm, ptrace_faultinfo, ptrace_ldt;
-extern int skas_needs_stub;
 
 extern int user_thread(unsigned long stack, int flags);
 extern void new_thread_handler(void);
 extern void handle_syscall(struct uml_pt_regs *regs);
-extern int new_mm(unsigned long stack);
 extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
 
diff --git a/arch/um/include/shared/skas_ptrace.h b/arch/um/include/shared/skas_ptrace.h
deleted file mode 100644
index 630a9c92b93c..000000000000
--- a/arch/um/include/shared/skas_ptrace.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_PTRACE_H
-#define __SKAS_PTRACE_H
-
-#define PTRACE_FAULTINFO 52
-#define PTRACE_SWITCH_MM 55
-
-#include <sysdep/skas_ptrace.h>
-
-#endif
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 2d840a070c8b..a6a5e42caaef 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -12,8 +12,8 @@ clean-files :=
 
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
-	signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
-	um_arch.o umid.o maccess.o skas/
+	signal.o syscall.o sysrq.o time.o tlb.o trap.o \
+	um_arch.o umid.o maccess.o kmsg_dump.o skas/
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 1d8505b1e290..23cb9350d47e 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,9 +35,6 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 	struct irq_fd *irq_fd;
 	int n;
 
-	if (smp_sigio_handler())
-		return;
-
 	while (1) {
 		n = os_waiting_for_events(active_fds);
 		if (n <= 0) {
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
new file mode 100644
index 000000000000..407d49251d6f
--- /dev/null
+++ b/arch/um/kernel/kmsg_dump.c
@@ -0,0 +1,43 @@
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <shared/init.h>
+#include <shared/kern.h>
+#include <os.h>
+
+static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
+				enum kmsg_dump_reason reason)
+{
+	static char line[1024];
+
+	size_t len = 0;
+	bool con_available = false;
+
+	/* only dump kmsg when no console is available */
+	if (!console_trylock())
+		return;
+
+	if (console_drivers != NULL)
+		con_available = true;
+
+	console_unlock();
+
+	if (con_available == true)
+		return;
+
+	printf("kmsg_dump:\n");
+	while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) {
+		line[len] = '\0';
+		printf("%s", line);
+	}
+}
+
+static struct kmsg_dumper kmsg_dumper = {
+	.dump = kmsg_dumper_stdout
+};
+
+int __init kmsg_dumper_stdout_init(void)
+{
+	return kmsg_dump_register(&kmsg_dumper);
+}
+
+__uml_postsetup(kmsg_dumper_stdout_init);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 8636e905426f..b2a2dff50b4e 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -38,19 +38,6 @@ int kmalloc_ok = 0;
 /* Used during early boot */
 static unsigned long brk_end;
 
-#ifdef CONFIG_HIGHMEM
-static void setup_highmem(unsigned long highmem_start,
-			  unsigned long highmem_len)
-{
-	unsigned long highmem_pfn;
-	int i;
-
-	highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT;
-	for (i = 0; i < highmem_len >> PAGE_SHIFT; i++)
-		free_highmem_page(&mem_map[highmem_pfn + i]);
-}
-#endif
-
 void __init mem_init(void)
 {
 	/* clear the zero-page */
@@ -67,9 +54,6 @@ void __init mem_init(void)
 	/* this will put all low memory onto the freelists */
 	free_all_bootmem();
 	max_low_pfn = totalram_pages;
-#ifdef CONFIG_HIGHMEM
-	setup_highmem(end_iomem, highmem);
-#endif
 	max_pfn = totalram_pages;
 	mem_init_print_info(NULL);
 	kmalloc_ok = 1;
@@ -127,49 +111,6 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 	}
 }
 
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-#define kmap_get_fixmap_pte(vaddr)					\
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\
-				     (vaddr)), (vaddr))
-
-static void __init kmap_init(void)
-{
-	unsigned long kmap_vstart;
-
-	/* cache the first kmap pte */
-	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
-}
-
-static void __init init_highmem(void)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long vaddr;
-
-	/*
-	 * Permanent kmaps:
-	 */
-	vaddr = PKMAP_BASE;
-	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir);
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
-	pmd = pmd_offset(pud, vaddr);
-	pte = pte_offset_kernel(pmd, vaddr);
-	pkmap_page_table = pte;
-
-	kmap_init();
-}
-#endif /* CONFIG_HIGHMEM */
-
 static void __init fixaddr_user_init( void)
 {
 #ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
@@ -211,9 +152,6 @@ void __init paging_init(void)
 
 	zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) -
 		(uml_physmem >> PAGE_SHIFT);
-#ifdef CONFIG_HIGHMEM
-	zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT;
-#endif
 	free_area_init(zones_size);
 
 	/*
@@ -224,10 +162,6 @@ void __init paging_init(void)
 	fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir);
 
 	fixaddr_user_init();
-
-#ifdef CONFIG_HIGHMEM
-	init_highmem();
-#endif
 }
 
 /*
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 549ecf3f5857..9034fc8056b4 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -57,22 +57,51 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 
 extern int __syscall_stub_start;
 
+/**
+ * setup_physmem() - Setup physical memory for UML
+ * @start:	Start address of the physical kernel memory,
+ *		i.e start address of the executable image.
+ * @reserve_end:	end address of the physical kernel memory.
+ * @len:	Length of total physical memory that should be mapped/made
+ *		available, in bytes.
+ * @highmem:	Number of highmem bytes that should be mapped/made available.
+ *
+ * Creates an unlinked temporary file of size (len + highmem) and memory maps
+ * it on the last executable image address (uml_reserved).
+ *
+ * The offset is needed as the length of the total physical memory
+ * (len + highmem) includes the size of the memory used be the executable image,
+ * but the mapped-to address is the last address of the executable image
+ * (uml_reserved == end address of executable image).
+ *
+ * The memory mapped memory of the temporary file is used as backing memory
+ * of all user space processes/kernel tasks.
+ */
 void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 			  unsigned long len, unsigned long long highmem)
 {
 	unsigned long reserve = reserve_end - start;
-	int pfn = PFN_UP(__pa(reserve_end));
-	int delta = (len - reserve) >> PAGE_SHIFT;
-	int err, offset, bootmap_size;
+	unsigned long pfn = PFN_UP(__pa(reserve_end));
+	unsigned long delta = (len - reserve) >> PAGE_SHIFT;
+	unsigned long offset, bootmap_size;
+	long map_size;
+	int err;
+
+	offset = uml_reserved - uml_physmem;
+	map_size = len - offset;
+	if(map_size <= 0) {
+		printf("Too few physical memory! Needed=%d, given=%d\n",
+		       offset, len);
+		exit(1);
+	}
 
 	physmem_fd = create_mem_file(len + highmem);
 
-	offset = uml_reserved - uml_physmem;
 	err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
-			    len - offset, 1, 1, 1);
+			    map_size, 1, 1, 1);
 	if (err < 0) {
 		printf("setup_physmem - mapping %ld bytes of memory at 0x%p "
-		       "failed - errno = %d\n", len - offset,
+		       "failed - errno = %d\n", map_size,
 		       (void *) uml_reserved, err);
 		exit(1);
 	}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index f17bca8ed2ce..68b9119841cd 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -259,17 +259,6 @@ int strlen_user_proc(char __user *str)
 	return strlen_user(str);
 }
 
-int smp_sigio_handler(void)
-{
-#ifdef CONFIG_SMP
-	int cpu = current_thread_info()->cpu;
-	IPI_handler(cpu);
-	if (cpu != 0)
-		return 1;
-#endif
-	return 0;
-}
-
 int cpu(void)
 {
 	return current_thread_info()->cpu;
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 62435ef003d9..174ee5017264 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -8,9 +8,6 @@
 #include <linux/sched.h>
 #include <linux/tracehook.h>
 #include <asm/uaccess.h>
-#include <skas_ptrace.h>
-
-
 
 void user_enable_single_step(struct task_struct *child)
 {
@@ -104,35 +101,6 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = ptrace_set_thread_area(child, addr, vp);
 		break;
 
-	case PTRACE_FAULTINFO: {
-		/*
-		 * Take the info from thread->arch->faultinfo,
-		 * but transfer max. sizeof(struct ptrace_faultinfo).
-		 * On i386, ptrace_faultinfo is smaller!
-		 */
-		ret = copy_to_user(p, &child->thread.arch.faultinfo,
-				   sizeof(struct ptrace_faultinfo)) ?
-			-EIO : 0;
-		break;
-	}
-
-#ifdef PTRACE_LDT
-	case PTRACE_LDT: {
-		struct ptrace_ldt ldt;
-
-		if (copy_from_user(&ldt, p, sizeof(ldt))) {
-			ret = -EIO;
-			break;
-		}
-
-		/*
-		 * This one is confusing, so just punt and return -EIO for
-		 * now
-		 */
-		ret = -EIO;
-		break;
-	}
-#endif
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		if (ret == -EIO)
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index ced8903921ae..9bdf67a092a5 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -15,28 +15,21 @@ void (*pm_power_off)(void);
 
 static void kill_off_processes(void)
 {
-	if (proc_mm)
-		/*
-		 * FIXME: need to loop over userspace_pids
-		 */
-		os_kill_ptraced_process(userspace_pid[0], 1);
-	else {
-		struct task_struct *p;
-		int pid;
-
-		read_lock(&tasklist_lock);
-		for_each_process(p) {
-			struct task_struct *t;
-
-			t = find_lock_task_mm(p);
-			if (!t)
-				continue;
-			pid = t->mm->context.id.u.pid;
-			task_unlock(t);
-			os_kill_ptraced_process(pid, 1);
-		}
-		read_unlock(&tasklist_lock);
+	struct task_struct *p;
+	int pid;
+
+	read_lock(&tasklist_lock);
+	for_each_process(p) {
+		struct task_struct *t;
+
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		pid = t->mm->context.id.u.pid;
+		task_unlock(t);
+		os_kill_ptraced_process(pid, 1);
 	}
+	read_unlock(&tasklist_lock);
 }
 
 void uml_cleanup(void)
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 007d5503f49b..94abdcc1d6ad 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -54,35 +54,22 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
 	unsigned long stack = 0;
 	int ret = -ENOMEM;
 
-	if (skas_needs_stub) {
-		stack = get_zeroed_page(GFP_KERNEL);
-		if (stack == 0)
-			goto out;
-	}
+	stack = get_zeroed_page(GFP_KERNEL);
+	if (stack == 0)
+		goto out;
 
 	to_mm->id.stack = stack;
 	if (current->mm != NULL && current->mm != &init_mm)
 		from_mm = &current->mm->context;
 
-	if (proc_mm) {
-		ret = new_mm(stack);
-		if (ret < 0) {
-			printk(KERN_ERR "init_new_context_skas - "
-			       "new_mm failed, errno = %d\n", ret);
-			goto out_free;
-		}
-		to_mm->id.u.mm_fd = ret;
-	}
-	else {
-		if (from_mm)
-			to_mm->id.u.pid = copy_context_skas0(stack,
-							     from_mm->id.u.pid);
-		else to_mm->id.u.pid = start_userspace(stack);
-
-		if (to_mm->id.u.pid < 0) {
-			ret = to_mm->id.u.pid;
-			goto out_free;
-		}
+	if (from_mm)
+		to_mm->id.u.pid = copy_context_skas0(stack,
+						     from_mm->id.u.pid);
+	else to_mm->id.u.pid = start_userspace(stack);
+
+	if (to_mm->id.u.pid < 0) {
+		ret = to_mm->id.u.pid;
+		goto out_free;
 	}
 
 	ret = init_new_ldt(to_mm, from_mm);
@@ -105,9 +92,6 @@ void uml_setup_stubs(struct mm_struct *mm)
 {
 	int err, ret;
 
-	if (!skas_needs_stub)
-		return;
-
 	ret = init_stub_pte(mm, STUB_CODE,
 			    (unsigned long) &__syscall_stub_start);
 	if (ret)
@@ -154,25 +138,19 @@ void destroy_context(struct mm_struct *mm)
 {
 	struct mm_context *mmu = &mm->context;
 
-	if (proc_mm)
-		os_close_file(mmu->id.u.mm_fd);
-	else {
-		/*
-		 * If init_new_context wasn't called, this will be
-		 * zero, resulting in a kill(0), which will result in the
-		 * whole UML suddenly dying.  Also, cover negative and
-		 * 1 cases, since they shouldn't happen either.
-		 */
-		if (mmu->id.u.pid < 2) {
-			printk(KERN_ERR "corrupt mm_context - pid = %d\n",
-			       mmu->id.u.pid);
-			return;
-		}
-		os_kill_ptraced_process(mmu->id.u.pid, 1);
+	/*
+	 * If init_new_context wasn't called, this will be
+	 * zero, resulting in a kill(0), which will result in the
+	 * whole UML suddenly dying.  Also, cover negative and
+	 * 1 cases, since they shouldn't happen either.
+	 */
+	if (mmu->id.u.pid < 2) {
+		printk(KERN_ERR "corrupt mm_context - pid = %d\n",
+		       mmu->id.u.pid);
+		return;
 	}
+	os_kill_ptraced_process(mmu->id.u.pid, 1);
 
-	if (skas_needs_stub)
-		free_page(mmu->id.stack);
-
+	free_page(mmu->id.stack);
 	free_ldt(mmu);
 }
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 4da11b3c8ddb..527fa5881915 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -10,25 +10,6 @@
 #include <os.h>
 #include <skas.h>
 
-int new_mm(unsigned long stack)
-{
-	int fd, err;
-
-	fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0);
-	if (fd < 0)
-		return fd;
-
-	if (skas_needs_stub) {
-		err = map_stub_pages(fd, STUB_CODE, STUB_DATA, stack);
-		if (err) {
-			os_close_file(fd);
-			return err;
-		}
-	}
-
-	return fd;
-}
-
 extern void start_kernel(void);
 
 static int __init start_kernel_proc(void *unused)
@@ -40,9 +21,7 @@ static int __init start_kernel_proc(void *unused)
 
 	cpu_tasks[0].pid = pid;
 	cpu_tasks[0].task = current;
-#ifdef CONFIG_SMP
-	init_cpu_online(get_cpu_mask(0));
-#endif
+
 	start_kernel();
 	return 0;
 }
@@ -55,14 +34,6 @@ int __init start_uml(void)
 {
 	stack_protections((unsigned long) &cpu0_irqstack);
 	set_sigstack(cpu0_irqstack, THREAD_SIZE);
-	if (proc_mm) {
-		userspace_pid[0] = start_userspace(0);
-		if (userspace_pid[0] < 0) {
-			printf("start_uml - start_userspace returned %d\n",
-			       userspace_pid[0]);
-			exit(1);
-		}
-	}
 
 	init_new_thread_signals();
 
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
deleted file mode 100644
index 5c8c3ea7db7b..000000000000
--- a/arch/um/kernel/smp.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/percpu.h>
-#include <asm/pgalloc.h>
-#include <asm/tlb.h>
-
-#ifdef CONFIG_SMP
-
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/threads.h>
-#include <linux/interrupt.h>
-#include <linux/err.h>
-#include <linux/hardirq.h>
-#include <asm/smp.h>
-#include <asm/processor.h>
-#include <asm/spinlock.h>
-#include <kern.h>
-#include <irq_user.h>
-#include <os.h>
-
-/* Per CPU bogomips and other parameters
- * The only piece used here is the ipi pipe, which is set before SMP is
- * started and never changed.
- */
-struct cpuinfo_um cpu_data[NR_CPUS];
-
-/* A statistic, can be a little off */
-int num_reschedules_sent = 0;
-
-/* Not changed after boot */
-struct task_struct *idle_threads[NR_CPUS];
-
-void smp_send_reschedule(int cpu)
-{
-	os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1);
-	num_reschedules_sent++;
-}
-
-void smp_send_stop(void)
-{
-	int i;
-
-	printk(KERN_INFO "Stopping all CPUs...");
-	for (i = 0; i < num_online_cpus(); i++) {
-		if (i == current_thread->cpu)
-			continue;
-		os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
-	}
-	printk(KERN_CONT "done\n");
-}
-
-static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
-static cpumask_t cpu_callin_map = CPU_MASK_NONE;
-
-static int idle_proc(void *cpup)
-{
-	int cpu = (int) cpup, err;
-
-	err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1);
-	if (err < 0)
-		panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
-
-	os_set_fd_async(cpu_data[cpu].ipi_pipe[0]);
-
-	wmb();
-	if (cpu_test_and_set(cpu, cpu_callin_map)) {
-		printk(KERN_ERR "huh, CPU#%d already present??\n", cpu);
-		BUG();
-	}
-
-	while (!cpu_isset(cpu, smp_commenced_mask))
-		cpu_relax();
-
-	notify_cpu_starting(cpu);
-	set_cpu_online(cpu, true);
-	default_idle();
-	return 0;
-}
-
-static struct task_struct *idle_thread(int cpu)
-{
-	struct task_struct *new_task;
-
-	current->thread.request.u.thread.proc = idle_proc;
-	current->thread.request.u.thread.arg = (void *) cpu;
-	new_task = fork_idle(cpu);
-	if (IS_ERR(new_task))
-		panic("copy_process failed in idle_thread, error = %ld",
-		      PTR_ERR(new_task));
-
-	cpu_tasks[cpu] = ((struct cpu_task)
-		          { .pid = 	new_task->thread.mode.tt.extern_pid,
-			    .task = 	new_task } );
-	idle_threads[cpu] = new_task;
-	panic("skas mode doesn't support SMP");
-	return new_task;
-}
-
-void smp_prepare_cpus(unsigned int maxcpus)
-{
-	struct task_struct *idle;
-	unsigned long waittime;
-	int err, cpu, me = smp_processor_id();
-	int i;
-
-	for (i = 0; i < ncpus; ++i)
-		set_cpu_possible(i, true);
-
-	set_cpu_online(me, true);
-	cpu_set(me, cpu_callin_map);
-
-	err = os_pipe(cpu_data[me].ipi_pipe, 1, 1);
-	if (err < 0)
-		panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
-
-	os_set_fd_async(cpu_data[me].ipi_pipe[0]);
-
-	for (cpu = 1; cpu < ncpus; cpu++) {
-		printk(KERN_INFO "Booting processor %d...\n", cpu);
-
-		idle = idle_thread(cpu);
-
-		init_idle(idle, cpu);
-
-		waittime = 200000000;
-		while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
-			cpu_relax();
-
-		printk(KERN_INFO "%s\n",
-		       cpu_isset(cpu, cpu_calling_map) ? "done" : "failed");
-	}
-}
-
-void smp_prepare_boot_cpu(void)
-{
-	set_cpu_online(smp_processor_id(), true);
-}
-
-int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
-	cpu_set(cpu, smp_commenced_mask);
-	while (!cpu_online(cpu))
-		mb();
-	return 0;
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	printk(KERN_INFO "setup_profiling_timer\n");
-	return 0;
-}
-
-void smp_call_function_slave(int cpu);
-
-void IPI_handler(int cpu)
-{
-	unsigned char c;
-	int fd;
-
-	fd = cpu_data[cpu].ipi_pipe[0];
-	while (os_read_file(fd, &c, 1) == 1) {
-		switch (c) {
-		case 'C':
-			smp_call_function_slave(cpu);
-			break;
-
-		case 'R':
-			scheduler_ipi();
-			break;
-
-		case 'S':
-			printk(KERN_INFO "CPU#%d stopping\n", cpu);
-			while (1)
-				pause();
-			break;
-
-		default:
-			printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n",
-			       cpu, c);
-			break;
-		}
-	}
-}
-
-int hard_smp_processor_id(void)
-{
-	return pid_to_processor_id(os_getpid());
-}
-
-static DEFINE_SPINLOCK(call_lock);
-static atomic_t scf_started;
-static atomic_t scf_finished;
-static void (*func)(void *info);
-static void *info;
-
-void smp_call_function_slave(int cpu)
-{
-	atomic_inc(&scf_started);
-	(*func)(info);
-	atomic_inc(&scf_finished);
-}
-
-int smp_call_function(void (*_func)(void *info), void *_info, int wait)
-{
-	int cpus = num_online_cpus() - 1;
-	int i;
-
-	if (!cpus)
-		return 0;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	spin_lock_bh(&call_lock);
-	atomic_set(&scf_started, 0);
-	atomic_set(&scf_finished, 0);
-	func = _func;
-	info = _info;
-
-	for_each_online_cpu(i)
-		os_write_file(cpu_data[i].ipi_pipe[1], "C", 1);
-
-	while (atomic_read(&scf_started) != cpus)
-		barrier();
-
-	if (wait)
-		while (atomic_read(&scf_finished) != cpus)
-			barrier();
-
-	spin_unlock_bh(&call_lock);
-	return 0;
-}
-
-#endif
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 894c8d303cda..aa1b56f5ac68 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -29,7 +29,7 @@ static const struct stacktrace_ops stackops = {
 
 void show_stack(struct task_struct *task, unsigned long *stack)
 {
-	unsigned long *sp = stack, bp = 0;
+	unsigned long *sp = stack;
 	struct pt_regs *segv_regs = current->thread.segv_regs;
 	int i;
 
@@ -39,10 +39,6 @@ void show_stack(struct task_struct *task, unsigned long *stack)
 		return;
 	}
 
-#ifdef CONFIG_FRAME_POINTER
-	bp = get_frame_pointer(task, segv_regs);
-#endif
-
 	if (!stack)
 		sp = get_stack_pointer(task, segv_regs);
 
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 209617302df8..8e4daf44e980 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -220,7 +220,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		panic("Segfault with no mm");
 	}
 
-	if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
+	if (SEGV_IS_FIXABLE(&fi))
 		err = handle_page_fault(address, ip, is_write, is_user,
 					&si.si_code);
 	else {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 9274eae6ae7b..07f798f4bcee 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <linux/utsname.h>
 #include <linux/sched.h>
+#include <linux/kmsg_dump.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -66,12 +67,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	int index = 0;
 
-#ifdef CONFIG_SMP
-	index = (struct cpuinfo_um *) v - cpu_data;
-	if (!cpu_online(index))
-		return 0;
-#endif
-
 	seq_printf(m, "processor\t: %d\n", index);
 	seq_printf(m, "vendor_id\t: User Mode Linux\n");
 	seq_printf(m, "model name\t: UML\n");
@@ -168,23 +163,6 @@ __uml_setup("debug", no_skas_debug_setup,
 "    this flag is not needed to run gdb on UML in skas mode\n\n"
 );
 
-#ifdef CONFIG_SMP
-static int __init uml_ncpus_setup(char *line, int *add)
-{
-	if (!sscanf(line, "%d", &ncpus)) {
-		printf("Couldn't parse [%s]\n", line);
-		return -1;
-	}
-
-	return 0;
-}
-
-__uml_setup("ncpus=", uml_ncpus_setup,
-"ncpus=<# of desired CPUs>\n"
-"    This tells an SMP kernel how many virtual processors to start.\n\n"
-);
-#endif
-
 static int __init Usage(char *line, int *add)
 {
 	const char **p;
@@ -234,6 +212,7 @@ static void __init uml_postsetup(void)
 static int panic_exit(struct notifier_block *self, unsigned long unused1,
 		      void *unused2)
 {
+	kmsg_dump(KMSG_DUMP_PANIC);
 	bust_spinlocks(1);
 	bust_spinlocks(0);
 	uml_exitcode = 1;
@@ -247,6 +226,16 @@ static struct notifier_block panic_exit_notifier = {
 	.priority 		= 0
 };
 
+void uml_finishsetup(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &panic_exit_notifier);
+
+	uml_postsetup();
+
+	new_thread_handler();
+}
+
 /* Set during early boot */
 unsigned long task_size;
 EXPORT_SYMBOL(task_size);
@@ -268,7 +257,6 @@ int __init linux_main(int argc, char **argv)
 	unsigned long stack;
 	unsigned int i;
 	int add;
-	char * mode;
 
 	for (i = 1; i < argc; i++) {
 		if ((i == 1) && (argv[i][0] == ' '))
@@ -291,15 +279,6 @@ int __init linux_main(int argc, char **argv)
 	/* OS sanity checks that need to happen before the kernel runs */
 	os_early_checks();
 
-	can_do_skas();
-
-	if (proc_mm && ptrace_faultinfo)
-		mode = "SKAS3";
-	else
-		mode = "SKAS0";
-
-	printf("UML running in %s mode\n", mode);
-
 	brk_start = (unsigned long) sbrk(0);
 
 	/*
@@ -334,11 +313,6 @@ int __init linux_main(int argc, char **argv)
 	if (physmem_size + iomem_size > max_physmem) {
 		highmem = physmem_size + iomem_size - max_physmem;
 		physmem_size -= highmem;
-#ifndef CONFIG_HIGHMEM
-		highmem = 0;
-		printf("CONFIG_HIGHMEM not enabled - physical memory shrunk "
-		       "to %Lu bytes\n", physmem_size);
-#endif
 	}
 
 	high_physmem = uml_physmem + physmem_size;
@@ -362,11 +336,6 @@ int __init linux_main(int argc, char **argv)
 		printf("Kernel virtual memory size shrunk to %lu bytes\n",
 		       virtmem_size);
 
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &panic_exit_notifier);
-
-	uml_postsetup();
-
 	stack_protections((unsigned long) &init_thread_info);
 	os_flush_stdout();
 
@@ -390,15 +359,3 @@ void __init check_bugs(void)
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
-
-#ifdef CONFIG_SMP
-void alternatives_smp_module_add(struct module *mod, char *name,
-				 void *locks, void *locks_end,
-				 void *text,  void *text_end)
-{
-}
-
-void alternatives_smp_module_del(struct module *mod)
-{
-}
-#endif
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 33496fe2bb52..8408aba915b2 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -16,7 +16,6 @@
 #include <init.h>
 #include <longjmp.h>
 #include <os.h>
-#include <skas_ptrace.h>
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
@@ -102,21 +101,6 @@ void os_kill_process(int pid, int reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, __WALL));
 }
 
-/* This is here uniquely to have access to the userspace errno, i.e. the one
- * used by ptrace in case of error.
- */
-
-long os_ptrace_ldt(long pid, long addr, long data)
-{
-	int ret;
-
-	ret = ptrace(PTRACE_LDT, pid, addr, data);
-
-	if (ret < 0)
-		return -errno;
-	return ret;
-}
-
 /* Kill off a ptraced child by all means available.  kill it normally first,
  * then PTRACE_KILL it, then PTRACE_CONT it in case it's in a run state from
  * which it can't exit directly.
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 689b18db798f..e7f8c945a573 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -12,7 +12,6 @@
 #include <as-layout.h>
 #include <mm_id.h>
 #include <os.h>
-#include <proc_mm.h>
 #include <ptrace_user.h>
 #include <registers.h>
 #include <skas.h>
@@ -46,8 +45,6 @@ static int __init init_syscall_regs(void)
 
 __initcall(init_syscall_regs);
 
-extern int proc_mm;
-
 static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 {
 	int n, i;
@@ -56,10 +53,6 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 	unsigned long * syscall;
 	int err, pid = mm_idp->u.pid;
 
-	if (proc_mm)
-		/* FIXME: Need to look up userspace_pid by cpu */
-		pid = userspace_pid[0];
-
 	n = ptrace_setregs(pid, syscall_regs);
 	if (n < 0) {
 		printk(UM_KERN_ERR "Registers - \n");
@@ -178,38 +171,12 @@ int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
 	int phys_fd, unsigned long long offset, int done, void **data)
 {
 	int ret;
+	unsigned long args[] = { virt, len, prot,
+				 MAP_SHARED | MAP_FIXED, phys_fd,
+				 MMAP_OFFSET(offset) };
 
-	if (proc_mm) {
-		struct proc_mm_op map;
-		int fd = mm_idp->u.mm_fd;
-
-		map = ((struct proc_mm_op) { .op	= MM_MMAP,
-				       .u		=
-				       { .mmap	=
-					 { .addr	= virt,
-					   .len	= len,
-					   .prot	= prot,
-					   .flags	= MAP_SHARED |
-					   MAP_FIXED,
-					   .fd	= phys_fd,
-					   .offset= offset
-					 } } } );
-		CATCH_EINTR(ret = write(fd, &map, sizeof(map)));
-		if (ret != sizeof(map)) {
-			ret = -errno;
-			printk(UM_KERN_ERR "map : /proc/mm map failed, "
-			       "err = %d\n", -ret);
-		}
-		else ret = 0;
-	}
-	else {
-		unsigned long args[] = { virt, len, prot,
-					 MAP_SHARED | MAP_FIXED, phys_fd,
-					 MMAP_OFFSET(offset) };
-
-		ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
-				       data, done);
-	}
+	ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
+			       data, done);
 
 	return ret;
 }
@@ -218,32 +185,11 @@ int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 	  int done, void **data)
 {
 	int ret;
+	unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
+				 0 };
 
-	if (proc_mm) {
-		struct proc_mm_op unmap;
-		int fd = mm_idp->u.mm_fd;
-
-		unmap = ((struct proc_mm_op) { .op	= MM_MUNMAP,
-					 .u	=
-					 { .munmap	=
-					   { .addr	=
-					     (unsigned long) addr,
-					     .len		= len } } } );
-		CATCH_EINTR(ret = write(fd, &unmap, sizeof(unmap)));
-		if (ret != sizeof(unmap)) {
-			ret = -errno;
-			printk(UM_KERN_ERR "unmap - proc_mm write returned "
-			       "%d\n", ret);
-		}
-		else ret = 0;
-	}
-	else {
-		unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
-					 0 };
-
-		ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
-				       data, done);
-	}
+	ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
+			       data, done);
 
 	return ret;
 }
@@ -251,33 +197,11 @@ int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 	    unsigned int prot, int done, void **data)
 {
-	struct proc_mm_op protect;
 	int ret;
+	unsigned long args[] = { addr, len, prot, 0, 0, 0 };
 
-	if (proc_mm) {
-		int fd = mm_idp->u.mm_fd;
-
-		protect = ((struct proc_mm_op) { .op	= MM_MPROTECT,
-					   .u	=
-					   { .mprotect	=
-					     { .addr	=
-					       (unsigned long) addr,
-					       .len	= len,
-					       .prot	= prot } } } );
-
-		CATCH_EINTR(ret = write(fd, &protect, sizeof(protect)));
-		if (ret != sizeof(protect)) {
-			ret = -errno;
-			printk(UM_KERN_ERR "protect failed, err = %d", -ret);
-		}
-		else ret = 0;
-	}
-	else {
-		unsigned long args[] = { addr, len, prot, 0, 0, 0 };
-
-		ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
-				       data, done);
-	}
+	ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
+			       data, done);
 
 	return ret;
 }
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 908579f2b0ab..7a9777570a62 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -16,11 +16,9 @@
 #include <kern_util.h>
 #include <mem.h>
 #include <os.h>
-#include <proc_mm.h>
 #include <ptrace_user.h>
 #include <registers.h>
 #include <skas.h>
-#include <skas_ptrace.h>
 #include <sysdep/stub.h>
 
 int is_skas_winch(int pid, int fd, void *data)
@@ -91,50 +89,33 @@ extern unsigned long current_stub_stack(void);
 static void get_skas_faultinfo(int pid, struct faultinfo *fi)
 {
 	int err;
+	unsigned long fpregs[FP_SIZE];
 
-	if (ptrace_faultinfo) {
-		err = ptrace(PTRACE_FAULTINFO, pid, 0, fi);
-		if (err) {
-			printk(UM_KERN_ERR "get_skas_faultinfo - "
-			       "PTRACE_FAULTINFO failed, errno = %d\n", errno);
-			fatal_sigsegv();
-		}
-
-		/* Special handling for i386, which has different structs */
-		if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo))
-			memset((char *)fi + sizeof(struct ptrace_faultinfo), 0,
-			       sizeof(struct faultinfo) -
-			       sizeof(struct ptrace_faultinfo));
+	err = get_fp_registers(pid, fpregs);
+	if (err < 0) {
+		printk(UM_KERN_ERR "save_fp_registers returned %d\n",
+		       err);
+		fatal_sigsegv();
 	}
-	else {
-		unsigned long fpregs[FP_SIZE];
-
-		err = get_fp_registers(pid, fpregs);
-		if (err < 0) {
-			printk(UM_KERN_ERR "save_fp_registers returned %d\n",
-			       err);
-			fatal_sigsegv();
-		}
-		err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
-		if (err) {
-			printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
-			       "errno = %d\n", pid, errno);
-			fatal_sigsegv();
-		}
-		wait_stub_done(pid);
+	err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
+	if (err) {
+		printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
+		       "errno = %d\n", pid, errno);
+		fatal_sigsegv();
+	}
+	wait_stub_done(pid);
 
-		/*
-		 * faultinfo is prepared by the stub-segv-handler at start of
-		 * the stub stack page. We just have to copy it.
-		 */
-		memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
+	/*
+	 * faultinfo is prepared by the stub-segv-handler at start of
+	 * the stub stack page. We just have to copy it.
+	 */
+	memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
 
-		err = put_fp_registers(pid, fpregs);
-		if (err < 0) {
-			printk(UM_KERN_ERR "put_fp_registers returned %d\n",
-			       err);
-			fatal_sigsegv();
-		}
+	err = put_fp_registers(pid, fpregs);
+	if (err < 0) {
+		printk(UM_KERN_ERR "put_fp_registers returned %d\n",
+		       err);
+		fatal_sigsegv();
 	}
 }
 
@@ -198,7 +179,8 @@ extern int __syscall_stub_start;
 static int userspace_tramp(void *stack)
 {
 	void *addr;
-	int err;
+	int err, fd;
+	unsigned long long offset;
 
 	ptrace(PTRACE_TRACEME, 0, 0, 0);
 
@@ -211,36 +193,32 @@ static int userspace_tramp(void *stack)
 		exit(1);
 	}
 
-	if (!proc_mm) {
-		/*
-		 * This has a pte, but it can't be mapped in with the usual
-		 * tlb_flush mechanism because this is part of that mechanism
-		 */
-		int fd;
-		unsigned long long offset;
-		fd = phys_mapping(to_phys(&__syscall_stub_start), &offset);
-		addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
-			      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
+	/*
+	 * This has a pte, but it can't be mapped in with the usual
+	 * tlb_flush mechanism because this is part of that mechanism
+	 */
+	fd = phys_mapping(to_phys(&__syscall_stub_start), &offset);
+	addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
+		      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
+	if (addr == MAP_FAILED) {
+		printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
+		       "errno = %d\n", STUB_CODE, errno);
+		exit(1);
+	}
+
+	if (stack != NULL) {
+		fd = phys_mapping(to_phys(stack), &offset);
+		addr = mmap((void *) STUB_DATA,
+			    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
+			    MAP_FIXED | MAP_SHARED, fd, offset);
 		if (addr == MAP_FAILED) {
-			printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
-			       "errno = %d\n", STUB_CODE, errno);
+			printk(UM_KERN_ERR "mapping segfault stack "
+			       "at 0x%lx failed, errno = %d\n",
+			       STUB_DATA, errno);
 			exit(1);
 		}
-
-		if (stack != NULL) {
-			fd = phys_mapping(to_phys(stack), &offset);
-			addr = mmap((void *) STUB_DATA,
-				    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
-				    MAP_FIXED | MAP_SHARED, fd, offset);
-			if (addr == MAP_FAILED) {
-				printk(UM_KERN_ERR "mapping segfault stack "
-				       "at 0x%lx failed, errno = %d\n",
-				       STUB_DATA, errno);
-				exit(1);
-			}
-		}
 	}
-	if (!ptrace_faultinfo && (stack != NULL)) {
+	if (stack != NULL) {
 		struct sigaction sa;
 
 		unsigned long v = STUB_CODE +
@@ -286,11 +264,7 @@ int start_userspace(unsigned long stub_stack)
 
 	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
 
-	flags = CLONE_FILES;
-	if (proc_mm)
-		flags |= CLONE_VM;
-	else
-		flags |= SIGCHLD;
+	flags = CLONE_FILES | SIGCHLD;
 
 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
 	if (pid < 0) {
@@ -413,8 +387,7 @@ void userspace(struct uml_pt_regs *regs)
 
 			switch (sig) {
 			case SIGSEGV:
-				if (PTRACE_FULL_FAULTINFO ||
-				    !ptrace_faultinfo) {
+				if (PTRACE_FULL_FAULTINFO) {
 					get_skas_faultinfo(pid,
 							   &regs->faultinfo);
 					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
@@ -571,67 +544,6 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	return err;
 }
 
-/*
- * This is used only, if stub pages are needed, while proc_mm is
- * available. Opening /proc/mm creates a new mm_context, which lacks
- * the stub-pages. Thus, we map them using /proc/mm-fd
- */
-int map_stub_pages(int fd, unsigned long code, unsigned long data,
-		   unsigned long stack)
-{
-	struct proc_mm_op mmop;
-	int n;
-	unsigned long long code_offset;
-	int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start),
-				   &code_offset);
-
-	mmop = ((struct proc_mm_op) { .op        = MM_MMAP,
-				      .u         =
-				      { .mmap    =
-					{ .addr    = code,
-					  .len     = UM_KERN_PAGE_SIZE,
-					  .prot    = PROT_EXEC,
-					  .flags   = MAP_FIXED | MAP_PRIVATE,
-					  .fd      = code_fd,
-					  .offset  = code_offset
-	} } });
-	CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-	if (n != sizeof(mmop)) {
-		n = errno;
-		printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, "
-		       "offset = %llx\n", code, code_fd,
-		       (unsigned long long) code_offset);
-		printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code "
-		       "failed, err = %d\n", n);
-		return -n;
-	}
-
-	if (stack) {
-		unsigned long long map_offset;
-		int map_fd = phys_mapping(to_phys((void *)stack), &map_offset);
-		mmop = ((struct proc_mm_op)
-				{ .op        = MM_MMAP,
-				  .u         =
-				  { .mmap    =
-				    { .addr    = data,
-				      .len     = UM_KERN_PAGE_SIZE,
-				      .prot    = PROT_READ | PROT_WRITE,
-				      .flags   = MAP_FIXED | MAP_SHARED,
-				      .fd      = map_fd,
-				      .offset  = map_offset
-		} } });
-		CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-		if (n != sizeof(mmop)) {
-			n = errno;
-			printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for "
-			       "data failed, err = %d\n", n);
-			return -n;
-		}
-	}
-
-	return 0;
-}
-
 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 {
 	(*buf)[0].JB_IP = (unsigned long) handler;
@@ -674,7 +586,7 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 	n = setjmp(initial_jmpbuf);
 	switch (n) {
 	case INIT_JMP_NEW_THREAD:
-		(*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler;
+		(*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup;
 		(*switch_buf)[0].JB_SP = (unsigned long) stack +
 			UM_THREAD_SIZE - sizeof(void *);
 		break;
@@ -728,17 +640,5 @@ void reboot_skas(void)
 
 void __switch_mm(struct mm_id *mm_idp)
 {
-	int err;
-
-	/* FIXME: need cpu pid in __switch_mm */
-	if (proc_mm) {
-		err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0,
-			     mm_idp->u.mm_fd);
-		if (err) {
-			printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM "
-			       "failed, errno = %d\n", errno);
-			fatal_sigsegv();
-		}
-	}
-	else userspace_pid[0] = mm_idp->u.pid;
+	userspace_pid[0] = mm_idp->u.pid;
 }
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 337518c5042a..47f1ff056a54 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -24,7 +24,6 @@
 #include <ptrace_user.h>
 #include <registers.h>
 #include <skas.h>
-#include <skas_ptrace.h>
 
 static void ptrace_child(void)
 {
@@ -143,44 +142,6 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit)
 }
 
 /* Changed only during early boot */
-int ptrace_faultinfo;
-static int disable_ptrace_faultinfo;
-
-int ptrace_ldt;
-static int disable_ptrace_ldt;
-
-int proc_mm;
-static int disable_proc_mm;
-
-int have_switch_mm;
-static int disable_switch_mm;
-
-int skas_needs_stub;
-
-static int __init skas0_cmd_param(char *str, int* add)
-{
-	disable_ptrace_faultinfo = 1;
-	disable_ptrace_ldt = 1;
-	disable_proc_mm = 1;
-	disable_switch_mm = 1;
-
-	return 0;
-}
-
-/* The two __uml_setup would conflict, without this stupid alias. */
-
-static int __init mode_skas0_cmd_param(char *str, int* add)
-	__attribute__((alias("skas0_cmd_param")));
-
-__uml_setup("skas0", skas0_cmd_param,
-"skas0\n"
-"    Disables SKAS3 and SKAS4 usage, so that SKAS0 is used\n\n");
-
-__uml_setup("mode=skas0", mode_skas0_cmd_param,
-"mode=skas0\n"
-"    Disables SKAS3 and SKAS4 usage, so that SKAS0 is used.\n\n");
-
-/* Changed only during early boot */
 static int force_sysemu_disabled = 0;
 
 static int __init nosysemu_cmd_param(char *str, int* add)
@@ -376,121 +337,6 @@ void __init os_early_checks(void)
 	stop_ptraced_child(pid, 1, 1);
 }
 
-static int __init noprocmm_cmd_param(char *str, int* add)
-{
-	disable_proc_mm = 1;
-	return 0;
-}
-
-__uml_setup("noprocmm", noprocmm_cmd_param,
-"noprocmm\n"
-"    Turns off usage of /proc/mm, even if host supports it.\n"
-"    To support /proc/mm, the host needs to be patched using\n"
-"    the current skas3 patch.\n\n");
-
-static int __init noptracefaultinfo_cmd_param(char *str, int* add)
-{
-	disable_ptrace_faultinfo = 1;
-	return 0;
-}
-
-__uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param,
-"noptracefaultinfo\n"
-"    Turns off usage of PTRACE_FAULTINFO, even if host supports\n"
-"    it. To support PTRACE_FAULTINFO, the host needs to be patched\n"
-"    using the current skas3 patch.\n\n");
-
-static int __init noptraceldt_cmd_param(char *str, int* add)
-{
-	disable_ptrace_ldt = 1;
-	return 0;
-}
-
-__uml_setup("noptraceldt", noptraceldt_cmd_param,
-"noptraceldt\n"
-"    Turns off usage of PTRACE_LDT, even if host supports it.\n"
-"    To support PTRACE_LDT, the host needs to be patched using\n"
-"    the current skas3 patch.\n\n");
-
-static inline void check_skas3_ptrace_faultinfo(void)
-{
-	struct ptrace_faultinfo fi;
-	int pid, n;
-
-	non_fatal("  - PTRACE_FAULTINFO...");
-	pid = start_ptraced_child();
-
-	n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
-	if (n < 0) {
-		if (errno == EIO)
-			non_fatal("not found\n");
-		else
-			perror("not found");
-	} else if (disable_ptrace_faultinfo)
-		non_fatal("found but disabled on command line\n");
-	else {
-		ptrace_faultinfo = 1;
-		non_fatal("found\n");
-	}
-
-	stop_ptraced_child(pid, 1, 1);
-}
-
-static inline void check_skas3_ptrace_ldt(void)
-{
-#ifdef PTRACE_LDT
-	int pid, n;
-	unsigned char ldtbuf[40];
-	struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
-		.func = 2, /* read default ldt */
-		.ptr = ldtbuf,
-		.bytecount = sizeof(ldtbuf)};
-
-	non_fatal("  - PTRACE_LDT...");
-	pid = start_ptraced_child();
-
-	n = ptrace(PTRACE_LDT, pid, 0, (unsigned long) &ldt_op);
-	if (n < 0) {
-		if (errno == EIO)
-			non_fatal("not found\n");
-		else
-			perror("not found");
-	} else if (disable_ptrace_ldt)
-		non_fatal("found, but use is disabled\n");
-	else {
-		ptrace_ldt = 1;
-		non_fatal("found\n");
-	}
-
-	stop_ptraced_child(pid, 1, 1);
-#endif
-}
-
-static inline void check_skas3_proc_mm(void)
-{
-	non_fatal("  - /proc/mm...");
-	if (access("/proc/mm", W_OK) < 0)
-		perror("not found");
-	else if (disable_proc_mm)
-		non_fatal("found but disabled on command line\n");
-	else {
-		proc_mm = 1;
-		non_fatal("found\n");
-	}
-}
-
-void can_do_skas(void)
-{
-	non_fatal("Checking for the skas3 patch in the host:\n");
-
-	check_skas3_proc_mm();
-	check_skas3_ptrace_faultinfo();
-	check_skas3_ptrace_ldt();
-
-	if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt)
-		skas_needs_stub = 1;
-}
-
 int __init parse_iomem(char *str, int *add)
 {
 	struct iomem_region *new;
diff --git a/arch/um/sys-ia64/Makefile b/arch/um/sys-ia64/Makefile
deleted file mode 100644
index d02f4c265232..000000000000
--- a/arch/um/sys-ia64/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-OBJ = built-in.o
-
-OBJS =
-
-all: $(OBJ)
-
-$(OBJ): $(OBJS)
-	rm -f $@
-	$(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@
-
-clean-files := $(OBJS) link.ld
diff --git a/arch/um/sys-ia64/sysdep/ptrace.h b/arch/um/sys-ia64/sysdep/ptrace.h
deleted file mode 100644
index 0f0f4e6fd334..000000000000
--- a/arch/um/sys-ia64/sysdep/ptrace.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_PTRACE_H
-#define __SYSDEP_IA64_PTRACE_H
-
-struct sys_pt_regs {
-  int foo;
-};
-
-#define EMPTY_REGS { 0 }
-
-#endif
-
diff --git a/arch/um/sys-ia64/sysdep/sigcontext.h b/arch/um/sys-ia64/sysdep/sigcontext.h
deleted file mode 100644
index 76b43161e779..000000000000
--- a/arch/um/sys-ia64/sysdep/sigcontext.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_SIGCONTEXT_H
-#define __SYSDEP_IA64_SIGCONTEXT_H
-
-#endif
-
diff --git a/arch/um/sys-ia64/sysdep/skas_ptrace.h b/arch/um/sys-ia64/sysdep/skas_ptrace.h
deleted file mode 100644
index 25a38e715702..000000000000
--- a/arch/um/sys-ia64/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_SKAS_PTRACE_H
-#define __SYSDEP_IA64_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-ia64/sysdep/syscalls.h b/arch/um/sys-ia64/sysdep/syscalls.h
deleted file mode 100644
index 5f6700c41558..000000000000
--- a/arch/um/sys-ia64/sysdep/syscalls.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_SYSCALLS_H
-#define __SYSDEP_IA64_SYSCALLS_H
-
-#endif
-
diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile
deleted file mode 100644
index 20d363bd7004..000000000000
--- a/arch/um/sys-ppc/Makefile
+++ /dev/null
@@ -1,65 +0,0 @@
-OBJ = built-in.o
-
-.S.o:
-	$(CC) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-
-OBJS = ptrace.o sigcontext.o checksum.o miscthings.o misc.o \
-	ptrace_user.o sysrq.o
-
-asflags-y := -DCONFIG_PPC32 -I. -I$(srctree)/arch/ppc/kernel
-
-all: $(OBJ)
-
-$(OBJ): $(OBJS)
-	rm -f $@
-	$(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@
-
-ptrace_user.o: ptrace_user.c
-	$(CC) -D__KERNEL__ $(USER_CFLAGS) $(ccflags-y) -c -o $@ $<
-
-sigcontext.o: sigcontext.c
-	$(CC) $(USER_CFLAGS) $(ccflags-y) -c -o $@ $<
-
-checksum.S:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/lib/$@ $@
-
-mk_defs.c:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/kernel/$@ $@
-
-ppc_defs.head:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/kernel/$@ $@
-
-ppc_defs.h: mk_defs.c ppc_defs.head \
-		$(srctree)/include/asm-ppc/mmu.h \
-		$(srctree)/include/asm-ppc/processor.h \
-		$(srctree)/include/asm-ppc/pgtable.h \
-		$(srctree)/include/asm-ppc/ptrace.h
-#	$(CC) $(CFLAGS) -S mk_defs.c
-	cp ppc_defs.head ppc_defs.h
-# for bk, this way we can write to the file even if it's not checked out
-	echo '#define THREAD 608' >> ppc_defs.h
-	echo '#define PT_REGS 8' >> ppc_defs.h
-	echo '#define CLONE_VM 256' >> ppc_defs.h
-#	chmod u+w ppc_defs.h
-#	grep '^#define' mk_defs.s >> ppc_defs.h
-#	rm mk_defs.s
-
-# the asm link is horrible, and breaks the other targets.  This is also
-# not going to work with parallel makes.
-
-checksum.o: checksum.S
-	rm -f asm
-	ln -s $(srctree)/include/asm-ppc asm
-	$(CC) $(asflags-y) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-	rm -f asm
-
-misc.o: misc.S ppc_defs.h
-	rm -f asm
-	ln -s $(srctree)/include/asm-ppc asm
-	$(CC) $(asflags-y) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-	rm -f asm
-
-clean-files := $(OBJS) ppc_defs.h checksum.S mk_defs.c
diff --git a/arch/um/sys-ppc/asm/archparam.h b/arch/um/sys-ppc/asm/archparam.h
deleted file mode 100644
index 4269d8a37b4f..000000000000
--- a/arch/um/sys-ppc/asm/archparam.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __UM_ARCHPARAM_PPC_H
-#define __UM_ARCHPARAM_PPC_H
-
-/********* Bits for asm-um/string.h **********/
-
-#define __HAVE_ARCH_STRRCHR
-
-#endif
diff --git a/arch/um/sys-ppc/asm/elf.h b/arch/um/sys-ppc/asm/elf.h
deleted file mode 100644
index 8aacaf56508d..000000000000
--- a/arch/um/sys-ppc/asm/elf.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef __UM_ELF_PPC_H
-#define __UM_ELF_PPC_H
-
-
-extern long elf_aux_hwcap;
-#define ELF_HWCAP (elf_aux_hwcap)
-
-#define SET_PERSONALITY(ex) do ; while(0)
-
-#define ELF_EXEC_PAGESIZE 4096
-
-#define elf_check_arch(x) (1)
-
-#ifdef CONFIG_64BIT
-#define ELF_CLASS ELFCLASS64
-#else
-#define ELF_CLASS ELFCLASS32
-#endif
-
-#define R_386_NONE	0
-#define R_386_32	1
-#define R_386_PC32	2
-#define R_386_GOT32	3
-#define R_386_PLT32	4
-#define R_386_COPY	5
-#define R_386_GLOB_DAT	6
-#define R_386_JMP_SLOT	7
-#define R_386_RELATIVE	8
-#define R_386_GOTOFF	9
-#define R_386_GOTPC	10
-#define R_386_NUM	11
-
-#define ELF_PLATFORM (0)
-
-#define ELF_ET_DYN_BASE (0x08000000)
-
-/* the following stolen from asm-ppc/elf.h */
-#define ELF_NGREG	48	/* includes nip, msr, lr, etc. */
-#define ELF_NFPREG	33	/* includes fpscr */
-/* General registers */
-typedef unsigned long elf_greg_t;
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-/* Floating point registers */
-typedef double elf_fpreg_t;
-typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
-
-#define ELF_DATA        ELFDATA2MSB
-#define ELF_ARCH	EM_PPC
-
-#endif
diff --git a/arch/um/sys-ppc/asm/processor.h b/arch/um/sys-ppc/asm/processor.h
deleted file mode 100644
index 959323151229..000000000000
--- a/arch/um/sys-ppc/asm/processor.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __UM_PROCESSOR_PPC_H
-#define __UM_PROCESSOR_PPC_H
-
-#if defined(__ASSEMBLY__)
-
-#define CONFIG_PPC_MULTIPLATFORM
-#include "arch/processor.h"
-
-#else
-
-#include "asm/processor-generic.h"
-
-#endif
-
-#endif
diff --git a/arch/um/sys-ppc/misc.S b/arch/um/sys-ppc/misc.S
deleted file mode 100644
index 1364b7da578c..000000000000
--- a/arch/um/sys-ppc/misc.S
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * This file contains miscellaneous low-level functions.
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
- * and Paul Mackerras.
- *
- * A couple of functions stolen from arch/ppc/kernel/misc.S for UML
- * by Chris Emerson.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/processor.h>
-#include "ppc_asm.h"
-
-#if defined(CONFIG_4xx) || defined(CONFIG_8xx)
-#define CACHE_LINE_SIZE		16
-#define LG_CACHE_LINE_SIZE	4
-#define MAX_COPY_PREFETCH	1
-#else
-#define CACHE_LINE_SIZE		32
-#define LG_CACHE_LINE_SIZE	5
-#define MAX_COPY_PREFETCH	4
-#endif /* CONFIG_4xx || CONFIG_8xx */
-
-	.text
-
-/*
- * Clear a page using the dcbz instruction, which doesn't cause any
- * memory traffic (except to write out any cache lines which get
- * displaced).  This only works on cacheable memory.
- */
-_GLOBAL(clear_page)
-	li	r0,4096/CACHE_LINE_SIZE
-	mtctr	r0
-#ifdef CONFIG_8xx
-	li	r4, 0
-1:	stw	r4, 0(r3)
-	stw	r4, 4(r3)
-	stw	r4, 8(r3)
-	stw	r4, 12(r3)
-#else
-1:	dcbz	0,r3
-#endif
-	addi	r3,r3,CACHE_LINE_SIZE
-	bdnz	1b
-	blr
-
-/*
- * Copy a whole page.  We use the dcbz instruction on the destination
- * to reduce memory traffic (it eliminates the unnecessary reads of
- * the destination into cache).  This requires that the destination
- * is cacheable.
- */
-#define COPY_16_BYTES		\
-	lwz	r6,4(r4);	\
-	lwz	r7,8(r4);	\
-	lwz	r8,12(r4);	\
-	lwzu	r9,16(r4);	\
-	stw	r6,4(r3);	\
-	stw	r7,8(r3);	\
-	stw	r8,12(r3);	\
-	stwu	r9,16(r3)
-
-_GLOBAL(copy_page)
-	addi	r3,r3,-4
-	addi	r4,r4,-4
-	li	r5,4
-
-#ifndef CONFIG_8xx
-#if MAX_COPY_PREFETCH > 1
-	li	r0,MAX_COPY_PREFETCH
-	li	r11,4
-	mtctr	r0
-11:	dcbt	r11,r4
-	addi	r11,r11,CACHE_LINE_SIZE
-	bdnz	11b
-#else /* MAX_COPY_PREFETCH == 1 */
-	dcbt	r5,r4
-	li	r11,CACHE_LINE_SIZE+4
-#endif /* MAX_COPY_PREFETCH */
-#endif /* CONFIG_8xx */
-
-	li	r0,4096/CACHE_LINE_SIZE
-	mtctr	r0
-1:
-#ifndef CONFIG_8xx
-	dcbt	r11,r4
-	dcbz	r5,r3
-#endif
-	COPY_16_BYTES
-#if CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES
-#if CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES
-	COPY_16_BYTES
-#if CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-#endif
-#endif
-#endif
-	bdnz	1b
-	blr
diff --git a/arch/um/sys-ppc/miscthings.c b/arch/um/sys-ppc/miscthings.c
deleted file mode 100644
index 25908d26ce07..000000000000
--- a/arch/um/sys-ppc/miscthings.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <linux/threads.h>
-#include <linux/stddef.h>  // for NULL
-#include <linux/elf.h>  // for AT_NULL
-
-/* The following function nicked from arch/ppc/kernel/process.c and
- * adapted slightly */
-/*
- * XXX ld.so expects the auxiliary table to start on
- * a 16-byte boundary, so we have to find it and
- * move it up. :-(
- */
-void shove_aux_table(unsigned long sp)
-{
-	int argc;
-	char *p;
-	unsigned long e;
-	unsigned long aux_start, offset;
-
-	argc = *(int *)sp;
-	sp += sizeof(int) + (argc + 1) * sizeof(char *);
-	/* skip over the environment pointers */
-	do {
-		p = *(char **)sp;
-		sp += sizeof(char *);
-	} while (p != NULL);
-	aux_start = sp;
-	/* skip to the end of the auxiliary table */
-	do {
-		e = *(unsigned long *)sp;
-		sp += 2 * sizeof(unsigned long);
-	} while (e != AT_NULL);
-	offset = ((aux_start + 15) & ~15) - aux_start;
-	if (offset != 0) {
-		do {
-			sp -= sizeof(unsigned long);
-			e = *(unsigned long *)sp;
-			*(unsigned long *)(sp + offset) = e;
-		} while (sp > aux_start);
-	}
-}
-/* END stuff taken from arch/ppc/kernel/process.c */
-
diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c
deleted file mode 100644
index 8245df41b201..000000000000
--- a/arch/um/sys-ppc/ptrace.c
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <linux/sched.h>
-#include "asm/ptrace.h"
-
-int putreg(struct task_struct *child, unsigned long regno, 
-		  unsigned long value)
-{
-	child->thread.process_regs.regs[regno >> 2] = value;
-	return 0;
-}
-
-int poke_user(struct task_struct *child, long addr, long data)
-{
-	if ((addr & 3) || addr < 0)
-		return -EIO;
-
-	if (addr < MAX_REG_OFFSET)
-		return putreg(child, addr, data);
-
-	else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
-		  addr -= offsetof(struct user, u_debugreg[0]);
-		  addr = addr >> 2;
-		  if((addr == 4) || (addr == 5)) return -EIO;
-		  child->thread.arch.debugregs[addr] = data;
-		  return 0;
-	}
-	return -EIO;
-}
-
-unsigned long getreg(struct task_struct *child, unsigned long regno)
-{
-	unsigned long retval = ~0UL;
-
-	retval &= child->thread.process_regs.regs[regno >> 2];
-	return retval;
-}
-
-int peek_user(struct task_struct *child, long addr, long data)
-{
-	/* read the word at location addr in the USER area. */
-	unsigned long tmp;
-
-	if ((addr & 3) || addr < 0)
-		return -EIO;
-
-	tmp = 0;  /* Default return condition */
-	if(addr < MAX_REG_OFFSET){
-		tmp = getreg(child, addr);
-	}
-	else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
-		addr -= offsetof(struct user, u_debugreg[0]);
-		addr = addr >> 2;
-		tmp = child->thread.arch.debugregs[addr];
-	}
-	return put_user(tmp, (unsigned long *) data);
-}
-
diff --git a/arch/um/sys-ppc/ptrace_user.c b/arch/um/sys-ppc/ptrace_user.c
deleted file mode 100644
index 4601b9296aa7..000000000000
--- a/arch/um/sys-ppc/ptrace_user.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <errno.h>
-#include <asm/ptrace.h>
-#include <sysdep/ptrace.h>
-
-int ptrace_getregs(long pid, unsigned long *regs_out)
-{
-    int i;
-    for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) {
-	errno = 0;
-	regs_out->regs[i] = ptrace(PTRACE_PEEKUSR, pid, i*4, 0);
-	if (errno) {
-	    return -errno;
-	}
-    }
-    return 0;
-}
-
-int ptrace_setregs(long pid, unsigned long *regs_in)
-{
-    int i;
-    for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) {
-	if (i != 34 /* FIXME: PT_ORIG_R3 */ && i <= PT_MQ) {
-	    if (ptrace(PTRACE_POKEUSR, pid, i*4, regs_in->regs[i]) < 0) {
-		return -errno;
-	    }
-	}
-    }
-    return 0;
-}
diff --git a/arch/um/sys-ppc/shared/sysdep/ptrace.h b/arch/um/sys-ppc/shared/sysdep/ptrace.h
deleted file mode 100644
index efe0c1a3ea9c..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/ptrace.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* 
- * Licensed under the GPL
- */
-
-#ifndef __SYS_PTRACE_PPC_H
-#define __SYS_PTRACE_PPC_H
-
-#include <linux/types.h>
-
-/* the following taken from <asm-ppc/ptrace.h> */
-
-#ifdef CONFIG_PPC64
-#define PPC_REG unsigned long /*long*/
-#else
-#define PPC_REG unsigned long
-#endif
-struct sys_pt_regs_s {
-	PPC_REG gpr[32];
-	PPC_REG nip;
-	PPC_REG msr;
-	PPC_REG orig_gpr3;	/* Used for restarting system calls */
-	PPC_REG ctr;
-	PPC_REG link;
-	PPC_REG xer;
-	PPC_REG ccr;
-	PPC_REG mq;		/* 601 only (not used at present) */
-				/* Used on APUS to hold IPL value. */
-	PPC_REG trap;		/* Reason for being here */
-	PPC_REG dar;		/* Fault registers */
-	PPC_REG dsisr;
-	PPC_REG result; 	/* Result of a system call */
-};
-
-#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG))
-
-struct sys_pt_regs {
-    PPC_REG regs[sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)];
-};
-
-#define UM_MAX_REG (PT_FPR0)
-#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG))
-
-#define EMPTY_REGS { { [ 0 ... NUM_REGS - 1] = 0 } }
-
-#define UM_REG(r, n) ((r)->regs[n])
-
-#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3)
-#define UM_SP(r) UM_REG(r, PT_R1)
-#define UM_IP(r) UM_REG(r, PT_NIP)
-#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR)
-#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0)
-#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3)
-#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4)
-#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5)
-#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6)
-#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7)
-#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8)
-
-#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG))
-#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG))
-#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG))
-#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG))
-#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG))
-
-#define UM_SET_SYSCALL_RETURN(_regs, result)	        \
-do {                                                    \
-        if (result < 0) {				\
-		(_regs)->regs[PT_CCR] |= 0x10000000;	\
-		UM_SYSCALL_RET((_regs)) = -result;	\
-        } else {					\
-		UM_SYSCALL_RET((_regs)) = result;	\
-        }                                               \
-} while(0)
-
-extern void shove_aux_table(unsigned long sp);
-#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp);
-
-/* These aren't actually defined.  The undefs are just to make sure
- * everyone's clear on the concept.
- */
-#undef UML_HAVE_GETREGS
-#undef UML_HAVE_GETFPREGS
-#undef UML_HAVE_SETREGS
-#undef UML_HAVE_SETFPREGS
-
-#endif
-
diff --git a/arch/um/sys-ppc/shared/sysdep/sigcontext.h b/arch/um/sys-ppc/shared/sysdep/sigcontext.h
deleted file mode 100644
index b7286f0a1e00..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/sigcontext.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYS_SIGCONTEXT_PPC_H
-#define __SYS_SIGCONTEXT_PPC_H
-
-#define DSISR_WRITE 0x02000000
-
-#define SC_FAULT_ADDR(sc) ({ \
-		struct sigcontext *_sc = (sc); \
-		long retval = -1; \
-		switch (_sc->regs->trap) { \
-		case 0x300: \
-			/* data exception */ \
-			retval = _sc->regs->dar; \
-			break; \
-		case 0x400: \
-			/* instruction exception */ \
-			retval = _sc->regs->nip; \
-			break; \
-		default: \
-			panic("SC_FAULT_ADDR: unhandled trap type\n"); \
-		} \
-		retval; \
-	})
-
-#define SC_FAULT_WRITE(sc) ({ \
-		struct sigcontext *_sc = (sc); \
-		long retval = -1; \
-		switch (_sc->regs->trap) { \
-		case 0x300: \
-			/* data exception */ \
-			retval = !!(_sc->regs->dsisr & DSISR_WRITE); \
-			break; \
-		case 0x400: \
-			/* instruction exception: not a write */ \
-			retval = 0; \
-			break; \
-		default: \
-			panic("SC_FAULT_ADDR: unhandled trap type\n"); \
-		} \
-		retval; \
-	})
-
-#define SC_IP(sc) ((sc)->regs->nip)
-#define SC_SP(sc) ((sc)->regs->gpr[1])
-#define SEGV_IS_FIXABLE(sc) (1)
-
-#endif
-
diff --git a/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h b/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h
deleted file mode 100644
index d9fbbac10de0..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_PPC_SKAS_PTRACE_H
-#define __SYSDEP_PPC_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-ppc/shared/sysdep/syscalls.h b/arch/um/sys-ppc/shared/sysdep/syscalls.h
deleted file mode 100644
index 1ff81552251c..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/syscalls.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2,
-			       unsigned long arg3, unsigned long arg4,
-			       unsigned long arg5, unsigned long arg6);
-
-#define EXECUTE_SYSCALL(syscall, regs) \
-        (*sys_call_table[syscall])(UM_SYSCALL_ARG1(&regs), \
-			           UM_SYSCALL_ARG2(&regs), \
-				   UM_SYSCALL_ARG3(&regs), \
-				   UM_SYSCALL_ARG4(&regs), \
-				   UM_SYSCALL_ARG5(&regs), \
-				   UM_SYSCALL_ARG6(&regs))
-
-extern syscall_handler_t sys_mincore;
-extern syscall_handler_t sys_madvise;
-
-/* old_mmap needs the correct prototype since syscall_kern.c includes
- * this file.
- */
-int old_mmap(unsigned long addr, unsigned long len,
-	     unsigned long prot, unsigned long flags,
-	     unsigned long fd, unsigned long offset);
-
-#define ARCH_SYSCALLS \
-	[ __NR_modify_ldt ] = sys_ni_syscall, \
-	[ __NR_pciconfig_read ] = sys_ni_syscall, \
-	[ __NR_pciconfig_write ] = sys_ni_syscall, \
-	[ __NR_pciconfig_iobase ] = sys_ni_syscall, \
-	[ __NR_pivot_root ] = sys_ni_syscall, \
-	[ __NR_multiplexer ] = sys_ni_syscall, \
-	[ __NR_mmap ] = old_mmap, \
-	[ __NR_madvise ] = sys_madvise, \
-	[ __NR_mincore ] = sys_mincore, \
-	[ __NR_iopl ] = (syscall_handler_t *) sys_ni_syscall, \
-	[ __NR_utimes ] = (syscall_handler_t *) sys_utimes, \
-	[ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64,
-
-#define LAST_ARCH_SYSCALL __NR_fadvise64
-
diff --git a/arch/um/sys-ppc/sigcontext.c b/arch/um/sys-ppc/sigcontext.c
deleted file mode 100644
index aac6c83fe44e..000000000000
--- a/arch/um/sys-ppc/sigcontext.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "asm/ptrace.h"
-#include "asm/sigcontext.h"
-#include <sysdep/ptrace.h>
-
diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c
deleted file mode 100644
index 1ff1ad7f27da..000000000000
--- a/arch/um/sys-ppc/sysrq.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* 
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Licensed under the GPL
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include "asm/ptrace.h"
-#include "sysrq.h"
-
-void show_regs(struct pt_regs_subarch *regs)
-{
-	printk("\n");
-	show_regs_print_info(KERN_DEFAULT);
-
-	printk("show_regs(): insert regs here.\n");
-#if 0
-        printk("\n");
-        printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip,
-	       smp_processor_id());
-        if (regs->xcs & 3)
-                printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp);
-        printk(" EFLAGS: %08lx\n", regs->eflags);
-        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-                regs->eax, regs->ebx, regs->ecx, regs->edx);
-        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-                regs->esi, regs->edi, regs->ebp);
-        printk(" DS: %04x ES: %04x\n",
-                0xffff & regs->xds, 0xffff & regs->xes);
-#endif
-
-        show_trace(current, &regs->gpr[1]);
-}
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
index 63e2839dfeb8..e79ad6d5b5b2 100644
--- a/arch/unicore32/include/asm/thread_info.h
+++ b/arch/unicore32/include/asm/thread_info.h
@@ -24,7 +24,6 @@
 #ifndef __ASSEMBLY__
 
 struct task_struct;
-struct exec_domain;
 
 #include <asm/types.h>
 
@@ -71,7 +70,6 @@ struct thread_info {
 						/* <0 => bug */
 	mm_segment_t		addr_limit;	/* address limit */
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	__u32			cpu;		/* cpu */
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
@@ -84,7 +82,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)						\
 {									\
 	.task		= &tsk,						\
-	.exec_domain	= &default_exec_domain,				\
 	.flags		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
 	.addr_limit	= KERNEL_DS,					\
diff --git a/arch/unicore32/kernel/asm-offsets.c b/arch/unicore32/kernel/asm-offsets.c
index ffcbe7536ca7..80d50c4651e3 100644
--- a/arch/unicore32/kernel/asm-offsets.c
+++ b/arch/unicore32/kernel/asm-offsets.c
@@ -42,7 +42,6 @@ int main(void)
 	DEFINE(TI_PREEMPT,	offsetof(struct thread_info, preempt_count));
 	DEFINE(TI_ADDR_LIMIT,	offsetof(struct thread_info, addr_limit));
 	DEFINE(TI_TASK,		offsetof(struct thread_info, task));
-	DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
 	DEFINE(TI_CPU_SAVE,	offsetof(struct thread_info, cpu_context));
 	DEFINE(TI_USED_CP,	offsetof(struct thread_info, used_cp));
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index 374a055a8e6b..d45fa5f3e9c4 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -266,17 +266,10 @@ static int __init pci_common_init(void)
 	pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq);
 
 	if (!pci_has_flag(PCI_PROBE_ONLY)) {
-		/*
-		 * Size the bridge windows.
-		 */
 		pci_bus_size_bridges(puv3_bus);
-
-		/*
-		 * Assign resources.
-		 */
 		pci_bus_assign_resources(puv3_bus);
 	}
-
+	pci_bus_add_devices(puv3_bus);
 	return 0;
 }
 subsys_initcall(pci_common_init);
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index d329f85766cc..4ae51cf15ade 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -330,13 +330,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs,
 	}
 
 	/*
-	 * translate the signal
-	 */
-	if (usig < 32 && thread->exec_domain
-			&& thread->exec_domain->signal_invmap)
-		usig = thread->exec_domain->signal_invmap[usig];
-
-	/*
 	 * Set up the stack frame
 	 */
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b7d31ca55187..6049d587599e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -87,7 +87,7 @@ config X86
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
 	select HAVE_USER_RETURN_NOTIFIER
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select HAVE_ARCH_JUMP_LABEL
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select SPARSE_IRQ
@@ -99,6 +99,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select HAVE_BPF_JIT if X86_64
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE)
 	select ARCH_HAS_SG_CHAIN
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -177,7 +178,7 @@ config SBUS
 
 config NEED_DMA_MAP_STATE
 	def_bool y
-	depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
+	depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB
 
 config NEED_SG_DMA_LENGTH
 	def_bool y
@@ -235,12 +236,10 @@ config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
 config ZONE_DMA32
-	bool
-	default X86_64
+	def_bool y if X86_64
 
 config AUDIT_ARCH
-	bool
-	default X86_64
+	def_bool y if X86_64
 
 config ARCH_SUPPORTS_OPTIMIZED_INLINING
 	def_bool y
@@ -279,6 +278,12 @@ config ARCH_SUPPORTS_UPROBES
 config FIX_EARLYCON_MEM
 	def_bool y
 
+config PGTABLE_LEVELS
+	int
+	default 4 if X86_64
+	default 3 if X86_PAE
+	default 2
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
@@ -716,17 +721,6 @@ endif #HYPERVISOR_GUEST
 config NO_BOOTMEM
 	def_bool y
 
-config MEMTEST
-	bool "Memtest"
-	---help---
-	  This option adds a kernel parameter 'memtest', which allows memtest
-	  to be set.
-	        memtest=0, mean disabled; -- default
-	        memtest=1, mean do 1 test pattern;
-	        ...
-	        memtest=4, mean do 4 test patterns.
-	  If you are unsure how to answer this question, answer N.
-
 source "arch/x86/Kconfig.cpu"
 
 config HPET_TIMER
@@ -891,7 +885,8 @@ config UP_LATE_INIT
        depends on !SMP && X86_LOCAL_APIC
 
 config X86_UP_APIC
-	bool "Local APIC support on uniprocessors"
+	bool "Local APIC support on uniprocessors" if !PCI_MSI
+	default PCI_MSI
 	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
@@ -903,10 +898,6 @@ config X86_UP_APIC
 	  performance counters), and the NMI watchdog which detects hard
 	  lockups.
 
-config X86_UP_APIC_MSI
-	def_bool y
-	select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
-
 config X86_UP_IOAPIC
 	bool "IO-APIC support on uniprocessors"
 	depends on X86_UP_APIC
@@ -925,8 +916,8 @@ config X86_LOCAL_APIC
 	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-	def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
-	depends on X86_LOCAL_APIC
+	def_bool y
+	depends on X86_LOCAL_APIC || X86_UP_IOAPIC
 	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1145,10 +1136,10 @@ config MICROCODE_OLD_INTERFACE
 	depends on MICROCODE
 
 config MICROCODE_INTEL_EARLY
-	def_bool n
+	bool
 
 config MICROCODE_AMD_EARLY
-	def_bool n
+	bool
 
 config MICROCODE_EARLY
 	bool "Early load microcode"
@@ -1300,14 +1291,14 @@ config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 	depends on X86_64 || HIGHMEM64G
 
-config DIRECT_GBPAGES
-	bool "Enable 1GB pages for kernel pagetables" if EXPERT
-	default y
-	depends on X86_64
+config X86_DIRECT_GBPAGES
+	def_bool y
+	depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
 	---help---
-	  Allow the kernel linear mapping to use 1GB pages on CPUs that
-	  support it. This can improve the kernel's performance a tiny bit by
-	  reducing TLB pressure. If in doubt, say "Y".
+	  Certain kernel features effectively disable kernel
+	  linear 1 GB mappings (even if the CPU otherwise
+	  supports them), so don't confuse the user by printing
+	  that we have them enabled.
 
 # Common NUMA Features
 config NUMA
@@ -1430,6 +1421,16 @@ config ILLEGAL_POINTER_VALUE
 
 source "mm/Kconfig"
 
+config X86_PMEM_LEGACY
+	bool "Support non-standard NVDIMMs and ADR protected memory"
+	help
+	  Treat memory marked using the non-standard e820 type of 12 as used
+	  by the Intel Sandy Bridge-EP reference BIOS as protected memory.
+	  The kernel will offer these regions to the 'pmem' driver so
+	  they can be used for persistent storage.
+
+	  Say Y if unsure.
+
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
 	depends on HIGHMEM
@@ -1747,14 +1748,11 @@ config KEXEC_VERIFY_SIG
 	depends on KEXEC_FILE
 	---help---
 	  This option makes kernel signature verification mandatory for
-	  kexec_file_load() syscall. If kernel is signature can not be
-	  verified, kexec_file_load() will fail.
-
-	  This option enforces signature verification at generic level.
-	  One needs to enable signature verification for type of kernel
-	  image being loaded to make sure it works. For example, enable
-	  bzImage signature verification option to be able to load and
-	  verify signatures of bzImage. Otherwise kernel loading will fail.
+	  the kexec_file_load() syscall.
+
+	  In addition to that option, you need to enable signature
+	  verification for the corresponding kernel image type being
+	  loaded in order for this to work.
 
 config KEXEC_BZIMAGE_VERIFY_SIG
 	bool "Enable bzImage signature verification support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 20028da8ae18..72484a645f05 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,10 +43,6 @@ config EARLY_PRINTK
 	  with klogd/syslogd or the X server. You should normally N here,
 	  unless you want to debug such a crash.
 
-config EARLY_PRINTK_INTEL_MID
-	bool "Early printk for Intel MID platform support"
-	depends on EARLY_PRINTK && X86_INTEL_MID
-
 config EARLY_PRINTK_DBGP
 	bool "Early printk via EHCI debug port"
 	depends on EARLY_PRINTK && PCI
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5ba2d9ce82dc..2fda005bb334 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -63,7 +63,7 @@ ifeq ($(CONFIG_X86_32),y)
 				$(call cc-option,-fno-unit-at-a-time))
 
         # CPU-specific tuning. Anything which can be shared with UML should go here.
-        include $(srctree)/arch/x86/Makefile_32.cpu
+        include arch/x86/Makefile_32.cpu
         KBUILD_CFLAGS += $(cflags-y)
 
         # temporary until string.h is fixed
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 95eba554baf9..5b7e898ffd9a 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -18,7 +18,7 @@ LDS_EXTRA		:= -Ui386
 export LDS_EXTRA
 
 # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
-include $(srctree)/arch/x86/Makefile_32.cpu
+include arch/x86/Makefile_32.cpu
 
 # prevent gcc from keeping the stack 16 byte aligned. Taken from i386.
 cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 7083c16cccba..d7b1f655b3ef 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,13 +14,6 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
-struct kaslr_setup_data {
-	__u64 next;
-	__u32 type;
-	__u32 len;
-	__u8 data[1];
-} kaslr_setup_data;
-
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -302,28 +295,7 @@ static unsigned long find_random_addr(unsigned long minimum,
 	return slots_fetch_random();
 }
 
-static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
-{
-	struct setup_data *data;
-
-	kaslr_setup_data.type = SETUP_KASLR;
-	kaslr_setup_data.len = 1;
-	kaslr_setup_data.next = 0;
-	kaslr_setup_data.data[0] = enabled;
-
-	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
-	while (data && data->next)
-		data = (struct setup_data *)(unsigned long)data->next;
-
-	if (data)
-		data->next = (unsigned long)&kaslr_setup_data;
-	else
-		params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
-
-}
-
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
 				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
@@ -335,17 +307,16 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 #ifdef CONFIG_HIBERNATION
 	if (!cmdline_find_option_bool("kaslr")) {
 		debug_putstr("KASLR disabled by default...\n");
-		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #else
 	if (cmdline_find_option_bool("nokaslr")) {
 		debug_putstr("KASLR disabled by cmdline...\n");
-		add_kaslr_setup_data(params, 0);
 		goto out;
 	}
 #endif
-	add_kaslr_setup_data(params, 1);
+
+	boot_params->hdr.loadflags |= KASLR_FLAG;
 
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1d7fbbcc196d..8ef964ddc18e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -29,6 +29,7 @@
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
 	__HEAD
 ENTRY(startup_32)
@@ -102,7 +103,7 @@ preferred_addr:
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
 	 * us to not reload segments
 	 */
-	testb	$(1<<6), BP_loadflags(%esi)
+	testb	$KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz	1f
 
 	cli
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6b1766c6c082..b0c0d16ef58d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -31,6 +31,7 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
 	__HEAD
 	.code32
@@ -46,7 +47,7 @@ ENTRY(startup_32)
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
 	 * us to not reload segments
 	 */
-	testb $(1<<6), BP_loadflags(%esi)
+	testb $KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz 1f
 
 	cli
@@ -164,7 +165,7 @@ ENTRY(startup_32)
 	/* After gdt is loaded */
 	xorl	%eax, %eax
 	lldt	%ax
-	movl    $0x20, %eax
+	movl    $__BOOT_TSS, %eax
 	ltr	%ax
 
 	/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 5903089c818f..a107b935e22f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 
 	real_mode = rmode;
 
+	/* Clear it for solely in-kernel use */
+	real_mode->hdr.loadflags &= ~KASLR_FLAG;
+
 	sanitize_boot_params(real_mode);
 
 	if (real_mode->screen_info.orig_video_mode == 7) {
@@ -401,8 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(real_mode, input_data, input_len,
-					output,
+	output = choose_kernel_location(real_mode, input_data, input_len, output,
 					output_len > run_size ? output_len
 							      : run_size);
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index ee3576b2666b..89dd0d78013a 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,7 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
 				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
@@ -66,7 +66,7 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
 				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 493f3fd9f139..318b8465d302 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
 	int delta = 0;
 
 	while (*s1 || *s2) {
-		delta = *s2 - *s1;
+		delta = *s1 - *s2;
 		if (delta)
 			return delta;
 		s1++;
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index 748e8d06290a..aa8a96b052e3 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -22,10 +22,8 @@
 /*
  * Common variables
  */
-int adapter;			/* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
+int adapter;		/* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
 int force_x, force_y;	/* Don't query the BIOS for cols/rows */
-
 int do_restore;		/* Screen contents changed during mode flip */
 int graphic_mode;	/* Graphic mode with linear frame buffer */
 
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 43eda284d27f..05111bb8d018 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -17,6 +17,8 @@
 #include "video.h"
 #include "vesa.h"
 
+static u16 video_segment;
+
 static void store_cursor_position(void)
 {
 	struct biosregs ireg, oreg;
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index 0bb25491262d..b54e0328c449 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -91,7 +91,6 @@ int mode_defined(u16 mode);	/* video.c */
 #define ADAPTER_VGA	2
 
 extern int adapter;
-extern u16 video_segment;
 extern int force_x, force_y;	/* Don't query the BIOS for cols/rows */
 extern int do_restore;		/* Restore screen contents */
 extern int graphic_mode;	/* Graphics mode with linear frame buffer */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 419819d6dab3..aaa1118bf01e 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -248,7 +248,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4c311ddd973b..315b86106572 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -243,7 +243,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 947c6bf52c33..112cefacf2af 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
 		PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
 	struct crypto_aead *cryptd_child;
 	struct aesni_rfc4106_gcm_ctx *child_ctx;
-	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
+	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
+				       CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
 
@@ -890,15 +892,12 @@ out_free_ablkcipher:
 	return ret;
 }
 
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
-						   unsigned int key_len)
+static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
+				  unsigned int key_len)
 {
 	int ret = 0;
-	struct crypto_tfm *tfm = crypto_aead_tfm(parent);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-	struct aesni_rfc4106_gcm_ctx *child_ctx =
-                                 aesni_rfc4106_gcm_ctx_get(cryptd_child);
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
 	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		goto exit;
 	}
 	ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
-	memcpy(child_ctx, ctx, sizeof(*ctx));
 exit:
 	kfree(new_key_mem);
 	return ret;
 }
 
-/* This is the Integrity Check Value (aka the authentication tag length and can
- * be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
-				unsigned int authsize)
+static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
+			   unsigned int key_len)
 {
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+	struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
+	struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child);
+	struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm;
+	int ret;
 
+	ret = crypto_aead_setkey(child, key, key_len);
+	if (!ret) {
+		memcpy(ctx, c_ctx, sizeof(*ctx));
+		ctx->cryptd_tfm = cryptd_tfm;
+	}
+	return ret;
+}
+
+static int common_rfc4106_set_authsize(struct crypto_aead *aead,
+				       unsigned int authsize)
+{
 	switch (authsize) {
 	case 8:
 	case 12:
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
 	default:
 		return -EINVAL;
 	}
-	crypto_aead_crt(parent)->authsize = authsize;
-	crypto_aead_crt(cryptd_child)->authsize = authsize;
+	crypto_aead_crt(aead)->authsize = authsize;
 	return 0;
 }
 
-static int rfc4106_encrypt(struct aead_request *req)
-{
-	int ret;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct aead_request *cryptd_req =
-			(struct aead_request *) aead_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_aead_encrypt(cryptd_req);
-	} else {
-		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-		kernel_fpu_begin();
-		ret = cryptd_child->base.crt_aead.encrypt(req);
-		kernel_fpu_end();
-		return ret;
-	}
-}
-
-static int rfc4106_decrypt(struct aead_request *req)
+/* This is the Integrity Check Value (aka the authentication tag length and can
+ * be 8, 12 or 16 bytes long. */
+static int rfc4106_set_authsize(struct crypto_aead *parent,
+				unsigned int authsize)
 {
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+	struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
 	int ret;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
 
-	if (!irq_fpu_usable()) {
-		struct aead_request *cryptd_req =
-			(struct aead_request *) aead_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_aead_decrypt(cryptd_req);
-	} else {
-		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-		kernel_fpu_begin();
-		ret = cryptd_child->base.crt_aead.decrypt(req);
-		kernel_fpu_end();
-		return ret;
-	}
+	ret = crypto_aead_setauthsize(child, authsize);
+	if (!ret)
+		crypto_aead_crt(parent)->authsize = authsize;
+	return ret;
 }
 
 static int __driver_rfc4106_encrypt(struct aead_request *req)
@@ -1155,7 +1137,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 		src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
 		if (!src)
 			return -ENOMEM;
-		assoc = (src + req->cryptlen + auth_tag_len);
+		assoc = (src + req->cryptlen);
 		scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
 		scatterwalk_map_and_copy(assoc, req->assoc, 0,
 			req->assoclen, 0);
@@ -1180,11 +1162,83 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 		scatterwalk_done(&src_sg_walk, 0, 0);
 		scatterwalk_done(&assoc_sg_walk, 0, 0);
 	} else {
-		scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
+		scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1);
 		kfree(src);
 	}
 	return retval;
 }
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+	int ret;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct aead_request *cryptd_req =
+			(struct aead_request *) aead_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+		ret = crypto_aead_encrypt(cryptd_req);
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_encrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+	int ret;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct aead_request *cryptd_req =
+			(struct aead_request *) aead_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+		ret = crypto_aead_decrypt(cryptd_req);
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_decrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+	int ret;
+
+	if (unlikely(!irq_fpu_usable())) {
+		WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
+		ret = -EINVAL;
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_encrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+	int ret;
+
+	if (unlikely(!irq_fpu_usable())) {
+		WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
+		ret = -EINVAL;
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_decrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
 #endif
 
 static struct crypto_alg aesni_algs[] = { {
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__aes-aesni",
 	.cra_driver_name	= "__driver-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
 				  AESNI_ALIGN - 1,
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__ecb-aes-aesni",
 	.cra_driver_name	= "__driver-ecb-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
 				  AESNI_ALIGN - 1,
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__cbc-aes-aesni",
 	.cra_driver_name	= "__driver-cbc-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
 				  AESNI_ALIGN - 1,
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__ctr-aes-aesni",
 	.cra_driver_name	= "__driver-ctr-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
 				  AESNI_ALIGN - 1,
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__gcm-aes-aesni",
 	.cra_driver_name	= "__driver-gcm-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_AEAD,
+	.cra_flags		= CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct aesni_rfc4106_gcm_ctx) +
 				  AESNI_ALIGN,
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_module		= THIS_MODULE,
 	.cra_u = {
 		.aead = {
-			.encrypt	= __driver_rfc4106_encrypt,
-			.decrypt	= __driver_rfc4106_decrypt,
+			.setkey		= common_rfc4106_set_key,
+			.setauthsize	= common_rfc4106_set_authsize,
+			.encrypt	= helper_rfc4106_encrypt,
+			.decrypt	= helper_rfc4106_decrypt,
+			.ivsize		= 8,
+			.maxauthsize	= 16,
 		},
 	},
 }, {
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__lrw-aes-aesni",
 	.cra_driver_name	= "__driver-lrw-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct aesni_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name		= "__xts-aes-aesni",
 	.cra_driver_name	= "__driver-xts-aes-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct aesni_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 9a07fafe3831..baf0ac21ace5 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__ecb-camellia-aesni-avx2",
 	.cra_driver_name	= "__driver-ecb-camellia-aesni-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__cbc-camellia-aesni-avx2",
 	.cra_driver_name	= "__driver-cbc-camellia-aesni-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__ctr-camellia-aesni-avx2",
 	.cra_driver_name	= "__driver-ctr-camellia-aesni-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__lrw-camellia-aesni-avx2",
 	.cra_driver_name	= "__driver-lrw-camellia-aesni-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__xts-camellia-aesni-avx2",
 	.cra_driver_name	= "__driver-xts-camellia-aesni-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index ed38d959add6..78818a1e73e3 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__ecb-camellia-aesni",
 	.cra_driver_name	= "__driver-ecb-camellia-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__cbc-camellia-aesni",
 	.cra_driver_name	= "__driver-cbc-camellia-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__ctr-camellia-aesni",
 	.cra_driver_name	= "__driver-ctr-camellia-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__lrw-camellia-aesni",
 	.cra_driver_name	= "__driver-lrw-camellia-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { {
 	.cra_name		= "__xts-camellia-aesni",
 	.cra_driver_name	= "__driver-xts-camellia-aesni",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 60ada677a928..236c80974457 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { {
 	.cra_name		= "__ecb-cast5-avx",
 	.cra_driver_name	= "__driver-ecb-cast5-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAST5_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct cast5_ctx),
 	.cra_alignmask		= 0,
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { {
 	.cra_name		= "__cbc-cast5-avx",
 	.cra_driver_name	= "__driver-cbc-cast5-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAST5_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct cast5_ctx),
 	.cra_alignmask		= 0,
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { {
 	.cra_name		= "__ctr-cast5-avx",
 	.cra_driver_name	= "__driver-ctr-cast5-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct cast5_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 0160f68a57ff..f448810ca4ac 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { {
 	.cra_name		= "__ecb-cast6-avx",
 	.cra_driver_name	= "__driver-ecb-cast6-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAST6_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct cast6_ctx),
 	.cra_alignmask		= 0,
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { {
 	.cra_name		= "__cbc-cast6-avx",
 	.cra_driver_name	= "__driver-cbc-cast6-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAST6_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct cast6_ctx),
 	.cra_alignmask		= 0,
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { {
 	.cra_name		= "__ctr-cast6-avx",
 	.cra_driver_name	= "__driver-ctr-cast6-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct cast6_ctx),
 	.cra_alignmask		= 0,
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { {
 	.cra_name		= "__lrw-cast6-avx",
 	.cra_driver_name	= "__driver-lrw-cast6-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAST6_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { {
 	.cra_name		= "__xts-cast6-avx",
 	.cra_driver_name	= "__driver-xts-cast6-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= CAST6_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 26d49ebae040..225be06edc80 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -178,7 +178,7 @@ continue_block:
 	## 2a) PROCESS FULL BLOCKS:
 	################################################################
 full_block:
-	movq    $128,%rax
+	movl    $128,%eax
 	lea     128*8*2(block_0), block_1
 	lea     128*8*3(block_0), block_2
 	add     $128*8*1, block_0
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 8253d85aa165..2079baf06bdd 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
 		.cra_name		= "__ghash",
 		.cra_driver_name	= "__ghash-pclmulqdqni",
 		.cra_priority		= 0,
-		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH |
+					  CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
 	struct cryptd_ahash *cryptd_tfm;
 	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
+	cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
+					CRYPTO_ALG_INTERNAL,
+					CRYPTO_ALG_INTERNAL);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
 	ctx->cryptd_tfm = cryptd_tfm;
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 432f1d76ceb8..6a85598931b5 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
 
 	le128_to_be128((be128 *)walk->iv, &ctrblk);
 }
-EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
 
 static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
 					    struct blkcipher_desc *desc,
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 437e47a4d302..2f63dc89e7a9 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { {
 	.cra_name		= "__ecb-serpent-avx2",
 	.cra_driver_name	= "__driver-ecb-serpent-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { {
 	.cra_name		= "__cbc-serpent-avx2",
 	.cra_driver_name	= "__driver-cbc-serpent-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { {
 	.cra_name		= "__ctr-serpent-avx2",
 	.cra_driver_name	= "__driver-ctr-serpent-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { {
 	.cra_name		= "__lrw-serpent-avx2",
 	.cra_driver_name	= "__driver-lrw-serpent-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { {
 	.cra_name		= "__xts-serpent-avx2",
 	.cra_driver_name	= "__driver-xts-serpent-avx2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7e217398b4eb..c8d478af8456 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__ecb-serpent-avx",
 	.cra_driver_name	= "__driver-ecb-serpent-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__cbc-serpent-avx",
 	.cra_driver_name	= "__driver-cbc-serpent-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__ctr-serpent-avx",
 	.cra_driver_name	= "__driver-ctr-serpent-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__lrw-serpent-avx",
 	.cra_driver_name	= "__driver-lrw-serpent-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__xts-serpent-avx",
 	.cra_driver_name	= "__driver-xts-serpent-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index bf025adaea01..3643dd508f45 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__ecb-serpent-sse2",
 	.cra_driver_name	= "__driver-ecb-serpent-sse2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__cbc-serpent-sse2",
 	.cra_driver_name	= "__driver-cbc-serpent-sse2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__ctr-serpent-sse2",
 	.cra_driver_name	= "__driver-ctr-serpent-sse2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct serpent_ctx),
 	.cra_alignmask		= 0,
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__lrw-serpent-sse2",
 	.cra_driver_name	= "__driver-lrw-serpent-sse2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_name		= "__xts-serpent-sse2",
 	.cra_driver_name	= "__driver-xts-serpent-sse2",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index fd9f6b035b16..e510b1c5d690 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = {
 		 * use ASYNC flag as some buffers in multi-buffer
 		 * algo may not have completed before hashing thread sleep
 		 */
-		.cra_flags	 = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC,
+		.cra_flags	 = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC |
+				   CRYPTO_ALG_INTERNAL,
 		.cra_blocksize	 = SHA1_BLOCK_SIZE,
 		.cra_module	 = THIS_MODULE,
 		.cra_list	 = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
 	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct mcryptd_hash_ctx *mctx;
 
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0);
+	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
+					  CRYPTO_ALG_INTERNAL,
+					  CRYPTO_ALG_INTERNAL);
 	if (IS_ERR(mcryptd_tfm))
 		return PTR_ERR(mcryptd_tfm);
 	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
 	while (!list_empty(&cstate->work_list)) {
 		rctx = list_entry(cstate->work_list.next,
 				struct mcryptd_hash_request_ctx, waiter);
-		if time_before(cur_time, rctx->tag.expire)
+		if (time_before(cur_time, rctx->tag.expire))
 			break;
 		kernel_fpu_begin();
 		sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
index 4ca7e166a2aa..822acb5b464c 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
@@ -56,7 +56,7 @@
 void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
 {
 	unsigned int j;
-	state->unused_lanes = 0xF76543210;
+	state->unused_lanes = 0xF76543210ULL;
 	for (j = 0; j < 8; j++) {
 		state->lens[j] = 0xFFFFFFFF;
 		state->ldata[j].job_in_lane = NULL;
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 6c20fe04a738..33d1b9dc14cc 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -28,7 +28,7 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha1_base.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/xsave.h>
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
 #define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
 
 asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
-				unsigned int rounds);
+				    unsigned int rounds);
 #endif
 
-static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
-
-
-static int sha1_ssse3_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-
-	return 0;
-}
-
-static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len, unsigned int partial)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->buffer + partial, data, done);
-		sha1_transform_asm(sctx->state, sctx->buffer, 1);
-	}
-
-	if (len - done >= SHA1_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
-		sha1_transform_asm(sctx->state, data + done, rounds);
-		done += rounds * SHA1_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buffer, data + done, len - done);
-
-	return 0;
-}
+static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
 
 static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-	int res;
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA1_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buffer + partial, data, len);
+	if (!irq_fpu_usable() ||
+	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+		return crypto_sha1_update(desc, data, len);
 
-		return 0;
-	}
+	/* make sure casting to sha1_block_fn() is safe */
+	BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
 
-	if (!irq_fpu_usable()) {
-		res = crypto_sha1_update(desc, data, len);
-	} else {
-		kernel_fpu_begin();
-		res = __sha1_ssse3_update(desc, data, len, partial);
-		kernel_fpu_end();
-	}
-
-	return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA1_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-	if (!irq_fpu_usable()) {
-		crypto_sha1_update(desc, padding, padlen);
-		crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_fpu_begin();
-		/* We need to fill a whole block for __sha1_ssse3_update() */
-		if (padlen <= 56) {
-			sctx->count += padlen;
-			memcpy(sctx->buffer + index, padding, padlen);
-		} else {
-			__sha1_ssse3_update(desc, padding, padlen, index);
-		}
-		__sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-		kernel_fpu_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
+	kernel_fpu_begin();
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_transform_asm);
+	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha1_ssse3_export(struct shash_desc *desc, void *out)
+static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+	if (!irq_fpu_usable())
+		return crypto_sha1_finup(desc, data, len, out);
 
-	memcpy(out, sctx, sizeof(*sctx));
+	kernel_fpu_begin();
+	if (len)
+		sha1_base_do_update(desc, data, len,
+				    (sha1_block_fn *)sha1_transform_asm);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
+	kernel_fpu_end();
 
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
+	return sha1_ssse3_finup(desc, NULL, 0, out);
 }
 
 #ifdef CONFIG_AS_AVX2
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data,
 
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	sha1_ssse3_init,
+	.init		=	sha1_base_init,
 	.update		=	sha1_ssse3_update,
 	.final		=	sha1_ssse3_final,
-	.export		=	sha1_ssse3_export,
-	.import		=	sha1_ssse3_import,
+	.finup		=	sha1_ssse3_finup,
 	.descsize	=	sizeof(struct sha1_state),
-	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
 		.cra_driver_name=	"sha1-ssse3",
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 642f15687a0a..92b3b5d75ba9 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12      # shuffle xDxC -> DC00
 BYTE_FLIP_MASK = %xmm13
 
 NUM_BLKS = %rdx   # 3rd arg
-CTX = %rsi        # 2nd arg
-INP = %rdi        # 1st arg
+INP = %rsi        # 2nd arg
+CTX = %rdi        # 1st arg
 
-SRND = %rdi       # clobbers INP
+SRND = %rsi       # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
@@ -342,8 +342,8 @@ a = TMP_
 
 ########################################################################
 ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to input data
-## arg 2 : pointer to digest
+## arg 1 : pointer to digest
+## arg 2 : pointer to input data
 ## arg 3 : Num blocks
 ########################################################################
 .text
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9e86944c539d..570ec5ec62d7 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13
 X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
 
 NUM_BLKS = %rdx	# 3rd arg
-CTX	= %rsi  # 2nd arg
-INP	= %rdi	# 1st arg
+INP	= %rsi  # 2nd arg
+CTX	= %rdi	# 1st arg
 c	= %ecx
 d	= %r8d
 e       = %edx	# clobbers NUM_BLKS
-y3	= %edi	# clobbers INP
+y3	= %esi	# clobbers INP
 
 
 TBL	= %rbp
@@ -523,8 +523,8 @@ STACK_SIZE	= _RSP      + _RSP_SIZE
 
 ########################################################################
 ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to input data
-## arg 2 : pointer to digest
+## arg 1 : pointer to digest
+## arg 2 : pointer to input data
 ## arg 3 : Num blocks
 ########################################################################
 .text
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index f833b74d902b..2cedc44e8121 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11      # shuffle xDxC -> DC00
 BYTE_FLIP_MASK = %xmm12
 
 NUM_BLKS = %rdx   # 3rd arg
-CTX = %rsi        # 2nd arg
-INP = %rdi        # 1st arg
+INP = %rsi        # 2nd arg
+CTX = %rdi        # 1st arg
 
-SRND = %rdi       # clobbers INP
+SRND = %rsi       # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
@@ -348,8 +348,8 @@ a = TMP_
 
 ########################################################################
 ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to input data
-## arg 2 : pointer to digest
+## arg 1 : pointer to digest
+## arg 2 : pointer to input data
 ## arg 3 : Num blocks
 ########################################################################
 .text
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 8fad72f4dfd2..ccc338881ee8 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -36,195 +36,74 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha256_base.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/xsave.h>
 #include <linux/string.h>
 
-asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
-				     u64 rounds);
+asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
+				       u64 rounds);
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
+asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
 				     u64 rounds);
 #endif
 #ifdef CONFIG_AS_AVX2
-asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
-				     u64 rounds);
+asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
+				      u64 rounds);
 #endif
 
-static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
-
-
-static int sha256_ssse3_init(struct shash_desc *desc)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-
-	return 0;
-}
-
-static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len, unsigned int partial)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA256_BLOCK_SIZE - partial;
-		memcpy(sctx->buf + partial, data, done);
-		sha256_transform_asm(sctx->buf, sctx->state, 1);
-	}
-
-	if (len - done >= SHA256_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
-
-		sha256_transform_asm(data + done, sctx->state, (u64) rounds);
-
-		done += rounds * SHA256_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buf, data + done, len - done);
-
-	return 0;
-}
+static void (*sha256_transform_asm)(u32 *, const char *, u64);
 
 static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-	int res;
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA256_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buf + partial, data, len);
+	if (!irq_fpu_usable() ||
+	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+		return crypto_sha256_update(desc, data, len);
 
-		return 0;
-	}
-
-	if (!irq_fpu_usable()) {
-		res = crypto_sha256_update(desc, data, len);
-	} else {
-		kernel_fpu_begin();
-		res = __sha256_ssse3_update(desc, data, len, partial);
-		kernel_fpu_end();
-	}
-
-	return res;
-}
+	/* make sure casting to sha256_block_fn() is safe */
+	BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
 
-
-/* Add padding and return the message digest. */
-static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA256_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
-
-	if (!irq_fpu_usable()) {
-		crypto_sha256_update(desc, padding, padlen);
-		crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_fpu_begin();
-		/* We need to fill a whole block for __sha256_ssse3_update() */
-		if (padlen <= 56) {
-			sctx->count += padlen;
-			memcpy(sctx->buf + index, padding, padlen);
-		} else {
-			__sha256_ssse3_update(desc, padding, padlen, index);
-		}
-		__sha256_ssse3_update(desc, (const u8 *)&bits,
-					sizeof(bits), 56);
-		kernel_fpu_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
+	kernel_fpu_begin();
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha256_transform_asm);
+	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha256_ssse3_export(struct shash_desc *desc, void *out)
+static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	if (!irq_fpu_usable())
+		return crypto_sha256_finup(desc, data, len, out);
 
-	memcpy(out, sctx, sizeof(*sctx));
+	kernel_fpu_begin();
+	if (len)
+		sha256_base_do_update(desc, data, len,
+				      (sha256_block_fn *)sha256_transform_asm);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
+	kernel_fpu_end();
 
-	return 0;
+	return sha256_base_finish(desc, out);
 }
 
-static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha224_ssse3_init(struct shash_desc *desc)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA224_H0;
-	sctx->state[1] = SHA224_H1;
-	sctx->state[2] = SHA224_H2;
-	sctx->state[3] = SHA224_H3;
-	sctx->state[4] = SHA224_H4;
-	sctx->state[5] = SHA224_H5;
-	sctx->state[6] = SHA224_H6;
-	sctx->state[7] = SHA224_H7;
-	sctx->count = 0;
-
-	return 0;
-}
-
-static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
+/* Add padding and return the message digest. */
+static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
 {
-	u8 D[SHA256_DIGEST_SIZE];
-
-	sha256_ssse3_final(desc, D);
-
-	memcpy(hash, D, SHA224_DIGEST_SIZE);
-	memzero_explicit(D, SHA256_DIGEST_SIZE);
-
-	return 0;
+	return sha256_ssse3_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg algs[] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_ssse3_init,
+	.init		=	sha256_base_init,
 	.update		=	sha256_ssse3_update,
 	.final		=	sha256_ssse3_final,
-	.export		=	sha256_ssse3_export,
-	.import		=	sha256_ssse3_import,
+	.finup		=	sha256_ssse3_finup,
 	.descsize	=	sizeof(struct sha256_state),
-	.statesize	=	sizeof(struct sha256_state),
 	.base		=	{
 		.cra_name	=	"sha256",
 		.cra_driver_name =	"sha256-ssse3",
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { {
 	}
 }, {
 	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_ssse3_init,
+	.init		=	sha224_base_init,
 	.update		=	sha256_ssse3_update,
-	.final		=	sha224_ssse3_final,
-	.export		=	sha256_ssse3_export,
-	.import		=	sha256_ssse3_import,
+	.final		=	sha256_ssse3_final,
+	.finup		=	sha256_ssse3_finup,
 	.descsize	=	sizeof(struct sha256_state),
-	.statesize	=	sizeof(struct sha256_state),
 	.base		=	{
 		.cra_name	=	"sha224",
 		.cra_driver_name =	"sha224-ssse3",
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 974dde9bc6cd..565274d6a641 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -54,9 +54,9 @@
 
 # Virtual Registers
 # ARG1
-msg	= %rdi
+digest	= %rdi
 # ARG2
-digest	= %rsi
+msg	= %rsi
 # ARG3
 msglen	= %rdx
 T1	= %rcx
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_avx(const void* M, void* D, u64 L)
+# void sha512_transform_avx(void* D, const void* M, u64 L)
 # Purpose: Updates the SHA512 digest stored at D with the message stored in M.
 # The size of the message pointed to by M must be an integer multiple of SHA512
 # message blocks.
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 568b96105f5c..a4771dcd1fcf 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -70,9 +70,9 @@ XFER  = YTMP0
 BYTE_FLIP_MASK  = %ymm9
 
 # 1st arg
-INP         = %rdi
+CTX         = %rdi
 # 2nd arg
-CTX         = %rsi
+INP         = %rsi
 # 3rd arg
 NUM_BLKS    = %rdx
 
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_rorx(const void* M, void* D, uint64_t L)#
+# void sha512_transform_rorx(void* D, const void* M, uint64_t L)#
 # Purpose: Updates the SHA512 digest stored at D with the message stored in M.
 # The size of the message pointed to by M must be an integer multiple of SHA512
 #   message blocks.
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index fb56855d51f5..e610e29cbc81 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -53,9 +53,9 @@
 
 # Virtual Registers
 # ARG1
-msg =		%rdi
+digest =	%rdi
 # ARG2
-digest =	%rsi
+msg =		%rsi
 # ARG3
 msglen =	%rdx
 T1 =		%rcx
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_ssse3(const void* M, void* D, u64 L)#
+# void sha512_transform_ssse3(void* D, const void* M, u64 L)#
 # Purpose: Updates the SHA512 digest stored at D with the message stored in M.
 # The size of the message pointed to by M must be an integer multiple of SHA512
 #   message blocks.
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 0b6af26832bf..d9fa4c1e063f 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -34,205 +34,75 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha512_base.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/xsave.h>
 
 #include <linux/string.h>
 
-asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
-				     u64 rounds);
+asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
+				       u64 rounds);
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
+asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
 				     u64 rounds);
 #endif
 #ifdef CONFIG_AS_AVX2
-asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
-				     u64 rounds);
+asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
+				      u64 rounds);
 #endif
 
-static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
-
-
-static int sha512_ssse3_init(struct shash_desc *desc)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA512_H0;
-	sctx->state[1] = SHA512_H1;
-	sctx->state[2] = SHA512_H2;
-	sctx->state[3] = SHA512_H3;
-	sctx->state[4] = SHA512_H4;
-	sctx->state[5] = SHA512_H5;
-	sctx->state[6] = SHA512_H6;
-	sctx->state[7] = SHA512_H7;
-	sctx->count[0] = sctx->count[1] = 0;
-
-	return 0;
-}
+static void (*sha512_transform_asm)(u64 *, const char *, u64);
 
-static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len, unsigned int partial)
+static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
 {
 	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count[0] += len;
-	if (sctx->count[0] < len)
-		sctx->count[1]++;
 
-	if (partial) {
-		done = SHA512_BLOCK_SIZE - partial;
-		memcpy(sctx->buf + partial, data, done);
-		sha512_transform_asm(sctx->buf, sctx->state, 1);
-	}
-
-	if (len - done >= SHA512_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+	if (!irq_fpu_usable() ||
+	    (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+		return crypto_sha512_update(desc, data, len);
 
-		sha512_transform_asm(data + done, sctx->state, (u64) rounds);
-
-		done += rounds * SHA512_BLOCK_SIZE;
-	}
+	/* make sure casting to sha512_block_fn() is safe */
+	BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
 
-	memcpy(sctx->buf, data + done, len - done);
+	kernel_fpu_begin();
+	sha512_base_do_update(desc, data, len,
+			      (sha512_block_fn *)sha512_transform_asm);
+	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
-	int res;
-
-	/* Handle the fast case right here */
-	if (partial + len < SHA512_BLOCK_SIZE) {
-		sctx->count[0] += len;
-		if (sctx->count[0] < len)
-			sctx->count[1]++;
-		memcpy(sctx->buf + partial, data, len);
-
-		return 0;
-	}
+	if (!irq_fpu_usable())
+		return crypto_sha512_finup(desc, data, len, out);
 
-	if (!irq_fpu_usable()) {
-		res = crypto_sha512_update(desc, data, len);
-	} else {
-		kernel_fpu_begin();
-		res = __sha512_ssse3_update(desc, data, len, partial);
-		kernel_fpu_end();
-	}
+	kernel_fpu_begin();
+	if (len)
+		sha512_base_do_update(desc, data, len,
+				      (sha512_block_fn *)sha512_transform_asm);
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
+	kernel_fpu_end();
 
-	return res;
+	return sha512_base_finish(desc, out);
 }
 
-
 /* Add padding and return the message digest. */
 static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be64 *dst = (__be64 *)out;
-	__be64 bits[2];
-	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
-	/* save number of bits */
-	bits[1] = cpu_to_be64(sctx->count[0] << 3);
-	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
-	/* Pad out to 112 mod 128 and append length */
-	index = sctx->count[0] & 0x7f;
-	padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
-	if (!irq_fpu_usable()) {
-		crypto_sha512_update(desc, padding, padlen);
-		crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_fpu_begin();
-		/* We need to fill a whole block for __sha512_ssse3_update() */
-		if (padlen <= 112) {
-			sctx->count[0] += padlen;
-			if (sctx->count[0] < padlen)
-				sctx->count[1]++;
-			memcpy(sctx->buf + index, padding, padlen);
-		} else {
-			__sha512_ssse3_update(desc, padding, padlen, index);
-		}
-		__sha512_ssse3_update(desc, (const u8 *)&bits,
-					sizeof(bits), 112);
-		kernel_fpu_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_be64(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_ssse3_export(struct shash_desc *desc, void *out)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha384_ssse3_init(struct shash_desc *desc)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA384_H0;
-	sctx->state[1] = SHA384_H1;
-	sctx->state[2] = SHA384_H2;
-	sctx->state[3] = SHA384_H3;
-	sctx->state[4] = SHA384_H4;
-	sctx->state[5] = SHA384_H5;
-	sctx->state[6] = SHA384_H6;
-	sctx->state[7] = SHA384_H7;
-
-	sctx->count[0] = sctx->count[1] = 0;
-
-	return 0;
-}
-
-static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
-{
-	u8 D[SHA512_DIGEST_SIZE];
-
-	sha512_ssse3_final(desc, D);
-
-	memcpy(hash, D, SHA384_DIGEST_SIZE);
-	memzero_explicit(D, SHA512_DIGEST_SIZE);
-
-	return 0;
+	return sha512_ssse3_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg algs[] = { {
 	.digestsize	=	SHA512_DIGEST_SIZE,
-	.init		=	sha512_ssse3_init,
+	.init		=	sha512_base_init,
 	.update		=	sha512_ssse3_update,
 	.final		=	sha512_ssse3_final,
-	.export		=	sha512_ssse3_export,
-	.import		=	sha512_ssse3_import,
+	.finup		=	sha512_ssse3_finup,
 	.descsize	=	sizeof(struct sha512_state),
-	.statesize	=	sizeof(struct sha512_state),
 	.base		=	{
 		.cra_name	=	"sha512",
 		.cra_driver_name =	"sha512-ssse3",
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { {
 	}
 },  {
 	.digestsize	=	SHA384_DIGEST_SIZE,
-	.init		=	sha384_ssse3_init,
+	.init		=	sha384_base_init,
 	.update		=	sha512_ssse3_update,
-	.final		=	sha384_ssse3_final,
-	.export		=	sha512_ssse3_export,
-	.import		=	sha512_ssse3_import,
+	.final		=	sha512_ssse3_final,
+	.finup		=	sha512_ssse3_finup,
 	.descsize	=	sizeof(struct sha512_state),
-	.statesize	=	sizeof(struct sha512_state),
 	.base		=	{
 		.cra_name	=	"sha384",
 		.cra_driver_name =	"sha384-ssse3",
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index a039d21986a2..a350c990dc86 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
 	movq	R1,	8(%rsi)
 
 	popq	R1
-	movq	$1,%rax
+	movl	$1,%eax
 	ret
 ENDPROC(twofish_enc_blk)
 
@@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
 	movq	R1,	8(%rsi)
 
 	popq	R1
-	movq	$1,%rax
+	movl	$1,%eax
 	ret
 ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 1ac531ea9bcc..b5e2d5651851 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_name		= "__ecb-twofish-avx",
 	.cra_driver_name	= "__driver-ecb-twofish-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= TF_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct twofish_ctx),
 	.cra_alignmask		= 0,
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_name		= "__cbc-twofish-avx",
 	.cra_driver_name	= "__driver-cbc-twofish-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= TF_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct twofish_ctx),
 	.cra_alignmask		= 0,
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_name		= "__ctr-twofish-avx",
 	.cra_driver_name	= "__driver-ctr-twofish-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct twofish_ctx),
 	.cra_alignmask		= 0,
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_name		= "__lrw-twofish-avx",
 	.cra_driver_name	= "__driver-lrw-twofish-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= TF_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
 	.cra_alignmask		= 0,
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_name		= "__xts-twofish-avx",
 	.cra_driver_name	= "__driver-xts-twofish-avx",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= TF_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
 	.cra_alignmask		= 0,
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e785b422b766..bb635c641869 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,6 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
-obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
 
 obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index d0165c9a2932..c81d35e6c7f1 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
-				   struct sigcontext_ia32 __user *sc,
-				   unsigned int *pax)
+				   struct sigcontext_ia32 __user *sc)
 {
 	unsigned int tmpflags, err = 0;
 	void __user *buf;
@@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 		RELOAD_SEG(es);
 
 		COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-		COPY(dx); COPY(cx); COPY(ip);
+		COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 		/* Don't touch extended registers */
 
 		COPY_SEG_CPL3(cs);
@@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-
-		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
 	err |= restore_xstate_sig(buf, 1);
 
+	force_iret();
+
 	return err;
 }
 
@@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
 	sigset_t set;
-	unsigned int ax;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
+	if (ia32_restore_sigcontext(regs, &frame->sc))
 		goto badframe;
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit sigreturn");
@@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
-	unsigned int ax;
 
 	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit rt sigreturn");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebcab4ada..a821b1cd4fa7 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -30,24 +30,13 @@
 
 	.section .entry.text, "ax"
 
-	.macro IA32_ARG_FIXUP noebp=0
-	movl	%edi,%r8d
-	.if \noebp
-	.else
-	movl	%ebp,%r9d
-	.endif
-	xchg	%ecx,%esi
-	movl	%ebx,%edi
-	movl	%edx,%edx	/* zero extension */
-	.endm 
-
-	/* clobbers %eax */	
-	.macro  CLEAR_RREGS offset=0, _r9=rax
+	/* clobbers %rax */
+	.macro  CLEAR_RREGS _r9=rax
 	xorl 	%eax,%eax
-	movq	%rax,\offset+R11(%rsp)
-	movq	%rax,\offset+R10(%rsp)
-	movq	%\_r9,\offset+R9(%rsp)
-	movq	%rax,\offset+R8(%rsp)
+	movq	%rax,R11(%rsp)
+	movq	%rax,R10(%rsp)
+	movq	%\_r9,R9(%rsp)
+	movq	%rax,R8(%rsp)
 	.endm
 
 	/*
@@ -60,14 +49,14 @@
 	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
 	 * an appropriately invalid value.
 	 */
-	.macro LOAD_ARGS32 offset, _r9=0
+	.macro LOAD_ARGS32 _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl RCX(%rsp),%ecx
+	movl RDX(%rsp),%edx
+	movl RSI(%rsp),%esi
+	movl RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
 /*
  * 32bit SYSENTER instruction entry.
  *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6	
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
  * with the int 0x80 path.
- */ 	
+ */
 ENTRY(ia32_sysenter_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
 	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rsp,rbp
-	SWAPGS_UNSAFE_STACK
-	movq	PER_CPU_VAR(kernel_stack), %rsp
-	addq	$(KERNEL_STACK_OFFSET),%rsp
+
 	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs, here we enable it straight after entry:
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
 	 */
+	SWAPGS_UNSAFE_STACK
+	movq	PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
 	ENABLE_INTERRUPTS(CLBR_NONE)
- 	movl	%ebp,%ebp		/* zero extension */
-	pushq_cfi $__USER32_DS
-	/*CFI_REL_OFFSET ss,0*/
-	pushq_cfi %rbp
-	CFI_REL_OFFSET rsp,0
-	pushfq_cfi
-	/*CFI_REL_OFFSET rflags,0*/
-	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
-	CFI_REGISTER rip,r10
-	pushq_cfi $__USER32_CS
-	/*CFI_REL_OFFSET cs,0*/
+
+	/* Zero-extending 32-bit regs, do not remove */
+	movl	%ebp, %ebp
 	movl	%eax, %eax
-	pushq_cfi %r10
-	CFI_REL_OFFSET rip,0
-	pushq_cfi %rax
+
+	movl	ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
+	CFI_REGISTER rip,r10
+
+	/* Construct struct pt_regs on stack */
+	pushq_cfi	$__USER32_DS		/* pt_regs->ss */
+	pushq_cfi	%rbp			/* pt_regs->sp */
+	CFI_REL_OFFSET	rsp,0
+	pushfq_cfi				/* pt_regs->flags */
+	pushq_cfi	$__USER32_CS		/* pt_regs->cs */
+	pushq_cfi	%r10 /* pt_regs->ip = thread_info->sysenter_return */
+	CFI_REL_OFFSET	rip,0
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rcx			/* pt_regs->cx */
+	pushq_cfi_reg	rax			/* pt_regs->ax */
 	cld
-	SAVE_ARGS 0,1,0
- 	/* no need to do an access_ok check here because rbp has been
- 	   32bit zero extended */ 
+	sub	$(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 10*8
+
+	/*
+	 * no need to do an access_ok check here because rbp has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
@@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+	testl $X86_EFLAGS_NT,EFLAGS(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	ia32_badsys
 sysenter_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	movl	%ebp,%r9d	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
-	movq	%rax,RAX-ARGOFFSET(%rsp)
+	movq	%rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	/* clear IF, that popfq doesn't enable interrupts early */
-	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
-	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
-	CFI_REGISTER rip,rdx
-	RESTORE_ARGS 0,24,0,0,0,0
+	/*
+	 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
+	 * NMI between STI and SYSEXIT has poorly specified behavior,
+	 * and and NMI followed by an IRQ with usergs is fatal.  So
+	 * we just pretend we're using SYSEXIT but we really use
+	 * SYSRETL instead.
+	 *
+	 * This code path is still called 'sysexit' because it pairs
+	 * with 'sysenter' and it uses the SYSENTER calling convention.
+	 */
+	andl    $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	movl	RIP(%rsp),%ecx		/* User %eip */
+	CFI_REGISTER rip,rcx
+	RESTORE_RSI_RDI
+	xorl	%edx,%edx		/* avoid info leaks */
 	xorq	%r8,%r8
 	xorq	%r9,%r9
 	xorq	%r10,%r10
-	xorq	%r11,%r11
-	popfq_cfi
+	movl	EFLAGS(%rsp),%r11d	/* User eflags */
 	/*CFI_RESTORE rflags*/
-	popq_cfi %rcx				/* User %esp */
-	CFI_REGISTER rsp,rcx
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS_SYSEXIT32
+
+	/*
+	 * SYSRETL works even on Intel CPUs.  Use it in preference to SYSEXIT,
+	 * since it avoids a dicey window with interrupts enabled.
+	 */
+	movl	RSP(%rsp),%esp
+
+	/*
+	 * USERGS_SYSRET32 does:
+	 *  gsbase = user's gs base
+	 *  eip = ecx
+	 *  rflags = r11
+	 *  cs = __USER32_CS
+	 *  ss = __USER_DS
+	 *
+	 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
+	 *
+	 *  pop %ebp
+	 *  pop %edx
+	 *  pop %ecx
+	 *
+	 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
+	 * avoid info leaks.  R11 ends up with VDSO32_SYSENTER_RETURN's
+	 * address (already known to user code), and R12-R15 are
+	 * callee-saved and therefore don't contain any interesting
+	 * kernel data.
+	 */
+	USERGS_SYSRET32
 
 	CFI_RESTORE_STATE
 
@@ -205,18 +247,18 @@ sysexit_from_sys_call:
 	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
 	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
+	movl RAX(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 	movl %ebx,%edi			/* reload 1st syscall arg */
-	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
-	movl RDX-ARGOFFSET(%rsp),%edx	/* reload 3rd syscall arg */
-	movl RSI-ARGOFFSET(%rsp),%ecx	/* reload 4th syscall arg */
-	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
+	movl RCX(%rsp),%esi	/* reload 2nd syscall arg */
+	movl RDX(%rsp),%edx	/* reload 3rd syscall arg */
+	movl RSI(%rsp),%ecx	/* reload 4th syscall arg */
+	movl RDI(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -227,13 +269,13 @@ sysexit_from_sys_call:
 1:	setbe %al		/* 1 if error, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	call __audit_syscall_exit
-	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
+	movq RAX(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz \exit
-	CLEAR_RREGS -ARGOFFSET
+	CLEAR_RREGS
 	jmp int_with_check
 	.endm
 
@@ -253,16 +295,16 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz	sysenter_auditsys
 #endif
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
+	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
@@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
 /*
  * 32bit SYSCALL instruction entry.
  *
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx return EIP 
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
- * %esp user stack 
- * 0(%esp) Arg6
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ecx  return address
+ * ebx  arg1
+ * ebp  arg2	(note: not saved in the stack frame, should not be touched)
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * esp  user stack
+ * 0(%esp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
- * with the int 0x80 path.	
- */ 	
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
+
+	/*
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	SWAPGS_UNSAFE_STACK
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
 	movq	PER_CPU_VAR(kernel_stack),%rsp
-	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs and here we enable it straight after entry:
-	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
-	movl 	%eax,%eax	/* zero extension */
-	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
-	movq	%rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	movq	%rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+
+	/* Zero-extending 32-bit regs, do not remove */
+	movl	%eax,%eax
+
+	/* Construct struct pt_regs on stack */
+	pushq_cfi	$__USER32_DS		/* pt_regs->ss */
+	pushq_cfi	%r8			/* pt_regs->sp */
+	CFI_REL_OFFSET rsp,0
+	pushq_cfi	%r11			/* pt_regs->flags */
+	pushq_cfi	$__USER32_CS		/* pt_regs->cs */
+	pushq_cfi	%rcx			/* pt_regs->ip */
+	CFI_REL_OFFSET rip,0
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rbp			/* pt_regs->cx */
 	movl	%ebp,%ecx
-	movq	$__USER32_CS,CS-ARGOFFSET(%rsp)
-	movq	$__USER32_DS,SS-ARGOFFSET(%rsp)
-	movq	%r11,EFLAGS-ARGOFFSET(%rsp)
-	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-	movq	%r8,RSP-ARGOFFSET(%rsp)	
-	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
-	/* no need to do an access_ok check here because r8 has been
-	   32bit zero extended */ 
-	/* hardware stack frame is complete now */	
+	pushq_cfi_reg	rax			/* pt_regs->ax */
+	sub	$(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 10*8
+
+	/*
+	 * no need to do an access_ok check here because r8 has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
 	ja  ia32_badsys
 cstar_do_call:
-	IA32_ARG_FIXUP 1
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	/* r9 already loaded */	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
-	movl RIP-ARGOFFSET(%rsp),%ecx
+	andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	RESTORE_RSI_RDI_RDX
+	movl RIP(%rsp),%ecx
 	CFI_REGISTER rip,rcx
-	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
+	movl EFLAGS(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
 	xorq	%r10,%r10
 	xorq	%r9,%r9
 	xorq	%r8,%r8
 	TRACE_IRQS_ON
-	movl RSP-ARGOFFSET(%rsp),%esp
+	movl RSP(%rsp),%esp
 	CFI_RESTORE rsp
+	/*
+	 * 64bit->32bit SYSRET restores eip from ecx,
+	 * eflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 * (Note: 32bit->32bit SYSRET is different: since r11
+	 * does not exist, it merely sets eflags.IF=1).
+	 */
 	USERGS_SYSRET32
-	
+
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	CFI_RESTORE_STATE
-	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
+	movl %r9d,R9(%rsp)	/* register to be clobbered by call */
 	auditsys_entry_common
-	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
+	movl R9(%rsp),%r9d	/* reload 6th syscall arg */
 	jmp cstar_dispatch
 
 sysretl_audit:
@@ -368,17 +444,17 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
-	SAVE_REST
-	CLEAR_RREGS 0, r9
+	SAVE_EXTRA_REGS
+	CLEAR_RREGS r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	LOAD_ARGS32 1	/* reload args from stack in case ptrace changed it */
+	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -391,78 +467,94 @@ ia32_badarg:
 	jmp ia32_sysret
 	CFI_ENDPROC
 
-/* 
- * Emulated IA32 system calls via int 0x80. 
+/*
+ * Emulated IA32 system calls via int 0x80.
  *
- * Arguments:	 
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6	(note: not saved in the stack frame, should not be touched)
  *
  * Notes:
- * Uses the same stack frame as the x86-64 version.	
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.	
- */ 				
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
 
 ENTRY(ia32_syscall)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-RIP
-	/*CFI_REL_OFFSET	ss,SS-RIP*/
-	CFI_REL_OFFSET	rsp,RSP-RIP
-	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
-	/*CFI_REL_OFFSET	cs,CS-RIP*/
-	CFI_REL_OFFSET	rip,RIP-RIP
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	SWAPGS
+	CFI_DEF_CFA	rsp,5*8
+	/*CFI_REL_OFFSET	ss,4*8 */
+	CFI_REL_OFFSET	rsp,3*8
+	/*CFI_REL_OFFSET	rflags,2*8 */
+	/*CFI_REL_OFFSET	cs,1*8 */
+	CFI_REL_OFFSET	rip,0*8
+
 	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs and here we enable it straight after entry:
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
 	 */
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	SWAPGS
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	movl %eax,%eax
-	pushq_cfi %rax
+
+	/* Zero-extending 32-bit regs, do not remove */
+	movl	%eax,%eax
+
+	/* Construct struct pt_regs on stack (iret frame is already on stack) */
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rcx			/* pt_regs->cx */
+	pushq_cfi_reg	rax			/* pt_regs->ax */
 	cld
-	/* note the registers are not zero extended to the sf.
-	   this could be a problem. */
-	SAVE_ARGS 0,1,0
-	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	sub	$(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 10*8
+
+	orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 ia32_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl %edi,%r8d	/* arg5 */
+	movl %ebp,%r9d	/* arg6 */
+	xchg %ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl %ebx,%edi	/* arg1 */
+	movl %edx,%edx	/* arg3 (zero extension) */
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 ia32_ret_from_sys_call:
-	CLEAR_RREGS -ARGOFFSET
-	jmp int_ret_from_sys_call 
+	CLEAR_RREGS
+	jmp int_ret_from_sys_call
 
-ia32_tracesys:			 
-	SAVE_REST
+ia32_tracesys:
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	LOAD_ARGS32	/* reload args from stack in case ptrace changed it */
+	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
 END(ia32_syscall)
 
 ia32_badsys:
-	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+	movq $0,ORIG_RAX(%rsp)
 	movq $-ENOSYS,%rax
 	jmp ia32_sysret
 
@@ -479,8 +571,6 @@ GLOBAL(\label)
 
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn
-	PTREGSCALL stub32_execve, compat_sys_execve
-	PTREGSCALL stub32_execveat, compat_sys_execveat
 	PTREGSCALL stub32_fork, sys_fork
 	PTREGSCALL stub32_vfork, sys_vfork
 
@@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
 
 	ALIGN
 ia32_ptregs_common:
-	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-/*	CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-/*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-/*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	SAVE_REST
+	CFI_DEF_CFA	rsp,SIZEOF_PTREGS
+	CFI_REL_OFFSET	rax,RAX
+	CFI_REL_OFFSET	rcx,RCX
+	CFI_REL_OFFSET	rdx,RDX
+	CFI_REL_OFFSET	rsi,RSI
+	CFI_REL_OFFSET	rdi,RDI
+	CFI_REL_OFFSET	rip,RIP
+/*	CFI_REL_OFFSET	cs,CS*/
+/*	CFI_REL_OFFSET	rflags,EFLAGS*/
+	CFI_REL_OFFSET	rsp,RSP
+/*	CFI_REL_OFFSET	ss,SS*/
+	SAVE_EXTRA_REGS 8
 	call *%rax
-	RESTORE_REST
-	jmp  ia32_sysret	/* misbalances the return cache */
+	RESTORE_EXTRA_REGS 8
+	ret
 	CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
deleted file mode 100644
index 51ecd5b4e787..000000000000
--- a/arch/x86/ia32/nosyscall.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-
-long compat_ni_syscall(void)
-{
-	return -ENOSYS;
-}
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 8e0ceecdc957..719cd702b0a4 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
 				advice);
 }
 
-long sys32_vm86_warning(void)
-{
-	struct task_struct *me = current;
-	static char lastcomm[sizeof(me->comm)];
-
-	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		compat_printk(KERN_INFO
-			      "%s: vm86 mode not supported on 64 bit kernel\n",
-			      me->comm);
-		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	}
-	return -ENOSYS;
-}
-
 asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
 				   size_t count)
 {
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
deleted file mode 100644
index 4754ba0f5d9f..000000000000
--- a/arch/x86/ia32/syscall_ia32.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* System call table for ia32 emulation. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void compat_ni_syscall(void);
-
-const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
-#include <asm/syscalls_32.h>
-};
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@
 	.endm
 #endif
 
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
 	.long \orig - .
 	.long \alt - .
 	.word \feature
 	.byte \orig_len
 	.byte \alt_len
+	.byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+	\oldinstr
+141:
+	.skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr
+144:
+	.popsection
+.endm
+
+#define old_len			141b-140b
+#define new_len1		144f-143f
+#define new_len2		145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+	\oldinstr
+141:
+	.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+		(alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr1
+144:
+	\newinstr2
+145:
+	.popsection
 .endm
 
 #endif  /*  __ASSEMBLY__  */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr {
 	s32 repl_offset;	/* offset to replacement instruction */
 	u16 cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen;	/* length of new instruction, <= instrlen */
-};
+	u8  replacementlen;	/* length of new instruction */
+	u8  padlen;		/* length of build-time padding */
+} __packed;
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif	/* CONFIG_SMP */
 
-#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num)	"664"#num
+#define e_replacement(num)	"665"#num
 
-#define b_replacement(number)	"663"#number
-#define e_replacement(number)	"664"#number
+#define alt_end_marker		"663"
+#define alt_slen		"662b-661b"
+#define alt_pad_len		alt_end_marker"b-662b"
+#define alt_total_slen		alt_end_marker"b-661b"
+#define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"
 
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num)					\
+	"661:\n\t" oldinstr "\n662:\n"					\
+	".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * "		\
+		"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
 
-#define ALTINSTR_ENTRY(feature, number)					      \
+#define OLDINSTR(oldinstr, num)						\
+	__OLDINSTR(oldinstr, num)					\
+	alt_end_marker ":\n"
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+	"661:\n\t" oldinstr "\n662:\n"								\
+	".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * "	\
+		"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n"	\
+	alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)					      \
 	" .long 661b - .\n"				/* label           */ \
-	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \
+	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
 	" .word " __stringify(feature) "\n"		/* feature bit     */ \
-	" .byte " alt_slen "\n"				/* source len      */ \
-	" .byte " alt_rlen(number) "\n"			/* replacement len */
-
-#define DISCARD_ENTRY(number)				/* rlen <= slen */    \
-	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+	" .byte " alt_total_slen "\n"			/* source len      */ \
+	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
+	" .byte " alt_pad_len "\n"			/* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \
-	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)	/* replacement */     \
+	b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)			\
-	OLDINSTR(oldinstr)						\
+	OLDINSTR(oldinstr, 1)						\
 	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature, 1)					\
 	".popsection\n"							\
-	".pushsection .discard,\"aw\",@progbits\n"			\
-	DISCARD_ENTRY(1)						\
-	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
 	".popsection"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
-	OLDINSTR(oldinstr)						\
+	OLDINSTR_2(oldinstr, 1, 2)					\
 	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature1, 1)					\
 	ALTINSTR_ENTRY(feature2, 2)					\
 	".popsection\n"							\
-	".pushsection .discard,\"aw\",@progbits\n"			\
-	DISCARD_ENTRY(1)						\
-	DISCARD_ENTRY(2)						\
-	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative(oldinstr, newinstr, feature)			\
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+	asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22d896e..976b86a325e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
 {
 	volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
-	alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+	alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
 		       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
 		       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }
@@ -204,7 +204,6 @@ extern void clear_local_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb33106e..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@ do {									\
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
  * code region.
- *
- * (Could use an alternative three way for this if there was one.)
  */
 static __always_inline void rdtsc_barrier(void)
 {
-	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+			  "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b46f83..1c8b50edb2db 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15		  0
-#define R14		  8
-#define R13		 16
-#define R12		 24
-#define RBP		 32
-#define RBX		 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 48
-#define R10		 56
-#define R9		 64
-#define R8		 72
-#define RAX		 80
-#define RCX		 88
-#define RDX		 96
-#define RSI		104
-#define RDI		112
-#define ORIG_RAX	120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		128
-#define CS		136
-#define EFLAGS		144
-#define RSP		152
-#define SS		160
-
-#define ARGOFFSET	R11
-
-	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
-
-	.if \save_rcx
-	movq_cfi rcx, 5*8
-	.endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15		0*8
+#define R14		1*8
+#define R13		2*8
+#define R12		3*8
+#define RBP		4*8
+#define RBX		5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11		6*8
+#define R10		7*8
+#define R9		8*8
+#define R8		9*8
+#define RAX		10*8
+#define RCX		11*8
+#define RDX		12*8
+#define RSI		13*8
+#define RDI		14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX	15*8
+/* Return frame for iretq */
+#define RIP		16*8
+#define CS		17*8
+#define EFLAGS		18*8
+#define RSP		19*8
+#define SS		20*8
+
+#define SIZEOF_PTREGS	21*8
+
+	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+	subq	$15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+	.endm
 
-	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
-	.else
-	movq_cfi rax, 4*8
+	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
+	.if \r11
+	movq_cfi r11, 6*8+\offset
 	.endif
-
-	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
+	.if \r8910
+	movq_cfi r10, 7*8+\offset
+	movq_cfi r9,  8*8+\offset
+	movq_cfi r8,  9*8+\offset
+	.endif
+	.if \rax
+	movq_cfi rax, 10*8+\offset
+	.endif
+	.if \rcx
+	movq_cfi rcx, 11*8+\offset
 	.endif
+	movq_cfi rdx, 12*8+\offset
+	movq_cfi rsi, 13*8+\offset
+	movq_cfi rdi, 14*8+\offset
+	.endm
+	.macro SAVE_C_REGS offset=0
+	SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+	SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_R891011
+	SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RCX_R891011
+	SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
+	SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
+	.endm
+
+	.macro SAVE_EXTRA_REGS offset=0
+	movq_cfi r15, 0*8+\offset
+	movq_cfi r14, 1*8+\offset
+	movq_cfi r13, 2*8+\offset
+	movq_cfi r12, 3*8+\offset
+	movq_cfi rbp, 4*8+\offset
+	movq_cfi rbx, 5*8+\offset
+	.endm
+	.macro SAVE_EXTRA_REGS_RBP offset=0
+	movq_cfi rbp, 4*8+\offset
+	.endm
 
+	.macro RESTORE_EXTRA_REGS offset=0
+	movq_cfi_restore 0*8+\offset, r15
+	movq_cfi_restore 1*8+\offset, r14
+	movq_cfi_restore 2*8+\offset, r13
+	movq_cfi_restore 3*8+\offset, r12
+	movq_cfi_restore 4*8+\offset, rbp
+	movq_cfi_restore 5*8+\offset, rbx
 	.endm
 
-#define ARG_SKIP	(9*8)
+	.macro ZERO_EXTRA_REGS
+	xorl	%r15d, %r15d
+	xorl	%r14d, %r14d
+	xorl	%r13d, %r13d
+	xorl	%r12d, %r12d
+	xorl	%ebp, %ebp
+	xorl	%ebx, %ebx
+	.endm
 
-	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-			    rstor_r8910=1, rstor_rdx=1
+	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 6*8, r11
 	.endif
-
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 7*8, r10
+	movq_cfi_restore 8*8, r9
+	movq_cfi_restore 9*8, r8
 	.endif
-
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 10*8, rax
 	.endif
-
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 11*8, rcx
 	.endif
-
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
-	.endif
-
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
-
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
+	movq_cfi_restore 12*8, rdx
 	.endif
+	movq_cfi_restore 13*8, rsi
+	movq_cfi_restore 14*8, rdi
 	.endm
-
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
-	.if \skiprax
-	.else
-	movq \offset+72(%rsp), %rax
-	.endif
+	.macro RESTORE_C_REGS
+	RESTORE_C_REGS_HELPER 1,1,1,1,1
 	.endm
-
-#define REST_SKIP	(6*8)
-
-	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
+	.macro RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_C_REGS_HELPER 0,1,1,1,1
 	.endm
-
-	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
+	.macro RESTORE_C_REGS_EXCEPT_RCX
+	RESTORE_C_REGS_HELPER 1,0,1,1,1
 	.endm
-
-	.macro SAVE_ALL
-	SAVE_ARGS
-	SAVE_REST
+	.macro RESTORE_C_REGS_EXCEPT_R11
+	RESTORE_C_REGS_HELPER 1,1,0,1,1
+	.endm
+	.macro RESTORE_C_REGS_EXCEPT_RCX_R11
+	RESTORE_C_REGS_HELPER 1,0,0,1,1
+	.endm
+	.macro RESTORE_RSI_RDI
+	RESTORE_C_REGS_HELPER 0,0,0,0,0
+	.endm
+	.macro RESTORE_RSI_RDI_RDX
+	RESTORE_C_REGS_HELPER 0,0,0,0,1
 	.endm
 
-	.macro RESTORE_ALL addskip=0
-	RESTORE_REST
-	RESTORE_ARGS 1, \addskip
+	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+	addq $15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
 	.endm
 
 	.macro icebp
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
  */
 
 	.macro SAVE_ALL
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	.endm
 
 	.macro RESTORE_ALL
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebp
-	CFI_RESTORE ebp
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg ebx
+	popl_cfi_reg ecx
+	popl_cfi_reg edx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebp
+	popl_cfi_reg eax
 	.endm
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c401f79f..acdee09228b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 		sp = task_pt_regs(current)->sp;
 	} else {
 		/* -128 for the x32 ABI redzone */
-		sp = this_cpu_read(old_rsp) - 128;
+		sp = task_pt_regs(current)->sp - 128;
 	}
 
 	return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index d2b12988d2ed..bf2caa1dedc5 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -34,8 +34,6 @@ extern int _debug_hotplug_cpu(int cpu, int action);
 #endif
 #endif
 
-DECLARE_PER_CPU(int, cpu_state);
-
 int mwait_usable(const struct cpuinfo_x86 *);
 
 #endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a54851aedc..7ee9b94d9921 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
 #include <asm/disabled-features.h>
 #endif
 
-#define NCAPINTS	11	/* N 32-bit words worth of info */
+#define NCAPINTS	13	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -195,6 +195,7 @@
 #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
 #define X86_FEATURE_HWP_EPP	( 7*32+13) /* Intel HWP_EPP */
 #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
+#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -226,12 +227,15 @@
 #define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 #define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
 #define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
 #define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
 #define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
@@ -242,6 +246,12 @@
 #define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
 #define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
 
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
 /*
  * BUG word(s)
  */
@@ -418,6 +428,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 			 " .word %P0\n"		/* 1: do replace */
 			 " .byte 2b - 1b\n"	/* source len */
 			 " .byte 0\n"		/* replacement len */
+			 " .byte 0\n"		/* pad len */
 			 ".previous\n"
 			 /* skipping size check since replacement size = 0 */
 			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +443,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 			 " .word %P0\n"		/* feature bit */
 			 " .byte 2b - 1b\n"	/* source len */
 			 " .byte 0\n"		/* replacement len */
+			 " .byte 0\n"		/* pad len */
 			 ".previous\n"
 			 /* skipping size check since replacement size = 0 */
 			 : : "i" (bit) : : t_no);
@@ -457,6 +469,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 			     " .word %P1\n"		/* feature bit */
 			     " .byte 2b - 1b\n"		/* source len */
 			     " .byte 4f - 3f\n"		/* replacement len */
+			     " .byte 0\n"		/* pad len */
 			     ".previous\n"
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +496,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
-/*
- * We need to spell the jumps to the compiler because, depending on the offset,
- * the replacement jump can be bigger than the original jump, and this we cannot
- * have. Thus, we force the jump to the widest, 4-byte, signed relative
- * offset even though the last would often fit in less bytes.
- */
-		asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
 			 "2:\n"
+			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			         "((5f-4f) - (2b-1b)),0x90\n"
+			 "3:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"		/* src offset */
-			 " .long 3f - .\n"		/* repl offset */
+			 " .long 4f - .\n"		/* repl offset */
 			 " .word %P1\n"			/* always replace */
-			 " .byte 2b - 1b\n"		/* src len */
-			 " .byte 4f - 3f\n"		/* repl len */
+			 " .byte 3b - 1b\n"		/* src len */
+			 " .byte 5f - 4f\n"		/* repl len */
+			 " .byte 3b - 2b\n"		/* pad len */
 			 ".previous\n"
 			 ".section .altinstr_replacement,\"ax\"\n"
-			 "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
-			 "4:\n"
+			 "4: jmp %l[t_no]\n"
+			 "5:\n"
 			 ".previous\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"		/* src offset */
 			 " .long 0\n"			/* no replacement */
 			 " .word %P0\n"			/* feature bit */
-			 " .byte 2b - 1b\n"		/* src len */
+			 " .byte 3b - 1b\n"		/* src len */
 			 " .byte 0\n"			/* repl len */
+			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
 			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
 			 : : t_dynamic, t_no);
@@ -527,6 +539,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     " .word %P2\n"		/* always replace */
 			     " .byte 2b - 1b\n"		/* source len */
 			     " .byte 4f - 3f\n"		/* replacement len */
+			     " .byte 0\n"		/* pad len */
 			     ".previous\n"
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +554,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 			     " .word %P1\n"		/* feature bit */
 			     " .byte 4b - 3b\n"		/* src len */
 			     " .byte 6f - 5f\n"		/* repl len */
+			     " .byte 0\n"		/* pad len */
 			     ".previous\n"
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e8f156..a0bf89fd2647 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
  * Pentium F0 0F bugfix can have resulted in the mapped
  * IDT being write-protected.
  */
-#define set_intr_gate(n, addr)						\
+#define set_intr_gate_notrace(n, addr)					\
 	do {								\
 		BUG_ON((unsigned)n > 0xFF);				\
 		_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,	\
 			  __KERNEL_CS);					\
+	} while (0)
+
+#define set_intr_gate(n, addr)						\
+	do {								\
+		set_intr_gate_notrace(n, addr);				\
 		_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
 				0, 0, __KERNEL_CS);			\
 	} while (0)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
 	CFI_ADJUST_CFA_OFFSET 8
 	.endm
 
+	.macro pushq_cfi_reg reg
+	pushq %\reg
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popq_cfi reg
 	popq \reg
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
+	.macro popq_cfi_reg reg
+	popq %\reg
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfq_cfi
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	.endm
 
+	.macro pushl_cfi_reg reg
+	pushl %\reg
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popl_cfi reg
 	popl \reg
 	CFI_ADJUST_CFA_OFFSET -4
 	.endm
 
+	.macro popl_cfi_reg reg
+	popl %\reg
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfl_cfi
 	pushfl
 	CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 779c2efe2e97..3ab0537872fb 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
-#ifdef CONFIG_MEMTEST
-extern void early_memtest(unsigned long start, unsigned long end);
-#else
-static inline void early_memtest(unsigned long start, unsigned long end)
-{
-}
-#endif
-
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
 extern u64 early_reserve_e820(u64 sizet, u64 align);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 25bce45c6fc4..3738b138b843 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -2,6 +2,8 @@
 #define _ASM_X86_EFI_H
 
 #include <asm/i387.h>
+#include <asm/pgtable.h>
+
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
  * with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
-extern void __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(void);
+extern pgd_t * __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_unmap_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a9dab5..f161c189c27b 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@ do {						\
 static inline void elf_common_init(struct thread_struct *t,
 				   struct pt_regs *regs, const u16 ds)
 {
-	regs->ax = regs->bx = regs->cx = regs->dx = 0;
-	regs->si = regs->di = regs->bp = 0;
+	/* Commented-out registers are cleared in stub_execve */
+	/*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
+	regs->si = regs->di /*= regs->bp*/ = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
-	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+	/*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
 	t->fs = t->gs = 0;
 	t->fsindex = t->gsindex = 0;
 	t->ds = t->es = ds;
@@ -338,9 +339,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
 					      int uses_interp);
 #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
 
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 /*
  * True on X86_32 or when emulating IA32 on X86_64
  */
@@ -365,6 +363,7 @@ enum align_flags {
 struct va_alignment {
 	int flags;
 	unsigned long mask;
+	unsigned long bits;
 } ____cacheline_aligned;
 
 extern struct va_alignment va_align;
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 0dbc08282291..da5e96756570 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+	per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+	tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+	return new == this_cpu_read_stable(fpu_owner_task) &&
+		cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
 	return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
 
 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-	memset(fx, 0, xstate_size);
 	fx->cwd = 0x37f;
 	fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -351,8 +378,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
 	__thread_set_has_fpu(tsk);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
+static inline void drop_fpu(struct task_struct *tsk)
 {
+	/*
+	 * Forget coprocessor state..
+	 */
+	preempt_disable();
+	tsk->thread.fpu_counter = 0;
+
 	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
@@ -360,30 +393,29 @@ static inline void __drop_fpu(struct task_struct *tsk)
 			     _ASM_EXTABLE(1b, 2b));
 		__thread_fpu_end(tsk);
 	}
+
+	clear_stopped_child_used_math(tsk);
+	preempt_enable();
 }
 
-static inline void drop_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
 {
-	/*
-	 * Forget coprocessor state..
-	 */
-	preempt_disable();
-	tsk->thread.fpu_counter = 0;
-	__drop_fpu(tsk);
-	clear_used_math();
-	preempt_enable();
+	if (use_xsave())
+		xrstor_state(init_xstate_buf, -1);
+	else
+		fxrstor_checking(&init_xstate_buf->i387);
 }
 
-static inline void drop_init_fpu(struct task_struct *tsk)
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
 {
 	if (!use_eager_fpu())
 		drop_fpu(tsk);
-	else {
-		if (use_xsave())
-			xrstor_state(init_xstate_buf, -1);
-		else
-			fxrstor_checking(&init_xstate_buf->i387);
-	}
+	else
+		restore_init_xstate();
 }
 
 /*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-	per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-	return new == this_cpu_read_stable(fpu_owner_task) &&
-		cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
 	fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-					     new->thread.fpu_counter > 5);
+	fpu.preload = tsk_used_math(new) &&
+		      (use_eager_fpu() || new->thread.fpu_counter > 5);
+
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
-			cpu = ~0;
-		old->thread.fpu.last_cpu = cpu;
-		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
+			task_disable_lazy_fpu_restore(old);
+		else
+			old->thread.fpu.last_cpu = cpu;
+
+		/* But leave fpu_owner_task! */
+		old->thread.fpu.has_fpu = 0;
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			stts();
 	} else {
 		old->thread.fpu_counter = 0;
-		old->thread.fpu.last_cpu = ~0;
+		task_disable_lazy_fpu_restore(old);
 		if (fpu.preload) {
 			new->thread.fpu_counter++;
-			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+			if (fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
 	if (fpu.preload) {
 		if (unlikely(restore_fpu_checking(new)))
-			drop_init_fpu(new);
+			fpu_reset_state(new);
 	}
 }
 
@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 }
 
 /*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
  *
  * NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
  */
 static inline void user_fpu_begin(void)
 {
@@ -520,24 +540,6 @@ static inline void __save_fpu(struct task_struct *tsk)
 }
 
 /*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-	if (use_eager_fpu()) {
-		__save_fpu(tsk);
-		return;
-	}
-
-	preempt_disable();
-	__save_init_fpu(tsk);
-	__thread_fpu_end(tsk);
-	preempt_enable();
-}
-
-/*
  * i387 state interaction
  */
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290e0b20..e9571ddabc4f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-				    - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED	(-1)
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 47f29b1d1846..e7814b74caf8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -69,7 +69,7 @@ struct insn {
 	const insn_byte_t *next_byte;
 };
 
-#define MAX_INSN_SIZE	16
+#define MAX_INSN_SIZE	15
 
 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
 #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 705d35708a50..7c5af123bdbd 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
 #define SFI_MTMR_MAX_NUM 8
 #define SFI_MRTC_MAX	8
 
-extern struct console early_hsu_console;
-extern void hsu_early_console_init(const char *);
-
 extern void intel_scu_devices_create(void);
 extern void intel_scu_devices_destroy(void);
 
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f42a04735a0a..e37d6b3ad983 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -79,11 +79,12 @@ struct iommu_table_entry {
  *  d). Similar to the 'init', except that this gets called from pci_iommu_init
  *      where we do have a memory allocator.
  *
- * The standard vs the _FINISH differs in that the _FINISH variant will
- * continue detecting other IOMMUs in the call list after the
- * the detection routine returns a positive number. The _FINISH will
- * stop the execution chain. Both will still call the 'init' and
- * 'late_init' functions if they are set.
+ * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
+ * in that the former will continue detecting other IOMMUs in the call
+ * list after the detection routine returns a positive number, while the
+ * latter will stop the execution chain upon first successful detection.
+ * Both variants will still call the 'init' and 'late_init' functions if
+ * they are set.
  */
 #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init)		\
 	__IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..b77f5edb03b0 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
 #define USERGS_SYSRET32				\
 	swapgs;					\
 	sysretl
-#define ENABLE_INTERRUPTS_SYSEXIT32		\
-	swapgs;					\
-	sti;					\
-	sysexit
 
 #else
 #define INTERRUPT_RETURN		iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
 
 	return arch_irqs_disabled_flags(flags);
 }
+#endif /* !__ASSEMBLY__ */
 
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk;
+#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk;
 #else
-
-#ifdef CONFIG_X86_64
-#define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  ifdef CONFIG_X86_64
+#    define LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
+#    define LOCKDEP_SYS_EXIT_IRQ \
 	TRACE_IRQS_ON; \
 	sti; \
-	SAVE_REST; \
-	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
+	call lockdep_sys_exit_thunk; \
 	cli; \
 	TRACE_IRQS_OFF;
-
-#else
-#define ARCH_LOCKDEP_SYS_EXIT			\
+#  else
+#    define LOCKDEP_SYS_EXIT \
 	pushl %eax;				\
 	pushl %ecx;				\
 	pushl %edx;				\
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
 	popl %edx;				\
 	popl %ecx;				\
 	popl %eax;
-
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk;
-#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk;
+#    define LOCKDEP_SYS_EXIT_IRQ
+#  endif
 #else
-#  define TRACE_IRQS_ON
-#  define TRACE_IRQS_OFF
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#  define LOCKDEP_SYS_EXIT	ARCH_LOCKDEP_SYS_EXIT
-#  define LOCKDEP_SYS_EXIT_IRQ	ARCH_LOCKDEP_SYS_EXIT_IRQ
-# else
 #  define LOCKDEP_SYS_EXIT
 #  define LOCKDEP_SYS_EXIT_IRQ
-# endif
-
+#endif
 #endif /* __ASSEMBLY__ */
+
 #endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 6a2cefb4395a..a4c1cf7e93f8 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
 #include <linux/types.h>
@@ -30,8 +30,6 @@ l_yes:
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_X86_64
 typedef u64 jump_label_t;
 #else
@@ -44,4 +42,5 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39cc385..dea2e7e962e3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
-#define SELECTOR_TI_MASK (1 << 2)
-#define SELECTOR_RPL_MASK 0x03
-
-#define IOPL_SHIFT 12
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
@@ -345,6 +340,7 @@ struct kvm_pmu {
 enum {
 	KVM_DEBUGREG_BP_ENABLED = 1,
 	KVM_DEBUGREG_WONT_EXIT = 2,
+	KVM_DEBUGREG_RELOAD = 4,
 };
 
 struct kvm_vcpu_arch {
@@ -431,6 +427,9 @@ struct kvm_vcpu_arch {
 
 	int cpuid_nent;
 	struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+	int maxphyaddr;
+
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
@@ -550,11 +549,20 @@ struct kvm_arch_memory_slot {
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+/*
+ * We use as the mode the number of bits allocated in the LDR for the
+ * logical processor ID.  It happens that these are all powers of two.
+ * This makes it is very easy to detect cases where the APICs are
+ * configured for multiple modes; in that case, we cannot use the map and
+ * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ */
+#define KVM_APIC_MODE_XAPIC_CLUSTER          4
+#define KVM_APIC_MODE_XAPIC_FLAT             8
+#define KVM_APIC_MODE_X2APIC                16
+
 struct kvm_apic_map {
 	struct rcu_head rcu;
-	u8 ldr_bits;
-	/* fields bellow are used to decode ldr values in different modes */
-	u32 cid_shift, cid_mask, lid_mask, broadcast;
+	u8 mode;
 	struct kvm_lapic *phys_map[256];
 	/* first index is cluster id second is cpu id in a cluster */
 	struct kvm_lapic *logical_map[16][16];
@@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+					struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -933,6 +943,7 @@ struct x86_emulate_ctxt;
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index e62cf897f781..c1adf33fdd0d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void)
 
 static inline bool kvm_para_available(void)
 {
-	return 0;
+	return false;
 }
 
 static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index a455a53d789a..2d29197bd2fb 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -32,8 +32,8 @@ static inline int klp_check_compiler_support(void)
 #endif
 	return 0;
 }
-extern int klp_write_module_reloc(struct module *mod, unsigned long type,
-				  unsigned long loc, unsigned long value);
+int klp_write_module_reloc(struct module *mod, unsigned long type,
+			   unsigned long loc, unsigned long value);
 
 static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 {
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9b3de99dc004..1f5a86d518db 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,6 +116,12 @@ struct mca_config {
 	u32 rip_msr;
 };
 
+struct mce_vendor_flags {
+	__u64		overflow_recov	: 1, /* cpuid_ebx(80000007) */
+			__reserved_0	: 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
 extern struct mca_config mca_cfg;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -128,9 +134,11 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -183,11 +191,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-	MCP_TIMESTAMP = (1 << 0),	/* log time stamp */
-	MCP_UC = (1 << 1),		/* log uncorrected errors */
-	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
+	MCP_TIMESTAMP	= BIT(0),	/* log time stamp */
+	MCP_UC		= BIT(1),	/* log uncorrected errors */
+	MCP_DONTLOG	= BIT(2),	/* only clear, don't log */
 };
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
 
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 201b520521ed..2fb20d6f7e23 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -75,6 +75,79 @@ static inline void __exit exit_amd_microcode(void) {}
 
 #ifdef CONFIG_MICROCODE_EARLY
 #define MAX_UCODE_COUNT 128
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
+		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
+ * x86_vendor() gets vendor id for BSP.
+ *
+ * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
+ * coding, we still use x86_vendor() to get vendor id for AP.
+ *
+ * x86_vendor() gets vendor information directly from CPUID.
+ */
+static inline int x86_vendor(void)
+{
+	u32 eax = 0x00000000;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+		return X86_VENDOR_INTEL;
+
+	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+		return X86_VENDOR_AMD;
+
+	return X86_VENDOR_UNKNOWN;
+}
+
+static inline unsigned int __x86_family(unsigned int sig)
+{
+	unsigned int x86;
+
+	x86 = (sig >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (sig >> 20) & 0xff;
+
+	return x86;
+}
+
+static inline unsigned int x86_family(void)
+{
+	u32 eax = 0x00000001;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	return __x86_family(eax);
+}
+
+static inline unsigned int x86_model(unsigned int sig)
+{
+	unsigned int x86, model;
+
+	x86 = __x86_family(sig);
+
+	model = (sig >> 4) & 0xf;
+
+	if (x86 == 0x6 || x86 == 0xf)
+		model += ((sig >> 16) & 0xf) << 4;
+
+	return model;
+}
+
 extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 extern int __init save_microcode_in_initrd(void);
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index dd4c20043ce7..2b9209c46ca9 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -56,12 +56,15 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-extern int
-get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc);
 extern int microcode_sanity_check(void *mc, int print_err);
-extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
-extern int
-update_match_revision(struct microcode_header_intel *mc_header, int rev);
+extern int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc);
+
+static inline int
+revision_is_newer(struct microcode_header_intel *mc_header, int rev)
+{
+	return (mc_header->rev <= rev) ? 0 : 1;
+}
 
 #ifdef CONFIG_MICROCODE_INTEL_EARLY
 extern void __init load_ucode_intel_bsp(void);
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
 		     :: "a" (eax), "c" (ecx));
 }
 
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+	trace_hardirqs_on();
+	/* "mwait %eax, %ecx;" */
+	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+		     :: "a" (eax), "c" (ecx));
+}
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 95e11f79f123..c7c712f2648b 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -40,8 +40,10 @@
 
 #ifdef CONFIG_X86_64
 #include <asm/page_64_types.h>
+#define IOREMAP_MAX_ORDER       (PUD_SHIFT)
 #else
 #include <asm/page_32_types.h>
+#define IOREMAP_MAX_ORDER       (PMD_SHIFT)
 #endif	/* CONFIG_X86_64 */
 
 #ifndef __ASSEMBLY__
@@ -51,8 +53,6 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-extern bool kaslr_enabled;
-
 static inline phys_addr_t get_max_mapped(void)
 {
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 965c47d254aa..8957810ad7d1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -545,7 +545,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
 }
 
-#if PAGETABLE_LEVELS >= 3
+#if CONFIG_PGTABLE_LEVELS >= 3
 static inline pmd_t __pmd(pmdval_t val)
 {
 	pmdval_t ret;
@@ -585,7 +585,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
 			    val);
 }
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 static inline pud_t __pud(pudval_t val)
 {
 	pudval_t ret;
@@ -636,9 +636,9 @@ static inline void pud_clear(pud_t *pudp)
 	set_pud(pudp, __pud(0));
 }
 
-#endif	/* PAGETABLE_LEVELS == 4 */
+#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
 
-#endif	/* PAGETABLE_LEVELS >= 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS >= 3 */
 
 #ifdef CONFIG_X86_PAE
 /* Special-case pte-setting operations for PAE, which can't update a
@@ -976,11 +976,6 @@ extern void default_banner(void);
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-
-#define ENABLE_INTERRUPTS_SYSEXIT32					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
-		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 #endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7549b8b369e4..f7b0b5c112f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -294,7 +294,7 @@ struct pv_mmu_ops {
 	struct paravirt_callee_save pgd_val;
 	struct paravirt_callee_save make_pgd;
 
-#if PAGETABLE_LEVELS >= 3
+#if CONFIG_PGTABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
 	void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
 	void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
@@ -308,13 +308,13 @@ struct pv_mmu_ops {
 	struct paravirt_callee_save pmd_val;
 	struct paravirt_callee_save make_pmd;
 
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 	struct paravirt_callee_save pud_val;
 	struct paravirt_callee_save make_pud;
 
 	void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif	/* PAGETABLE_LEVELS == 4 */
-#endif	/* PAGETABLE_LEVELS >= 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
+#endif	/* CONFIG_PGTABLE_LEVELS >= 3 */
 
 	struct pv_lazy_ops lazy_mode;
 
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..164e3f8d3c3d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index c4412e972bbd..bf7f8b55b0f9 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -77,7 +77,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#if PAGETABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *page;
@@ -116,7 +116,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 }
 #endif	/* CONFIG_X86_PAE */
 
-#if PAGETABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
 	paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
@@ -142,7 +142,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 	___pud_free_tlb(tlb, pud);
 }
 
-#endif	/* PAGETABLE_LEVELS > 3 */
-#endif	/* PAGETABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #endif /* _ASM_X86_PGALLOC_H */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index daacc23e3fb9..392576433e77 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -17,7 +17,6 @@ typedef union {
 #endif	/* !__ASSEMBLY__ */
 
 #define SHARED_KERNEL_PMD	0
-#define PAGETABLE_LEVELS	2
 
 /*
  * traditional i386 two-level paging structure:
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 1bd5876c8649..bcc89625ebe5 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -24,8 +24,6 @@ typedef union {
 #define SHARED_KERNEL_PMD	1
 #endif
 
-#define PAGETABLE_LEVELS	3
-
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
  */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a0c35bf6cb92..fe57e7a98839 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -551,7 +551,7 @@ static inline unsigned long pages_to_mb(unsigned long npg)
 	return npg >> (20 - PAGE_SHIFT);
 }
 
-#if PAGETABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static inline int pud_none(pud_t pud)
 {
 	return native_pud_val(pud) == 0;
@@ -594,9 +594,9 @@ static inline int pud_large(pud_t pud)
 {
 	return 0;
 }
-#endif	/* PAGETABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
-#if PAGETABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static inline int pgd_present(pgd_t pgd)
 {
 	return pgd_flags(pgd) & _PAGE_PRESENT;
@@ -633,7 +633,7 @@ static inline int pgd_none(pgd_t pgd)
 {
 	return !native_pgd_val(pgd);
 }
-#endif	/* PAGETABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 602b6028c5b6..e6844dfb4471 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,7 +20,6 @@ typedef struct { pteval_t pte; } pte_t;
 #endif	/* !__ASSEMBLY__ */
 
 #define SHARED_KERNEL_PMD	0
-#define PAGETABLE_LEVELS	4
 
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 8c7c10802e9c..78f0c8cbe316 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -234,7 +234,7 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
 	return native_pgd_val(pgd) & PTE_FLAGS_MASK;
 }
 
-#if PAGETABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef struct { pudval_t pud; } pud_t;
 
 static inline pud_t native_make_pud(pmdval_t val)
@@ -255,7 +255,7 @@ static inline pudval_t native_pud_val(pud_t pud)
 }
 #endif
 
-#if PAGETABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef struct { pmdval_t pmd; } pmd_t;
 
 static inline pmd_t native_make_pmd(pmdval_t val)
diff --git a/arch/x86/include/asm/resume-trace.h b/arch/x86/include/asm/pm-trace.h
index 3ff1c2cb1da5..7b7ac42c3661 100644
--- a/arch/x86/include/asm/resume-trace.h
+++ b/arch/x86/include/asm/pm-trace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_RESUME_TRACE_H
-#define _ASM_X86_RESUME_TRACE_H
+#ifndef _ASM_X86_PM_TRACE_H
+#define _ASM_X86_PM_TRACE_H
 
 #include <asm/asm.h>
 
@@ -14,8 +14,10 @@ do {								\
 			     ".previous"			\
 			     :"=r" (tracedata)			\
 			     : "i" (__LINE__), "i" (__FILE__));	\
-		generate_resume_trace(tracedata, user);		\
+		generate_pm_trace(tracedata, user);		\
 	}							\
 } while (0)
 
-#endif /* _ASM_X86_RESUME_TRACE_H */
+#define TRACE_SUSPEND(user)	TRACE_RESUME(user)
+
+#endif /* _ASM_X86_PM_TRACE_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..23ba6765b718 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -109,6 +109,9 @@ struct cpuinfo_x86 {
 	/* in KB - valid for CPUS which support this call: */
 	int			x86_cache_size;
 	int			x86_cache_alignment;	/* In bytes */
+	/* Cache QoS architectural values: */
+	int			x86_cache_max_rmid;	/* max index */
+	int			x86_cache_occ_scale;	/* scale to bytes */
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
 	/* cpuid returned max cores value: */
@@ -210,8 +213,23 @@ struct x86_hw_tss {
 	unsigned long		sp0;
 	unsigned short		ss0, __ss0h;
 	unsigned long		sp1;
-	/* ss1 caches MSR_IA32_SYSENTER_CS: */
-	unsigned short		ss1, __ss1h;
+
+	/*
+	 * We don't use ring 1, so ss1 is a convenient scratch space in
+	 * the same cacheline as sp0.  We use ss1 to cache the value in
+	 * MSR_IA32_SYSENTER_CS.  When we context switch
+	 * MSR_IA32_SYSENTER_CS, we first check if the new value being
+	 * written matches ss1, and, if it's not, then we wrmsr the new
+	 * value and update ss1.
+	 *
+	 * The only reason we context switch MSR_IA32_SYSENTER_CS is
+	 * that we set it to zero in vm86 tasks to avoid corrupting the
+	 * stack if we were to go through the sysenter path from vm86
+	 * mode.
+	 */
+	unsigned short		ss1;	/* MSR_IA32_SYSENTER_CS */
+
+	unsigned short		__ss1h;
 	unsigned long		sp2;
 	unsigned short		ss2, __ss2h;
 	unsigned long		__cr3;
@@ -276,13 +294,17 @@ struct tss_struct {
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
 
 	/*
-	 * .. and then another 0x100 bytes for the emergency kernel stack:
+	 * Space for the temporary SYSENTER stack:
 	 */
-	unsigned long		stack[64];
+	unsigned long		SYSENTER_stack[64];
 
 } ____cacheline_aligned;
 
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
 
 /*
  * Save the original ist values for checking stack pointers during debugging
@@ -474,7 +496,6 @@ struct thread_struct {
 #ifdef CONFIG_X86_32
 	unsigned long		sysenter_cs;
 #else
-	unsigned long		usersp;	/* Copy from PDA */
 	unsigned short		es;
 	unsigned short		ds;
 	unsigned short		fsindex;
@@ -564,6 +585,16 @@ static inline void native_swapgs(void)
 #endif
 }
 
+static inline unsigned long current_top_of_stack(void)
+{
+#ifdef CONFIG_X86_64
+	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+	/* sp0 on x86_32 is special in and around vm86 mode. */
+	return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -761,10 +792,10 @@ extern char			ignore_fpu_irq;
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
-# define BASE_PREFETCH		ASM_NOP4
+# define BASE_PREFETCH		""
 # define ARCH_HAS_PREFETCH
 #else
-# define BASE_PREFETCH		"prefetcht0 (%1)"
+# define BASE_PREFETCH		"prefetcht0 %P1"
 #endif
 
 /*
@@ -775,10 +806,9 @@ extern char			ignore_fpu_irq;
  */
 static inline void prefetch(const void *x)
 {
-	alternative_input(BASE_PREFETCH,
-			  "prefetchnta (%1)",
+	alternative_input(BASE_PREFETCH, "prefetchnta %P1",
 			  X86_FEATURE_XMM,
-			  "r" (x));
+			  "m" (*(const char *)x));
 }
 
 /*
@@ -788,10 +818,9 @@ static inline void prefetch(const void *x)
  */
 static inline void prefetchw(const void *x)
 {
-	alternative_input(BASE_PREFETCH,
-			  "prefetchw (%1)",
-			  X86_FEATURE_3DNOW,
-			  "r" (x));
+	alternative_input(BASE_PREFETCH, "prefetchw %P1",
+			  X86_FEATURE_3DNOWPREFETCH,
+			  "m" (*(const char *)x));
 }
 
 static inline void spin_lock_prefetch(const void *x)
@@ -799,6 +828,9 @@ static inline void spin_lock_prefetch(const void *x)
 	prefetchw(x);
 }
 
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+			   TOP_OF_KERNEL_STACK_PADDING)
+
 #ifdef CONFIG_X86_32
 /*
  * User space process size: 3GB (default).
@@ -809,39 +841,16 @@ static inline void spin_lock_prefetch(const void *x)
 #define STACK_TOP_MAX		STACK_TOP
 
 #define INIT_THREAD  {							  \
-	.sp0			= sizeof(init_stack) + (long)&init_stack, \
+	.sp0			= TOP_OF_INIT_STACK,			  \
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
 }
 
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS  {							  \
-	.x86_tss = {							  \
-		.sp0		= sizeof(init_stack) + (long)&init_stack, \
-		.ss0		= __KERNEL_DS,				  \
-		.ss1		= __KERNEL_CS,				  \
-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		  \
-	 },								  \
-	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },	  \
-}
-
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
-#define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info)                                                 \
-({                                                                     \
-       unsigned long *__ptr = (unsigned long *)(info);                 \
-       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
-})
-
 /*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
  * is accessible even if the CPU haven't stored the SS/ESP registers
  * on the stack (interrupt gate does not save these registers
@@ -850,11 +859,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
  * "struct pt_regs" is possible, but they may contain the
  * completely wrong values.
  */
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
-       __regs__ - 1;                                                   \
+#define task_pt_regs(task) \
+({									\
+	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
+	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
+	((struct pt_regs *)__ptr) - 1;					\
 })
 
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
@@ -886,11 +895,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define STACK_TOP_MAX		TASK_SIZE_MAX
 
 #define INIT_THREAD  { \
-	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS  { \
-	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.sp0 = TOP_OF_INIT_STACK \
 }
 
 /*
@@ -902,11 +907,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..19507ffa5d28 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long bp;
 	unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
 	unsigned long dx;
 	unsigned long si;
 	unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long ip;
 	unsigned long cs;
 	unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
 }
 
 /*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value.  This tricky test checks that with
- * one comparison.  Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * user_mode(regs) determines whether a register set came from user
+ * mode.  On x86_32, this is true if V8086 mode was enabled OR if the
+ * register set was from protected mode with RPL-3 CS value.  This
+ * tricky test checks that with one comparison.
+ *
+ * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
+ * the extra check.
  */
 static inline int user_mode(struct pt_regs *regs)
 {
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
 #endif
 }
 
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_32
-	return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
-		USER_RPL;
-#else
-	return user_mode(regs);
-#endif
-}
-
 static inline int v8086_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
-#define current_user_stack_pointer()	this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer()	\
-	(test_thread_flag(TIF_IA32) 	\
-	 ? current_pt_regs()->sp 	\
-	 : this_cpu_read(old_rsp))
+#define current_user_stack_pointer()	current_pt_regs()->sp
+#define compat_user_stack_pointer()	current_pt_regs()->sp
 #endif
 
 #ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
  */
 #define arch_ptrace_stop_needed(code, info)				\
 ({									\
-	set_thread_flag(TIF_NOTIFY_RESUME);				\
+	force_iret();							\
 	false;								\
 })
 
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..25b1cc07d496 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
 	struct pvclock_vcpu_time_info pvti;
+	u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index 0f3d7f099224..0c8c7c8861b4 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -1,5 +1,20 @@
+#ifndef _ASM_X86_SECCOMP_H
+#define _ASM_X86_SECCOMP_H
+
+#include <asm/unistd.h>
+
 #ifdef CONFIG_X86_32
-# include <asm/seccomp_32.h>
-#else
-# include <asm/seccomp_64.h>
+#define __NR_seccomp_sigreturn		__NR_sigreturn
 #endif
+
+#ifdef CONFIG_COMPAT
+#include <asm/ia32_unistd.h>
+#define __NR_seccomp_read_32		__NR_ia32_read
+#define __NR_seccomp_write_32		__NR_ia32_write
+#define __NR_seccomp_exit_32		__NR_ia32_exit
+#define __NR_seccomp_sigreturn_32	__NR_ia32_sigreturn
+#endif
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_X86_SECCOMP_H */
diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h
deleted file mode 100644
index b811d6f5780c..000000000000
--- a/arch/x86/include/asm/seccomp_32.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_X86_SECCOMP_32_H
-#define _ASM_X86_SECCOMP_32_H
-
-#include <linux/unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_sigreturn
-
-#endif /* _ASM_X86_SECCOMP_32_H */
diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h
deleted file mode 100644
index 84ec1bd161a5..000000000000
--- a/arch/x86/include/asm/seccomp_64.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_X86_SECCOMP_64_H
-#define _ASM_X86_SECCOMP_64_H
-
-#include <linux/unistd.h>
-#include <asm/ia32_unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#define __NR_seccomp_read_32 __NR_ia32_read
-#define __NR_seccomp_write_32 __NR_ia32_write
-#define __NR_seccomp_exit_32 __NR_ia32_exit
-#define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn
-
-#endif /* _ASM_X86_SECCOMP_64_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index db257a58571f..5a9856eb12ba 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@
 
 #include <linux/const.h>
 
-/* Constructor for a conventional segment GDT (or LDT) entry */
-/* This is a macro so it can be used in initializers */
+/*
+ * Constructor for a conventional segment GDT (or LDT) entry.
+ * This is a macro so it can be used in initializers.
+ */
 #define GDT_ENTRY(flags, base, limit)			\
 	((((base)  & _AC(0xff000000,ULL)) << (56-24)) |	\
 	 (((flags) & _AC(0x0000f0ff,ULL)) << 40) |	\
@@ -12,198 +14,228 @@
 	 (((base)  & _AC(0x00ffffff,ULL)) << 16) |	\
 	 (((limit) & _AC(0x0000ffff,ULL))))
 
-/* Simple and small GDT entries for booting only */
+/* Simple and small GDT entries for booting only: */
 
 #define GDT_ENTRY_BOOT_CS	2
-#define __BOOT_CS		(GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS	3
+#define GDT_ENTRY_BOOT_TSS	4
+#define __BOOT_CS		(GDT_ENTRY_BOOT_CS*8)
+#define __BOOT_DS		(GDT_ENTRY_BOOT_DS*8)
+#define __BOOT_TSS		(GDT_ENTRY_BOOT_TSS*8)
+
+/*
+ * Bottom two bits of selector give the ring
+ * privilege level
+ */
+#define SEGMENT_RPL_MASK	0x3
 
-#define GDT_ENTRY_BOOT_DS	(GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS		(GDT_ENTRY_BOOT_DS * 8)
+/* User mode is privilege level 3: */
+#define USER_RPL		0x3
 
-#define GDT_ENTRY_BOOT_TSS	(GDT_ENTRY_BOOT_CS + 2)
-#define __BOOT_TSS		(GDT_ENTRY_BOOT_TSS * 8)
+/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
+#define SEGMENT_TI_MASK		0x4
+/* LDT segment has TI set ... */
+#define SEGMENT_LDT		0x4
+/* ... GDT has it cleared */
+#define SEGMENT_GDT		0x0
 
-#define SEGMENT_RPL_MASK	0x3 /*
-				     * Bottom two bits of selector give the ring
-				     * privilege level
-				     */
-#define SEGMENT_TI_MASK		0x4 /* Bit 2 is table indicator (LDT/GDT) */
-#define USER_RPL		0x3 /* User mode is privilege level 3 */
-#define SEGMENT_LDT		0x4 /* LDT segment has TI set... */
-#define SEGMENT_GDT		0x0 /* ... GDT has it cleared */
+#define GDT_ENTRY_INVALID_SEG	0
 
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:
  *
- *   0 - null
+ *   0 - null								<=== cacheline #1
  *   1 - reserved
  *   2 - reserved
  *   3 - reserved
  *
- *   4 - unused			<==== new cacheline
+ *   4 - unused								<=== cacheline #2
  *   5 - unused
  *
  *  ------- start of TLS (Thread-Local Storage) segments:
  *
  *   6 - TLS segment #1			[ glibc's TLS segment ]
  *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
- *   8 - TLS segment #3
+ *   8 - TLS segment #3							<=== cacheline #3
  *   9 - reserved
  *  10 - reserved
  *  11 - reserved
  *
  *  ------- start of kernel segments:
  *
- *  12 - kernel code segment		<==== new cacheline
+ *  12 - kernel code segment						<=== cacheline #4
  *  13 - kernel data segment
  *  14 - default user CS
  *  15 - default user DS
- *  16 - TSS
+ *  16 - TSS								<=== cacheline #5
  *  17 - LDT
  *  18 - PNPBIOS support (16->32 gate)
  *  19 - PNPBIOS support
- *  20 - PNPBIOS support
+ *  20 - PNPBIOS support						<=== cacheline #6
  *  21 - PNPBIOS support
  *  22 - PNPBIOS support
  *  23 - APM BIOS support
- *  24 - APM BIOS support
+ *  24 - APM BIOS support						<=== cacheline #7
  *  25 - APM BIOS support
  *
  *  26 - ESPFIX small SS
  *  27 - per-cpu			[ offset to per-cpu data area ]
- *  28 - stack_canary-20		[ for stack protector ]
+ *  28 - stack_canary-20		[ for stack protector ]		<=== cacheline #8
  *  29 - unused
  *  30 - unused
  *  31 - TSS for double fault handler
  */
-#define GDT_ENTRY_TLS_MIN	6
-#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_TLS_MIN		6
+#define GDT_ENTRY_TLS_MAX 		(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
+#define GDT_ENTRY_KERNEL_CS		12
+#define GDT_ENTRY_KERNEL_DS		13
 #define GDT_ENTRY_DEFAULT_USER_CS	14
-
 #define GDT_ENTRY_DEFAULT_USER_DS	15
+#define GDT_ENTRY_TSS			16
+#define GDT_ENTRY_LDT			17
+#define GDT_ENTRY_PNPBIOS_CS32		18
+#define GDT_ENTRY_PNPBIOS_CS16		19
+#define GDT_ENTRY_PNPBIOS_DS		20
+#define GDT_ENTRY_PNPBIOS_TS1		21
+#define GDT_ENTRY_PNPBIOS_TS2		22
+#define GDT_ENTRY_APMBIOS_BASE		23
+
+#define GDT_ENTRY_ESPFIX_SS		26
+#define GDT_ENTRY_PERCPU		27
+#define GDT_ENTRY_STACK_CANARY		28
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
-#define GDT_ENTRY_KERNEL_BASE		(12)
+/*
+ * Number of entries in the GDT table:
+ */
+#define GDT_ENTRIES			32
 
-#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE+0)
+/*
+ * Segment selector values corresponding to the above entries:
+ */
 
-#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE+1)
+#define __KERNEL_CS			(GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS			(GDT_ENTRY_KERNEL_DS*8)
+#define __USER_DS			(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __ESPFIX_SS			(GDT_ENTRY_ESPFIX_SS*8)
 
-#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE+4)
-#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE+5)
+/* segment for calling fn: */
+#define PNP_CS32			(GDT_ENTRY_PNPBIOS_CS32*8)
+/* code segment for BIOS: */
+#define PNP_CS16			(GDT_ENTRY_PNPBIOS_CS16*8)
 
-#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE+6)
-#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE+11)
+/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
+#define SEGMENT_IS_PNP_CODE(x)		(((x) & 0xf4) == PNP_CS32)
 
-#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE+14)
-#define __ESPFIX_SS			(GDT_ENTRY_ESPFIX_SS*8)
+/* data segment for BIOS: */
+#define PNP_DS				(GDT_ENTRY_PNPBIOS_DS*8)
+/* transfer data segment: */
+#define PNP_TS1				(GDT_ENTRY_PNPBIOS_TS1*8)
+/* another data segment: */
+#define PNP_TS2				(GDT_ENTRY_PNPBIOS_TS2*8)
 
-#define GDT_ENTRY_PERCPU		(GDT_ENTRY_KERNEL_BASE+15)
 #ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
+# define __KERNEL_PERCPU		(GDT_ENTRY_PERCPU*8)
 #else
-#define __KERNEL_PERCPU 0
+# define __KERNEL_PERCPU		0
 #endif
 
-#define GDT_ENTRY_STACK_CANARY		(GDT_ENTRY_KERNEL_BASE+16)
 #ifdef CONFIG_CC_STACKPROTECTOR
-#define __KERNEL_STACK_CANARY		(GDT_ENTRY_STACK_CANARY*8)
+# define __KERNEL_STACK_CANARY		(GDT_ENTRY_STACK_CANARY*8)
 #else
-#define __KERNEL_STACK_CANARY		0
+# define __KERNEL_STACK_CANARY		0
 #endif
 
-#define GDT_ENTRY_DOUBLEFAULT_TSS	31
-
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
+#else /* 64-bit: */
 
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32		(GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16		(GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS		(GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1		(GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2		(GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32   (GDT_ENTRY_PNPBIOS_CS32 * 8)	/* segment for calling fn */
-#define PNP_CS16   (GDT_ENTRY_PNPBIOS_CS16 * 8)	/* code segment for BIOS */
-#define PNP_DS     (GDT_ENTRY_PNPBIOS_DS * 8)	/* data segment for BIOS */
-#define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8)	/* transfer data segment */
-#define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8)	/* another data segment */
+#include <asm/cache.h>
 
+#define GDT_ENTRY_KERNEL32_CS		1
+#define GDT_ENTRY_KERNEL_CS		2
+#define GDT_ENTRY_KERNEL_DS		3
 
 /*
- * Matching rules for certain types of segments.
+ * We cannot use the same code segment descriptor for user and kernel mode,
+ * not even in long flat mode, because of different DPL.
+ *
+ * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
+ * selectors:
+ *
+ *   if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
+ *   if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
+ *
+ * ss = STAR.SYSRET_CS+8 (in either case)
+ *
+ * thus USER_DS should be between 32-bit and 64-bit code selectors:
  */
+#define GDT_ENTRY_DEFAULT_USER32_CS	4
+#define GDT_ENTRY_DEFAULT_USER_DS	5
+#define GDT_ENTRY_DEFAULT_USER_CS	6
 
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
-
+/* Needs two entries */
+#define GDT_ENTRY_TSS			8
+/* Needs two entries */
+#define GDT_ENTRY_LDT			10
 
-#else
-#include <asm/cache.h>
-
-#define GDT_ENTRY_KERNEL32_CS 1
-#define GDT_ENTRY_KERNEL_CS 2
-#define GDT_ENTRY_KERNEL_DS 3
+#define GDT_ENTRY_TLS_MIN		12
+#define GDT_ENTRY_TLS_MAX		14
 
-#define __KERNEL32_CS   (GDT_ENTRY_KERNEL32_CS * 8)
+/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU		15
 
 /*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ * Number of entries in the GDT table:
  */
-#define GDT_ENTRY_DEFAULT_USER32_CS 4
-#define GDT_ENTRY_DEFAULT_USER_DS 5
-#define GDT_ENTRY_DEFAULT_USER_CS 6
-#define __USER32_CS   (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
-#define __USER32_DS	__USER_DS
-
-#define GDT_ENTRY_TSS 8	/* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
+#define GDT_ENTRIES			16
 
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define GDT_ENTRIES 16
+/*
+ * Segment selector values corresponding to the above entries:
+ *
+ * Note, selectors also need to have a correct RPL,
+ * expressed with the +3 value for user-space selectors:
+ */
+#define __KERNEL32_CS			(GDT_ENTRY_KERNEL32_CS*8)
+#define __KERNEL_CS			(GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS			(GDT_ENTRY_KERNEL_DS*8)
+#define __USER32_CS			(GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
+#define __USER_DS			(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER32_DS			__USER_DS
+#define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __PER_CPU_SEG			(GDT_ENTRY_PER_CPU*8 + 3)
+
+/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
+#define FS_TLS				0
+#define GS_TLS				1
+
+#define GS_TLS_SEL			((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL			((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #endif
 
-#define __KERNEL_CS	(GDT_ENTRY_KERNEL_CS*8)
-#define __KERNEL_DS	(GDT_ENTRY_KERNEL_DS*8)
-#define __USER_DS	(GDT_ENTRY_DEFAULT_USER_DS*8+3)
-#define __USER_CS	(GDT_ENTRY_DEFAULT_USER_CS*8+3)
 #ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl()  0
+# define get_kernel_rpl()		0
 #endif
 
-#define IDT_ENTRIES 256
-#define NUM_EXCEPTION_VECTORS 32
-/* Bitmask of exception vectors which push an error code on the stack */
-#define EXCEPTION_ERRCODE_MASK  0x00027d00
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+#define IDT_ENTRIES			256
+#define NUM_EXCEPTION_VECTORS		32
+
+/* Bitmask of exception vectors which push an error code on the stack: */
+#define EXCEPTION_ERRCODE_MASK		0x00027d00
+
+#define GDT_SIZE			(GDT_ENTRIES*8)
+#define GDT_ENTRY_TLS_ENTRIES		3
+#define TLS_SIZE			(GDT_ENTRY_TLS_ENTRIES* 8)
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
+
 extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
 #ifdef CONFIG_TRACING
-#define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handlers early_idt_handlers
 #endif
 
 /*
@@ -228,37 +260,30 @@ do {									\
 } while (0)
 
 /*
- * Save a segment register away
+ * Save a segment register away:
  */
 #define savesegment(seg, value)				\
 	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
 /*
- * x86_32 user gs accessors.
+ * x86-32 user GS accessors:
  */
 #ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs)	(u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v)	loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk)	((tsk)->thread.gs)
-#define lazy_save_gs(v)		savesegment(gs, (v))
-#define lazy_load_gs(v)		loadsegment(gs, (v))
-#else	/* X86_32_LAZY_GS */
-#define get_user_gs(regs)	(u16)((regs)->gs)
-#define set_user_gs(regs, v)	do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk)	(task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v)		do { } while (0)
-#define lazy_load_gs(v)		do { } while (0)
-#endif	/* X86_32_LAZY_GS */
+# ifdef CONFIG_X86_32_LAZY_GS
+#  define get_user_gs(regs)		(u16)({ unsigned long v; savesegment(gs, v); v; })
+#  define set_user_gs(regs, v)		loadsegment(gs, (unsigned long)(v))
+#  define task_user_gs(tsk)		((tsk)->thread.gs)
+#  define lazy_save_gs(v)		savesegment(gs, (v))
+#  define lazy_load_gs(v)		loadsegment(gs, (v))
+# else	/* X86_32_LAZY_GS */
+#  define get_user_gs(regs)		(u16)((regs)->gs)
+#  define set_user_gs(regs, v)		do { (regs)->gs = (v); } while (0)
+#  define task_user_gs(tsk)		(task_pt_regs(tsk)->gs)
+#  define lazy_save_gs(v)		do { } while (0)
+#  define lazy_load_gs(v)		do { } while (0)
+# endif	/* X86_32_LAZY_GS */
 #endif	/* X86_32 */
 
-static inline unsigned long get_limit(unsigned long segment)
-{
-	unsigned long __limit;
-	asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
-	return __limit + 1;
-}
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h
index 460b84f64556..8378b8c9109c 100644
--- a/arch/x86/include/asm/serial.h
+++ b/arch/x86/include/asm/serial.h
@@ -12,11 +12,11 @@
 
 /* Standard COM flags (except for COM4, because of the 8514 problem) */
 #ifdef CONFIG_SERIAL_DETECT_IRQ
-# define STD_COMX_FLAGS	(ASYNC_BOOT_AUTOCONF |	ASYNC_SKIP_TEST	| ASYNC_AUTO_IRQ)
-# define STD_COM4_FLAGS	(ASYNC_BOOT_AUTOCONF |	0		| ASYNC_AUTO_IRQ)
+# define STD_COMX_FLAGS	(UPF_BOOT_AUTOCONF |	UPF_SKIP_TEST	| UPF_AUTO_IRQ)
+# define STD_COM4_FLAGS	(UPF_BOOT_AUTOCONF |	0		| UPF_AUTO_IRQ)
 #else
-# define STD_COMX_FLAGS	(ASYNC_BOOT_AUTOCONF |	ASYNC_SKIP_TEST	| 0		)
-# define STD_COM4_FLAGS	(ASYNC_BOOT_AUTOCONF |	0		| 0		)
+# define STD_COMX_FLAGS	(UPF_BOOT_AUTOCONF |	UPF_SKIP_TEST	| 0		)
+# define STD_COM4_FLAGS	(UPF_BOOT_AUTOCONF |	0		| 0		)
 #endif
 
 #define SERIAL_PORT_DFNS								\
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ff4e7b236e21..f69e06b283fb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
  */
 extern struct boot_params boot_params;
 
+static inline bool kaslr_enabled(void)
+{
+	return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
 /*
  * Do NOT EVER look at the BIOS memory size location.
  * It does not work on many machines.
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..6fe6b182c998 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
 	unsigned long ip;
 	unsigned long flags;
 	unsigned short cs;
-	unsigned short gs;
-	unsigned short fs;
-	unsigned short __pad0;
+	unsigned short __pad2;	/* Was called gs, but was always zero. */
+	unsigned short __pad1;	/* Was called fs, but was always zero. */
+	unsigned short ss;
 	unsigned long err;
 	unsigned long trapno;
 	unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 7a958164088c..89db46752a8f 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,9 +13,7 @@
 			 X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		       unsigned long *pax);
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		     struct pt_regs *regs, unsigned long mask);
 
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f4e270..ba665ebd17bb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
 
 #ifdef CONFIG_X86_SMAP
 
-#define ASM_CLAC							\
-	661: ASM_NOP3 ;							\
-	.pushsection .altinstr_replacement, "ax" ;			\
-	662: __ASM_CLAC ;						\
-	.popsection ;							\
-	.pushsection .altinstructions, "a" ;				\
-	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;	\
-	.popsection
-
-#define ASM_STAC							\
-	661: ASM_NOP3 ;							\
-	.pushsection .altinstr_replacement, "ax" ;			\
-	662: __ASM_STAC ;						\
-	.popsection ;							\
-	.pushsection .altinstructions, "a" ;				\
-	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;	\
-	.popsection
+#define ASM_CLAC \
+	ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+
+#define ASM_STAC \
+	ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
 
 #else /* CONFIG_X86_SMAP */
 
@@ -61,20 +49,20 @@
 static __always_inline void clac(void)
 {
 	/* Note: a barrier is implicit in alternative() */
-	alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+	alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
 }
 
 static __always_inline void stac(void)
 {
 	/* Note: a barrier is implicit in alternative() */
-	alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+	alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
-	ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+	ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
 #define ASM_STAC \
-	ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+	ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
 
 #else /* CONFIG_X86_SMAP */
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3bc835..17a8dced12da 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,12 +150,13 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 }
 
 void cpu_disable_common(void);
-void cpu_die_common(unsigned int cpu);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
+void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
+int common_cpu_die(unsigned int cpu);
 void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6a4b00fafb00..aeb4666e0c0a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -4,6 +4,8 @@
 
 #ifdef __KERNEL__
 
+#include <asm/nops.h>
+
 static inline void native_clts(void)
 {
 	asm volatile("clts");
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
 		       "+m" (*(volatile char __force *)__p));
 }
 
+static inline void clwb(volatile void *__p)
+{
+	volatile struct { char x[64]; } *p = __p;
+
+	asm volatile(ALTERNATIVE_2(
+		".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+		".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
+		X86_FEATURE_CLFLUSHOPT,
+		".byte 0x66, 0x0f, 0xae, 0x30",  /* clwb (%%rax) */
+		X86_FEATURE_CLWB)
+		: [p] "+m" (*p)
+		: [pax] "a" (p));
+}
+
+static inline void pcommit_sfence(void)
+{
+	alternative(ASM_NOP7,
+		    ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
+		    "sfence",
+		    X86_FEATURE_PCOMMIT);
+}
+
 #define nop() asm volatile ("nop")
 
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1d4e4f279a32..b4bdec3e9523 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,19 +13,44 @@
 #include <asm/types.h>
 
 /*
+ * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
+ * reserve at the top of the kernel stack.  We do it because of a nasty
+ * 32-bit corner case.  On x86_32, the hardware stack frame is
+ * variable-length.  Except for vm86 mode, struct pt_regs assumes a
+ * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
+ * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
+ * does in at least one case:
+ *
+ * If we take an NMI early enough in SYSENTER, then we can end up with
+ * pt_regs that extends above sp0.  On the way out, in the espfix code,
+ * we can read the saved SS value, but that value will be above sp0.
+ * Without this offset, that can result in a page fault.  (We are
+ * careful that, in this case, the value we read doesn't matter.)
+ *
+ * In vm86 mode, the hardware frame is much longer still, but we neither
+ * access the extra members from NMI context, nor do we write such a
+ * frame at sp0 at all.
+ *
+ * x86_64 has a fixed-length stack frame.
+ */
+#ifdef CONFIG_X86_32
+# define TOP_OF_KERNEL_STACK_PADDING 8
+#else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+#endif
+
+/*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
  * - this struct shares the supervisor stack pages
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
-struct exec_domain;
 #include <asm/processor.h>
 #include <linux/atomic.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
@@ -39,7 +64,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.saved_preempt_count = INIT_PREEMPT_COUNT,	\
@@ -145,7 +169,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN		(THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET	(5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -158,10 +181,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 
 static inline struct thread_info *current_thread_info(void)
 {
-	struct thread_info *ti;
-	ti = (void *)(this_cpu_read_stable(kernel_stack) +
-		      KERNEL_STACK_OFFSET - THREAD_SIZE);
-	return ti;
+	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
@@ -177,16 +197,37 @@ static inline unsigned long current_stack_pointer(void)
 
 #else /* !__ASSEMBLY__ */
 
-/* how to get the thread information struct from ASM */
+/* Load thread_info address into "reg" */
 #define GET_THREAD_INFO(reg) \
 	_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-	_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+	_ASM_SUB $(THREAD_SIZE),reg ;
 
 /*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
+ * ASM operand which evaluates to a 'thread_info' address of
+ * the current task, if it is known that "reg" is exactly "off"
+ * bytes below the top of the stack currently.
+ *
+ * ( The kernel stack's size is known at build time, it is usually
+ *   2 or 4 pages, and the bottom  of the kernel stack contains
+ *   the thread_info structure. So to access the thread_info very
+ *   quickly from assembly code we can calculate down from the
+ *   top of the kernel stack to the bottom, using constant,
+ *   build-time calculations only. )
+ *
+ * For example, to fetch the current thread_info->flags value into %eax
+ * on x86-64 defconfig kernels, in syscall entry code where RSP is
+ * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+ * stack:
+ *
+ *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+ *
+ * will translate to:
+ *
+ *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
+ *
+ * which is below the current RSP by almost 16K.
  */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
 
 #endif
 
@@ -236,6 +277,16 @@ static inline bool is_ia32_task(void)
 #endif
 	return false;
 }
+
+/*
+ * Force syscall return via IRET by making it look as if there was
+ * some work pending. IRET is our most capable (but slowest) syscall
+ * return path, which is able to restore modified SS, CS and certain
+ * EFLAGS values that other (fast) syscall return instructions
+ * are not able to restore properly.
+ */
+#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 12a26b979bf1..f2f9b39b274a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
 }
 
 unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+copy_user_handle_tail(char *to, char *from, unsigned len);
 
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 44e6dd7e36a2..ab456dc233b5 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,7 +7,6 @@
 #define SETUP_DTB			2
 #define SETUP_PCI			3
 #define SETUP_EFI			4
-#define SETUP_KASLR			5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
@@ -16,6 +15,7 @@
 
 /* loadflags */
 #define LOADED_HIGH	(1<<0)
+#define KASLR_FLAG	(1<<1)
 #define QUIET_FLAG	(1<<5)
 #define KEEP_SEGMENTS	(1<<6)
 #define CAN_USE_HEAP	(1<<7)
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index d993e33f5236..960a8a9dc4ab 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -33,6 +33,16 @@
 #define E820_NVS	4
 #define E820_UNUSABLE	5
 
+/*
+ * This is a non-standardized way to represent ADR or NVDIMM regions that
+ * persist over a reboot.  The kernel will ignore their special capabilities
+ * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ *
+ * ( Note that older platforms also used 6 for the same type of memory,
+ *   but newer versions switched to 12 as 6 was assigned differently.  Some
+ *   time they will learn... )
+ */
+#define E820_PRAM	12
 
 /*
  * reserved RAM used by kernel itself
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 90c458e66e13..ce6068dbcfbc 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -225,6 +225,8 @@
 #define HV_STATUS_INVALID_HYPERCALL_CODE	2
 #define HV_STATUS_INVALID_HYPERCALL_INPUT	3
 #define HV_STATUS_INVALID_ALIGNMENT		4
+#define HV_STATUS_INSUFFICIENT_MEMORY		11
+#define HV_STATUS_INVALID_CONNECTION_ID		18
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
 typedef struct _HV_REFERENCE_TSC_PAGE {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 3ce079136c11..c469490db4a8 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -61,6 +61,9 @@
 #define MSR_OFFCORE_RSP_1		0x000001a7
 #define MSR_NHM_TURBO_RATIO_LIMIT	0x000001ad
 #define MSR_IVT_TURBO_RATIO_LIMIT	0x000001ae
+#define MSR_TURBO_RATIO_LIMIT		0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1		0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2		0x000001af
 
 #define MSR_LBR_SELECT			0x000001c8
 #define MSR_LBR_TOS			0x000001c9
@@ -74,6 +77,24 @@
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
+#define MSR_IA32_RTIT_CTL		0x00000570
+#define RTIT_CTL_TRACEEN		BIT(0)
+#define RTIT_CTL_OS			BIT(2)
+#define RTIT_CTL_USR			BIT(3)
+#define RTIT_CTL_CR3EN			BIT(7)
+#define RTIT_CTL_TOPA			BIT(8)
+#define RTIT_CTL_TSC_EN			BIT(10)
+#define RTIT_CTL_DISRETC		BIT(11)
+#define RTIT_CTL_BRANCH_EN		BIT(13)
+#define MSR_IA32_RTIT_STATUS		0x00000571
+#define RTIT_STATUS_CONTEXTEN		BIT(1)
+#define RTIT_STATUS_TRIGGEREN		BIT(2)
+#define RTIT_STATUS_ERROR		BIT(4)
+#define RTIT_STATUS_STOPPED		BIT(5)
+#define MSR_IA32_RTIT_CR3_MATCH		0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
+
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
 #define MSR_MTRRfix16K_A0000		0x00000259
@@ -147,6 +168,11 @@
 #define MSR_PP1_ENERGY_STATUS		0x00000641
 #define MSR_PP1_POLICY			0x00000642
 
+#define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES		0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES	0x0000065B
+
 #define MSR_CORE_C1_RES			0x00000660
 
 #define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a88851..580aee3072e0 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
@@ -41,15 +45,17 @@
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa4d999..bc16115af39b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long rbp;
 	unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
 	unsigned long rdx;
 	unsigned long rsi;
 	unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long rip;
 	unsigned long cs;
 	unsigned long eflags;
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d8b9f9081e86..16dc4e8a2cd3 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,9 +177,24 @@ struct sigcontext {
 	__u64 rip;
 	__u64 eflags;		/* RFLAGS */
 	__u16 cs;
-	__u16 gs;
-	__u16 fs;
-	__u16 __pad0;
+
+	/*
+	 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+	 * Linux saved and restored fs and gs in these slots.  This
+	 * was counterproductive, as fsbase and gsbase were never
+	 * saved, so arch_prctl was presumably unreliable.
+	 *
+	 * If these slots are ever needed for any other purpose, there
+	 * is some risk that very old 64-bit binaries could get
+	 * confused.  I doubt that many such binaries still work,
+	 * though, since the same patch in 2.5.64 also removed the
+	 * 64-bit set_thread_area syscall, so it appears that there is
+	 * no TLS API that works in both pre- and post-2.5.64 kernels.
+	 */
+	__u16 __pad2;		/* Was gs. */
+	__u16 __pad1;		/* Was fs. */
+
+	__u16 ss;
 	__u64 err;
 	__u64 trapno;
 	__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1deb91a..1fe92181ee9e 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
+#define EXIT_REASON_RDTSCP              51
 #define EXIT_REASON_PREEMPTION_TIMER    52
 #define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cdb1b70ddad0..9bcd0b56ca17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_IA32_EMULATION)	+= syscall_32.o
 obj-$(CONFIG_X86_VSYSCALL_EMULATION)	+= vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
@@ -94,6 +95,7 @@ obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
+obj-$(CONFIG_X86_PMEM_LEGACY)	+= pmem.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3d525c6124f6..803b684676ff 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1338,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
 }
 
 /*
+ * ACPI offers an alternative platform interface model that removes
+ * ACPI hardware requirements for platforms that do not implement
+ * the PC Architecture.
+ *
+ * We initialize the Hardware-reduced ACPI model here:
+ */
+static void __init acpi_reduced_hw_init(void)
+{
+	if (acpi_gbl_reduced_hardware) {
+		/*
+		 * Override x86_init functions and bypass legacy pic
+		 * in Hardware-reduced ACPI mode
+		 */
+		x86_init.timers.timer_init	= x86_init_noop;
+		x86_init.irqs.pre_vector_init	= x86_init_noop;
+		legacy_pic			= &null_legacy_pic;
+	}
+}
+
+/*
  * If your system is blacklisted here, but you find that acpi=force
  * works for you, please contact linux-acpi@vger.kernel.org
  */
@@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void)
 	 */
 	early_acpi_process_madt();
 
+	/*
+	 * Hardware-reduced ACPI mode initialization:
+	 */
+	acpi_reduced_hw_init();
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 703130f469ec..aef653193160 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, ...)				\
-do {							\
-	if (debug_alternative)				\
-		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
+#define DPRINTK(fmt, args...)						\
+do {									\
+	if (debug_alternative)						\
+		printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);	\
+} while (0)
+
+#define DUMP_BYTES(buf, len, fmt, args...)				\
+do {									\
+	if (unlikely(debug_alternative)) {				\
+		int j;							\
+									\
+		if (!(len))						\
+			break;						\
+									\
+		printk(KERN_DEBUG fmt, ##args);				\
+		for (j = 0; j < (len) - 1; j++)				\
+			printk(KERN_CONT "%02hhx ", buf[j]);		\
+		printk(KERN_CONT "%02hhx\n", buf[j]);			\
+	}								\
 } while (0)
 
 /*
@@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-/* Replace instructions with better alternatives for this CPU type.
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that asymmetric systems where
-   APs have less capabilities than the boot processor are not handled.
-   Tough. Make sure you disable such features by hand. */
+/*
+ * Are we looking at a near JMP with a 1 or 4-byte displacement.
+ */
+static inline bool is_jmp(const u8 opcode)
+{
+	return opcode == 0xeb || opcode == 0xe9;
+}
+
+static void __init_or_module
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+{
+	u8 *next_rip, *tgt_rip;
+	s32 n_dspl, o_dspl;
+	int repl_len;
+
+	if (a->replacementlen != 5)
+		return;
+
+	o_dspl = *(s32 *)(insnbuf + 1);
+
+	/* next_rip of the replacement JMP */
+	next_rip = repl_insn + a->replacementlen;
+	/* target rip of the replacement JMP */
+	tgt_rip  = next_rip + o_dspl;
+	n_dspl = tgt_rip - orig_insn;
+
+	DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+
+	if (tgt_rip - orig_insn >= 0) {
+		if (n_dspl - 2 <= 127)
+			goto two_byte_jmp;
+		else
+			goto five_byte_jmp;
+	/* negative offset */
+	} else {
+		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
+			goto two_byte_jmp;
+		else
+			goto five_byte_jmp;
+	}
+
+two_byte_jmp:
+	n_dspl -= 2;
+
+	insnbuf[0] = 0xeb;
+	insnbuf[1] = (s8)n_dspl;
+	add_nops(insnbuf + 2, 3);
+
+	repl_len = 2;
+	goto done;
+
+five_byte_jmp:
+	n_dspl -= 5;
+
+	insnbuf[0] = 0xe9;
+	*(s32 *)&insnbuf[1] = n_dspl;
 
+	repl_len = 5;
+
+done:
+
+	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
+		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+}
+
+static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+{
+	if (instr[0] != 0x90)
+		return;
+
+	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+
+	DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+		   instr, a->instrlen - a->padlen, a->padlen);
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have less capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
 void __init_or_module apply_alternatives(struct alt_instr *start,
 					 struct alt_instr *end)
 {
@@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 	u8 *instr, *replacement;
 	u8 insnbuf[MAX_PATCH_LEN];
 
-	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+	DPRINTK("alt table %p -> %p", start, end);
 	/*
 	 * The scan order should be from start to end. A later scanned
-	 * alternative code can overwrite a previous scanned alternative code.
+	 * alternative code can overwrite previously scanned alternative code.
 	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
 	 * patch code.
 	 *
@@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 	 * order.
 	 */
 	for (a = start; a < end; a++) {
+		int insnbuf_sz = 0;
+
 		instr = (u8 *)&a->instr_offset + a->instr_offset;
 		replacement = (u8 *)&a->repl_offset + a->repl_offset;
-		BUG_ON(a->replacementlen > a->instrlen);
 		BUG_ON(a->instrlen > sizeof(insnbuf));
 		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-		if (!boot_cpu_has(a->cpuid))
+		if (!boot_cpu_has(a->cpuid)) {
+			if (a->padlen > 1)
+				optimize_nops(a, instr);
+
 			continue;
+		}
+
+		DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+			a->cpuid >> 5,
+			a->cpuid & 0x1f,
+			instr, a->instrlen,
+			replacement, a->replacementlen, a->padlen);
+
+		DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+		DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
 
 		memcpy(insnbuf, replacement, a->replacementlen);
+		insnbuf_sz = a->replacementlen;
 
 		/* 0xe8 is a relative jump; fix the offset. */
-		if (*insnbuf == 0xe8 && a->replacementlen == 5)
-		    *(s32 *)(insnbuf + 1) += replacement - instr;
+		if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+			*(s32 *)(insnbuf + 1) += replacement - instr;
+			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+				*(s32 *)(insnbuf + 1),
+				(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+		}
+
+		if (a->replacementlen && is_jmp(replacement[0]))
+			recompute_jump(a, instr, replacement, insnbuf);
 
-		add_nops(insnbuf + a->replacementlen,
-			 a->instrlen - a->replacementlen);
+		if (a->instrlen > a->replacementlen) {
+			add_nops(insnbuf + a->replacementlen,
+				 a->instrlen - a->replacementlen);
+			insnbuf_sz += a->instrlen - a->replacementlen;
+		}
+		DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
 
-		text_poke_early(instr, insnbuf, a->instrlen);
+		text_poke_early(instr, insnbuf, insnbuf_sz);
 	}
 }
 
 #ifdef CONFIG_SMP
-
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
 				  u8 *text, u8 *text_end)
 {
@@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 	smp->locks_end	= locks_end;
 	smp->text	= text;
 	smp->text_end	= text_end;
-	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-		__func__, smp->locks, smp->locks_end,
+	DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
+		smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
 	list_add_tail(&smp->next, &smp_alt_modules);
@@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
 
 	return 0;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PARAVIRT
 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
 	if (likely(!bp_patching_in_progress))
 		return 0;
 
-	if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+	if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
 		return 0;
 
 	/* set up the specified breakpoint handler */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad3639ae1b9b..dcb52850a28f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1084,67 +1084,6 @@ void lapic_shutdown(void)
 	local_irq_restore(flags);
 }
 
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-	unsigned int reg0, reg1;
-
-	/*
-	 * The version register is read-only in a real APIC.
-	 */
-	reg0 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-	reg1 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-	/*
-	 * The two version reads above should print the same
-	 * numbers.  If the second one is different, then we
-	 * poke at a non-APIC.
-	 */
-	if (reg1 != reg0)
-		return 0;
-
-	/*
-	 * Check if the version looks reasonably.
-	 */
-	reg1 = GET_APIC_VERSION(reg0);
-	if (reg1 == 0x00 || reg1 == 0xff)
-		return 0;
-	reg1 = lapic_get_maxlvt();
-	if (reg1 < 0x02 || reg1 == 0xff)
-		return 0;
-
-	/*
-	 * The ID register is read/write in a real APIC.
-	 */
-	reg0 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
-	reg1 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
-	if (reg1 != (reg0 ^ apic->apic_id_mask))
-		return 0;
-
-	/*
-	 * The next two are just to see if we have sane values.
-	 * They're only really relevant if we're in Virtual Wire
-	 * compatibility mode, but most boxes are anymore.
-	 */
-	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-	reg1 = apic_read(APIC_LVT1);
-	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-	return 1;
-}
-
 /**
  * sync_Arb_IDs - synchronize APIC bus arbitration IDs
  */
@@ -2283,7 +2222,6 @@ int __init APIC_init_uniprocessor(void)
 		disable_ioapic_support();
 
 	default_setup_apic_routing();
-	verify_local_APIC();
 	apic_bsp_setup(true);
 	return 0;
 }
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c2fd21fed002..017149cded07 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -37,10 +37,12 @@ static const struct apic apic_numachip;
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned long value;
-	unsigned int id;
+	unsigned int id = (x >> 24) & 0xff;
 
-	rdmsrl(MSR_FAM10H_NODE_ID, value);
-	id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);
+	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+		rdmsrl(MSR_FAM10H_NODE_ID, value);
+		id |= (value << 2) & 0xff00;
+	}
 
 	return id;
 }
@@ -155,10 +157,18 @@ static int __init numachip_probe(void)
 
 static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 {
-	if (c->phys_proc_id != node) {
-		c->phys_proc_id = node;
-		per_cpu(cpu_llc_id, smp_processor_id()) = node;
+	u64 val;
+	u32 nodes = 1;
+
+	this_cpu_write(cpu_llc_id, node);
+
+	/* Account for nodes per socket in multi-core-module processors */
+	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+		rdmsrl(MSR_FAM10H_NODE_ID, val);
+		nodes = ((val >> 3) & 7) + 1;
 	}
+
+	c->phys_proc_id = node / nodes;
 }
 
 static int __init numachip_system_init(void)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e658f21681c8..ab3219b3fbda 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -135,12 +135,12 @@ static void init_x2apic_ldr(void)
 
 	per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
 
-	__cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+	cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
 	for_each_online_cpu(cpu) {
 		if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
 			continue;
-		__cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
-		__cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
+		cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+		cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
 	}
 }
 
@@ -171,8 +171,8 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		for_each_online_cpu(cpu) {
 			if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
 				continue;
-			__cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
-			__cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+			cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+			cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
 		}
 		free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
 		free_cpumask_var(per_cpu(ipi_mask, this_cpu));
@@ -195,7 +195,7 @@ static int x2apic_init_cpu_notifier(void)
 
 	BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
 
-	__cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
+	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
 	register_hotcpu_notifier(&x2apic_cpu_notifier);
 	return 1;
 }
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8e9dcfd630e4..c8d92950bc04 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -144,33 +144,60 @@ static void __init uv_set_apicid_hibit(void)
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	int pnodeid, is_uv1, is_uv2, is_uv3;
-
-	is_uv1 = !strcmp(oem_id, "SGI");
-	is_uv2 = !strcmp(oem_id, "SGI2");
-	is_uv3 = !strncmp(oem_id, "SGI3", 4);	/* there are varieties of UV3 */
-	if (is_uv1 || is_uv2 || is_uv3) {
-		uv_hub_info->hub_revision =
-			(is_uv1 ? UV1_HUB_REVISION_BASE :
-			(is_uv2 ? UV2_HUB_REVISION_BASE :
-				  UV3_HUB_REVISION_BASE));
-		pnodeid = early_get_pnodeid();
-		early_get_apic_pnode_shift();
-		x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
-		x86_platform.nmi_init = uv_nmi_init;
-		if (!strcmp(oem_table_id, "UVL"))
-			uv_system_type = UV_LEGACY_APIC;
-		else if (!strcmp(oem_table_id, "UVX"))
-			uv_system_type = UV_X2APIC;
-		else if (!strcmp(oem_table_id, "UVH")) {
-			__this_cpu_write(x2apic_extra_bits,
-				pnodeid << uvh_apicid.s.pnode_shift);
-			uv_system_type = UV_NON_UNIQUE_APIC;
-			uv_set_apicid_hibit();
-			return 1;
-		}
+	int pnodeid;
+	int uv_apic;
+
+	if (strncmp(oem_id, "SGI", 3) != 0)
+		return 0;
+
+	/*
+	 * Determine UV arch type.
+	 *   SGI: UV100/1000
+	 *   SGI2: UV2000/3000
+	 *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+	 */
+	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
+
+	if (uv_hub_info->hub_revision == 0)
+		goto badbios;
+
+	pnodeid = early_get_pnodeid();
+	early_get_apic_pnode_shift();
+	x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
+	x86_platform.nmi_init = uv_nmi_init;
+
+	if (!strcmp(oem_table_id, "UVX")) {		/* most common */
+		uv_system_type = UV_X2APIC;
+		uv_apic = 0;
+
+	} else if (!strcmp(oem_table_id, "UVH")) {	/* only UV1 systems */
+		uv_system_type = UV_NON_UNIQUE_APIC;
+		__this_cpu_write(x2apic_extra_bits,
+			pnodeid << uvh_apicid.s.pnode_shift);
+		uv_set_apicid_hibit();
+		uv_apic = 1;
+
+	} else	if (!strcmp(oem_table_id, "UVL")) {	/* only used for */
+		uv_system_type = UV_LEGACY_APIC;	/* very small systems */
+		uv_apic = 0;
+
+	} else {
+		goto badbios;
 	}
-	return 0;
+
+	pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
+		oem_id, oem_table_id, uv_system_type,
+		uv_min_hub_revision_id, uv_apic);
+
+	return uv_apic;
+
+badbios:
+	pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
+	pr_err("Current BIOS not supported, update kernel and/or BIOS\n");
+	BUG();
 }
 
 enum uv_system_type get_uv_system_type(void)
@@ -854,10 +881,14 @@ void __init uv_system_init(void)
 	unsigned long mmr_base, present, paddr;
 	unsigned short pnode_mask;
 	unsigned char n_lshift;
-	char *hub = (is_uv1_hub() ? "UV1" :
-		    (is_uv2_hub() ? "UV2" :
-				    "UV3"));
+	char *hub = (is_uv1_hub() ? "UV100/1000" :
+		    (is_uv2_hub() ? "UV2000/3000" :
+		    (is_uv3_hub() ? "UV300" : NULL)));
 
+	if (!hub) {
+		pr_err("UV: Unknown/unsupported UV hub\n");
+		return;
+	}
 	pr_info("UV: Found %s hub\n", hub);
 	map_low_mmrs();
 
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 3b3b9d33ac1d..47703aed74cf 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -68,7 +68,7 @@ void foo(void)
 
 	/* Offset from the sysenter stack to tss.sp0 */
 	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-		 sizeof(struct tss_struct));
+	       offsetofend(struct tss_struct, SYSENTER_stack));
 
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index fdcbb4d27c9f..5ce6f2da8763 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -81,6 +81,7 @@ int main(void)
 #undef ENTRY
 
 	OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	BLANK();
 
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 80091ae54c2b..9bff68798836 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,8 @@ obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o perf_event_intel_cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_pt.o perf_event_intel_bts.o
 
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
 					   perf_event_intel_uncore_snb.o \
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a220239cea65..fd470ebf924e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -488,6 +489,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 		va_align.mask	  = (upperbit - 1) & PAGE_MASK;
 		va_align.flags    = ALIGN_VA_32 | ALIGN_VA_64;
+
+		/* A random value per boot for bit slice [12:upper_bit) */
+		va_align.bits = get_random_int() & va_align.mask;
 	}
 }
 
@@ -711,6 +715,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
 
 	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+	/* 3DNow or LM implies PREFETCHW */
+	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
+		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2346c95c6ab1..a62cf04dac8a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		c->x86_capability[10] = eax;
 	}
 
+	/* Additional Intel-defined flags: level 0x0000000F */
+	if (c->cpuid_level >= 0x0000000F) {
+		u32 eax, ebx, ecx, edx;
+
+		/* QoS sub-leaf, EAX=0Fh, ECX=0 */
+		cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
+		c->x86_capability[11] = edx;
+		if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
+			/* will be overridden if occupancy monitoring exists */
+			c->x86_cache_max_rmid = ebx;
+
+			/* QoS sub-leaf, EAX=0Fh, ECX=1 */
+			cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
+			c->x86_capability[12] = edx;
+			if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+				c->x86_cache_max_rmid = ecx;
+				c->x86_cache_occ_scale = ebx;
+			}
+		} else {
+			c->x86_cache_max_rmid = -1;
+			c->x86_cache_occ_scale = -1;
+		}
+	}
+
 	/* AMD-defined flags: level 0x80000001 */
 	xlvl = cpuid_eax(0x80000000);
 	c->extended_cpuid_level = xlvl;
@@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c)
 	detect_nopl(c);
 }
 
+static void x86_init_cache_qos(struct cpuinfo_x86 *c)
+{
+	/*
+	 * The heavy lifting of max_rmid and cache_occ_scale are handled
+	 * in get_cpu_cap().  Here we just set the max_rmid for the boot_cpu
+	 * in case CQM bits really aren't there in this CPU.
+	 */
+	if (c != &boot_cpu_data) {
+		boot_cpu_data.x86_cache_max_rmid =
+			min(boot_cpu_data.x86_cache_max_rmid,
+			    c->x86_cache_max_rmid);
+	}
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 
 	init_hypervisor(c);
 	x86_init_rdrand(c);
+	x86_init_cache_qos(c);
 
 	/*
 	 * Clear/Set all flags overriden by options, need do it
@@ -959,38 +998,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 }
 
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-/* May not be __init: called during resume */
-static void syscall32_cpu_init(void)
-{
-	/* Load these always in case some future AMD CPU supports
-	   SYSENTER from compat mode too. */
-	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-#endif		/* CONFIG_IA32_EMULATION */
-#endif		/* CONFIG_X86_64 */
-
+/*
+ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
+ * on 32-bit kernels:
+ */
 #ifdef CONFIG_X86_32
 void enable_sep_cpu(void)
 {
-	int cpu = get_cpu();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss;
+	int cpu;
 
-	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		put_cpu();
-		return;
-	}
+	cpu = get_cpu();
+	tss = &per_cpu(cpu_tss, cpu);
+
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		goto out;
+
+	/*
+	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+	 * see the big comment in struct x86_hw_tss's definition.
+	 */
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
-	tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
-	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
-	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+
+	wrmsr(MSR_IA32_SYSENTER_ESP,
+	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
+	      0);
+
+	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+
+out:
 	put_cpu();
 }
 #endif
@@ -1118,7 +1156,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
@@ -1130,8 +1168,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
@@ -1171,10 +1209,23 @@ void syscall_init(void)
 	 */
 	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
 	wrmsrl(MSR_LSTAR, system_call);
-	wrmsrl(MSR_CSTAR, ignore_sysret);
 
 #ifdef CONFIG_IA32_EMULATION
-	syscall32_cpu_init();
+	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+	/*
+	 * This only works on Intel CPUs.
+	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
+	 * This does not cause SYSENTER to jump to the wrong location, because
+	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+	 */
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+#else
+	wrmsrl(MSR_CSTAR, ignore_sysret);
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
 #endif
 
 	/* Flags to clear on syscall */
@@ -1226,6 +1277,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+	(unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
@@ -1307,7 +1367,7 @@ void cpu_init(void)
 	 */
 	load_ucode_ap();
 
-	t = &per_cpu(init_tss, cpu);
+	t = &per_cpu(cpu_tss, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1391,7 +1451,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
 	wait_for_master_cpu(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 659643376dbf..edcb0e28c336 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -7,16 +7,14 @@
  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  */
 
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/compiler.h>
+#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/sysfs.h>
 #include <linux/pci.h>
 
 #include <asm/processor.h>
-#include <linux/smp.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
@@ -116,10 +114,10 @@ static const struct _cache_table cache_table[] =
 
 
 enum _cache_type {
-	CACHE_TYPE_NULL	= 0,
-	CACHE_TYPE_DATA = 1,
-	CACHE_TYPE_INST = 2,
-	CACHE_TYPE_UNIFIED = 3
+	CTYPE_NULL = 0,
+	CTYPE_DATA = 1,
+	CTYPE_INST = 2,
+	CTYPE_UNIFIED = 3
 };
 
 union _cpuid4_leaf_eax {
@@ -159,11 +157,6 @@ struct _cpuid4_info_regs {
 	struct amd_northbridge *nb;
 };
 
-struct _cpuid4_info {
-	struct _cpuid4_info_regs base;
-	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
-};
-
 unsigned short			num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -220,6 +213,13 @@ static const unsigned short assocs[] = {
 static const unsigned char levels[] = { 1, 1, 2, 3 };
 static const unsigned char types[] = { 1, 2, 3, 3 };
 
+static const enum cache_type cache_type_map[] = {
+	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
+	[CTYPE_DATA] = CACHE_TYPE_DATA,
+	[CTYPE_INST] = CACHE_TYPE_INST,
+	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
 static void
 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		     union _cpuid4_leaf_ebx *ebx,
@@ -291,14 +291,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
-struct _cache_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
-	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
-			 unsigned int);
-};
-
 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
+
 /*
  * L3 cache descriptors
  */
@@ -325,20 +319,6 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb)
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
-{
-	int node;
-
-	/* only for L3, and not in virtualized environments */
-	if (index < 3)
-		return;
-
-	node = amd_get_nb_id(smp_processor_id());
-	this_leaf->nb = node_to_amd_nb(node);
-	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
-		amd_calc_l3_indices(this_leaf->nb);
-}
-
 /*
  * check whether a slot used for disabling an L3 index is occupied.
  * @l3: L3 cache descriptor
@@ -359,15 +339,13 @@ int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 	return -1;
 }
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 				  unsigned int slot)
 {
 	int index;
+	struct amd_northbridge *nb = this_leaf->priv;
 
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		return -EINVAL;
-
-	index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
+	index = amd_get_l3_disable_slot(nb, slot);
 	if (index >= 0)
 		return sprintf(buf, "%d\n", index);
 
@@ -376,9 +354,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 
 #define SHOW_CACHE_DISABLE(slot)					\
 static ssize_t								\
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
-			  unsigned int cpu)				\
+cache_disable_##slot##_show(struct device *dev,				\
+			    struct device_attribute *attr, char *buf)	\
 {									\
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 	return show_cache_disable(this_leaf, buf, slot);		\
 }
 SHOW_CACHE_DISABLE(0)
@@ -446,25 +425,23 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
 	return 0;
 }
 
-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-				  const char *buf, size_t count,
-				  unsigned int slot)
+static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
+				   const char *buf, size_t count,
+				   unsigned int slot)
 {
 	unsigned long val = 0;
 	int cpu, err = 0;
+	struct amd_northbridge *nb = this_leaf->priv;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		return -EINVAL;
-
-	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
 	if (kstrtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
-	err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
+	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 	if (err) {
 		if (err == -EEXIST)
 			pr_warning("L3 slot %d in use/index already disabled!\n",
@@ -476,41 +453,36 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 
 #define STORE_CACHE_DISABLE(slot)					\
 static ssize_t								\
-store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
-			   const char *buf, size_t count,		\
-			   unsigned int cpu)				\
+cache_disable_##slot##_store(struct device *dev,			\
+			     struct device_attribute *attr,		\
+			     const char *buf, size_t count)		\
 {									\
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 	return store_cache_disable(this_leaf, buf, count, slot);	\
 }
 STORE_CACHE_DISABLE(0)
 STORE_CACHE_DISABLE(1)
 
-static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
-		show_cache_disable_0, store_cache_disable_0);
-static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
-		show_cache_disable_1, store_cache_disable_1);
-
-static ssize_t
-show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+static ssize_t subcaches_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		return -EINVAL;
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 }
 
-static ssize_t
-store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
-		unsigned int cpu)
+static ssize_t subcaches_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
 {
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 	unsigned long val;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		return -EINVAL;
-
 	if (kstrtoul(buf, 16, &val) < 0)
 		return -EINVAL;
 
@@ -520,9 +492,92 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 	return count;
 }
 
-static struct _cache_attr subcaches =
-	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+static DEVICE_ATTR_RW(cache_disable_0);
+static DEVICE_ATTR_RW(cache_disable_1);
+static DEVICE_ATTR_RW(subcaches);
+
+static umode_t
+cache_private_attrs_is_visible(struct kobject *kobj,
+			       struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	umode_t mode = attr->mode;
+
+	if (!this_leaf->priv)
+		return 0;
+
+	if ((attr == &dev_attr_subcaches.attr) &&
+	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		return mode;
+
+	if ((attr == &dev_attr_cache_disable_0.attr ||
+	     attr == &dev_attr_cache_disable_1.attr) &&
+	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		return mode;
+
+	return 0;
+}
+
+static struct attribute_group cache_private_group = {
+	.is_visible = cache_private_attrs_is_visible,
+};
+
+static void init_amd_l3_attrs(void)
+{
+	int n = 1;
+	static struct attribute **amd_l3_attrs;
+
+	if (amd_l3_attrs) /* already initialized */
+		return;
+
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		n += 2;
+	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		n += 1;
+
+	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
+	if (!amd_l3_attrs)
+		return;
+
+	n = 0;
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
+		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
+	}
+	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 
+	cache_private_group.attrs = amd_l3_attrs;
+}
+
+const struct attribute_group *
+cache_get_priv_group(struct cacheinfo *this_leaf)
+{
+	struct amd_northbridge *nb = this_leaf->priv;
+
+	if (this_leaf->level < 3 || !nb)
+		return NULL;
+
+	if (nb && nb->l3_cache.indices)
+		init_amd_l3_attrs();
+
+	return &cache_private_group;
+}
+
+static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
+{
+	int node;
+
+	/* only for L3, and not in virtualized environments */
+	if (index < 3)
+		return;
+
+	node = amd_get_nb_id(smp_processor_id());
+	this_leaf->nb = node_to_amd_nb(node);
+	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
+		amd_calc_l3_indices(this_leaf->nb);
+}
 #else
 #define amd_init_l3_cache(x, y)
 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
@@ -546,7 +601,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 	}
 
-	if (eax.split.type == CACHE_TYPE_NULL)
+	if (eax.split.type == CTYPE_NULL)
 		return -EIO; /* better error ? */
 
 	this_leaf->eax = eax;
@@ -575,7 +630,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 		/* Do cpuid(op) loop to find out num_cache_leaves */
 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 		cache_eax.full = eax;
-	} while (cache_eax.split.type != CACHE_TYPE_NULL);
+	} while (cache_eax.split.type != CTYPE_NULL);
 	return i;
 }
 
@@ -626,9 +681,9 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
 			switch (this_leaf.eax.split.level) {
 			case 1:
-				if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
+				if (this_leaf.eax.split.type == CTYPE_DATA)
 					new_l1d = this_leaf.size/1024;
-				else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
+				else if (this_leaf.eax.split.type == CTYPE_INST)
 					new_l1i = this_leaf.size/1024;
 				break;
 			case 2:
@@ -747,55 +802,52 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	return l2;
 }
 
-#ifdef CONFIG_SYSFS
-
-/* pointer to _cpuid4_info array (for each cache leaf) */
-static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
-#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
-
-#ifdef CONFIG_SMP
-
-static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
+static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
+				    struct _cpuid4_info_regs *base)
 {
-	struct _cpuid4_info *this_leaf;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf;
 	int i, sibling;
 
 	if (cpu_has_topoext) {
 		unsigned int apicid, nshared, first, last;
 
-		if (!per_cpu(ici_cpuid4_info, cpu))
-			return 0;
-
-		this_leaf = CPUID4_INFO_IDX(cpu, index);
-		nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
+		this_leaf = this_cpu_ci->info_list + index;
+		nshared = base->eax.split.num_threads_sharing + 1;
 		apicid = cpu_data(cpu).apicid;
 		first = apicid - (apicid % nshared);
 		last = first + nshared - 1;
 
 		for_each_online_cpu(i) {
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
+				continue;
+
 			apicid = cpu_data(i).apicid;
 			if ((apicid < first) || (apicid > last))
 				continue;
-			if (!per_cpu(ici_cpuid4_info, i))
-				continue;
-			this_leaf = CPUID4_INFO_IDX(i, index);
+
+			this_leaf = this_cpu_ci->info_list + index;
 
 			for_each_online_cpu(sibling) {
 				apicid = cpu_data(sibling).apicid;
 				if ((apicid < first) || (apicid > last))
 					continue;
-				set_bit(sibling, this_leaf->shared_cpu_map);
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
 			}
 		}
 	} else if (index == 3) {
 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
-			if (!per_cpu(ici_cpuid4_info, i))
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
 				continue;
-			this_leaf = CPUID4_INFO_IDX(i, index);
+			this_leaf = this_cpu_ci->info_list + index;
 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 				if (!cpu_online(sibling))
 					continue;
-				set_bit(sibling, this_leaf->shared_cpu_map);
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
 			}
 		}
 	} else
@@ -804,457 +856,86 @@ static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 	return 1;
 }
 
-static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
+static void __cache_cpumap_setup(unsigned int cpu, int index,
+				 struct _cpuid4_info_regs *base)
 {
-	struct _cpuid4_info *this_leaf, *sibling_leaf;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
 	int index_msb, i;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if (cache_shared_amd_cpu_map_setup(cpu, index))
+		if (__cache_amd_cpumap_setup(cpu, index, base))
 			return;
 	}
 
-	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
+	this_leaf = this_cpu_ci->info_list + index;
+	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 	if (num_threads_sharing == 1)
-		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
-	else {
-		index_msb = get_count_order(num_threads_sharing);
-
-		for_each_online_cpu(i) {
-			if (cpu_data(i).apicid >> index_msb ==
-			    c->apicid >> index_msb) {
-				cpumask_set_cpu(i,
-					to_cpumask(this_leaf->shared_cpu_map));
-				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
-					sibling_leaf =
-						CPUID4_INFO_IDX(i, index);
-					cpumask_set_cpu(cpu, to_cpumask(
-						sibling_leaf->shared_cpu_map));
-				}
-			}
-		}
-	}
-}
-static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
-	struct _cpuid4_info	*this_leaf, *sibling_leaf;
-	int sibling;
-
-	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
-		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
-		cpumask_clear_cpu(cpu,
-				  to_cpumask(sibling_leaf->shared_cpu_map));
-	}
-}
-#else
-static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
-{
-}
-
-static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
-}
-#endif
-
-static void free_cache_attributes(unsigned int cpu)
-{
-	int i;
-
-	for (i = 0; i < num_cache_leaves; i++)
-		cache_remove_shared_cpu_map(cpu, i);
-
-	kfree(per_cpu(ici_cpuid4_info, cpu));
-	per_cpu(ici_cpuid4_info, cpu) = NULL;
-}
-
-static void get_cpu_leaves(void *_retval)
-{
-	int j, *retval = _retval, cpu = smp_processor_id();
+		return;
 
-	/* Do cpuid and store the results */
-	for (j = 0; j < num_cache_leaves; j++) {
-		struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
+	index_msb = get_count_order(num_threads_sharing);
 
-		*retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
-		if (unlikely(*retval < 0)) {
-			int i;
+	for_each_online_cpu(i)
+		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
+			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 
-			for (i = 0; i < j; i++)
-				cache_remove_shared_cpu_map(cpu, i);
-			break;
+			if (i == cpu || !sib_cpu_ci->info_list)
+				continue;/* skip if itself or no cacheinfo */
+			sibling_leaf = sib_cpu_ci->info_list + index;
+			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 		}
-		cache_shared_cpu_map_setup(cpu, j);
-	}
 }
 
-static int detect_cache_attributes(unsigned int cpu)
+static void ci_leaf_init(struct cacheinfo *this_leaf,
+			 struct _cpuid4_info_regs *base)
 {
-	int			retval;
-
-	if (num_cache_leaves == 0)
-		return -ENOENT;
-
-	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
-	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
-		return -ENOMEM;
-
-	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
-	if (retval) {
-		kfree(per_cpu(ici_cpuid4_info, cpu));
-		per_cpu(ici_cpuid4_info, cpu) = NULL;
-	}
-
-	return retval;
+	this_leaf->level = base->eax.split.level;
+	this_leaf->type = cache_type_map[base->eax.split.type];
+	this_leaf->coherency_line_size =
+				base->ebx.split.coherency_line_size + 1;
+	this_leaf->ways_of_associativity =
+				base->ebx.split.ways_of_associativity + 1;
+	this_leaf->size = base->size;
+	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
+	this_leaf->physical_line_partition =
+				base->ebx.split.physical_line_partition + 1;
+	this_leaf->priv = base->nb;
 }
 
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-#include <linux/cpu.h>
-
-/* pointer to kobject for cpuX/cache */
-static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
-
-struct _index_kobject {
-	struct kobject kobj;
-	unsigned int cpu;
-	unsigned short index;
-};
-
-/* pointer to array of kobjects for cpuX/cache/indexY */
-static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
-#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
-
-#define show_one_plus(file_name, object, val)				\
-static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
-				unsigned int cpu)			\
-{									\
-	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
-}
-
-show_one_plus(level, base.eax.split.level, 0);
-show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
-
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
-			 unsigned int cpu)
-{
-	return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
-}
-
-static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
-					int type, char *buf)
-{
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int ret;
-
-	if (type)
-		ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
-				cpumask_pr_args(mask));
-	else
-		ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb",
-				cpumask_pr_args(mask));
-	buf[ret++] = '\n';
-	buf[ret] = '\0';
-	return ret;
-}
-
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
-					  unsigned int cpu)
+static int __init_cache_level(unsigned int cpu)
 {
-	return show_shared_cpu_map_func(leaf, 0, buf);
-}
-
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
-					   unsigned int cpu)
-{
-	return show_shared_cpu_map_func(leaf, 1, buf);
-}
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
-			 unsigned int cpu)
-{
-	switch (this_leaf->base.eax.split.type) {
-	case CACHE_TYPE_DATA:
-		return sprintf(buf, "Data\n");
-	case CACHE_TYPE_INST:
-		return sprintf(buf, "Instruction\n");
-	case CACHE_TYPE_UNIFIED:
-		return sprintf(buf, "Unified\n");
-	default:
-		return sprintf(buf, "Unknown\n");
-	}
-}
-
-#define to_object(k)	container_of(k, struct _index_kobject, kobj)
-#define to_attr(a)	container_of(a, struct _cache_attr, attr)
-
-#define define_one_ro(_name) \
-static struct _cache_attr _name = \
-	__ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(level);
-define_one_ro(type);
-define_one_ro(coherency_line_size);
-define_one_ro(physical_line_partition);
-define_one_ro(ways_of_associativity);
-define_one_ro(number_of_sets);
-define_one_ro(size);
-define_one_ro(shared_cpu_map);
-define_one_ro(shared_cpu_list);
-
-static struct attribute *default_attrs[] = {
-	&type.attr,
-	&level.attr,
-	&coherency_line_size.attr,
-	&physical_line_partition.attr,
-	&ways_of_associativity.attr,
-	&number_of_sets.attr,
-	&size.attr,
-	&shared_cpu_map.attr,
-	&shared_cpu_list.attr,
-	NULL
-};
-
-#ifdef CONFIG_AMD_NB
-static struct attribute **amd_l3_attrs(void)
-{
-	static struct attribute **attrs;
-	int n;
-
-	if (attrs)
-		return attrs;
-
-	n = ARRAY_SIZE(default_attrs);
-
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		n += 2;
-
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		n += 1;
-
-	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
-	if (attrs == NULL)
-		return attrs = default_attrs;
-
-	for (n = 0; default_attrs[n]; n++)
-		attrs[n] = default_attrs[n];
-
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
-		attrs[n++] = &cache_disable_0.attr;
-		attrs[n++] = &cache_disable_1.attr;
-	}
-
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		attrs[n++] = &subcaches.attr;
-
-	return attrs;
-}
-#endif
-
-static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	struct _cache_attr *fattr = to_attr(attr);
-	struct _index_kobject *this_leaf = to_object(kobj);
-	ssize_t ret;
-
-	ret = fattr->show ?
-		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-			buf, this_leaf->cpu) :
-		0;
-	return ret;
-}
-
-static ssize_t store(struct kobject *kobj, struct attribute *attr,
-		     const char *buf, size_t count)
-{
-	struct _cache_attr *fattr = to_attr(attr);
-	struct _index_kobject *this_leaf = to_object(kobj);
-	ssize_t ret;
-
-	ret = fattr->store ?
-		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-			buf, count, this_leaf->cpu) :
-		0;
-	return ret;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-	.show   = show,
-	.store  = store,
-};
-
-static struct kobj_type ktype_cache = {
-	.sysfs_ops	= &sysfs_ops,
-	.default_attrs	= default_attrs,
-};
-
-static struct kobj_type ktype_percpu_entry = {
-	.sysfs_ops	= &sysfs_ops,
-};
-
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
-{
-	kfree(per_cpu(ici_cache_kobject, cpu));
-	kfree(per_cpu(ici_index_kobject, cpu));
-	per_cpu(ici_cache_kobject, cpu) = NULL;
-	per_cpu(ici_index_kobject, cpu) = NULL;
-	free_cache_attributes(cpu);
-}
-
-static int cpuid4_cache_sysfs_init(unsigned int cpu)
-{
-	int err;
-
-	if (num_cache_leaves == 0)
+	if (!num_cache_leaves)
 		return -ENOENT;
-
-	err = detect_cache_attributes(cpu);
-	if (err)
-		return err;
-
-	/* Allocate all required memory */
-	per_cpu(ici_cache_kobject, cpu) =
-		kzalloc(sizeof(struct kobject), GFP_KERNEL);
-	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
-		goto err_out;
-
-	per_cpu(ici_index_kobject, cpu) = kzalloc(
-	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
-	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
-		goto err_out;
-
+	if (!this_cpu_ci)
+		return -EINVAL;
+	this_cpu_ci->num_levels = 3;
+	this_cpu_ci->num_leaves = num_cache_leaves;
 	return 0;
-
-err_out:
-	cpuid4_cache_sysfs_exit(cpu);
-	return -ENOMEM;
 }
 
-static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
-
-/* Add/Remove cache interface for CPU device */
-static int cache_add_dev(struct device *dev)
+static int __populate_cache_leaves(unsigned int cpu)
 {
-	unsigned int cpu = dev->id;
-	unsigned long i, j;
-	struct _index_kobject *this_object;
-	struct _cpuid4_info   *this_leaf;
-	int retval;
-
-	retval = cpuid4_cache_sysfs_init(cpu);
-	if (unlikely(retval < 0))
-		return retval;
-
-	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
-				      &ktype_percpu_entry,
-				      &dev->kobj, "%s", "cache");
-	if (retval < 0) {
-		cpuid4_cache_sysfs_exit(cpu);
-		return retval;
-	}
+	unsigned int idx, ret;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	struct _cpuid4_info_regs id4_regs = {};
 
-	for (i = 0; i < num_cache_leaves; i++) {
-		this_object = INDEX_KOBJECT_PTR(cpu, i);
-		this_object->cpu = cpu;
-		this_object->index = i;
-
-		this_leaf = CPUID4_INFO_IDX(cpu, i);
-
-		ktype_cache.default_attrs = default_attrs;
-#ifdef CONFIG_AMD_NB
-		if (this_leaf->base.nb)
-			ktype_cache.default_attrs = amd_l3_attrs();
-#endif
-		retval = kobject_init_and_add(&(this_object->kobj),
-					      &ktype_cache,
-					      per_cpu(ici_cache_kobject, cpu),
-					      "index%1lu", i);
-		if (unlikely(retval)) {
-			for (j = 0; j < i; j++)
-				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
-			kobject_put(per_cpu(ici_cache_kobject, cpu));
-			cpuid4_cache_sysfs_exit(cpu);
-			return retval;
-		}
-		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
+	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
+		if (ret)
+			return ret;
+		ci_leaf_init(this_leaf++, &id4_regs);
+		__cache_cpumap_setup(cpu, idx, &id4_regs);
 	}
-	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
-
-	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
 	return 0;
 }
 
-static void cache_remove_dev(struct device *dev)
-{
-	unsigned int cpu = dev->id;
-	unsigned long i;
-
-	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
-		return;
-	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
-		return;
-	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
-
-	for (i = 0; i < num_cache_leaves; i++)
-		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
-	kobject_put(per_cpu(ici_cache_kobject, cpu));
-	cpuid4_cache_sysfs_exit(cpu);
-}
-
-static int cacheinfo_cpu_callback(struct notifier_block *nfb,
-				  unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	struct device *dev;
-
-	dev = get_cpu_device(cpu);
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		cache_add_dev(dev);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		cache_remove_dev(dev);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block cacheinfo_cpu_notifier = {
-	.notifier_call = cacheinfo_cpu_callback,
-};
-
-static int __init cache_sysfs_init(void)
-{
-	int i, err = 0;
-
-	if (num_cache_leaves == 0)
-		return 0;
-
-	cpu_notifier_register_begin();
-	for_each_online_cpu(i) {
-		struct device *dev = get_cpu_device(i);
-
-		err = cache_add_dev(dev);
-		if (err)
-			goto out;
-	}
-	__register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
-out:
-	cpu_notifier_register_done();
-	return err;
-}
-
-device_initcall(cache_sysfs_init);
-
-#endif
+DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
+DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
new file mode 100644
index 000000000000..1c338b0eba05
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pt.h
@@ -0,0 +1,131 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#ifndef __INTEL_PT_H__
+#define __INTEL_PT_H__
+
+/*
+ * Single-entry ToPA: when this close to region boundary, switch
+ * buffers to avoid losing data.
+ */
+#define TOPA_PMI_MARGIN 512
+
+/*
+ * Table of Physical Addresses bits
+ */
+enum topa_sz {
+	TOPA_4K	= 0,
+	TOPA_8K,
+	TOPA_16K,
+	TOPA_32K,
+	TOPA_64K,
+	TOPA_128K,
+	TOPA_256K,
+	TOPA_512K,
+	TOPA_1MB,
+	TOPA_2MB,
+	TOPA_4MB,
+	TOPA_8MB,
+	TOPA_16MB,
+	TOPA_32MB,
+	TOPA_64MB,
+	TOPA_128MB,
+	TOPA_SZ_END,
+};
+
+static inline unsigned int sizes(enum topa_sz tsz)
+{
+	return 1 << (tsz + 12);
+};
+
+struct topa_entry {
+	u64	end	: 1;
+	u64	rsvd0	: 1;
+	u64	intr	: 1;
+	u64	rsvd1	: 1;
+	u64	stop	: 1;
+	u64	rsvd2	: 1;
+	u64	size	: 4;
+	u64	rsvd3	: 2;
+	u64	base	: 36;
+	u64	rsvd4	: 16;
+};
+
+#define TOPA_SHIFT 12
+#define PT_CPUID_LEAVES 2
+
+enum pt_capabilities {
+	PT_CAP_max_subleaf = 0,
+	PT_CAP_cr3_filtering,
+	PT_CAP_topa_output,
+	PT_CAP_topa_multiple_entries,
+	PT_CAP_payloads_lip,
+};
+
+struct pt_pmu {
+	struct pmu		pmu;
+	u32			caps[4 * PT_CPUID_LEAVES];
+};
+
+/**
+ * struct pt_buffer - buffer configuration; one buffer per task_struct or
+ *		cpu, depending on perf event configuration
+ * @cpu:	cpu for per-cpu allocation
+ * @tables:	list of ToPA tables in this buffer
+ * @first:	shorthand for first topa table
+ * @last:	shorthand for last topa table
+ * @cur:	current topa table
+ * @nr_pages:	buffer size in pages
+ * @cur_idx:	current output region's index within @cur table
+ * @output_off:	offset within the current output region
+ * @data_size:	running total of the amount of data in this buffer
+ * @lost:	if data was lost/truncated
+ * @head:	logical write offset inside the buffer
+ * @snapshot:	if this is for a snapshot/overwrite counter
+ * @stop_pos:	STOP topa entry in the buffer
+ * @intr_pos:	INT topa entry in the buffer
+ * @data_pages:	array of pages from perf
+ * @topa_index:	table of topa entries indexed by page offset
+ */
+struct pt_buffer {
+	int			cpu;
+	struct list_head	tables;
+	struct topa		*first, *last, *cur;
+	unsigned int		cur_idx;
+	size_t			output_off;
+	unsigned long		nr_pages;
+	local_t			data_size;
+	local_t			lost;
+	local64_t		head;
+	bool			snapshot;
+	unsigned long		stop_pos, intr_pos;
+	void			**data_pages;
+	struct topa_entry	*topa_index[0];
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle:	perf output handle
+ * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ */
+struct pt {
+	struct perf_output_handle handle;
+	int			handle_nmi;
+};
+
+#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 10b46906767f..fe32074b865b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -14,6 +14,7 @@ enum severity_level {
 };
 
 #define ATTR_LEN		16
+#define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -23,20 +24,20 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8bb433043a7f..9c682c222071 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -186,7 +186,61 @@ static int error_context(struct mce *m)
 	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
+/*
+ * See AMD Error Scope Hierarchy table in a newer BKDG. For example
+ * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
+ */
+static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+{
+	enum context ctx = error_context(m);
+
+	/* Processor Context Corrupt, no need to fumble too much, die! */
+	if (m->status & MCI_STATUS_PCC)
+		return MCE_PANIC_SEVERITY;
+
+	if (m->status & MCI_STATUS_UC) {
+
+		/*
+		 * On older systems where overflow_recov flag is not present, we
+		 * should simply panic if an error overflow occurs. If
+		 * overflow_recov flag is present and set, then software can try
+		 * to at least kill process to prolong system operation.
+		 */
+		if (mce_flags.overflow_recov) {
+			/* software can try to contain */
+			if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
+				return MCE_PANIC_SEVERITY;
+
+			/* kill current process */
+			return MCE_AR_SEVERITY;
+		} else {
+			/* at least one error was not logged */
+			if (m->status & MCI_STATUS_OVER)
+				return MCE_PANIC_SEVERITY;
+		}
+
+		/*
+		 * For any other case, return MCE_UC_SEVERITY so that we log the
+		 * error and exit #MC handler.
+		 */
+		return MCE_UC_SEVERITY;
+	}
+
+	/*
+	 * deferred error: poll handler catches these and adds to mce_ring so
+	 * memory-failure can take recovery actions.
+	 */
+	if (m->status & MCI_STATUS_DEFERRED)
+		return MCE_DEFERRED_SEVERITY;
+
+	/*
+	 * corrected error: poll handler catches these and passes responsibility
+	 * of decoding the error to EDAC
+	 */
+	return MCE_KEEP_SEVERITY;
+}
+
+static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m);
@@ -216,6 +270,16 @@ int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
 	}
 }
 
+/* Default to mce_severity_intel */
+int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+		    mce_severity_intel;
+
+void __init mcheck_vendor_init_severity(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		mce_severity = mce_severity_amd;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3c036cb4a370..e535533d5ab8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,11 +60,12 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-#define SPINUNIT 100	/* 100ns */
+#define SPINUNIT		100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
 	.bootlog  = -1,
@@ -89,9 +90,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -622,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -646,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
+
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -679,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -694,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -813,7 +816,7 @@ static void mce_reign(void)
 	 * other CPUs.
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Fatal Machine check", m, msg);
+		mce_panic("Fatal machine check", m, msg);
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.
@@ -826,7 +829,7 @@ static void mce_reign(void)
 	 * source or one CPU is hung. Panic.
 	 */
 	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Machine check from unknown source", NULL, NULL);
+		mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
 	/*
 	 * Now clear all the mces_seen so that they don't reappear on
@@ -1258,7 +1261,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1268,49 +1271,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
+	unsigned long when = jiffies + interval;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
-	return test_and_clear_bit(0, v);
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
 	unsigned long iv;
-	int notify;
 
-	WARN_ON(smp_processor_id() != data);
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
 	}
 
 	/*
-	 * Alert userspace if needed.  If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
 	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
+	if (mce_notify_irq())
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
+	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
+
+done:
 	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, iv);
 }
 
 /*
@@ -1319,16 +1330,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long when = jiffies + interval;
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer_pinned(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, interval);
+
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1525,45 +1530,46 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 * Various K7s with broken bank 0 around. Always disable
 		 * by default.
 		 */
-		 if (c->x86 == 6 && cfg->banks > 0)
+		if (c->x86 == 6 && cfg->banks > 0)
 			mce_banks[0].ctl = 0;
 
-		 /*
-		  * Turn off MC4_MISC thresholding banks on those models since
-		  * they're not supported there.
-		  */
-		 if (c->x86 == 0x15 &&
-		     (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
-			 int i;
-			 u64 val, hwcr;
-			 bool need_toggle;
-			 u32 msrs[] = {
+		/*
+		 * overflow_recov is supported for F15h Models 00h-0fh
+		 * even though we don't have a CPUID bit for it.
+		 */
+		if (c->x86 == 0x15 && c->x86_model <= 0xf)
+			mce_flags.overflow_recov = 1;
+
+		/*
+		 * Turn off MC4_MISC thresholding banks on those models since
+		 * they're not supported there.
+		 */
+		if (c->x86 == 0x15 &&
+		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+			int i;
+			u64 hwcr;
+			bool need_toggle;
+			u32 msrs[] = {
 				0x00000413, /* MC4_MISC0 */
 				0xc0000408, /* MC4_MISC1 */
-			 };
+			};
 
-			 rdmsrl(MSR_K7_HWCR, hwcr);
+			rdmsrl(MSR_K7_HWCR, hwcr);
 
-			 /* McStatusWrEn has to be set */
-			 need_toggle = !(hwcr & BIT(18));
+			/* McStatusWrEn has to be set */
+			need_toggle = !(hwcr & BIT(18));
 
-			 if (need_toggle)
-				 wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+			if (need_toggle)
+				wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 
-			 for (i = 0; i < ARRAY_SIZE(msrs); i++) {
-				 rdmsrl(msrs[i], val);
+			/* Clear CntP bit safely */
+			for (i = 0; i < ARRAY_SIZE(msrs); i++)
+				msr_clear_bit(msrs[i], 62);
 
-				 /* CntP bit set? */
-				 if (val & BIT_64(62)) {
-					val &= ~BIT_64(62);
-					wrmsrl(msrs[i], val);
-				 }
-			 }
-
-			 /* restore old settings */
-			 if (need_toggle)
-				 wrmsrl(MSR_K7_HWCR, hwcr);
-		 }
+			/* restore old settings */
+			if (need_toggle)
+				wrmsrl(MSR_K7_HWCR, hwcr);
+		}
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1629,10 +1635,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
+		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
 		break;
 	default:
 		break;
@@ -2017,6 +2024,7 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mcheck_vendor_init_severity();
 
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f1c3769bbd64..55ad9b37cae8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank)
 	return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
 	switch (b->address) {
 	/* MSR4_MISC0 */
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			if (!b.interrupt_capable)
 				goto init;
 
+			b.interrupt_enable = 1;
 			new	= (high & MASK_LVTOFF_HI) >> 20;
 			offset  = setup_APIC_mce(offset, new);
 
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void)
 log:
 	mce_setup(&m);
 	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+	if (!(m.status & MCI_STATUS_VAL))
+		return;
 	m.misc = ((u64)high << 32) | low;
 	m.bank = bank;
 	mce_log(&m);
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
 	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
 	b->threshold_limit	= THRESHOLD_MAX;
 
-	if (b->interrupt_capable)
+	if (b->interrupt_capable) {
 		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-	else
+		b->interrupt_enable = 1;
+	} else {
 		threshold_ktype.default_attrs[2] = NULL;
+	}
 
 	INIT_LIST_HEAD(&b->miscj);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b3c97bafc123..b4a41cf030ed 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -39,6 +39,15 @@
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
+ * CMCI storm detection backoff counter
+ *
+ * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
+ * encountered an error. If not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
+/*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
  */
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
-#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_INTERVAL	(HZ)
 #define CMCI_STORM_THRESHOLD	15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-		return;
-	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+		return false;
+
+	/*
+	 * Reset the counter if we've logged an error in the last poll
+	 * during the storm.
+	 */
+	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+	else
+		this_cpu_dec(cmci_backoff_cnt);
+
+	return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-	int r;
-
-	if (interval < CMCI_POLL_INTERVAL)
-		return interval;
+	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+		mce_notify_irq();
+		return CMCI_STORM_INTERVAL;
+	}
 
 	switch (__this_cpu_read(cmci_storm_state)) {
 	case CMCI_STORM_ACTIVE:
+
 		/*
 		 * We switch back to interrupt mode once the poll timer has
-		 * silenced itself. That means no events recorded and the
-		 * timer interval is back to our poll interval.
+		 * silenced itself. That means no events recorded and the timer
+		 * interval is back to our poll interval.
 		 */
 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-		r = atomic_sub_return(1, &cmci_storm_on_cpus);
-		if (r == 0)
+		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+
 		/* FALLTHROUGH */
 
 	case CMCI_STORM_SUBSIDED:
 		/*
-		 * We wait for all cpus to go back to SUBSIDED
-		 * state. When that happens we switch back to
-		 * interrupt mode.
+		 * We wait for all CPUs to go back to SUBSIDED state. When that
+		 * happens we switch back to interrupt mode.
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
 		}
 		return CMCI_POLL_INTERVAL;
 	default:
-		/*
-		 * We have shiny weather. Let the poll do whatever it
-		 * thinks.
-		 */
+
+		/* We have shiny weather. Let the poll do whatever it thinks. */
 		return interval;
 	}
 }
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void)
 	cmci_storm_disable_banks();
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
-	mce_timer_kick(CMCI_POLL_INTERVAL);
+	mce_timer_kick(CMCI_STORM_INTERVAL);
+	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
 	if (r == 1)
 		pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void)
 {
 	if (cmci_storm_detect())
 		return;
+
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	mce_notify_irq();
 }
@@ -286,6 +306,7 @@ void cmci_recheck(void)
 
 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
 		return;
+
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	local_irq_restore(flags);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index bfbbe6195e2d..12829c3ced3c 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -21,7 +21,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/firmware.h>
-#include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index d45df4bd16ab..a413a69cbd74 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -23,57 +23,6 @@
 #include <asm/processor.h>
 #include <asm/cmdline.h>
 
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
-
-#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
-		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
-
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_vendor() to get vendor id for AP.
- *
- * x86_vendor() gets vendor information directly through cpuid.
- */
-static int x86_vendor(void)
-{
-	u32 eax = 0x00000000;
-	u32 ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
-		return X86_VENDOR_INTEL;
-
-	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
-		return X86_VENDOR_AMD;
-
-	return X86_VENDOR_UNKNOWN;
-}
-
-static int x86_family(void)
-{
-	u32 eax = 0x00000001;
-	u32 ebx, ecx = 0, edx;
-	int x86;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	x86 = (eax >> 8) & 0xf;
-	if (x86 == 15)
-		x86 += (eax >> 20) & 0xff;
-
-	return x86;
-}
-
 static bool __init check_loader_disabled_bsp(void)
 {
 #ifdef CONFIG_X86_32
@@ -96,7 +45,7 @@ static bool __init check_loader_disabled_bsp(void)
 
 void __init load_ucode_bsp(void)
 {
-	int vendor, x86;
+	int vendor, family;
 
 	if (check_loader_disabled_bsp())
 		return;
@@ -105,15 +54,15 @@ void __init load_ucode_bsp(void)
 		return;
 
 	vendor = x86_vendor();
-	x86 = x86_family();
+	family = x86_family();
 
 	switch (vendor) {
 	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
+		if (family >= 6)
 			load_ucode_intel_bsp();
 		break;
 	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
+		if (family >= 0x10)
 			load_ucode_amd_bsp();
 		break;
 	default:
@@ -132,7 +81,7 @@ static bool check_loader_disabled_ap(void)
 
 void load_ucode_ap(void)
 {
-	int vendor, x86;
+	int vendor, family;
 
 	if (check_loader_disabled_ap())
 		return;
@@ -141,15 +90,15 @@ void load_ucode_ap(void)
 		return;
 
 	vendor = x86_vendor();
-	x86 = x86_family();
+	family = x86_family();
 
 	switch (vendor) {
 	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
+		if (family >= 6)
 			load_ucode_intel_ap();
 		break;
 	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
+		if (family >= 0x10)
 			load_ucode_amd_ap();
 		break;
 	default:
@@ -179,18 +128,18 @@ int __init save_microcode_in_initrd(void)
 
 void reload_early_microcode(void)
 {
-	int vendor, x86;
+	int vendor, family;
 
 	vendor = x86_vendor();
-	x86 = x86_family();
+	family = x86_family();
 
 	switch (vendor) {
 	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
+		if (family >= 6)
 			reload_ucode_intel();
 		break;
 	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
+		if (family >= 0x10)
 			reload_ucode_amd();
 		break;
 	default:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 746e7fd08aad..a41beadb3db9 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -124,7 +124,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
 	cpf = cpu_sig.pf;
 	crev = cpu_sig.rev;
 
-	return get_matching_microcode(csig, cpf, mc_intel, crev);
+	return get_matching_microcode(csig, cpf, crev, mc_intel);
 }
 
 static int apply_microcode_intel(int cpu)
@@ -226,7 +226,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
 		csig = uci->cpu_sig.sig;
 		cpf = uci->cpu_sig.pf;
-		if (get_matching_microcode(csig, cpf, mc, new_rev)) {
+		if (get_matching_microcode(csig, cpf, new_rev, mc)) {
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index 420eb933189c..2f49ab4ac0ae 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -16,6 +16,14 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  */
+
+/*
+ * This needs to be before all headers so that pr_debug in printk.h doesn't turn
+ * printk calls into no_printk().
+ *
+ *#define DEBUG
+ */
+
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -28,6 +36,9 @@
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"microcode: " fmt
+
 static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
 static struct mc_saved_data {
 	unsigned int mc_saved_count;
@@ -35,50 +46,45 @@ static struct mc_saved_data {
 } mc_saved_data;
 
 static enum ucode_state
-generic_load_microcode_early(struct microcode_intel **mc_saved_p,
-			     unsigned int mc_saved_count,
-			     struct ucode_cpu_info *uci)
+load_microcode_early(struct microcode_intel **saved,
+		     unsigned int num_saved, struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *ucode_ptr, *new_mc = NULL;
-	int new_rev = uci->cpu_sig.rev;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
-	unsigned int csig = uci->cpu_sig.sig;
-	unsigned int cpf = uci->cpu_sig.pf;
-	int i;
+	struct microcode_header_intel *mc_hdr;
+	int new_rev, ret, i;
 
-	for (i = 0; i < mc_saved_count; i++) {
-		ucode_ptr = mc_saved_p[i];
+	new_rev = uci->cpu_sig.rev;
 
-		mc_header = (struct microcode_header_intel *)ucode_ptr;
-		mc_size = get_totalsize(mc_header);
-		if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
-			new_rev = mc_header->rev;
-			new_mc  = ucode_ptr;
-		}
-	}
+	for (i = 0; i < num_saved; i++) {
+		ucode_ptr = saved[i];
+		mc_hdr	  = (struct microcode_header_intel *)ucode_ptr;
 
-	if (!new_mc) {
-		state = UCODE_NFOUND;
-		goto out;
+		ret = get_matching_microcode(uci->cpu_sig.sig,
+					     uci->cpu_sig.pf,
+					     new_rev,
+					     ucode_ptr);
+		if (!ret)
+			continue;
+
+		new_rev = mc_hdr->rev;
+		new_mc  = ucode_ptr;
 	}
 
+	if (!new_mc)
+		return UCODE_NFOUND;
+
 	uci->mc = (struct microcode_intel *)new_mc;
-out:
-	return state;
+	return UCODE_OK;
 }
 
-static void
-microcode_pointer(struct microcode_intel **mc_saved,
-		  unsigned long *mc_saved_in_initrd,
-		  unsigned long initrd_start, int mc_saved_count)
+static inline void
+copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
+		  unsigned long off, int num_saved)
 {
 	int i;
 
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved[i] = (struct microcode_intel *)
-			      (mc_saved_in_initrd[i] + initrd_start);
+	for (i = 0; i < num_saved; i++)
+		mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
 }
 
 #ifdef CONFIG_X86_32
@@ -102,55 +108,27 @@ microcode_phys(struct microcode_intel **mc_saved_tmp,
 #endif
 
 static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data,
-	       unsigned long *mc_saved_in_initrd,
-	       unsigned long initrd_start,
-	       struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+	       unsigned long initrd_start, struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
 	unsigned int count = mc_saved_data->mc_saved_count;
 
 	if (!mc_saved_data->mc_saved) {
-		microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
-				  initrd_start, count);
+		copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
 
-		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+		return load_microcode_early(mc_saved_tmp, count, uci);
 	} else {
 #ifdef CONFIG_X86_32
 		microcode_phys(mc_saved_tmp, mc_saved_data);
-		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+		return load_microcode_early(mc_saved_tmp, count, uci);
 #else
-		return generic_load_microcode_early(mc_saved_data->mc_saved,
+		return load_microcode_early(mc_saved_data->mc_saved,
 						    count, uci);
 #endif
 	}
 }
 
-static u8 get_x86_family(unsigned long sig)
-{
-	u8 x86;
-
-	x86 = (sig >> 8) & 0xf;
-
-	if (x86 == 0xf)
-		x86 += (sig >> 20) & 0xff;
-
-	return x86;
-}
-
-static u8 get_x86_model(unsigned long sig)
-{
-	u8 x86, x86_model;
-
-	x86 = get_x86_family(sig);
-	x86_model = (sig >> 4) & 0xf;
-
-	if (x86 == 0x6 || x86 == 0xf)
-		x86_model += ((sig >> 16) & 0xf) << 4;
-
-	return x86_model;
-}
-
 /*
  * Given CPU signature and a microcode patch, this function finds if the
  * microcode patch has matching family and model with the CPU.
@@ -159,42 +137,40 @@ static enum ucode_state
 matching_model_microcode(struct microcode_header_intel *mc_header,
 			unsigned long sig)
 {
-	u8 x86, x86_model;
-	u8 x86_ucode, x86_model_ucode;
+	unsigned int fam, model;
+	unsigned int fam_ucode, model_ucode;
 	struct extended_sigtable *ext_header;
 	unsigned long total_size = get_totalsize(mc_header);
 	unsigned long data_size = get_datasize(mc_header);
 	int ext_sigcount, i;
 	struct extended_signature *ext_sig;
 
-	x86 = get_x86_family(sig);
-	x86_model = get_x86_model(sig);
+	fam   = __x86_family(sig);
+	model = x86_model(sig);
 
-	x86_ucode = get_x86_family(mc_header->sig);
-	x86_model_ucode = get_x86_model(mc_header->sig);
+	fam_ucode   = __x86_family(mc_header->sig);
+	model_ucode = x86_model(mc_header->sig);
 
-	if (x86 == x86_ucode && x86_model == x86_model_ucode)
+	if (fam == fam_ucode && model == model_ucode)
 		return UCODE_OK;
 
 	/* Look for ext. headers: */
 	if (total_size <= data_size + MC_HEADER_SIZE)
 		return UCODE_NFOUND;
 
-	ext_header = (struct extended_sigtable *)
-		     mc_header + data_size + MC_HEADER_SIZE;
+	ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
+	ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
 	ext_sigcount = ext_header->count;
-	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 
 	for (i = 0; i < ext_sigcount; i++) {
-		x86_ucode = get_x86_family(ext_sig->sig);
-		x86_model_ucode = get_x86_model(ext_sig->sig);
+		fam_ucode   = __x86_family(ext_sig->sig);
+		model_ucode = x86_model(ext_sig->sig);
 
-		if (x86 == x86_ucode && x86_model == x86_model_ucode)
+		if (fam == fam_ucode && model == model_ucode)
 			return UCODE_OK;
 
 		ext_sig++;
 	}
-
 	return UCODE_NFOUND;
 }
 
@@ -204,7 +180,7 @@ save_microcode(struct mc_saved_data *mc_saved_data,
 	       unsigned int mc_saved_count)
 {
 	int i, j;
-	struct microcode_intel **mc_saved_p;
+	struct microcode_intel **saved_ptr;
 	int ret;
 
 	if (!mc_saved_count)
@@ -213,39 +189,45 @@ save_microcode(struct mc_saved_data *mc_saved_data,
 	/*
 	 * Copy new microcode data.
 	 */
-	mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
-			     GFP_KERNEL);
-	if (!mc_saved_p)
+	saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+	if (!saved_ptr)
 		return -ENOMEM;
 
 	for (i = 0; i < mc_saved_count; i++) {
-		struct microcode_intel *mc = mc_saved_src[i];
-		struct microcode_header_intel *mc_header = &mc->hdr;
-		unsigned long mc_size = get_totalsize(mc_header);
-		mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
-		if (!mc_saved_p[i]) {
-			ret = -ENOMEM;
-			goto err;
-		}
+		struct microcode_header_intel *mc_hdr;
+		struct microcode_intel *mc;
+		unsigned long size;
+
 		if (!mc_saved_src[i]) {
 			ret = -EINVAL;
 			goto err;
 		}
-		memcpy(mc_saved_p[i], mc, mc_size);
+
+		mc     = mc_saved_src[i];
+		mc_hdr = &mc->hdr;
+		size   = get_totalsize(mc_hdr);
+
+		saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+		if (!saved_ptr[i]) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		memcpy(saved_ptr[i], mc, size);
 	}
 
 	/*
 	 * Point to newly saved microcode.
 	 */
-	mc_saved_data->mc_saved = mc_saved_p;
+	mc_saved_data->mc_saved = saved_ptr;
 	mc_saved_data->mc_saved_count = mc_saved_count;
 
 	return 0;
 
 err:
 	for (j = 0; j <= i; j++)
-		kfree(mc_saved_p[j]);
-	kfree(mc_saved_p);
+		kfree(saved_ptr[j]);
+	kfree(saved_ptr);
 
 	return ret;
 }
@@ -257,48 +239,45 @@ err:
  * - or if it is a newly discovered microcode patch.
  *
  * The microcode patch should have matching model with CPU.
+ *
+ * Returns: The updated number @num_saved of saved microcode patches.
  */
-static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
-		     unsigned int *mc_saved_count_p)
+static unsigned int _save_mc(struct microcode_intel **mc_saved,
+			     u8 *ucode_ptr, unsigned int num_saved)
 {
-	int i;
-	int found = 0;
-	unsigned int mc_saved_count = *mc_saved_count_p;
-	struct microcode_header_intel *mc_header;
+	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
+	unsigned int sig, pf, new_rev;
+	int found = 0, i;
+
+	mc_hdr = (struct microcode_header_intel *)ucode_ptr;
+
+	for (i = 0; i < num_saved; i++) {
+		mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
+		sig	     = mc_saved_hdr->sig;
+		pf	     = mc_saved_hdr->pf;
+		new_rev	     = mc_hdr->rev;
+
+		if (!get_matching_sig(sig, pf, new_rev, ucode_ptr))
+			continue;
+
+		found = 1;
+
+		if (!revision_is_newer(mc_hdr, new_rev))
+			continue;
 
-	mc_header = (struct microcode_header_intel *)ucode_ptr;
-	for (i = 0; i < mc_saved_count; i++) {
-		unsigned int sig, pf;
-		unsigned int new_rev;
-		struct microcode_header_intel *mc_saved_header =
-			     (struct microcode_header_intel *)mc_saved[i];
-		sig = mc_saved_header->sig;
-		pf = mc_saved_header->pf;
-		new_rev = mc_header->rev;
-
-		if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
-			found = 1;
-			if (update_match_revision(mc_header, new_rev)) {
-				/*
-				 * Found an older ucode saved before.
-				 * Replace the older one with this newer
-				 * one.
-				 */
-				mc_saved[i] =
-					(struct microcode_intel *)ucode_ptr;
-				break;
-			}
-		}
-	}
-	if (i >= mc_saved_count && !found)
 		/*
-		 * This ucode is first time discovered in ucode file.
-		 * Save it to memory.
+		 * Found an older ucode saved earlier. Replace it with
+		 * this newer one.
 		 */
-		mc_saved[mc_saved_count++] =
-				 (struct microcode_intel *)ucode_ptr;
+		mc_saved[i] = (struct microcode_intel *)ucode_ptr;
+		break;
+	}
+
+	/* Newly detected microcode, save it to memory. */
+	if (i >= num_saved && !found)
+		mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
 
-	*mc_saved_count_p = mc_saved_count;
+	return num_saved;
 }
 
 /*
@@ -346,7 +325,7 @@ get_matching_model_microcode(int cpu, unsigned long start,
 			continue;
 		}
 
-		_save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
+		mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
 
 		ucode_ptr += mc_size;
 	}
@@ -372,7 +351,7 @@ out:
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
-	u8 x86, x86_model;
+	unsigned int family, model;
 	struct cpu_signature csig;
 	unsigned int eax, ebx, ecx, edx;
 
@@ -387,10 +366,10 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 	native_cpuid(&eax, &ebx, &ecx, &edx);
 	csig.sig = eax;
 
-	x86 = get_x86_family(csig.sig);
-	x86_model = get_x86_model(csig.sig);
+	family = __x86_family(csig.sig);
+	model  = x86_model(csig.sig);
 
-	if ((x86_model >= 5) || (x86 > 6)) {
+	if ((model >= 5) || (family > 6)) {
 		/* get processor flags from MSR 0x17 */
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
@@ -429,8 +408,7 @@ static void __ref show_saved_mc(void)
 	sig = uci.cpu_sig.sig;
 	pf = uci.cpu_sig.pf;
 	rev = uci.cpu_sig.rev;
-	pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
-		 smp_processor_id(), sig, pf, rev);
+	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
 
 	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
 		struct microcode_header_intel *mc_saved_header;
@@ -457,8 +435,7 @@ static void __ref show_saved_mc(void)
 		if (total_size <= data_size + MC_HEADER_SIZE)
 			continue;
 
-		ext_header = (struct extended_sigtable *)
-			     mc_saved_header + data_size + MC_HEADER_SIZE;
+		ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
 		ext_sigcount = ext_header->count;
 		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 
@@ -515,8 +492,7 @@ int save_mc_for_early(u8 *mc)
 	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
 	 * version.
 	 */
-
-	_save_mc(mc_saved_tmp, mc, &mc_saved_count);
+	mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
 
 	/*
 	 * Save the mc_save_tmp in global mc_saved_data.
@@ -548,12 +524,10 @@ EXPORT_SYMBOL_GPL(save_mc_for_early);
 
 static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
 static __init enum ucode_state
-scan_microcode(unsigned long start, unsigned long end,
-		struct mc_saved_data *mc_saved_data,
-		unsigned long *mc_saved_in_initrd,
-		struct ucode_cpu_info *uci)
+scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+	       unsigned long start, unsigned long size,
+	       struct ucode_cpu_info *uci)
 {
-	unsigned int size = end - start + 1;
 	struct cpio_data cd;
 	long offset = 0;
 #ifdef CONFIG_X86_32
@@ -569,10 +543,8 @@ scan_microcode(unsigned long start, unsigned long end,
 	if (!cd.data)
 		return UCODE_ERROR;
 
-
 	return get_matching_model_microcode(0, start, cd.data, cd.size,
-					    mc_saved_data, mc_saved_in_initrd,
-					    uci);
+					    mc_saved_data, initrd, uci);
 }
 
 /*
@@ -704,7 +676,7 @@ int __init save_microcode_in_initrd_intel(void)
 	if (count == 0)
 		return ret;
 
-	microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
 	ret = save_microcode(&mc_saved_data, mc_saved, count);
 	if (ret)
 		pr_err("Cannot save microcode patches from initrd.\n");
@@ -716,52 +688,44 @@ int __init save_microcode_in_initrd_intel(void)
 
 static void __init
 _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-		      unsigned long *mc_saved_in_initrd,
-		      unsigned long initrd_start_early,
-		      unsigned long initrd_end_early,
-		      struct ucode_cpu_info *uci)
+		      unsigned long *initrd,
+		      unsigned long start, unsigned long size)
 {
+	struct ucode_cpu_info uci;
 	enum ucode_state ret;
 
-	collect_cpu_info_early(uci);
-	scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
-		       mc_saved_in_initrd, uci);
+	collect_cpu_info_early(&uci);
 
-	ret = load_microcode(mc_saved_data, mc_saved_in_initrd,
-			     initrd_start_early, uci);
+	ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+	if (ret != UCODE_OK)
+		return;
 
-	if (ret == UCODE_OK)
-		apply_microcode_early(uci, true);
+	ret = load_microcode(mc_saved_data, initrd, start, &uci);
+	if (ret != UCODE_OK)
+		return;
+
+	apply_microcode_early(&uci, true);
 }
 
-void __init
-load_ucode_intel_bsp(void)
+void __init load_ucode_intel_bsp(void)
 {
-	u64 ramdisk_image, ramdisk_size;
-	unsigned long initrd_start_early, initrd_end_early;
-	struct ucode_cpu_info uci;
+	u64 start, size;
 #ifdef CONFIG_X86_32
-	struct boot_params *boot_params_p;
+	struct boot_params *p;
 
-	boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
-	ramdisk_image = boot_params_p->hdr.ramdisk_image;
-	ramdisk_size  = boot_params_p->hdr.ramdisk_size;
-	initrd_start_early = ramdisk_image;
-	initrd_end_early = initrd_start_early + ramdisk_size;
+	p	= (struct boot_params *)__pa_nodebug(&boot_params);
+	start	= p->hdr.ramdisk_image;
+	size	= p->hdr.ramdisk_size;
 
 	_load_ucode_intel_bsp(
-		(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-		(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-		initrd_start_early, initrd_end_early, &uci);
+			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+			start, size);
 #else
-	ramdisk_image = boot_params.hdr.ramdisk_image;
-	ramdisk_size  = boot_params.hdr.ramdisk_size;
-	initrd_start_early = ramdisk_image + PAGE_OFFSET;
-	initrd_end_early = initrd_start_early + ramdisk_size;
-
-	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
-			      initrd_start_early, initrd_end_early,
-			      &uci);
+	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
+	size	= boot_params.hdr.ramdisk_size;
+
+	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
 #endif
 }
 
@@ -771,6 +735,7 @@ void load_ucode_intel_ap(void)
 	struct ucode_cpu_info uci;
 	unsigned long *mc_saved_in_initrd_p;
 	unsigned long initrd_start_addr;
+	enum ucode_state ret;
 #ifdef CONFIG_X86_32
 	unsigned long *initrd_start_p;
 
@@ -793,8 +758,12 @@ void load_ucode_intel_ap(void)
 		return;
 
 	collect_cpu_info_early(&uci);
-	load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-		       initrd_start_addr, &uci);
+	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+			     initrd_start_addr, &uci);
+
+	if (ret != UCODE_OK)
+		return;
+
 	apply_microcode_early(&uci, true);
 }
 
@@ -808,8 +777,8 @@ void reload_ucode_intel(void)
 
 	collect_cpu_info_early(&uci);
 
-	ret = generic_load_microcode_early(mc_saved_data.mc_saved,
-					   mc_saved_data.mc_saved_count, &uci);
+	ret = load_microcode_early(mc_saved_data.mc_saved,
+				   mc_saved_data.mc_saved_count, &uci);
 	if (ret != UCODE_OK)
 		return;
 
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index ce69320d0179..cd47a510a3f1 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -38,12 +38,6 @@ update_match_cpu(unsigned int csig, unsigned int cpf,
 	return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
 }
 
-int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
-	return (mc_header->rev <= rev) ? 0 : 1;
-}
-
 int microcode_sanity_check(void *mc, int print_err)
 {
 	unsigned long total_size, data_size, ext_table_size;
@@ -128,10 +122,9 @@ int microcode_sanity_check(void *mc, int print_err)
 EXPORT_SYMBOL_GPL(microcode_sanity_check);
 
 /*
- * return 0 - no update found
- * return 1 - found update
+ * Returns 1 if update has been found, 0 otherwise.
  */
-int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
+int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc)
 {
 	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header;
@@ -159,16 +152,15 @@ int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
 }
 
 /*
- * return 0 - no update found
- * return 1 - found update
+ * Returns 1 if update has been found, 0 otherwise.
  */
-int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
+int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc)
 {
-	struct microcode_header_intel *mc_header = mc;
+	struct microcode_header_intel *mc_hdr = mc;
 
-	if (!update_match_revision(mc_header, rev))
+	if (!revision_is_newer(mc_hdr, rev))
 		return 0;
 
-	return get_matching_sig(csig, cpf, mc, rev);
+	return get_matching_sig(csig, cpf, rev, mc);
 }
 EXPORT_SYMBOL_GPL(get_matching_microcode);
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 36d99a337b49..3f20710a5b23 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -6,7 +6,7 @@
 IN=$1
 OUT=$2
 
-function dump_array()
+dump_array()
 {
 	ARRAY=$1
 	SIZE=$2
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index a041e094b8b9..d76f13d6d8d6 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -404,11 +404,10 @@ static const struct file_operations mtrr_fops = {
 static int mtrr_seq_show(struct seq_file *seq, void *offset)
 {
 	char factor;
-	int i, max, len;
+	int i, max;
 	mtrr_type type;
 	unsigned long base, size;
 
-	len = 0;
 	max = num_var_ranges;
 	for (i = 0; i < max; i++) {
 		mtrr_if->get(i, &base, &size, &type);
@@ -425,11 +424,10 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 			size >>= 20 - PAGE_SHIFT;
 		}
 		/* Base can be > 32bit */
-		len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
-			"(%5luMB), size=%5lu%cB, count=%d: %s\n",
-			i, base, base >> (20 - PAGE_SHIFT), size,
-			factor, mtrr_usage_table[i],
-			mtrr_attrib_to_str(type));
+		seq_printf(seq, "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
+			   i, base, base >> (20 - PAGE_SHIFT),
+			   size, factor,
+			   mtrr_usage_table[i], mtrr_attrib_to_str(type));
 	}
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b71a7f86d68a..87848ebe2bb7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -263,6 +263,14 @@ static void hw_perf_event_destroy(struct perf_event *event)
 	}
 }
 
+void hw_perf_lbr_event_destroy(struct perf_event *event)
+{
+	hw_perf_event_destroy(event);
+
+	/* undo the lbr/bts event accounting */
+	x86_del_exclusive(x86_lbr_exclusive_lbr);
+}
+
 static inline int x86_pmu_initialized(void)
 {
 	return x86_pmu.handle_irq != NULL;
@@ -302,6 +310,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 	return x86_pmu_extra_regs(val, event);
 }
 
+/*
+ * Check if we can create event of a certain type (that no conflicting events
+ * are present).
+ */
+int x86_add_exclusive(unsigned int what)
+{
+	int ret = -EBUSY, i;
+
+	if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what]))
+		return 0;
+
+	mutex_lock(&pmc_reserve_mutex);
+	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+			goto out;
+
+	atomic_inc(&x86_pmu.lbr_exclusive[what]);
+	ret = 0;
+
+out:
+	mutex_unlock(&pmc_reserve_mutex);
+	return ret;
+}
+
+void x86_del_exclusive(unsigned int what)
+{
+	atomic_dec(&x86_pmu.lbr_exclusive[what]);
+}
+
 int x86_setup_perfctr(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
@@ -346,6 +383,12 @@ int x86_setup_perfctr(struct perf_event *event)
 		/* BTS is currently only allowed for user-mode. */
 		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
+
+		/* disallow bts if conflicting events are present */
+		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+			return -EBUSY;
+
+		event->destroy = hw_perf_lbr_event_destroy;
 	}
 
 	hwc->config |= config;
@@ -399,39 +442,41 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
-		/*
-		 * check that PEBS LBR correction does not conflict with
-		 * whatever the user is asking with attr->branch_sample_type
-		 */
-		if (event->attr.precise_ip > 1 &&
-		    x86_pmu.intel_cap.pebs_format < 2) {
-			u64 *br_type = &event->attr.branch_sample_type;
-
-			if (has_branch_stack(event)) {
-				if (!precise_br_compat(event))
-					return -EOPNOTSUPP;
-
-				/* branch_sample_type is compatible */
-
-			} else {
-				/*
-				 * user did not specify  branch_sample_type
-				 *
-				 * For PEBS fixups, we capture all
-				 * the branches at the priv level of the
-				 * event.
-				 */
-				*br_type = PERF_SAMPLE_BRANCH_ANY;
-
-				if (!event->attr.exclude_user)
-					*br_type |= PERF_SAMPLE_BRANCH_USER;
-
-				if (!event->attr.exclude_kernel)
-					*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
-			}
+	}
+	/*
+	 * check that PEBS LBR correction does not conflict with
+	 * whatever the user is asking with attr->branch_sample_type
+	 */
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+		u64 *br_type = &event->attr.branch_sample_type;
+
+		if (has_branch_stack(event)) {
+			if (!precise_br_compat(event))
+				return -EOPNOTSUPP;
+
+			/* branch_sample_type is compatible */
+
+		} else {
+			/*
+			 * user did not specify  branch_sample_type
+			 *
+			 * For PEBS fixups, we capture all
+			 * the branches at the priv level of the
+			 * event.
+			 */
+			*br_type = PERF_SAMPLE_BRANCH_ANY;
+
+			if (!event->attr.exclude_user)
+				*br_type |= PERF_SAMPLE_BRANCH_USER;
+
+			if (!event->attr.exclude_kernel)
+				*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
 		}
 	}
 
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+		event->attach_state |= PERF_ATTACH_TASK_DATA;
+
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
@@ -449,6 +494,12 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
+	if (event->attr.sample_period && x86_pmu.limit_period) {
+		if (x86_pmu.limit_period(event, event->attr.sample_period) >
+				event->attr.sample_period)
+			return -EINVAL;
+	}
+
 	return x86_setup_perfctr(event);
 }
 
@@ -728,14 +779,17 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct perf_event *e;
-	int i, wmin, wmax, num = 0;
+	int i, wmin, wmax, unsched = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
+	if (x86_pmu.start_scheduling)
+		x86_pmu.start_scheduling(cpuc);
+
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
 		hwc->constraint = c;
 
 		wmin = min(wmin, c->weight);
@@ -768,24 +822,30 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(cpuc->event_list, n, wmin,
-					 wmax, assign);
+		unsched = perf_assign_events(cpuc->event_list, n, wmin,
+					     wmax, assign);
 
 	/*
-	 * Mark the event as committed, so we do not put_constraint()
-	 * in case new events are added and fail scheduling.
+	 * In case of success (unsched = 0), mark events as committed,
+	 * so we do not put_constraint() in case new events are added
+	 * and fail to be scheduled
+	 *
+	 * We invoke the lower level commit callback to lock the resource
+	 *
+	 * We do not need to do all of this in case we are called to
+	 * validate an event group (assign == NULL)
 	 */
-	if (!num && assign) {
+	if (!unsched && assign) {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+			if (x86_pmu.commit_scheduling)
+				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
 		}
 	}
-	/*
-	 * scheduling failed or is just a simulation,
-	 * free resources if necessary
-	 */
-	if (!assign || num) {
+
+	if (!assign || unsched) {
+
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*
@@ -795,11 +855,18 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
 				continue;
 
+			/*
+			 * release events that failed scheduling
+			 */
 			if (x86_pmu.put_event_constraints)
 				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
-	return num ? -EINVAL : 0;
+
+	if (x86_pmu.stop_scheduling)
+		x86_pmu.stop_scheduling(cpuc);
+
+	return unsched ? -EINVAL : 0;
 }
 
 /*
@@ -986,6 +1053,9 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
+	if (x86_pmu.limit_period)
+		left = x86_pmu.limit_period(event, left);
+
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
@@ -1033,7 +1103,6 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 
 	hwc = &event->hw;
 
-	perf_pmu_disable(event->pmu);
 	n0 = cpuc->n_events;
 	ret = n = collect_events(cpuc, event, false);
 	if (ret < 0)
@@ -1071,7 +1140,6 @@ done_collect:
 
 	ret = 0;
 out:
-	perf_pmu_enable(event->pmu);
 	return ret;
 }
 
@@ -1103,7 +1171,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-	u64 pebs;
+	u64 pebs, debugctl;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
@@ -1121,14 +1189,20 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
-		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
+		if (x86_pmu.pebs_constraints) {
+			rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+			pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
+		}
+		if (x86_pmu.lbr_nr) {
+			rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+			pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
+		}
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
@@ -1321,11 +1395,12 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	int ret = NOTIFY_OK;
+	int i, ret = NOTIFY_OK;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-		cpuc->kfree_on_online = NULL;
+		for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+			cpuc->kfree_on_online[i] = NULL;
 		if (x86_pmu.cpu_prepare)
 			ret = x86_pmu.cpu_prepare(cpu);
 		break;
@@ -1336,7 +1411,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 
 	case CPU_ONLINE:
-		kfree(cpuc->kfree_on_online);
+		for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+			kfree(cpuc->kfree_on_online[i]);
+			cpuc->kfree_on_online[i] = NULL;
+		}
 		break;
 
 	case CPU_DYING:
@@ -1712,7 +1790,7 @@ static int validate_event(struct perf_event *event)
 	if (IS_ERR(fake_cpuc))
 		return PTR_ERR(fake_cpuc);
 
-	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+	c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
 
 	if (!c || !c->weight)
 		ret = -EINVAL;
@@ -1914,10 +1992,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 	NULL,
 };
 
-static void x86_pmu_flush_branch_stack(void)
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (x86_pmu.flush_branch_stack)
-		x86_pmu.flush_branch_stack();
+	if (x86_pmu.sched_task)
+		x86_pmu.sched_task(ctx, sched_in);
 }
 
 void perf_check_microcode(void)
@@ -1949,7 +2027,8 @@ static struct pmu pmu = {
 	.commit_txn		= x86_pmu_commit_txn,
 
 	.event_idx		= x86_pmu_event_idx,
-	.flush_branch_stack	= x86_pmu_flush_branch_stack,
+	.sched_task		= x86_pmu_sched_task,
+	.task_ctx_size          = sizeof(struct x86_perf_task_context),
 };
 
 void arch_perf_update_userpage(struct perf_event *event,
@@ -1968,13 +2047,23 @@ void arch_perf_update_userpage(struct perf_event *event,
 
 	data = cyc2ns_read_begin();
 
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always in the local_clock domain.
+	 */
 	userpg->cap_user_time = 1;
 	userpg->time_mult = data->cyc2ns_mul;
 	userpg->time_shift = data->cyc2ns_shift;
 	userpg->time_offset = data->cyc2ns_offset - now;
 
-	userpg->cap_user_time_zero = 1;
-	userpg->time_zero = data->cyc2ns_offset;
+	/*
+	 * cap_user_time_zero doesn't make sense when we're using a different
+	 * time base for the records.
+	 */
+	if (event->clock == &local_clock) {
+		userpg->cap_user_time_zero = 1;
+		userpg->time_zero = data->cyc2ns_offset;
+	}
 
 	cyc2ns_read_end(data);
 }
@@ -2147,24 +2236,24 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 static unsigned long code_segment_base(struct pt_regs *regs)
 {
 	/*
+	 * For IA32 we look at the GDT/LDT segment base to convert the
+	 * effective IP to a linear address.
+	 */
+
+#ifdef CONFIG_X86_32
+	/*
 	 * If we are in VM86 mode, add the segment offset to convert to a
 	 * linear address.
 	 */
 	if (regs->flags & X86_VM_MASK)
 		return 0x10 * regs->cs;
 
-	/*
-	 * For IA32 we look at the GDT/LDT segment base to convert the
-	 * effective IP to a linear address.
-	 */
-#ifdef CONFIG_X86_32
 	if (user_mode(regs) && regs->cs != __USER_CS)
 		return get_segment_base(regs->cs);
 #else
-	if (test_thread_flag(TIF_IA32)) {
-		if (user_mode(regs) && regs->cs != __USER32_CS)
-			return get_segment_base(regs->cs);
-	}
+	if (user_mode(regs) && !user_64bit_mode(regs) &&
+	    regs->cs != __USER32_CS)
+		return get_segment_base(regs->cs);
 #endif
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index df525d2be1e8..6ac5cb7a9e14 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -65,13 +65,15 @@ struct event_constraint {
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED	0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW	0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW	0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
 
 
 struct amd_nb {
@@ -123,8 +125,37 @@ struct intel_shared_regs {
 	unsigned                core_id;	/* per-core: core id */
 };
 
+enum intel_excl_state_type {
+	INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+	INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+	INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
+	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+	int  num_alloc_cntrs;/* #counters allocated */
+	int  max_alloc_cntrs;/* max #counters allowed */
+	bool sched_started; /* true if scheduling has started */
+};
+
+struct intel_excl_cntrs {
+	raw_spinlock_t	lock;
+
+	struct intel_excl_states states[2];
+
+	int		refcnt;		/* per-core: #HT threads */
+	unsigned	core_id;	/* per-core: core id */
+};
+
 #define MAX_LBR_ENTRIES		16
 
+enum {
+	X86_PERF_KFREE_SHARED = 0,
+	X86_PERF_KFREE_EXCL   = 1,
+	X86_PERF_KFREE_MAX
+};
+
 struct cpu_hw_events {
 	/*
 	 * Generic x86 PMC bits
@@ -179,6 +210,12 @@ struct cpu_hw_events {
 	 * used on Intel NHM/WSM/SNB
 	 */
 	struct intel_shared_regs	*shared_regs;
+	/*
+	 * manage exclusive counter access between hyperthread
+	 */
+	struct event_constraint *constraint_list; /* in enable order */
+	struct intel_excl_cntrs		*excl_cntrs;
+	int excl_thread_id; /* 0 or 1 */
 
 	/*
 	 * AMD specific bits
@@ -187,7 +224,7 @@ struct cpu_hw_events {
 	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
 	u64				perf_ctr_virt_mask;
 
-	void				*kfree_on_online;
+	void				*kfree_on_online[X86_PERF_KFREE_MAX];
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
@@ -202,6 +239,10 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
+#define INTEL_EXCLEVT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+			   0, PERF_X86_EVENT_EXCL)
+
 /*
  * The overlap flag marks event constraints with overlapping counter
  * masks. This is the case if the counter mask of such an event is not
@@ -259,6 +300,10 @@ struct cpu_hw_events {
 #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
 
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
 #define INTEL_PLD_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
@@ -283,22 +328,40 @@ struct cpu_hw_events {
 
 /* Check flags and event code, and set the HSW load flag */
 #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
-	__EVENT_CONSTRAINT(code, n, 			\
+	__EVENT_CONSTRAINT(code, n,			\
 			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
 
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
+	__EVENT_CONSTRAINT(code, n,			\
+			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, \
+			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
 /* Check flags and event code/umask, and set the HSW store flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
+	__EVENT_CONSTRAINT(code, n,			\
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, \
+			  PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+
 /* Check flags and event code/umask, and set the HSW load flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
 
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
+	__EVENT_CONSTRAINT(code, n,			\
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, \
+			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
 /* Check flags and event code/umask, and set the HSW N/A flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
@@ -408,6 +471,13 @@ union x86_pmu_config {
 
 #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
 
+enum {
+	x86_lbr_exclusive_lbr,
+	x86_lbr_exclusive_bts,
+	x86_lbr_exclusive_pt,
+	x86_lbr_exclusive_max,
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -443,14 +513,25 @@ struct x86_pmu {
 	u64		max_period;
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 int idx,
 						 struct perf_event *event);
 
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
+
+	void		(*commit_scheduling)(struct cpu_hw_events *cpuc,
+					     struct perf_event *event,
+					     int cntr);
+
+	void		(*start_scheduling)(struct cpu_hw_events *cpuc);
+
+	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);
+
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 	bool		late_ack;
+	unsigned	(*limit_period)(struct perf_event *event, unsigned l);
 
 	/*
 	 * sysfs attrs
@@ -472,7 +553,8 @@ struct x86_pmu {
 	void		(*cpu_dead)(int cpu);
 
 	void		(*check_microcode)(void);
-	void		(*flush_branch_stack)(void);
+	void		(*sched_task)(struct perf_event_context *ctx,
+				      bool sched_in);
 
 	/*
 	 * Intel Arch Perfmon v2+
@@ -504,10 +586,15 @@ struct x86_pmu {
 	bool		lbr_double_abort;	   /* duplicated lbr aborts */
 
 	/*
+	 * Intel PT/LBR/BTS are exclusive
+	 */
+	atomic_t	lbr_exclusive[x86_lbr_exclusive_max];
+
+	/*
 	 * Extra registers for events
 	 */
 	struct extra_reg *extra_regs;
-	unsigned int er_flags;
+	unsigned int flags;
 
 	/*
 	 * Intel host/guest support (KVM)
@@ -515,6 +602,13 @@ struct x86_pmu {
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+struct x86_perf_task_context {
+	u64 lbr_from[MAX_LBR_ENTRIES];
+	u64 lbr_to[MAX_LBR_ENTRIES];
+	int lbr_callstack_users;
+	int lbr_stack_state;
+};
+
 #define x86_add_quirk(func_)						\
 do {									\
 	static struct x86_pmu_quirk __quirk __initdata = {		\
@@ -524,8 +618,13 @@ do {									\
 	x86_pmu.quirks = &__quirk;					\
 } while (0)
 
-#define ERF_NO_HT_SHARING	1
-#define ERF_HAS_RSP_1		2
+/*
+ * x86_pmu flags
+ */
+#define PMU_FL_NO_HT_SHARING	0x1 /* no hyper-threading resource sharing */
+#define PMU_FL_HAS_RSP_1	0x2 /* has 2 equivalent offcore_rsp regs   */
+#define PMU_FL_EXCL_CNTRS	0x4 /* has exclusive counter requirements  */
+#define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -546,6 +645,12 @@ static struct perf_pmu_events_attr event_attr_##v = {			\
 
 extern struct x86_pmu x86_pmu __read_mostly;
 
+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+	return  x86_pmu.lbr_sel_map &&
+		x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
 int x86_perf_event_set_period(struct perf_event *event);
@@ -588,6 +693,12 @@ static inline int x86_pmu_rdpmc_index(int index)
 	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
 }
 
+int x86_add_exclusive(unsigned int what);
+
+void x86_del_exclusive(unsigned int what);
+
+void hw_perf_lbr_event_destroy(struct perf_event *event);
+
 int x86_setup_perfctr(struct perf_event *event);
 
 int x86_pmu_hw_config(struct perf_event *event);
@@ -674,10 +785,34 @@ static inline int amd_pmu_init(void)
 
 #ifdef CONFIG_CPU_SUP_INTEL
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+	/* user explicitly requested branch sampling */
+	if (has_branch_stack(event))
+		return true;
+
+	/* implicit branch sampling to correct PEBS skid */
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2)
+		return true;
+
+	return false;
+}
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+	if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !event->attr.freq && event->hw.sample_period == 1)
+		return true;
+
+	return false;
+}
+
 int intel_pmu_save_and_restart(struct perf_event *event);
 
 struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event);
 
 struct intel_shared_regs *allocate_shared_regs(int cpu);
 
@@ -727,13 +862,15 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_pmu_lbr_reset(void);
 
 void intel_pmu_lbr_enable(struct perf_event *event);
 
 void intel_pmu_lbr_disable(struct perf_event *event);
 
-void intel_pmu_lbr_enable_all(void);
+void intel_pmu_lbr_enable_all(bool pmi);
 
 void intel_pmu_lbr_disable_all(void);
 
@@ -747,8 +884,18 @@ void intel_pmu_lbr_init_atom(void);
 
 void intel_pmu_lbr_init_snb(void);
 
+void intel_pmu_lbr_init_hsw(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
+void intel_pt_interrupt(void);
+
+int intel_bts_interrupt(void);
+
+void intel_bts_enable_local(void);
+
+void intel_bts_disable_local(void);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
@@ -758,6 +905,10 @@ int knc_pmu_init(void);
 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page);
 
+static inline int is_ht_workaround_enabled(void)
+{
+	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
+}
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 28926311aac1..1cee5d2d7ece 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -382,6 +382,7 @@ static int amd_pmu_cpu_prepare(int cpu)
 static void amd_pmu_cpu_starting(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
 	struct amd_nb *nb;
 	int i, nb_id;
 
@@ -399,7 +400,7 @@ static void amd_pmu_cpu_starting(int cpu)
 			continue;
 
 		if (nb->nb_id == nb_id) {
-			cpuc->kfree_on_online = cpuc->amd_nb;
+			*onln = cpuc->amd_nb;
 			cpuc->amd_nb = nb;
 			break;
 		}
@@ -429,7 +430,8 @@ static void amd_pmu_cpu_dead(int cpu)
 }
 
 static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
 {
 	/*
 	 * if not NB event or no NB, then no constraints
@@ -537,7 +539,8 @@ static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
 static struct event_constraint *
-amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
+			       struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int event_code = amd_get_event_code(hwc);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index a61f5c6911da..989d3c215d2b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -796,7 +796,7 @@ static int setup_ibs_ctl(int ibs_eilvt_off)
  * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
  * is using the new offset.
  */
-static int force_ibs_eilvt_setup(void)
+static void force_ibs_eilvt_setup(void)
 {
 	int offset;
 	int ret;
@@ -811,26 +811,24 @@ static int force_ibs_eilvt_setup(void)
 
 	if (offset == APIC_EILVT_NR_MAX) {
 		printk(KERN_DEBUG "No EILVT entry available\n");
-		return -EBUSY;
+		return;
 	}
 
 	ret = setup_ibs_ctl(offset);
 	if (ret)
 		goto out;
 
-	if (!ibs_eilvt_valid()) {
-		ret = -EFAULT;
+	if (!ibs_eilvt_valid())
 		goto out;
-	}
 
 	pr_info("IBS: LVT offset %d assigned\n", offset);
 
-	return 0;
+	return;
 out:
 	preempt_disable();
 	put_eilvt(offset);
 	preempt_enable();
-	return ret;
+	return;
 }
 
 static void ibs_eilvt_setup(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 498b6d967138..219d3fb423a1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/watchdog.h>
 
 #include <asm/cpufeature.h>
 #include <asm/hardirq.h>
@@ -113,6 +114,12 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
 	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
 	EVENT_CONSTRAINT_END
 };
 
@@ -131,15 +138,12 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
 	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-	/*
-	 * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT
-	 * siblings; disable these events because they can corrupt unrelated
-	 * counters.
-	 */
-	INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
 	EVENT_CONSTRAINT_END
 };
 
@@ -212,11 +216,26 @@ static struct event_constraint intel_hsw_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-	INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
 	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-	INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
 	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
-	INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+	EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
+	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
+	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
 	EVENT_CONSTRAINT_END
 };
 
@@ -415,6 +434,202 @@ static __initconst const u64 snb_hw_cache_event_ids
 
 };
 
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts because they are not
+ *   reliably counted.
+ */
+
+#define HSW_DEMAND_DATA_RD		BIT_ULL(0)
+#define HSW_DEMAND_RFO			BIT_ULL(1)
+#define HSW_ANY_RESPONSE		BIT_ULL(16)
+#define HSW_SUPPLIER_NONE		BIT_ULL(17)
+#define HSW_L3_MISS_LOCAL_DRAM		BIT_ULL(22)
+#define HSW_L3_MISS_REMOTE_HOP0		BIT_ULL(27)
+#define HSW_L3_MISS_REMOTE_HOP1		BIT_ULL(28)
+#define HSW_L3_MISS_REMOTE_HOP2P	BIT_ULL(29)
+#define HSW_L3_MISS			(HSW_L3_MISS_LOCAL_DRAM| \
+					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+					 HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_SNOOP_NONE			BIT_ULL(31)
+#define HSW_SNOOP_NOT_NEEDED		BIT_ULL(32)
+#define HSW_SNOOP_MISS			BIT_ULL(33)
+#define HSW_SNOOP_HIT_NO_FWD		BIT_ULL(34)
+#define HSW_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
+#define HSW_SNOOP_HITM			BIT_ULL(36)
+#define HSW_SNOOP_NON_DRAM		BIT_ULL(37)
+#define HSW_ANY_SNOOP			(HSW_SNOOP_NONE| \
+					 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
+					 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
+					 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
+#define HSW_SNOOP_DRAM			(HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
+#define HSW_DEMAND_READ			HSW_DEMAND_DATA_RD
+#define HSW_DEMAND_WRITE		HSW_DEMAND_RFO
+#define HSW_L3_MISS_REMOTE		(HSW_L3_MISS_REMOTE_HOP0|\
+					 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_LLC_ACCESS			HSW_ANY_RESPONSE
+
+#define BDW_L3_MISS_LOCAL		BIT(26)
+#define BDW_L3_MISS			(BDW_L3_MISS_LOCAL| \
+					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+					 HSW_L3_MISS_REMOTE_HOP2P)
+
+
+static __initconst const u64 hsw_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x280,	/* ICACHE.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x108,	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x149,	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x6085,	/* ITLB_MISSES.STLB_HIT */
+		[ C(RESULT_MISS)   ] = 0x185,	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
+
+static __initconst const u64 hsw_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+				       HSW_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+				       HSW_L3_MISS|HSW_ANY_SNOOP,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+				       HSW_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+				       HSW_L3_MISS|HSW_ANY_SNOOP,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+				       HSW_L3_MISS_LOCAL_DRAM|
+				       HSW_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+				       HSW_L3_MISS_REMOTE|
+				       HSW_SNOOP_DRAM,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+				       HSW_L3_MISS_LOCAL_DRAM|
+				       HSW_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+				       HSW_L3_MISS_REMOTE|
+				       HSW_SNOOP_DRAM,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
+
 static __initconst const u64 westmere_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1029,21 +1244,10 @@ static __initconst const u64 slm_hw_cache_event_ids
  },
 };
 
-static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
-{
-	/* user explicitly requested branch sampling */
-	if (has_branch_stack(event))
-		return true;
-
-	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
-	    x86_pmu.intel_cap.pebs_format < 2)
-		return true;
-
-	return false;
-}
-
-static void intel_pmu_disable_all(void)
+/*
+ * Use from PMIs where the LBRs are already disabled.
+ */
+static void __intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
@@ -1051,17 +1255,24 @@ static void intel_pmu_disable_all(void)
 
 	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
+	else
+		intel_bts_disable_local();
 
 	intel_pmu_pebs_disable_all();
+}
+
+static void intel_pmu_disable_all(void)
+{
+	__intel_pmu_disable_all();
 	intel_pmu_lbr_disable_all();
 }
 
-static void intel_pmu_enable_all(int added)
+static void __intel_pmu_enable_all(int added, bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	intel_pmu_pebs_enable_all();
-	intel_pmu_lbr_enable_all();
+	intel_pmu_lbr_enable_all(pmi);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
 			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
 
@@ -1073,7 +1284,13 @@ static void intel_pmu_enable_all(int added)
 			return;
 
 		intel_pmu_enable_bts(event->hw.config);
-	}
+	} else
+		intel_bts_enable_local();
+}
+
+static void intel_pmu_enable_all(int added)
+{
+	__intel_pmu_enable_all(added, false);
 }
 
 /*
@@ -1207,7 +1424,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	 * must disable before any actual event
 	 * because any event may be combined with LBR
 	 */
-	if (intel_pmu_needs_lbr_smpl(event))
+	if (needs_branch_stack(event))
 		intel_pmu_lbr_disable(event);
 
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -1268,7 +1485,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	 * must enabled before any actual event
 	 * because any event may be combined with LBR
 	 */
-	if (intel_pmu_needs_lbr_smpl(event))
+	if (needs_branch_stack(event))
 		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
@@ -1334,6 +1551,18 @@ static void intel_pmu_reset(void)
 	if (ds)
 		ds->bts_index = ds->bts_buffer_base;
 
+	/* Ack all overflows and disable fixed counters */
+	if (x86_pmu.version >= 2) {
+		intel_pmu_ack_status(intel_pmu_get_status());
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+	}
+
+	/* Reset LBRs and LBR freezing */
+	if (x86_pmu.lbr_nr) {
+		update_debugctlmsr(get_debugctlmsr() &
+			~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
+	}
+
 	local_irq_restore(flags);
 }
 
@@ -1357,8 +1586,9 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	 */
 	if (!x86_pmu.late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
-	intel_pmu_disable_all();
+	__intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
+	handled += intel_bts_interrupt();
 	status = intel_pmu_get_status();
 	if (!status)
 		goto done;
@@ -1399,6 +1629,14 @@ again:
 	}
 
 	/*
+	 * Intel PT
+	 */
+	if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+		handled++;
+		intel_pt_interrupt();
+	}
+
+	/*
 	 * Checkpointed counters can lead to 'spurious' PMIs because the
 	 * rollback caused by the PMI will have cleared the overflow status
 	 * bit. Therefore always force probe these counters.
@@ -1433,7 +1671,7 @@ again:
 		goto again;
 
 done:
-	intel_pmu_enable_all(0);
+	__intel_pmu_enable_all(0, true);
 	/*
 	 * Only unmask the NMI after the overflow counters
 	 * have been reset. This avoids spurious NMIs on
@@ -1464,7 +1702,7 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx)
 {
-	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
 		return idx;
 
 	if (idx == EXTRA_REG_RSP_0)
@@ -1624,7 +1862,8 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 }
 
 struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
 {
 	struct event_constraint *c;
 
@@ -1641,7 +1880,8 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 }
 
 static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			    struct perf_event *event)
 {
 	struct event_constraint *c;
 
@@ -1649,15 +1889,286 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
-	c = intel_pebs_constraints(event);
+	c = intel_shared_regs_constraints(cpuc, event);
 	if (c)
 		return c;
 
-	c = intel_shared_regs_constraints(cpuc, event);
+	c = intel_pebs_constraints(event);
 	if (c)
 		return c;
 
-	return x86_get_event_constraints(cpuc, event);
+	return x86_get_event_constraints(cpuc, idx, event);
+}
+
+static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* sibling thread */
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return;
+
+	xlo = &excl_cntrs->states[o_tid];
+	xl = &excl_cntrs->states[tid];
+
+	xl->sched_started = true;
+	xl->num_alloc_cntrs = 0;
+	/*
+	 * lock shared state until we are done scheduling
+	 * in stop_event_scheduling()
+	 * makes scheduling appear as a transaction
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+	raw_spin_lock(&excl_cntrs->lock);
+
+	/*
+	 * save initial state of sibling thread
+	 */
+	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* sibling thread */
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return;
+
+	xlo = &excl_cntrs->states[o_tid];
+	xl = &excl_cntrs->states[tid];
+
+	/*
+	 * make new sibling thread state visible
+	 */
+	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+
+	xl->sched_started = false;
+	/*
+	 * release shared state lock (acquired in intel_start_scheduling())
+	 */
+	raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+			   int idx, struct event_constraint *c)
+{
+	struct event_constraint *cx;
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int is_excl, i;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* alternate */
+
+	/*
+	 * validating a group does not require
+	 * enforcing cross-thread  exclusion
+	 */
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return c;
+
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return c;
+	/*
+	 * event requires exclusive counter access
+	 * across HT threads
+	 */
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+	/*
+	 * xl = state of current HT
+	 * xlo = state of sibling HT
+	 */
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	/*
+	 * do not allow scheduling of more than max_alloc_cntrs
+	 * which is set to half the available generic counters.
+	 * this helps avoid counter starvation of sibling thread
+	 * by ensuring at most half the counters cannot be in
+	 * exclusive mode. There is not designated counters for the
+	 * limits. Any N/2 counters can be used. This helps with
+	 * events with specifix counter constraints
+	 */
+	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
+		return &emptyconstraint;
+
+	cx = c;
+
+	/*
+	 * because we modify the constraint, we need
+	 * to make a copy. Static constraints come
+	 * from static const tables.
+	 *
+	 * only needed when constraint has not yet
+	 * been cloned (marked dynamic)
+	 */
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+
+		/* sanity check */
+		if (idx < 0)
+			return &emptyconstraint;
+
+		/*
+		 * grab pre-allocated constraint entry
+		 */
+		cx = &cpuc->constraint_list[idx];
+
+		/*
+		 * initialize dynamic constraint
+		 * with static constraint
+		 */
+		memcpy(cx, c, sizeof(*cx));
+
+		/*
+		 * mark constraint as dynamic, so we
+		 * can free it later on
+		 */
+		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+	}
+
+	/*
+	 * From here on, the constraint is dynamic.
+	 * Either it was just allocated above, or it
+	 * was allocated during a earlier invocation
+	 * of this function
+	 */
+
+	/*
+	 * Modify static constraint with current dynamic
+	 * state of thread
+	 *
+	 * EXCLUSIVE: sibling counter measuring exclusive event
+	 * SHARED   : sibling counter measuring non-exclusive event
+	 * UNUSED   : sibling counter unused
+	 */
+	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+		/*
+		 * exclusive event in sibling counter
+		 * our corresponding counter cannot be used
+		 * regardless of our event
+		 */
+		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+			__clear_bit(i, cx->idxmsk);
+		/*
+		 * if measuring an exclusive event, sibling
+		 * measuring non-exclusive, then counter cannot
+		 * be used
+		 */
+		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+			__clear_bit(i, cx->idxmsk);
+	}
+
+	/*
+	 * recompute actual bit weight for scheduling algorithm
+	 */
+	cx->weight = hweight64(cx->idxmsk64);
+
+	/*
+	 * if we return an empty mask, then switch
+	 * back to static empty constraint to avoid
+	 * the cost of freeing later on
+	 */
+	if (cx->weight == 0)
+		cx = &emptyconstraint;
+
+	return cx;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			    struct perf_event *event)
+{
+	struct event_constraint *c1 = event->hw.constraint;
+	struct event_constraint *c2;
+
+	/*
+	 * first time only
+	 * - static constraint: no change across incremental scheduling calls
+	 * - dynamic constraint: handled by intel_get_excl_constraints()
+	 */
+	c2 = __intel_get_event_constraints(cpuc, idx, event);
+	if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+		bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
+		c1->weight = c2->weight;
+		c2 = c1;
+	}
+
+	if (cpuc->excl_cntrs)
+		return intel_get_excl_constraints(cpuc, event, idx, c2);
+
+	return c2;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+		struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xlo, *xl;
+	unsigned long flags = 0; /* keep compiler happy */
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid;
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake)
+		return;
+
+	WARN_ON_ONCE(!excl_cntrs);
+
+	if (!excl_cntrs)
+		return;
+
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	/*
+	 * put_constraint may be called from x86_schedule_events()
+	 * which already has the lock held so here make locking
+	 * conditional
+	 */
+	if (!xl->sched_started)
+		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+
+	/*
+	 * if event was actually assigned, then mark the
+	 * counter state as unused now
+	 */
+	if (hwc->idx >= 0)
+		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+	if (!xl->sched_started)
+		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
 }
 
 static void
@@ -1678,7 +2189,57 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
+	struct event_constraint *c = event->hw.constraint;
+
 	intel_put_shared_regs_event_constraints(cpuc, event);
+
+	/*
+	 * is PMU has exclusive counter restrictions, then
+	 * all events are subject to and must call the
+	 * put_excl_constraints() routine
+	 */
+	if (c && cpuc->excl_cntrs)
+		intel_put_excl_constraints(cpuc, event);
+
+	/* cleanup dynamic constraint */
+	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
+		event->hw.constraint = NULL;
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
+				    struct perf_event *event, int cntr)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct event_constraint *c = event->hw.constraint;
+	struct intel_excl_states *xlo, *xl;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid;
+	int is_excl;
+
+	if (cpuc->is_fake || !c)
+		return;
+
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+		return;
+
+	WARN_ON_ONCE(!excl_cntrs);
+
+	if (!excl_cntrs)
+		return;
+
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
+
+	if (cntr >= 0) {
+		if (is_excl)
+			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		else
+			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+	}
 }
 
 static void intel_pebs_aliases_core2(struct perf_event *event)
@@ -1747,10 +2308,21 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
 		x86_pmu.pebs_aliases(event);
 
-	if (intel_pmu_needs_lbr_smpl(event)) {
+	if (needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
 			return ret;
+
+		/*
+		 * BTS is set up earlier in this path, so don't account twice
+		 */
+		if (!intel_pmu_has_bts(event)) {
+			/* disallow lbr if conflicting events are present */
+			if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+				return -EBUSY;
+
+			event->destroy = hw_perf_lbr_event_destroy;
+		}
 	}
 
 	if (event->attr.type != PERF_TYPE_RAW)
@@ -1891,9 +2463,12 @@ static struct event_constraint counter2_constraint =
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
 static struct event_constraint *
-hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
 {
-	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+	struct event_constraint *c;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
 
 	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
 	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
@@ -1905,6 +2480,32 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return c;
 }
 
+/*
+ * Broadwell:
+ *
+ * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
+ * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
+ * the two to enforce a minimum period of 128 (the smallest value that has bits
+ * 0-5 cleared and >= 100).
+ *
+ * Because of how the code in x86_perf_event_set_period() works, the truncation
+ * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
+ * to make up for the 'lost' events due to carrying the 'error' in period_left.
+ *
+ * Therefore the effective (average) period matches the requested period,
+ * despite coarser hardware granularity.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+			X86_CONFIG(.event=0xc0, .umask=0x01)) {
+		if (left < 128)
+			left = 128;
+		left &= ~0x3fu;
+	}
+	return left;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -1979,16 +2580,52 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
 	return regs;
 }
 
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+	struct intel_excl_cntrs *c;
+	int i;
+
+	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+			 GFP_KERNEL, cpu_to_node(cpu));
+	if (c) {
+		raw_spin_lock_init(&c->lock);
+		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+			c->states[0].state[i] = INTEL_EXCL_UNUSED;
+			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
+
+			c->states[1].state[i] = INTEL_EXCL_UNUSED;
+			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
+		}
+		c->core_id = -1;
+	}
+	return c;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
-		return NOTIFY_OK;
+	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+		cpuc->shared_regs = allocate_shared_regs(cpu);
+		if (!cpuc->shared_regs)
+			return NOTIFY_BAD;
+	}
+
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
-	cpuc->shared_regs = allocate_shared_regs(cpu);
-	if (!cpuc->shared_regs)
-		return NOTIFY_BAD;
+		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+		if (!cpuc->constraint_list)
+			return NOTIFY_BAD;
+
+		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+		if (!cpuc->excl_cntrs) {
+			kfree(cpuc->constraint_list);
+			kfree(cpuc->shared_regs);
+			return NOTIFY_BAD;
+		}
+		cpuc->excl_thread_id = 0;
+	}
 
 	return NOTIFY_OK;
 }
@@ -2010,13 +2647,15 @@ static void intel_pmu_cpu_starting(int cpu)
 	if (!cpuc->shared_regs)
 		return;
 
-	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
+		void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
+
 		for_each_cpu(i, topology_thread_cpumask(cpu)) {
 			struct intel_shared_regs *pc;
 
 			pc = per_cpu(cpu_hw_events, i).shared_regs;
 			if (pc && pc->core_id == core_id) {
-				cpuc->kfree_on_online = cpuc->shared_regs;
+				*onln = cpuc->shared_regs;
 				cpuc->shared_regs = pc;
 				break;
 			}
@@ -2027,6 +2666,44 @@ static void intel_pmu_cpu_starting(int cpu)
 
 	if (x86_pmu.lbr_sel_map)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+		int h = x86_pmu.num_counters >> 1;
+
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_excl_cntrs *c;
+
+			c = per_cpu(cpu_hw_events, i).excl_cntrs;
+			if (c && c->core_id == core_id) {
+				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+				cpuc->excl_cntrs = c;
+				cpuc->excl_thread_id = 1;
+				break;
+			}
+		}
+		cpuc->excl_cntrs->core_id = core_id;
+		cpuc->excl_cntrs->refcnt++;
+		/*
+		 * set hard limit to half the number of generic counters
+		 */
+		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
+		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
+	}
+}
+
+static void free_excl_cntrs(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct intel_excl_cntrs *c;
+
+	c = cpuc->excl_cntrs;
+	if (c) {
+		if (c->core_id == -1 || --c->refcnt == 0)
+			kfree(c);
+		cpuc->excl_cntrs = NULL;
+		kfree(cpuc->constraint_list);
+		cpuc->constraint_list = NULL;
+	}
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -2041,19 +2718,9 @@ static void intel_pmu_cpu_dying(int cpu)
 		cpuc->shared_regs = NULL;
 	}
 
-	fini_debug_store_on_cpu(cpu);
-}
+	free_excl_cntrs(cpu);
 
-static void intel_pmu_flush_branch_stack(void)
-{
-	/*
-	 * Intel LBR does not tag entries with the
-	 * PID of the current task, then we need to
-	 * flush it on ctxsw
-	 * For now, we simply reset it
-	 */
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_reset();
+	fini_debug_store_on_cpu(cpu);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -2107,7 +2774,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
-	.flush_branch_stack	= intel_pmu_flush_branch_stack,
+	.sched_task		= intel_pmu_lbr_sched_task,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -2264,6 +2931,27 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
+/*
+ * enable software workaround for errata:
+ * SNB: BJ122
+ * IVB: BV98
+ * HSW: HSD29
+ *
+ * Only needed when HT is enabled. However detecting
+ * if HT is enabled is difficult (model specific). So instead,
+ * we enable the workaround in the early boot, and verify if
+ * it is needed in a later initcall phase once we have valid
+ * topology information to check if HT is actually enabled
+ */
+static __init void intel_ht_bug(void)
+{
+	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
+
+	x86_pmu.commit_scheduling = intel_commit_scheduling;
+	x86_pmu.start_scheduling = intel_start_scheduling;
+	x86_pmu.stop_scheduling = intel_stop_scheduling;
+}
+
 EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
 
@@ -2443,7 +3131,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_slm_extra_regs;
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		pr_cont("Silvermont events, ");
 		break;
 
@@ -2461,7 +3149,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 
 		x86_pmu.cpu_events = nhm_events_attrs;
 
@@ -2478,6 +3166,7 @@ __init int intel_pmu_init(void)
 	case 42: /* 32nm SandyBridge         */
 	case 45: /* 32nm SandyBridge-E/EN/EP */
 		x86_add_quirk(intel_sandybridge_quirk);
+		x86_add_quirk(intel_ht_bug);
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2492,9 +3181,11 @@ __init int intel_pmu_init(void)
 			x86_pmu.extra_regs = intel_snbep_extra_regs;
 		else
 			x86_pmu.extra_regs = intel_snb_extra_regs;
+
+
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
 		x86_pmu.cpu_events = snb_events_attrs;
 
@@ -2510,6 +3201,7 @@ __init int intel_pmu_init(void)
 
 	case 58: /* 22nm IvyBridge       */
 	case 62: /* 22nm IvyBridge-EP/EX */
+		x86_add_quirk(intel_ht_bug);
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		/* dTLB-load-misses on IVB is different than SNB */
@@ -2528,8 +3220,8 @@ __init int intel_pmu_init(void)
 		else
 			x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
 		x86_pmu.cpu_events = snb_events_attrs;
 
@@ -2545,19 +3237,20 @@ __init int intel_pmu_init(void)
 	case 63: /* 22nm Haswell Server */
 	case 69: /* 22nm Haswell ULT */
 	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+		x86_add_quirk(intel_ht_bug);
 		x86_pmu.late_ack = true;
-		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_snb();
+		intel_pmu_lbr_init_hsw();
 
 		x86_pmu.event_constraints = intel_hsw_event_constraints;
 		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snbep_extra_regs;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
@@ -2566,6 +3259,39 @@ __init int intel_pmu_init(void)
 		pr_cont("Haswell events, ");
 		break;
 
+	case 61: /* 14nm Broadwell Core-M */
+	case 86: /* 14nm Broadwell Xeon D */
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+		/* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
+		hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
+									 BDW_L3_MISS|HSW_SNOOP_DRAM;
+		hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
+									  HSW_SNOOP_DRAM;
+		hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
+									     BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
+									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+
+		intel_pmu_lbr_init_hsw();
+
+		x86_pmu.event_constraints = intel_bdw_event_constraints;
+		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_snbep_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.limit_period = bdw_limit_period;
+		pr_cont("Broadwell events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
@@ -2651,3 +3377,47 @@ __init int intel_pmu_init(void)
 
 	return 0;
 }
+
+/*
+ * HT bug: phase 2 init
+ * Called once we have valid topology information to check
+ * whether or not HT is enabled
+ * If HT is off, then we disable the workaround
+ */
+static __init int fixup_ht_bug(void)
+{
+	int cpu = smp_processor_id();
+	int w, c;
+	/*
+	 * problem not present on this CPU model, nothing to do
+	 */
+	if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
+		return 0;
+
+	w = cpumask_weight(topology_thread_cpumask(cpu));
+	if (w > 1) {
+		pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
+		return 0;
+	}
+
+	watchdog_nmi_disable_all();
+
+	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
+
+	x86_pmu.commit_scheduling = NULL;
+	x86_pmu.start_scheduling = NULL;
+	x86_pmu.stop_scheduling = NULL;
+
+	watchdog_nmi_enable_all();
+
+	get_online_cpus();
+
+	for_each_online_cpu(c) {
+		free_excl_cntrs(c);
+	}
+
+	put_online_cpus();
+	pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
+	return 0;
+}
+subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
new file mode 100644
index 000000000000..ac1f0c55f379
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -0,0 +1,525 @@
+/*
+ * BTS PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/coredump.h>
+
+#include <asm-generic/sizes.h>
+#include <asm/perf_event.h>
+
+#include "perf_event.h"
+
+struct bts_ctx {
+	struct perf_output_handle	handle;
+	struct debug_store		ds_back;
+	int				started;
+};
+
+static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
+
+#define BTS_RECORD_SIZE		24
+#define BTS_SAFETY_MARGIN	4080
+
+struct bts_phys {
+	struct page	*page;
+	unsigned long	size;
+	unsigned long	offset;
+	unsigned long	displacement;
+};
+
+struct bts_buffer {
+	size_t		real_size;	/* multiple of BTS_RECORD_SIZE */
+	unsigned int	nr_pages;
+	unsigned int	nr_bufs;
+	unsigned int	cur_buf;
+	bool		snapshot;
+	local_t		data_size;
+	local_t		lost;
+	local_t		head;
+	unsigned long	end;
+	void		**data_pages;
+	struct bts_phys	buf[0];
+};
+
+struct pmu bts_pmu;
+
+void intel_pmu_enable_bts(u64 config);
+void intel_pmu_disable_bts(void);
+
+static size_t buf_size(struct page *page)
+{
+	return 1 << (PAGE_SHIFT + page_private(page));
+}
+
+static void *
+bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+{
+	struct bts_buffer *buf;
+	struct page *page;
+	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+	unsigned long offset;
+	size_t size = nr_pages << PAGE_SHIFT;
+	int pg, nbuf, pad;
+
+	/* count all the high order buffers */
+	for (pg = 0, nbuf = 0; pg < nr_pages;) {
+		page = virt_to_page(pages[pg]);
+		if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+			return NULL;
+		pg += 1 << page_private(page);
+		nbuf++;
+	}
+
+	/*
+	 * to avoid interrupts in overwrite mode, only allow one physical
+	 */
+	if (overwrite && nbuf > 1)
+		return NULL;
+
+	buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->nr_pages = nr_pages;
+	buf->nr_bufs = nbuf;
+	buf->snapshot = overwrite;
+	buf->data_pages = pages;
+	buf->real_size = size - size % BTS_RECORD_SIZE;
+
+	for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+		unsigned int __nr_pages;
+
+		page = virt_to_page(pages[pg]);
+		__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+		buf->buf[nbuf].page = page;
+		buf->buf[nbuf].offset = offset;
+		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+		buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+		pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+		buf->buf[nbuf].size -= pad;
+
+		pg += __nr_pages;
+		offset += __nr_pages << PAGE_SHIFT;
+	}
+
+	return buf;
+}
+
+static void bts_buffer_free_aux(void *data)
+{
+	kfree(data);
+}
+
+static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+{
+	return buf->buf[idx].offset + buf->buf[idx].displacement;
+}
+
+static void
+bts_config_buffer(struct bts_buffer *buf)
+{
+	int cpu = raw_smp_processor_id();
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct bts_phys *phys = &buf->buf[buf->cur_buf];
+	unsigned long index, thresh = 0, end = phys->size;
+	struct page *page = phys->page;
+
+	index = local_read(&buf->head);
+
+	if (!buf->snapshot) {
+		if (buf->end < phys->offset + buf_size(page))
+			end = buf->end - phys->offset - phys->displacement;
+
+		index -= phys->offset + phys->displacement;
+
+		if (end - index > BTS_SAFETY_MARGIN)
+			thresh = end - BTS_SAFETY_MARGIN;
+		else if (end - index > BTS_RECORD_SIZE)
+			thresh = end - BTS_RECORD_SIZE;
+		else
+			thresh = end;
+	}
+
+	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
+	ds->bts_index = ds->bts_buffer_base + index;
+	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
+	ds->bts_interrupt_threshold = !buf->snapshot
+		? ds->bts_buffer_base + thresh
+		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
+}
+
+static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
+{
+	unsigned long index = head - phys->offset;
+
+	memset(page_address(phys->page) + index, 0, phys->size - index);
+}
+
+static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
+{
+	if (buf->snapshot)
+		return false;
+
+	if (local_read(&buf->data_size) >= bts->handle.size ||
+	    bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
+		return true;
+
+	return false;
+}
+
+static void bts_update(struct bts_ctx *bts)
+{
+	int cpu = raw_smp_processor_id();
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
+
+	if (!buf)
+		return;
+
+	head = index + bts_buffer_offset(buf, buf->cur_buf);
+	old = local_xchg(&buf->head, head);
+
+	if (!buf->snapshot) {
+		if (old == head)
+			return;
+
+		if (ds->bts_index >= ds->bts_absolute_maximum)
+			local_inc(&buf->lost);
+
+		/*
+		 * old and head are always in the same physical buffer, so we
+		 * can subtract them to get the data size.
+		 */
+		local_add(head - old, &buf->data_size);
+	} else {
+		local_set(&buf->data_size, head);
+	}
+}
+
+static void __bts_event_start(struct perf_event *event)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	u64 config = 0;
+
+	if (!buf || bts_buffer_is_full(buf, bts))
+		return;
+
+	event->hw.state = 0;
+
+	if (!buf->snapshot)
+		config |= ARCH_PERFMON_EVENTSEL_INT;
+	if (!event->attr.exclude_kernel)
+		config |= ARCH_PERFMON_EVENTSEL_OS;
+	if (!event->attr.exclude_user)
+		config |= ARCH_PERFMON_EVENTSEL_USR;
+
+	bts_config_buffer(buf);
+
+	/*
+	 * local barrier to make sure that ds configuration made it
+	 * before we enable BTS
+	 */
+	wmb();
+
+	intel_pmu_enable_bts(config);
+}
+
+static void bts_event_start(struct perf_event *event, int flags)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	__bts_event_start(event);
+
+	/* PMI handler: this counter is running and likely generating PMIs */
+	ACCESS_ONCE(bts->started) = 1;
+}
+
+static void __bts_event_stop(struct perf_event *event)
+{
+	/*
+	 * No extra synchronization is mandated by the documentation to have
+	 * BTS data stores globally visible.
+	 */
+	intel_pmu_disable_bts();
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
+}
+
+static void bts_event_stop(struct perf_event *event, int flags)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	/* PMI handler: don't restart this counter */
+	ACCESS_ONCE(bts->started) = 0;
+
+	__bts_event_stop(event);
+
+	if (flags & PERF_EF_UPDATE)
+		bts_update(bts);
+}
+
+void intel_bts_enable_local(void)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	if (bts->handle.event && bts->started)
+		__bts_event_start(bts->handle.event);
+}
+
+void intel_bts_disable_local(void)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	if (bts->handle.event)
+		__bts_event_stop(bts->handle.event);
+}
+
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+{
+	unsigned long head, space, next_space, pad, gap, skip, wakeup;
+	unsigned int next_buf;
+	struct bts_phys *phys, *next_phys;
+	int ret;
+
+	if (buf->snapshot)
+		return 0;
+
+	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+	if (WARN_ON_ONCE(head != local_read(&buf->head)))
+		return -EINVAL;
+
+	phys = &buf->buf[buf->cur_buf];
+	space = phys->offset + phys->displacement + phys->size - head;
+	pad = space;
+	if (space > handle->size) {
+		space = handle->size;
+		space -= space % BTS_RECORD_SIZE;
+	}
+	if (space <= BTS_SAFETY_MARGIN) {
+		/* See if next phys buffer has more space */
+		next_buf = buf->cur_buf + 1;
+		if (next_buf >= buf->nr_bufs)
+			next_buf = 0;
+		next_phys = &buf->buf[next_buf];
+		gap = buf_size(phys->page) - phys->displacement - phys->size +
+		      next_phys->displacement;
+		skip = pad + gap;
+		if (handle->size >= skip) {
+			next_space = next_phys->size;
+			if (next_space + skip > handle->size) {
+				next_space = handle->size - skip;
+				next_space -= next_space % BTS_RECORD_SIZE;
+			}
+			if (next_space > space || !space) {
+				if (pad)
+					bts_buffer_pad_out(phys, head);
+				ret = perf_aux_output_skip(handle, skip);
+				if (ret)
+					return ret;
+				/* Advance to next phys buffer */
+				phys = next_phys;
+				space = next_space;
+				head = phys->offset + phys->displacement;
+				/*
+				 * After this, cur_buf and head won't match ds
+				 * anymore, so we must not be racing with
+				 * bts_update().
+				 */
+				buf->cur_buf = next_buf;
+				local_set(&buf->head, head);
+			}
+		}
+	}
+
+	/* Don't go far beyond wakeup watermark */
+	wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
+		 handle->head;
+	if (space > wakeup) {
+		space = wakeup;
+		space -= space % BTS_RECORD_SIZE;
+	}
+
+	buf->end = head + space;
+
+	/*
+	 * If we have no space, the lost notification would have been sent when
+	 * we hit absolute_maximum - see bts_update()
+	 */
+	if (!space)
+		return -ENOSPC;
+
+	return 0;
+}
+
+int intel_bts_interrupt(void)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct perf_event *event = bts->handle.event;
+	struct bts_buffer *buf;
+	s64 old_head;
+	int err;
+
+	if (!event || !bts->started)
+		return 0;
+
+	buf = perf_get_aux(&bts->handle);
+	/*
+	 * Skip snapshot counters: they don't use the interrupt, but
+	 * there's no other way of telling, because the pointer will
+	 * keep moving
+	 */
+	if (!buf || buf->snapshot)
+		return 0;
+
+	old_head = local_read(&buf->head);
+	bts_update(bts);
+
+	/* no new data */
+	if (old_head == local_read(&buf->head))
+		return 0;
+
+	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+			    !!local_xchg(&buf->lost, 0));
+
+	buf = perf_aux_output_begin(&bts->handle, event);
+	if (!buf)
+		return 1;
+
+	err = bts_buffer_reset(buf, &bts->handle);
+	if (err)
+		perf_aux_output_end(&bts->handle, 0, false);
+
+	return 1;
+}
+
+static void bts_event_del(struct perf_event *event, int mode)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+
+	bts_event_stop(event, PERF_EF_UPDATE);
+
+	if (buf) {
+		if (buf->snapshot)
+			bts->handle.head =
+				local_xchg(&buf->data_size,
+					   buf->nr_pages << PAGE_SHIFT);
+		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+				    !!local_xchg(&buf->lost, 0));
+	}
+
+	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+}
+
+static int bts_event_add(struct perf_event *event, int mode)
+{
+	struct bts_buffer *buf;
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = -EBUSY;
+
+	event->hw.state = PERF_HES_STOPPED;
+
+	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+		return -EBUSY;
+
+	if (bts->handle.event)
+		return -EBUSY;
+
+	buf = perf_aux_output_begin(&bts->handle, event);
+	if (!buf)
+		return -EINVAL;
+
+	ret = bts_buffer_reset(buf, &bts->handle);
+	if (ret) {
+		perf_aux_output_end(&bts->handle, 0, false);
+		return ret;
+	}
+
+	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+	if (mode & PERF_EF_START) {
+		bts_event_start(event, 0);
+		if (hwc->state & PERF_HES_STOPPED) {
+			bts_event_del(event, 0);
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+static void bts_event_destroy(struct perf_event *event)
+{
+	x86_del_exclusive(x86_lbr_exclusive_bts);
+}
+
+static int bts_event_init(struct perf_event *event)
+{
+	if (event->attr.type != bts_pmu.type)
+		return -ENOENT;
+
+	if (x86_add_exclusive(x86_lbr_exclusive_bts))
+		return -EBUSY;
+
+	event->destroy = bts_event_destroy;
+
+	return 0;
+}
+
+static void bts_event_read(struct perf_event *event)
+{
+}
+
+static __init int bts_init(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+		return -ENODEV;
+
+	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+	bts_pmu.task_ctx_nr	= perf_sw_context;
+	bts_pmu.event_init	= bts_event_init;
+	bts_pmu.add		= bts_event_add;
+	bts_pmu.del		= bts_event_del;
+	bts_pmu.start		= bts_event_start;
+	bts_pmu.stop		= bts_event_stop;
+	bts_pmu.read		= bts_event_read;
+	bts_pmu.setup_aux	= bts_buffer_setup_aux;
+	bts_pmu.free_aux	= bts_buffer_free_aux;
+
+	return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+}
+
+module_init(bts_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
new file mode 100644
index 000000000000..e4d1b8b738fa
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -0,0 +1,1379 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * Based very, very heavily on work by Peter Zijlstra.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+#define MSR_IA32_PQR_ASSOC	0x0c8f
+#define MSR_IA32_QM_CTR		0x0c8e
+#define MSR_IA32_QM_EVTSEL	0x0c8d
+
+static unsigned int cqm_max_rmid = -1;
+static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+
+struct intel_cqm_state {
+	raw_spinlock_t		lock;
+	int			rmid;
+	int			cnt;
+};
+
+static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+
+/*
+ * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
+ * Also protects event->hw.cqm_rmid
+ *
+ * Hold either for stability, both for modification of ->hw.cqm_rmid.
+ */
+static DEFINE_MUTEX(cache_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
+
+/*
+ * Groups of events that have the same target(s), one RMID per group.
+ */
+static LIST_HEAD(cache_groups);
+
+/*
+ * Mask of CPUs for reading CQM values. We only need one per-socket.
+ */
+static cpumask_t cqm_cpumask;
+
+#define RMID_VAL_ERROR		(1ULL << 63)
+#define RMID_VAL_UNAVAIL	(1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
+
+#define QOS_EVENT_MASK	QOS_L3_OCCUP_EVENT_ID
+
+/*
+ * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
+ *
+ * This rmid is always free and is guaranteed to have an associated
+ * near-zero occupancy value, i.e. no cachelines are tagged with this
+ * RMID, once __intel_cqm_rmid_rotate() returns.
+ */
+static unsigned int intel_cqm_rotation_rmid;
+
+#define INVALID_RMID		(-1)
+
+/*
+ * Is @rmid valid for programming the hardware?
+ *
+ * rmid 0 is reserved by the hardware for all non-monitored tasks, which
+ * means that we should never come across an rmid with that value.
+ * Likewise, an rmid value of -1 is used to indicate "no rmid currently
+ * assigned" and is used as part of the rotation code.
+ */
+static inline bool __rmid_valid(unsigned int rmid)
+{
+	if (!rmid || rmid == INVALID_RMID)
+		return false;
+
+	return true;
+}
+
+static u64 __rmid_read(unsigned int rmid)
+{
+	u64 val;
+
+	/*
+	 * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
+	 * it just says that to increase confusion.
+	 */
+	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+
+	/*
+	 * Aside from the ERROR and UNAVAIL bits, assume this thing returns
+	 * the number of cachelines tagged with @rmid.
+	 */
+	return val;
+}
+
+enum rmid_recycle_state {
+	RMID_YOUNG = 0,
+	RMID_AVAILABLE,
+	RMID_DIRTY,
+};
+
+struct cqm_rmid_entry {
+	unsigned int rmid;
+	enum rmid_recycle_state state;
+	struct list_head list;
+	unsigned long queue_time;
+};
+
+/*
+ * cqm_rmid_free_lru - A least recently used list of RMIDs.
+ *
+ * Oldest entry at the head, newest (most recently used) entry at the
+ * tail. This list is never traversed, it's only used to keep track of
+ * the lru order. That is, we only pick entries of the head or insert
+ * them on the tail.
+ *
+ * All entries on the list are 'free', and their RMIDs are not currently
+ * in use. To mark an RMID as in use, remove its entry from the lru
+ * list.
+ *
+ *
+ * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
+ *
+ * This list is contains RMIDs that no one is currently using but that
+ * may have a non-zero occupancy value associated with them. The
+ * rotation worker moves RMIDs from the limbo list to the free list once
+ * the occupancy value drops below __intel_cqm_threshold.
+ *
+ * Both lists are protected by cache_mutex.
+ */
+static LIST_HEAD(cqm_rmid_free_lru);
+static LIST_HEAD(cqm_rmid_limbo_lru);
+
+/*
+ * We use a simple array of pointers so that we can lookup a struct
+ * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
+ * and __put_rmid() from having to worry about dealing with struct
+ * cqm_rmid_entry - they just deal with rmids, i.e. integers.
+ *
+ * Once this array is initialized it is read-only. No locks are required
+ * to access it.
+ *
+ * All entries for all RMIDs can be looked up in the this array at all
+ * times.
+ */
+static struct cqm_rmid_entry **cqm_rmid_ptrs;
+
+static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+{
+	struct cqm_rmid_entry *entry;
+
+	entry = cqm_rmid_ptrs[rmid];
+	WARN_ON(entry->rmid != rmid);
+
+	return entry;
+}
+
+/*
+ * Returns < 0 on fail.
+ *
+ * We expect to be called with cache_mutex held.
+ */
+static int __get_rmid(void)
+{
+	struct cqm_rmid_entry *entry;
+
+	lockdep_assert_held(&cache_mutex);
+
+	if (list_empty(&cqm_rmid_free_lru))
+		return INVALID_RMID;
+
+	entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
+	list_del(&entry->list);
+
+	return entry->rmid;
+}
+
+static void __put_rmid(unsigned int rmid)
+{
+	struct cqm_rmid_entry *entry;
+
+	lockdep_assert_held(&cache_mutex);
+
+	WARN_ON(!__rmid_valid(rmid));
+	entry = __rmid_entry(rmid);
+
+	entry->queue_time = jiffies;
+	entry->state = RMID_YOUNG;
+
+	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
+}
+
+static int intel_cqm_setup_rmid_cache(void)
+{
+	struct cqm_rmid_entry *entry;
+	unsigned int nr_rmids;
+	int r = 0;
+
+	nr_rmids = cqm_max_rmid + 1;
+	cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+				nr_rmids, GFP_KERNEL);
+	if (!cqm_rmid_ptrs)
+		return -ENOMEM;
+
+	for (; r <= cqm_max_rmid; r++) {
+		struct cqm_rmid_entry *entry;
+
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			goto fail;
+
+		INIT_LIST_HEAD(&entry->list);
+		entry->rmid = r;
+		cqm_rmid_ptrs[r] = entry;
+
+		list_add_tail(&entry->list, &cqm_rmid_free_lru);
+	}
+
+	/*
+	 * RMID 0 is special and is always allocated. It's used for all
+	 * tasks that are not monitored.
+	 */
+	entry = __rmid_entry(0);
+	list_del(&entry->list);
+
+	mutex_lock(&cache_mutex);
+	intel_cqm_rotation_rmid = __get_rmid();
+	mutex_unlock(&cache_mutex);
+
+	return 0;
+fail:
+	while (r--)
+		kfree(cqm_rmid_ptrs[r]);
+
+	kfree(cqm_rmid_ptrs);
+	return -ENOMEM;
+}
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+	/* Per-cpu and task events don't mix */
+	if ((a->attach_state & PERF_ATTACH_TASK) !=
+	    (b->attach_state & PERF_ATTACH_TASK))
+		return false;
+
+#ifdef CONFIG_CGROUP_PERF
+	if (a->cgrp != b->cgrp)
+		return false;
+#endif
+
+	/* If not task event, we're machine wide */
+	if (!(b->attach_state & PERF_ATTACH_TASK))
+		return true;
+
+	/*
+	 * Events that target same task are placed into the same cache group.
+	 */
+	if (a->hw.target == b->hw.target)
+		return true;
+
+	/*
+	 * Are we an inherited event?
+	 */
+	if (b->parent == a)
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return perf_cgroup_from_task(event->hw.target);
+
+	return event->cgrp;
+}
+#endif
+
+/*
+ * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ *		   PROHIBITS
+ *     system-wide    -> 	cgroup and task
+ *     cgroup 	      ->	system-wide
+ *     		      ->	task in cgroup
+ *     task 	      -> 	system-wide
+ *     		      ->	task in cgroup
+ *
+ * Call this function before allocating an RMID.
+ */
+static bool __conflict_event(struct perf_event *a, struct perf_event *b)
+{
+#ifdef CONFIG_CGROUP_PERF
+	/*
+	 * We can have any number of cgroups but only one system-wide
+	 * event at a time.
+	 */
+	if (a->cgrp && b->cgrp) {
+		struct perf_cgroup *ac = a->cgrp;
+		struct perf_cgroup *bc = b->cgrp;
+
+		/*
+		 * This condition should have been caught in
+		 * __match_event() and we should be sharing an RMID.
+		 */
+		WARN_ON_ONCE(ac == bc);
+
+		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+			return true;
+
+		return false;
+	}
+
+	if (a->cgrp || b->cgrp) {
+		struct perf_cgroup *ac, *bc;
+
+		/*
+		 * cgroup and system-wide events are mutually exclusive
+		 */
+		if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+		    (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+			return true;
+
+		/*
+		 * Ensure neither event is part of the other's cgroup
+		 */
+		ac = event_to_cgroup(a);
+		bc = event_to_cgroup(b);
+		if (ac == bc)
+			return true;
+
+		/*
+		 * Must have cgroup and non-intersecting task events.
+		 */
+		if (!ac || !bc)
+			return false;
+
+		/*
+		 * We have cgroup and task events, and the task belongs
+		 * to a cgroup. Check for for overlap.
+		 */
+		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+			return true;
+
+		return false;
+	}
+#endif
+	/*
+	 * If one of them is not a task, same story as above with cgroups.
+	 */
+	if (!(a->attach_state & PERF_ATTACH_TASK) ||
+	    !(b->attach_state & PERF_ATTACH_TASK))
+		return true;
+
+	/*
+	 * Must be non-overlapping.
+	 */
+	return false;
+}
+
+struct rmid_read {
+	unsigned int rmid;
+	atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info);
+
+/*
+ * Exchange the RMID of a group of events.
+ */
+static unsigned int
+intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+{
+	struct perf_event *event;
+	unsigned int old_rmid = group->hw.cqm_rmid;
+	struct list_head *head = &group->hw.cqm_group_entry;
+
+	lockdep_assert_held(&cache_mutex);
+
+	/*
+	 * If our RMID is being deallocated, perform a read now.
+	 */
+	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
+		struct rmid_read rr = {
+			.value = ATOMIC64_INIT(0),
+			.rmid = old_rmid,
+		};
+
+		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
+				 &rr, 1);
+		local64_set(&group->count, atomic64_read(&rr.value));
+	}
+
+	raw_spin_lock_irq(&cache_lock);
+
+	group->hw.cqm_rmid = rmid;
+	list_for_each_entry(event, head, hw.cqm_group_entry)
+		event->hw.cqm_rmid = rmid;
+
+	raw_spin_unlock_irq(&cache_lock);
+
+	return old_rmid;
+}
+
+/*
+ * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
+ * cachelines are still tagged with RMIDs in limbo, we progressively
+ * increment the threshold until we find an RMID in limbo with <=
+ * __intel_cqm_threshold lines tagged. This is designed to mitigate the
+ * problem where cachelines tagged with an RMID are not steadily being
+ * evicted.
+ *
+ * On successful rotations we decrease the threshold back towards zero.
+ *
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ */
+static unsigned int __intel_cqm_threshold;
+static unsigned int __intel_cqm_max_threshold;
+
+/*
+ * Test whether an RMID has a zero occupancy value on this cpu.
+ */
+static void intel_cqm_stable(void *arg)
+{
+	struct cqm_rmid_entry *entry;
+
+	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+		if (entry->state != RMID_AVAILABLE)
+			break;
+
+		if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
+			entry->state = RMID_DIRTY;
+	}
+}
+
+/*
+ * If we have group events waiting for an RMID that don't conflict with
+ * events already running, assign @rmid.
+ */
+static bool intel_cqm_sched_in_event(unsigned int rmid)
+{
+	struct perf_event *leader, *event;
+
+	lockdep_assert_held(&cache_mutex);
+
+	leader = list_first_entry(&cache_groups, struct perf_event,
+				  hw.cqm_groups_entry);
+	event = leader;
+
+	list_for_each_entry_continue(event, &cache_groups,
+				     hw.cqm_groups_entry) {
+		if (__rmid_valid(event->hw.cqm_rmid))
+			continue;
+
+		if (__conflict_event(event, leader))
+			continue;
+
+		intel_cqm_xchg_rmid(event, rmid);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Initially use this constant for both the limbo queue time and the
+ * rotation timer interval, pmu::hrtimer_interval_ms.
+ *
+ * They don't need to be the same, but the two are related since if you
+ * rotate faster than you recycle RMIDs, you may run out of available
+ * RMIDs.
+ */
+#define RMID_DEFAULT_QUEUE_TIME 250	/* ms */
+
+static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
+
+/*
+ * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
+ * @nr_available: number of freeable RMIDs on the limbo list
+ *
+ * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
+ * cachelines are tagged with those RMIDs. After this we can reuse them
+ * and know that the current set of active RMIDs is stable.
+ *
+ * Return %true or %false depending on whether stabilization needs to be
+ * reattempted.
+ *
+ * If we return %true then @nr_available is updated to indicate the
+ * number of RMIDs on the limbo list that have been queued for the
+ * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
+ * are above __intel_cqm_threshold.
+ */
+static bool intel_cqm_rmid_stabilize(unsigned int *available)
+{
+	struct cqm_rmid_entry *entry, *tmp;
+
+	lockdep_assert_held(&cache_mutex);
+
+	*available = 0;
+	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+		unsigned long min_queue_time;
+		unsigned long now = jiffies;
+
+		/*
+		 * We hold RMIDs placed into limbo for a minimum queue
+		 * time. Before the minimum queue time has elapsed we do
+		 * not recycle RMIDs.
+		 *
+		 * The reasoning is that until a sufficient time has
+		 * passed since we stopped using an RMID, any RMID
+		 * placed onto the limbo list will likely still have
+		 * data tagged in the cache, which means we'll probably
+		 * fail to recycle it anyway.
+		 *
+		 * We can save ourselves an expensive IPI by skipping
+		 * any RMIDs that have not been queued for the minimum
+		 * time.
+		 */
+		min_queue_time = entry->queue_time +
+			msecs_to_jiffies(__rmid_queue_time_ms);
+
+		if (time_after(min_queue_time, now))
+			break;
+
+		entry->state = RMID_AVAILABLE;
+		(*available)++;
+	}
+
+	/*
+	 * Fast return if none of the RMIDs on the limbo list have been
+	 * sitting on the queue for the minimum queue time.
+	 */
+	if (!*available)
+		return false;
+
+	/*
+	 * Test whether an RMID is free for each package.
+	 */
+	on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
+
+	list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
+		/*
+		 * Exhausted all RMIDs that have waited min queue time.
+		 */
+		if (entry->state == RMID_YOUNG)
+			break;
+
+		if (entry->state == RMID_DIRTY)
+			continue;
+
+		list_del(&entry->list);	/* remove from limbo */
+
+		/*
+		 * The rotation RMID gets priority if it's
+		 * currently invalid. In which case, skip adding
+		 * the RMID to the the free lru.
+		 */
+		if (!__rmid_valid(intel_cqm_rotation_rmid)) {
+			intel_cqm_rotation_rmid = entry->rmid;
+			continue;
+		}
+
+		/*
+		 * If we have groups waiting for RMIDs, hand
+		 * them one now provided they don't conflict.
+		 */
+		if (intel_cqm_sched_in_event(entry->rmid))
+			continue;
+
+		/*
+		 * Otherwise place it onto the free list.
+		 */
+		list_add_tail(&entry->list, &cqm_rmid_free_lru);
+	}
+
+
+	return __rmid_valid(intel_cqm_rotation_rmid);
+}
+
+/*
+ * Pick a victim group and move it to the tail of the group list.
+ * @next: The first group without an RMID
+ */
+static void __intel_cqm_pick_and_rotate(struct perf_event *next)
+{
+	struct perf_event *rotor;
+	unsigned int rmid;
+
+	lockdep_assert_held(&cache_mutex);
+
+	rotor = list_first_entry(&cache_groups, struct perf_event,
+				 hw.cqm_groups_entry);
+
+	/*
+	 * The group at the front of the list should always have a valid
+	 * RMID. If it doesn't then no groups have RMIDs assigned and we
+	 * don't need to rotate the list.
+	 */
+	if (next == rotor)
+		return;
+
+	rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
+	__put_rmid(rmid);
+
+	list_rotate_left(&cache_groups);
+}
+
+/*
+ * Deallocate the RMIDs from any events that conflict with @event, and
+ * place them on the back of the group list.
+ */
+static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
+{
+	struct perf_event *group, *g;
+	unsigned int rmid;
+
+	lockdep_assert_held(&cache_mutex);
+
+	list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
+		if (group == event)
+			continue;
+
+		rmid = group->hw.cqm_rmid;
+
+		/*
+		 * Skip events that don't have a valid RMID.
+		 */
+		if (!__rmid_valid(rmid))
+			continue;
+
+		/*
+		 * No conflict? No problem! Leave the event alone.
+		 */
+		if (!__conflict_event(group, event))
+			continue;
+
+		intel_cqm_xchg_rmid(group, INVALID_RMID);
+		__put_rmid(rmid);
+	}
+}
+
+/*
+ * Attempt to rotate the groups and assign new RMIDs.
+ *
+ * We rotate for two reasons,
+ *   1. To handle the scheduling of conflicting events
+ *   2. To recycle RMIDs
+ *
+ * Rotating RMIDs is complicated because the hardware doesn't give us
+ * any clues.
+ *
+ * There's problems with the hardware interface; when you change the
+ * task:RMID map cachelines retain their 'old' tags, giving a skewed
+ * picture. In order to work around this, we must always keep one free
+ * RMID - intel_cqm_rotation_rmid.
+ *
+ * Rotation works by taking away an RMID from a group (the old RMID),
+ * and assigning the free RMID to another group (the new RMID). We must
+ * then wait for the old RMID to not be used (no cachelines tagged).
+ * This ensure that all cachelines are tagged with 'active' RMIDs. At
+ * this point we can start reading values for the new RMID and treat the
+ * old RMID as the free RMID for the next rotation.
+ *
+ * Return %true or %false depending on whether we did any rotating.
+ */
+static bool __intel_cqm_rmid_rotate(void)
+{
+	struct perf_event *group, *start = NULL;
+	unsigned int threshold_limit;
+	unsigned int nr_needed = 0;
+	unsigned int nr_available;
+	bool rotated = false;
+
+	mutex_lock(&cache_mutex);
+
+again:
+	/*
+	 * Fast path through this function if there are no groups and no
+	 * RMIDs that need cleaning.
+	 */
+	if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
+		goto out;
+
+	list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
+		if (!__rmid_valid(group->hw.cqm_rmid)) {
+			if (!start)
+				start = group;
+			nr_needed++;
+		}
+	}
+
+	/*
+	 * We have some event groups, but they all have RMIDs assigned
+	 * and no RMIDs need cleaning.
+	 */
+	if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
+		goto out;
+
+	if (!nr_needed)
+		goto stabilize;
+
+	/*
+	 * We have more event groups without RMIDs than available RMIDs,
+	 * or we have event groups that conflict with the ones currently
+	 * scheduled.
+	 *
+	 * We force deallocate the rmid of the group at the head of
+	 * cache_groups. The first event group without an RMID then gets
+	 * assigned intel_cqm_rotation_rmid. This ensures we always make
+	 * forward progress.
+	 *
+	 * Rotate the cache_groups list so the previous head is now the
+	 * tail.
+	 */
+	__intel_cqm_pick_and_rotate(start);
+
+	/*
+	 * If the rotation is going to succeed, reduce the threshold so
+	 * that we don't needlessly reuse dirty RMIDs.
+	 */
+	if (__rmid_valid(intel_cqm_rotation_rmid)) {
+		intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
+		intel_cqm_rotation_rmid = __get_rmid();
+
+		intel_cqm_sched_out_conflicting_events(start);
+
+		if (__intel_cqm_threshold)
+			__intel_cqm_threshold--;
+	}
+
+	rotated = true;
+
+stabilize:
+	/*
+	 * We now need to stablize the RMID we freed above (if any) to
+	 * ensure that the next time we rotate we have an RMID with zero
+	 * occupancy value.
+	 *
+	 * Alternatively, if we didn't need to perform any rotation,
+	 * we'll have a bunch of RMIDs in limbo that need stabilizing.
+	 */
+	threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
+
+	while (intel_cqm_rmid_stabilize(&nr_available) &&
+	       __intel_cqm_threshold < threshold_limit) {
+		unsigned int steal_limit;
+
+		/*
+		 * Don't spin if nobody is actively waiting for an RMID,
+		 * the rotation worker will be kicked as soon as an
+		 * event needs an RMID anyway.
+		 */
+		if (!nr_needed)
+			break;
+
+		/* Allow max 25% of RMIDs to be in limbo. */
+		steal_limit = (cqm_max_rmid + 1) / 4;
+
+		/*
+		 * We failed to stabilize any RMIDs so our rotation
+		 * logic is now stuck. In order to make forward progress
+		 * we have a few options:
+		 *
+		 *   1. rotate ("steal") another RMID
+		 *   2. increase the threshold
+		 *   3. do nothing
+		 *
+		 * We do both of 1. and 2. until we hit the steal limit.
+		 *
+		 * The steal limit prevents all RMIDs ending up on the
+		 * limbo list. This can happen if every RMID has a
+		 * non-zero occupancy above threshold_limit, and the
+		 * occupancy values aren't dropping fast enough.
+		 *
+		 * Note that there is prioritisation at work here - we'd
+		 * rather increase the number of RMIDs on the limbo list
+		 * than increase the threshold, because increasing the
+		 * threshold skews the event data (because we reuse
+		 * dirty RMIDs) - threshold bumps are a last resort.
+		 */
+		if (nr_available < steal_limit)
+			goto again;
+
+		__intel_cqm_threshold++;
+	}
+
+out:
+	mutex_unlock(&cache_mutex);
+	return rotated;
+}
+
+static void intel_cqm_rmid_rotate(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
+
+static struct pmu intel_cqm_pmu;
+
+static void intel_cqm_rmid_rotate(struct work_struct *work)
+{
+	unsigned long delay;
+
+	__intel_cqm_rmid_rotate();
+
+	delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
+	schedule_delayed_work(&intel_cqm_rmid_work, delay);
+}
+
+/*
+ * Find a group and setup RMID.
+ *
+ * If we're part of a group, we use the group's RMID.
+ */
+static void intel_cqm_setup_event(struct perf_event *event,
+				  struct perf_event **group)
+{
+	struct perf_event *iter;
+	unsigned int rmid;
+	bool conflict = false;
+
+	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+		rmid = iter->hw.cqm_rmid;
+
+		if (__match_event(iter, event)) {
+			/* All tasks in a group share an RMID */
+			event->hw.cqm_rmid = rmid;
+			*group = iter;
+			return;
+		}
+
+		/*
+		 * We only care about conflicts for events that are
+		 * actually scheduled in (and hence have a valid RMID).
+		 */
+		if (__conflict_event(iter, event) && __rmid_valid(rmid))
+			conflict = true;
+	}
+
+	if (conflict)
+		rmid = INVALID_RMID;
+	else
+		rmid = __get_rmid();
+
+	event->hw.cqm_rmid = rmid;
+}
+
+static void intel_cqm_event_read(struct perf_event *event)
+{
+	unsigned long flags;
+	unsigned int rmid;
+	u64 val;
+
+	/*
+	 * Task events are handled by intel_cqm_event_count().
+	 */
+	if (event->cpu == -1)
+		return;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	rmid = event->hw.cqm_rmid;
+
+	if (!__rmid_valid(rmid))
+		goto out;
+
+	val = __rmid_read(rmid);
+
+	/*
+	 * Ignore this reading on error states and do not update the value.
+	 */
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		goto out;
+
+	local64_set(&event->count, val);
+out:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+static void __intel_cqm_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	u64 val;
+
+	val = __rmid_read(rr->rmid);
+
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+
+	atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+	return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+	unsigned long flags;
+	struct rmid_read rr = {
+		.value = ATOMIC64_INIT(0),
+	};
+
+	/*
+	 * We only need to worry about task events. System-wide events
+	 * are handled like usual, i.e. entirely with
+	 * intel_cqm_event_read().
+	 */
+	if (event->cpu != -1)
+		return __perf_event_count(event);
+
+	/*
+	 * Only the group leader gets to report values. This stops us
+	 * reporting duplicate values to userspace, and gives us a clear
+	 * rule for which task gets to report the values.
+	 *
+	 * Note that it is impossible to attribute these values to
+	 * specific packages - we forfeit that ability when we create
+	 * task events.
+	 */
+	if (!cqm_group_leader(event))
+		return 0;
+
+	/*
+	 * Notice that we don't perform the reading of an RMID
+	 * atomically, because we can't hold a spin lock across the
+	 * IPIs.
+	 *
+	 * Speculatively perform the read, since @event might be
+	 * assigned a different (possibly invalid) RMID while we're
+	 * busying performing the IPI calls. It's therefore necessary to
+	 * check @event's RMID afterwards, and if it has changed,
+	 * discard the result of the read.
+	 */
+	rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
+
+	if (!__rmid_valid(rr.rmid))
+		goto out;
+
+	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	if (event->hw.cqm_rmid == rr.rmid)
+		local64_set(&event->count, atomic64_read(&rr.value));
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+out:
+	return __perf_event_count(event);
+}
+
+static void intel_cqm_event_start(struct perf_event *event, int mode)
+{
+	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	unsigned int rmid = event->hw.cqm_rmid;
+	unsigned long flags;
+
+	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+		return;
+
+	event->hw.cqm_state &= ~PERF_HES_STOPPED;
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+
+	if (state->cnt++)
+		WARN_ON_ONCE(state->rmid != rmid);
+	else
+		WARN_ON_ONCE(state->rmid);
+
+	state->rmid = rmid;
+	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+static void intel_cqm_event_stop(struct perf_event *event, int mode)
+{
+	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	unsigned long flags;
+
+	if (event->hw.cqm_state & PERF_HES_STOPPED)
+		return;
+
+	event->hw.cqm_state |= PERF_HES_STOPPED;
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	intel_cqm_event_read(event);
+
+	if (!--state->cnt) {
+		state->rmid = 0;
+		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+	} else {
+		WARN_ON_ONCE(!state->rmid);
+	}
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+static int intel_cqm_event_add(struct perf_event *event, int mode)
+{
+	unsigned long flags;
+	unsigned int rmid;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	event->hw.cqm_state = PERF_HES_STOPPED;
+	rmid = event->hw.cqm_rmid;
+
+	if (__rmid_valid(rmid) && (mode & PERF_EF_START))
+		intel_cqm_event_start(event, mode);
+
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return 0;
+}
+
+static void intel_cqm_event_del(struct perf_event *event, int mode)
+{
+	intel_cqm_event_stop(event, mode);
+}
+
+static void intel_cqm_event_destroy(struct perf_event *event)
+{
+	struct perf_event *group_other = NULL;
+
+	mutex_lock(&cache_mutex);
+
+	/*
+	 * If there's another event in this group...
+	 */
+	if (!list_empty(&event->hw.cqm_group_entry)) {
+		group_other = list_first_entry(&event->hw.cqm_group_entry,
+					       struct perf_event,
+					       hw.cqm_group_entry);
+		list_del(&event->hw.cqm_group_entry);
+	}
+
+	/*
+	 * And we're the group leader..
+	 */
+	if (cqm_group_leader(event)) {
+		/*
+		 * If there was a group_other, make that leader, otherwise
+		 * destroy the group and return the RMID.
+		 */
+		if (group_other) {
+			list_replace(&event->hw.cqm_groups_entry,
+				     &group_other->hw.cqm_groups_entry);
+		} else {
+			unsigned int rmid = event->hw.cqm_rmid;
+
+			if (__rmid_valid(rmid))
+				__put_rmid(rmid);
+			list_del(&event->hw.cqm_groups_entry);
+		}
+	}
+
+	mutex_unlock(&cache_mutex);
+}
+
+static int intel_cqm_event_init(struct perf_event *event)
+{
+	struct perf_event *group = NULL;
+	bool rotate = false;
+
+	if (event->attr.type != intel_cqm_pmu.type)
+		return -ENOENT;
+
+	if (event->attr.config & ~QOS_EVENT_MASK)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
+	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+
+	event->destroy = intel_cqm_event_destroy;
+
+	mutex_lock(&cache_mutex);
+
+	/* Will also set rmid */
+	intel_cqm_setup_event(event, &group);
+
+	if (group) {
+		list_add_tail(&event->hw.cqm_group_entry,
+			      &group->hw.cqm_group_entry);
+	} else {
+		list_add_tail(&event->hw.cqm_groups_entry,
+			      &cache_groups);
+
+		/*
+		 * All RMIDs are either in use or have recently been
+		 * used. Kick the rotation worker to clean/free some.
+		 *
+		 * We only do this for the group leader, rather than for
+		 * every event in a group to save on needless work.
+		 */
+		if (!__rmid_valid(event->hw.cqm_rmid))
+			rotate = true;
+	}
+
+	mutex_unlock(&cache_mutex);
+
+	if (rotate)
+		schedule_delayed_work(&intel_cqm_rmid_work, 0);
+
+	return 0;
+}
+
+EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
+EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
+EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
+EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
+EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
+
+static struct attribute *intel_cqm_events_attr[] = {
+	EVENT_PTR(intel_cqm_llc),
+	EVENT_PTR(intel_cqm_llc_pkg),
+	EVENT_PTR(intel_cqm_llc_unit),
+	EVENT_PTR(intel_cqm_llc_scale),
+	EVENT_PTR(intel_cqm_llc_snapshot),
+	NULL,
+};
+
+static struct attribute_group intel_cqm_events_group = {
+	.name = "events",
+	.attrs = intel_cqm_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *intel_cqm_formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group intel_cqm_format_group = {
+	.name = "format",
+	.attrs = intel_cqm_formats_attr,
+};
+
+static ssize_t
+max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
+			   char *page)
+{
+	ssize_t rv;
+
+	mutex_lock(&cache_mutex);
+	rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
+	mutex_unlock(&cache_mutex);
+
+	return rv;
+}
+
+static ssize_t
+max_recycle_threshold_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned int bytes, cachelines;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &bytes);
+	if (ret)
+		return ret;
+
+	mutex_lock(&cache_mutex);
+
+	__intel_cqm_max_threshold = bytes;
+	cachelines = bytes / cqm_l3_scale;
+
+	/*
+	 * The new maximum takes effect immediately.
+	 */
+	if (__intel_cqm_threshold > cachelines)
+		__intel_cqm_threshold = cachelines;
+
+	mutex_unlock(&cache_mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(max_recycle_threshold);
+
+static struct attribute *intel_cqm_attrs[] = {
+	&dev_attr_max_recycle_threshold.attr,
+	NULL,
+};
+
+static const struct attribute_group intel_cqm_group = {
+	.attrs = intel_cqm_attrs,
+};
+
+static const struct attribute_group *intel_cqm_attr_groups[] = {
+	&intel_cqm_events_group,
+	&intel_cqm_format_group,
+	&intel_cqm_group,
+	NULL,
+};
+
+static struct pmu intel_cqm_pmu = {
+	.hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+	.attr_groups	     = intel_cqm_attr_groups,
+	.task_ctx_nr	     = perf_sw_context,
+	.event_init	     = intel_cqm_event_init,
+	.add		     = intel_cqm_event_add,
+	.del		     = intel_cqm_event_del,
+	.start		     = intel_cqm_event_start,
+	.stop		     = intel_cqm_event_stop,
+	.read		     = intel_cqm_event_read,
+	.count		     = intel_cqm_event_count,
+};
+
+static inline void cqm_pick_event_reader(int cpu)
+{
+	int phys_id = topology_physical_package_id(cpu);
+	int i;
+
+	for_each_cpu(i, &cqm_cpumask) {
+		if (phys_id == topology_physical_package_id(i))
+			return;	/* already got reader for this socket */
+	}
+
+	cpumask_set_cpu(cpu, &cqm_cpumask);
+}
+
+static void intel_cqm_cpu_prepare(unsigned int cpu)
+{
+	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	raw_spin_lock_init(&state->lock);
+	state->rmid = 0;
+	state->cnt  = 0;
+
+	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
+	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+}
+
+static void intel_cqm_cpu_exit(unsigned int cpu)
+{
+	int phys_id = topology_physical_package_id(cpu);
+	int i;
+
+	/*
+	 * Is @cpu a designated cqm reader?
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
+		return;
+
+	for_each_online_cpu(i) {
+		if (i == cpu)
+			continue;
+
+		if (phys_id == topology_physical_package_id(i)) {
+			cpumask_set_cpu(i, &cqm_cpumask);
+			break;
+		}
+	}
+}
+
+static int intel_cqm_cpu_notifier(struct notifier_block *nb,
+				  unsigned long action, void *hcpu)
+{
+	unsigned int cpu  = (unsigned long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		intel_cqm_cpu_prepare(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		intel_cqm_cpu_exit(cpu);
+		break;
+	case CPU_STARTING:
+		cqm_pick_event_reader(cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id intel_cqm_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
+	{}
+};
+
+static int __init intel_cqm_init(void)
+{
+	char *str, scale[20];
+	int i, cpu, ret;
+
+	if (!x86_match_cpu(intel_cqm_match))
+		return -ENODEV;
+
+	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+
+	/*
+	 * It's possible that not all resources support the same number
+	 * of RMIDs. Instead of making scheduling much more complicated
+	 * (where we have to match a task's RMID to a cpu that supports
+	 * that many RMIDs) just find the minimum RMIDs supported across
+	 * all cpus.
+	 *
+	 * Also, check that the scales match on all cpus.
+	 */
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->x86_cache_max_rmid < cqm_max_rmid)
+			cqm_max_rmid = c->x86_cache_max_rmid;
+
+		if (c->x86_cache_occ_scale != cqm_l3_scale) {
+			pr_err("Multiple LLC scale values, disabling\n");
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/*
+	 * A reasonable upper limit on the max threshold is the number
+	 * of lines tagged per RMID if all RMIDs have the same number of
+	 * lines tagged in the LLC.
+	 *
+	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+	 */
+	__intel_cqm_max_threshold =
+		boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
+
+	snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
+	str = kstrdup(scale, GFP_KERNEL);
+	if (!str) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	event_attr_intel_cqm_llc_scale.event_str = str;
+
+	ret = intel_cqm_setup_rmid_cache();
+	if (ret)
+		goto out;
+
+	for_each_online_cpu(i) {
+		intel_cqm_cpu_prepare(i);
+		cqm_pick_event_reader(i);
+	}
+
+	__perf_cpu_notifier(intel_cqm_cpu_notifier);
+
+	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
+	if (ret)
+		pr_err("Intel CQM perf registration failed: %d\n", ret);
+	else
+		pr_info("Intel CQM monitoring enabled\n");
+
+out:
+	cpu_notifier_register_done();
+
+	return ret;
+}
+device_initcall(intel_cqm_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 073983398364..813f75d71175 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -461,7 +461,8 @@ void intel_pmu_enable_bts(u64 config)
 
 	debugctlmsr |= DEBUGCTLMSR_TR;
 	debugctlmsr |= DEBUGCTLMSR_BTS;
-	debugctlmsr |= DEBUGCTLMSR_BTINT;
+	if (config & ARCH_PERFMON_EVENTSEL_INT)
+		debugctlmsr |= DEBUGCTLMSR_BTINT;
 
 	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
@@ -557,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
 	EVENT_CONSTRAINT_END
 };
 
@@ -564,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
 	EVENT_CONSTRAINT_END
 };
 
@@ -587,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 	EVENT_CONSTRAINT_END
 };
 
@@ -602,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 	EVENT_CONSTRAINT_END
 };
 
@@ -611,6 +620,10 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
@@ -622,6 +635,10 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
         EVENT_CONSTRAINT_END
@@ -633,16 +650,16 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
-	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
-	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 58f1a94beaf0..94e5b506caa6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -39,6 +39,7 @@ static enum {
 #define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
 #define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
 #define LBR_FAR_BIT		8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT	9 /* enable call stack */
 
 #define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
 #define LBR_USER	(1 << LBR_USER_BIT)
@@ -49,6 +50,7 @@ static enum {
 #define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
 #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
 #define LBR_FAR		(1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
@@ -69,33 +71,31 @@ static enum {
 #define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
 #define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
-#define for_each_branch_sample_type(x) \
-	for ((x) = PERF_SAMPLE_BRANCH_USER; \
-	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
-
 /*
  * x86control flow change classification
  * x86control flow changes include branches, interrupts, traps, faults
  */
 enum {
-	X86_BR_NONE     = 0,      /* unknown */
-
-	X86_BR_USER     = 1 << 0, /* branch target is user */
-	X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
-
-	X86_BR_CALL     = 1 << 2, /* call */
-	X86_BR_RET      = 1 << 3, /* return */
-	X86_BR_SYSCALL  = 1 << 4, /* syscall */
-	X86_BR_SYSRET   = 1 << 5, /* syscall return */
-	X86_BR_INT      = 1 << 6, /* sw interrupt */
-	X86_BR_IRET     = 1 << 7, /* return from interrupt */
-	X86_BR_JCC      = 1 << 8, /* conditional */
-	X86_BR_JMP      = 1 << 9, /* jump */
-	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
-	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
-	X86_BR_ABORT    = 1 << 12,/* transaction abort */
-	X86_BR_IN_TX    = 1 << 13,/* in transaction */
-	X86_BR_NO_TX    = 1 << 14,/* not in transaction */
+	X86_BR_NONE		= 0,      /* unknown */
+
+	X86_BR_USER		= 1 << 0, /* branch target is user */
+	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */
+
+	X86_BR_CALL		= 1 << 2, /* call */
+	X86_BR_RET		= 1 << 3, /* return */
+	X86_BR_SYSCALL		= 1 << 4, /* syscall */
+	X86_BR_SYSRET		= 1 << 5, /* syscall return */
+	X86_BR_INT		= 1 << 6, /* sw interrupt */
+	X86_BR_IRET		= 1 << 7, /* return from interrupt */
+	X86_BR_JCC		= 1 << 8, /* conditional */
+	X86_BR_JMP		= 1 << 9, /* jump */
+	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
+	X86_BR_ABORT		= 1 << 12,/* transaction abort */
+	X86_BR_IN_TX		= 1 << 13,/* in transaction */
+	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
+	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
+	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -112,13 +112,15 @@ enum {
 	 X86_BR_JMP	 |\
 	 X86_BR_IRQ	 |\
 	 X86_BR_ABORT	 |\
-	 X86_BR_IND_CALL)
+	 X86_BR_IND_CALL |\
+	 X86_BR_ZERO_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
 
 #define X86_BR_ANY_CALL		 \
 	(X86_BR_CALL		|\
 	 X86_BR_IND_CALL	|\
+	 X86_BR_ZERO_CALL	|\
 	 X86_BR_SYSCALL		|\
 	 X86_BR_IRQ		|\
 	 X86_BR_INT)
@@ -130,17 +132,32 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
  * otherwise it becomes near impossible to get a reliable stack.
  */
 
-static void __intel_pmu_lbr_enable(void)
+static void __intel_pmu_lbr_enable(bool pmi)
 {
-	u64 debugctl;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 debugctl, lbr_select = 0, orig_debugctl;
 
-	if (cpuc->lbr_sel)
-		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
+	/*
+	 * No need to reprogram LBR_SELECT in a PMI, as it
+	 * did not change.
+	 */
+	if (cpuc->lbr_sel && !pmi) {
+		lbr_select = cpuc->lbr_sel->config;
+		wrmsrl(MSR_LBR_SELECT, lbr_select);
+	}
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	orig_debugctl = debugctl;
+	debugctl |= DEBUGCTLMSR_LBR;
+	/*
+	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
+	 * may cause superfluous increase/decrease of LBR_TOS.
+	 */
+	if (!(lbr_select & LBR_CALL_STACK))
+		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+	if (orig_debugctl != debugctl)
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
 static void __intel_pmu_lbr_disable(void)
@@ -181,9 +198,116 @@ void intel_pmu_lbr_reset(void)
 		intel_pmu_lbr_reset_64();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
+static inline u64 intel_pmu_lbr_tos(void)
+{
+	u64 tos;
+
+	rdmsrl(x86_pmu.lbr_tos, tos);
+	return tos;
+}
+
+enum {
+	LBR_NONE,
+	LBR_VALID,
+};
+
+static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+{
+	int i;
+	unsigned lbr_idx, mask;
+	u64 tos;
+
+	if (task_ctx->lbr_callstack_users == 0 ||
+	    task_ctx->lbr_stack_state == LBR_NONE) {
+		intel_pmu_lbr_reset();
+		return;
+	}
+
+	mask = x86_pmu.lbr_nr - 1;
+	tos = intel_pmu_lbr_tos();
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		lbr_idx = (tos - i) & mask;
+		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+	}
+	task_ctx->lbr_stack_state = LBR_NONE;
+}
+
+static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+{
+	int i;
+	unsigned lbr_idx, mask;
+	u64 tos;
+
+	if (task_ctx->lbr_callstack_users == 0) {
+		task_ctx->lbr_stack_state = LBR_NONE;
+		return;
+	}
+
+	mask = x86_pmu.lbr_nr - 1;
+	tos = intel_pmu_lbr_tos();
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		lbr_idx = (tos - i) & mask;
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+	}
+	task_ctx->lbr_stack_state = LBR_VALID;
+}
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx;
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	/*
+	 * If LBR callstack feature is enabled and the stack was saved when
+	 * the task was scheduled out, restore the stack. Otherwise flush
+	 * the LBR stack.
+	 */
+	task_ctx = ctx ? ctx->task_ctx_data : NULL;
+	if (task_ctx) {
+		if (sched_in) {
+			__intel_pmu_lbr_restore(task_ctx);
+			cpuc->lbr_context = ctx;
+		} else {
+			__intel_pmu_lbr_save(task_ctx);
+		}
+		return;
+	}
+
+	/*
+	 * When sampling the branck stack in system-wide, it may be
+	 * necessary to flush the stack on context switch. This happens
+	 * when the branch stack does not tag its entries with the pid
+	 * of the current task. Otherwise it becomes impossible to
+	 * associate a branch entry with a task. This ambiguity is more
+	 * likely to appear when the branch stack supports priv level
+	 * filtering and the user sets it to monitor only at the user
+	 * level (which could be a useful measurement in system-wide
+	 * mode). In that case, the risk is high of having a branch
+	 * stack with branch from multiple tasks.
+ 	 */
+	if (sched_in) {
+		intel_pmu_lbr_reset();
+		cpuc->lbr_context = ctx;
+	}
+}
+
+static inline bool branch_user_callstack(unsigned br_sel)
+{
+	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+}
+
 void intel_pmu_lbr_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx;
 
 	if (!x86_pmu.lbr_nr)
 		return;
@@ -198,18 +322,33 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	}
 	cpuc->br_sel = event->hw.branch_reg.reg;
 
+	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+					event->ctx->task_ctx_data) {
+		task_ctx = event->ctx->task_ctx_data;
+		task_ctx->lbr_callstack_users++;
+	}
+
 	cpuc->lbr_users++;
+	perf_sched_cb_inc(event->ctx->pmu);
 }
 
 void intel_pmu_lbr_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx;
 
 	if (!x86_pmu.lbr_nr)
 		return;
 
+	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+					event->ctx->task_ctx_data) {
+		task_ctx = event->ctx->task_ctx_data;
+		task_ctx->lbr_callstack_users--;
+	}
+
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (cpuc->enabled && !cpuc->lbr_users) {
 		__intel_pmu_lbr_disable();
@@ -218,12 +357,12 @@ void intel_pmu_lbr_disable(struct perf_event *event)
 	}
 }
 
-void intel_pmu_lbr_enable_all(void)
+void intel_pmu_lbr_enable_all(bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	if (cpuc->lbr_users)
-		__intel_pmu_lbr_enable();
+		__intel_pmu_lbr_enable(pmi);
 }
 
 void intel_pmu_lbr_disable_all(void)
@@ -234,18 +373,6 @@ void intel_pmu_lbr_disable_all(void)
 		__intel_pmu_lbr_disable();
 }
 
-/*
- * TOS = most recently recorded branch
- */
-static inline u64 intel_pmu_lbr_tos(void)
-{
-	u64 tos;
-
-	rdmsrl(x86_pmu.lbr_tos, tos);
-
-	return tos;
-}
-
 static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
 	unsigned long mask = x86_pmu.lbr_nr - 1;
@@ -350,7 +477,7 @@ void intel_pmu_lbr_read(void)
  * - in case there is no HW filter
  * - in case the HW filter has errata or limitations
  */
-static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 {
 	u64 br_type = event->attr.branch_sample_type;
 	int mask = 0;
@@ -387,11 +514,21 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 	if (br_type & PERF_SAMPLE_BRANCH_COND)
 		mask |= X86_BR_JCC;
 
+	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+		if (!x86_pmu_has_lbr_callstack())
+			return -EOPNOTSUPP;
+		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+			return -EINVAL;
+		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+			X86_BR_CALL_STACK;
+	}
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
 	 */
 	event->hw.branch_reg.reg = mask;
+	return 0;
 }
 
 /*
@@ -403,14 +540,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 {
 	struct hw_perf_event_extra *reg;
 	u64 br_type = event->attr.branch_sample_type;
-	u64 mask = 0, m;
-	u64 v;
+	u64 mask = 0, v;
+	int i;
 
-	for_each_branch_sample_type(m) {
-		if (!(br_type & m))
+	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+		if (!(br_type & (1ULL << i)))
 			continue;
 
-		v = x86_pmu.lbr_sel_map[m];
+		v = x86_pmu.lbr_sel_map[i];
 		if (v == LBR_NOT_SUPP)
 			return -EOPNOTSUPP;
 
@@ -420,8 +557,12 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 	reg = &event->hw.branch_reg;
 	reg->idx = EXTRA_REG_LBR;
 
-	/* LBR_SELECT operates in suppress mode so invert mask */
-	reg->config = ~mask & x86_pmu.lbr_sel_mask;
+	/*
+	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+	 * in suppress mode. So LBR_SELECT should be set to
+	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+	 */
+	reg->config = mask ^ x86_pmu.lbr_sel_mask;
 
 	return 0;
 }
@@ -439,7 +580,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * setup SW LBR filter
 	 */
-	intel_pmu_setup_sw_lbr_filter(event);
+	ret = intel_pmu_setup_sw_lbr_filter(event);
+	if (ret)
+		return ret;
 
 	/*
 	 * setup HW LBR filter, if any
@@ -568,6 +711,12 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 		ret = X86_BR_INT;
 		break;
 	case 0xe8: /* call near rel */
+		insn_get_immediate(&insn);
+		if (insn.immediate1.value == 0) {
+			/* zero length call */
+			ret = X86_BR_ZERO_CALL;
+			break;
+		}
 	case 0x9a: /* call far absolute */
 		ret = X86_BR_CALL;
 		break;
@@ -678,35 +827,49 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 /*
  * Map interface branch filters onto LBR filters
  */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
-					| LBR_IND_JMP | LBR_FAR,
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
+						| LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
 	 */
-	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
 	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
-	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
-	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 };
 
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
-	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
-					| LBR_FAR,
-	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
-	[PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
+};
+
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
+	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_RETURN | LBR_CALL_STACK,
 };
 
 /* core */
@@ -765,6 +928,20 @@ void __init intel_pmu_lbr_init_snb(void)
 	pr_cont("16-deep LBR, ");
 }
 
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+	x86_pmu.lbr_nr	 = 16;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+	pr_cont("16-deep LBR, ");
+}
+
 /* atom */
 void __init intel_pmu_lbr_init_atom(void)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
new file mode 100644
index 000000000000..ffe666c2c6b5
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -0,0 +1,1100 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+#include <asm/io.h>
+
+#include "perf_event.h"
+#include "intel_pt.h"
+
+static DEFINE_PER_CPU(struct pt, pt_ctx);
+
+static struct pt_pmu pt_pmu;
+
+enum cpuid_regs {
+	CR_EAX = 0,
+	CR_ECX,
+	CR_EDX,
+	CR_EBX
+};
+
+/*
+ * Capabilities of Intel PT hardware, such as number of address bits or
+ * supported output schemes, are cached and exported to userspace as "caps"
+ * attribute group of pt pmu device
+ * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
+ * relevant bits together with intel_pt traces.
+ *
+ * These are necessary for both trace decoding (payloads_lip, contains address
+ * width encoded in IP-related packets), and event configuration (bitmasks with
+ * permitted values for certain bit fields).
+ */
+#define PT_CAP(_n, _l, _r, _m)						\
+	[PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,	\
+			    .reg = _r, .mask = _m }
+
+static struct pt_cap_desc {
+	const char	*name;
+	u32		leaf;
+	u8		reg;
+	u32		mask;
+} pt_caps[] = {
+	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
+	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
+	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
+	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
+	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
+};
+
+static u32 pt_cap_get(enum pt_capabilities cap)
+{
+	struct pt_cap_desc *cd = &pt_caps[cap];
+	u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
+	unsigned int shift = __ffs(cd->mask);
+
+	return (c & cd->mask) >> shift;
+}
+
+static ssize_t pt_cap_show(struct device *cdev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct dev_ext_attribute *ea =
+		container_of(attr, struct dev_ext_attribute, attr);
+	enum pt_capabilities cap = (long)ea->var;
+
+	return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
+}
+
+static struct attribute_group pt_cap_group = {
+	.name	= "caps",
+};
+
+PMU_FORMAT_ATTR(tsc,		"config:10"	);
+PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
+
+static struct attribute *pt_formats_attr[] = {
+	&format_attr_tsc.attr,
+	&format_attr_noretcomp.attr,
+	NULL,
+};
+
+static struct attribute_group pt_format_group = {
+	.name	= "format",
+	.attrs	= pt_formats_attr,
+};
+
+static const struct attribute_group *pt_attr_groups[] = {
+	&pt_cap_group,
+	&pt_format_group,
+	NULL,
+};
+
+static int __init pt_pmu_hw_init(void)
+{
+	struct dev_ext_attribute *de_attrs;
+	struct attribute **attrs;
+	size_t size;
+	int ret;
+	long i;
+
+	attrs = NULL;
+	ret = -ENODEV;
+	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+		goto fail;
+
+	for (i = 0; i < PT_CPUID_LEAVES; i++) {
+		cpuid_count(20, i,
+			    &pt_pmu.caps[CR_EAX + i*4],
+			    &pt_pmu.caps[CR_EBX + i*4],
+			    &pt_pmu.caps[CR_ECX + i*4],
+			    &pt_pmu.caps[CR_EDX + i*4]);
+	}
+
+	ret = -ENOMEM;
+	size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
+	attrs = kzalloc(size, GFP_KERNEL);
+	if (!attrs)
+		goto fail;
+
+	size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
+	de_attrs = kzalloc(size, GFP_KERNEL);
+	if (!de_attrs)
+		goto fail;
+
+	for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
+		struct dev_ext_attribute *de_attr = de_attrs + i;
+
+		de_attr->attr.attr.name = pt_caps[i].name;
+
+		sysfs_attr_init(&de_attrs->attr.attr);
+
+		de_attr->attr.attr.mode		= S_IRUGO;
+		de_attr->attr.show		= pt_cap_show;
+		de_attr->var			= (void *)i;
+
+		attrs[i] = &de_attr->attr.attr;
+	}
+
+	pt_cap_group.attrs = attrs;
+
+	return 0;
+
+fail:
+	kfree(attrs);
+
+	return ret;
+}
+
+#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC)
+
+static bool pt_event_valid(struct perf_event *event)
+{
+	u64 config = event->attr.config;
+
+	if ((config & PT_CONFIG_MASK) != config)
+		return false;
+
+	return true;
+}
+
+/*
+ * PT configuration helpers
+ * These all are cpu affine and operate on a local PT
+ */
+
+static bool pt_is_running(void)
+{
+	u64 ctl;
+
+	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	return !!(ctl & RTIT_CTL_TRACEEN);
+}
+
+static void pt_config(struct perf_event *event)
+{
+	u64 reg;
+
+	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+
+	if (!event->attr.exclude_kernel)
+		reg |= RTIT_CTL_OS;
+	if (!event->attr.exclude_user)
+		reg |= RTIT_CTL_USR;
+
+	reg |= (event->attr.config & PT_CONFIG_MASK);
+
+	wrmsrl(MSR_IA32_RTIT_CTL, reg);
+}
+
+static void pt_config_start(bool start)
+{
+	u64 ctl;
+
+	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
+	if (start)
+		ctl |= RTIT_CTL_TRACEEN;
+	else
+		ctl &= ~RTIT_CTL_TRACEEN;
+	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	/*
+	 * A wrmsr that disables trace generation serializes other PT
+	 * registers and causes all data packets to be written to memory,
+	 * but a fence is required for the data to become globally visible.
+	 *
+	 * The below WMB, separating data store and aux_head store matches
+	 * the consumer's RMB that separates aux_head load and data load.
+	 */
+	if (!start)
+		wmb();
+}
+
+static void pt_config_buffer(void *buf, unsigned int topa_idx,
+			     unsigned int output_off)
+{
+	u64 reg;
+
+	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
+
+	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
+
+	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+}
+
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
+
+/**
+ * struct topa - page-sized ToPA table with metadata at the top
+ * @table:	actual ToPA table entries, as understood by PT hardware
+ * @list:	linkage to struct pt_buffer's list of tables
+ * @phys:	physical address of this page
+ * @offset:	offset of the first entry in this table in the buffer
+ * @size:	total size of all entries in this table
+ * @last:	index of the last initialized entry in this table
+ */
+struct topa {
+	struct topa_entry	table[TENTS_PER_PAGE];
+	struct list_head	list;
+	u64			phys;
+	u64			offset;
+	size_t			size;
+	int			last;
+};
+
+/* make -1 stand for the last table entry */
+#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+
+/**
+ * topa_alloc() - allocate page-sized ToPA table
+ * @cpu:	CPU on which to allocate.
+ * @gfp:	Allocation flags.
+ *
+ * Return:	On success, return the pointer to ToPA table page.
+ */
+static struct topa *topa_alloc(int cpu, gfp_t gfp)
+{
+	int node = cpu_to_node(cpu);
+	struct topa *topa;
+	struct page *p;
+
+	p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
+	if (!p)
+		return NULL;
+
+	topa = page_address(p);
+	topa->last = 0;
+	topa->phys = page_to_phys(p);
+
+	/*
+	 * In case of singe-entry ToPA, always put the self-referencing END
+	 * link as the 2nd entry in the table
+	 */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
+		TOPA_ENTRY(topa, 1)->end = 1;
+	}
+
+	return topa;
+}
+
+/**
+ * topa_free() - free a page-sized ToPA table
+ * @topa:	Table to deallocate.
+ */
+static void topa_free(struct topa *topa)
+{
+	free_page((unsigned long)topa);
+}
+
+/**
+ * topa_insert_table() - insert a ToPA table into a buffer
+ * @buf:	 PT buffer that's being extended.
+ * @topa:	 New topa table to be inserted.
+ *
+ * If it's the first table in this buffer, set up buffer's pointers
+ * accordingly; otherwise, add a END=1 link entry to @topa to the current
+ * "last" table and adjust the last table pointer to @topa.
+ */
+static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
+{
+	struct topa *last = buf->last;
+
+	list_add_tail(&topa->list, &buf->tables);
+
+	if (!buf->first) {
+		buf->first = buf->last = buf->cur = topa;
+		return;
+	}
+
+	topa->offset = last->offset + last->size;
+	buf->last = topa;
+
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		return;
+
+	BUG_ON(last->last != TENTS_PER_PAGE - 1);
+
+	TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+	TOPA_ENTRY(last, -1)->end = 1;
+}
+
+/**
+ * topa_table_full() - check if a ToPA table is filled up
+ * @topa:	ToPA table.
+ */
+static bool topa_table_full(struct topa *topa)
+{
+	/* single-entry ToPA is a special case */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		return !!topa->last;
+
+	return topa->last == TENTS_PER_PAGE - 1;
+}
+
+/**
+ * topa_insert_pages() - create a list of ToPA tables
+ * @buf:	PT buffer being initialized.
+ * @gfp:	Allocation flags.
+ *
+ * This initializes a list of ToPA tables with entries from
+ * the data_pages provided by rb_alloc_aux().
+ *
+ * Return:	0 on success or error code.
+ */
+static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+{
+	struct topa *topa = buf->last;
+	int order = 0;
+	struct page *p;
+
+	p = virt_to_page(buf->data_pages[buf->nr_pages]);
+	if (PagePrivate(p))
+		order = page_private(p);
+
+	if (topa_table_full(topa)) {
+		topa = topa_alloc(buf->cpu, gfp);
+		if (!topa)
+			return -ENOMEM;
+
+		topa_insert_table(buf, topa);
+	}
+
+	TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
+	TOPA_ENTRY(topa, -1)->size = order;
+	if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(topa, -1)->intr = 1;
+		TOPA_ENTRY(topa, -1)->stop = 1;
+	}
+
+	topa->last++;
+	topa->size += sizes(order);
+
+	buf->nr_pages += 1ul << order;
+
+	return 0;
+}
+
+/**
+ * pt_topa_dump() - print ToPA tables and their entries
+ * @buf:	PT buffer.
+ */
+static void pt_topa_dump(struct pt_buffer *buf)
+{
+	struct topa *topa;
+
+	list_for_each_entry(topa, &buf->tables, list) {
+		int i;
+
+		pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
+			 topa->phys, topa->offset, topa->size);
+		for (i = 0; i < TENTS_PER_PAGE; i++) {
+			pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
+				 &topa->table[i],
+				 (unsigned long)topa->table[i].base << TOPA_SHIFT,
+				 sizes(topa->table[i].size),
+				 topa->table[i].end ?  'E' : ' ',
+				 topa->table[i].intr ? 'I' : ' ',
+				 topa->table[i].stop ? 'S' : ' ',
+				 *(u64 *)&topa->table[i]);
+			if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
+			     topa->table[i].stop) ||
+			    topa->table[i].end)
+				break;
+		}
+	}
+}
+
+/**
+ * pt_buffer_advance() - advance to the next output region
+ * @buf:	PT buffer.
+ *
+ * Advance the current pointers in the buffer to the next ToPA entry.
+ */
+static void pt_buffer_advance(struct pt_buffer *buf)
+{
+	buf->output_off = 0;
+	buf->cur_idx++;
+
+	if (buf->cur_idx == buf->cur->last) {
+		if (buf->cur == buf->last)
+			buf->cur = buf->first;
+		else
+			buf->cur = list_entry(buf->cur->list.next, struct topa,
+					      list);
+		buf->cur_idx = 0;
+	}
+}
+
+/**
+ * pt_update_head() - calculate current offsets and sizes
+ * @pt:		Per-cpu pt context.
+ *
+ * Update buffer's current write pointer position and data size.
+ */
+static void pt_update_head(struct pt *pt)
+{
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	u64 topa_idx, base, old;
+
+	/* offset of the first region in this table from the beginning of buf */
+	base = buf->cur->offset + buf->output_off;
+
+	/* offset of the current output region within this table */
+	for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
+		base += sizes(buf->cur->table[topa_idx].size);
+
+	if (buf->snapshot) {
+		local_set(&buf->data_size, base);
+	} else {
+		old = (local64_xchg(&buf->head, base) &
+		       ((buf->nr_pages << PAGE_SHIFT) - 1));
+		if (base < old)
+			base += buf->nr_pages << PAGE_SHIFT;
+
+		local_add(base - old, &buf->data_size);
+	}
+}
+
+/**
+ * pt_buffer_region() - obtain current output region's address
+ * @buf:	PT buffer.
+ */
+static void *pt_buffer_region(struct pt_buffer *buf)
+{
+	return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+}
+
+/**
+ * pt_buffer_region_size() - obtain current output region's size
+ * @buf:	PT buffer.
+ */
+static size_t pt_buffer_region_size(struct pt_buffer *buf)
+{
+	return sizes(buf->cur->table[buf->cur_idx].size);
+}
+
+/**
+ * pt_handle_status() - take care of possible status conditions
+ * @pt:		Per-cpu pt context.
+ */
+static void pt_handle_status(struct pt *pt)
+{
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	int advance = 0;
+	u64 status;
+
+	rdmsrl(MSR_IA32_RTIT_STATUS, status);
+
+	if (status & RTIT_STATUS_ERROR) {
+		pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
+		pt_topa_dump(buf);
+		status &= ~RTIT_STATUS_ERROR;
+	}
+
+	if (status & RTIT_STATUS_STOPPED) {
+		status &= ~RTIT_STATUS_STOPPED;
+
+		/*
+		 * On systems that only do single-entry ToPA, hitting STOP
+		 * means we are already losing data; need to let the decoder
+		 * know.
+		 */
+		if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
+		    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+			local_inc(&buf->lost);
+			advance++;
+		}
+	}
+
+	/*
+	 * Also on single-entry ToPA implementations, interrupt will come
+	 * before the output reaches its output region's boundary.
+	 */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
+	    pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
+		void *head = pt_buffer_region(buf);
+
+		/* everything within this margin needs to be zeroed out */
+		memset(head + buf->output_off, 0,
+		       pt_buffer_region_size(buf) -
+		       buf->output_off);
+		advance++;
+	}
+
+	if (advance)
+		pt_buffer_advance(buf);
+
+	wrmsrl(MSR_IA32_RTIT_STATUS, status);
+}
+
+/**
+ * pt_read_offset() - translate registers into buffer pointers
+ * @buf:	PT buffer.
+ *
+ * Set buffer's output pointers from MSR values.
+ */
+static void pt_read_offset(struct pt_buffer *buf)
+{
+	u64 offset, base_topa;
+
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
+	buf->cur = phys_to_virt(base_topa);
+
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+	/* offset within current output region */
+	buf->output_off = offset >> 32;
+	/* index of current output region within this table */
+	buf->cur_idx = (offset & 0xffffff80) >> 7;
+}
+
+/**
+ * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
+ * @buf:	PT buffer.
+ * @pg:		Page offset in the buffer.
+ *
+ * When advancing to the next output region (ToPA entry), given a page offset
+ * into the buffer, we need to find the offset of the first page in the next
+ * region.
+ */
+static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+{
+	struct topa_entry *te = buf->topa_index[pg];
+
+	/* one region */
+	if (buf->first == buf->last && buf->first->last == 1)
+		return pg;
+
+	do {
+		pg++;
+		pg &= buf->nr_pages - 1;
+	} while (buf->topa_index[pg] == te);
+
+	return pg;
+}
+
+/**
+ * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
+ * @buf:	PT buffer.
+ * @handle:	Current output handle.
+ *
+ * Place INT and STOP marks to prevent overwriting old data that the consumer
+ * hasn't yet collected.
+ */
+static int pt_buffer_reset_markers(struct pt_buffer *buf,
+				   struct perf_output_handle *handle)
+
+{
+	unsigned long idx, npages, end;
+
+	if (buf->snapshot)
+		return 0;
+
+	/* can't stop in the middle of an output region */
+	if (buf->output_off + handle->size + 1 <
+	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+		return -EINVAL;
+
+
+	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		return 0;
+
+	/* clear STOP and INT from current entry */
+	buf->topa_index[buf->stop_pos]->stop = 0;
+	buf->topa_index[buf->intr_pos]->intr = 0;
+
+	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		npages = (handle->size + 1) >> PAGE_SHIFT;
+		end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
+		/*if (end > handle->wakeup >> PAGE_SHIFT)
+		  end = handle->wakeup >> PAGE_SHIFT;*/
+		idx = end & (buf->nr_pages - 1);
+		buf->stop_pos = idx;
+		idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
+		idx &= buf->nr_pages - 1;
+		buf->intr_pos = idx;
+	}
+
+	buf->topa_index[buf->stop_pos]->stop = 1;
+	buf->topa_index[buf->intr_pos]->intr = 1;
+
+	return 0;
+}
+
+/**
+ * pt_buffer_setup_topa_index() - build topa_index[] table of regions
+ * @buf:	PT buffer.
+ *
+ * topa_index[] references output regions indexed by offset into the
+ * buffer for purposes of quick reverse lookup.
+ */
+static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
+{
+	struct topa *cur = buf->first, *prev = buf->last;
+	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
+		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
+	int pg = 0, idx = 0, ntopa = 0;
+
+	while (pg < buf->nr_pages) {
+		int tidx;
+
+		/* pages within one topa entry */
+		for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
+			buf->topa_index[pg] = te_prev;
+
+		te_prev = te_cur;
+
+		if (idx == cur->last - 1) {
+			/* advance to next topa table */
+			idx = 0;
+			cur = list_entry(cur->list.next, struct topa, list);
+			ntopa++;
+		} else
+			idx++;
+		te_cur = TOPA_ENTRY(cur, idx);
+	}
+
+}
+
+/**
+ * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
+ * @buf:	PT buffer.
+ * @head:	Write pointer (aux_head) from AUX buffer.
+ *
+ * Find the ToPA table and entry corresponding to given @head and set buffer's
+ * "current" pointers accordingly.
+ */
+static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
+{
+	int pg;
+
+	if (buf->snapshot)
+		head &= (buf->nr_pages << PAGE_SHIFT) - 1;
+
+	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+	pg = pt_topa_next_entry(buf, pg);
+
+	buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
+	buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
+			(unsigned long)buf->cur) / sizeof(struct topa_entry);
+	buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+
+	local64_set(&buf->head, head);
+	local_set(&buf->data_size, 0);
+}
+
+/**
+ * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
+ * @buf:	PT buffer.
+ */
+static void pt_buffer_fini_topa(struct pt_buffer *buf)
+{
+	struct topa *topa, *iter;
+
+	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
+		/*
+		 * right now, this is in free_aux() path only, so
+		 * no need to unlink this table from the list
+		 */
+		topa_free(topa);
+	}
+}
+
+/**
+ * pt_buffer_init_topa() - initialize ToPA table for pt buffer
+ * @buf:	PT buffer.
+ * @size:	Total size of all regions within this ToPA.
+ * @gfp:	Allocation flags.
+ */
+static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
+			       gfp_t gfp)
+{
+	struct topa *topa;
+	int err;
+
+	topa = topa_alloc(buf->cpu, gfp);
+	if (!topa)
+		return -ENOMEM;
+
+	topa_insert_table(buf, topa);
+
+	while (buf->nr_pages < nr_pages) {
+		err = topa_insert_pages(buf, gfp);
+		if (err) {
+			pt_buffer_fini_topa(buf);
+			return -ENOMEM;
+		}
+	}
+
+	pt_buffer_setup_topa_index(buf);
+
+	/* link last table to the first one, unless we're double buffering */
+	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+		TOPA_ENTRY(buf->last, -1)->end = 1;
+	}
+
+	pt_topa_dump(buf);
+	return 0;
+}
+
+/**
+ * pt_buffer_setup_aux() - set up topa tables for a PT buffer
+ * @cpu:	Cpu on which to allocate, -1 means current.
+ * @pages:	Array of pointers to buffer pages passed from perf core.
+ * @nr_pages:	Number of pages in the buffer.
+ * @snapshot:	If this is a snapshot/overwrite counter.
+ *
+ * This is a pmu::setup_aux callback that sets up ToPA tables and all the
+ * bookkeeping for an AUX buffer.
+ *
+ * Return:	Our private PT buffer structure.
+ */
+static void *
+pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+{
+	struct pt_buffer *buf;
+	int node, ret;
+
+	if (!nr_pages)
+		return NULL;
+
+	if (cpu == -1)
+		cpu = raw_smp_processor_id();
+	node = cpu_to_node(cpu);
+
+	buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
+			   GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->cpu = cpu;
+	buf->snapshot = snapshot;
+	buf->data_pages = pages;
+
+	INIT_LIST_HEAD(&buf->tables);
+
+	ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+	if (ret) {
+		kfree(buf);
+		return NULL;
+	}
+
+	return buf;
+}
+
+/**
+ * pt_buffer_free_aux() - perf AUX deallocation path callback
+ * @data:	PT buffer.
+ */
+static void pt_buffer_free_aux(void *data)
+{
+	struct pt_buffer *buf = data;
+
+	pt_buffer_fini_topa(buf);
+	kfree(buf);
+}
+
+/**
+ * pt_buffer_is_full() - check if the buffer is full
+ * @buf:	PT buffer.
+ * @pt:		Per-cpu pt handle.
+ *
+ * If the user hasn't read data from the output region that aux_head
+ * points to, the buffer is considered full: the user needs to read at
+ * least this region and update aux_tail to point past it.
+ */
+static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+{
+	if (buf->snapshot)
+		return false;
+
+	if (local_read(&buf->data_size) >= pt->handle.size)
+		return true;
+
+	return false;
+}
+
+/**
+ * intel_pt_interrupt() - PT PMI handler
+ */
+void intel_pt_interrupt(void)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf;
+	struct perf_event *event = pt->handle.event;
+
+	/*
+	 * There may be a dangling PT bit in the interrupt status register
+	 * after PT has been disabled by pt_event_stop(). Make sure we don't
+	 * do anything (particularly, re-enable) for this event here.
+	 */
+	if (!ACCESS_ONCE(pt->handle_nmi))
+		return;
+
+	pt_config_start(false);
+
+	if (!event)
+		return;
+
+	buf = perf_get_aux(&pt->handle);
+	if (!buf)
+		return;
+
+	pt_read_offset(buf);
+
+	pt_handle_status(pt);
+
+	pt_update_head(pt);
+
+	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+			    local_xchg(&buf->lost, 0));
+
+	if (!event->hw.state) {
+		int ret;
+
+		buf = perf_aux_output_begin(&pt->handle, event);
+		if (!buf) {
+			event->hw.state = PERF_HES_STOPPED;
+			return;
+		}
+
+		pt_buffer_reset_offsets(buf, pt->handle.head);
+		ret = pt_buffer_reset_markers(buf, &pt->handle);
+		if (ret) {
+			perf_aux_output_end(&pt->handle, 0, true);
+			return;
+		}
+
+		pt_config_buffer(buf->cur->table, buf->cur_idx,
+				 buf->output_off);
+		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+		pt_config(event);
+	}
+}
+
+/*
+ * PMU callbacks
+ */
+
+static void pt_event_start(struct perf_event *event, int mode)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+	if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+		event->hw.state = PERF_HES_STOPPED;
+		return;
+	}
+
+	ACCESS_ONCE(pt->handle_nmi) = 1;
+	event->hw.state = 0;
+
+	pt_config_buffer(buf->cur->table, buf->cur_idx,
+			 buf->output_off);
+	wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+	pt_config(event);
+}
+
+static void pt_event_stop(struct perf_event *event, int mode)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+	/*
+	 * Protect against the PMI racing with disabling wrmsr,
+	 * see comment in intel_pt_interrupt().
+	 */
+	ACCESS_ONCE(pt->handle_nmi) = 0;
+	pt_config_start(false);
+
+	if (event->hw.state == PERF_HES_STOPPED)
+		return;
+
+	event->hw.state = PERF_HES_STOPPED;
+
+	if (mode & PERF_EF_UPDATE) {
+		struct pt *pt = this_cpu_ptr(&pt_ctx);
+		struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+		if (!buf)
+			return;
+
+		if (WARN_ON_ONCE(pt->handle.event != event))
+			return;
+
+		pt_read_offset(buf);
+
+		pt_handle_status(pt);
+
+		pt_update_head(pt);
+	}
+}
+
+static void pt_event_del(struct perf_event *event, int mode)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf;
+
+	pt_event_stop(event, PERF_EF_UPDATE);
+
+	buf = perf_get_aux(&pt->handle);
+
+	if (buf) {
+		if (buf->snapshot)
+			pt->handle.head =
+				local_xchg(&buf->data_size,
+					   buf->nr_pages << PAGE_SHIFT);
+		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+				    local_xchg(&buf->lost, 0));
+	}
+}
+
+static int pt_event_add(struct perf_event *event, int mode)
+{
+	struct pt_buffer *buf;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = -EBUSY;
+
+	if (pt->handle.event)
+		goto fail;
+
+	buf = perf_aux_output_begin(&pt->handle, event);
+	ret = -EINVAL;
+	if (!buf)
+		goto fail_stop;
+
+	pt_buffer_reset_offsets(buf, pt->handle.head);
+	if (!buf->snapshot) {
+		ret = pt_buffer_reset_markers(buf, &pt->handle);
+		if (ret)
+			goto fail_end_stop;
+	}
+
+	if (mode & PERF_EF_START) {
+		pt_event_start(event, 0);
+		ret = -EBUSY;
+		if (hwc->state == PERF_HES_STOPPED)
+			goto fail_end_stop;
+	} else {
+		hwc->state = PERF_HES_STOPPED;
+	}
+
+	return 0;
+
+fail_end_stop:
+	perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+	hwc->state = PERF_HES_STOPPED;
+fail:
+	return ret;
+}
+
+static void pt_event_read(struct perf_event *event)
+{
+}
+
+static void pt_event_destroy(struct perf_event *event)
+{
+	x86_del_exclusive(x86_lbr_exclusive_pt);
+}
+
+static int pt_event_init(struct perf_event *event)
+{
+	if (event->attr.type != pt_pmu.pmu.type)
+		return -ENOENT;
+
+	if (!pt_event_valid(event))
+		return -EINVAL;
+
+	if (x86_add_exclusive(x86_lbr_exclusive_pt))
+		return -EBUSY;
+
+	event->destroy = pt_event_destroy;
+
+	return 0;
+}
+
+static __init int pt_init(void)
+{
+	int ret, cpu, prior_warn = 0;
+
+	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		u64 ctl;
+
+		ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+		if (!ret && (ctl & RTIT_CTL_TRACEEN))
+			prior_warn++;
+	}
+	put_online_cpus();
+
+	if (prior_warn) {
+		x86_add_exclusive(x86_lbr_exclusive_pt);
+		pr_warn("PT is enabled at boot time, doing nothing\n");
+
+		return -EBUSY;
+	}
+
+	ret = pt_pmu_hw_init();
+	if (ret)
+		return ret;
+
+	if (!pt_cap_get(PT_CAP_topa_output)) {
+		pr_warn("ToPA output is not supported on this CPU\n");
+		return -ENODEV;
+	}
+
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		pt_pmu.pmu.capabilities =
+			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+
+	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+	pt_pmu.pmu.attr_groups	= pt_attr_groups;
+	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
+	pt_pmu.pmu.event_init	= pt_event_init;
+	pt_pmu.pmu.add		= pt_event_add;
+	pt_pmu.pmu.del		= pt_event_del;
+	pt_pmu.pmu.start	= pt_event_start;
+	pt_pmu.pmu.stop		= pt_event_stop;
+	pt_pmu.pmu.read		= pt_event_read;
+	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
+	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
+	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+
+	return ret;
+}
+
+module_init(pt_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8e5017..999289b94025 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
 
+#define NR_RAPL_DOMAINS         0x4
+static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+	"pp0-core",
+	"package",
+	"dram",
+	"pp1-gpu",
+};
+
 /* Clients have PP0, PKG */
 #define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
 			 1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = {			\
 
 struct rapl_pmu {
 	spinlock_t	 lock;
-	int		 hw_unit;  /* 1/2^hw_unit Joule */
 	int		 n_active; /* number of active events */
 	struct list_head active_list;
 	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
 	struct hrtimer   hrtimer;
 };
 
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
 static struct pmu rapl_pmu_class;
 static cpumask_t rapl_cpu_mask;
 static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
 static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
 
+static struct x86_pmu_quirk *rapl_quirks;
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
 	return raw;
 }
 
-static inline u64 rapl_scale(u64 v)
+#define rapl_add_quirk(func_)						\
+do {									\
+	static struct x86_pmu_quirk __quirk __initdata = {		\
+		.func = func_,						\
+	};								\
+	__quirk.next = rapl_quirks;					\
+	rapl_quirks = &__quirk;						\
+} while (0)
+
+static inline u64 rapl_scale(u64 v, int cfg)
 {
+	if (cfg > NR_RAPL_DOMAINS) {
+		pr_warn("invalid domain %d, failed to scale data\n", cfg);
+		return v;
+	}
 	/*
 	 * scale delta to smallest unit (1/2^32)
 	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
 	 * or use ldexp(count, -32).
 	 * Watts = Joules/Time delta
 	 */
-	return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
+	return v << (32 - rapl_hw_unit[cfg - 1]);
 }
 
 static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	sdelta = rapl_scale(delta);
+	sdelta = rapl_scale(delta, event->hw.config);
 
 	local64_add(sdelta, &event->count);
 
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
 	cpumask_set_cpu(cpu, &rapl_cpu_mask);
 }
 
+static __init void rapl_hsw_server_quirk(void)
+{
+	/*
+	 * DRAM domain on HSW server has fixed energy unit which can be
+	 * different than the unit from power unit MSR.
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+}
+
 static int rapl_cpu_prepare(int cpu)
 {
 	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
 	int phys_id = topology_physical_package_id(cpu);
 	u64 ms;
-	u64 msr_rapl_power_unit_bits;
 
 	if (pmu)
 		return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
 	if (phys_id < 0)
 		return -1;
 
-	/* protect rdmsrl() to handle virtualization */
-	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
-		return -1;
-
 	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
 	if (!pmu)
 		return -1;
-
 	spin_lock_init(&pmu->lock);
 
 	INIT_LIST_HEAD(&pmu->active_list);
 
-	/*
-	 * grab power unit as: 1/2^unit Joules
-	 *
-	 * we cache in local PMU instance
-	 */
-	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 	pmu->pmu = &rapl_pmu_class;
 
 	/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
 	 * divide interval by 2 to avoid lockstep (2 * 100)
 	 * if hw unit is 32, then we use 2 ms 1/200/2
 	 */
-	if (pmu->hw_unit < 32)
-		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
+	if (rapl_hw_unit[0] < 32)
+		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
 	else
 		ms = 2;
 
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int rapl_check_hw_unit(void)
+{
+	u64 msr_rapl_power_unit_bits;
+	int i;
+
+	/* protect rdmsrl() to handle virtualization */
+	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+		return -1;
+	for (i = 0; i < NR_RAPL_DOMAINS; i++)
+		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+	return 0;
+}
+
 static const struct x86_cpu_id rapl_cpu_match[] = {
 	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
 	[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
 {
 	struct rapl_pmu *pmu;
 	int cpu, ret;
+	struct x86_pmu_quirk *quirk;
+	int i;
 
 	/*
 	 * check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
 		rapl_cntr_mask = RAPL_IDX_CLN;
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
+	case 63: /* Haswell-Server */
+		rapl_add_quirk(rapl_hsw_server_quirk);
+		rapl_cntr_mask = RAPL_IDX_SRV;
+		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
 		rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
 		/* unsupported */
 		return 0;
 	}
+	ret = rapl_check_hw_unit();
+	if (ret)
+		return ret;
 
+	/* run cpu model quirks */
+	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
+		quirk->func();
 	cpu_notifier_register_begin();
 
 	for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
 
 	pmu = __this_cpu_read(rapl_pmu);
 
-	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
+	pr_info("RAPL PMU detected,"
 		" API unit is 2^-32 Joules,"
 		" %d fixed counters"
 		" %llu ms ovfl timer\n",
-		pmu->hw_unit,
 		hweight32(rapl_cntr_mask),
 		ktime_to_ms(pmu->timer_interval));
-
+	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+		if (rapl_cntr_mask & (1 << i)) {
+			pr_info("hw unit of domain %s 2^-%d Joules\n",
+				rapl_domain_names[i], rapl_hw_unit[i]);
+		}
+	}
 out:
 	cpu_notifier_register_done();
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 21af6149edf2..12d9548457e7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1132,8 +1132,7 @@ static int snbep_pci2phy_map_init(int devid)
 		}
 	}
 
-	if (ubox_dev)
-		pci_dev_put(ubox_dev);
+	pci_dev_put(ubox_dev);
 
 	return err ? pcibios_err_to_errno(err) : 0;
 }
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 60639093d536..3d423a101fae 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 		{ X86_FEATURE_HWP_ACT_WINDOW,	CR_EAX, 9, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_EPP,		CR_EAX,10, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_PKG_REQ,	CR_EAX,11, 0x00000006, 0 },
+		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index aceb2f90c716..c76d3e37c6e1 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 #ifdef CONFIG_X86_32
 	struct pt_regs fixed_regs;
 
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
 		regs = &fixed_regs;
 	}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3d3503351242..6367a780cc8c 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -286,13 +286,13 @@ static void __init x86_flattree_get_config(void)
 	initial_boot_params = dt = early_memremap(initial_dtb, map_len);
 	size = of_get_flat_dt_size();
 	if (map_len < size) {
-		early_iounmap(dt, map_len);
+		early_memunmap(dt, map_len);
 		initial_boot_params = dt = early_memremap(initial_dtb, size);
 		map_len = size;
 	}
 
 	unflatten_and_copy_device_tree();
-	early_iounmap(dt, map_len);
+	early_memunmap(dt, map_len);
 }
 #else
 static inline void x86_flattree_get_config(void) { }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index cf3df1d8d039..9c30acfadae2 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -25,10 +25,12 @@ unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
-static void printk_stack_address(unsigned long address, int reliable)
+static void printk_stack_address(unsigned long address, int reliable,
+		void *data)
 {
-	pr_cont(" [<%p>] %s%pB\n",
-		(void *)address, reliable ? "" : "? ", (void *)address);
+	printk("%s [<%p>] %s%pB\n",
+		(char *)data, (void *)address, reliable ? "" : "? ",
+		(void *)address);
 }
 
 void printk_address(unsigned long address)
@@ -155,8 +157,7 @@ static int print_trace_stack(void *data, char *name)
 static void print_trace_address(void *data, unsigned long addr, int reliable)
 {
 	touch_nmi_watchdog();
-	printk(data);
-	printk_stack_address(addr, reliable);
+	printk_stack_address(addr, reliable, data);
 }
 
 static const struct stacktrace_ops print_trace_ops = {
@@ -278,7 +279,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	print_modules();
 	show_regs(regs);
 #ifdef CONFIG_X86_32
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
 	} else {
@@ -307,7 +308,7 @@ void die(const char *str, struct pt_regs *regs, long err)
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 		report_bug(regs->ip, regs);
 
 	if (__die(str, regs, err))
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 5abd4cd4230c..464ffd69b92e 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -108,9 +108,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(stack))
 			break;
-		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			pr_cont("\n");
-		pr_cont(" %08lx", *stack++);
+		if ((i % STACKSLOTS_PER_LINE) == 0) {
+			if (i != 0)
+				pr_cont("\n");
+			printk("%s %08lx", log_lvl, *stack++);
+		} else
+			pr_cont(" %08lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	pr_cont("\n");
@@ -123,13 +126,13 @@ void show_regs(struct pt_regs *regs)
 	int i;
 
 	show_regs_print_info(KERN_EMERG);
-	__show_regs(regs, !user_mode_vm(regs));
+	__show_regs(regs, !user_mode(regs));
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
 	 */
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		unsigned int code_prologue = code_bytes * 43 / 64;
 		unsigned int code_len = code_bytes;
 		unsigned char c;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index ff86f19b5758..5f1c6266eb30 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -280,12 +280,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 				pr_cont(" <EOI> ");
 			}
 		} else {
-		if (((long) stack & (THREAD_SIZE-1)) == 0)
+		if (kstack_end(stack))
 			break;
 		}
-		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			pr_cont("\n");
-		pr_cont(" %016lx", *stack++);
+		if ((i % STACKSLOTS_PER_LINE) == 0) {
+			if (i != 0)
+				pr_cont("\n");
+			printk("%s %016lx", log_lvl, *stack++);
+		} else
+			pr_cont(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	preempt_enable();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 46201deee923..e2ce85db2283 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -149,6 +149,9 @@ static void __init e820_print_type(u32 type)
 	case E820_UNUSABLE:
 		printk(KERN_CONT "unusable");
 		break;
+	case E820_PRAM:
+		printk(KERN_CONT "persistent (type %u)", type);
+		break;
 	default:
 		printk(KERN_CONT "type %u", type);
 		break;
@@ -343,7 +346,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 		 * continue building up new bios map based on this
 		 * information
 		 */
-		if (current_type != last_type)	{
+		if (current_type != last_type || current_type == E820_PRAM) {
 			if (last_type != 0)	 {
 				new_bios[new_bios_entry].size =
 					change_point[chgidx]->addr - last_addr;
@@ -661,7 +664,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len)
 	extmap = (struct e820entry *)(sdata->data);
 	__append_e820_map(extmap, entries);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-	early_iounmap(sdata, data_len);
+	early_memunmap(sdata, data_len);
 	printk(KERN_INFO "e820: extended physical RAM map:\n");
 	e820_print_map("extended");
 }
@@ -688,6 +691,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 			register_nosave_region(pfn, PFN_UP(ei->addr));
 
 		pfn = PFN_DOWN(ei->addr + ei->size);
+
 		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
 			register_nosave_region(PFN_UP(ei->addr), pfn);
 
@@ -748,7 +752,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
 /*
  * Find the highest page frame number we have available
  */
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
 {
 	int i;
 	unsigned long last_pfn = 0;
@@ -759,7 +763,11 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 		unsigned long start_pfn;
 		unsigned long end_pfn;
 
-		if (ei->type != type)
+		/*
+		 * Persistent memory is accounted as ram for purposes of
+		 * establishing max_pfn and mem_map.
+		 */
+		if (ei->type != E820_RAM && ei->type != E820_PRAM)
 			continue;
 
 		start_pfn = ei->addr >> PAGE_SHIFT;
@@ -784,12 +792,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 }
 unsigned long __init e820_end_of_ram_pfn(void)
 {
-	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+	return e820_end_pfn(MAX_ARCH_PFN);
 }
 
 unsigned long __init e820_end_of_low_ram_pfn(void)
 {
-	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+	return e820_end_pfn(1UL << (32-PAGE_SHIFT));
 }
 
 static void early_panic(char *msg)
@@ -866,6 +874,9 @@ static int __init parse_memmap_one(char *p)
 	} else if (*p == '$') {
 		start_at = memparse(p+1, &p);
 		e820_add_region(start_at, mem_size, E820_RESERVED);
+	} else if (*p == '!') {
+		start_at = memparse(p+1, &p);
+		e820_add_region(start_at, mem_size, E820_PRAM);
 	} else
 		e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 
@@ -907,6 +918,7 @@ static inline const char *e820_type_to_string(int e820_type)
 	case E820_ACPI:	return "ACPI Tables";
 	case E820_NVS:	return "ACPI Non-volatile Storage";
 	case E820_UNUSABLE:	return "Unusable memory";
+	case E820_PRAM: return "Persistent RAM";
 	default:	return "reserved";
 	}
 }
@@ -940,7 +952,9 @@ void __init e820_reserve_resources(void)
 		 * pci device BAR resource and insert them later in
 		 * pcibios_resource_survey()
 		 */
-		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
+		if (((e820.map[i].type != E820_RESERVED) &&
+		     (e820.map[i].type != E820_PRAM)) ||
+		     res->start < (1ULL<<20)) {
 			res->flags |= IORESOURCE_BUSY;
 			insert_resource(&iomem_resource, res);
 		}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index a62536a1be88..89427d8d4fc5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -95,20 +95,6 @@ static unsigned long early_serial_base = 0x3f8;  /* ttyS0 */
 #define DLL             0       /*  Divisor Latch Low         */
 #define DLH             1       /*  Divisor latch High        */
 
-static void mem32_serial_out(unsigned long addr, int offset, int value)
-{
-	uint32_t *vaddr = (uint32_t *)addr;
-	/* shift implied by pointer type */
-	writel(value, vaddr + offset);
-}
-
-static unsigned int mem32_serial_in(unsigned long addr, int offset)
-{
-	uint32_t *vaddr = (uint32_t *)addr;
-	/* shift implied by pointer type */
-	return readl(vaddr + offset);
-}
-
 static unsigned int io_serial_in(unsigned long addr, int offset)
 {
 	return inb(addr + offset);
@@ -205,6 +191,20 @@ static __init void early_serial_init(char *s)
 }
 
 #ifdef CONFIG_PCI
+static void mem32_serial_out(unsigned long addr, int offset, int value)
+{
+	u32 *vaddr = (u32 *)addr;
+	/* shift implied by pointer type */
+	writel(value, vaddr + offset);
+}
+
+static unsigned int mem32_serial_in(unsigned long addr, int offset)
+{
+	u32 *vaddr = (u32 *)addr;
+	/* shift implied by pointer type */
+	return readl(vaddr + offset);
+}
+
 /*
  * early_pci_serial_init()
  *
@@ -217,8 +217,8 @@ static __init void early_pci_serial_init(char *s)
 	unsigned divisor;
 	unsigned long baud = DEFAULT_BAUD;
 	u8 bus, slot, func;
-	uint32_t classcode, bar0;
-	uint16_t cmdreg;
+	u32 classcode, bar0;
+	u16 cmdreg;
 	char *e;
 
 
@@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf)
 		if (!strncmp(buf, "xen", 3))
 			early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
-		if (!strncmp(buf, "hsu", 3)) {
-			hsu_early_console_init(buf + 3);
-			early_console_register(&early_hsu_console, keep);
-		}
-#endif
 #ifdef CONFIG_EARLY_PRINTK_EFI
 		if (!strncmp(buf, "efi", 3))
 			early_console_register(&early_efi_console, keep);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 31e2d5bf3e38..1c309763e321 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,10 +395,13 @@ sysenter_past_esp:
 	/*CFI_REL_OFFSET cs, 0*/
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
-	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
-	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
+	 * A tiny bit of offset fixup is necessary: TI_sysenter_return
+	 * is relative to thread_info, which is at the bottom of the
+	 * kernel stack page.  4*4 means the 4 words pushed above;
+	 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
+	 * and THREAD_SIZE takes us to the bottom.
 	 */
-	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
+	pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
 	CFI_REL_OFFSET eip, 0
 
 	pushl_cfi %eax
@@ -432,7 +435,7 @@ sysenter_after_call:
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testl $_TIF_ALLWORK_MASK, %ecx
-	jne sysexit_audit
+	jnz sysexit_audit
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
 	movl PT_EIP(%esp), %edx
@@ -460,7 +463,7 @@ sysenter_audit:
 
 sysexit_audit:
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jne syscall_exit_work
+	jnz syscall_exit_work
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	movl %eax,%edx		/* second arg, syscall return value */
@@ -472,7 +475,7 @@ sysexit_audit:
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jne syscall_exit_work
+	jnz syscall_exit_work
 	movl PT_EAX(%esp),%eax	/* reload syscall return value */
 	jmp sysenter_exit
 #endif
@@ -510,7 +513,7 @@ syscall_exit:
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
-	jne syscall_exit_work
+	jnz syscall_exit_work
 
 restore_all:
 	TRACE_IRQS_IRET
@@ -612,7 +615,7 @@ work_notifysig:				# deal with pending signals and
 #ifdef CONFIG_VM86
 	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 	movl %esp, %eax
-	jne work_notifysig_v86		# returning to kernel-space or
+	jnz work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
 1:
 #else
@@ -720,43 +723,22 @@ END(sysenter_badsys)
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
 	pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
 	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661:	pushl_cfi $do_general_protection
-662:
-.section .altinstructions,"a"
-	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
-.previous
-.section .altinstr_replacement,"ax"
-663:	pushl $do_simd_coprocessor_error
-664:
-.previous
+	ALTERNATIVE "pushl_cfi $do_general_protection",	\
+		    "pushl $do_simd_coprocessor_error", \
+		    X86_FEATURE_XMM
 #else
 	pushl_cfi $do_simd_coprocessor_error
 #endif
@@ -1240,20 +1216,13 @@ error_code:
 	/*CFI_REL_OFFSET es, 0*/
 	pushl_cfi %ds
 	/*CFI_REL_OFFSET ds, 0*/
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	cld
 	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1d74d161687c..c7b238494b31 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -14,27 +14,14 @@
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
  * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  * - idtentry - Define exception entry points.
  */
@@ -70,10 +57,6 @@
 	.section .entry.text, "ax"
 
 
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
 	swapgs
@@ -82,9 +65,9 @@ ENDPROC(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ
 #ifdef CONFIG_TRACE_IRQFLAGS
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON
 1:
@@ -116,8 +99,8 @@ ENDPROC(native_usergs_sysret64)
 	call debug_stack_reset
 .endm
 
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+.macro TRACE_IRQS_IRETQ_DEBUG
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON_DEBUG
 1:
@@ -130,34 +113,7 @@ ENDPROC(native_usergs_sysret64)
 #endif
 
 /*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
-	/* %rsp:at FRAMEEND */
-	.macro FIXUP_TOP_OF_STACK tmp offset=0
-	movq PER_CPU_VAR(old_rsp),\tmp
-	movq \tmp,RSP+\offset(%rsp)
-	movq $__USER_DS,SS+\offset(%rsp)
-	movq $__USER_CS,CS+\offset(%rsp)
-	movq RIP+\offset(%rsp),\tmp  /* get rip */
-	movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
-	movq R11+\offset(%rsp),\tmp  /* get eflags */
-	movq \tmp,EFLAGS+\offset(%rsp)
-	.endm
-
-	.macro RESTORE_TOP_OF_STACK tmp offset=0
-	movq RSP+\offset(%rsp),\tmp
-	movq \tmp,PER_CPU_VAR(old_rsp)
-	movq EFLAGS+\offset(%rsp),\tmp
-	movq \tmp,R11+\offset(%rsp)
-	.endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
 	.macro EMPTY_FRAME start=1 offset=0
 	.if \start
@@ -173,12 +129,12 @@ ENDPROC(native_usergs_sysret64)
  * initial frame state for interrupts (and exceptions without error code)
  */
 	.macro INTR_FRAME start=1 offset=0
-	EMPTY_FRAME \start, SS+8+\offset-RIP
-	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
-	CFI_REL_OFFSET rsp, RSP+\offset-RIP
-	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
-	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
-	CFI_REL_OFFSET rip, RIP+\offset-RIP
+	EMPTY_FRAME \start, 5*8+\offset
+	/*CFI_REL_OFFSET ss, 4*8+\offset*/
+	CFI_REL_OFFSET rsp, 3*8+\offset
+	/*CFI_REL_OFFSET rflags, 2*8+\offset*/
+	/*CFI_REL_OFFSET cs, 1*8+\offset*/
+	CFI_REL_OFFSET rip, 0*8+\offset
 	.endm
 
 /*
@@ -186,30 +142,23 @@ ENDPROC(native_usergs_sysret64)
  * with vector already pushed)
  */
 	.macro XCPT_FRAME start=1 offset=0
-	INTR_FRAME \start, RIP+\offset-ORIG_RAX
-	.endm
-
-/*
- * frame that enables calling into C.
- */
-	.macro PARTIAL_FRAME start=1 offset=0
-	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
-	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
-	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
-	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+	INTR_FRAME \start, 1*8+\offset
 	.endm
 
 /*
  * frame that enables passing a complete pt_regs to a C function.
  */
 	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, R11+\offset-R15
+	XCPT_FRAME \start, ORIG_RAX+\offset
+	CFI_REL_OFFSET rdi, RDI+\offset
+	CFI_REL_OFFSET rsi, RSI+\offset
+	CFI_REL_OFFSET rdx, RDX+\offset
+	CFI_REL_OFFSET rcx, RCX+\offset
+	CFI_REL_OFFSET rax, RAX+\offset
+	CFI_REL_OFFSET r8, R8+\offset
+	CFI_REL_OFFSET r9, R9+\offset
+	CFI_REL_OFFSET r10, R10+\offset
+	CFI_REL_OFFSET r11, R11+\offset
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -218,105 +167,30 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-ENTRY(save_paranoid)
-	XCPT_FRAME 1 RDI+8
-	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq %r8, R8+8(%rsp)
-	movq %r9, R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
-	movl $1,%ebx
-	movl $MSR_GS_BASE,%ecx
-	rdmsr
-	testl %edx,%edx
-	js 1f	/* negative -> in kernel */
-	SWAPGS
-	xorl %ebx,%ebx
-1:	ret
-	CFI_ENDPROC
-END(save_paranoid)
-
 /*
- * A newly forked process directly context switches into this address.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-	DEFAULT_FRAME
-
-	LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-	pushq_cfi $0x0002
-	popfq_cfi				# reset kernel eflags
-
-	call schedule_tail			# rdi: 'prev' task parameter
-
-	GET_THREAD_INFO(%rcx)
-
-	RESTORE_REST
-
-	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	jz   1f
-
-	/*
-	 * By the time we get here, we have no idea whether our pt_regs,
-	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-	 * the slow path, or one of the ia32entry paths.
-	 * Use int_ret_from_sys_call to return, since it can safely handle
-	 * all of the above.
-	 */
-	jmp  int_ret_from_sys_call
-
-1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq %rbp, %rdi
-	call *%rbx
-	movl $0, RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
  *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.  However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * Registers on entry:
  * rax  system call number
+ * rcx  return address
+ * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
  * rdx  arg2
- * r10  arg3 	(--> moved to rcx for C)
+ * r10  arg3 (needs to be moved to rcx to conform to C ABI)
  * r8   arg4
  * r9   arg5
- * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
- * Interrupts are off on entry.
  * Only called from user space.
  *
- * XXX	if we had a free scratch register we could save the RSP into the stack frame
- *      and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -324,9 +198,15 @@ END(ret_from_fork)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
+
+	/*
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	SWAPGS_UNSAFE_STACK
 	/*
 	 * A hypervisor implementation might want to use a label
@@ -335,18 +215,38 @@ ENTRY(system_call)
 	 */
 GLOBAL(system_call_after_swapgs)
 
-	movq	%rsp,PER_CPU_VAR(old_rsp)
+	movq	%rsp,PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
+
+	/* Construct struct pt_regs on stack */
+	pushq_cfi $__USER_DS			/* pt_regs->ss */
+	pushq_cfi PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
 	/*
-	 * No need to follow this irqs off/on section - it's straight
-	 * and short:
+	 * Re-enable interrupts.
+	 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
+	 * must execute atomically in the face of possible interrupt-driven
+	 * task preemption. We must enable interrupts only after we're done
+	 * with using rsp_scratch:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
-	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	pushq_cfi	%r11			/* pt_regs->flags */
+	pushq_cfi	$__USER_CS		/* pt_regs->cs */
+	pushq_cfi	%rcx			/* pt_regs->ip */
+	CFI_REL_OFFSET rip,0
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rcx			/* pt_regs->cx */
+	pushq_cfi	$-ENOSYS		/* pt_regs->ax */
+	pushq_cfi_reg	r8			/* pt_regs->r8 */
+	pushq_cfi_reg	r9			/* pt_regs->r9 */
+	pushq_cfi_reg	r10			/* pt_regs->r10 */
+	pushq_cfi_reg	r11			/* pt_regs->r11 */
+	sub	$(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 6*8
+
+	testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -355,82 +255,96 @@ system_call_fastpath:
 	andl $__SYSCALL_MASK,%eax
 	cmpl $__NR_syscall_max,%eax
 #endif
-	ja ret_from_sys_call  /* and return regs->ax */
+	ja	1f	/* return -ENOSYS (already in pt_regs->ax) */
 	movq %r10,%rcx
-	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	call *sys_call_table(,%rax,8)
+	movq %rax,RAX(%rsp)
+1:
 /*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
+ * Syscall return path ending with SYSRET (fast path).
+ * Has incompletely filled pt_regs.
  */
-ret_from_sys_call:
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
-
 	LOCKDEP_SYS_EXIT
+	/*
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	CFI_REMEMBER_STATE
+
 	/*
-	 * sysretq will re-enable interrupts:
+	 * We must check ti flags with interrupts (or at least preemption)
+	 * off because we must *never* return to userspace without
+	 * processing exit work that is enqueued if we're preempted here.
+	 * In particular, returning to userspace with any of the one-shot
+	 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
+	 * very bad.
 	 */
-	TRACE_IRQS_ON
-	movq RIP-ARGOFFSET(%rsp),%rcx
+	testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	jnz int_ret_from_sys_call_irqs_off	/* Go to the slow path */
+
+	CFI_REMEMBER_STATE
+
+	RESTORE_C_REGS_EXCEPT_RCX_R11
+	movq	RIP(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
+	movq	EFLAGS(%rsp),%r11
 	/*CFI_REGISTER	rflags,r11*/
-	movq	PER_CPU_VAR(old_rsp), %rsp
+	movq	RSP(%rsp),%rsp
+	/*
+	 * 64bit SYSRET restores rip from rcx,
+	 * rflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 * Restoration of rflags re-enables interrupts.
+	 */
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
 
-int_ret_from_sys_call_fixup:
-	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_ret_from_sys_call
-
-	/* Do syscall tracing */
+	/* Do syscall entry tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
-	movq $AUDIT_ARCH_X86_64, %rsi
+	movq %rsp, %rdi
+	movl $AUDIT_ARCH_X86_64, %esi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+	RESTORE_C_REGS_EXCEPT_RAX	/* else restore clobbered regs */
+	movq ORIG_RAX(%rsp), %rax
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %rdi
+	SAVE_EXTRA_REGS
 	movq %rsp, %rdi
-	movq $AUDIT_ARCH_X86_64, %rsi
+	movl $AUDIT_ARCH_X86_64, %esi
 	movq %rax,%rdx
 	call syscall_trace_enter_phase2
 
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
+	 * Reload registers from stack in case ptrace changed them.
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
-	RESTORE_REST
+	RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
 #else
 	andl $__SYSCALL_MASK,%eax
 	cmpl $__NR_syscall_max,%eax
 #endif
-	ja   int_ret_from_sys_call	/* RAX(%rsp) is already set */
+	ja	1f	/* return -ENOSYS (already in pt_regs->ax) */
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
-	/* Use IRET because user could have changed frame */
+	movq %rax,RAX(%rsp)
+1:
+	/* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
+int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
 	TRACE_IRQS_OFF
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
@@ -440,8 +354,8 @@ GLOBAL(int_with_check)
 	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
 	jnz   int_careful
-	andl    $~TS_COMPAT,TI_status(%rcx)
-	jmp   retint_swapgs
+	andl	$~TS_COMPAT,TI_status(%rcx)
+	jmp	syscall_return
 
 	/* Either reschedule or signal or syscall exit tracking needed. */
 	/* First do a reschedule test. */
@@ -458,12 +372,11 @@ int_careful:
 	TRACE_IRQS_OFF
 	jmp int_with_check
 
-	/* handle signals and tracing -- both require a full stack frame */
+	/* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
@@ -482,86 +395,192 @@ int_signal:
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
+
+syscall_return:
+	/* The IRETQ could re-enable interrupts: */
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_IRETQ
+
+	/*
+	 * Try to use SYSRET instead of IRET if we're returning to
+	 * a completely clean 64-bit userspace context.
+	 */
+	movq RCX(%rsp),%rcx
+	cmpq %rcx,RIP(%rsp)		/* RCX == RIP */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+	 * in kernel space.  This essentially lets the user take over
+	 * the kernel, since userspace controls RSP.  It's not worth
+	 * testing for canonicalness exactly -- this check detects any
+	 * of the 17 high bits set, which is true for non-canonical
+	 * or kernel addresses.  (This will pessimize vsyscall=native.
+	 * Big deal.)
+	 *
+	 * If virtual addresses ever become wider, this will need
+	 * to be updated to remain correct on both old and new CPUs.
+	 */
+	.ifne __VIRTUAL_MASK_SHIFT - 47
+	.error "virtual address width changed -- SYSRET checks need update"
+	.endif
+	shr $__VIRTUAL_MASK_SHIFT, %rcx
+	jnz opportunistic_sysret_failed
+
+	cmpq $__USER_CS,CS(%rsp)	/* CS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	movq R11(%rsp),%r11
+	cmpq %r11,EFLAGS(%rsp)		/* R11 == RFLAGS */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
+	 * restoring TF results in a trap from userspace immediately after
+	 * SYSRET.  This would cause an infinite loop whenever #DB happens
+	 * with register state that satisfies the opportunistic SYSRET
+	 * conditions.  For example, single-stepping this user code:
+	 *
+	 *           movq $stuck_here,%rcx
+	 *           pushfq
+	 *           popq %r11
+	 *   stuck_here:
+	 *
+	 * would never get past 'stuck_here'.
+	 */
+	testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+	jnz opportunistic_sysret_failed
+
+	/* nothing to check for RSP */
+
+	cmpq $__USER_DS,SS(%rsp)	/* SS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * We win!  This label is here just for ease of understanding
+	 * perf profiles.  Nothing jumps here.
+	 */
+syscall_return_via_sysret:
+	CFI_REMEMBER_STATE
+	/* r11 is already restored (see code above) */
+	RESTORE_C_REGS_EXCEPT_R11
+	movq RSP(%rsp),%rsp
+	USERGS_SYSRET64
+	CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
+	SWAPGS
+	jmp	restore_c_regs_and_iret
 	CFI_ENDPROC
 END(system_call)
 
+
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
 	CFI_STARTPROC
-	popq	%r11			/* save return address */
-	PARTIAL_FRAME 0
-	SAVE_REST
-	pushq	%r11			/* put it back on stack */
-	FIXUP_TOP_OF_STACK %r11, 8
-	DEFAULT_FRAME 0 8		/* offset 8: return address */
-	call sys_\func
-	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
+	SAVE_EXTRA_REGS 8
+	jmp sys_\func
 	CFI_ENDPROC
 END(stub_\func)
 	.endm
 
-	.macro FIXED_FRAME label,func
-ENTRY(\label)
-	CFI_STARTPROC
-	PARTIAL_FRAME 0 8		/* offset 8: return address */
-	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
-	call \func
-	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
-	ret
-	CFI_ENDPROC
-END(\label)
-	.endm
-
 	FORK_LIKE  clone
 	FORK_LIKE  fork
 	FORK_LIKE  vfork
-	FIXED_FRAME stub_iopl, sys_iopl
 
 ENTRY(stub_execve)
 	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call sys_execve
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
+	DEFAULT_FRAME 0, 8
+	call	sys_execve
+return_from_execve:
+	testl	%eax, %eax
+	jz	1f
+	/* exec failed, can use fast SYSRET code path in this case */
+	ret
+1:
+	/* must use IRET code path (pt_regs->cs may have changed) */
+	addq	$8, %rsp
+	CFI_ADJUST_CFA_OFFSET -8
+	ZERO_EXTRA_REGS
+	movq	%rax,RAX(%rsp)
+	jmp	int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execve)
-
-ENTRY(stub_execveat)
+/*
+ * Remaining execve stubs are only 7 bytes long.
+ * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
+ */
+	.align	8
+GLOBAL(stub_execveat)
 	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call sys_execveat
-	RESTORE_TOP_OF_STACK %r11
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
+	DEFAULT_FRAME 0, 8
+	call	sys_execveat
+	jmp	return_from_execve
 	CFI_ENDPROC
 END(stub_execveat)
 
+#ifdef CONFIG_X86_X32_ABI
+	.align	8
+GLOBAL(stub_x32_execve)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	call	compat_sys_execve
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub_x32_execve)
+	.align	8
+GLOBAL(stub_x32_execveat)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	call	compat_sys_execveat
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub_x32_execveat)
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
+	.align	8
+GLOBAL(stub32_execve)
+	CFI_STARTPROC
+	call	compat_sys_execve
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub32_execve)
+	.align	8
+GLOBAL(stub32_execveat)
+	CFI_STARTPROC
+	call	compat_sys_execveat
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub32_execveat)
+#endif
+
 /*
  * sigreturn is special because it needs to restore all registers on return.
  * This cannot be done with SYSRET, so use the IRET return path instead.
  */
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
+	DEFAULT_FRAME 0, 8
+	/*
+	 * SAVE_EXTRA_REGS result is not normally needed:
+	 * sigreturn overwrites all pt_regs->GPREGS.
+	 * But sigreturn can fail (!), and there is no easy way to detect that.
+	 * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
+	 * we SAVE_EXTRA_REGS here.
+	 */
+	SAVE_EXTRA_REGS 8
 	call sys_rt_sigreturn
-	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+return_from_stub:
+	addq	$8, %rsp
+	CFI_ADJUST_CFA_OFFSET -8
+	RESTORE_EXTRA_REGS
+	movq %rax,RAX(%rsp)
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_rt_sigreturn)
@@ -569,86 +588,70 @@ END(stub_rt_sigreturn)
 #ifdef CONFIG_X86_X32_ABI
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
+	DEFAULT_FRAME 0, 8
+	SAVE_EXTRA_REGS 8
 	call sys32_x32_rt_sigreturn
-	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
-	jmp int_ret_from_sys_call
+	jmp  return_from_stub
 	CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
+#endif
 
-ENTRY(stub_x32_execve)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call compat_sys_execve
-	RESTORE_TOP_OF_STACK %r11
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_x32_execve)
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+	DEFAULT_FRAME
 
-ENTRY(stub_x32_execveat)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call compat_sys_execveat
-	RESTORE_TOP_OF_STACK %r11
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+	pushq_cfi $0x0002
+	popfq_cfi				# reset kernel eflags
+
+	call schedule_tail			# rdi: 'prev' task parameter
+
+	RESTORE_EXTRA_REGS
+
+	testl $3,CS(%rsp)			# from kernel_thread?
+
+	/*
+	 * By the time we get here, we have no idea whether our pt_regs,
+	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+	 * the slow path, or one of the ia32entry paths.
+	 * Use IRET code path to return, since it can safely handle
+	 * all of the above.
+	 */
+	jnz	int_ret_from_sys_call
+
+	/* We came from kernel_thread */
+	/* nb: we depend on RESTORE_EXTRA_REGS above */
+	movq %rbp, %rdi
+	call *%rbx
+	movl $0, RAX(%rsp)
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_x32_execveat)
-
-#endif
+END(ret_from_fork)
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-	.section .init.rodata,"a"
-ENTRY(interrupt)
-	.section .entry.text
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 	CFI_ENDPROC
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * Interrupt entry/exit.
  *
@@ -659,47 +662,45 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
+	/*
+	 * Since nothing in interrupt handling code touches r12...r15 members
+	 * of "struct pt_regs", and since interrupts can nest, we can save
+	 * four stack slots and simultaneously provide
+	 * an unwind-friendly stack layout by saving "truncated" pt_regs
+	 * exactly up to rbp slot, without these members.
+	 */
+	ALLOC_PT_GPREGS_ON_STACK -RBP
+	SAVE_C_REGS -RBP
+	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
+	SAVE_EXTRA_REGS_RBP -RBP
 
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
+	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
+
+	testl $3, CS-RBP(%rsp)
 	je 1f
 	SWAPGS
+1:
 	/*
+	 * Save previous stack pointer, optionally switch to interrupt stack.
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl PER_CPU_VAR(irq_count)
+	movq %rsp, %rsi
+	incl PER_CPU_VAR(irq_count)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
 	pushq %rsi
+	/*
+	 * For debugger:
+	 * "CFA (Current Frame Address) is the value on stack + offset"
+	 */
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
+			0x77 /* DW_OP_breg7 (rsp) */, 0, \
 			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
 			0x22 /* DW_OP_plus */
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
@@ -717,7 +718,7 @@ common_interrupt:
 	ASM_CLAC
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
-	/* 0(%rsp): old_rsp-ARGOFFSET */
+	/* 0(%rsp): old RSP */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -725,19 +726,18 @@ ret_from_intr:
 
 	/* Restore saved previous stack */
 	popq %rsi
-	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
-	leaq ARGOFFSET-RBP(%rsi), %rsp
+	CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
+	/* return code expects complete pt_regs - adjust rsp accordingly: */
+	leaq -RBP(%rsi),%rsp
 	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
+	CFI_ADJUST_CFA_OFFSET	RBP
 
-exit_intr:
-	GET_THREAD_INFO(%rcx)
-	testl $3,CS-ARGOFFSET(%rsp)
+	testl $3,CS(%rsp)
 	je retint_kernel
-
 	/* Interrupt came from user space */
+
+	GET_THREAD_INFO(%rcx)
 	/*
-	 * Has a correct top of stack, but a partial stack frame
 	 * %rcx: thread info. Interrupts off.
 	 */
 retint_with_reschedule:
@@ -756,70 +756,34 @@ retint_swapgs:		/* return to user-space */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
 
-	/*
-	 * Try to use SYSRET instead of IRET if we're returning to
-	 * a completely clean 64-bit userspace context.
-	 */
-	movq (RCX-R11)(%rsp), %rcx
-	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
-	 * in kernel space.  This essentially lets the user take over
-	 * the kernel, since userspace controls RSP.  It's not worth
-	 * testing for canonicalness exactly -- this check detects any
-	 * of the 17 high bits set, which is true for non-canonical
-	 * or kernel addresses.  (This will pessimize vsyscall=native.
-	 * Big deal.)
-	 *
-	 * If virtual addresses ever become wider, this will need
-	 * to be updated to remain correct on both old and new CPUs.
-	 */
-	.ifne __VIRTUAL_MASK_SHIFT - 47
-	.error "virtual address width changed -- sysret checks need update"
-	.endif
-	shr $__VIRTUAL_MASK_SHIFT, %rcx
-	jnz opportunistic_sysret_failed
-
-	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
-	jne opportunistic_sysret_failed
-
-	movq (R11-ARGOFFSET)(%rsp), %r11
-	cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)	/* R11 == RFLAGS */
-	jne opportunistic_sysret_failed
-
-	testq $X86_EFLAGS_RF,%r11		/* sysret can't restore RF */
-	jnz opportunistic_sysret_failed
-
-	/* nothing to check for RSP */
-
-	cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)	/* SS must match SYSRET */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * We win!  This label is here just for ease of understanding
-	 * perf profiles.  Nothing jumps here.
-	 */
-irq_return_via_sysret:
-	CFI_REMEMBER_STATE
-	RESTORE_ARGS 1,8,1
-	movq (RSP-RIP)(%rsp),%rsp
-	USERGS_SYSRET64
-	CFI_RESTORE_STATE
-
-opportunistic_sysret_failed:
 	SWAPGS
-	jmp restore_args
+	jmp	restore_c_regs_and_iret
 
-retint_restore_args:	/* return to kernel space */
-	DISABLE_INTERRUPTS(CLBR_ANY)
+/* Returning to kernel space */
+retint_kernel:
+#ifdef CONFIG_PREEMPT
+	/* Interrupts are off */
+	/* Check if we need preemption */
+	bt	$9,EFLAGS(%rsp)	/* interrupts were off? */
+	jnc	1f
+0:	cmpl	$0,PER_CPU_VAR(__preempt_count)
+	jnz	1f
+	call	preempt_schedule_irq
+	jmp	0b
+1:
+#endif
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
 	TRACE_IRQS_IRETQ
-restore_args:
-	RESTORE_ARGS 1,8,1
+
+/*
+ * At this label, code paths which return to kernel and to user,
+ * which come from interrupts/exception and from syscalls, merge.
+ */
+restore_c_regs_and_iret:
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
 	INTERRUPT_RETURN
@@ -890,28 +854,17 @@ retint_signal:
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	jmp retint_with_reschedule
 
-#ifdef CONFIG_PREEMPT
-	/* Returning to kernel space. Check if we need preemption */
-	/* rcx:	 threadinfo. interrupts off. */
-ENTRY(retint_kernel)
-	cmpl $0,PER_CPU_VAR(__preempt_count)
-	jnz  retint_restore_args
-	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
-	jnc  retint_restore_args
-	call preempt_schedule_irq
-	jmp exit_intr
-#endif
 	CFI_ENDPROC
 END(common_interrupt)
 
@@ -1000,7 +953,7 @@ apicinterrupt IRQ_WORK_VECTOR \
 /*
  * Exception entry points.
  */
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -1022,8 +975,7 @@ ENTRY(\sym)
 	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
 
 	.if \paranoid
 	.if \paranoid == 1
@@ -1031,10 +983,11 @@ ENTRY(\sym)
 	testl $3, CS(%rsp)		/* If coming from userspace, switch */
 	jnz 1f				/* stacks. */
 	.endif
-	call save_paranoid
+	call paranoid_entry
 	.else
 	call error_entry
 	.endif
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
 	DEFAULT_FRAME 0
 
@@ -1056,19 +1009,20 @@ ENTRY(\sym)
 	.endif
 
 	.if \shift_ist != -1
-	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+	subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
 	call \do_sym
 
 	.if \shift_ist != -1
-	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+	addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
+	/* these procedures expect "no swapgs" flag in ebx */
 	.if \paranoid
-	jmp paranoid_exit		/* %ebx: no swapgs flag */
+	jmp paranoid_exit
 	.else
-	jmp error_exit			/* %ebx: no swapgs flag */
+	jmp error_exit
 	.endif
 
 	.if \paranoid == 1
@@ -1272,7 +1226,9 @@ ENTRY(xen_failsafe_callback)
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
 	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
 	jmp error_exit
 	CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1304,59 +1260,66 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
-	/*
-	 * "Paranoid" exit path from exception stack.  This is invoked
-	 * only on return from non-NMI IST interrupts that came
-	 * from kernel space.
-	 *
-	 * We may be returning to very strange contexts (e.g. very early
-	 * in syscall entry), so checking for preemption here would
-	 * be complicated.  Fortunately, we there's no good reason
-	 * to try to handle preemption here.
-	 */
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+	XCPT_FRAME 1 15*8
+	cld
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
+	movl $1,%ebx
+	movl $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js 1f	/* negative -> in kernel */
+	SWAPGS
+	xorl %ebx,%ebx
+1:	ret
+	CFI_ENDPROC
+END(paranoid_entry)
 
-	/* ebx:	no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
-	TRACE_IRQS_IRETQ 0
+	jnz paranoid_exit_no_swapgs
+	TRACE_IRQS_IRETQ
 	SWAPGS_UNSAFE_STACK
-	RESTORE_ALL 8
-	INTERRUPT_RETURN
-paranoid_restore:
-	TRACE_IRQS_IRETQ_DEBUG 0
-	RESTORE_ALL 8
+	jmp paranoid_exit_restore
+paranoid_exit_no_swapgs:
+	TRACE_IRQS_IRETQ_DEBUG
+paranoid_exit_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 	CFI_ENDPROC
 END(paranoid_exit)
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(error_entry)
-	XCPT_FRAME
-	CFI_ADJUST_CFA_OFFSET 15*8
-	/* oldrax contains error code */
+	XCPT_FRAME 1 15*8
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq %rdx, RDX+8(%rsp)
-	movq %rcx, RCX+8(%rsp)
-	movq %rax, RAX+8(%rsp)
-	movq  %r8,  R8+8(%rsp)
-	movq  %r9,  R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1366,12 +1329,12 @@ error_sti:
 	TRACE_IRQS_OFF
 	ret
 
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+	/*
+	 * There are two places in the kernel that can potentially fault with
+	 * usergs. Handle them here.  B stepping K8s sometimes report a
+	 * truncated RIP for IRET exceptions returning to compat mode. Check
+	 * for these here too.
+	 */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
@@ -1401,11 +1364,11 @@ error_bad_iret:
 END(error_entry)
 
 
-/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1420,19 +1383,7 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 END(error_exit)
 
-/*
- * Test if a given stack is an NMI stack or not.
- */
-	.macro test_in_nmi reg stack nmi_ret normal_ret
-	cmpq %\reg, \stack
-	ja \normal_ret
-	subq $EXCEPTION_STKSZ, %\reg
-	cmpq %\reg, \stack
-	jb \normal_ret
-	jmp \nmi_ret
-	.endm
-
-	/* runs on exception stack */
+/* Runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1468,7 +1419,7 @@ ENTRY(nmi)
 	 * NMI.
 	 */
 
-	/* Use %rdx as out temp variable throughout */
+	/* Use %rdx as our temp variable throughout */
 	pushq_cfi %rdx
 	CFI_REL_OFFSET rdx, 0
 
@@ -1493,8 +1444,17 @@ ENTRY(nmi)
 	 * We check the variable because the first NMI could be in a
 	 * breakpoint routine using a breakpoint stack.
 	 */
-	lea 6*8(%rsp), %rdx
-	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+	lea	6*8(%rsp), %rdx
+	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
+	cmpq	%rdx, 4*8(%rsp)
+	/* If the stack pointer is above the NMI stack, this is a normal NMI */
+	ja	first_nmi
+	subq	$EXCEPTION_STKSZ, %rdx
+	cmpq	%rdx, 4*8(%rsp)
+	/* If it is below the NMI stack, it is a normal NMI */
+	jb	first_nmi
+	/* Ah, it is within the NMI stack, treat it as nested */
+
 	CFI_REMEMBER_STATE
 
 nested_nmi:
@@ -1587,7 +1547,7 @@ first_nmi:
 	.rept 5
 	pushq_cfi 11*8(%rsp)
 	.endr
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 
 	/* Everything up to here is safe from nested NMIs */
 
@@ -1615,7 +1575,7 @@ repeat_nmi:
 	pushq_cfi -6*8(%rsp)
 	.endr
 	subq $(5*8), %rsp
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 end_repeat_nmi:
 
 	/*
@@ -1624,16 +1584,16 @@ end_repeat_nmi:
 	 * so that we repeat another NMI.
 	 */
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
+
 	/*
-	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
 	 * Even with normal interrupts enabled. An NMI should not be
 	 * setting NEED_RESCHED or anything that normal interrupts and
 	 * exceptions might do.
 	 */
-	call save_paranoid
+	call paranoid_entry
 	DEFAULT_FRAME 0
 
 	/*
@@ -1664,8 +1624,10 @@ end_repeat_nmi:
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
 	/* Pop the extra iret frame at once */
-	RESTORE_ALL 6*8
+	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c4f8d4659070..2b55ee6db053 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -177,9 +177,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 */
 	load_ucode_bsp();
 
-	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-		early_printk("Kernel alive\n");
-
 	clear_page(init_level4_pgt);
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f36bd42d6f0c..d031bad9e07e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -22,6 +22,7 @@
 #include <asm/cpufeature.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include <asm/bootparam.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -90,7 +91,7 @@ ENTRY(startup_32)
 	
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
-	testb $(1<<6), BP_loadflags(%esi)
+	testb $KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz 2f
 
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6fd514d9f69a..ae6588b301c2 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ *  linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@@ -56,7 +56,7 @@ startup_64:
 	 * %rsi holds a physical pointer to real_mode_data.
 	 *
 	 * We come here either directly from a 64bit bootloader, or from
-	 * arch/x86_64/boot/compressed/head.S.
+	 * arch/x86/boot/compressed/head_64.S.
 	 *
 	 * We only come here initially at boot nothing else comes here.
 	 *
@@ -146,7 +146,7 @@ startup_64:
 	leaq	level2_kernel_pgt(%rip), %rdi
 	leaq	4096(%rdi), %r8
 	/* See if it is a valid page table entry */
-1:	testq	$1, 0(%rdi)
+1:	testb	$1, 0(%rdi)
 	jz	2f
 	addq	%rbp, 0(%rdi)
 	/* Go to the next page */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5651fce0b71..009183276bb7 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
 		return false;
 
 	if (use_eager_fpu())
-		return __thread_has_fpu(current);
+		return true;
 
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
@@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
 static inline bool interrupted_user_mode(void)
 {
 	struct pt_regs *regs = get_irq_regs();
-	return regs && user_mode_vm(regs);
+	return regs && user_mode(regs);
 }
 
 /*
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
 
 	if (__thread_has_fpu(me)) {
 		__save_init_fpu(me);
-	} else if (!use_eager_fpu()) {
+	} else {
 		this_cpu_write(fpu_owner_task, NULL);
-		clts();
+		if (!use_eager_fpu())
+			clts();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void)
 
 	if (__thread_has_fpu(me)) {
 		if (WARN_ON(restore_fpu_checking(me)))
-			drop_init_fpu(me);
+			fpu_reset_state(me);
 	} else if (!use_eager_fpu()) {
 		stts();
 	}
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
 	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
-	} else
-		tsk->thread.fpu_counter = 0;
+		if (use_eager_fpu()) {
+			__save_fpu(tsk);
+		} else {
+			__save_init_fpu(tsk);
+			__thread_fpu_end(tsk);
+		}
+	}
 	preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
 		return;
 	}
 
+	memset(fpu->state, 0, xstate_size);
+
 	if (cpu_has_fxsr) {
 		fx_finit(&fpu->state->fxsave);
 	} else {
 		struct i387_fsave_struct *fp = &fpu->state->fsave;
-		memset(fp, 0, xstate_size);
 		fp->cwd = 0xffff037fu;
 		fp->swd = 0xffff0000u;
 		fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
 	if (tsk_used_math(tsk)) {
 		if (cpu_has_fpu && tsk == current)
 			unlazy_fpu(tsk);
-		tsk->thread.fpu.last_cpu = ~0;
+		task_disable_lazy_fpu_restore(tsk);
 		return 0;
 	}
 
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 		unsigned int pos, unsigned int count,
 		void *kbuf, void __user *ubuf)
 {
+	struct xsave_struct *xsave;
 	int ret;
 
 	if (!cpu_has_xsave)
@@ -345,19 +351,19 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (ret)
 		return ret;
 
+	xsave = &target->thread.fpu.state->xsave;
+
 	/*
 	 * Copy the 48bytes defined by the software first into the xstate
 	 * memory layout in the thread struct, so that we can copy the entire
 	 * xstateregs to the user using one user_regset_copyout().
 	 */
-	memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
-	       xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+	memcpy(&xsave->i387.sw_reserved,
+		xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
 	/*
 	 * Copy the xstate memory layout.
 	 */
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.fpu.state->xsave, 0, -1);
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	return ret;
 }
 
@@ -365,8 +371,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 		  unsigned int pos, unsigned int count,
 		  const void *kbuf, const void __user *ubuf)
 {
+	struct xsave_struct *xsave;
 	int ret;
-	struct xsave_hdr_struct *xsave_hdr;
 
 	if (!cpu_has_xsave)
 		return -ENODEV;
@@ -375,22 +381,18 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (ret)
 		return ret;
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.fpu.state->xsave, 0, -1);
+	xsave = &target->thread.fpu.state->xsave;
 
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
-	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-	xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-	xsave_hdr->xstate_bv &= pcntxt_mask;
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+	xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
 	/*
 	 * These bits must be zero.
 	 */
-	memset(xsave_hdr->reserved, 0, 48);
-
+	memset(&xsave->xsave_hdr.reserved, 0, 48);
 	return ret;
 }
 
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 4ddaf66ea35f..37dae792dbbe 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = &per_cpu(cpu_tss, get_cpu());
 
 	if (turn_on)
 		bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 67b1cbe0093a..e5952c225532 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -295,7 +295,7 @@ int check_irq_vectors_for_cpu_disable(void)
 
 	this_cpu = smp_processor_id();
 	cpumask_copy(&online_new, cpu_online_mask);
-	cpu_clear(this_cpu, online_new);
+	cpumask_clear_cpu(this_cpu, &online_new);
 
 	this_count = 0;
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -307,7 +307,7 @@ int check_irq_vectors_for_cpu_disable(void)
 
 			data = irq_desc_get_irq_data(desc);
 			cpumask_copy(&affinity_new, data->affinity);
-			cpu_clear(this_cpu, affinity_new);
+			cpumask_clear_cpu(this_cpu, &affinity_new);
 
 			/* Do not count inactive or per-cpu irqs. */
 			if (!irq_has_action(irq) || irqd_is_per_cpu(data))
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 28d28f5eb8f4..f9fd86a7fcc7 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
 	if (unlikely(!desc))
 		return false;
 
-	if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
 		desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index e4b503d5558c..394e643d7830 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 	u64 estack_top, estack_bottom;
 	u64 curbase = (u64)task_stack_page(current);
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return;
 
 	if (regs->sp >= curbase + sizeof(struct thread_info) +
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 70e181ea1eac..cd10a6437264 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
 #endif
 	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+		set_intr_gate(i, irq_entries_start +
+				8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 #ifdef CONFIG_X86_LOCAL_APIC
 	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 7ec1d5f8d283..d6178d9791db 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -72,7 +72,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 	{ "bx", 8, offsetof(struct pt_regs, bx) },
 	{ "cx", 8, offsetof(struct pt_regs, cx) },
 	{ "dx", 8, offsetof(struct pt_regs, dx) },
-	{ "si", 8, offsetof(struct pt_regs, dx) },
+	{ "si", 8, offsetof(struct pt_regs, si) },
 	{ "di", 8, offsetof(struct pt_regs, di) },
 	{ "bp", 8, offsetof(struct pt_regs, bp) },
 	{ "sp", 8, offsetof(struct pt_regs, sp) },
@@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 #ifdef CONFIG_X86_32
 	switch (regno) {
 	case GDB_SS:
-		if (!user_mode_vm(regs))
+		if (!user_mode(regs))
 			*(unsigned long *)mem = __KERNEL_DS;
 		break;
 	case GDB_SP:
-		if (!user_mode_vm(regs))
+		if (!user_mode(regs))
 			*(unsigned long *)mem = kernel_stack_pointer(regs);
 		break;
 	case GDB_GS:
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4e3d5a9621fe..1deffe6cc873 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -354,6 +354,7 @@ int __copy_instruction(u8 *dest, u8 *src)
 {
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	int length;
 	unsigned long recovered_insn =
 		recover_probed_instruction(buf, (unsigned long)src);
 
@@ -361,16 +362,18 @@ int __copy_instruction(u8 *dest, u8 *src)
 		return 0;
 	kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 	insn_get_length(&insn);
+	length = insn.length;
+
 	/* Another subsystem puts a breakpoint, failed to recover */
 	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 		return 0;
-	memcpy(dest, insn.kaddr, insn.length);
+	memcpy(dest, insn.kaddr, length);
 
 #ifdef CONFIG_X86_64
 	if (insn_rip_relative(&insn)) {
 		s64 newdisp;
 		u8 *disp;
-		kernel_insn_init(&insn, dest, insn.length);
+		kernel_insn_init(&insn, dest, length);
 		insn_get_displacement(&insn);
 		/*
 		 * The copied instruction uses the %rip-relative addressing
@@ -394,7 +397,7 @@ int __copy_instruction(u8 *dest, u8 *src)
 		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
-	return insn.length;
+	return length;
 }
 
 static int arch_copy_kprobe(struct kprobe *p)
@@ -602,7 +605,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return 0;
 
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@@ -1007,7 +1010,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
 	struct die_args *args = data;
 	int ret = NOTIFY_DONE;
 
-	if (args->regs && user_mode_vm(args->regs))
+	if (args->regs && user_mode(args->regs))
 		return ret;
 
 	if (val == DIE_GPF) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e354cc6446ab..9435620062df 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -513,7 +513,7 @@ void __init kvm_guest_init(void)
 	 * can get false positives too easily, for example if the host is
 	 * overcommitted.
 	 */
-	watchdog_enable_hardlockup_detector(false);
+	hardlockup_detector_disable();
 }
 
 static noinline uint32_t __kvm_cpuid_base(void)
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 9bbb9b35c144..005c03e93fc5 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -33,6 +33,7 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
@@ -53,7 +54,7 @@ static DEFINE_MUTEX(module_kaslr_mutex);
 
 static unsigned long int get_module_load_offset(void)
 {
-	if (kaslr_enabled) {
+	if (kaslr_enabled()) {
 		mutex_lock(&module_kaslr_mutex);
 		/*
 		 * Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 548d25f00c90..c614dd492f5f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.ptep_modify_prot_start = __ptep_modify_prot_start,
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
-#if PAGETABLE_LEVELS >= 3
+#if CONFIG_PGTABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
 	.set_pte_atomic = native_set_pte_atomic,
 	.pte_clear = native_pte_clear,
@@ -454,13 +454,13 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.pmd_val = PTE_IDENT,
 	.make_pmd = PTE_IDENT,
 
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 	.pud_val = PTE_IDENT,
 	.make_pud = PTE_IDENT,
 
 	.set_pgd = native_set_pgd,
 #endif
-#endif /* PAGETABLE_LEVELS >= 3 */
+#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
 
 	.pte_val = PTE_IDENT,
 	.pgd_val = PTE_IDENT,
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 781861cc5ee8..da8cb987b973 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 	}
 
 	/*
-	 * RIP, flags, and the argument registers are usually saved.
-	 * orig_ax is probably okay, too.
+	 * These registers are always saved on 64-bit syscall entry.
+	 * On 32-bit entry points, they are saved too except r8..r11.
 	 */
 	regs_user_copy->ip = user_regs->ip;
+	regs_user_copy->ax = user_regs->ax;
 	regs_user_copy->cx = user_regs->cx;
 	regs_user_copy->dx = user_regs->dx;
 	regs_user_copy->si = user_regs->si;
@@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 	regs_user_copy->r11 = user_regs->r11;
 	regs_user_copy->orig_ax = user_regs->orig_ax;
 	regs_user_copy->flags = user_regs->flags;
+	regs_user_copy->sp = user_regs->sp;
+	regs_user_copy->cs = user_regs->cs;
+	regs_user_copy->ss = user_regs->ss;
 
 	/*
-	 * Don't even try to report the "rest" regs.
+	 * Most system calls don't save these registers, don't report them.
 	 */
 	regs_user_copy->bx = -1;
 	regs_user_copy->bp = -1;
@@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 
 	/*
 	 * For this to be at all useful, we need a reasonable guess for
-	 * sp and the ABI.  Be careful: we're in NMI context, and we're
+	 * the ABI.  Be careful: we're in NMI context, and we're
 	 * considering current to be the current task, so we should
 	 * be careful not to look at any other percpu variables that might
 	 * change during context switches.
 	 */
-	if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
-	    task_thread_info(current)->status & TS_COMPAT) {
-		/* Easy case: we're in a compat syscall. */
-		regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
-		regs_user_copy->sp = user_regs->sp;
-		regs_user_copy->cs = user_regs->cs;
-		regs_user_copy->ss = user_regs->ss;
-	} else if (user_regs->orig_ax != -1) {
-		/*
-		 * We're probably in a 64-bit syscall.
-		 * Warning: this code is severely racy.  At least it's better
-		 * than just blindly copying user_regs.
-		 */
-		regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
-		regs_user_copy->sp = this_cpu_read(old_rsp);
-		regs_user_copy->cs = __USER_CS;
-		regs_user_copy->ss = __USER_DS;
-		regs_user_copy->cx = -1;  /* usually contains garbage */
-	} else {
-		/* We're probably in an interrupt or exception. */
-		regs_user->abi = user_64bit_mode(user_regs) ?
-			PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
-		regs_user_copy->sp = user_regs->sp;
-		regs_user_copy->cs = user_regs->cs;
-		regs_user_copy->ss = user_regs->ss;
-	}
+	regs_user->abi = user_64bit_mode(user_regs) ?
+		PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
 
 	regs_user->regs = regs_user_copy;
 }
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
new file mode 100644
index 000000000000..3420c874ddc5
--- /dev/null
+++ b/arch/x86/kernel/pmem.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015, Christoph Hellwig.
+ */
+#include <linux/memblock.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <asm/e820.h>
+#include <asm/page_types.h>
+#include <asm/setup.h>
+
+static __init void register_pmem_device(struct resource *res)
+{
+	struct platform_device *pdev;
+	int error;
+
+	pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO);
+	if (!pdev)
+		return;
+
+	error = platform_device_add_resources(pdev, res, 1);
+	if (error)
+		goto out_put_pdev;
+
+	error = platform_device_add(pdev);
+	if (error)
+		goto out_put_pdev;
+	return;
+
+out_put_pdev:
+	dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n");
+	platform_device_put(pdev);
+}
+
+static __init int register_pmem_devices(void)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		if (ei->type == E820_PRAM) {
+			struct resource res = {
+				.flags	= IORESOURCE_MEM,
+				.start	= ei->addr,
+				.end	= ei->addr + ei->size - 1,
+			};
+			register_pmem_device(&res);
+		}
+	}
+
+	return 0;
+}
+device_initcall(register_pmem_devices);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 046e2d620bbe..8213da62b1b7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/random.h>
 #include <linux/user-return-notifier.h>
 #include <linux/dmi.h>
@@ -24,6 +24,7 @@
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
+#include <asm/mwait.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
@@ -37,7 +38,26 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+	.x86_tss = {
+		.sp0 = TOP_OF_INIT_STACK,
+#ifdef CONFIG_X86_32
+		.ss0 = __KERNEL_DS,
+		.ss1 = __KERNEL_CS,
+		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
+#endif
+	 },
+#ifdef CONFIG_X86_32
+	 /*
+	  * Note that the .io_bitmap member must be extra-big. This is because
+	  * the CPU will access an additional byte beyond the end of the IO
+	  * permission bitmap. The extra byte must be all 1 bits, and must
+	  * be within the limit.
+	  */
+	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
+#endif
+};
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -69,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
 	dst->thread.fpu_counter = 0;
 	dst->thread.fpu.has_fpu = 0;
-	dst->thread.fpu.last_cpu = ~0;
 	dst->thread.fpu.state = NULL;
+	task_disable_lazy_fpu_restore(dst);
 	if (tsk_used_math(src)) {
 		int err = fpu_alloc(&dst->thread.fpu);
 		if (err)
@@ -109,7 +129,7 @@ void exit_thread(void)
 	unsigned long *bp = t->io_bitmap_ptr;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+		struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
 
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
@@ -131,13 +151,18 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_init_fpu(tsk);
-	/*
-	 * Free the FPU state for non xsave platforms. They get reallocated
-	 * lazily at the first use.
-	 */
-	if (!use_eager_fpu())
+
+	if (!use_eager_fpu()) {
+		/* FPU state will be reallocated lazily at the first use. */
+		drop_fpu(tsk);
 		free_thread_xstate(tsk);
+	} else if (!used_math()) {
+		/* kthread execs. TODO: cleanup this horror. */
+		if (WARN_ON(init_fpu(tsk)))
+			force_sig(SIGKILL, tsk);
+		user_fpu_begin();
+		restore_init_xstate();
+	}
 }
 
 static void hard_disable_TSC(void)
@@ -377,14 +402,11 @@ static void amd_e400_idle(void)
 
 		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
 			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
-			/*
-			 * Force broadcast so ACPI can not interfere.
-			 */
-			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-					   &cpu);
+			/* Force broadcast so ACPI can not interfere. */
+			tick_broadcast_force();
 			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
 		}
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+		tick_broadcast_enter();
 
 		default_idle();
 
@@ -393,12 +415,59 @@ static void amd_e400_idle(void)
 		 * called with interrupts disabled.
 		 */
 		local_irq_disable();
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		tick_broadcast_exit();
 		local_irq_enable();
 	} else
 		default_idle();
 }
 
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default default C1 state.
+ * This invokes MWAIT with interrutps enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+	if (!current_set_polling_and_test()) {
+		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+			smp_mb(); /* quirk */
+			clflush((void *)&current_thread_info()->flags);
+			smp_mb(); /* quirk */
+		}
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+	} else {
+		local_irq_enable();
+	}
+	__current_clr_polling();
+}
+
 void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
@@ -412,6 +481,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
 		x86_idle = amd_e400_idle;
+	} else if (prefer_mwait_c1_over_halt(c)) {
+		pr_info("using mwait in idle threads\n");
+		x86_idle = mwait_idle;
 	} else
 		x86_idle = default_idle;
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 603c4f99cb5a..8ed2106b06da 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned long sp;
 	unsigned short ss, gs;
 
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
 		gs = get_user_gs(regs);
@@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
 	regs->flags		= X86_EFLAGS_IF;
-	/*
-	 * force it to the iret return path by making it look as if there was
-	 * some work pending.
-	 */
-	set_thread_flag(TIF_NOTIFY_RESUME);
+	force_iret();
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
@@ -248,7 +244,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -256,11 +252,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
 	/*
-	 * Reload esp0.
-	 */
-	load_sp0(tss, next);
-
-	/*
 	 * Save away %gs. No need to save %fs, as it was saved on the
 	 * stack on entry.  No need to save %es and %ds, as those are
 	 * always kernel segments while inside the kernel.  Doing this
@@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
+	/*
+	 * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+	 * current_thread_info().
+	 */
+	load_sp0(tss, next);
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
+	this_cpu_write(cpu_current_top_of_stack,
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
 
 	/*
 	 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 67fcc43577d2..4baaa972f52a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, int all)
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.usersp = me->thread.usersp;
 	set_tsk_thread_flag(p, TIF_FORK);
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (test_thread_flag(TIF_IA32))
+		if (is_ia32_task())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)childregs->si, 0);
 		else
@@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
-	current->thread.usersp	= new_sp;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	this_cpu_write(old_rsp, new_sp);
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
+	force_iret();
 }
 
 void
@@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu;
 
 	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-	/* Reload esp0 and ss1. */
-	load_sp0(tss, next);
-
 	/* We must save %fs and %gs before load_TLS() because
 	 * %fs and %gs may be cleared by load_TLS().
 	 *
@@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = this_cpu_read(old_rsp);
-	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
 
 	/*
@@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
 	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
 
+	/* Reload esp0 and ss1.  This changes current_thread_info(). */
+	load_sp0(tss, next);
+
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
@@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
-	return (test_tsk_thread_flag(task, TIF_IA32)) ?
-			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
+	return task_pt_regs(task)->sp;
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e510618b2e91..a7bc79480719 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
 	case offsetof(struct user_regs_struct,cs):
 		if (unlikely(value == 0))
 			return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-		if (test_tsk_thread_flag(task, TIF_IA32))
-			task_pt_regs(task)->cs = value;
-#endif
+		task_pt_regs(task)->cs = value;
 		break;
 	case offsetof(struct user_regs_struct,ss):
 		if (unlikely(value == 0))
 			return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-		if (test_tsk_thread_flag(task, TIF_IA32))
-			task_pt_regs(task)->ss = value;
-#endif
+		task_pt_regs(task)->ss = value;
 		break;
 	}
 
@@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
 	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 	info->si_code = si_code;
-	info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+	info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 }
 
 void user_single_step_siginfo(struct task_struct *tsk,
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d229a58..e5ecd20e72dd 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+	if (!pvclock_vdso_info) {
+		BUG();
+		return NULL;
+	}
+
+	return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
 #ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+			        void *v)
+{
+	struct task_migration_notifier *mn = v;
+	struct pvclock_vsyscall_time_info *pvti;
+
+	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+	/* this is NULL when pvclock vsyscall is not initialized */
+	if (unlikely(pvti == NULL))
+		return NOTIFY_DONE;
+
+	pvti->migrate_count++;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+	.notifier_call = pvclock_task_migrate,
+};
+
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
 	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
+	pvclock_vdso_info = i;
+
 	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
 		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
 			     __pa(i) + (idx*PAGE_SIZE),
 			     PAGE_KERNEL_VVAR);
 	}
 
+
+	register_task_migration_notifier(&pvclock_migrate);
+
 	return 0;
 }
 #endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index bae6c609888e..86db4bcd7ce5 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -183,6 +183,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		},
 	},
 
+	/* ASRock */
+	{	/* Handle problems with rebooting on ASRock Q1900DC-ITX */
+		.callback = set_pci_reboot,
+		.ident = "ASRock Q1900DC-ITX",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"),
+			DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"),
+		},
+	},
+
 	/* ASUS */
 	{	/* Handle problems with rebooting on ASUS P4S800 */
 		.callback = set_bios_reboot,
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index e13f8e7c22a6..77630d57e7bf 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -226,23 +226,23 @@ swap_pages:
 	movl	(%ebx), %ecx
 	addl	$4, %ebx
 1:
-	testl	$0x1,   %ecx  /* is it a destination page */
+	testb	$0x1, %cl     /* is it a destination page */
 	jz	2f
 	movl	%ecx,	%edi
 	andl	$0xfffff000, %edi
 	jmp     0b
 2:
-	testl	$0x2,	%ecx  /* is it an indirection page */
+	testb	$0x2, %cl    /* is it an indirection page */
 	jz	2f
 	movl	%ecx,	%ebx
 	andl	$0xfffff000, %ebx
 	jmp     0b
 2:
-	testl   $0x4,   %ecx /* is it the done indicator */
+	testb   $0x4, %cl    /* is it the done indicator */
 	jz      2f
 	jmp     3f
 2:
-	testl   $0x8,   %ecx /* is it the source indicator */
+	testb   $0x8, %cl    /* is it the source indicator */
 	jz      0b	     /* Ignore it otherwise */
 	movl    %ecx,   %esi /* For every source page do a copy */
 	andl    $0xfffff000, %esi
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 3fd2c693e475..98111b38ebfd 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -123,7 +123,7 @@ identity_mapped:
 	 * Set cr4 to a known state:
 	 *  - physical address extension enabled
 	 */
-	movq	$X86_CR4_PAE, %rax
+	movl	$X86_CR4_PAE, %eax
 	movq	%rax, %cr4
 
 	jmp 1f
@@ -221,23 +221,23 @@ swap_pages:
 	movq	(%rbx), %rcx
 	addq	$8,	%rbx
 1:
-	testq	$0x1,	%rcx  /* is it a destination page? */
+	testb	$0x1,	%cl   /* is it a destination page? */
 	jz	2f
 	movq	%rcx,	%rdi
 	andq	$0xfffffffffffff000, %rdi
 	jmp	0b
 2:
-	testq	$0x2,	%rcx  /* is it an indirection page? */
+	testb	$0x2,	%cl   /* is it an indirection page? */
 	jz	2f
 	movq	%rcx,   %rbx
 	andq	$0xfffffffffffff000, %rbx
 	jmp	0b
 2:
-	testq	$0x4,	%rcx  /* is it the done indicator? */
+	testb	$0x4,	%cl   /* is it the done indicator? */
 	jz	2f
 	jmp	3f
 2:
-	testq	$0x8,	%rcx  /* is it the source indicator? */
+	testb	$0x8,	%cl   /* is it the source indicator? */
 	jz	0b	      /* Ignore it otherwise */
 	movq	%rcx,   %rsi  /* For ever source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
@@ -246,17 +246,17 @@ swap_pages:
 	movq	%rsi, %rax
 
 	movq	%r10, %rdi
-	movq	$512,   %rcx
+	movl	$512, %ecx
 	rep ; movsq
 
 	movq	%rax, %rdi
 	movq	%rdx, %rsi
-	movq	$512,   %rcx
+	movl	$512, %ecx
 	rep ; movsq
 
 	movq	%rdx, %rdi
 	movq	%r10, %rsi
-	movq	$512,   %rcx
+	movl	$512, %ecx
 	rep ; movsq
 
 	lea	PAGE_SIZE(%rax), %rsi
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 98dc9317286e..d74ac33290ae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,8 +122,6 @@
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
-bool __read_mostly kaslr_enabled = false;
-
 #ifdef CONFIG_DMI
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
@@ -356,7 +354,7 @@ static void __init relocate_initrd(void)
 		mapaddr = ramdisk_image & PAGE_MASK;
 		p = early_memremap(mapaddr, clen+slop);
 		memcpy(q, p+slop, clen);
-		early_iounmap(p, clen+slop);
+		early_memunmap(p, clen+slop);
 		q += clen;
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
@@ -427,11 +425,6 @@ static void __init reserve_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
-{
-	kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
-}
-
 static void __init parse_setup_data(void)
 {
 	struct setup_data *data;
@@ -445,7 +438,7 @@ static void __init parse_setup_data(void)
 		data_len = data->len + sizeof(struct setup_data);
 		data_type = data->type;
 		pa_next = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 
 		switch (data_type) {
 		case SETUP_E820_EXT:
@@ -457,9 +450,6 @@ static void __init parse_setup_data(void)
 		case SETUP_EFI:
 			parse_efi_setup(pa_data, data_len);
 			break;
-		case SETUP_KASLR:
-			parse_kaslr_setup(pa_data, data_len);
-			break;
 		default:
 			break;
 		}
@@ -480,7 +470,7 @@ static void __init e820_reserve_setup_data(void)
 			 E820_RAM, E820_RESERVED_KERN);
 		found = 1;
 		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 	}
 	if (!found)
 		return;
@@ -501,7 +491,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 		data = early_memremap(pa_data, sizeof(*data));
 		memblock_reserve(pa_data, sizeof(*data) + data->len);
 		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 	}
 }
 
@@ -842,14 +832,15 @@ static void __init trim_low_memory_range(void)
 static int
 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 {
-	if (kaslr_enabled)
+	if (kaslr_enabled()) {
 		pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
 			 (unsigned long)&_text - __START_KERNEL,
 			 __START_KERNEL,
 			 __START_KERNEL_map,
 			 MODULES_VADDR-1);
-	else
+	} else {
 		pr_emerg("Kernel Offset: disabled\n");
+	}
 
 	return 0;
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e5042463c1bc..1ea14fd53933 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,8 +61,7 @@
 	regs->seg = GET_SEG(seg) | 3;			\
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		       unsigned long *pax)
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
 	void __user *buf;
 	unsigned int tmpflags;
@@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 #endif /* CONFIG_X86_32 */
 
 		COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-		COPY(dx); COPY(cx); COPY(ip);
+		COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 
 #ifdef CONFIG_X86_64
 		COPY(r8);
@@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 		COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-		/* Kernel saves and restores only the CS segment register on signals,
-		 * which is the bare minimum needed to allow mixed 32/64-bit code.
-		 * App's signal handler can save/restore other segments if needed. */
-		COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_ax = -1;		/* disable syscall checks */
 
 		get_user_ex(buf, &sc->fpstate);
-
-		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
 	err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
 
+	force_iret();
+
 	return err;
 }
 
@@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 #else /* !CONFIG_X86_32 */
 		put_user_ex(regs->flags, &sc->flags);
 		put_user_ex(regs->cs, &sc->cs);
-		put_user_ex(0, &sc->gs);
-		put_user_ex(0, &sc->fs);
+		put_user_ex(0, &sc->__pad2);
+		put_user_ex(0, &sc->__pad1);
+		put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
 		put_user_ex(fpstate, &sc->fpstate);
@@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 	regs->sp = (unsigned long)frame;
 
-	/* Set up the CS register to run signal handlers in 64-bit mode,
-	   even if the handler happens to be interrupting 32-bit code. */
+	/*
+	 * Set up the CS and SS registers to run signal handlers in
+	 * 64-bit mode, even if the handler happens to be interrupting
+	 * 32-bit or 16-bit code.
+	 *
+	 * SS is subtle.  In 64-bit mode, we don't need any particular
+	 * SS descriptor, but we do need SS to be valid.  It's possible
+	 * that the old SS is entirely bogus -- this can happen if the
+	 * signal we're trying to deliver is #GP or #SS caused by a bad
+	 * SS value.
+	 */
 	regs->cs = __USER_CS;
+	regs->ss = __USER_DS;
 
 	return 0;
 }
@@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe __user *frame;
-	unsigned long ax;
 	sigset_t set;
 
 	frame = (struct sigframe __user *)(regs->sp - 8);
@@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->sc, &ax))
+	if (restore_sigcontext(regs, &frame->sc))
 		goto badframe;
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "sigreturn");
@@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
-	unsigned long ax;
 	sigset_t set;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@@ -579,37 +580,23 @@ asmlinkage long sys_rt_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "rt_sigreturn");
 	return 0;
 }
 
-/*
- * OK, we're invoking a handler:
- */
-static int signr_convert(int sig)
-{
-#ifdef CONFIG_X86_32
-	struct thread_info *info = current_thread_info();
-
-	if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
-		return info->exec_domain->signal_invmap[sig];
-#endif /* CONFIG_X86_32 */
-	return sig;
-}
-
 static int
 setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 {
-	int usig = signr_convert(ksig->sig);
+	int usig = ksig->sig;
 	sigset_t *set = sigmask_to_save();
 	compat_sigset_t *cset = (compat_sigset_t *) set;
 
@@ -629,7 +616,8 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 static void
 handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
-	bool failed;
+	bool stepping, failed;
+
 	/* Are we from a system call? */
 	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -653,12 +641,13 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	}
 
 	/*
-	 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
-	 * flag so that register information in the sigcontext is correct.
+	 * If TF is set due to a debugger (TIF_FORCED_TF), clear TF now
+	 * so that register information in the sigcontext is correct and
+	 * then notify the tracer before entering the signal handler.
 	 */
-	if (unlikely(regs->flags & X86_EFLAGS_TF) &&
-	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
-		regs->flags &= ~X86_EFLAGS_TF;
+	stepping = test_thread_flag(TIF_SINGLESTEP);
+	if (stepping)
+		user_disable_single_step(current);
 
 	failed = (setup_rt_frame(ksig, regs) < 0);
 	if (!failed) {
@@ -669,19 +658,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		 * it might disable possible debug exception from the
 		 * signal handler.
 		 *
-		 * Clear TF when entering the signal handler, but
-		 * notify any tracer that was single-stepping it.
-		 * The tracer may want to single-step inside the
-		 * handler too.
+		 * Clear TF for the case when it wasn't set by debugger to
+		 * avoid the recursive send_sigtrap() in SIGTRAP handler.
 		 */
 		regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
 		/*
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
 		if (used_math())
-			drop_init_fpu(current);
+			fpu_reset_state(current);
 	}
-	signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
+	signal_setup_done(failed, ksig, stepping);
 }
 
 #ifdef CONFIG_X86_32
@@ -780,7 +767,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
-	unsigned long ax;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -791,13 +777,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "x32 rt_sigreturn");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index febc6aabc72e..50e547eac8cd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -77,9 +77,6 @@
 #include <asm/realmode.h>
 #include <asm/misc.h>
 
-/* State of each CPU */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
@@ -257,7 +254,7 @@ static void notrace start_secondary(void *unused)
 	lock_vector_lock();
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
-	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	cpu_set_state_online(smp_processor_id());
 	x86_platform.nmi_init();
 
 	/* enable local interrupts */
@@ -779,6 +776,26 @@ out:
 	return boot_error;
 }
 
+void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	/* Just in case we booted with a single CPU. */
+	alternatives_enable_smp();
+
+	per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+	/* Stack for startup_32 can be just as for start_secondary onwards */
+	irq_ctx_init(cpu);
+	per_cpu(cpu_current_top_of_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
+#else
+	clear_tsk_thread_flag(idle, TIF_FORK);
+	initial_gs = per_cpu_offset(cpu);
+#endif
+	per_cpu(kernel_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
+}
+
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -796,23 +813,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	int cpu0_nmi_registered = 0;
 	unsigned long timeout;
 
-	/* Just in case we booted with a single CPU. */
-	alternatives_enable_smp();
-
 	idle->thread.sp = (unsigned long) (((struct pt_regs *)
 			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
-	per_cpu(current_task, cpu) = idle;
 
-#ifdef CONFIG_X86_32
-	/* Stack for startup_32 can be just as for start_secondary onwards */
-	irq_ctx_init(cpu);
-#else
-	clear_tsk_thread_flag(idle, TIF_FORK);
-	initial_gs = per_cpu_offset(cpu);
-#endif
-	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
@@ -948,11 +951,16 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 	 */
 	mtrr_save_state();
 
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	/* x86 CPUs take themselves offline, so delayed offline is OK. */
+	err = cpu_check_up_prepare(cpu);
+	if (err && err != -EBUSY)
+		return err;
 
 	/* the FPU context is blank, nobody can own it */
 	__cpu_disable_lazy_restore(cpu);
 
+	common_cpu_up(cpu, tidle);
+
 	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
@@ -1086,8 +1094,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		return SMP_NO_APIC;
 	}
 
-	verify_local_APIC();
-
 	/*
 	 * If SMP should be disabled, then really disable it!
 	 */
@@ -1191,7 +1197,7 @@ void __init native_smp_prepare_boot_cpu(void)
 	switch_to_new_gdt(me);
 	/* already set me in cpu_online_mask in boot_cpu_init() */
 	cpumask_set_cpu(me, cpu_callout_mask);
-	per_cpu(cpu_state, me) = CPU_ONLINE;
+	cpu_set_state_online(me);
 }
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
@@ -1318,14 +1324,10 @@ static void __ref remove_cpu_from_maps(int cpu)
 	numa_remove_cpu(cpu);
 }
 
-static DEFINE_PER_CPU(struct completion, die_complete);
-
 void cpu_disable_common(void)
 {
 	int cpu = smp_processor_id();
 
-	init_completion(&per_cpu(die_complete, smp_processor_id()));
-
 	remove_siblinginfo(cpu);
 
 	/* It's now safe to remove this processor from the online map */
@@ -1349,24 +1351,27 @@ int native_cpu_disable(void)
 	return 0;
 }
 
-void cpu_die_common(unsigned int cpu)
+int common_cpu_die(unsigned int cpu)
 {
-	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
-}
+	int ret = 0;
 
-void native_cpu_die(unsigned int cpu)
-{
 	/* We don't do anything here: idle task is faking death itself. */
 
-	cpu_die_common(cpu);
-
 	/* They ack this in play_dead() by setting CPU_DEAD */
-	if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+	if (cpu_wait_death(cpu, 5)) {
 		if (system_state == SYSTEM_RUNNING)
 			pr_info("CPU %u is now offline\n", cpu);
 	} else {
 		pr_err("CPU %u didn't die...\n", cpu);
+		ret = -1;
 	}
+
+	return ret;
+}
+
+void native_cpu_die(unsigned int cpu)
+{
+	common_cpu_die(cpu);
 }
 
 void play_dead_common(void)
@@ -1375,10 +1380,8 @@ void play_dead_common(void)
 	reset_lazy_tlbstate();
 	amd_e400_remove_cpu(raw_smp_processor_id());
 
-	mb();
 	/* Ack it */
-	__this_cpu_write(cpu_state, CPU_DEAD);
-	complete(&per_cpu(die_complete, smp_processor_id()));
+	(void)cpu_report_death();
 
 	/*
 	 * With physical CPU hotplug, we should halt the cpu
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 30277e27431a..10e0272d789a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -34,10 +34,26 @@ static unsigned long get_align_mask(void)
 	return va_align.mask;
 }
 
+/*
+ * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
+ * va_align.bits, [12:upper_bit), are set to a random value instead of
+ * zeroing them. This random value is computed once per boot. This form
+ * of ASLR is known as "per-boot ASLR".
+ *
+ * To achieve this, the random value is added to the info.align_offset
+ * value before calling vm_unmapped_area() or ORed directly to the
+ * address.
+ */
+static unsigned long get_align_bits(void)
+{
+	return va_align.bits & get_align_mask();
+}
+
 unsigned long align_vdso_addr(unsigned long addr)
 {
 	unsigned long align_mask = get_align_mask();
-	return (addr + align_mask) & ~align_mask;
+	addr = (addr + align_mask) & ~align_mask;
+	return addr | get_align_bits();
 }
 
 static int __init control_va_addr_alignment(char *str)
@@ -135,8 +151,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.length = len;
 	info.low_limit = begin;
 	info.high_limit = end;
-	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	if (filp) {
+		info.align_mask = get_align_mask();
+		info.align_offset += get_align_bits();
+	}
 	return vm_unmapped_area(&info);
 }
 
@@ -174,8 +194,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	info.length = len;
 	info.low_limit = PAGE_SIZE;
 	info.high_limit = mm->mmap_base;
-	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	if (filp) {
+		info.align_mask = get_align_mask();
+		info.align_offset += get_align_bits();
+	}
 	addr = vm_unmapped_area(&info);
 	if (!(addr & ~PAGE_MASK))
 		return addr;
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
index e9bcd57d8a9e..3777189c4a19 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/kernel/syscall_32.c
@@ -5,21 +5,29 @@
 #include <linux/cache.h>
 #include <asm/asm-offsets.h>
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#ifdef CONFIG_IA32_EMULATION
+#define SYM(sym, compat) compat
+#else
+#define SYM(sym, compat) sym
+#define ia32_sys_call_table sys_call_table
+#define __NR_ia32_syscall_max __NR_syscall_max
+#endif
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
 
 typedef asmlinkage void (*sys_call_ptr_t)(void);
 
 extern asmlinkage void sys_ni_syscall(void);
 
-__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
 	/*
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
+	[0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index b79133abda48..5ecbfe5099da 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -57,7 +57,7 @@ int rodata_test(void)
 	/* test 3: check the value hasn't changed */
 	/* If this test fails, we managed to overwrite the data */
 	if (!rodata_test_data) {
-		printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n");
+		printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n");
 		return -ENODEV;
 	}
 	/* test 4: check if the rodata section is 4Kb aligned */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25adc0e16eaa..d39c09119db6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 {
 	unsigned long pc = instruction_pointer(regs);
 
-	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+	if (!user_mode(regs) && in_lock_functions(pc)) {
 #ifdef CONFIG_FRAME_POINTER
 		return *(unsigned long *)(regs->bp + sizeof(long));
 #else
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9d2073e2ecc9..324ab5247687 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
 {
 	enum ctx_state prev_state;
 
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		/* Other than that, we're just an exception. */
 		prev_state = exception_enter();
 	} else {
@@ -123,7 +123,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
 		 * but we need to notify RCU.
 		 */
 		rcu_nmi_enter();
-		prev_state = IN_KERNEL;  /* the value is irrelevant. */
+		prev_state = CONTEXT_KERNEL;  /* the value is irrelevant. */
 	}
 
 	/*
@@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
 	/* Must be before exception_exit. */
 	preempt_count_sub(HARDIRQ_OFFSET);
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return exception_exit(prev_state);
 	else
 		rcu_nmi_exit();
@@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
  *
  * IST exception handlers normally cannot schedule.  As a special
  * exception, if the exception interrupted userspace code (i.e.
- * user_mode_vm(regs) would return true) and the exception was not
+ * user_mode(regs) would return true) and the exception was not
  * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
  * begins a non-atomic section within an ist_enter()/ist_exit() region.
  * Callers are responsible for enabling interrupts themselves inside
@@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
  */
 void ist_begin_non_atomic(struct pt_regs *regs)
 {
-	BUG_ON(!user_mode_vm(regs));
+	BUG_ON(!user_mode(regs));
 
 	/*
 	 * Sanity check: we need to be on the normal thread stack.  This
 	 * will catch asm bugs and any attempt to use ist_preempt_enable
 	 * from double_fault.
 	 */
-	BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
-		& ~(THREAD_SIZE - 1)) != 0);
+	BUG_ON((unsigned long)(current_top_of_stack() -
+			       current_stack_pointer()) >= THREAD_SIZE);
 
 	preempt_count_sub(HARDIRQ_OFFSET);
 }
@@ -194,8 +194,7 @@ static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		  struct pt_regs *regs,	long error_code)
 {
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		/*
 		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 		 * On nmi (interrupt 2), do_trap should not be called.
@@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		}
 		return -1;
 	}
-#endif
+
 	if (!user_mode(regs)) {
 		if (!fixup_exception(regs)) {
 			tsk->thread.error_code = error_code;
@@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	prev_state = exception_enter();
 	conditional_sti(regs);
 
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		local_irq_enable();
 		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 		goto exit;
 	}
-#endif
 
 	tsk = current;
 	if (!user_mode(regs)) {
@@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 	/* Copy the remainder of the stack from the current stack. */
 	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
 
-	BUG_ON(!user_mode_vm(&new_stack->regs));
+	BUG_ON(!user_mode(&new_stack->regs));
 	return new_stack;
 }
 NOKPROBE_SYMBOL(fixup_bad_iret);
@@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 					X86_TRAP_DB);
 		preempt_conditional_cli(regs);
@@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 		return;
 	conditional_sti(regs);
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 	{
 		if (!fixup_exception(regs)) {
 			task->thread.error_code = error_code;
@@ -734,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	/*
 	 * Save the info for the exception handler and clear the error.
 	 */
-	save_init_fpu(task);
+	unlazy_fpu(task);
 	task->thread.trap_nr = trapnr;
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
@@ -863,7 +860,7 @@ void math_state_restore(void)
 	kernel_fpu_disable();
 	__thread_fpu_begin(tsk);
 	if (unlikely(restore_fpu_checking(tsk))) {
-		drop_init_fpu(tsk);
+		fpu_reset_state(tsk);
 		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 	} else {
 		tsk->thread.fpu_counter++;
@@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 /* Set of traps needed for early debugging. */
 void __init early_trap_init(void)
 {
-	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+	/*
+	 * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
+	 * is ready in cpu_init() <-- trap_init(). Before trap_init(),
+	 * CPU runs at ring 0 so it is impossible to hit an invalid
+	 * stack.  Using the original stack works well enough at this
+	 * early stage. DEBUG_STACK will be equipped after cpu_init() in
+	 * trap_init().
+	 *
+	 * We don't need to set trace_idt_table like set_intr_gate(),
+	 * since we don't have trace_debug and it will be reset to
+	 * 'debug' in trap_init() by set_intr_gate_ist().
+	 */
+	set_intr_gate_notrace(X86_TRAP_DB, debug);
 	/* int3 can be called from all */
-	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+	set_system_intr_gate(X86_TRAP_BP, &int3);
 #ifdef CONFIG_X86_32
 	set_intr_gate(X86_TRAP_PF, page_fault);
 #endif
@@ -1005,6 +1014,15 @@ void __init trap_init(void)
 	 */
 	cpu_init();
 
+	/*
+	 * X86_TRAP_DB and X86_TRAP_BP have been set
+	 * in early_trap_init(). However, ITS works only after
+	 * cpu_init() loads TSS. See comments in early_trap_init().
+	 */
+	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+	/* int3 can be called from all */
+	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+
 	x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 81f8adb0679e..0b81ad67da07 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
 	int ret = NOTIFY_DONE;
 
 	/* We are only interested in userspace traps */
-	if (regs && !user_mode_vm(regs))
+	if (regs && !user_mode(regs))
 		return NOTIFY_DONE;
 
 	switch (val) {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e8edcf52e069..fc9db6ef2a95 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 		do_exit(SIGSEGV);
 	}
 
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = &per_cpu(cpu_tss, get_cpu());
 	current->thread.sp0 = current->thread.saved_sp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
 	load_sp0(tss, &current->thread);
@@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	tsk->thread.saved_fs = info->regs32->fs;
 	tsk->thread.saved_gs = get_user_gs(info->regs32);
 
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = &per_cpu(cpu_tss, get_cpu());
 	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index c7d791f32b98..51e330416995 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
 	gtod_write_begin(vdata);
 
 	/* copy vsyscall data */
-	vdata->vclock_mode	= tk->tkr.clock->archdata.vclock_mode;
-	vdata->cycle_last	= tk->tkr.cycle_last;
-	vdata->mask		= tk->tkr.mask;
-	vdata->mult		= tk->tkr.mult;
-	vdata->shift		= tk->tkr.shift;
+	vdata->vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->cycle_last	= tk->tkr_mono.cycle_last;
+	vdata->mask		= tk->tkr_mono.mask;
+	vdata->mult		= tk->tkr_mono.mult;
+	vdata->shift		= tk->tkr_mono.shift;
 
 	vdata->wall_time_sec		= tk->xtime_sec;
-	vdata->wall_time_snsec		= tk->tkr.xtime_nsec;
+	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
 	vdata->monotonic_time_sec	= tk->xtime_sec
 					+ tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_snsec	= tk->tkr.xtime_nsec
+	vdata->monotonic_time_snsec	= tk->tkr_mono.xtime_nsec
 					+ ((u64)tk->wall_to_monotonic.tv_nsec
-						<< tk->tkr.shift);
+						<< tk->tkr_mono.shift);
 	while (vdata->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+					(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
 		vdata->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->tkr.shift;
+					((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
 		vdata->monotonic_time_sec++;
 	}
 
 	vdata->wall_time_coarse_sec	= tk->xtime_sec;
-	vdata->wall_time_coarse_nsec	= (long)(tk->tkr.xtime_nsec >>
-						 tk->tkr.shift);
+	vdata->wall_time_coarse_nsec	= (long)(tk->tkr_mono.xtime_nsec >>
+						 tk->tkr_mono.shift);
 
 	vdata->monotonic_time_coarse_sec =
 		vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 34f66e58a896..87a815b85f3e 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 			 config_enabled(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
-		drop_init_fpu(tsk);
+		fpu_reset_state(tsk);
 		return 0;
 	}
 
@@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 * thread's fpu state, reconstruct fxstate from the fsave
 		 * header. Sanitize the copied state etc.
 		 */
-		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
 		int err = 0;
 
@@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		drop_fpu(tsk);
 
-		if (__copy_from_user(xsave, buf_fx, state_size) ||
+		if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
 		    __copy_from_user(&env, buf, sizeof(env))) {
+			fpu_finit(fpu);
 			err = -1;
 		} else {
 			sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
-			set_used_math();
 		}
 
+		set_used_math();
 		if (use_eager_fpu()) {
 			preempt_disable();
 			math_state_restore();
@@ -415,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		user_fpu_begin();
 		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-			drop_init_fpu(tsk);
+			fpu_reset_state(tsk);
 			return -1;
 		}
 	}
@@ -677,19 +678,13 @@ void xsave_init(void)
 	this_func();
 }
 
-static inline void __init eager_fpu_init_bp(void)
-{
-	current->thread.fpu.state =
-	    alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-	if (!init_xstate_buf)
-		setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
+/*
+ * setup_init_fpu_buf() is __init and it is OK to call it here because
+ * init_xstate_buf will be unset only once during boot.
+ */
+void __init_refok eager_fpu_init(void)
 {
-	static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
-	clear_used_math();
+	WARN_ON(used_math());
 	current_thread_info()->status = 0;
 
 	if (eagerfpu == ENABLE)
@@ -700,21 +695,8 @@ void eager_fpu_init(void)
 		return;
 	}
 
-	if (boot_func) {
-		boot_func();
-		boot_func = NULL;
-	}
-
-	/*
-	 * This is same as math_state_restore(). But use_xsave() is
-	 * not yet patched to use math_state_restore().
-	 */
-	init_fpu(current);
-	__thread_fpu_begin(current);
-	if (cpu_has_xsave)
-		xrstor_state(init_xstate_buf, -1);
-	else
-		fxrstor_checking(&init_xstate_buf->i387);
+	if (!init_xstate_buf)
+		setup_init_fpu_buf();
 }
 
 /*
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 08f790dfadc9..16e8f962eaad 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,5 +1,5 @@
 
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
 
 CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8a80737ee6e6..59b69f6a2844 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		((best->eax & 0xff00) >> 8) != 0)
 		return -EINVAL;
 
+	/* Update physical-address width */
+	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
 	kvm_pmu_cpuid_update(vcpu);
 	return 0;
 }
@@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 	}
 }
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+	if (!best || best->eax < 0x80000008)
+		goto not_found;
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+not_found:
+	return 36;
+}
+EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
+
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
@@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
-	if (!best || best->eax < 0x80000008)
-		goto not_found;
-	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-	if (best)
-		return best->eax & 0xff;
-not_found:
-	return 36;
-}
-EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
-
 /*
  * If no match is found, check whether we exceed the vCPU's limit
  * and return the content of the highest valid _standard_ leaf instead.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 4452eedfaedd..c3b1ad9fca81 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 			      struct kvm_cpuid_entry2 __user *entries);
 void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
+
+static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.maxphyaddr;
+}
 
 static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 
 	if (!static_cpu_has(X86_FEATURE_XSAVE))
-		return 0;
+		return false;
 
 	best = kvm_find_cpuid_entry(vcpu, 1, 0);
 	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 106c01557f2b..630bcb0d7a04 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -248,27 +248,7 @@ struct mode_dual {
 	struct opcode mode64;
 };
 
-/* EFLAGS bit definitions. */
-#define EFLG_ID (1<<21)
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_IOPL (3<<12)
-#define EFLG_NT (1<<14)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
-#define EFLG_RESERVED_ONE_MASK 2
 
 enum x86_transfer_type {
 	X86_TRANSFER_NONE,
@@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
  * These EFLAGS bits are restored from saved value during emulation, and
  * any changes are written back to the saved value after emulation.
  */
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
+		     X86_EFLAGS_PF|X86_EFLAGS_CF)
 
 #ifdef CONFIG_X86_64
 #define ON64(x) x
@@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask)
 	*dest = (*dest & ~mask) | (src & mask);
 }
 
+static void assign_register(unsigned long *reg, u64 val, int bytes)
+{
+	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+	switch (bytes) {
+	case 1:
+		*(u8 *)reg = (u8)val;
+		break;
+	case 2:
+		*(u16 *)reg = (u16)val;
+		break;
+	case 4:
+		*reg = (u32)val;
+		break;	/* 64b: zero-extend */
+	case 8:
+		*reg = val;
+		break;
+	}
+}
+
 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
 {
 	return (1UL << (ctxt->ad_bytes << 3)) - 1;
@@ -943,6 +943,22 @@ FASTOP2(xadd);
 
 FASTOP2R(cmp, cmp_r);
 
+static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
+{
+	/* If src is zero, do not writeback, but update flags */
+	if (ctxt->src.val == 0)
+		ctxt->dst.type = OP_NONE;
+	return fastop(ctxt, em_bsf);
+}
+
+static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+{
+	/* If src is zero, do not writeback, but update flags */
+	if (ctxt->src.val == 0)
+		ctxt->dst.type = OP_NONE;
+	return fastop(ctxt, em_bsr);
+}
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
@@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 		unsigned int in_page, n;
 		unsigned int count = ctxt->rep_prefix ?
 			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
-		in_page = (ctxt->eflags & EFLG_DF) ?
+		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
 			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
 			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
 		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
@@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	}
 
 	if (ctxt->rep_prefix && (ctxt->d & String) &&
-	    !(ctxt->eflags & EFLG_DF)) {
+	    !(ctxt->eflags & X86_EFLAGS_DF)) {
 		ctxt->dst.data = rc->data + rc->pos;
 		ctxt->dst.type = OP_MEM_STR;
 		ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 static void write_register_operand(struct operand *op)
 {
-	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-	switch (op->bytes) {
-	case 1:
-		*(u8 *)op->addr.reg = (u8)op->val;
-		break;
-	case 2:
-		*(u16 *)op->addr.reg = (u16)op->val;
-		break;
-	case 4:
-		*op->addr.reg = (u32)op->val;
-		break;	/* 64b: zero-extend */
-	case 8:
-		*op->addr.reg = op->val;
-		break;
-	}
+	return assign_register(op->addr.reg, op->val, op->bytes);
 }
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 {
 	int rc;
 	unsigned long val, change_mask;
-	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
 	int cpl = ctxt->ops->cpl(ctxt);
 
 	rc = emulate_pop(ctxt, &val, len);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
-		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
+	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
+		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
+		      X86_EFLAGS_AC | X86_EFLAGS_ID;
 
 	switch(ctxt->mode) {
 	case X86EMUL_MODE_PROT64:
 	case X86EMUL_MODE_PROT32:
 	case X86EMUL_MODE_PROT16:
 		if (cpl == 0)
-			change_mask |= EFLG_IOPL;
+			change_mask |= X86_EFLAGS_IOPL;
 		if (cpl <= iopl)
-			change_mask |= EFLG_IF;
+			change_mask |= X86_EFLAGS_IF;
 		break;
 	case X86EMUL_MODE_VM86:
 		if (iopl < 3)
 			return emulate_gp(ctxt, 0);
-		change_mask |= EFLG_IF;
+		change_mask |= X86_EFLAGS_IF;
 		break;
 	default: /* real mode */
-		change_mask |= (EFLG_IOPL | EFLG_IF);
+		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
 		break;
 	}
 
@@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
+	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
 	return em_push(ctxt);
 }
 
@@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 {
 	int rc = X86EMUL_CONTINUE;
 	int reg = VCPU_REGS_RDI;
+	u32 val;
 
 	while (reg >= VCPU_REGS_RAX) {
 		if (reg == VCPU_REGS_RSP) {
@@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
+		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
+		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
 		--reg;
 	}
 	return rc;
@@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
 
 	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
 	rc = em_push(ctxt);
@@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 	unsigned long temp_eip = 0;
 	unsigned long temp_eflags = 0;
 	unsigned long cs = 0;
-	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
-			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
-			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
-	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
+			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
+			     X86_EFLAGS_AC | X86_EFLAGS_ID |
+			     X86_EFLAGS_FIXED;
+	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+				  X86_EFLAGS_VIP;
 
 	/* TODO: Add stack limit check */
 
@@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 
 	ctxt->_eip = temp_eip;
 
-
 	if (ctxt->op_bytes == 4)
 		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
 	else if (ctxt->op_bytes == 2) {
@@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 	}
 
 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
-	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+	ctxt->eflags |= X86_EFLAGS_FIXED;
 	ctxt->ops->set_nmi_mask(ctxt, false);
 
 	return rc;
@@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
 		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
 		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
-		ctxt->eflags &= ~EFLG_ZF;
+		ctxt->eflags &= ~X86_EFLAGS_ZF;
 	} else {
 		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
 			(u32) reg_read(ctxt, VCPU_REGS_RBX);
 
-		ctxt->eflags |= EFLG_ZF;
+		ctxt->eflags |= X86_EFLAGS_ZF;
 	}
 	return X86EMUL_CONTINUE;
 }
@@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 	ctxt->src.val = ctxt->dst.orig_val;
 	fastop(ctxt, em_cmp);
 
-	if (ctxt->eflags & EFLG_ZF) {
+	if (ctxt->eflags & X86_EFLAGS_ZF) {
 		/* Success: write back to memory; no update of EAX */
 		ctxt->src.type = OP_NONE;
 		ctxt->dst.val = ctxt->src.orig_val;
@@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
 		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
 		ctxt->eflags &= ~msr_data;
-		ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+		ctxt->eflags |= X86_EFLAGS_FIXED;
 #endif
 	} else {
 		/* legacy mode */
 		ops->get_msr(ctxt, MSR_STAR, &msr_data);
 		ctxt->_eip = (u32)msr_data;
 
-		ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
+		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	}
 
 	return X86EMUL_CONTINUE;
@@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	if ((msr_data & 0xfffc) == 0x0)
 		return emulate_gp(ctxt, 0);
 
-	ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
-	cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
+	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
+	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
 	ss_sel = cs_sel + 8;
 	if (efer & EFER_LMA) {
 		cs.d = 0;
@@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 			return emulate_gp(ctxt, 0);
 		break;
 	}
-	cs_sel |= SELECTOR_RPL_MASK;
-	ss_sel |= SELECTOR_RPL_MASK;
+	cs_sel |= SEGMENT_RPL_MASK;
+	ss_sel |= SEGMENT_RPL_MASK;
 
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
@@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
 		return false;
 	if (ctxt->mode == X86EMUL_MODE_VM86)
 		return true;
-	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
 	return ctxt->ops->cpl(ctxt) > iopl;
 }
 
@@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
 					X86_TRANSFER_TASK_SWITCH, NULL);
-	if (ret != X86EMUL_CONTINUE)
-		return ret;
 
-	return X86EMUL_CONTINUE;
+	return ret;
 }
 
 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
@@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 		struct operand *op)
 {
-	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
+	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
 
 	register_address_increment(ctxt, reg, df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, reg);
@@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
-static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+static int em_hypercall(struct x86_emulate_ctxt *ctxt)
 {
 	int rc = ctxt->ops->fix_hypercall(ctxt);
 
@@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
 	return em_lgdt_lidt(ctxt, true);
 }
 
-static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
-{
-	int rc;
-
-	rc = ctxt->ops->fix_hypercall(ctxt);
-
-	/* Disable writeback. */
-	ctxt->dst.type = OP_NONE;
-	return rc;
-}
-
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
 	return em_lgdt_lidt(ctxt, false);
@@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt)
 {
 	u32 flags;
 
-	flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+		X86_EFLAGS_SF;
 	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
 
 	ctxt->eflags &= ~0xffUL;
@@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 
 static const struct opcode group7_rm0[] = {
 	N,
-	I(SrcNone | Priv | EmulateOnUD,	em_vmcall),
+	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
 	N, N, N, N, N, N,
 };
 
@@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = {
 
 static const struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
-	II(SrcNone  | Prot | EmulateOnUD,	em_vmmcall,	vmmcall),
+	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
 	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
 	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
 	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
@@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = {
 	N, N,
 	G(BitOp, group8),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
-	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
+	I(DstReg | SrcMem | ModRM, em_bsf_c),
+	I(DstReg | SrcMem | ModRM, em_bsr_c),
 	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xC7 */
 	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
@@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
 	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
 	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
-		 ((ctxt->eflags & EFLG_ZF) == 0))
+		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
 		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
-		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
 		return true;
 
 	return false;
@@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 			/* All REP prefixes have the same first termination condition */
 			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
 				ctxt->eip = ctxt->_eip;
-				ctxt->eflags &= ~EFLG_RF;
+				ctxt->eflags &= ~X86_EFLAGS_RF;
 				goto done;
 			}
 		}
@@ -4963,9 +4961,9 @@ special_insn:
 	}
 
 	if (ctxt->rep_prefix && (ctxt->d & String))
-		ctxt->eflags |= EFLG_RF;
+		ctxt->eflags |= X86_EFLAGS_RF;
 	else
-		ctxt->eflags &= ~EFLG_RF;
+		ctxt->eflags &= ~X86_EFLAGS_RF;
 
 	if (ctxt->execute) {
 		if (ctxt->d & Fastop) {
@@ -5014,7 +5012,7 @@ special_insn:
 		rc = emulate_int(ctxt, ctxt->src.val);
 		break;
 	case 0xce:		/* into */
-		if (ctxt->eflags & EFLG_OF)
+		if (ctxt->eflags & X86_EFLAGS_OF)
 			rc = emulate_int(ctxt, 4);
 		break;
 	case 0xe9: /* jmp rel */
@@ -5027,19 +5025,19 @@ special_insn:
 		break;
 	case 0xf5:	/* cmc */
 		/* complement carry flag from eflags reg */
-		ctxt->eflags ^= EFLG_CF;
+		ctxt->eflags ^= X86_EFLAGS_CF;
 		break;
 	case 0xf8: /* clc */
-		ctxt->eflags &= ~EFLG_CF;
+		ctxt->eflags &= ~X86_EFLAGS_CF;
 		break;
 	case 0xf9: /* stc */
-		ctxt->eflags |= EFLG_CF;
+		ctxt->eflags |= X86_EFLAGS_CF;
 		break;
 	case 0xfc: /* cld */
-		ctxt->eflags &= ~EFLG_DF;
+		ctxt->eflags &= ~X86_EFLAGS_DF;
 		break;
 	case 0xfd: /* std */
-		ctxt->eflags |= EFLG_DF;
+		ctxt->eflags |= X86_EFLAGS_DF;
 		break;
 	default:
 		goto cannot_emulate;
@@ -5100,7 +5098,7 @@ writeback:
 			}
 			goto done; /* skip rip writeback */
 		}
-		ctxt->eflags &= ~EFLG_RF;
+		ctxt->eflags &= ~X86_EFLAGS_RF;
 	}
 
 	ctxt->eip = ctxt->_eip;
@@ -5137,8 +5135,7 @@ twobyte_insn:
 	case 0x40 ... 0x4f:	/* cmov */
 		if (test_cc(ctxt->b, ctxt->eflags))
 			ctxt->dst.val = ctxt->src.val;
-		else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
-			 ctxt->op_bytes != 4)
+		else if (ctxt->op_bytes != 4)
 			ctxt->dst.type = OP_NONE; /* no writeback */
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 298781d4cfb4..4dce6f8b6129 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr)
 		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
 }
 
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+				struct kvm_io_device *this,
 			    gpa_t addr, int len, const void *data)
 {
 	struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
 	return 0;
 }
 
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+			   struct kvm_io_device *this,
 			   gpa_t addr, int len, void *data)
 {
 	struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
 	return 0;
 }
 
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int speaker_ioport_write(struct kvm_vcpu *vcpu,
+				struct kvm_io_device *this,
 				gpa_t addr, int len, const void *data)
 {
 	struct kvm_pit *pit = speaker_to_pit(this);
@@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this,
 	return 0;
 }
 
-static int speaker_ioport_read(struct kvm_io_device *this,
-			       gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_vcpu *vcpu,
+				   struct kvm_io_device *this,
+				   gpa_t addr, int len, void *data)
 {
 	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index dd1b16b611b0..c84990b42b5b 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,7 +3,7 @@
 
 #include <linux/kthread.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm_kpit_channel_state {
 	u32 count; /* can be 65536 */
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cc31f7c06d3d..fef922ff2635 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
 		return -EOPNOTSUPP;
 
 	if (len != 1) {
+		memset(val, 0, len);
 		pr_pic_unimpl("non byte read\n");
 		return 0;
 	}
@@ -528,42 +529,42 @@ static int picdev_read(struct kvm_pic *s,
 	return 0;
 }
 
-static int picdev_master_write(struct kvm_io_device *dev,
+static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			       gpa_t addr, int len, const void *val)
 {
 	return picdev_write(container_of(dev, struct kvm_pic, dev_master),
 			    addr, len, val);
 }
 
-static int picdev_master_read(struct kvm_io_device *dev,
+static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			      gpa_t addr, int len, void *val)
 {
 	return picdev_read(container_of(dev, struct kvm_pic, dev_master),
 			    addr, len, val);
 }
 
-static int picdev_slave_write(struct kvm_io_device *dev,
+static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			      gpa_t addr, int len, const void *val)
 {
 	return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
 			    addr, len, val);
 }
 
-static int picdev_slave_read(struct kvm_io_device *dev,
+static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			     gpa_t addr, int len, void *val)
 {
 	return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
 			    addr, len, val);
 }
 
-static int picdev_eclr_write(struct kvm_io_device *dev,
+static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			     gpa_t addr, int len, const void *val)
 {
 	return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
 			    addr, len, val);
 }
 
-static int picdev_eclr_read(struct kvm_io_device *dev,
+static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			    gpa_t addr, int len, void *val)
 {
 	return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index b1947e0f3e10..28146f03c514 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
 
 	old_irr = ioapic->irr;
 	ioapic->irr |= mask;
+	if (edge)
+		ioapic->irr_delivered &= ~mask;
 	if ((edge && old_irr == ioapic->irr) ||
 	    (!edge && entry.fields.remote_irr)) {
 		ret = 0;
@@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	irqe.shorthand = 0;
 
 	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
-		ioapic->irr &= ~(1 << irq);
+		ioapic->irr_delivered |= 1 << irq;
 
 	if (irq == RTC_GSI && line_status) {
 		/*
@@ -422,6 +424,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
 	int i;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
 		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@@ -443,7 +446,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
 		spin_lock(&ioapic->lock);
 
-		if (trigger_mode != IOAPIC_LEVEL_TRIG)
+		if (trigger_mode != IOAPIC_LEVEL_TRIG ||
+		    kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
 			continue;
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -471,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 	}
 }
 
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	smp_rmb();
-	return test_bit(vector, ioapic->handled_vectors);
-}
-
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -498,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
 
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-			    void *val)
+static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+				gpa_t addr, int len, void *val)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 result;
@@ -541,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	return 0;
 }
 
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-			     const void *val)
+static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+				 gpa_t addr, int len, const void *val)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
@@ -597,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
+	ioapic->irr_delivered = 0;
 	ioapic->id = 0;
 	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
 	rtc_irq_eoi_tracking_reset(ioapic);
@@ -654,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
 	spin_lock(&ioapic->lock);
 	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+	state->irr &= ~ioapic->irr_delivered;
 	spin_unlock(&ioapic->lock);
 	return 0;
 }
@@ -667,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	ioapic->irr = 0;
+	ioapic->irr_delivered = 0;
 	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
 	kvm_ioapic_inject_all(ioapic, state->irr);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index c2e36d934af4..ca0b0b4e6256 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -3,7 +3,7 @@
 
 #include <linux/kvm_host.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm;
 struct kvm_vcpu;
@@ -77,6 +77,7 @@ struct kvm_ioapic {
 	struct rtc_status rtc_status;
 	struct delayed_work eoi_inject;
 	u32 irq_eoi[IOAPIC_NUM_PINS];
+	u32 irr_delivered;
 };
 
 #ifdef DEBUG
@@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 }
 
+static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	smp_rmb();
+	return test_bit(vector, ioapic->handled_vectors);
+}
+
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
 			int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2d03568e9498..ad68c73008c5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -27,7 +27,7 @@
 #include <linux/kvm_host.h>
 #include <linux/spinlock.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 #include "ioapic.h"
 #include "lapic.h"
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bd4e34de24c7..d67206a7b99a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+/* The logical map is definitely wrong if we have multiple
+ * modes at the same time.  (Physical map is always right.)
+ */
+static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+{
+	return !(map->mode & (map->mode - 1));
+}
+
+static inline void
+apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+{
+	unsigned lid_bits;
+
+	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
+	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
+	BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
+	lid_bits = map->mode;
+
+	*cid = dest_id >> lid_bits;
+	*lid = dest_id & ((1 << lid_bits) - 1);
+}
+
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm)
 	if (!new)
 		goto out;
 
-	new->ldr_bits = 8;
-	/* flat mode is default */
-	new->cid_shift = 8;
-	new->cid_mask = 0;
-	new->lid_mask = 0xff;
-	new->broadcast = APIC_BROADCAST;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		struct kvm_lapic *apic = vcpu->arch.apic;
-
-		if (!kvm_apic_present(vcpu))
-			continue;
-
-		if (apic_x2apic_mode(apic)) {
-			new->ldr_bits = 32;
-			new->cid_shift = 16;
-			new->cid_mask = new->lid_mask = 0xffff;
-			new->broadcast = X2APIC_BROADCAST;
-		} else if (kvm_apic_get_reg(apic, APIC_LDR)) {
-			if (kvm_apic_get_reg(apic, APIC_DFR) ==
-							APIC_DFR_CLUSTER) {
-				new->cid_shift = 4;
-				new->cid_mask = 0xf;
-				new->lid_mask = 0xf;
-			} else {
-				new->cid_shift = 8;
-				new->cid_mask = 0;
-				new->lid_mask = 0xff;
-			}
-		}
-
-		/*
-		 * All APICs have to be configured in the same mode by an OS.
-		 * We take advatage of this while building logical id loockup
-		 * table. After reset APICs are in software disabled mode, so if
-		 * we find apic with different setting we assume this is the mode
-		 * OS wants all apics to be in; build lookup table accordingly.
-		 */
-		if (kvm_apic_sw_enabled(apic))
-			break;
-	}
-
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
 		u16 cid, lid;
@@ -198,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm)
 
 		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
-		cid = apic_cluster_id(new, ldr);
-		lid = apic_logical_id(new, ldr);
 
 		if (aid < ARRAY_SIZE(new->phys_map))
 			new->phys_map[aid] = apic;
+
+		if (apic_x2apic_mode(apic)) {
+			new->mode |= KVM_APIC_MODE_X2APIC;
+		} else if (ldr) {
+			ldr = GET_APIC_LOGICAL_ID(ldr);
+			if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
+			else
+				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+		}
+
+		if (!kvm_apic_logical_map_valid(new))
+			continue;
+
+		apic_logical_id(new, ldr, &cid, &lid);
+
 		if (lid && cid < ARRAY_SIZE(new->logical_map))
 			new->logical_map[cid][ffs(lid) - 1] = apic;
 	}
@@ -588,15 +582,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 	apic_update_ppr(apic);
 }
 
-static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 {
-	return dest == (apic_x2apic_mode(apic) ?
-			X2APIC_BROADCAST : APIC_BROADCAST);
+	if (apic_x2apic_mode(apic))
+		return mda == X2APIC_BROADCAST;
+
+	return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 }
 
-static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 {
-	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+	if (kvm_apic_broadcast(apic, mda))
+		return true;
+
+	if (apic_x2apic_mode(apic))
+		return mda == kvm_apic_id(apic);
+
+	return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 }
 
 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -613,6 +615,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 		       && (logical_id & mda & 0xffff) != 0;
 
 	logical_id = GET_APIC_LOGICAL_ID(logical_id);
+	mda = GET_APIC_DEST_FIELD(mda);
 
 	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
@@ -627,10 +630,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 	}
 }
 
+/* KVM APIC implementation has two quirks
+ *  - dest always begins at 0 while xAPIC MDA has offset 24,
+ *  - IOxAPIC messages have to be delivered (directly) to x2APIC.
+ */
+static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
+                                              struct kvm_lapic *target)
+{
+	bool ipi = source != NULL;
+	bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
+
+	if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+		return X2APIC_BROADCAST;
+
+	return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+}
+
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, unsigned int dest, int dest_mode)
 {
 	struct kvm_lapic *target = vcpu->arch.apic;
+	u32 mda = kvm_apic_mda(dest, source, target);
 
 	apic_debug("target %p, source %p, dest 0x%x, "
 		   "dest_mode 0x%x, short_hand 0x%x\n",
@@ -640,9 +660,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
 		if (dest_mode == APIC_DEST_PHYSICAL)
-			return kvm_apic_match_physical_addr(target, dest);
+			return kvm_apic_match_physical_addr(target, mda);
 		else
-			return kvm_apic_match_logical_addr(target, dest);
+			return kvm_apic_match_logical_addr(target, mda);
 	case APIC_DEST_SELF:
 		return target == source;
 	case APIC_DEST_ALLINC:
@@ -664,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_lapic **dst;
 	int i;
 	bool ret = false;
+	bool x2apic_ipi = src && apic_x2apic_mode(src);
 
 	*r = -1;
 
@@ -675,15 +696,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	if (irq->shorthand)
 		return false;
 
+	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+		return false;
+
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
 	if (!map)
 		goto out;
 
-	if (irq->dest_id == map->broadcast)
-		goto out;
-
 	ret = true;
 
 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
@@ -692,16 +713,20 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 		dst = &map->phys_map[irq->dest_id];
 	} else {
-		u32 mda = irq->dest_id << (32 - map->ldr_bits);
-		u16 cid = apic_cluster_id(map, mda);
+		u16 cid;
+
+		if (!kvm_apic_logical_map_valid(map)) {
+			ret = false;
+			goto out;
+		}
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
 
 		if (cid >= ARRAY_SIZE(map->logical_map))
 			goto out;
 
 		dst = map->logical_map[cid];
 
-		bitmap = apic_logical_id(map, mda);
-
 		if (irq->delivery_mode == APIC_DM_LOWEST) {
 			int l = -1;
 			for_each_set_bit(i, &bitmap, 16) {
@@ -833,8 +858,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
-	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
-	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+	if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
 		int trigger_mode;
 		if (apic_test_vector(vector, apic->regs + APIC_TMR))
 			trigger_mode = IOAPIC_LEVEL_TRIG;
@@ -1038,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
 
-static int apic_mmio_read(struct kvm_io_device *this,
+static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 			   gpa_t address, int len, void *data)
 {
 	struct kvm_lapic *apic = to_lapic(this);
@@ -1358,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	return ret;
 }
 
-static int apic_mmio_write(struct kvm_io_device *this,
+static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 			    gpa_t address, int len, const void *data)
 {
 	struct kvm_lapic *apic = to_lapic(this);
@@ -1498,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		return;
 	}
 
-	if (!kvm_vcpu_is_bsp(apic->vcpu))
-		value &= ~MSR_IA32_APICBASE_BSP;
 	vcpu->arch.apic_base = value;
 
 	/* update jump label if enable bit changes */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0bc6c656625b..9d28383fc1e7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -1,7 +1,7 @@
 #ifndef __KVM_X86_LAPIC_H
 #define __KVM_X86_LAPIC_H
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 
@@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
 	return kvm_x86_ops->vm_has_apicv(kvm);
 }
 
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
-{
-	u16 cid;
-	ldr >>= 32 - map->ldr_bits;
-	cid = (ldr >> map->cid_shift) & map->cid_mask;
-
-	return cid;
-}
-
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
-	ldr >>= (32 - map->ldr_bits);
-	return ldr & map->lid_mask;
-}
-
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.apic->pending_events;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cee759299a35..146f295ee322 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 		kvm_flush_remote_tlbs(kvm);
 }
 
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+		unsigned long *rmapp)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	int need_tlb_flush = 0;
+	pfn_t pfn;
+	struct kvm_mmu_page *sp;
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		sp = page_header(__pa(sptep));
+		pfn = spte_to_pfn(*sptep);
+
+		/*
+		 * Only EPT supported for now; otherwise, one would need to
+		 * find out efficiently whether the guest page tables are
+		 * also using huge pages.
+		 */
+		if (sp->role.direct &&
+			!kvm_is_reserved_pfn(pfn) &&
+			PageTransCompound(pfn_to_page(pfn))) {
+			drop_spte(kvm, sptep);
+			sptep = rmap_get_first(*rmapp, &iter);
+			need_tlb_flush = 1;
+		} else
+			sptep = rmap_get_next(&iter);
+	}
+
+	return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+			struct kvm_memory_slot *memslot)
+{
+	bool flush = false;
+	unsigned long *rmapp;
+	unsigned long last_index, index;
+	gfn_t gfn_start, gfn_end;
+
+	spin_lock(&kvm->mmu_lock);
+
+	gfn_start = memslot->base_gfn;
+	gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+	if (gfn_start >= gfn_end)
+		goto out;
+
+	rmapp = memslot->arch.rmap[0];
+	last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+					PT_PAGE_TABLE_LEVEL);
+
+	for (index = 0; index <= last_index; ++index, ++rmapp) {
+		if (*rmapp)
+			flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			if (flush) {
+				kvm_flush_remote_tlbs(kvm);
+				flush = false;
+			}
+			cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+out:
+	spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot)
 {
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d869d2f..29fbf9dfdc54 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping {
 };
 
 /* mapping between fixed pmc index and arch_events array */
-int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {1, 0, 7};
 
 static bool pmc_is_gp(struct kvm_pmc *pmc)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cc618c882f90..ce741b8650f6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
 				   MSR_IA32_APICBASE_ENABLE;
-	if (kvm_vcpu_is_bsp(&svm->vcpu))
+	if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
 	svm_init_osvw(&svm->vcpu);
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm)
 static int halt_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
-	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_halt(&svm->vcpu);
 }
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
 	kvm_emulate_hypercall(&svm->vcpu);
 	return 1;
 }
@@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
-			  vcpu->arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+			  kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+	kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
@@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm)
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+	kvm_emulate_wbinvd(&svm->vcpu);
+	return 1;
+}
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
 	u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
@@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
+					    unsigned long val)
 {
 	unsigned long cr0 = svm->vcpu.arch.cr0;
 	bool ret = false;
@@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm)
 		return emulate_on_interception(svm);
 
 	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+	if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+		cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+	else
+		cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
 	err = 0;
 	if (cr >= 16) { /* mov to cr */
@@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
 	u64 data;
 
 	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
 	} else {
 		trace_kvm_msr_read(ecx, data);
 
-		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
-		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 		skip_emulated_instruction(&svm->vcpu);
 	}
@@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
 	struct msr_data msr;
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
-		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+	u64 data = kvm_read_edx_eax(&svm->vcpu);
 
 	msr.data = data;
 	msr.index = ecx;
@@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR3]			= cr_interception,
 	[SVM_EXIT_READ_CR4]			= cr_interception,
 	[SVM_EXIT_READ_CR8]			= cr_interception,
-	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
+	[SVM_EXIT_CR0_SEL_WRITE]		= cr_interception,
 	[SVM_EXIT_WRITE_CR0]			= cr_interception,
 	[SVM_EXIT_WRITE_CR3]			= cr_interception,
 	[SVM_EXIT_WRITE_CR4]			= cr_interception,
@@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= skinit_interception,
-	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
+	[SVM_EXIT_WBINVD]                       = wbinvd_interception,
 	[SVM_EXIT_MONITOR]			= monitor_interception,
 	[SVM_EXIT_MWAIT]			= mwait_interception,
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
@@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b20b417a3a..f5e8dce8046c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
 {
 	unsigned long *msr_bitmap;
 
-	if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+	if (is_guest_mode(vcpu))
+		msr_bitmap = vmx_msr_bitmap_nested;
+	else if (irqchip_in_kernel(vcpu->kvm) &&
+		apic_x2apic_mode(vcpu->arch.apic)) {
 		if (is_long_mode(vcpu))
 			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
 		else
@@ -2467,6 +2470,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	vmx->nested.nested_vmx_secondary_ctls_low = 0;
 	vmx->nested.nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		SECONDARY_EXEC_RDTSCP |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -2476,8 +2480,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
 		vmx->nested.nested_vmx_secondary_ctls_high |=
-			SECONDARY_EXEC_ENABLE_EPT |
-			SECONDARY_EXEC_UNRESTRICTED_GUEST;
+			SECONDARY_EXEC_ENABLE_EPT;
 		vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2491,6 +2494,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
+	if (enable_unrestricted_guest)
+		vmx->nested.nested_vmx_secondary_ctls_high |=
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
 	/* miscellaneous data */
 	rdmsr(MSR_IA32_VMX_MISC,
 		vmx->nested.nested_vmx_misc_low,
@@ -3262,8 +3269,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
 		 * default value.
 		 */
 		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
-			save->selector &= ~SELECTOR_RPL_MASK;
-		save->dpl = save->selector & SELECTOR_RPL_MASK;
+			save->selector &= ~SEGMENT_RPL_MASK;
+		save->dpl = save->selector & SEGMENT_RPL_MASK;
 		save->s = 1;
 	}
 	vmx_set_segment(vcpu, save, seg);
@@ -3836,7 +3843,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 	unsigned int cs_rpl;
 
 	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-	cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+	cs_rpl = cs.selector & SEGMENT_RPL_MASK;
 
 	if (cs.unusable)
 		return false;
@@ -3864,7 +3871,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
 	unsigned int ss_rpl;
 
 	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
-	ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+	ss_rpl = ss.selector & SEGMENT_RPL_MASK;
 
 	if (ss.unusable)
 		return true;
@@ -3886,7 +3893,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 	unsigned int rpl;
 
 	vmx_get_segment(vcpu, &var, seg);
-	rpl = var.selector & SELECTOR_RPL_MASK;
+	rpl = var.selector & SEGMENT_RPL_MASK;
 
 	if (var.unusable)
 		return true;
@@ -3913,7 +3920,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
 
 	if (tr.unusable)
 		return false;
-	if (tr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
+	if (tr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
 		return false;
 	if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
 		return false;
@@ -3931,7 +3938,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
 
 	if (ldtr.unusable)
 		return true;
-	if (ldtr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
+	if (ldtr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
 		return false;
 	if (ldtr.type != 2)
 		return false;
@@ -3948,8 +3955,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
 	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
 	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
 
-	return ((cs.selector & SELECTOR_RPL_MASK) ==
-		 (ss.selector & SELECTOR_RPL_MASK));
+	return ((cs.selector & SEGMENT_RPL_MASK) ==
+		 (ss.selector & SEGMENT_RPL_MASK));
 }
 
 /*
@@ -4705,7 +4712,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
-	if (kvm_vcpu_is_bsp(&vmx->vcpu))
+	if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
 		apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
 	apic_base_msr.host_initiated = true;
 	kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
@@ -5000,7 +5007,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 		if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
 			if (vcpu->arch.halt_request) {
 				vcpu->arch.halt_request = 0;
-				return kvm_emulate_halt(vcpu);
+				return kvm_vcpu_halt(vcpu);
 			}
 			return 1;
 		}
@@ -5065,6 +5072,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	}
 
 	if (is_invalid_opcode(intr_info)) {
+		if (is_guest_mode(vcpu)) {
+			kvm_queue_exception(vcpu, UD_VECTOR);
+			return 1;
+		}
 		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
 		if (er != EMULATE_DONE)
 			kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5084,9 +5095,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	    !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
-		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.ndata = 3;
 		vcpu->run->internal.data[0] = vect_info;
 		vcpu->run->internal.data[1] = intr_info;
+		vcpu->run->internal.data[2] = error_code;
 		return 0;
 	}
 
@@ -5527,13 +5539,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 
 static int handle_halt(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
 	return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
 	kvm_emulate_hypercall(vcpu);
 	return 1;
 }
@@ -5564,7 +5574,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
 	kvm_emulate_wbinvd(vcpu);
 	return 1;
 }
@@ -5822,7 +5831,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	gpa_t gpa;
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-	if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+	if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		skip_emulated_instruction(vcpu);
 		return 1;
 	}
@@ -5903,7 +5912,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
 		if (vcpu->arch.halt_request) {
 			vcpu->arch.halt_request = 0;
-			ret = kvm_emulate_halt(vcpu);
+			ret = kvm_vcpu_halt(vcpu);
 			goto out;
 		}
 
@@ -7312,21 +7321,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
 		else if (port < 0x10000)
 			bitmap = vmcs12->io_bitmap_b;
 		else
-			return 1;
+			return true;
 		bitmap += (port & 0x7fff) / 8;
 
 		if (last_bitmap != bitmap)
 			if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
-				return 1;
+				return true;
 		if (b & (1 << (port & 7)))
-			return 1;
+			return true;
 
 		port++;
 		size--;
 		last_bitmap = bitmap;
 	}
 
-	return 0;
+	return false;
 }
 
 /*
@@ -7342,7 +7351,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	gpa_t bitmap;
 
 	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
-		return 1;
+		return true;
 
 	/*
 	 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
@@ -7361,10 +7370,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	if (msr_index < 1024*8) {
 		unsigned char b;
 		if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
-			return 1;
+			return true;
 		return 1 & (b >> (msr_index & 7));
 	} else
-		return 1; /* let L1 handle the wrong parameter */
+		return true; /* let L1 handle the wrong parameter */
 }
 
 /*
@@ -7386,7 +7395,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		case 0:
 			if (vmcs12->cr0_guest_host_mask &
 			    (val ^ vmcs12->cr0_read_shadow))
-				return 1;
+				return true;
 			break;
 		case 3:
 			if ((vmcs12->cr3_target_count >= 1 &&
@@ -7397,37 +7406,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 					vmcs12->cr3_target_value2 == val) ||
 				(vmcs12->cr3_target_count >= 4 &&
 					vmcs12->cr3_target_value3 == val))
-				return 0;
+				return false;
 			if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
-				return 1;
+				return true;
 			break;
 		case 4:
 			if (vmcs12->cr4_guest_host_mask &
 			    (vmcs12->cr4_read_shadow ^ val))
-				return 1;
+				return true;
 			break;
 		case 8:
 			if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
-				return 1;
+				return true;
 			break;
 		}
 		break;
 	case 2: /* clts */
 		if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
 		    (vmcs12->cr0_read_shadow & X86_CR0_TS))
-			return 1;
+			return true;
 		break;
 	case 1: /* mov from cr */
 		switch (cr) {
 		case 3:
 			if (vmcs12->cpu_based_vm_exec_control &
 			    CPU_BASED_CR3_STORE_EXITING)
-				return 1;
+				return true;
 			break;
 		case 8:
 			if (vmcs12->cpu_based_vm_exec_control &
 			    CPU_BASED_CR8_STORE_EXITING)
-				return 1;
+				return true;
 			break;
 		}
 		break;
@@ -7438,14 +7447,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		 */
 		if (vmcs12->cr0_guest_host_mask & 0xe &
 		    (val ^ vmcs12->cr0_read_shadow))
-			return 1;
+			return true;
 		if ((vmcs12->cr0_guest_host_mask & 0x1) &&
 		    !(vmcs12->cr0_read_shadow & 0x1) &&
 		    (val & 0x1))
-			return 1;
+			return true;
 		break;
 	}
-	return 0;
+	return false;
 }
 
 /*
@@ -7468,48 +7477,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 				KVM_ISA_VMX);
 
 	if (vmx->nested.nested_run_pending)
-		return 0;
+		return false;
 
 	if (unlikely(vmx->fail)) {
 		pr_info_ratelimited("%s failed vm entry %x\n", __func__,
 				    vmcs_read32(VM_INSTRUCTION_ERROR));
-		return 1;
+		return true;
 	}
 
 	switch (exit_reason) {
 	case EXIT_REASON_EXCEPTION_NMI:
 		if (!is_exception(intr_info))
-			return 0;
+			return false;
 		else if (is_page_fault(intr_info))
 			return enable_ept;
 		else if (is_no_device(intr_info) &&
 			 !(vmcs12->guest_cr0 & X86_CR0_TS))
-			return 0;
+			return false;
 		return vmcs12->exception_bitmap &
 				(1u << (intr_info & INTR_INFO_VECTOR_MASK));
 	case EXIT_REASON_EXTERNAL_INTERRUPT:
-		return 0;
+		return false;
 	case EXIT_REASON_TRIPLE_FAULT:
-		return 1;
+		return true;
 	case EXIT_REASON_PENDING_INTERRUPT:
 		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
 	case EXIT_REASON_NMI_WINDOW:
 		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
 	case EXIT_REASON_TASK_SWITCH:
-		return 1;
+		return true;
 	case EXIT_REASON_CPUID:
 		if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
-			return 0;
-		return 1;
+			return false;
+		return true;
 	case EXIT_REASON_HLT:
 		return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
 	case EXIT_REASON_INVD:
-		return 1;
+		return true;
 	case EXIT_REASON_INVLPG:
 		return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
 	case EXIT_REASON_RDPMC:
 		return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-	case EXIT_REASON_RDTSC:
+	case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
 		return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
 	case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
 	case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -7521,7 +7530,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
 		 */
-		return 1;
+		return true;
 	case EXIT_REASON_CR_ACCESS:
 		return nested_vmx_exit_handled_cr(vcpu, vmcs12);
 	case EXIT_REASON_DR_ACCESS:
@@ -7532,7 +7541,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_MSR_WRITE:
 		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
 	case EXIT_REASON_INVALID_STATE:
-		return 1;
+		return true;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
 	case EXIT_REASON_MONITOR_INSTRUCTION:
@@ -7542,7 +7551,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 			nested_cpu_has2(vmcs12,
 				SECONDARY_EXEC_PAUSE_LOOP_EXITING);
 	case EXIT_REASON_MCE_DURING_VMENTRY:
-		return 0;
+		return false;
 	case EXIT_REASON_TPR_BELOW_THRESHOLD:
 		return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
 	case EXIT_REASON_APIC_ACCESS:
@@ -7551,7 +7560,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_APIC_WRITE:
 	case EXIT_REASON_EOI_INDUCED:
 		/* apic_write and eoi_induced should exit unconditionally. */
-		return 1;
+		return true;
 	case EXIT_REASON_EPT_VIOLATION:
 		/*
 		 * L0 always deals with the EPT violation. If nested EPT is
@@ -7559,7 +7568,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * missing in the guest EPT table (EPT12), the EPT violation
 		 * will be injected with nested_ept_inject_page_fault()
 		 */
-		return 0;
+		return false;
 	case EXIT_REASON_EPT_MISCONFIG:
 		/*
 		 * L2 never uses directly L1's EPT, but rather L0's own EPT
@@ -7567,11 +7576,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * (EPT on EPT). So any problems with the structure of the
 		 * table is L0's fault.
 		 */
-		return 0;
+		return false;
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
-		return 1;
+		return true;
 	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
 		/*
 		 * This should never happen, since it is not possible to
@@ -7581,7 +7590,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
-		return 1;
+		return true;
 	}
 }
 
@@ -8516,6 +8525,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 						exec_control);
 			}
 		}
+		if (nested && !vmx->rdtscp_enabled)
+			vmx->nested.nested_vmx_secondary_ctls_high &=
+				~SECONDARY_EXEC_RDTSCP;
 	}
 
 	/* Exposing INVPCID only when PCID is exposed */
@@ -8616,10 +8628,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 					struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-		/* TODO: Also verify bits beyond physical address width are 0 */
-		if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+		if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
+		    vmcs12->apic_access_addr >> maxphyaddr)
 			return false;
 
 		/*
@@ -8635,8 +8648,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-		/* TODO: Also verify bits beyond physical address width are 0 */
-		if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+		if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
+		    vmcs12->virtual_apic_page_addr >> maxphyaddr)
 			return false;
 
 		if (vmx->nested.virtual_apic_page) /* shouldn't happen */
@@ -8659,7 +8672,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 	}
 
 	if (nested_cpu_has_posted_intr(vmcs12)) {
-		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
+		    vmcs12->posted_intr_desc_addr >> maxphyaddr)
 			return false;
 
 		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
@@ -8858,9 +8872,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
 
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 				       unsigned long count_field,
-				       unsigned long addr_field,
-				       int maxphyaddr)
+				       unsigned long addr_field)
 {
+	int maxphyaddr;
 	u64 count, addr;
 
 	if (vmcs12_read_any(vcpu, count_field, &count) ||
@@ -8870,6 +8884,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 	}
 	if (count == 0)
 		return 0;
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
 	if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
 	    (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
 		pr_warn_ratelimited(
@@ -8883,19 +8898,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
 						struct vmcs12 *vmcs12)
 {
-	int maxphyaddr;
-
 	if (vmcs12->vm_exit_msr_load_count == 0 &&
 	    vmcs12->vm_exit_msr_store_count == 0 &&
 	    vmcs12->vm_entry_msr_load_count == 0)
 		return 0; /* Fast path */
-	maxphyaddr = cpuid_maxphyaddr(vcpu);
 	if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
-					VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+					VM_EXIT_MSR_LOAD_ADDR) ||
 	    nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
-					VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+					VM_EXIT_MSR_STORE_ADDR) ||
 	    nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
-					VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+					VM_ENTRY_MSR_LOAD_ADDR))
 		return -EINVAL;
 	return 0;
 }
@@ -9145,8 +9157,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			exec_control &= ~SECONDARY_EXEC_RDTSCP;
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+				  SECONDARY_EXEC_RDTSCP |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9218,9 +9231,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	}
 
 	if (cpu_has_vmx_msr_bitmap() &&
-	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
-	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
-		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+	    exec_control & CPU_BASED_USE_MSR_BITMAPS) {
+		nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
+		/* MSR_BITMAP will be set by following vmx_set_efer. */
 	} else
 		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
@@ -9379,7 +9392,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	}
 
 	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
@@ -9518,7 +9530,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	vmcs12->launch_state = 1;
 
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
-		return kvm_emulate_halt(vcpu);
+		return kvm_vcpu_halt(vcpu);
 
 	vmx->nested.nested_run_pending = 1;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70be41b3..e1a81267f3f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+		for (i = 0; i < KVM_NR_DB_REGS; i++)
+			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+	}
+}
+
 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
 {
 	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -1070,19 +1081,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
 	u64 boot_ns;
 
-	boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
 
 	write_seqcount_begin(&vdata->seq);
 
 	/* copy pvclock gtod data */
-	vdata->clock.vclock_mode	= tk->tkr.clock->archdata.vclock_mode;
-	vdata->clock.cycle_last		= tk->tkr.cycle_last;
-	vdata->clock.mask		= tk->tkr.mask;
-	vdata->clock.mult		= tk->tkr.mult;
-	vdata->clock.shift		= tk->tkr.shift;
+	vdata->clock.vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->clock.cycle_last		= tk->tkr_mono.cycle_last;
+	vdata->clock.mask		= tk->tkr_mono.mask;
+	vdata->clock.mult		= tk->tkr_mono.mult;
+	vdata->clock.shift		= tk->tkr_mono.shift;
 
 	vdata->boot_ns			= boot_ns;
-	vdata->nsec_base		= tk->tkr.xtime_nsec;
+	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
 
 	write_seqcount_end(&vdata->seq);
 }
@@ -2744,7 +2755,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_USER_NMI:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_IRQFD:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_IOEVENTFD_NO_LENGTH:
 	case KVM_CAP_PIT2:
@@ -3150,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+	kvm_update_dr0123(vcpu);
 	vcpu->arch.dr6 = dbgregs->dr6;
 	kvm_update_dr6(vcpu);
 	vcpu->arch.dr7 = dbgregs->dr7;
@@ -4115,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 	do {
 		n = min(len, 8);
 		if (!(vcpu->arch.apic &&
-		      !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
-		    && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+		      !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+		    && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
 			break;
 		handled += n;
 		addr += n;
@@ -4135,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 	do {
 		n = min(len, 8);
 		if (!(vcpu->arch.apic &&
-		      !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
-		    && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+		      !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+					 addr, n, v))
+		    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
 			break;
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
 		handled += n;
@@ -4476,7 +4488,8 @@ mmio:
 	return X86EMUL_CONTINUE;
 }
 
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+			unsigned long addr,
 			void *val, unsigned int bytes,
 			struct x86_exception *exception,
 			const struct read_write_emulator_ops *ops)
@@ -4539,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
 				   exception, &read_emultor);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
 			    unsigned long addr,
 			    const void *val,
 			    unsigned int bytes,
@@ -4630,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 	int r;
 
 	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+		r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
 				    vcpu->arch.pio.size, pd);
 	else
-		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+		r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
 				     vcpu->arch.pio.port, vcpu->arch.pio.size,
 				     pd);
 	return r;
@@ -4706,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
 	kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
 	if (!need_emulate_wbinvd(vcpu))
 		return X86EMUL_CONTINUE;
@@ -4723,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
 		wbinvd();
 	return X86EMUL_CONTINUE;
 }
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
+
+
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-	kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+	kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+			   unsigned long *dest)
 {
 	return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+			   unsigned long value)
 {
 
 	return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5817,7 +5840,7 @@ void kvm_arch_exit(void)
 	free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
 	if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5828,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	return kvm_vcpu_halt(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5904,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 	lapic_irq.dest_id = apicid;
 
 	lapic_irq.delivery_mode = APIC_DM_REMRD;
-	kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5912,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int op_64_bit, r = 1;
 
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+
 	if (kvm_hv_hypercall_enabled(vcpu->kvm))
 		return kvm_hv_hypercall(vcpu);
 
@@ -6165,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 }
 
 /*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
  * userspace.
  */
@@ -6302,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		set_debugreg(vcpu->arch.eff_db[2], 2);
 		set_debugreg(vcpu->arch.eff_db[3], 3);
 		set_debugreg(vcpu->arch.dr6, 6);
+		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
@@ -6383,42 +6416,47 @@ out:
 	return r;
 }
 
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	if (!kvm_arch_vcpu_runnable(vcpu)) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		kvm_vcpu_block(vcpu);
+		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+			return 1;
+	}
+
+	kvm_apic_accept_events(vcpu);
+	switch(vcpu->arch.mp_state) {
+	case KVM_MP_STATE_HALTED:
+		vcpu->arch.pv.pv_unhalted = false;
+		vcpu->arch.mp_state =
+			KVM_MP_STATE_RUNNABLE;
+	case KVM_MP_STATE_RUNNABLE:
+		vcpu->arch.apf.halted = false;
+		break;
+	case KVM_MP_STATE_INIT_RECEIVED:
+		break;
+	default:
+		return -EINTR;
+		break;
+	}
+	return 1;
+}
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
 	struct kvm *kvm = vcpu->kvm;
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
-	r = 1;
-	while (r > 0) {
+	for (;;) {
 		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		    !vcpu->arch.apf.halted)
 			r = vcpu_enter_guest(vcpu);
-		else {
-			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-			kvm_vcpu_block(vcpu);
-			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-			if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
-				kvm_apic_accept_events(vcpu);
-				switch(vcpu->arch.mp_state) {
-				case KVM_MP_STATE_HALTED:
-					vcpu->arch.pv.pv_unhalted = false;
-					vcpu->arch.mp_state =
-						KVM_MP_STATE_RUNNABLE;
-				case KVM_MP_STATE_RUNNABLE:
-					vcpu->arch.apf.halted = false;
-					break;
-				case KVM_MP_STATE_INIT_RECEIVED:
-					break;
-				default:
-					r = -EINTR;
-					break;
-				}
-			}
-		}
-
+		else
+			r = vcpu_block(kvm, vcpu);
 		if (r <= 0)
 			break;
 
@@ -6430,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.request_irq_exits;
+			break;
 		}
 
 		kvm_check_async_pf_completion(vcpu);
@@ -6438,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.signal_exits;
+			break;
 		}
 		if (need_resched()) {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6569,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	} else
 		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-	r = __vcpu_run(vcpu);
+	r = vcpu_run(vcpu);
 
 out:
 	post_kvm_run_save(vcpu);
@@ -7076,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_clear_exception_queue(vcpu);
 
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+	kvm_update_dr0123(vcpu);
 	vcpu->arch.dr6 = DR6_INIT;
 	kvm_update_dr6(vcpu);
 	vcpu->arch.dr7 = DR7_FIXED_1;
 	kvm_update_dr7(vcpu);
 
+	vcpu->arch.cr2 = 0;
+
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	vcpu->arch.apf.msr_val = 0;
 	vcpu->arch.st.msr_val = 0;
@@ -7241,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
 		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7289,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.guest_supported_xcr0 = 0;
 	vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
+	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
@@ -7429,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
-			kvm_kvfree(free->arch.rmap[i]);
+			kvfree(free->arch.rmap[i]);
 			free->arch.rmap[i] = NULL;
 		}
 		if (i == 0)
@@ -7437,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
 		if (!dont || free->arch.lpage_info[i - 1] !=
 			     dont->arch.lpage_info[i - 1]) {
-			kvm_kvfree(free->arch.lpage_info[i - 1]);
+			kvfree(free->arch.lpage_info[i - 1]);
 			free->arch.lpage_info[i - 1] = NULL;
 		}
 	}
@@ -7491,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
 out_free:
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-		kvm_kvfree(slot->arch.rmap[i]);
+		kvfree(slot->arch.rmap[i]);
 		slot->arch.rmap[i] = NULL;
 		if (i == 0)
 			continue;
 
-		kvm_kvfree(slot->arch.lpage_info[i - 1]);
+		kvfree(slot->arch.lpage_info[i - 1]);
 		slot->arch.lpage_info[i - 1] = NULL;
 	}
 	return -ENOMEM;
@@ -7619,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	new = id_to_memslot(kvm->memslots, mem->slot);
 
 	/*
+	 * Dirty logging tracks sptes in 4k granularity, meaning that large
+	 * sptes have to be split.  If live migration is successful, the guest
+	 * in the source machine will be destroyed and large sptes will be
+	 * created in the destination. However, if the guest continues to run
+	 * in the source machine (for example if live migration fails), small
+	 * sptes will remain around and cause bad performance.
+	 *
+	 * Scan sptes if dirty logging has been stopped, dropping those
+	 * which can be collapsed into a single large-page spte.  Later
+	 * page faults will create the large-page sptes.
+	 */
+	if ((change != KVM_MR_DELETE) &&
+		(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+		!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_zap_collapsible_sptes(kvm, new);
+
+	/*
 	 * Set up write protection and/or dirty logging for the new slot.
 	 *
 	 * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ac4453d8520e..717908b16037 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+			set_intr_gate(i, irq_entries_start +
+					8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 
 	/*
@@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
 {
 	lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
 		   THREAD_SIZE / PAGE_SIZE);
+	tss->x86_tss.sp0 = thread->sp0;
 }
 
 /* Let's just say, I wouldn't do debugging under a Guest. */
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb1d51b..082a85167a5b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-	pushl_cfi %\reg
-	CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-	popl_cfi %\reg
-	CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
 	movl %ebx, %eax
 	movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-	SAVE esi
-	SAVE edi
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 
 	movl %eax, %esi
 	movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE edi
-	RESTORE esi
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
 2:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
 /* these just push these two parameters on the stack */
-	SAVE edi
-	SAVE ecx
+	pushl_cfi_reg edi
+	pushl_cfi_reg ecx
 
 	movl %eax, %ebp
 	movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
 3:
 	addl $8, %esp
 	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 4:
 	cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
 
 ENTRY(atomic64_inc_not_zero_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
 	movl $1, %eax
 3:
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8eee6615..9bc944a91274 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 	   */		
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
 6:	addl %ecx,%eax
 	adcl $0, %eax 
 7:	
-	testl $1, 12(%esp)
+	testb $1, 12(%esp)
 	jz 8f
 	roll $8, %eax
 8:
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
 
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
 	addl %ebx,%eax
 	adcl $0,%eax
 80: 
-	testl $1, 12(%esp)
+	testb $1, 12(%esp)
 	jz 90f
 	roll $8, %eax
 90: 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
 	subl  $4,%esp	
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl ARGBASE+16(%esp),%eax	# sum
 	movl ARGBASE+12(%esp),%ecx	# len
 	movl ARGBASE+4(%esp),%esi	# src
@@ -412,12 +401,9 @@ DST(	movb %cl, (%edi)	)
 
 .previous
 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
 	popl_cfi %ecx			# equivalent to addl $4,%esp
 	ret	
 	CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
 		
 ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
+	pushl_cfi_reg ebx
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
 	movl ARGBASE+4(%esp),%esi	#src
 	movl ARGBASE+8(%esp),%edi	#dst	
 	movl ARGBASE+12(%esp),%ecx	#len
@@ -506,12 +489,9 @@ DST(	movb %dl, (%edi)         )
 	jmp  7b			
 .previous				
 
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebx
-	CFI_RESTORE ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cfa12a6..e67e579c93bd 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,35 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
 /*
- * Zero a page. 	
- * rdi	page
- */			
-ENTRY(clear_page_c)
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi	- page
+ */
+ENTRY(clear_page)
 	CFI_STARTPROC
+
+	ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp clear_page_c_e", X86_FEATURE_ERMS
+
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
 	ret
 	CFI_ENDPROC
-ENDPROC(clear_page_c)
+ENDPROC(clear_page)
 
-ENTRY(clear_page_c_e)
+ENTRY(clear_page_orig)
 	CFI_STARTPROC
-	movl $4096,%ecx
-	xorl %eax,%eax
-	rep stosb
-	ret
-	CFI_ENDPROC
-ENDPROC(clear_page_c_e)
 
-ENTRY(clear_page)
-	CFI_STARTPROC
 	xorl   %eax,%eax
 	movl   $4096/64,%ecx
 	.p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
 	nop
 	ret
 	CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
-	/*
-	 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
-	 * It is recommended to use this when possible.
-	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
-	 * Otherwise, use original function.
-	 *
-	 */
+ENDPROC(clear_page_orig)
 
-#include <asm/cpufeature.h>
-
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:	.byte 0xeb					/* jmp <disp8> */
-	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
-3:
-	.previous
-	.section .altinstructions,"a"
-	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
-			     .Lclear_page_end-clear_page, 2b-1b
-	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
-			     .Lclear_page_end-clear_page,3b-2b
-	.previous
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212b..8239dbcbf984 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
 	ALIGN
-copy_page_rep:
+ENTRY(copy_page)
 	CFI_STARTPROC
+	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_rep)
-
-/*
- *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- *  Could vary the prefetch distance based on SMP/UP.
-*/
+ENDPROC(copy_page)
 
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
 	CFI_STARTPROC
 	subq	$2*8,	%rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
 	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
-.Lcopy_page_end:
 	CFI_ENDPROC
-ENDPROC(copy_page)
-
-	/* Some CPUs run faster using the string copy instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
-2:
-	.previous
-	.section .altinstructions,"a"
-	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
-		.Lcopy_page_end-copy_page, 2b-1b
-	.previous
+ENDPROC(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dee945d55594..fa997dfaef24 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,9 +8,6 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
@@ -19,33 +16,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
-	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
-	.byte 0xe9	/* 32bit jump */
-	.long \orig-1f	/* by default jump to orig */
-1:
-	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
-3:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry 0b,2b,\feature1,5,5
-	altinstruction_entry 0b,3b,\feature2,5,5
-	.previous
-	.endm
-
 	.macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
 	andl $7,%ecx
@@ -67,7 +38,6 @@
 
 	_ASM_EXTABLE(100b,103b)
 	_ASM_EXTABLE(101b,103b)
-#endif
 	.endm
 
 /* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
 	ja bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
+	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
+		      "jmp copy_user_generic_string",		\
+		      X86_FEATURE_REP_GOOD,			\
+		      "jmp copy_user_enhanced_fast_string",	\
+		      X86_FEATURE_ERMS
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
 	ja bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
+	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
+		      "jmp copy_user_generic_string",		\
+		      X86_FEATURE_REP_GOOD,			\
+		      "jmp copy_user_enhanced_fast_string",	\
+		      X86_FEATURE_ERMS
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 2419d5fefae3..9734182966f3 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
 
 	/* handle last odd byte */
 .Lhandle_1:
-	testl $1, %r10d
+	testb $1, %r10b
 	jz    .Lende
 	xorl  %ebx, %ebx
 	source
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1313ae6b478b..8f72b334aea0 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -52,6 +52,13 @@
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 {
+	/*
+	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+	 * even if the input buffer is long enough to hold them.
+	 */
+	if (buf_len > MAX_INSN_SIZE)
+		buf_len = MAX_INSN_SIZE;
+
 	memset(insn, 0, sizeof(*insn));
 	insn->kaddr = kaddr;
 	insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
 				/* VEX.W overrides opnd_size */
 				insn->opnd_bytes = 8;
 		} else {
+			/*
+			 * For VEX2, fake VEX3-like byte#2.
+			 * Makes it easier to decode vex.W, vex.vvvv,
+			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+			 */
+			insn->vex_prefix.bytes[2] = b2 & 0x7f;
 			insn->vex_prefix.nbytes = 2;
 			insn->next_byte += 2;
 		}
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 89b53c9968e7..b046664f5a1c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,12 +1,20 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
 /*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
+/*
  * memcpy - Copy a memory block.
  *
  * Input:
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	shrq $3, %rcx
@@ -34,29 +38,21 @@
 	movl %edx, %ecx
 	rep movsb
 	ret
-.Lmemcpy_e:
-	.previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-.Lmemcpy_e_e:
-	.previous
-
-.weak memcpy
+ENDPROC(memcpy_erms)
 
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
 	retq
 	CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-	/*
-	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
-	 * If the feature is supported, memcpy_c_e() is the first choice.
-	 * If enhanced rep movsb copy is not available, use fast string copy
-	 * memcpy_c() when possible. This is faster and code is simpler than
-	 * original memcpy().
-	 * Otherwise, original memcpy() is used.
-	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-	 *
-	 * Replace only beginning, memcpy is used to apply alternatives,
-	 * so it is silly to overwrite itself with nops - reboot is the
-	 * only outcome...
-	 */
-	.section .altinstructions, "a"
-	altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
-	altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
-	.previous
+ENDPROC(memcpy_orig)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 9c4b530575da..0f8a0d0331b9 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -5,7 +5,6 @@
  * This assembly file is re-written from memmove_64.c file.
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
-#define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
 	jg 2f
 
 .Lmemmove_begin_forward:
+	ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
 13:
 	retq
 	CFI_ENDPROC
-
-	.section .altinstr_replacement,"ax"
-.Lmemmove_begin_forward_efs:
-	/* Forward moving data. */
-	movq %rdx, %rcx
-	rep movsb
-	retq
-.Lmemmove_end_forward_efs:
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry .Lmemmove_begin_forward,		\
-		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,	\
-		.Lmemmove_end_forward-.Lmemmove_begin_forward,	\
-		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
-	.previous
 ENDPROC(__memmove)
 ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935c6a60..93118fb23976 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+.weak memset
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
  * simpler and shorter than the orignal function as well.
- *	
+ *
  * rdi   destination
- * rsi   value (char) 
- * rdx   count (bytes) 
- * 
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
  * rax   original destination
- */	
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ */
+ENTRY(memset)
+ENTRY(__memset)
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+	 * to use it when possible. If not available, use fast string instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 */
+	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memset_erms", X86_FEATURE_ERMS
+
 	movq %rdi,%r9
 	movq %rdx,%rcx
 	andl $7,%edx
@@ -31,8 +42,8 @@
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e:
-	.previous
+ENDPROC(memset)
+ENDPROC(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
  *
  * rax   original destination
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e_e:
-	.previous
-
-.weak memset
+ENDPROC(memset_erms)
 
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
 	CFI_STARTPROC
 	movq %rdi,%r10
 
@@ -134,23 +140,4 @@ ENTRY(__memset)
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
-	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
-	 * It is recommended to use this when possible.
-	 *
-	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
-	 * instructions.
-	 *
-	 * Otherwise, use original memset function.
-	 *
-	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-	 */
-	.section .altinstructions,"a"
-	altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
-			     .Lfinal-__memset,.Lmemset_e-.Lmemset_c
-	altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
-			     .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
-	.previous
+ENDPROC(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13eefad10..3ca5218fbece 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushq_cfi %rbx
-	pushq_cfi %rbp
+	pushq_cfi_reg rbx
+	pushq_cfi_reg rbp
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
 	movl    %ebp, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq_cfi %rbp
-	popq_cfi %rbx
+	popq_cfi_reg rbp
+	popq_cfi_reg rbx
 	ret
 3:
 	CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	pushl_cfi %ebp
-	pushl_cfi %esi
-	pushl_cfi %edi
+	pushl_cfi_reg ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 	pushl_cfi $0              /* Return value */
 	pushl_cfi %eax
 	movl    4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
 	movl    %esi, 24(%eax)
 	movl    %edi, 28(%eax)
 	popl_cfi %eax
-	popl_cfi %edi
-	popl_cfi %esi
-	popl_cfi %ebp
-	popl_cfi %ebx
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebp
+	popl_cfi_reg ebx
 	ret
 3:
 	CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f042468..2322abe4da3b 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
  */
 
 #define save_common_regs \
-	pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+	pushl_cfi_reg ecx
 
 #define restore_common_regs \
-	popl_cfi %ecx; CFI_RESTORE ecx
+	popl_cfi_reg ecx
 
 	/* Avoid uglifying the argument copying x86-64 needs to do. */
 	.macro movq src, dst
@@ -64,22 +64,22 @@
  */
 
 #define save_common_regs \
-	pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-	pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-	pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-	pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
-	pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
-	pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-	pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi; \
+	pushq_cfi_reg rsi; \
+	pushq_cfi_reg rcx; \
+	pushq_cfi_reg r8;  \
+	pushq_cfi_reg r9;  \
+	pushq_cfi_reg r10; \
+	pushq_cfi_reg r11
 
 #define restore_common_regs \
-	popq_cfi %r11; CFI_RESTORE r11; \
-	popq_cfi %r10; CFI_RESTORE r10; \
-	popq_cfi %r9;  CFI_RESTORE r9; \
-	popq_cfi %r8;  CFI_RESTORE r8; \
-	popq_cfi %rcx; CFI_RESTORE rcx; \
-	popq_cfi %rsi; CFI_RESTORE rsi; \
-	popq_cfi %rdi; CFI_RESTORE rdi
+	popq_cfi_reg r11; \
+	popq_cfi_reg r10; \
+	popq_cfi_reg r9; \
+	popq_cfi_reg r8; \
+	popq_cfi_reg rcx; \
+	popq_cfi_reg rsi; \
+	popq_cfi_reg rdi
 
 #endif
 
@@ -87,12 +87,10 @@
 ENTRY(call_rwsem_down_read_failed)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_down_read_failed
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
 ENTRY(call_rwsem_downgrade_wake)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_downgrade_wake
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf5ac2c..5eb715087b80 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
 	.globl \name
 \name:
 	CFI_STARTPROC
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ecx
+	pushl_cfi_reg edx
 
 	.if \put_ret_addr_in_eax
 	/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
 	.endif
 
 	call \func
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg edx
+	popl_cfi_reg ecx
+	popl_cfi_reg eax
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5ebd614a..f89ba4e93025 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,18 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi_reg rdi
+	pushq_cfi_reg rsi
+	pushq_cfi_reg rdx
+	pushq_cfi_reg rcx
+	pushq_cfi_reg rax
+	pushq_cfi_reg r8
+	pushq_cfi_reg r9
+	pushq_cfi_reg r10
+	pushq_cfi_reg r11
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +54,22 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi_reg r11
+	popq_cfi_reg r10
+	popq_cfi_reg r9
+	popq_cfi_reg r8
+	popq_cfi_reg rax
+	popq_cfi_reg rcx
+	popq_cfi_reg rdx
+	popq_cfi_reg rsi
+	popq_cfi_reg rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index c905e89e19fe..1f33b3d1fd68 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -69,21 +69,20 @@ EXPORT_SYMBOL(copy_in_user);
  * it is not necessary to optimize tail handling.
  */
 __visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+copy_user_handle_tail(char *to, char *from, unsigned len)
 {
-	char c;
-	unsigned zero_len;
-
 	for (; len; --len, to++) {
+		char c;
+
 		if (__get_user_nocheck(c, from++, sizeof(char)))
 			break;
 		if (__put_user_nocheck(c, to, sizeof(char)))
 			break;
 	}
-
-	for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
-		if (__put_user_nocheck(c, to++, sizeof(char)))
-			break;
 	clac();
+
+	/* If the destination is a kernel buffer, we always clear the end */
+	if ((unsigned long)to >= TASK_SIZE_MAX)
+		memset(to, 0, len);
 	return len;
 }
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 1a2be7c6895d..816488c0b97e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -273,6 +273,9 @@ dd: ESC
 de: ESC
 df: ESC
 # 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
 e0: LOOPNE/LOOPNZ Jb (f64)
 e1: LOOPE/LOOPZ Jb (f64)
 e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
 e5: IN eAX,Ib
 e6: OUT Ib,AL
 e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
 e8: CALL Jz (f64)
 e9: JMP-near Jz (f64)
 ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
 # 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
 81: JNO Jz (f64)
 82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
 GrpTable: Grp5
 0: INC Ev
 1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 2: CALLN Ev (f64)
 3: CALLF Ep
 4: JMPN Ev (f64)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c4cc74006c61..a482d105172b 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
-obj-$(CONFIG_MEMTEST)		+= memtest.o
-
 obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ede025fb46f1..181c53bac3a7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
-	if (kprobes_built_in() && !user_mode_vm(regs)) {
+	if (kprobes_built_in() && !user_mode(regs)) {
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, 14))
 			ret = 1;
@@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 	instr = (void *)convert_ip_to_linear(current, regs);
 	max_instr = instr + 15;
 
-	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
 		return 0;
 
 	while (instr < max_instr) {
@@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 	if (error_code & PF_USER)
 		return false;
 
-	if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+	if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
 		return false;
 
 	return true;
@@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * User-mode registers count as a user access even for any
 	 * potential system fault or CPU buglet:
 	 */
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		local_irq_enable();
 		error_code |= PF_USER;
 		flags |= FAULT_FLAG_USER;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a110efca6d06..1d553186c434 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,29 +29,33 @@
 
 /*
  * Tables translating between page_cache_type_t and pte encoding.
- * Minimal supported modes are defined statically, modified if more supported
- * cache modes are available.
- * Index into __cachemode2pte_tbl is the cachemode.
- * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
- * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ *
+ * Minimal supported modes are defined statically, they are modified
+ * during bootup if more supported cache modes are available.
+ *
+ *   Index into __cachemode2pte_tbl[] is the cachemode.
+ *
+ *   Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
+ *   (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
-	[_PAGE_CACHE_MODE_WB]		= 0,
-	[_PAGE_CACHE_MODE_WC]		= _PAGE_PWT,
-	[_PAGE_CACHE_MODE_UC_MINUS]	= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_UC]		= _PAGE_PCD | _PAGE_PWT,
-	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
+	[_PAGE_CACHE_MODE_WC      ]	= _PAGE_PWT | 0        ,
+	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_UC      ]	= _PAGE_PWT | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WP      ]	= 0         | _PAGE_PCD,
 };
 EXPORT_SYMBOL(__cachemode2pte_tbl);
+
 uint8_t __pte2cachemode_tbl[8] = {
-	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
-	[__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
+	[__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
 EXPORT_SYMBOL(__pte2cachemode_tbl);
@@ -131,21 +135,7 @@ void  __init early_alloc_pgt_buf(void)
 
 int after_bootmem;
 
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
-				= 1
-#endif
-;
-
-static void __init init_gbpages(void)
-{
-#ifdef CONFIG_X86_64
-	if (direct_gbpages && cpu_has_gbpages)
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
-		direct_gbpages = 0;
-#endif
-}
+early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
 
 struct map_range {
 	unsigned long start;
@@ -157,16 +147,12 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-	init_gbpages();
-
 #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (direct_gbpages)
-		page_size_mask |= 1 << PG_LEVEL_1G;
 	if (cpu_has_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
@@ -179,6 +165,15 @@ static void __init probe_page_size_mask(void)
 	if (cpu_has_pge) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
+	} else
+		__supported_pte_mask &= ~_PAGE_GLOBAL;
+
+	/* Enable 1 GB linear kernel mappings if available: */
+	if (direct_gbpages && cpu_has_gbpages) {
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	} else {
+		direct_gbpages = 0;
 	}
 }
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30eb05ae7061..3fba623e3ba5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	return 0;
 }
 
-static int __init parse_direct_gbpages_off(char *arg)
-{
-	direct_gbpages = 0;
-	return 0;
-}
-early_param("nogbpages", parse_direct_gbpages_off);
-
-static int __init parse_direct_gbpages_on(char *arg)
-{
-	direct_gbpages = 1;
-	return 0;
-}
-early_param("gbpages", parse_direct_gbpages_on);
-
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index fdf617c00e2f..5ead4d6cf3a7 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -67,8 +67,13 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
 
 /*
  * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
+ * address space. It transparently creates kernel huge I/O mapping when
+ * the physical address is aligned by a huge page size (1GB or 2MB) and
+ * the requested size is at least the huge page size.
+ *
+ * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
+ * Therefore, the mapping code falls back to use a smaller page toward 4KB
+ * when a mapping range is covered by non-WB type of MTRRs.
  *
  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
  * have to convert them into an offset in a page-aligned mapping, but the
@@ -326,6 +331,20 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+int arch_ioremap_pud_supported(void)
+{
+#ifdef CONFIG_X86_64
+	return cpu_has_gbpages;
+#else
+	return 0;
+#endif
+}
+
+int arch_ioremap_pmd_supported(void)
+{
+	return cpu_has_pse;
+}
+
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
deleted file mode 100644
index 1e9da795767a..000000000000
--- a/arch/x86/mm/memtest.c
+++ /dev/null
@@ -1,118 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/pfn.h>
-#include <linux/memblock.h>
-
-static u64 patterns[] __initdata = {
-	/* The first entry has to be 0 to leave memtest with zeroed memory */
-	0,
-	0xffffffffffffffffULL,
-	0x5555555555555555ULL,
-	0xaaaaaaaaaaaaaaaaULL,
-	0x1111111111111111ULL,
-	0x2222222222222222ULL,
-	0x4444444444444444ULL,
-	0x8888888888888888ULL,
-	0x3333333333333333ULL,
-	0x6666666666666666ULL,
-	0x9999999999999999ULL,
-	0xccccccccccccccccULL,
-	0x7777777777777777ULL,
-	0xbbbbbbbbbbbbbbbbULL,
-	0xddddddddddddddddULL,
-	0xeeeeeeeeeeeeeeeeULL,
-	0x7a6c7258554e494cULL, /* yeah ;-) */
-};
-
-static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
-{
-	printk(KERN_INFO "  %016llx bad mem addr %010llx - %010llx reserved\n",
-	       (unsigned long long) pattern,
-	       (unsigned long long) start_bad,
-	       (unsigned long long) end_bad);
-	memblock_reserve(start_bad, end_bad - start_bad);
-}
-
-static void __init memtest(u64 pattern, u64 start_phys, u64 size)
-{
-	u64 *p, *start, *end;
-	u64 start_bad, last_bad;
-	u64 start_phys_aligned;
-	const size_t incr = sizeof(pattern);
-
-	start_phys_aligned = ALIGN(start_phys, incr);
-	start = __va(start_phys_aligned);
-	end = start + (size - (start_phys_aligned - start_phys)) / incr;
-	start_bad = 0;
-	last_bad = 0;
-
-	for (p = start; p < end; p++)
-		*p = pattern;
-
-	for (p = start; p < end; p++, start_phys_aligned += incr) {
-		if (*p == pattern)
-			continue;
-		if (start_phys_aligned == last_bad + incr) {
-			last_bad += incr;
-			continue;
-		}
-		if (start_bad)
-			reserve_bad_mem(pattern, start_bad, last_bad + incr);
-		start_bad = last_bad = start_phys_aligned;
-	}
-	if (start_bad)
-		reserve_bad_mem(pattern, start_bad, last_bad + incr);
-}
-
-static void __init do_one_pass(u64 pattern, u64 start, u64 end)
-{
-	u64 i;
-	phys_addr_t this_start, this_end;
-
-	for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
-		this_start = clamp_t(phys_addr_t, this_start, start, end);
-		this_end = clamp_t(phys_addr_t, this_end, start, end);
-		if (this_start < this_end) {
-			printk(KERN_INFO "  %010llx - %010llx pattern %016llx\n",
-			       (unsigned long long)this_start,
-			       (unsigned long long)this_end,
-			       (unsigned long long)cpu_to_be64(pattern));
-			memtest(pattern, this_start, this_end - this_start);
-		}
-	}
-}
-
-/* default is disabled */
-static int memtest_pattern __initdata;
-
-static int __init parse_memtest(char *arg)
-{
-	if (arg)
-		memtest_pattern = simple_strtoul(arg, NULL, 0);
-	else
-		memtest_pattern = ARRAY_SIZE(patterns);
-
-	return 0;
-}
-
-early_param("memtest", parse_memtest);
-
-void __init early_memtest(unsigned long start, unsigned long end)
-{
-	unsigned int i;
-	unsigned int idx = 0;
-
-	if (!memtest_pattern)
-		return;
-
-	printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
-	for (i = memtest_pattern-1; i < UINT_MAX; --i) {
-		idx = i % ARRAY_SIZE(patterns);
-		do_one_pass(patterns[idx], start, end);
-	}
-}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index df4552bd239e..9d518d693b4b 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -65,24 +65,23 @@ static int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_rnd(void)
+unsigned long arch_mmap_rnd(void)
 {
-	unsigned long rnd = 0;
+	unsigned long rnd;
 
 	/*
-	*  8 bits of randomness in 32bit mmaps, 20 address space bits
-	* 28 bits of randomness in 64bit mmaps, 40 address space bits
-	*/
-	if (current->flags & PF_RANDOMIZE) {
-		if (mmap_is_ia32())
-			rnd = get_random_int() % (1<<8);
-		else
-			rnd = get_random_int() % (1<<28);
-	}
+	 *  8 bits of randomness in 32bit mmaps, 20 address space bits
+	 * 28 bits of randomness in 64bit mmaps, 40 address space bits
+	 */
+	if (mmap_is_ia32())
+		rnd = (unsigned long)get_random_int() % (1<<8);
+	else
+		rnd = (unsigned long)get_random_int() % (1<<28);
+
 	return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(void)
+static unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
 
@@ -91,19 +90,19 @@ static unsigned long mmap_base(void)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
 }
 
 /*
  * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
  * does, but not when emulating X86_32
  */
-static unsigned long mmap_legacy_base(void)
+static unsigned long mmap_legacy_base(unsigned long rnd)
 {
 	if (mmap_is_ia32())
 		return TASK_UNMAPPED_BASE;
 	else
-		return TASK_UNMAPPED_BASE + mmap_rnd();
+		return TASK_UNMAPPED_BASE + rnd;
 }
 
 /*
@@ -112,13 +111,18 @@ static unsigned long mmap_legacy_base(void)
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-	mm->mmap_legacy_base = mmap_legacy_base();
-	mm->mmap_base = mmap_base();
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
+	mm->mmap_legacy_base = mmap_legacy_base(random_factor);
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
+		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index cd4785bbacb9..4053bb58bf92 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -482,9 +482,16 @@ static void __init numa_clear_kernel_node_hotplug(void)
 				  &memblock.reserved, mb->nid);
 	}
 
-	/* Mark all kernel nodes. */
+	/*
+	 * Mark all kernel nodes.
+	 *
+	 * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
+	 * may not include all the memblock.reserved memory ranges because
+	 * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+	 */
 	for_each_memblock(reserved, r)
-		node_set(r->nid, numa_kernel_nodes);
+		if (r->nid != MAX_NUMNODES)
+			node_set(r->nid, numa_kernel_nodes);
 
 	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 536ea2fb6e33..89af288ec674 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m)
 	seq_printf(m, "DirectMap4M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
-#ifdef CONFIG_X86_64
 	if (direct_gbpages)
 		seq_printf(m, "DirectMap1G:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_1G] << 20);
-#endif
 }
 #else
 static inline void split_page_count(int level) { }
@@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages)
 {
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7ac68698406c..35af6771a95a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 
 #ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 
 	while (cursor < to) {
 		if (!devmem_is_allowed(pfn)) {
-			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
-				current->comm, from, to - 1);
+			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+			       current->comm, from, to - 1);
 			return 0;
 		}
 		cursor += PAGE_SIZE;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7b22adaad4f1..0b97d2c75df3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -4,6 +4,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
+#include <asm/mtrr.h>
 
 #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
 
@@ -58,7 +59,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 	tlb_remove_page(tlb, pte);
 }
 
-#if PAGETABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
 	struct page *page = virt_to_page(pmd);
@@ -74,14 +75,14 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 	tlb_remove_page(tlb, page);
 }
 
-#if PAGETABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pud));
 }
-#endif	/* PAGETABLE_LEVELS > 3 */
-#endif	/* PAGETABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 static inline void pgd_list_add(pgd_t *pgd)
 {
@@ -117,9 +118,9 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
 	   references from swapper_pg_dir. */
-	if (PAGETABLE_LEVELS == 2 ||
-	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
-	    PAGETABLE_LEVELS == 4) {
+	if (CONFIG_PGTABLE_LEVELS == 2 ||
+	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
+	    CONFIG_PGTABLE_LEVELS == 4) {
 		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
@@ -275,12 +276,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 	}
 }
 
+/*
+ * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
+ * assumes that pgd should be in one page.
+ *
+ * But kernel with PAE paging that is not running as a Xen domain
+ * only needs to allocate 32 bytes for pgd instead of one page.
+ */
+#ifdef CONFIG_X86_PAE
+
+#include <linux/slab.h>
+
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_ALIGN	32
+
+static struct kmem_cache *pgd_cache;
+
+static int __init pgd_cache_init(void)
+{
+	/*
+	 * When PAE kernel is running as a Xen domain, it does not use
+	 * shared kernel pmd. And this requires a whole page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return 0;
+
+	/*
+	 * when PAE kernel is not running as a Xen domain, it uses
+	 * shared kernel pmd. Shared kernel pmd does not require a whole
+	 * page for pgd. We are able to just allocate a 32-byte for pgd.
+	 * During boot time, we create a 32-byte slab for pgd table allocation.
+	 */
+	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
+				      SLAB_PANIC, NULL);
+	if (!pgd_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+core_initcall(pgd_cache_init);
+
+static inline pgd_t *_pgd_alloc(void)
+{
+	/*
+	 * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
+	 * We allocate one page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return (pgd_t *)__get_free_page(PGALLOC_GFP);
+
+	/*
+	 * Now PAE kernel is not running as a Xen domain. We can allocate
+	 * a 32-byte slab for pgd to save memory space.
+	 */
+	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	if (!SHARED_KERNEL_PMD)
+		free_page((unsigned long)pgd);
+	else
+		kmem_cache_free(pgd_cache, pgd);
+}
+#else
+static inline pgd_t *_pgd_alloc(void)
+{
+	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+#endif /* CONFIG_X86_PAE */
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
-	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+	pgd = _pgd_alloc();
 
 	if (pgd == NULL)
 		goto out;
@@ -310,7 +386,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
 	free_pmds(mm, pmds);
 out_free_pgd:
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 out:
 	return NULL;
 }
@@ -320,7 +396,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 }
 
 /*
@@ -485,3 +561,67 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
 {
 	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
 }
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+	u8 mtrr;
+
+	/*
+	 * Do not use a huge page when the range is covered by non-WB type
+	 * of MTRRs.
+	 */
+	mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE);
+	if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
+		return 0;
+
+	prot = pgprot_4k_2_large(prot);
+
+	set_pte((pte_t *)pud, pfn_pte(
+		(u64)addr >> PAGE_SHIFT,
+		__pgprot(pgprot_val(prot) | _PAGE_PSE)));
+
+	return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+	u8 mtrr;
+
+	/*
+	 * Do not use a huge page when the range is covered by non-WB type
+	 * of MTRRs.
+	 */
+	mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE);
+	if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
+		return 0;
+
+	prot = pgprot_4k_2_large(prot);
+
+	set_pte((pte_t *)pmd, pfn_pte(
+		(u64)addr >> PAGE_SHIFT,
+		__pgprot(pgprot_val(prot) | _PAGE_PSE)));
+
+	return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+	if (pud_large(*pud)) {
+		pud_clear(pud);
+		return 1;
+	}
+
+	return 0;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (pmd_large(*pmd)) {
+		pmd_clear(pmd);
+		return 1;
+	}
+
+	return 0;
+}
+#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 5d04be5efb64..4e664bdb535a 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		unsigned long stack = kernel_stack_pointer(regs);
 		if (depth)
 			dump_trace(NULL, regs, (unsigned long *)stack, 0,
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3d2612b68694..8fd6f44aee83 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -490,7 +490,9 @@ void pcibios_scan_root(int busnum)
 	if (!bus) {
 		pci_free_resource_list(&resources);
 		kfree(sd);
+		return;
 	}
+	pci_bus_add_devices(bus);
 }
 
 void __init pcibios_set_cache_line_size(void)
@@ -513,31 +515,6 @@ void __init pcibios_set_cache_line_size(void)
 	}
 }
 
-/*
- * Some device drivers assume dev->irq won't change after calling
- * pci_disable_device(). So delay releasing of IRQ resource to driver
- * unbinding time. Otherwise it will break PM subsystem and drivers
- * like xen-pciback etc.
- */
-static int pci_irq_notifier(struct notifier_block *nb, unsigned long action,
-			    void *data)
-{
-	struct pci_dev *dev = to_pci_dev(data);
-
-	if (action != BUS_NOTIFY_UNBOUND_DRIVER)
-		return NOTIFY_DONE;
-
-	if (pcibios_disable_irq)
-		pcibios_disable_irq(dev);
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block pci_irq_nb = {
-	.notifier_call = pci_irq_notifier,
-	.priority = INT_MIN,
-};
-
 int __init pcibios_init(void)
 {
 	if (!raw_pci_ops) {
@@ -550,9 +527,6 @@ int __init pcibios_init(void)
 
 	if (pci_bf_sort >= pci_force_bf)
 		pci_sort_breadthfirst();
-
-	bus_register_notifier(&pci_bus_type, &pci_irq_nb);
-
 	return 0;
 }
 
@@ -711,6 +685,12 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return 0;
 }
 
+void pcibios_disable_device (struct pci_dev *dev)
+{
+	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+		pcibios_disable_irq(dev);
+}
+
 int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index efb849323c74..852aa4c92da0 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -234,10 +234,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (dev->irq_managed && dev->irq > 0) {
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
-		dev->irq = 0;
 	}
 }
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index e71b3dbd87b8..5dc6ca5e1741 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1256,9 +1256,22 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) {
+	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
 		dev->irq = 0;
 		dev->irq_managed = 0;
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d143d216d52b..d7f997f7c26d 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -67,7 +67,7 @@ void __init efi_bgrt_init(void)
 
 	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
 	if (!image) {
-		image = early_memremap(bgrt_tab->image_address,
+		image = early_ioremap(bgrt_tab->image_address,
 				       sizeof(bmp_header));
 		ioremapped = true;
 		if (!image) {
@@ -89,7 +89,7 @@ void __init efi_bgrt_init(void)
 	}
 
 	if (ioremapped) {
-		image = early_memremap(bgrt_tab->image_address,
+		image = early_ioremap(bgrt_tab->image_address,
 				       bmp_header.size);
 		if (!image) {
 			pr_err("Ignoring BGRT: failed to map image memory\n");
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index dbc8627a5cdf..02744df576d5 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -85,12 +85,20 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	efi_memory_desc_t *virtual_map)
 {
 	efi_status_t status;
+	unsigned long flags;
+	pgd_t *save_pgd;
 
-	efi_call_phys_prolog();
+	save_pgd = efi_call_phys_prolog();
+
+	/* Disable interrupts around EFI calls: */
+	local_irq_save(flags);
 	status = efi_call_phys(efi_phys.set_virtual_address_map,
 			       memory_map_size, descriptor_size,
 			       descriptor_version, virtual_map);
-	efi_call_phys_epilog();
+	local_irq_restore(flags);
+
+	efi_call_phys_epilog(save_pgd);
+
 	return status;
 }
 
@@ -491,7 +499,8 @@ void __init efi_init(void)
 	if (efi_memmap_init())
 		return;
 
-	print_efi_memmap();
+	if (efi_enabled(EFI_DBG))
+		print_efi_memmap();
 }
 
 void __init efi_late_init(void)
@@ -939,6 +948,8 @@ static int __init arch_parse_efi_cmdline(char *str)
 {
 	if (parse_option_str(str, "old_map"))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
+	if (parse_option_str(str, "debug"))
+		set_bit(EFI_DBG, &efi.flags);
 
 	return 0;
 }
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e7cda52936..ed5b67338294 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,11 +33,10 @@
 
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
- * prolog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
+ * prolog/epilog before/after the invocation to claim the EFI runtime service
+ * handler exclusively and to duplicate a memory mapping in low memory space,
+ * say 0 - 3G.
  */
-static unsigned long efi_rt_eflags;
 
 void efi_sync_low_kernel_mappings(void) {}
 void __init efi_dump_pagetable(void) {}
@@ -57,21 +56,24 @@ void __init efi_map_region(efi_memory_desc_t *md)
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
 void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
 	struct desc_ptr gdt_descr;
+	pgd_t *save_pgd;
 
-	local_irq_save(efi_rt_eflags);
-
+	/* Current pgd is swapper_pg_dir, we'll restore it later: */
+	save_pgd = swapper_pg_dir;
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
 	gdt_descr.address = __pa(get_cpu_gdt_table(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
+
+	return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
@@ -79,10 +81,8 @@ void __init efi_call_phys_epilog(void)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
-	load_cr3(swapper_pg_dir);
+	load_cr3(save_pgd);
 	__flush_tlb_all();
-
-	local_irq_restore(efi_rt_eflags);
 }
 
 void __init efi_runtime_mkexec(void)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 17e80d829df0..a0ac0f9c307f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,9 +41,6 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 
-static pgd_t *save_pgd __initdata;
-static unsigned long efi_flags __initdata;
-
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
  * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
@@ -78,17 +75,18 @@ static void __init early_code_mapping_set_exec(int executable)
 	}
 }
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
 	unsigned long vaddress;
+	pgd_t *save_pgd;
+
 	int pgd;
 	int n_pgds;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP))
-		return;
+		return NULL;
 
 	early_code_mapping_set_exec(1);
-	local_irq_save(efi_flags);
 
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
 	save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
@@ -99,24 +97,29 @@ void __init efi_call_phys_prolog(void)
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
 	}
 	__flush_tlb_all();
+
+	return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	int pgd;
-	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+	int pgd_idx;
+	int nr_pgds;
 
-	if (!efi_enabled(EFI_OLD_MEMMAP))
+	if (!save_pgd)
 		return;
 
-	for (pgd = 0; pgd < n_pgds; pgd++)
-		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
 	kfree(save_pgd);
+
 	__flush_tlb_all();
-	local_irq_restore(efi_flags);
 	early_code_mapping_set_exec(0);
 }
 
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index 0a8ee703b9fa..0ce1b1913673 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,5 +1,4 @@
 obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o
-obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
 
 # SFI specific code
 ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
deleted file mode 100644
index 4e720829ab90..000000000000
--- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * early_printk_intel_mid.c - early consoles for Intel MID platforms
- *
- * Copyright (c) 2008-2010, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/*
- * This file implements early console named hsu.
- * hsu is based on a High Speed UART device which only exists in the Medfield
- * platform
- */
-
-#include <linux/serial_reg.h>
-#include <linux/serial_mfd.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/intel-mid.h>
-
-/*
- * Following is the early console based on Medfield HSU (High
- * Speed UART) device.
- */
-#define HSU_PORT_BASE		0xffa28080
-
-static void __iomem *phsu;
-
-void hsu_early_console_init(const char *s)
-{
-	unsigned long paddr, port = 0;
-	u8 lcr;
-
-	/*
-	 * Select the early HSU console port if specified by user in the
-	 * kernel command line.
-	 */
-	if (*s && !kstrtoul(s, 10, &port))
-		port = clamp_val(port, 0, 2);
-
-	paddr = HSU_PORT_BASE + port * 0x80;
-	phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr);
-
-	/* Disable FIFO */
-	writeb(0x0, phsu + UART_FCR);
-
-	/* Set to default 115200 bps, 8n1 */
-	lcr = readb(phsu + UART_LCR);
-	writeb((0x80 | lcr), phsu + UART_LCR);
-	writeb(0x18, phsu + UART_DLL);
-	writeb(lcr,  phsu + UART_LCR);
-	writel(0x3600, phsu + UART_MUL*4);
-
-	writeb(0x8, phsu + UART_MCR);
-	writeb(0x7, phsu + UART_FCR);
-	writeb(0x3, phsu + UART_LCR);
-
-	/* Clear IRQ status */
-	readb(phsu + UART_LSR);
-	readb(phsu + UART_RX);
-	readb(phsu + UART_IIR);
-	readb(phsu + UART_MSR);
-
-	/* Enable FIFO */
-	writeb(0x7, phsu + UART_FCR);
-}
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-static void early_hsu_putc(char ch)
-{
-	unsigned int timeout = 10000; /* 10ms */
-	u8 status;
-
-	while (--timeout) {
-		status = readb(phsu + UART_LSR);
-		if (status & BOTH_EMPTY)
-			break;
-		udelay(1);
-	}
-
-	/* Only write the char when there was no timeout */
-	if (timeout)
-		writeb(ch, phsu + UART_TX);
-}
-
-static void early_hsu_write(struct console *con, const char *str, unsigned n)
-{
-	int i;
-
-	for (i = 0; i < n && *str; i++) {
-		if (*str == '\n')
-			early_hsu_putc('\r');
-		early_hsu_putc(*str);
-		str++;
-	}
-}
-
-struct console early_hsu_console = {
-	.name =		"earlyhsu",
-	.write =	early_hsu_write,
-	.flags =	CON_PRINTBUFFER,
-	.index =	-1,
-};
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index c9a0838890e2..278e4da4222f 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -11,6 +11,7 @@
  */
 
 #include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
 #include <asm/imr.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -101,6 +102,12 @@ static void __init imr_self_test(void)
 	}
 }
 
+static const struct x86_cpu_id imr_ids[] __initconst = {
+	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, imr_ids);
+
 /**
  * imr_self_test_init - entry point for IMR driver.
  *
@@ -108,7 +115,8 @@ static void __init imr_self_test(void)
  */
 static int __init imr_self_test_init(void)
 {
-	imr_self_test();
+	if (x86_match_cpu(imr_ids))
+		imr_self_test();
 	return 0;
 }
 
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 9a2e590dd202..7fa8b3b53bc0 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -61,7 +61,7 @@ static void battery_status_changed(void)
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
@@ -71,7 +71,7 @@ static void ac_status_changed(void)
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 08e350e757dc..55130846ac87 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -83,7 +83,7 @@ static void battery_status_changed(void)
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
@@ -93,7 +93,7 @@ static void ac_status_changed(void)
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 994798548b1a..3b6ec42718e4 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -415,7 +415,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
 	struct reset_args reset_args;
 
 	reset_args.sender = sender;
-	cpus_clear(*mask);
+	cpumask_clear(mask);
 	/* find a single cpu for each uvhub in this distribution mask */
 	maskbits = sizeof(struct pnmask) * BITSPERBYTE;
 	/* each bit is a pnode relative to the partition base pnode */
@@ -425,7 +425,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
 			continue;
 		apnode = pnode + bcp->partition_base_pnode;
 		cpu = pnode_to_first_cpu(apnode, smaster);
-		cpu_set(cpu, *mask);
+		cpumask_set_cpu(cpu, mask);
 	}
 
 	/* IPI all cpus; preemption is already disabled */
@@ -1126,7 +1126,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	/* don't actually do a shootdown of the local cpu */
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
-	if (cpu_isset(cpu, *cpumask))
+	if (cpumask_test_cpu(cpu, cpumask))
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 3e32ed5648a0..757678fb26e1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -134,7 +134,7 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
 	struct desc_struct *desc = get_cpu_gdt_table(cpu);
 	tss_desc tss;
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index 3323c2745248..a55abb9f6c5e 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -19,6 +19,9 @@ quiet_cmd_syshdr = SYSHDR  $@
 quiet_cmd_systbl = SYSTBL  $@
       cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
 
+quiet_cmd_hypercalls = HYPERCALLS $@
+      cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<,$^)
+
 syshdr_abi_unistd_32 := i386
 $(uapi)/unistd_32.h: $(syscall32) $(syshdr)
 	$(call if_changed,syshdr)
@@ -47,10 +50,16 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl)
 $(out)/syscalls_64.h: $(syscall64) $(systbl)
 	$(call if_changed,systbl)
 
+$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh
+	$(call if_changed,hypercalls)
+
+$(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h
+
 uapisyshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
 syshdr-y			+= syscalls_32.h
 syshdr-$(CONFIG_X86_64)		+= unistd_32_ia32.h unistd_64_x32.h
 syshdr-$(CONFIG_X86_64)		+= syscalls_64.h
+syshdr-$(CONFIG_XEN)		+= xen-hypercalls.h
 
 targets	+= $(uapisyshdr-y) $(syshdr-y)
 
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index b3560ece1c9f..ef8187f9d28d 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -119,7 +119,7 @@
 110	i386	iopl			sys_iopl
 111	i386	vhangup			sys_vhangup
 112	i386	idle
-113	i386	vm86old			sys_vm86old			sys32_vm86_warning
+113	i386	vm86old			sys_vm86old			sys_ni_syscall
 114	i386	wait4			sys_wait4			compat_sys_wait4
 115	i386	swapoff			sys_swapoff
 116	i386	sysinfo			sys_sysinfo			compat_sys_sysinfo
@@ -172,7 +172,7 @@
 163	i386	mremap			sys_mremap
 164	i386	setresuid		sys_setresuid16
 165	i386	getresuid		sys_getresuid16
-166	i386	vm86			sys_vm86			sys32_vm86_warning
+166	i386	vm86			sys_vm86			sys_ni_syscall
 167	i386	query_module
 168	i386	poll			sys_poll
 169	i386	nfsservctl
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 8d656fbb57aa..9ef32d5f1b19 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -178,7 +178,7 @@
 169	common	reboot			sys_reboot
 170	common	sethostname		sys_sethostname
 171	common	setdomainname		sys_setdomainname
-172	common	iopl			stub_iopl
+172	common	iopl			sys_iopl
 173	common	ioperm			sys_ioperm
 174	64	create_module
 175	common	init_module		sys_init_module
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index eafa324eb7a5..acb384d24669 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,7 +21,6 @@ obj-$(CONFIG_BINFMT_ELF) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
 subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
-subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
 
 else
 
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 2d7d9a1f5b53..7e8a1a650435 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -36,22 +36,11 @@
 #endif /* CONFIG_X86_PPRO_FENCE */
 #define dma_wmb()	barrier()
 
-#ifdef CONFIG_SMP
-
-#define smp_mb()	mb()
-#define smp_rmb()	dma_rmb()
-#define smp_wmb()	barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-
-#else /* CONFIG_SMP */
-
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 
-#endif /* CONFIG_SMP */
-
 #define read_barrier_depends()		do { } while (0)
 #define smp_read_barrier_depends()	do { } while (0)
 
@@ -64,8 +53,8 @@
  */
 static inline void rdtsc_barrier(void)
 {
-	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+			  "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 25a1022dd793..0a656b727b1a 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -210,7 +210,7 @@ extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
 
 #define ELF_EXEC_PAGESIZE 4096
 
-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
 
 extern long elf_aux_hwcap;
 #define ELF_HWCAP (elf_aux_hwcap)
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 8e08176f0bcb..5c0b711d2433 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -8,9 +8,7 @@
 #include <linux/slab.h>
 #include <asm/unistd.h>
 #include <os.h>
-#include <proc_mm.h>
 #include <skas.h>
-#include <skas_ptrace.h>
 #include <sysdep/tls.h>
 
 extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
@@ -19,105 +17,20 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func,
 		     struct user_desc *desc, void **addr, int done)
 {
 	long res;
-
-	if (proc_mm) {
-		/*
-		 * This is a special handling for the case, that the mm to
-		 * modify isn't current->active_mm.
-		 * If this is called directly by modify_ldt,
-		 *     (current->active_mm->context.skas.u == mm_idp)
-		 * will be true. So no call to __switch_mm(mm_idp) is done.
-		 * If this is called in case of init_new_ldt or PTRACE_LDT,
-		 * mm_idp won't belong to current->active_mm, but child->mm.
-		 * So we need to switch child's mm into our userspace, then
-		 * later switch back.
-		 *
-		 * Note: I'm unsure: should interrupts be disabled here?
-		 */
-		if (!current->active_mm || current->active_mm == &init_mm ||
-		    mm_idp != &current->active_mm->context.id)
-			__switch_mm(mm_idp);
-	}
-
-	if (ptrace_ldt) {
-		struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
-			.func = func,
-			.ptr = desc,
-			.bytecount = sizeof(*desc)};
-		u32 cpu;
-		int pid;
-
-		if (!proc_mm)
-			pid = mm_idp->u.pid;
-		else {
-			cpu = get_cpu();
-			pid = userspace_pid[cpu];
-		}
-
-		res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op);
-
-		if (proc_mm)
-			put_cpu();
-	}
-	else {
-		void *stub_addr;
-		res = syscall_stub_data(mm_idp, (unsigned long *)desc,
-					(sizeof(*desc) + sizeof(long) - 1) &
-					    ~(sizeof(long) - 1),
-					addr, &stub_addr);
-		if (!res) {
-			unsigned long args[] = { func,
-						 (unsigned long)stub_addr,
-						 sizeof(*desc),
-						 0, 0, 0 };
-			res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
-					       0, addr, done);
-		}
+	void *stub_addr;
+	res = syscall_stub_data(mm_idp, (unsigned long *)desc,
+				(sizeof(*desc) + sizeof(long) - 1) &
+				    ~(sizeof(long) - 1),
+				addr, &stub_addr);
+	if (!res) {
+		unsigned long args[] = { func,
+					 (unsigned long)stub_addr,
+					 sizeof(*desc),
+					 0, 0, 0 };
+		res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
+				       0, addr, done);
 	}
 
-	if (proc_mm) {
-		/*
-		 * This is the second part of special handling, that makes
-		 * PTRACE_LDT possible to implement.
-		 */
-		if (current->active_mm && current->active_mm != &init_mm &&
-		    mm_idp != &current->active_mm->context.id)
-			__switch_mm(&current->active_mm->context.id);
-	}
-
-	return res;
-}
-
-static long read_ldt_from_host(void __user * ptr, unsigned long bytecount)
-{
-	int res, n;
-	struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) {
-			.func = 0,
-			.bytecount = bytecount,
-			.ptr = kmalloc(bytecount, GFP_KERNEL)};
-	u32 cpu;
-
-	if (ptrace_ldt.ptr == NULL)
-		return -ENOMEM;
-
-	/*
-	 * This is called from sys_modify_ldt only, so userspace_pid gives
-	 * us the right number
-	 */
-
-	cpu = get_cpu();
-	res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt);
-	put_cpu();
-	if (res < 0)
-		goto out;
-
-	n = copy_to_user(ptr, ptrace_ldt.ptr, res);
-	if (n != 0)
-		res = -EFAULT;
-
-  out:
-	kfree(ptrace_ldt.ptr);
-
 	return res;
 }
 
@@ -145,9 +58,6 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
 		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
 	err = bytecount;
 
-	if (ptrace_ldt)
-		return read_ldt_from_host(ptr, bytecount);
-
 	mutex_lock(&ldt->lock);
 	if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
 		size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
@@ -229,17 +139,11 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 			goto out;
 	}
 
-	if (!ptrace_ldt)
-		mutex_lock(&ldt->lock);
+	mutex_lock(&ldt->lock);
 
 	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
 	if (err)
 		goto out_unlock;
-	else if (ptrace_ldt) {
-		/* With PTRACE_LDT available, this is used as a flag only */
-		ldt->entry_count = 1;
-		goto out;
-	}
 
 	if (ldt_info.entry_number >= ldt->entry_count &&
 	    ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
@@ -393,91 +297,56 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
 	int i;
 	long page, err=0;
 	void *addr = NULL;
-	struct proc_mm_op copy;
 
 
-	if (!ptrace_ldt)
-		mutex_init(&new_mm->arch.ldt.lock);
+	mutex_init(&new_mm->arch.ldt.lock);
 
 	if (!from_mm) {
 		memset(&desc, 0, sizeof(desc));
 		/*
-		 * We have to initialize a clean ldt.
+		 * Now we try to retrieve info about the ldt, we
+		 * inherited from the host. All ldt-entries found
+		 * will be reset in the following loop
 		 */
-		if (proc_mm) {
-			/*
-			 * If the new mm was created using proc_mm, host's
-			 * default-ldt currently is assigned, which normally
-			 * contains the call-gates for lcall7 and lcall27.
-			 * To remove these gates, we simply write an empty
-			 * entry as number 0 to the host.
-			 */
-			err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1);
-		}
-		else{
-			/*
-			 * Now we try to retrieve info about the ldt, we
-			 * inherited from the host. All ldt-entries found
-			 * will be reset in the following loop
-			 */
-			ldt_get_host_info();
-			for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
-				desc.entry_number = *num_p;
-				err = write_ldt_entry(&new_mm->id, 1, &desc,
-						      &addr, *(num_p + 1) == -1);
-				if (err)
-					break;
-			}
+		ldt_get_host_info();
+		for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
+			desc.entry_number = *num_p;
+			err = write_ldt_entry(&new_mm->id, 1, &desc,
+					      &addr, *(num_p + 1) == -1);
+			if (err)
+				break;
 		}
 		new_mm->arch.ldt.entry_count = 0;
 
 		goto out;
 	}
 
-	if (proc_mm) {
-		/*
-		 * We have a valid from_mm, so we now have to copy the LDT of
-		 * from_mm to new_mm, because using proc_mm an new mm with
-		 * an empty/default LDT was created in new_mm()
-		 */
-		copy = ((struct proc_mm_op) { .op 	= MM_COPY_SEGMENTS,
-					      .u 	=
-					      { .copy_segments =
-							from_mm->id.u.mm_fd } } );
-		i = os_write_file(new_mm->id.u.mm_fd, &copy, sizeof(copy));
-		if (i != sizeof(copy))
-			printk(KERN_ERR "new_mm : /proc/mm copy_segments "
-			       "failed, err = %d\n", -i);
-	}
-
-	if (!ptrace_ldt) {
-		/*
-		 * Our local LDT is used to supply the data for
-		 * modify_ldt(READLDT), if PTRACE_LDT isn't available,
-		 * i.e., we have to use the stub for modify_ldt, which
-		 * can't handle the big read buffer of up to 64kB.
-		 */
-		mutex_lock(&from_mm->arch.ldt.lock);
-		if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
-			memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
-			       sizeof(new_mm->arch.ldt.u.entries));
-		else {
-			i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-			while (i-->0) {
-				page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-				if (!page) {
-					err = -ENOMEM;
-					break;
-				}
-				new_mm->arch.ldt.u.pages[i] =
-					(struct ldt_entry *) page;
-				memcpy(new_mm->arch.ldt.u.pages[i],
-				       from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
+	/*
+	 * Our local LDT is used to supply the data for
+	 * modify_ldt(READLDT), if PTRACE_LDT isn't available,
+	 * i.e., we have to use the stub for modify_ldt, which
+	 * can't handle the big read buffer of up to 64kB.
+	 */
+	mutex_lock(&from_mm->arch.ldt.lock);
+	if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
+		memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
+		       sizeof(new_mm->arch.ldt.u.entries));
+	else {
+		i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
+		while (i-->0) {
+			page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+			if (!page) {
+				err = -ENOMEM;
+				break;
 			}
+			new_mm->arch.ldt.u.pages[i] =
+				(struct ldt_entry *) page;
+			memcpy(new_mm->arch.ldt.u.pages[i],
+			       from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
 		}
-		new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
-		mutex_unlock(&from_mm->arch.ldt.lock);
 	}
+	new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
+	mutex_unlock(&from_mm->arch.ldt.lock);
 
     out:
 	return err;
@@ -488,7 +357,7 @@ void free_ldt(struct mm_context *mm)
 {
 	int i;
 
-	if (!ptrace_ldt && mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
+	if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
 		i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
 		while (i-- > 0)
 			free_page((long) mm->arch.ldt.u.pages[i]);
diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h
index a26086b8a800..b6f2437ec29c 100644
--- a/arch/x86/um/shared/sysdep/faultinfo_32.h
+++ b/arch/x86/um/shared/sysdep/faultinfo_32.h
@@ -27,9 +27,6 @@ struct faultinfo {
 /* This is Page Fault */
 #define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
 
-/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
-#define SEGV_MAYBE_FIXABLE(fi)	((fi)->trap_no == 0 && ptrace_faultinfo)
-
 #define PTRACE_FULL_FAULTINFO 0
 
 #endif
diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h
index f811cbe15d62..ee88f88974ea 100644
--- a/arch/x86/um/shared/sysdep/faultinfo_64.h
+++ b/arch/x86/um/shared/sysdep/faultinfo_64.h
@@ -27,9 +27,6 @@ struct faultinfo {
 /* This is Page Fault */
 #define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
 
-/* No broken SKAS API, which doesn't pass trap_no, here. */
-#define SEGV_MAYBE_FIXABLE(fi)	0
-
 #define PTRACE_FULL_FAULTINFO 1
 
 #endif
diff --git a/arch/x86/um/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h
deleted file mode 100644
index 453febe98993..000000000000
--- a/arch/x86/um/shared/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_SKAS_PTRACE_H
-#define __SYSDEP_X86_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 0c8c32bfd792..592491d1d70d 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -549,13 +549,6 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 	if (err)
 		return err;
 
-	/* Set up registers for signal handler */
-	{
-		struct exec_domain *ed = current_thread_info()->exec_domain;
-		if (unlikely(ed && ed->signal_invmap && sig < 32))
-			sig = ed->signal_invmap[sig];
-	}
-
 	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_DI(regs) = sig;
 	/* In case the signal handler was declared without prototypes */
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 5cdfa9db2217..a75d8700472a 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -16,7 +16,7 @@
  */
 
 /* Not going to be implemented by UML, since we have no hardware. */
-#define stub_iopl sys_ni_syscall
+#define sys_iopl sys_ni_syscall
 #define sys_ioperm sys_ni_syscall
 
 /*
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 7b9be9822724..275a3a8b78af 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
 hostprogs-y			+= vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
@@ -206,4 +206,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 PHONY += vdso_install $(vdso_img_insttargets)
 vdso_install: $(vdso_img_insttargets) FORCE
 
-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64*
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322751e0..40d2473836c9 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
 	cycle_t ret;
 	u64 last;
 	u32 version;
+	u32 migrate_count;
 	u8 flags;
 	unsigned cpu, cpu1;
 
 
 	/*
-	 * Note: hypervisor must guarantee that:
-	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-	 * 2. that per-CPU pvclock time info is updated if the
-	 *    underlying CPU changes.
-	 * 3. that version is increased whenever underlying CPU
-	 *    changes.
-	 *
+	 * When looping to get a consistent (time-info, tsc) pair, we
+	 * also need to deal with the possibility we can switch vcpus,
+	 * so make sure we always re-fetch time-info for the current vcpu.
 	 */
 	do {
 		cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -102,20 +99,27 @@ static notrace cycle_t vread_pvclock(int *mode)
 		 * __getcpu() calls (Gleb).
 		 */
 
-		pvti = get_pvti(cpu);
+		/* Make sure migrate_count will change if we leave the VCPU. */
+		do {
+			pvti = get_pvti(cpu);
+			migrate_count = pvti->migrate_count;
+
+			cpu1 = cpu;
+			cpu = __getcpu() & VGETCPU_CPU_MASK;
+		} while (unlikely(cpu != cpu1));
 
 		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
 		/*
 		 * Test we're still on the cpu as well as the version.
-		 * We could have been migrated just after the first
-		 * vgetcpu but before fetching the version, so we
-		 * wouldn't notice a version change.
+		 * - We must read TSC of pvti's VCPU.
+		 * - KVM doesn't follow the versioning protocol, so data could
+		 *   change before version if we left the VCPU.
 		 */
-		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-	} while (unlikely(cpu != cpu1 ||
-			  (pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version));
+		smp_rmb();
+	} while (unlikely((pvti->pvti.version & 1) ||
+			  pvti->pvti.version != version ||
+			  pvti->migrate_count != migrate_count));
 
 	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
 		*mode = VCLOCK_NONE;
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
index 31776d0efc8c..d7ec4e251c0a 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -17,6 +17,7 @@
 	.text
 	.globl __kernel_sigreturn
 	.type __kernel_sigreturn,@function
+	nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
 	ALIGN
 __kernel_sigreturn:
 .LSTART_sigreturn:
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S
index 5415b5613d55..6b286bb5251c 100644
--- a/arch/x86/vdso/vdso32/syscall.S
+++ b/arch/x86/vdso/vdso32/syscall.S
@@ -19,8 +19,6 @@ __kernel_vsyscall:
 .Lpush_ebp:
 	movl	%ecx, %ebp
 	syscall
-	movl	$__USER32_DS, %ecx
-	movl	%ecx, %ss
 	movl	%ebp, %ecx
 	popl	%ebp
 .Lpop_ebp:
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 7005ced5d1ad..70e060ad879a 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -7,6 +7,7 @@
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include "xen-ops.h"
+#include "smp.h"
 
 static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
 {
@@ -28,7 +29,186 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
 	return 0xfd;
 }
 
+static unsigned long xen_set_apic_id(unsigned int x)
+{
+	WARN_ON(1);
+	return x;
+}
+
+static unsigned int xen_get_apic_id(unsigned long x)
+{
+	return ((x)>>24) & 0xFFu;
+}
+
+static u32 xen_apic_read(u32 reg)
+{
+	struct xen_platform_op op = {
+		.cmd = XENPF_get_cpuinfo,
+		.interface_version = XENPF_INTERFACE_VERSION,
+		.u.pcpu_info.xen_cpuid = 0,
+	};
+	int ret = 0;
+
+	/* Shouldn't need this as APIC is turned off for PV, and we only
+	 * get called on the bootup processor. But just in case. */
+	if (!xen_initial_domain() || smp_processor_id())
+		return 0;
+
+	if (reg == APIC_LVR)
+		return 0x10;
+#ifdef CONFIG_X86_32
+	if (reg == APIC_LDR)
+		return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+#endif
+	if (reg != APIC_ID)
+		return 0;
+
+	ret = HYPERVISOR_dom0_op(&op);
+	if (ret)
+		return 0;
+
+	return op.u.pcpu_info.apic_id << 24;
+}
+
+static void xen_apic_write(u32 reg, u32 val)
+{
+	/* Warn to see if there's any stray references */
+	WARN(1,"register: %x, value: %x\n", reg, val);
+}
+
+static u64 xen_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void xen_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there's any stray references */
+	WARN_ON(1);
+}
+
+static u32 xen_safe_apic_wait_icr_idle(void)
+{
+        return 0;
+}
+
+static int xen_apic_probe_pv(void)
+{
+	if (xen_pv_domain())
+		return 1;
+
+	return 0;
+}
+
+static int xen_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return xen_pv_domain();
+}
+
+static int xen_id_always_valid(int apicid)
+{
+	return 1;
+}
+
+static int xen_id_always_registered(void)
+{
+	return 1;
+}
+
+static int xen_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return initial_apic_id >> index_msb;
+}
+
+#ifdef CONFIG_X86_32
+static int xen_x86_32_early_logical_apicid(int cpu)
+{
+	/* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */
+	return 1 << cpu;
+}
+#endif
+
+static void xen_noop(void)
+{
+}
+
+static void xen_silent_inquire(int apicid)
+{
+}
+
+static struct apic xen_pv_apic = {
+	.name 				= "Xen PV",
+	.probe 				= xen_apic_probe_pv,
+	.acpi_madt_oem_check		= xen_madt_oem_check,
+	.apic_id_valid 			= xen_id_always_valid,
+	.apic_id_registered 		= xen_id_always_registered,
+
+	/* .irq_delivery_mode - used in native_compose_msi_msg only */
+	/* .irq_dest_mode     - used in native_compose_msi_msg only */
+
+	.target_cpus			= default_target_cpus,
+	.disable_esr			= 0,
+	/* .dest_logical      -  default_send_IPI_ use it but we use our own. */
+	.check_apicid_used		= default_check_apicid_used, /* Used on 32-bit */
+
+	.vector_allocation_domain	= flat_vector_allocation_domain,
+	.init_apic_ldr			= xen_noop, /* setup_local_APIC calls it */
+
+	.ioapic_phys_id_map		= default_ioapic_phys_id_map, /* Used on 32-bit */
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= physid_set_mask_of_physid, /* Used on 32-bit */
+	.check_phys_apicid_present	= default_check_phys_apicid_present, /* smp_sanity_check needs it */
+	.phys_pkg_id			= xen_phys_pkg_id, /* detect_ht */
+
+	.get_apic_id 			= xen_get_apic_id,
+	.set_apic_id 			= xen_set_apic_id, /* Can be NULL on 32-bit. */
+	.apic_id_mask			= 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */
+
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+
+#ifdef CONFIG_SMP
+	.send_IPI_mask 			= xen_send_IPI_mask,
+	.send_IPI_mask_allbutself 	= xen_send_IPI_mask_allbutself,
+	.send_IPI_allbutself 		= xen_send_IPI_allbutself,
+	.send_IPI_all 			= xen_send_IPI_all,
+	.send_IPI_self 			= xen_send_IPI_self,
+#endif
+	/* .wait_for_init_deassert- used  by AP bootup - smp_callin which we don't use */
+	.inquire_remote_apic		= xen_silent_inquire,
+
+	.read				= xen_apic_read,
+	.write				= xen_apic_write,
+	.eoi_write			= xen_apic_write,
+
+	.icr_read 			= xen_apic_icr_read,
+	.icr_write 			= xen_apic_icr_write,
+	.wait_icr_idle 			= xen_noop,
+	.safe_wait_icr_idle 		= xen_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+	/* generic_processor_info and setup_local_APIC. */
+	.x86_32_early_logical_apicid	= xen_x86_32_early_logical_apicid,
+#endif
+};
+
+static void __init xen_apic_check(void)
+{
+	if (apic == &xen_pv_apic)
+		return;
+
+	pr_info("Switched APIC routing from %s to %s.\n", apic->name,
+		xen_pv_apic.name);
+	apic = &xen_pv_apic;
+}
 void __init xen_init_apic(void)
 {
 	x86_io_apic_ops.read = xen_io_apic_read;
+	/* On PV guests the APIC CPUID bit is disabled so none of the
+	 * routines end up executing. */
+	if (!xen_initial_domain())
+		apic = &xen_pv_apic;
+
+	x86_platform.apic_post_init = xen_apic_check;
 }
+apic_driver(xen_pv_apic);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5240f563076d..94578efd3067 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -912,6 +912,7 @@ static void xen_load_sp0(struct tss_struct *tss,
 	mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	tss->x86_tss.sp0 = thread->sp0;
 }
 
 static void xen_set_iopl_mask(unsigned mask)
@@ -927,92 +928,6 @@ static void xen_io_delay(void)
 {
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long xen_set_apic_id(unsigned int x)
-{
-	WARN_ON(1);
-	return x;
-}
-static unsigned int xen_get_apic_id(unsigned long x)
-{
-	return ((x)>>24) & 0xFFu;
-}
-static u32 xen_apic_read(u32 reg)
-{
-	struct xen_platform_op op = {
-		.cmd = XENPF_get_cpuinfo,
-		.interface_version = XENPF_INTERFACE_VERSION,
-		.u.pcpu_info.xen_cpuid = 0,
-	};
-	int ret = 0;
-
-	/* Shouldn't need this as APIC is turned off for PV, and we only
-	 * get called on the bootup processor. But just in case. */
-	if (!xen_initial_domain() || smp_processor_id())
-		return 0;
-
-	if (reg == APIC_LVR)
-		return 0x10;
-
-	if (reg != APIC_ID)
-		return 0;
-
-	ret = HYPERVISOR_dom0_op(&op);
-	if (ret)
-		return 0;
-
-	return op.u.pcpu_info.apic_id << 24;
-}
-
-static void xen_apic_write(u32 reg, u32 val)
-{
-	/* Warn to see if there's any stray references */
-	WARN_ON(1);
-}
-
-static u64 xen_apic_icr_read(void)
-{
-	return 0;
-}
-
-static void xen_apic_icr_write(u32 low, u32 id)
-{
-	/* Warn to see if there's any stray references */
-	WARN_ON(1);
-}
-
-static void xen_apic_wait_icr_idle(void)
-{
-        return;
-}
-
-static u32 xen_safe_apic_wait_icr_idle(void)
-{
-        return 0;
-}
-
-static void set_xen_basic_apic_ops(void)
-{
-	apic->read = xen_apic_read;
-	apic->write = xen_apic_write;
-	apic->icr_read = xen_apic_icr_read;
-	apic->icr_write = xen_apic_icr_write;
-	apic->wait_icr_idle = xen_apic_wait_icr_idle;
-	apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
-	apic->set_apic_id = xen_set_apic_id;
-	apic->get_apic_id = xen_get_apic_id;
-
-#ifdef CONFIG_SMP
-	apic->send_IPI_allbutself = xen_send_IPI_allbutself;
-	apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself;
-	apic->send_IPI_mask = xen_send_IPI_mask;
-	apic->send_IPI_all = xen_send_IPI_all;
-	apic->send_IPI_self = xen_send_IPI_self;
-#endif
-}
-
-#endif
-
 static void xen_clts(void)
 {
 	struct multicall_space mcs;
@@ -1618,7 +1533,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/*
 	 * set up the basic apic ops.
 	 */
-	set_xen_basic_apic_ops();
+	xen_init_apic();
 #endif
 
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
@@ -1731,8 +1646,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 		if (HYPERVISOR_dom0_op(&op) == 0)
 			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
 
-		xen_init_apic();
-
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index adca9e2b6553..dd151b2045b0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -502,7 +502,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
 
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 __visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
@@ -589,7 +589,7 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
-#endif	/* PAGETABLE_LEVELS == 4 */
+#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
 
 /*
  * (Yet another) pagetable walker.  This one is intended for pinning a
@@ -1628,7 +1628,7 @@ static void xen_release_pmd(unsigned long pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2046,7 +2046,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 	pv_mmu_ops.set_pgd = xen_set_pgd;
 #endif
 
@@ -2056,7 +2056,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
@@ -2122,14 +2122,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
-#if PAGETABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS == 4
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
 
 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
-#endif	/* PAGETABLE_LEVELS == 4 */
+#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
@@ -2436,99 +2436,11 @@ void __init xen_hvm_init_mmu_ops(void)
 }
 #endif
 
-#ifdef CONFIG_XEN_PVH
-/*
- * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user
- * space creating new guest on pvh dom0 and needing to map domU pages.
- */
-static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn,
-			    unsigned int domid)
-{
-	int rc, err = 0;
-	xen_pfn_t gpfn = lpfn;
-	xen_ulong_t idx = fgfn;
-
-	struct xen_add_to_physmap_range xatp = {
-		.domid = DOMID_SELF,
-		.foreign_domid = domid,
-		.size = 1,
-		.space = XENMAPSPACE_gmfn_foreign,
-	};
-	set_xen_guest_handle(xatp.idxs, &idx);
-	set_xen_guest_handle(xatp.gpfns, &gpfn);
-	set_xen_guest_handle(xatp.errs, &err);
-
-	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
-	if (rc < 0)
-		return rc;
-	return err;
-}
-
-static int xlate_remove_from_p2m(unsigned long spfn, int count)
-{
-	struct xen_remove_from_physmap xrp;
-	int i, rc;
-
-	for (i = 0; i < count; i++) {
-		xrp.domid = DOMID_SELF;
-		xrp.gpfn = spfn+i;
-		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
-		if (rc)
-			break;
-	}
-	return rc;
-}
-
-struct xlate_remap_data {
-	unsigned long fgfn; /* foreign domain's gfn */
-	pgprot_t prot;
-	domid_t  domid;
-	int index;
-	struct page **pages;
-};
-
-static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
-			    void *data)
-{
-	int rc;
-	struct xlate_remap_data *remap = data;
-	unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
-	pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
-
-	rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid);
-	if (rc)
-		return rc;
-	native_set_pte(ptep, pteval);
-
-	return 0;
-}
-
-static int xlate_remap_gfn_range(struct vm_area_struct *vma,
-				 unsigned long addr, unsigned long mfn,
-				 int nr, pgprot_t prot, unsigned domid,
-				 struct page **pages)
-{
-	int err;
-	struct xlate_remap_data pvhdata;
-
-	BUG_ON(!pages);
-
-	pvhdata.fgfn = mfn;
-	pvhdata.prot = prot;
-	pvhdata.domid = domid;
-	pvhdata.index = 0;
-	pvhdata.pages = pages;
-	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
-				  xlate_map_pte_fn, &pvhdata);
-	flush_tlb_all();
-	return err;
-}
-#endif
-
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
-	unsigned long mfn;
+	xen_pfn_t *mfn;
+	bool contiguous;
 	pgprot_t prot;
 	struct mmu_update *mmu_update;
 };
@@ -2537,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 				 unsigned long addr, void *data)
 {
 	struct remap_data *rmd = data;
-	pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
+	pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
+
+	/* If we have a contigious range, just update the mfn itself,
+	   else update pointer to be "next mfn". */
+	if (rmd->contiguous)
+		(*rmd->mfn)++;
+	else
+		rmd->mfn++;
 
 	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
 	rmd->mmu_update->val = pte_val_ma(pte);
@@ -2546,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 	return 0;
 }
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
-			       pgprot_t prot, unsigned domid,
-			       struct page **pages)
-
+static int do_remap_mfn(struct vm_area_struct *vma,
+			unsigned long addr,
+			xen_pfn_t *mfn, int nr,
+			int *err_ptr, pgprot_t prot,
+			unsigned domid,
+			struct page **pages)
 {
+	int err = 0;
 	struct remap_data rmd;
 	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-	int batch;
 	unsigned long range;
-	int err = 0;
+	int mapped = 0;
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
 	if (xen_feature(XENFEAT_auto_translated_physmap)) {
 #ifdef CONFIG_XEN_PVH
 		/* We need to update the local page tables and the xen HAP */
-		return xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
-					     domid, pages);
+		return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+						 prot, domid, pages);
 #else
 		return -EINVAL;
 #endif
@@ -2573,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
+	/* We use the err_ptr to indicate if there we are doing a contigious
+	 * mapping or a discontigious mapping. */
+	rmd.contiguous = !err_ptr;
 
 	while (nr) {
-		batch = min(REMAP_BATCH_SIZE, nr);
+		int index = 0;
+		int done = 0;
+		int batch = min(REMAP_BATCH_SIZE, nr);
+		int batch_left = batch;
 		range = (unsigned long)batch << PAGE_SHIFT;
 
 		rmd.mmu_update = mmu_update;
@@ -2584,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		if (err)
 			goto out;
 
-		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
-		if (err < 0)
-			goto out;
+		/* We record the error for each page that gives an error, but
+		 * continue mapping until the whole set is done */
+		do {
+			int i;
+
+			err = HYPERVISOR_mmu_update(&mmu_update[index],
+						    batch_left, &done, domid);
+
+			/*
+			 * @err_ptr may be the same buffer as @mfn, so
+			 * only clear it after each chunk of @mfn is
+			 * used.
+			 */
+			if (err_ptr) {
+				for (i = index; i < index + done; i++)
+					err_ptr[i] = 0;
+			}
+			if (err < 0) {
+				if (!err_ptr)
+					goto out;
+				err_ptr[i] = err;
+				done++; /* Skip failed frame. */
+			} else
+				mapped += done;
+			batch_left -= done;
+			index += done;
+		} while (batch_left);
 
 		nr -= batch;
 		addr += range;
+		if (err_ptr)
+			err_ptr += batch;
 	}
-
-	err = 0;
 out:
 
 	xen_flush_tlb_all();
 
-	return err;
+	return err < 0 ? err : mapped;
+}
+
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
+{
+	return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t *mfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid, struct page **pages)
+{
+	/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+	 * and the consequences later is quite hard to detect what the actual
+	 * cause of "wrong memory was mapped in".
+	 */
+	BUG_ON(err_ptr == NULL);
+	return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+
 /* Returns: 0 success */
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 			       int numpgs, struct page **pages)
@@ -2609,22 +2583,7 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 		return 0;
 
 #ifdef CONFIG_XEN_PVH
-	while (numpgs--) {
-		/*
-		 * The mmu has already cleaned up the process mmu
-		 * resources at this point (lookup_address will return
-		 * NULL).
-		 */
-		unsigned long pfn = page_to_pfn(pages[numpgs]);
-
-		xlate_remove_from_p2m(pfn, 1);
-	}
-	/*
-	 * We don't need to flush tlbs because as part of
-	 * xlate_remove_from_p2m, the hypervisor will do tlb flushes
-	 * after removing the p2m entries from the EPT/NPT
-	 */
-	return 0;
+	return xen_xlate_unmap_gfn_range(vma, numpgs, pages);
 #else
 	return -EINVAL;
 #endif
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 740ae3026a14..b47124d4cd67 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -91,6 +91,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
 unsigned long xen_max_p2m_pfn __read_mostly;
 EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#else
+#define P2M_LIMIT 0
+#endif
+
 static DEFINE_SPINLOCK(p2m_update_lock);
 
 static unsigned long *p2m_mid_missing_mfn;
@@ -385,9 +391,11 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 void __init xen_vmalloc_p2m_tree(void)
 {
 	static struct vm_struct vm;
+	unsigned long p2m_limit;
 
+	p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
 	vm.flags = VM_ALLOC;
-	vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
+	vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
 			PMD_SIZE * PMDS_PER_MID_PAGE);
 	vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
 	pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
@@ -563,7 +571,7 @@ static bool alloc_p2m(unsigned long pfn)
 		if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
 			p2m_init(p2m);
 		else
-			p2m_init_identity(p2m, pfn);
+			p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));
 
 		spin_lock_irqsave(&p2m_update_lock, flags);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 08e8489c47f1..86484384492e 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -90,14 +90,10 @@ static void cpu_bringup(void)
 
 	set_cpu_online(cpu, true);
 
-	this_cpu_write(cpu_state, CPU_ONLINE);
-
-	wmb();
+	cpu_set_state_online(cpu);  /* Implies full memory barrier. */
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
-
-	wmb();			/* make sure everything is out */
 }
 
 /*
@@ -445,21 +441,19 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int rc;
 
-	per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
-	irq_ctx_init(cpu);
-#else
-	clear_tsk_thread_flag(idle, TIF_FORK);
-#endif
-	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
+	common_cpu_up(cpu, idle);
 
 	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
 	xen_init_lock_cpu(cpu);
 
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	/*
+	 * PV VCPUs are always successfully taken down (see 'while' loop
+	 * in xen_cpu_die()), so -EBUSY is an error.
+	 */
+	rc = cpu_check_up_prepare(cpu);
+	if (rc)
+		return rc;
 
 	/* make sure interrupts start blocked */
 	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
@@ -468,10 +462,6 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 	if (rc)
 		return rc;
 
-	if (num_online_cpus() == 1)
-		/* Just in case we booted with a single CPU. */
-		alternatives_enable_smp();
-
 	rc = xen_smp_intr_init(cpu);
 	if (rc)
 		return rc;
@@ -479,10 +469,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
 	BUG_ON(rc);
 
-	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
+	while (cpu_report_state(cpu) != CPU_ONLINE)
 		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
-		barrier();
-	}
 
 	return 0;
 }
@@ -511,11 +499,11 @@ static void xen_cpu_die(unsigned int cpu)
 		schedule_timeout(HZ/10);
 	}
 
-	cpu_die_common(cpu);
-
-	xen_smp_intr_free(cpu);
-	xen_uninit_lock_cpu(cpu);
-	xen_teardown_timer(cpu);
+	if (common_cpu_die(cpu) == 0) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+		xen_teardown_timer(cpu);
+	}
 }
 
 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
@@ -747,6 +735,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc;
+
+	/*
+	 * This can happen if CPU was offlined earlier and
+	 * offlining timed out in common_cpu_die().
+	 */
+	if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+	}
+
 	/*
 	 * xen_smp_intr_init() needs to run before native_cpu_up()
 	 * so that IPI vectors are set up on the booting CPU before
@@ -768,12 +766,6 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 	return rc;
 }
 
-static void xen_hvm_cpu_die(unsigned int cpu)
-{
-	xen_cpu_die(cpu);
-	native_cpu_die(cpu);
-}
-
 void __init xen_hvm_smp_init(void)
 {
 	if (!xen_have_vector_callback)
@@ -781,7 +773,7 @@ void __init xen_hvm_smp_init(void)
 	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
 	smp_ops.cpu_up = xen_hvm_cpu_up;
-	smp_ops.cpu_die = xen_hvm_cpu_die;
+	smp_ops.cpu_die = xen_cpu_die;
 	smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
 	smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
 	smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index c4df9dbd63b7..d9497698645a 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,5 +1,5 @@
 #include <linux/types.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
-	unsigned long reason = (unsigned long)data;
-
 	/* Boot processor notified via generic timekeeping_resume() */
-	if ( smp_processor_id() == 0)
+	if (smp_processor_id() == 0)
 		return;
 
-	clockevents_notify(reason, NULL);
+	tick_resume_local();
 }
 
 void xen_arch_resume(void)
 {
-	on_each_cpu(xen_vcpu_notify_restore,
-		    (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
+	on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c
index 520022d1a181..a702ec2f5931 100644
--- a/arch/x86/xen/trace.c
+++ b/arch/x86/xen/trace.c
@@ -1,54 +1,12 @@
 #include <linux/ftrace.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/xen-mca.h>
 
-#define N(x)	[__HYPERVISOR_##x] = "("#x")"
+#define HYPERCALL(x)	[__HYPERVISOR_##x] = "("#x")",
 static const char *xen_hypercall_names[] = {
-	N(set_trap_table),
-	N(mmu_update),
-	N(set_gdt),
-	N(stack_switch),
-	N(set_callbacks),
-	N(fpu_taskswitch),
-	N(sched_op_compat),
-	N(dom0_op),
-	N(set_debugreg),
-	N(get_debugreg),
-	N(update_descriptor),
-	N(memory_op),
-	N(multicall),
-	N(update_va_mapping),
-	N(set_timer_op),
-	N(event_channel_op_compat),
-	N(xen_version),
-	N(console_io),
-	N(physdev_op_compat),
-	N(grant_table_op),
-	N(vm_assist),
-	N(update_va_mapping_otherdomain),
-	N(iret),
-	N(vcpu_op),
-	N(set_segment_base),
-	N(mmuext_op),
-	N(acm_op),
-	N(nmi_op),
-	N(sched_op),
-	N(callback_op),
-	N(xenoprof_op),
-	N(event_channel_op),
-	N(physdev_op),
-	N(hvm_op),
-
-/* Architecture-specific hypercall definitions. */
-	N(arch_0),
-	N(arch_1),
-	N(arch_2),
-	N(arch_3),
-	N(arch_4),
-	N(arch_5),
-	N(arch_6),
-	N(arch_7),
+#include <asm/xen-hypercalls.h>
 };
-#undef N
+#undef HYPERCALL
 
 static const char *xen_hypercall_name(unsigned op)
 {
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 53adefda4275..985fc3ee0973 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -68,11 +68,11 @@ ENTRY(xen_sysret64)
 	 * We're already on the usermode stack at this point, but
 	 * still with the kernel gs, so we can easily switch back
 	 */
-	movq %rsp, PER_CPU_VAR(old_rsp)
+	movq %rsp, PER_CPU_VAR(rsp_scratch)
 	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER_DS
-	pushq PER_CPU_VAR(old_rsp)
+	pushq PER_CPU_VAR(rsp_scratch)
 	pushq %r11
 	pushq $__USER_CS
 	pushq %rcx
@@ -87,11 +87,11 @@ ENTRY(xen_sysret32)
 	 * We're already on the usermode stack at this point, but
 	 * still with the kernel gs, so we can easily switch back
 	 */
-	movq %rsp, PER_CPU_VAR(old_rsp)
+	movq %rsp, PER_CPU_VAR(rsp_scratch)
 	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER32_DS
-	pushq PER_CPU_VAR(old_rsp)
+	pushq PER_CPU_VAR(rsp_scratch)
 	pushq %r11
 	pushq $__USER32_CS
 	pushq %rcx
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 674b222544b7..8afdfccf6086 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -12,6 +12,8 @@
 
 #include <xen/interface/elfnote.h>
 #include <xen/interface/features.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/xen-mca.h>
 #include <asm/xen/interface.h>
 
 #ifdef CONFIG_XEN_PVH
@@ -85,59 +87,14 @@ ENTRY(xen_pvh_early_cpu_init)
 .pushsection .text
 	.balign PAGE_SIZE
 ENTRY(hypercall_page)
-#define NEXT_HYPERCALL(x) \
-	ENTRY(xen_hypercall_##x) \
-	.skip 32
-
-NEXT_HYPERCALL(set_trap_table)
-NEXT_HYPERCALL(mmu_update)
-NEXT_HYPERCALL(set_gdt)
-NEXT_HYPERCALL(stack_switch)
-NEXT_HYPERCALL(set_callbacks)
-NEXT_HYPERCALL(fpu_taskswitch)
-NEXT_HYPERCALL(sched_op_compat)
-NEXT_HYPERCALL(platform_op)
-NEXT_HYPERCALL(set_debugreg)
-NEXT_HYPERCALL(get_debugreg)
-NEXT_HYPERCALL(update_descriptor)
-NEXT_HYPERCALL(ni)
-NEXT_HYPERCALL(memory_op)
-NEXT_HYPERCALL(multicall)
-NEXT_HYPERCALL(update_va_mapping)
-NEXT_HYPERCALL(set_timer_op)
-NEXT_HYPERCALL(event_channel_op_compat)
-NEXT_HYPERCALL(xen_version)
-NEXT_HYPERCALL(console_io)
-NEXT_HYPERCALL(physdev_op_compat)
-NEXT_HYPERCALL(grant_table_op)
-NEXT_HYPERCALL(vm_assist)
-NEXT_HYPERCALL(update_va_mapping_otherdomain)
-NEXT_HYPERCALL(iret)
-NEXT_HYPERCALL(vcpu_op)
-NEXT_HYPERCALL(set_segment_base)
-NEXT_HYPERCALL(mmuext_op)
-NEXT_HYPERCALL(xsm_op)
-NEXT_HYPERCALL(nmi_op)
-NEXT_HYPERCALL(sched_op)
-NEXT_HYPERCALL(callback_op)
-NEXT_HYPERCALL(xenoprof_op)
-NEXT_HYPERCALL(event_channel_op)
-NEXT_HYPERCALL(physdev_op)
-NEXT_HYPERCALL(hvm_op)
-NEXT_HYPERCALL(sysctl)
-NEXT_HYPERCALL(domctl)
-NEXT_HYPERCALL(kexec_op)
-NEXT_HYPERCALL(tmem_op) /* 38 */
-ENTRY(xen_hypercall_rsvr)
-	.skip 320
-NEXT_HYPERCALL(mca) /* 48 */
-NEXT_HYPERCALL(arch_1)
-NEXT_HYPERCALL(arch_2)
-NEXT_HYPERCALL(arch_3)
-NEXT_HYPERCALL(arch_4)
-NEXT_HYPERCALL(arch_5)
-NEXT_HYPERCALL(arch_6)
-	.balign PAGE_SIZE
+	.skip PAGE_SIZE
+
+#define HYPERCALL(n) \
+	.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
+	.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
+#include <asm/xen-hypercalls.h>
+#undef HYPERCALL
+
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e31d4949124a..87be10e8b57a 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -428,6 +428,36 @@ config DEFAULT_MEM_SIZE
 
 	  If unsure, leave the default value here.
 
+config XTFPGA_LCD
+	bool "Enable XTFPGA LCD driver"
+	depends on XTENSA_PLATFORM_XTFPGA
+	default n
+	help
+	  There's a 2x16 LCD on most of XTFPGA boards, kernel may output
+	  progress messages there during bootup/shutdown. It may be useful
+	  during board bringup.
+
+	  If unsure, say N.
+
+config XTFPGA_LCD_BASE_ADDR
+	hex "XTFPGA LCD base address"
+	depends on XTFPGA_LCD
+	default "0x0d0c0000"
+	help
+	  Base address of the LCD controller inside KIO region.
+	  Different boards from XTFPGA family have LCD controller at different
+	  addresses. Please consult prototyping user guide for your board for
+	  the correct address. Wrong address here may lead to hardware lockup.
+
+config XTFPGA_LCD_8BIT_ACCESS
+	bool "Use 8-bit access to XTFPGA LCD"
+	depends on XTFPGA_LCD
+	default n
+	help
+	  LCD may be connected with 4- or 8-bit interface, 8-bit access may
+	  only be used with 8-bit interface. Please consult prototyping user
+	  guide for your board for the correct interface width.
+
 endmenu
 
 menu "Executable file formats"
diff --git a/arch/xtensa/boot/dts/xtfpga.dtsi b/arch/xtensa/boot/dts/xtfpga.dtsi
index dec9178840f6..cd0b9e34adc8 100644
--- a/arch/xtensa/boot/dts/xtfpga.dtsi
+++ b/arch/xtensa/boot/dts/xtfpga.dtsi
@@ -40,6 +40,12 @@
 			#clock-cells = <0>;
 			compatible = "fixed-clock";
 		};
+
+		clk54: clk54 {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <54000000>;
+		};
 	};
 
 	soc {
@@ -65,5 +71,63 @@
 			local-mac-address = [00 50 c2 13 6f 00];
 			clocks = <&osc>;
 		};
+
+		i2s0: xtfpga-i2s@0d080000 {
+			#sound-dai-cells = <0>;
+			compatible = "cdns,xtfpga-i2s";
+			reg = <0x0d080000 0x40>;
+			interrupts = <2 1>; /* external irq 2 */
+			clocks = <&cdce706 4>;
+		};
+
+		i2c0: i2c-master@0d090000 {
+			compatible = "opencores,i2c-ocores";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x0d090000 0x20>;
+			reg-shift = <2>;
+			reg-io-width = <1>;
+			interrupts = <4 1>;
+			clocks = <&osc>;
+
+			cdce706: clock-synth@69 {
+				compatible = "ti,cdce706";
+				#clock-cells = <1>;
+				reg = <0x69>;
+				clocks = <&clk54>;
+				clock-names = "clk_in0";
+			};
+		};
+
+		spi0: spi-master@0d0a0000 {
+			compatible = "cdns,xtfpga-spi";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x0d0a0000 0xc>;
+
+			tlv320aic23: sound-codec@0 {
+				#sound-dai-cells = <0>;
+				compatible = "tlv320aic23";
+				reg = <0>;
+				spi-max-frequency = <12500000>;
+			};
+		};
+	};
+
+	sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,mclk-fs = <256>;
+
+		simple-audio-card,cpu {
+			sound-dai = <&i2s0>;
+		};
+
+		simple-audio-card,codec {
+			sound-dai = <&tlv320aic23>;
+			simple-audio-card,bitclock-master = <0>;
+			simple-audio-card,frame-master = <0>;
+			clocks = <&cdce706 4>;
+		};
 	};
 };
diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig
new file mode 100644
index 000000000000..c4904db15582
--- /dev/null
+++ b/arch/xtensa/configs/audio_kc705_defconfig
@@ -0,0 +1,142 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IRQ_TIME_ACCOUNTING=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_XTENSA_VARIANT_CUSTOM=y
+CONFIG_XTENSA_VARIANT_CUSTOM_NAME="test_kc705_hifi"
+CONFIG_XTENSA_UNALIGNED_USER=y
+CONFIG_PREEMPT=y
+CONFIG_HIGHMEM=y
+# CONFIG_PCI is not set
+CONFIG_XTENSA_PLATFORM_XTFPGA=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon=uart8250,mmio32,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug"
+CONFIG_USE_OF=y
+CONFIG_BUILTIN_DTB="kc705"
+# CONFIG_COMPACTION is not set
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_PM=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_MTD=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_UBI=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_MARVELL_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_OCORES=y
+CONFIG_SPI=y
+CONFIG_SPI_XTENSA_XTFPGA=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SOC=y
+CONFIG_SND_SOC_XTFPGA_I2S=y
+CONFIG_SND_SOC_TLV320AIC23_SPI=y
+CONFIG_SND_SIMPLE_CARD=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_COMMON_CLK_CDCE706=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_UBIFS_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_SWAP=y
+CONFIG_ROOT_NFS=y
+CONFIG_SUNRPC_DEBUG=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_STACKTRACE=y
+CONFIG_RCU_TRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_S32C1I_SELFTEST is not set
+CONFIG_CRYPTO_ANSI_CPRNG=y
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index a9b5d3ba196c..9ad12c617184 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -44,7 +44,6 @@ typedef struct xtregs_coprocessor {
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	unsigned long		status;		/* thread-synchronous flags */
 	__u32			cpu;		/* current CPU */
@@ -61,17 +60,6 @@ struct thread_info {
 	xtregs_user_t		xtregs_user;
 };
 
-#else /* !__ASSEMBLY__ */
-
-/* offsets into the thread_info struct for assembly code access */
-#define TI_TASK		 0x00000000
-#define TI_EXEC_DOMAIN	 0x00000004
-#define TI_FLAGS	 0x00000008
-#define TI_STATUS	 0x0000000C
-#define TI_CPU		 0x00000010
-#define TI_PRE_COUNT	 0x00000014
-#define TI_ADDR_LIMIT	 0x00000018
-
 #endif
 
 /*
@@ -83,7 +71,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index db5bb72e2f4e..b95c30594355 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -715,7 +715,7 @@ __SYSCALL(323, sys_process_vm_writev, 6)
 __SYSCALL(324, sys_name_to_handle_at, 5)
 #define __NR_open_by_handle_at			325
 __SYSCALL(325, sys_open_by_handle_at, 3)
-#define __NR_sync_file_range			326
+#define __NR_sync_file_range2			326
 __SYSCALL(326, sys_sync_file_range2, 6)
 #define __NR_perf_event_open			327
 __SYSCALL(327, sys_perf_event_open, 5)
@@ -749,8 +749,12 @@ __SYSCALL(337, sys_seccomp, 3)
 __SYSCALL(338, sys_getrandom, 3)
 #define __NR_memfd_create			339
 __SYSCALL(339, sys_memfd_create, 2)
+#define __NR_bpf				340
+__SYSCALL(340, sys_bpf, 3)
+#define __NR_execveat				341
+__SYSCALL(341, sys_execveat, 5)
 
-#define __NR_syscall_count			340
+#define __NR_syscall_count			342
 
 /*
  * sysxtensa syscall handler
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index 18d962a8c0c2..d3a0f0fd56dd 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -29,6 +29,7 @@ AFLAGS_head.o += -mtext-section-literals
 
 sed-y = -e 's/\*(\(\.[a-z]*it\|\.ref\|\)\.text)/*(\1.literal \1.text)/g' \
 	-e 's/\.text\.unlikely/.literal.unlikely .text.unlikely/g'	 \
+	-e 's/\*(\(\.text .*\))/*(.literal \1)/g'			 \
 	-e 's/\*(\(\.text\.[a-z]*\))/*(\1.literal \1)/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 1915c7c889ba..b123ace3b67c 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -77,6 +77,14 @@ int main(void)
 	DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, stack));
 	DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct));
 
+	/* offsets in thread_info struct */
+	OFFSET(TI_TASK, thread_info, task);
+	OFFSET(TI_FLAGS, thread_info, flags);
+	OFFSET(TI_STSTUS, thread_info, status);
+	OFFSET(TI_CPU, thread_info, cpu);
+	OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
+	OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
+
 	/* struct thread_info (offset from start_struct) */
 	DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
 	DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 5b3403388d7f..b848cc3dc913 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -174,7 +174,7 @@ static int __init pcibios_init(void)
 	struct pci_controller *pci_ctrl;
 	struct list_head resources;
 	struct pci_bus *bus;
-	int next_busno = 0;
+	int next_busno = 0, ret;
 
 	printk("PCI: Probing PCI hardware\n");
 
@@ -185,14 +185,25 @@ static int __init pcibios_init(void)
 		pci_controller_apertures(pci_ctrl, &resources);
 		bus = pci_scan_root_bus(NULL, pci_ctrl->first_busno,
 					pci_ctrl->ops, pci_ctrl, &resources);
+		if (!bus)
+			continue;
+
 		pci_ctrl->bus = bus;
 		pci_ctrl->last_busno = bus->busn_res.end;
 		if (next_busno <= pci_ctrl->last_busno)
 			next_busno = pci_ctrl->last_busno+1;
 	}
 	pci_bus_count = next_busno;
+	ret = platform_pcibios_fixup();
+	if (ret)
+		return ret;
 
-	return platform_pcibios_fixup();
+	for (pci_ctrl = pci_ctrl_head; pci_ctrl; pci_ctrl = pci_ctrl->next) {
+		if (pci_ctrl->bus)
+			pci_bus_add_devices(pci_ctrl->bus);
+	}
+
+	return 0;
 }
 
 subsys_initcall(pcibios_init);
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 3d733ba16f28..e87adaa07ff3 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -336,7 +336,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 {
 	struct rt_sigframe *frame;
 	int err = 0, sig = ksig->sig;
-	int signal;
 	unsigned long sp, ra, tp;
 
 	sp = regs->areg[1];
@@ -354,12 +353,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 		return -EFAULT;
 	}
 
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
 		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 	}
@@ -400,19 +393,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 	 * Note: PS.CALLINC is set to one by start_thread
 	 */
 	regs->areg[4] = (((unsigned long) ra) & 0x3fffffff) | 0x40000000;
-	regs->areg[6] = (unsigned long) signal;
+	regs->areg[6] = (unsigned long) sig;
 	regs->areg[7] = (unsigned long) &frame->info;
 	regs->areg[8] = (unsigned long) &frame->uc;
 	regs->threadptr = tp;
 
-	/* Set access mode to USER_DS.  Nomenclature is outdated, but
-	 * functionality is used in uaccess.h
-	 */
-	set_fs(USER_DS);
-
 #if DEBUG_SIG
 	printk("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08x\n",
-		current->comm, current->pid, signal, frame, regs->pc);
+		current->comm, current->pid, sig, frame, regs->pc);
 #endif
 
 	return 0;
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index d05f8feeb8d7..17b1ef3232e4 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -349,8 +349,8 @@ static void iss_net_timer(unsigned long priv)
 {
 	struct iss_net_private *lp = (struct iss_net_private *)priv;
 
-	spin_lock(&lp->lock);
 	iss_net_poll();
+	spin_lock(&lp->lock);
 	mod_timer(&lp->timer, jiffies + lp->timer_val);
 	spin_unlock(&lp->lock);
 }
@@ -361,7 +361,7 @@ static int iss_net_open(struct net_device *dev)
 	struct iss_net_private *lp = netdev_priv(dev);
 	int err;
 
-	spin_lock(&lp->lock);
+	spin_lock_bh(&lp->lock);
 
 	err = lp->tp.open(lp);
 	if (err < 0)
@@ -376,9 +376,11 @@ static int iss_net_open(struct net_device *dev)
 	while ((err = iss_net_rx(dev)) > 0)
 		;
 
-	spin_lock(&opened_lock);
+	spin_unlock_bh(&lp->lock);
+	spin_lock_bh(&opened_lock);
 	list_add(&lp->opened_list, &opened);
-	spin_unlock(&opened_lock);
+	spin_unlock_bh(&opened_lock);
+	spin_lock_bh(&lp->lock);
 
 	init_timer(&lp->timer);
 	lp->timer_val = ISS_NET_TIMER_VALUE;
@@ -387,7 +389,7 @@ static int iss_net_open(struct net_device *dev)
 	mod_timer(&lp->timer, jiffies + lp->timer_val);
 
 out:
-	spin_unlock(&lp->lock);
+	spin_unlock_bh(&lp->lock);
 	return err;
 }
 
@@ -395,7 +397,7 @@ static int iss_net_close(struct net_device *dev)
 {
 	struct iss_net_private *lp = netdev_priv(dev);
 	netif_stop_queue(dev);
-	spin_lock(&lp->lock);
+	spin_lock_bh(&lp->lock);
 
 	spin_lock(&opened_lock);
 	list_del(&opened);
@@ -405,18 +407,17 @@ static int iss_net_close(struct net_device *dev)
 
 	lp->tp.close(lp);
 
-	spin_unlock(&lp->lock);
+	spin_unlock_bh(&lp->lock);
 	return 0;
 }
 
 static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct iss_net_private *lp = netdev_priv(dev);
-	unsigned long flags;
 	int len;
 
 	netif_stop_queue(dev);
-	spin_lock_irqsave(&lp->lock, flags);
+	spin_lock_bh(&lp->lock);
 
 	len = lp->tp.write(lp, &skb);
 
@@ -438,7 +439,7 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		pr_err("%s: %s failed(%d)\n", dev->name, __func__, len);
 	}
 
-	spin_unlock_irqrestore(&lp->lock, flags);
+	spin_unlock_bh(&lp->lock);
 
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -466,9 +467,9 @@ static int iss_net_set_mac(struct net_device *dev, void *addr)
 
 	if (!is_valid_ether_addr(hwaddr->sa_data))
 		return -EADDRNOTAVAIL;
-	spin_lock(&lp->lock);
+	spin_lock_bh(&lp->lock);
 	memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN);
-	spin_unlock(&lp->lock);
+	spin_unlock_bh(&lp->lock);
 	return 0;
 }
 
@@ -520,11 +521,11 @@ static int iss_net_configure(int index, char *init)
 	*lp = (struct iss_net_private) {
 		.device_list		= LIST_HEAD_INIT(lp->device_list),
 		.opened_list		= LIST_HEAD_INIT(lp->opened_list),
-		.lock			= __SPIN_LOCK_UNLOCKED(lp.lock),
 		.dev			= dev,
 		.index			= index,
-		};
+	};
 
+	spin_lock_init(&lp->lock);
 	/*
 	 * If this name ends up conflicting with an existing registered
 	 * netdevice, that is OK, register_netdev{,ice}() will notice this
diff --git a/arch/xtensa/platforms/xtfpga/Makefile b/arch/xtensa/platforms/xtfpga/Makefile
index b9ae206340cd..7839d38b2337 100644
--- a/arch/xtensa/platforms/xtfpga/Makefile
+++ b/arch/xtensa/platforms/xtfpga/Makefile
@@ -6,4 +6,5 @@
 #
 # Note 2! The CFLAGS definitions are in the main makefile...
 
-obj-y			= setup.o lcd.o
+obj-y			+= setup.o
+obj-$(CONFIG_XTFPGA_LCD) += lcd.o
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index 6edd20bb4565..0a55bb9c5420 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -40,9 +40,6 @@
 
 /* UART */
 #define DUART16552_PADDR	(XCHAL_KIO_PADDR + 0x0D050020)
-/* LCD instruction and data addresses. */
-#define LCD_INSTR_ADDR		((char *)IOADDR(0x0D040000))
-#define LCD_DATA_ADDR		((char *)IOADDR(0x0D040004))
 
 /* Misc. */
 #define XTFPGA_FPGAREGS_VADDR	IOADDR(0x0D020000)
@@ -62,4 +59,7 @@
 				/* 5*rx buffs + 5*tx buffs */
 #define OETH_SRAMBUFF_SIZE	(5 * 0x600 + 5 * 0x600)
 
+#define C67X00_PADDR		(XCHAL_KIO_PADDR + 0x0D0D0000)
+#define C67X00_SIZE		0x10
+#define C67X00_IRQ		5
 #endif /* __XTENSA_XTAVNET_HARDWARE_H */
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
index 0e435645af5a..4c8541ed1139 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
@@ -11,10 +11,25 @@
 #ifndef __XTENSA_XTAVNET_LCD_H
 #define __XTENSA_XTAVNET_LCD_H
 
+#ifdef CONFIG_XTFPGA_LCD
 /* Display string STR at position POS on the LCD. */
 void lcd_disp_at_pos(char *str, unsigned char pos);
 
 /* Shift the contents of the LCD display left or right. */
 void lcd_shiftleft(void);
 void lcd_shiftright(void);
+#else
+static inline void lcd_disp_at_pos(char *str, unsigned char pos)
+{
+}
+
+static inline void lcd_shiftleft(void)
+{
+}
+
+static inline void lcd_shiftright(void)
+{
+}
+#endif
+
 #endif
diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c
index 2872301598df..4dc0c1b43f4b 100644
--- a/arch/xtensa/platforms/xtfpga/lcd.c
+++ b/arch/xtensa/platforms/xtfpga/lcd.c
@@ -1,50 +1,63 @@
 /*
- * Driver for the LCD display on the Tensilica LX60 Board.
+ * Driver for the LCD display on the Tensilica XTFPGA board family.
+ * http://www.mytechcorp.com/cfdata/productFile/File1/MOC-16216B-B-A0A04.pdf
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001, 2006 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
-/*
- *
- * FIXME: this code is from the examples from the LX60 user guide.
- *
- * The lcd_pause function does busy waiting, which is probably not
- * great. Maybe the code could be changed to use kernel timers, or
- * change the hardware to not need to wait.
- */
-
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
 
 #include <platform/hardware.h>
 #include <platform/lcd.h>
-#include <linux/delay.h>
 
-#define LCD_PAUSE_ITERATIONS	4000
+/* LCD instruction and data addresses. */
+#define LCD_INSTR_ADDR		((char *)IOADDR(CONFIG_XTFPGA_LCD_BASE_ADDR))
+#define LCD_DATA_ADDR		(LCD_INSTR_ADDR + 4)
+
 #define LCD_CLEAR		0x1
 #define LCD_DISPLAY_ON		0xc
 
 /* 8bit and 2 lines display */
 #define LCD_DISPLAY_MODE8BIT	0x38
+#define LCD_DISPLAY_MODE4BIT	0x28
 #define LCD_DISPLAY_POS		0x80
 #define LCD_SHIFT_LEFT		0x18
 #define LCD_SHIFT_RIGHT		0x1c
 
+static void lcd_put_byte(u8 *addr, u8 data)
+{
+#ifdef CONFIG_XTFPGA_LCD_8BIT_ACCESS
+	ACCESS_ONCE(*addr) = data;
+#else
+	ACCESS_ONCE(*addr) = data & 0xf0;
+	ACCESS_ONCE(*addr) = (data << 4) & 0xf0;
+#endif
+}
+
 static int __init lcd_init(void)
 {
-	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT;
 	mdelay(5);
-	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT;
 	udelay(200);
-	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT;
+	udelay(50);
+#ifndef CONFIG_XTFPGA_LCD_8BIT_ACCESS
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE4BIT;
+	udelay(50);
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT);
 	udelay(50);
-	*LCD_INSTR_ADDR = LCD_DISPLAY_ON;
+#endif
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_ON);
 	udelay(50);
-	*LCD_INSTR_ADDR = LCD_CLEAR;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_CLEAR);
 	mdelay(10);
 	lcd_disp_at_pos("XTENSA LINUX", 0);
 	return 0;
@@ -52,10 +65,10 @@ static int __init lcd_init(void)
 
 void lcd_disp_at_pos(char *str, unsigned char pos)
 {
-	*LCD_INSTR_ADDR = LCD_DISPLAY_POS | pos;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_POS | pos);
 	udelay(100);
 	while (*str != 0) {
-		*LCD_DATA_ADDR = *str;
+		lcd_put_byte(LCD_DATA_ADDR, *str);
 		udelay(200);
 		str++;
 	}
@@ -63,13 +76,13 @@ void lcd_disp_at_pos(char *str, unsigned char pos)
 
 void lcd_shiftleft(void)
 {
-	*LCD_INSTR_ADDR = LCD_SHIFT_LEFT;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_SHIFT_LEFT);
 	udelay(50);
 }
 
 void lcd_shiftright(void)
 {
-	*LCD_INSTR_ADDR = LCD_SHIFT_RIGHT;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_SHIFT_RIGHT);
 	udelay(50);
 }
 
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 57fd08b36f51..b4cf70e535ab 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -189,6 +189,7 @@ void __init platform_calibrate_ccount(void)
 #include <linux/serial_8250.h>
 #include <linux/if.h>
 #include <net/ethoc.h>
+#include <linux/usb/c67x00.h>
 
 /*----------------------------------------------------------------------------
  *  Ethernet -- OpenCores Ethernet MAC (ethoc driver)
@@ -233,6 +234,38 @@ static struct platform_device ethoc_device = {
 };
 
 /*----------------------------------------------------------------------------
+ *  USB Host/Device -- Cypress CY7C67300
+ */
+
+static struct resource c67x00_res[] = {
+	[0] = { /* register space */
+		.start = C67X00_PADDR,
+		.end   = C67X00_PADDR + C67X00_SIZE - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = { /* IRQ number */
+		.start = C67X00_IRQ,
+		.end   = C67X00_IRQ,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct c67x00_platform_data c67x00_pdata = {
+	.sie_config = C67X00_SIE1_HOST | C67X00_SIE2_UNUSED,
+	.hpi_regstep = 4,
+};
+
+static struct platform_device c67x00_device = {
+	.name = "c67x00",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(c67x00_res),
+	.resource = c67x00_res,
+	.dev = {
+		.platform_data = &c67x00_pdata,
+	},
+};
+
+/*----------------------------------------------------------------------------
  *  UART
  */
 
@@ -268,6 +301,7 @@ static struct platform_device xtavnet_uart = {
 /* platform devices */
 static struct platform_device *platform_devices[] __initdata = {
 	&ethoc_device,
+	&c67x00_device,
 	&xtavnet_uart,
 };